Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cagentlistener.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef CSEMIMDPLISTENER_H
00033 #define CSEMIMDPLISTENER_H
00034 
00035 #include "cparameters.h"
00036 
// Forward declarations. This header only uses pointers to CAction,
// CStateCollection and CRewardFunction, so the full class definitions are not
// needed here. CState is declared as well, although nothing visible in this
// header refers to it.
00037 class CAction;
00038 class CState;
00039 class CStateCollection;
00040 class CRewardFunction;
00041 
00043 
/// Listener interface with three callbacks (nextStep, intermediateStep and
/// newEpisode). All three have empty default bodies, so a subclass only needs
/// to override the callbacks it is interested in. Inherits virtually from
/// CParameterObject.
///
/// NOTE(review): who invokes the callbacks, and when, is not visible in this
/// header -- presumably the agent / semi-MDP driving the learning loop; confirm.
/// NOTE(review): no destructor is declared here, so deleting an object through
/// a CSemiMDPListener* is only well-defined if CParameterObject declares a
/// virtual destructor (not visible in this header).
00059 class CSemiMDPListener : virtual public CParameterObject
00060 {
00061 public:
              /// Flag set to true by the constructor. It is not read anywhere in this
              /// header; presumably callers skip listeners whose flag is false -- TODO confirm.
00062         bool enabled;
00063         
              /// Creates a listener with `enabled` set to true.
00064         CSemiMDPListener() {enabled = true;};
00065 
              /// Step callback; does nothing unless overridden. The arguments are unnamed
              /// here; CSemiMDPRewardListener's override of this signature names them
              /// oldState, action and nextState.
00067         virtual void nextStep(CStateCollection *, CAction *, CStateCollection *) {};
              /// Callback with the same signature as nextStep(); does nothing unless
              /// overridden. NOTE(review): what distinguishes an "intermediate" step from
              /// a regular one is not established by this header.
00069         virtual void intermediateStep(CStateCollection *, CAction *, CStateCollection *) {};
              /// Episode callback; does nothing unless overridden. Presumably invoked
              /// when a new episode starts -- confirm against the caller.
00071         virtual void newEpisode() {};
00072 };
00073 
00074 
00076 
/// Listener that additionally holds a CRewardFunction and offers reward-carrying
/// variants of the step callbacks (the overloads taking an extra double).
///
/// The three-argument nextStep()/intermediateStep() overrides are only declared
/// here; their definitions are not in this header. NOTE(review): presumably they
/// evaluate the stored reward function and forward to the four-argument
/// overloads -- confirm in the implementation file.
00080 class CSemiMDPRewardListener : public CSemiMDPListener
00081 {
00082 protected:
              /// Reward function associated with this listener; see the constructor and
              /// setRewardFunction()/getRewardFunction(). Ownership is not expressed by
              /// this header.
00084         CRewardFunction *semiMDPRewardFunction;
00085 
00086 public:
              /// Constructs the listener for the given reward function (defined elsewhere).
00088         CSemiMDPRewardListener(CRewardFunction *semiMDPRewardFunction);
00089 
              /// Override of CSemiMDPListener::nextStep(); defined outside this header.
00091         virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
              /// Step callback with an additional double argument (named `reward` in
              /// CAdaptiveParameterFromAverageRewardCalculator's override). Does nothing
              /// unless overridden.
00093         virtual void nextStep(CStateCollection *, CAction *, double , CStateCollection *) {};
00094 
              /// Override of CSemiMDPListener::intermediateStep(); defined outside this header.
00096         virtual void intermediateStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
              /// Intermediate-step callback with an additional double argument; does
              /// nothing unless overridden.
00098         virtual void intermediateStep(CStateCollection *, CAction *, double , CStateCollection *) {};
00099 
              /// Setter for the reward function (defined elsewhere).
00100         void setRewardFunction(CRewardFunction *semiMDPRewardFunction);
              /// Getter for the reward function (defined elsewhere).
00101         CRewardFunction *getRewardFunction();
00102 };
00103 
00105 
/// Adaptive-parameter calculator that is also a CSemiMDPListener and overrides
/// the nextStep() callback. Derives from
/// CAdaptiveParameterUnBoundedValuesCalculator, which is not declared in this
/// header (presumably it comes from "cparameters.h").
///
/// NOTE(review): judging by the names, the adapted parameter is a function of
/// the number of steps seen, updated every nStepsPerUpdate steps -- the logic
/// lives in the implementation file; confirm there.
00112 class CAdaptiveParameterFromNStepsCalculator : public CAdaptiveParameterUnBoundedValuesCalculator, public CSemiMDPListener
00113 {
00114 protected:
              // Integer state of the calculator; presumably the running step count
              // used as the input of the adaptation function -- TODO confirm.
00115         int targetValue;
              // Presumably initialised from the constructor argument of the same name.
00116         int nStepsPerUpdate;
00117 public:
              /// Constructor (defined elsewhere). The meaning of functionKind, param0,
              /// paramScale, targetOffset and targetScale is defined by the base
              /// calculator class and is not visible in this header.
00118         CAdaptiveParameterFromNStepsCalculator(CParameters *targetObject, string targetParameter, int nStepsPerUpdate, int functionKind, double param0, double paramScale, double targetOffset, double targetScale);
00119         virtual ~CAdaptiveParameterFromNStepsCalculator();
00120 
              /// Override of CSemiMDPListener::nextStep(); defined outside this header.
00121         virtual void nextStep(CStateCollection *, CAction *, CStateCollection *);
              /// Forwards explicitly to the calculator base class. NOTE(review): likely
              /// needed to pick one implementation, since the listener base also derives
              /// from CParameterObject -- confirm against cparameters.h.
00122         virtual void onParametersChanged(){CAdaptiveParameterUnBoundedValuesCalculator::onParametersChanged();}; 
00123         
              /// Resets the calculator (defined elsewhere).
00124         virtual void resetCalculator();
00125 };
00126 
00128 
/// Adaptive-parameter calculator that is also a CSemiMDPListener and overrides
/// the newEpisode() callback. Derives from
/// CAdaptiveParameterUnBoundedValuesCalculator, which is not declared in this
/// header (presumably it comes from "cparameters.h").
///
/// NOTE(review): judging by the names, the adapted parameter is a function of
/// the number of episodes seen -- the logic lives in the implementation file;
/// confirm there.
00135 class CAdaptiveParameterFromNEpisodesCalculator : public CAdaptiveParameterUnBoundedValuesCalculator, public CSemiMDPListener
00136 {
00137 protected:
              // Integer state of the calculator; presumably the running episode count
              // used as the input of the adaptation function -- TODO confirm.
00138         int targetValue;
00139 public:
              /// Constructor (defined elsewhere). The meaning of functionKind, param0,
              /// paramScale, targetOffset and targetScale is defined by the base
              /// calculator class and is not visible in this header.
00140         CAdaptiveParameterFromNEpisodesCalculator(CParameters *targetObject, string targetParameter, int functionKind, double param0, double paramScale, double targetOffset, double targetScale);
00141         virtual ~CAdaptiveParameterFromNEpisodesCalculator();
00142 
              /// Override of CSemiMDPListener::newEpisode(); defined outside this header.
00143         virtual void newEpisode();
              /// Forwards explicitly to the calculator base class. NOTE(review): likely
              /// needed to pick one implementation, since the listener base also derives
              /// from CParameterObject -- confirm against cparameters.h.
00144         virtual void onParametersChanged(){CAdaptiveParameterUnBoundedValuesCalculator::onParametersChanged();}; 
00145 
              /// Resets the calculator (defined elsewhere).
00146         virtual void resetCalculator();
00147 };
00148 
00149 
00151 
/// Adaptive-parameter calculator that is also a CSemiMDPRewardListener and
/// overrides the reward-carrying nextStep() callback. Derives from
/// CAdaptiveParameterBoundedValuesCalculator, which is not declared in this
/// header (presumably it comes from "cparameters.h").
///
/// NOTE(review): judging by the names, the adapted parameter follows an
/// average of the observed rewards, with `alpha` as the averaging factor and an
/// update every nStepsPerUpdate steps -- the logic lives in the implementation
/// file; confirm there.
/// NOTE(review): only the four-argument nextStep() is declared in this class,
/// which hides the inherited three-argument overload for name lookup on this
/// type (calls through a base-class pointer or reference are unaffected).
00159 class CAdaptiveParameterFromAverageRewardCalculator : public CAdaptiveParameterBoundedValuesCalculator, public CSemiMDPRewardListener
00160 {
00161 protected:
              // Presumably initialised from the constructor argument of the same name.
00162         double alpha;
              // Floating-point state of the calculator; presumably the current reward
              // average used as the input of the adaptation function -- TODO confirm.
00163         double targetValue;
              // Presumably counts steps since the last update -- TODO confirm.
00164         int nSteps;
              // Presumably initialised from the constructor argument of the same name.
00165         int nStepsPerUpdate;
00166 public:
              /// Constructor (defined elsewhere). `reward` is presumably passed on to the
              /// CSemiMDPRewardListener base. The meaning of functionKind, paramMin,
              /// paramMax, targetMin and targetMax is defined by the base calculator
              /// class and is not visible in this header.
00167         CAdaptiveParameterFromAverageRewardCalculator(CParameters *targetObject, string targetParameter, CRewardFunction *reward, int nStepsPerUpdate, int functionKind, double paramMin, double paramMax, double targetMin, double targetMax, double alpha);
              // Declared without the `virtual` keyword, unlike the other two calculators
              // in this header; it is still virtual if any base-class destructor is.
00168         ~CAdaptiveParameterFromAverageRewardCalculator();
00169 
              /// Override of the four-argument CSemiMDPRewardListener::nextStep();
              /// defined outside this header.
00170         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
              /// Declared here and defined elsewhere (the other two calculators in this
              /// header forward inline to their calculator base instead).
00171         virtual void onParametersChanged(); 
00172         
              /// Resets the calculator (defined elsewhere).
00173         virtual void resetCalculator();
00174 };
00175 
00176 #endif // CSEMIMDPLISTENER_H
00177