Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cactorcritic.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_ACTORCRITIC_H
00033 #define C_ACTORCRITIC_H
00034 
00035 #include "cerrorlistener.h"
00036 #include "cagentlistener.h"
00037 #include "cagentcontroller.h"
00038 
00039 // Forward declarations: this header refers to these types through pointers only.
00040 class CState;
00041 class CStateCollection;
00042 class CStateProperties;
00043 // Action types appearing in the receiveError() / getNextAction() signatures below.
00044 class CAction;
00045 class CActionData;
00046 class CActionDataSet;
00047 // Q-function and its e-traces type, held by CActorFromQFunction.
00048 class CAbstractQFunction;
00049 class CAbstractQETraces;
00050 // V-function and its e-traces type, held by CActorFromActionValue.
00051 class CAbstractVFunction;
00052 class CAbstractVETraces;
00053 // Policy type held by CActorFromQFunctionAndPolicy.
00054 class CStochasticPolicy;
00055 // Types held by CActorFromContinuousActionGradientPolicy.
00056 class CContinuousActionGradientPolicy;
00057 class CGradientVETraces;
00058 class CFeatureList;
00059 class CContinuousActionData;
00060 
00061 
00063 // Abstract actor base class: derives from CErrorListener and declares a pure virtual receiveError().
00067 class CActor : public CErrorListener
00068 {
00069 protected:
00070         // (no protected members are declared in this class)
00071 public:
00072         CActor();
00073         
00075         // Must be implemented by subclasses. Takes a critic value, a state collection, an action and optional action data (NULL by default).
00076         virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL) = 0;
00077         // NOTE(review): no learning-rate field and no destructor are declared here; storage and a virtual destructor are presumably provided by CErrorListener -- confirm.
00078 
00079         double getLearningRate();
00080         void setLearningRate(double learningRate);
00081 };
00082 
00084 // Actor that holds a CAbstractQFunction pointer and a CAbstractQETraces pointer; also a CSemiMDPListener.
00090 class CActorFromQFunction : public CActor, public CSemiMDPListener
00091 {
00092 protected:
00094         CAbstractQFunction *qFunction; // presumably the Q-function passed to the constructor -- confirm
00096         CAbstractQETraces *eTraces; // e-traces object; where it is created and who owns it is not visible here -- TODO confirm
00097 
00098 public:
00100         CActorFromQFunction(CAbstractQFunction *qFunction);
00101         virtual ~CActorFromQFunction();
00102 
00104         // Concrete declaration of CActor::receiveError(); same parameter list, data defaults to NULL.
00109         virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL);
00111         CAbstractQFunction *getQFunction(); // presumably returns the qFunction member -- confirm
00113         CAbstractQETraces *getETraces(); // presumably returns the eTraces member -- confirm
00114 
00116         virtual void newEpisode(); // NOTE(review): looks like a CSemiMDPListener callback (likely resets the e-traces) -- confirm
00117 
00118 };
00119 
00121 // Extension of CActorFromQFunction that additionally holds a CStochasticPolicy pointer and a double buffer.
00125 class CActorFromQFunctionAndPolicy : public CActorFromQFunction
00126 {
00127 protected:
00128         CStochasticPolicy *policy; // presumably the policy passed to the constructor -- confirm
00129         double *actionValues; // raw double array; size and ownership are not visible here -- TODO confirm (presumably one entry per action)
00130 
00131 public:
00133         CActorFromQFunctionAndPolicy(CAbstractQFunction *qFunction, CStochasticPolicy *policy);
00134         virtual ~CActorFromQFunctionAndPolicy();
00135 
00137         // Redeclares receiveError() from CActorFromQFunction; note the state parameter is named 'state' here rather than 'oldState'.
00139         virtual void receiveError(double critic, CStateCollection *state, CAction *Action, CActionData *data = NULL);
00140 
00141         CStochasticPolicy *getPolicy(); // presumably returns the policy member -- confirm
00142 
00143         
00144 };
00145 
00147 // Actor that is also an agent controller: holds a CAbstractVFunction pointer and a CAbstractVETraces pointer, and is a CSemiMDPListener.
00154 class CActorFromActionValue : public CAgentController, public CActor, public CSemiMDPListener
00155 {
00156 protected:
00157         CAbstractVFunction *vFunction; // presumably the V-function passed to the constructor -- confirm
00158         CAbstractVETraces *eTraces; // e-traces object; creation and ownership are not visible here -- TODO confirm
00159 
00160 public:
00161         CActorFromActionValue(CAbstractVFunction *vFunction, CAction *action1, CAction *action2); // takes exactly two actions; presumably the only actions this controller selects between -- confirm
00162         virtual ~CActorFromActionValue(); // declared virtual to match the other actor classes in this header
00163 
00165         virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL); // concrete declaration of CActor::receiveError(); data defaults to NULL
00166 
00167         virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL); // returns the action chosen for 'state'; data defaults to NULL
00169         virtual void newEpisode(); // NOTE(review): looks like a CSemiMDPListener callback (likely resets the e-traces) -- confirm
00170 };
00171 // Actor that holds a CContinuousActionGradientPolicy pointer plus gradient helper objects; also a CSemiMDPListener.
00172 class CActorFromContinuousActionGradientPolicy : public CActor, public CSemiMDPListener
00173 {
00174 protected:
00175         CContinuousActionGradientPolicy *gradientPolicy; // presumably the policy passed to the constructor -- confirm
00176         CGradientVETraces *gradientETraces; // e-traces object; creation and ownership are not visible here -- TODO confirm
00177         CFeatureList *gradientFeatureList; // presumably scratch storage for a policy gradient -- confirm
00178 
00179         CContinuousActionData *policyDifference; // NOTE(review): name suggests a difference between two continuous actions -- confirm in the implementation
00180 public:
00181         CActorFromContinuousActionGradientPolicy(CContinuousActionGradientPolicy *gradientPolicy);
00182         virtual ~CActorFromContinuousActionGradientPolicy();
00183 
00184         virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL); // concrete declaration of CActor::receiveError(); data defaults to NULL
00185         virtual void newEpisode(); // NOTE(review): looks like a CSemiMDPListener callback (likely resets the e-traces) -- confirm
00186 };
00187 
00188 
00189 class CActorForMultipleAgents : public CActor, public CAgentController
00190 {
00191         protected:
00192                 std::list<CActor *> *actors;
00193                 std::list<CAgentController *> *actionSets;
00194                 unsigned int numActions ;
00195                         
00196         public:
00197                 
00198                 CActorForMultipleAgents(CActionSet *actions);
00199 
00200                 virtual ~CActorForMultipleAgents();
00201 
00202 
00203                 void addActor(CActor *actor, CAgentController *policy);
00204 
00205                 virtual void receiveError(double critic, CStateCollection *state, CAction *action,  CActionData *data);
00206 
00207                 virtual CAction* getNextAction(CStateCollection *state, CActionDataSet *dataset);
00208 };
00209 
00210 
00211 #endif