00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_ACTORCRITIC_H
00033 #define C_ACTORCRITIC_H
00034
00035 #include "cerrorlistener.h"
00036 #include "cagentlistener.h"
00037 #include "cagentcontroller.h"
00038
00039
// Forward declarations: this header only uses the types below through
// pointers, so their full definitions are not required here.

// States.
class CState;
class CStateCollection;
class CStateProperties;

// Actions and per-action data.
class CAction;
class CActionData;
class CActionDataSet;

// Q-functions and their eligibility traces.
class CAbstractQFunction;
class CAbstractQETraces;

// V-functions and their eligibility traces.
class CAbstractVFunction;
class CAbstractVETraces;

// Policies.
class CStochasticPolicy;

// Types used by CActorFromContinuousActionGradientPolicy.
class CContinuousActionGradientPolicy;
class CGradientVETraces;
class CFeatureList;
class CContinuousActionData;
00060
00061
00063
00067 class CActor : public CErrorListener
00068 {
00069 protected:
00070
00071 public:
00072 CActor();
00073
00075
00076 virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL) = 0;
00077
00078
00079 double getLearningRate();
00080 void setLearningRate(double learningRate);
00081 };
00082
00084
00090 class CActorFromQFunction : public CActor, public CSemiMDPListener
00091 {
00092 protected:
00094 CAbstractQFunction *qFunction;
00096 CAbstractQETraces *eTraces;
00097
00098 public:
00100 CActorFromQFunction(CAbstractQFunction *qFunction);
00101 virtual ~CActorFromQFunction();
00102
00104
00109 virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL);
00111 CAbstractQFunction *getQFunction();
00113 CAbstractQETraces *getETraces();
00114
00116 virtual void newEpisode();
00117
00118 };
00119
00121
00125 class CActorFromQFunctionAndPolicy : public CActorFromQFunction
00126 {
00127 protected:
00128 CStochasticPolicy *policy;
00129 double *actionValues;
00130
00131 public:
00133 CActorFromQFunctionAndPolicy(CAbstractQFunction *qFunction, CStochasticPolicy *policy);
00134 virtual ~CActorFromQFunctionAndPolicy();
00135
00137
00139 virtual void receiveError(double critic, CStateCollection *state, CAction *Action, CActionData *data = NULL);
00140
00141 CStochasticPolicy *getPolicy();
00142
00143
00144 };
00145
00147
00154 class CActorFromActionValue : public CAgentController, public CActor, public CSemiMDPListener
00155 {
00156 protected:
00157 CAbstractVFunction *vFunction;
00158 CAbstractVETraces *eTraces;
00159
00160 public:
00161 CActorFromActionValue(CAbstractVFunction *vFunction, CAction *action1, CAction *action2);
00162 ~CActorFromActionValue();
00163
00165 virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL);
00166
00167 virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL);
00169 virtual void newEpisode();
00170 };
00171
00172 class CActorFromContinuousActionGradientPolicy : public CActor, public CSemiMDPListener
00173 {
00174 protected:
00175 CContinuousActionGradientPolicy *gradientPolicy;
00176 CGradientVETraces *gradientETraces;
00177 CFeatureList *gradientFeatureList;
00178
00179 CContinuousActionData *policyDifference;
00180 public:
00181 CActorFromContinuousActionGradientPolicy(CContinuousActionGradientPolicy *gradientPolicy);
00182 virtual ~CActorFromContinuousActionGradientPolicy();
00183
00184 virtual void receiveError(double critic, CStateCollection *oldState, CAction *Action, CActionData *data = NULL);
00185 virtual void newEpisode();
00186 };
00187
00188
00189 class CActorForMultipleAgents : public CActor, public CAgentController
00190 {
00191 protected:
00192 std::list<CActor *> *actors;
00193 std::list<CAgentController *> *actionSets;
00194 unsigned int numActions ;
00195
00196 public:
00197
00198 CActorForMultipleAgents(CActionSet *actions);
00199
00200 virtual ~CActorForMultipleAgents();
00201
00202
00203 void addActor(CActor *actor, CAgentController *policy);
00204
00205 virtual void receiveError(double critic, CStateCollection *state, CAction *action, CActionData *data);
00206
00207 virtual CAction* getNextAction(CStateCollection *state, CActionDataSet *dataset);
00208 };
00209
00210
00211 #endif