00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_MONTECARLO__H
00033 #define C_MONTECARLO__H
00034
00035 #include "cbatchlearning.h"
00036 #include "cevaluator.h"
00037 #include "cparameters.h"
00038
// Integer codes for the error measure.
// NOTE(review): presumably the values accepted by CMonteCarloError::errorFunction,
// with MSE = mean squared error and MAE = mean absolute error — confirm in the .cpp.
#define MC_MSE 0
#define MC_MAE 1

// Forward declarations for types that this header only uses through pointers.
class CAgent;
class CEpisode;
class CRewardFunction;

// Previously declared twice in a row; a single forward declaration is sufficient.
class CStateCollectionImpl;

class CSemiMDPSender;
class CRewardHistory;
class CRewardEpisode;
class CEpisodeHistory;
00053
00054 class CMonteCarloError : public CEvaluator, public CParameterObject
00055 {
00056 protected:
00057 CAgent *agent;
00058 CEpisode *episode;
00059 CRewardFunction *rewardFunction;
00060
00061 CStateCollectionImpl *oldState;
00062 CStateCollectionImpl *newState;
00063
00064 int nEpisodes;
00065 int nStepsPerEpisode;
00066
00067 CSemiMDPSender *semiMDPSender;
00068
00069 virtual double getValue(CStateCollection *state, CAction *action) = 0;
00070
00071 CRewardHistory *rewardLogger;
00072 CEpisodeHistory *episodeHistory;
00073 public:
00074 bool useRewardEpisode;
00075 int errorFunction;
00076
00077 CMonteCarloError(CAgent *agent, CRewardFunction *reward, CStateProperties *modelState, CActionSet *actions, std::list<CStateModifier *> *modifiers, int numEpisodes, int numSteps, double discountFactor);
00078 virtual ~CMonteCarloError();
00079
00080 void setEpisodeHistory(CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger);
00081
00082 double getMonteCarloError(CEpisode *episode, CRewardEpisode *rewardEpisode);
00083 double getMeanMonteCarloError(CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger);
00084
00085 virtual double evaluate();
00086
00087 void setSemiMDPSender(CSemiMDPSender *sender);
00088
00089 };
00090
00091 class CMonteCarloVError : public CMonteCarloError
00092 {
00093 protected:
00094 CAbstractVFunction *vFunction;
00095
00096 virtual double getValue(CStateCollection *state, CAction *action);
00097 public:
00098 CMonteCarloVError(CAbstractVFunction *vFunction, CAgent *agent, CRewardFunction *reward, CStateProperties *modelState, CActionSet *actions, std::list<CStateModifier *> *modifiers, int numEpisodes, int numSteps, double discountFactor);
00099 virtual ~CMonteCarloVError();
00100 };
00101
00102 class CMonteCarloQError : public CMonteCarloError
00103 {
00104 protected:
00105 CAbstractQFunction *qFunction;
00106
00107 virtual double getValue(CStateCollection *state, CAction *action);
00108 public:
00109 CMonteCarloQError(CAbstractQFunction *vFunction, CAgent *agent, CRewardFunction *reward, CStateProperties *modelState, CActionSet *actions, std::list<CStateModifier *> *modifiers, int numEpisodes, int numSteps, double discountFactor);
00110 virtual ~CMonteCarloQError();
00111 };
00112
00113 class CMonteCarloSupervisedLearner : public CPolicyEvaluation
00114 {
00115 protected:
00116
00117 CEpisodeHistory *episodeHistory;
00118 CRewardHistory *rewardLogger;
00119
00120 CBatchDataGenerator *dataGenerator;
00121
00122 public:
00123 CMonteCarloSupervisedLearner(CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CBatchDataGenerator *dataGenerator);
00124
00125 virtual ~CMonteCarloSupervisedLearner();
00126
00127 virtual void evaluatePolicy(int trials);
00128 };
00129
00130
00131
00132 class CMonteCarloVLearner : public CMonteCarloSupervisedLearner
00133 {
00134 protected:
00135 public:
00136 CMonteCarloVLearner(CAbstractVFunction *vFunction, CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CSupervisedLearner *learner);
00137
00138 virtual ~CMonteCarloVLearner();
00139 };
00140
00141 class CMonteCarloCAQLearner : public CMonteCarloSupervisedLearner
00142 {
00143 protected:
00144
00145 public:
00146 CMonteCarloCAQLearner(CStateProperties *properties, CContinuousActionQFunction *qFunction, CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CSupervisedLearner *learner);
00147
00148 virtual ~CMonteCarloCAQLearner();
00149 };
00150
00151
00152 class CMonteCarloQLearner : public CMonteCarloSupervisedLearner
00153 {
00154 protected:
00155 public:
00156 CMonteCarloQLearner(CQFunction *qFunction, CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CSupervisedQFunctionLearner *learner);
00157
00158 virtual ~CMonteCarloQLearner();
00159 };
00160
00161
00162 #endif
00163