00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_REWARDMODEL_H
00033 #define C_REWARDMODEL_H
00034
00035 #include <map>
00036 #include <vector>
00037
00038 #include "crewardfunction.h"
00039 #include "clearndataobject.h"
00040 #include "cbaseobjects.h"
00041 #include "cutility.h"
00042 #include "cagentlistener.h"
00043
00044 class CAbstractFeatureStochasticEstimatedModel;
00045
00046 class CFeatureStateRewardFunction : public CFeatureRewardFunction
00047 {
00048 protected:
00049 std::map<int, double> *rewards;
00050 public:
00051 CFeatureStateRewardFunction(CStateProperties *discretizer);
00052 virtual ~CFeatureStateRewardFunction();
00053
00054 virtual double getReward(int oldState, CAction *action, int newState);
00055 virtual double getReward(int state);
00056
00057 virtual void setReward(int state, double reward);
00058 };
00059
00060
00072 class CFeatureRewardModel : public CFeatureRewardFunction, public CSemiMDPRewardListener, public CActionObject, public CLearnDataObject
00073 {
00074 protected:
00076 CMyArray2D<CFeatureMap *> *rewardTable;
00078
00080 CMyArray2D<CFeatureMap *> *visitTable;
00081
00083 CAbstractFeatureStochasticEstimatedModel *model;
00084
00085 bool bExternVisitSparse;
00086
00088
00091 double getTransitionVisits(int oldState, int action, int newState);
00092
00093 public:
00095
00097 CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *discretizer);
00099 CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CStateModifier *discretizer);
00100 virtual ~CFeatureRewardModel();
00101
00103
00106 virtual double getReward(int oldState, CAction *action, int newState);
00107 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
00108
00110
00111 virtual void saveData(FILE *stream);
00113
00114 virtual void loadData(FILE *stream);
00115
00116 virtual void resetData();
00117 };
00118
00119 class CFeatureStateRewardModel : public CFeatureRewardFunction, public CSemiMDPRewardListener, public CLearnDataObject
00120 {
00121 protected:
00122 double *rewards;
00123 double *visits;
00124
00125 double rewardMean;
00126 int numRewards;
00127
00128 public:
00130
00132 CFeatureStateRewardModel(CRewardFunction *function, CStateModifier *discretizer);
00134 virtual ~CFeatureStateRewardModel();
00135
00137
00140 virtual double getReward(CState *oldState, CAction *action, CState *newState);
00142
00145 virtual double getReward(int oldState, CAction *action, int newState);
00146 virtual double getReward(int newState);
00147
00148 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
00149
00151
00152 virtual void saveData(FILE *stream);
00154
00155 virtual void loadData(FILE *stream);
00156
00157 virtual void resetData();
00158 };
00159
00161
00166 class CRewardEpisode : public CSemiMDPRewardListener
00167 {
00168 protected:
00170 std::vector<double> *rewards;
00171 public:
00172 CRewardEpisode(CRewardFunction *rewardFunction);
00173 virtual ~CRewardEpisode();
00174
00176 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
00178 virtual void newEpisode();
00179
00180 int getNumRewards();
00182 double getReward(int index);
00183
00184 double getMeanReward();
00185 double getLastStepsMeanReward(int Steps);
00186
00187
00188 virtual void saveBIN(FILE *stream);
00189 virtual void saveData(FILE *stream);
00190 virtual void loadBIN(FILE *stream);
00191 virtual void loadData(FILE *stream);
00192 };
00193
00194 class CRewardHistory
00195 {
00196 protected:
00197
00198 public:
00199 CRewardHistory() {};
00200 virtual ~CRewardHistory(){};
00201
00202 virtual CRewardEpisode* getEpisode(int index) = 0;
00203 virtual int getNumEpisodes() = 0;
00204 };
00205
00206 class CRewardHistorySubset : public CRewardHistory
00207 {
00208 protected:
00209 CRewardHistory *episodes;
00210 std::vector<int> *indices;
00211 public:
00212 CRewardHistorySubset(CRewardHistory *episodes, std::vector<int> *indices);
00213 virtual ~CRewardHistorySubset();
00214
00216 virtual int getNumEpisodes();
00218 virtual CRewardEpisode* getEpisode(int index);
00219 };
00220
00221 class CRewardLogger : public CSemiMDPRewardListener, public CLearnDataObject, public CRewardHistory
00222 {
00223 protected:
00225 char filename[512];
00227 FILE* file;
00228
00229 char loadFileName[512];
00230
00232 int holdMemory;
00233
00235 std::list<CRewardEpisode *> *episodes;
00237 CRewardEpisode *currentEpisode;
00238
00239 void init();
00240 public:
00241
00242 CRewardLogger(CRewardFunction *rewardFunction, char* autoSavefile, int holdMemory);
00243 CRewardLogger(CRewardFunction *rewardFunction);
00244 CRewardLogger(char *loadFile, CRewardFunction *rewardFunction);
00245
00246 virtual ~CRewardLogger();
00247
00248 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00249 virtual void newEpisode();
00250
00251 void setAutoSaveFile(char *filename);
00252 virtual void saveBIN(FILE *stream);
00253
00254 virtual void saveData(FILE *stream);
00255 void loadBIN(FILE *stream, int episodes = -1);
00256 virtual void loadData(FILE *stream, int episodes = -1);
00257 virtual void loadData(FILE *stream);
00258
00259 virtual int getNumEpisodes();
00260
00261 virtual CRewardEpisode* getCurrentEpisode();
00262 virtual CRewardEpisode* getEpisode(int index);
00263
00264 void clearAutoSaveFile();
00265 void setLoadDataFile(char *loadData);
00266
00267 virtual void resetData();
00268 };
00269
00271
00281 class CSemiMDPLastNRewardFunction : public CRewardFunction, public CRewardEpisode
00282 {
00283 protected:
00285 double gamma;
00286 public:
00288 CSemiMDPLastNRewardFunction(CRewardFunction *rewardFunction, double gamma);
00289 virtual ~CSemiMDPLastNRewardFunction();
00291
00295 virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00296 };
00297
00298 #endif // C_REWARDMODEL_H
00299