00001 #ifndef C_EVALUATOR__H
00002 #define C_EVALUATOR__H
00003
00004 #include "cagentlistener.h"
00005
// Forward declarations of the toolbox types that this header only refers to
// through pointers. Each type is declared exactly once.
class CActionDataSet;
class CAgent;
class CAgentController;
class CDeterministicController;
class CSemiMDPSender;
class CStateList;
class CTransitionFunctionEnvironment;
00014
/// Abstract interface for an object that can be evaluated to a single
/// floating point number.
class CEvaluator
{
public:
    /// Virtual so that concrete evaluators can be destroyed through a
    /// CEvaluator pointer.
    virtual ~CEvaluator() {}

    /// Performs the evaluation and returns its result.
    /// Must be implemented by every concrete evaluator.
    virtual double evaluate() = 0;
};
00023
00024
00025 class CPolicyEvaluator : public CSemiMDPRewardListener, public CEvaluator
00026 {
00027 protected:
00028 CAgent *agent;
00029 CAgentController *controller;
00030 CDeterministicController *detController;
00031
00032 double policyValue;
00033
00034 int nEpisodes;
00035 int nStepsPerEpisode;
00036
00037 virtual double getEpisodeValue() = 0;
00038 public:
00039 CPolicyEvaluator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode);
00040 virtual ~CPolicyEvaluator() {};
00041
00042 virtual double evaluatePolicy();
00043 virtual double evaluate() {return evaluatePolicy();};
00044
00045 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState) = 0;
00046
00047 virtual void setStepsPerEpisode(int steps) {nStepsPerEpisode = steps;};
00048
00049 virtual void setAgentController(CAgentController *controller);
00050 virtual void setDeterministicController(CDeterministicController *detController);
00051 };
00052
00053
00054 class CAverageRewardCalculator : public CPolicyEvaluator
00055 {
00056 protected:
00057 int nSteps;
00058 double averageReward;
00059 double minReward;
00060
00061 virtual double getEpisodeValue();
00062 public:
00063 CAverageRewardCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode,double minReward = -2.0);
00064 virtual ~CAverageRewardCalculator(){};
00065
00066 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00067 virtual void newEpisode();
00068 };
00069
00070 class CRewardPerEpisodeCalculator : public CPolicyEvaluator
00071 {
00072 protected:
00073
00074 double reward;;
00075
00076
00077 virtual double getEpisodeValue();
00078 public:
00079 CRewardPerEpisodeCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode);
00080
00081 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00082 virtual void newEpisode();
00083 };
00084
00085 class CValueCalculator : public CPolicyEvaluator
00086 {
00087 protected:
00088 int nSteps;
00089 double value;
00090
00091 virtual double getEpisodeValue();
00092 public:
00093 CValueCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode, double gamma);
00094
00095 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00096 virtual void newEpisode();
00097 };
00098
00099 class CPolicySameStateEvaluator : public CPolicyEvaluator
00100 {
00101 protected:
00102 CStateList *startStates;
00103 CTransitionFunctionEnvironment *environment;
00104
00105 CSemiMDPSender *sender;
00106
00107 virtual double getEpisodeValue() = 0;
00108 public:
00109 CPolicySameStateEvaluator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode);
00110 CPolicySameStateEvaluator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, int numStartStates, int nStepsPerEpisode);
00111 virtual ~CPolicySameStateEvaluator();
00112
00113 void setSemiMDPSender(CSemiMDPSender *l_sender) {sender = l_sender;};
00114
00115 virtual double evaluatePolicy();
00116
00117 virtual double getValueForState(CState *state, int nSteps);
00118 virtual double getActionValueForState(CState *state, CAction *action, int nSteps);
00119
00120
00121 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState) = 0;
00122
00123 virtual CStateList *getStartStates(){return startStates;};
00124 virtual void setStartStates(CStateList *newList);
00125
00126 void getNewStartStates(int numStartStates);
00127 };
00128
00129 class CAverageRewardSameStateCalculator : public CPolicySameStateEvaluator
00130 {
00131 protected:
00132 int nSteps;
00133 double averageReward;
00134 double minReward;
00135
00136 virtual double getEpisodeValue();
00137 public:
00138 CAverageRewardSameStateCalculator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode, double minReward = -2.0);
00139
00140 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00141 virtual void newEpisode();
00142 };
00143
00144 class CValueSameStateCalculator : public CPolicySameStateEvaluator
00145 {
00146 protected:
00147 int nSteps;
00148 double value;
00149
00150 virtual double getEpisodeValue();
00151 public:
00152 CValueSameStateCalculator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode, double gamma);
00153
00154 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00155 virtual void newEpisode();
00156 };
00157
00158 class CPolicyGreedynessEvaluator : public CPolicyEvaluator
00159 {
00160 protected:
00161 CAgentController *greedyPolicy;
00162 CActionDataSet *actionDataSet;
00163
00164 virtual double getEpisodeValue();
00165
00166 int nGreedyActions;
00167 public:
00168 CPolicyGreedynessEvaluator(CAgent *agent, CRewardFunction *reward, int nEpisodes, int nStepsPerEpsiode, CAgentController *l_greedyPolicy);
00169 ~CPolicyGreedynessEvaluator();
00170
00171 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00172 virtual void newEpisode();
00173
00174 };
00175
00176
00177
00178 #endif