Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cevaluator.h

Go to the documentation of this file.
00001 #ifndef C_EVALUATOR__H 
00002 #define C_EVALUATOR__H 
00003 
00004 #include "cagentlistener.h"
00005 
// Forward declarations. Each of these types is used in this header only
// through pointers, so the full class definitions are not required here.
class CStateList;
class CTransitionFunctionEnvironment;
class CAgent;
class CSemiMDPSender;
class CAgentController;
class CActionDataSet;
class CDeterministicController;
00014 
/// Minimal abstract interface for anything that can be scored with a single
/// number.
///
/// The class has no state; implementers only have to provide evaluate().
class CEvaluator
{
public:
    /// Virtual so that objects can be destroyed through a CEvaluator pointer.
    virtual ~CEvaluator() {}

    /// Compute and return the evaluation result.
    /// @return The score as defined by the concrete subclass.
    virtual double evaluate() = 0;
};
00023 
00024 
00025 class CPolicyEvaluator : public CSemiMDPRewardListener, public CEvaluator 
00026 {
00027         protected:
00028                 CAgent *agent;
00029                 CAgentController *controller;
00030                 CDeterministicController *detController;
00031 
00032                 double policyValue;
00033         
00034                 int nEpisodes;
00035                 int nStepsPerEpisode;
00036 
00037                 virtual double getEpisodeValue() = 0;
00038         public:
00039                 CPolicyEvaluator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode);
00040                 virtual ~CPolicyEvaluator() {};
00041 
00042                 virtual double evaluatePolicy();
00043                 virtual double evaluate() {return evaluatePolicy();};
00044         
00045                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState) = 0;
00046 
00047                 virtual void setStepsPerEpisode(int steps) {nStepsPerEpisode = steps;};
00048 
00049                 virtual void setAgentController(CAgentController *controller);
00050                 virtual void setDeterministicController(CDeterministicController *detController);
00051 };
00052 
00053 
00054 class CAverageRewardCalculator : public CPolicyEvaluator
00055 {
00056         protected:
00057                 int nSteps;
00058                 double averageReward;
00059                 double minReward;
00060 
00061                 virtual double getEpisodeValue();
00062         public:
00063                 CAverageRewardCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode,double minReward = -2.0);
00064                 virtual ~CAverageRewardCalculator(){};
00065 
00066                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00067                 virtual void newEpisode();
00068 };
00069 
00070 class CRewardPerEpisodeCalculator : public CPolicyEvaluator
00071 {
00072         protected:
00073                 
00074                 double reward;;
00075                 
00076 
00077                 virtual double getEpisodeValue();
00078         public:
00079                 CRewardPerEpisodeCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode);
00080 
00081                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00082                 virtual void newEpisode();
00083 };
00084 
00085 class CValueCalculator : public CPolicyEvaluator
00086 {
00087         protected:
00088                 int nSteps;
00089                 double value;
00090 
00091                 virtual double getEpisodeValue();
00092         public:
00093                 CValueCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode, double gamma);
00094 
00095                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00096                 virtual void newEpisode();
00097 };
00098 
00099 class CPolicySameStateEvaluator : public CPolicyEvaluator
00100 {
00101         protected:
00102                 CStateList *startStates;
00103                 CTransitionFunctionEnvironment *environment;
00104 
00105                 CSemiMDPSender *sender;
00106 
00107                 virtual double getEpisodeValue() = 0;
00108         public:
00109                 CPolicySameStateEvaluator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode);
00110                 CPolicySameStateEvaluator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, int numStartStates, int nStepsPerEpisode);
00111                 virtual ~CPolicySameStateEvaluator();
00112         
00113                 void setSemiMDPSender(CSemiMDPSender  *l_sender) {sender = l_sender;};
00114 
00115                 virtual double evaluatePolicy();
00116 
00117                 virtual double getValueForState(CState *state, int nSteps);
00118                 virtual double getActionValueForState(CState *state, CAction *action, int nSteps);
00119 
00120 
00121                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState) = 0;
00122 
00123                 virtual CStateList *getStartStates(){return startStates;};
00124                 virtual void setStartStates(CStateList *newList);
00125 
00126                 void getNewStartStates(int numStartStates);
00127 };
00128 
00129 class CAverageRewardSameStateCalculator : public CPolicySameStateEvaluator
00130 {
00131         protected:
00132                 int nSteps;
00133                 double averageReward;
00134                 double minReward;
00135 
00136                 virtual double getEpisodeValue();
00137         public:
00138                 CAverageRewardSameStateCalculator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode, double minReward = -2.0);
00139 
00140                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00141                 virtual void newEpisode();
00142 };
00143 
00144 class CValueSameStateCalculator : public CPolicySameStateEvaluator
00145 {
00146         protected:
00147                 int nSteps;
00148                 double value;
00149 
00150                 virtual double getEpisodeValue();
00151         public:
00152                 CValueSameStateCalculator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode, double gamma);
00153 
00154                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00155                 virtual void newEpisode();
00156 };
00157 
00158 class CPolicyGreedynessEvaluator : public CPolicyEvaluator
00159 {
00160         protected:
00161                 CAgentController *greedyPolicy;
00162                 CActionDataSet *actionDataSet;
00163 
00164                 virtual double getEpisodeValue();
00165 
00166                 int nGreedyActions;
00167         public:
00168                 CPolicyGreedynessEvaluator(CAgent *agent, CRewardFunction *reward, int nEpisodes, int nStepsPerEpsiode, CAgentController *l_greedyPolicy);
00169                 ~CPolicyGreedynessEvaluator();
00170 
00171                 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00172                 virtual void newEpisode();
00173 
00174 };
00175 
00176 
00177 
00178 #endif