00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_REINFORCE__H
00033 #define C_REINFORCE__H
00034
00035 #include "cparameters.h"
00036 #include "cagentlistener.h"
00037
00038 class CGradientUpdateFunction;
00039 class CGradientVETraces;
00040 class CStochasticPolicy;
00041 class CFeatureList;
00042
00043
00044 class CReinforcementBaseLineCalculator : virtual public CParameterObject
00045 {
00046 public:
00047 virtual double getReinforcementBaseLine(int feature) = 0;
00048 };
00049
00050 class CConstantReinforcementBaseLineCalculator : public CReinforcementBaseLineCalculator
00051 {
00052 public:
00053 CConstantReinforcementBaseLineCalculator(double b);
00054
00055 virtual double getReinforcementBaseLine(int feature);
00056 };
00057
00058 class CAverageReinforcementBaseLineCalculator : public CReinforcementBaseLineCalculator, public CSemiMDPRewardListener
00059 {
00060 protected:
00061 double averageReward;
00062 int steps;
00063 public:
00064 CAverageReinforcementBaseLineCalculator(CRewardFunction *reward, double minUpdateFactor);
00065
00066 virtual double getReinforcementBaseLine(int feature);
00067
00068 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00069 virtual void newEpisode();
00070 };
00071
00072 class CREINFORCELearner : public CSemiMDPRewardListener
00073 {
00074 protected:
00075 CStochasticPolicy *policy;
00076 CGradientUpdateFunction *updateFunction;
00077
00078 CReinforcementBaseLineCalculator *baseLine;
00079
00080 CFeatureList *gradient;
00081 CGradientVETraces *eTraces;
00082 public:
00083 CREINFORCELearner(CRewardFunction *reward, CStochasticPolicy *policy, CGradientUpdateFunction *updateFunction, CReinforcementBaseLineCalculator *baesLine);
00084 ~CREINFORCELearner();
00085
00086 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00087 virtual void newEpisode();
00088
00089 CGradientVETraces *getETraces();
00090
00091 };
00092
00093 #endif
00094