00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_REWARDFUNCTION_H
00033 #define C_REWARDFUNCTION_H
00034
00035 #include "cbaseobjects.h"
00036
00037 #include "newmat/newmat.h"
00038
00039 class CFeatureList;
00040 class CAbstractVFunction;
00041 class CFeatureVFunction;
00042
00044
00052 class CRewardFunction
00053 {
00054 public:
00055 virtual ~CRewardFunction() {};
00057 virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState) = 0;
00058 };
00059
00061
00066 class CFeatureRewardFunction : public CRewardFunction, public CStateObject
00067 {
00068 protected:
00069 CStateProperties *discretizer;
00070
00071 public:
00073 CFeatureRewardFunction(CStateProperties *discretizer);
00074 virtual ~CFeatureRewardFunction();
00075
00077 virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00079 virtual double getReward(int oldState, CAction *action, int newState) = 0;
00081
00084 virtual double getReward(CState *oldState, CAction *action, CState *newState);
00086 virtual double getReward(CFeatureList *oldState, CAction *action, CFeatureList *newState);
00087
00088 };
00089
00091
00095 class CStateReward : public CRewardFunction, public CStateObject
00096 {
00097 protected:
00098 CStateProperties *properties;
00099 public:
00100 CStateReward(CStateProperties *properties);
00101 virtual ~CStateReward() {};
00102
00103 virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00104
00105 virtual double getStateReward(CState *modelState) = 0;
00106 virtual void getInputDerivation(CState *, ColumnVector *) {};
00107 };
00108
00109 class CZeroReward : public CRewardFunction
00110 {
00111 public:
00112 virtual double getReward(CStateCollection *, CAction *, CStateCollection *) {return 0;};
00113 };
00114
00115 class CRewardFunctionFromValueFunction : public CRewardFunction
00116 {
00117 protected:
00118 CAbstractVFunction *vFunction;
00119 bool useNewState;
00120 public:
00121 CRewardFunctionFromValueFunction(CAbstractVFunction *vFunction, bool useNewState = true);
00122
00123 virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00124 };
00125
00126 class CFeatureRewardFunctionFromValueFunction : public CFeatureRewardFunction
00127 {
00128 protected:
00129 CFeatureVFunction *vFunction;
00130 bool useNewState;
00131 public:
00132 CFeatureRewardFunctionFromValueFunction(CStateModifier *discretizer, CFeatureVFunction *vFunction, bool useNewState = true);
00133 ~CFeatureRewardFunctionFromValueFunction();
00134
00135 virtual double getReward(int oldState, CAction *action, int newState);
00136 };
00137
00138
00139 #endif
00140
00141