00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C__PEGASUS__H
00033 #define C__PEGASUS__H
00034
00035 #include "cparameters.h"
00036 #include "cpolicygradient.h"
00037 #include "cagentlistener.h"
00038
// Forward declarations. Every one of these types is used in this header only
// through pointers, so the full class definitions are not needed here.
class CAgent;
class CCAGradientPolicyInputDerivationCalculator;
class CContinuousActionData;
class CContinuousActionGradientPolicy;
class CContinuousTimeAndActionTransitionFunction;
class CFeatureList;
class CPolicySameStateEvaluator;
class CRewardFunction;
class CState;
class CStateList;
class CStateReward;
class CTransitionFunctionEnvironment;
00052
00053 class CTransitionFunctionInputDerivationCalculator : virtual public CParameterObject
00054 {
00055 protected:
00056 CContinuousTimeAndActionTransitionFunction *dynModel;
00057 CState *nextState;
00058 CContinuousActionData *buffData;
00059
00060 public:
00061 CTransitionFunctionInputDerivationCalculator(CContinuousTimeAndActionTransitionFunction *dynModel);
00062 ~CTransitionFunctionInputDerivationCalculator();
00063
00064 virtual void getInputDerivation(CState *currentState, CContinuousActionData *data, Matrix *dModelInput) = 0;
00065 };
00066
00067 class CTransitionFunctionNumericalInputDerivationCalculator : public CTransitionFunctionInputDerivationCalculator
00068 {
00069 protected:
00070 CState *buffState;
00071
00072 CState *nextState1;
00073 CState *nextState2;
00074 public:
00075 CTransitionFunctionNumericalInputDerivationCalculator(CContinuousTimeAndActionTransitionFunction *dynModel, double stepsize);
00076 ~CTransitionFunctionNumericalInputDerivationCalculator();
00077
00078 virtual void getInputDerivation(CState *currentState, CContinuousActionData *data, Matrix *dModelInput);
00079 };
00080
00081 class CPEGASUSPolicyGradientCalculator : public CPolicyGradientCalculator
00082 {
00083 protected:
00084 CContinuousActionGradientPolicy *policy;
00085
00086
00087 CStateList *startStates;
00088
00089 CTransitionFunctionEnvironment *dynModel;
00090
00091 CPolicySameStateEvaluator *sameStateEvaluator;
00092
00093 public:
00094 CPEGASUSPolicyGradientCalculator(CAgent *agent, CRewardFunction *reward, CContinuousActionGradientPolicy *policy, CTransitionFunctionEnvironment *dynModel, int numStartStates, int horizon, double gamma);
00095 ~CPEGASUSPolicyGradientCalculator();
00096
00097 virtual void getGradient(CFeatureList *gradient);
00098 virtual void getPEGASUSGradient(CFeatureList *gradient, CStateList *startStates) = 0;
00099
00100 virtual CStateList* getStartStates();
00101 virtual void setStartStates(CStateList *startStates);
00102
00103 virtual void setRandomStartStates();
00104 };
00105
00106 class CPEGASUSAnalyticalPolicyGradientCalculator : public CPEGASUSPolicyGradientCalculator, public CSemiMDPListener
00107 {
00108 protected:
00109 ColumnVector *dReward;
00110 Matrix *dPolicy;
00111 Matrix *dModelInput;
00112 std::list<CFeatureList *> *stateGradient1;
00113 std::list<CFeatureList *> *stateGradient2;
00114 std::list<CFeatureList *> *dModelGradient;
00115
00116 CFeatureList *episodeGradient;
00117
00118 CStateReward *rewardFunction;
00119 CTransitionFunctionInputDerivationCalculator *dynModeldInput;
00120 CCAGradientPolicyInputDerivationCalculator *policydInput;
00121
00122 int steps;
00123
00124 CAgent *agent;
00125
00126 void multMatrixFeatureList(Matrix *matrix, CFeatureList *features, int index, std::list<CFeatureList *> *newFeatures);
00127 public:
00128 CPEGASUSAnalyticalPolicyGradientCalculator(CAgent *agent, CContinuousActionGradientPolicy *policy, CCAGradientPolicyInputDerivationCalculator *policyInputDerivation, CTransitionFunctionEnvironment *dynModel, CTransitionFunctionInputDerivationCalculator *dynModeldInput, CStateReward *reward, int numStartStates, int horizon, double gamma);
00129 ~CPEGASUSAnalyticalPolicyGradientCalculator();
00130
00131 virtual void getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates);
00132
00133 virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00134 virtual void newEpisode();
00135 };
00136
00137 class CPEGASUSNumericPolicyGradientCalculator : public CPEGASUSPolicyGradientCalculator
00138 {
00139 protected:
00140 CFeatureList *gradientFeatures;
00141 double *weights;
00142
00143 CRewardFunction *rewardFunction;
00144 CAgent *agent;
00145 public:
00146 CPEGASUSNumericPolicyGradientCalculator(CAgent *agent, CContinuousActionGradientPolicy *policy, CTransitionFunctionEnvironment *dynModel, CRewardFunction *reward, double stepSize, int startStates, int horizon, double gamma);
00147 ~CPEGASUSNumericPolicyGradientCalculator();
00148
00149 virtual void getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates);
00150 };
00151
00152 #endif
00153