00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_VPOLICYFUNCTIONLEARNER__H
00033 #define C_VPOLICYFUNCTIONLEARNER__H
00034
00035
00036 #include "cagentlistener.h"
00037 #include "cqfunction.h"
00038 #include "cvfunction.h"
00039 #include "cqetraces.h"
00040 #include "cresiduals.h"
00041 #include "cpolicygradient.h"
00042 #include "cdynamicmodel.h"
00043 #include "cpegasus.h"
00044 #include "ccontinuousactiongradientpolicy.h"
00045
// CVPolicyLearner is a listener class (it publicly derives from
// CSemiMDPRewardListener) whose constructor takes a reward function, a dynamic
// model, a gradient value function, a continuous-action gradient policy, the
// input-derivation calculators for the model, the value function and the
// policy, a list of state modifiers and an integer nForwardView.
// Only declarations are visible in this header; the learning rule itself is in
// the out-of-view definitions of nextStep(), newEpisode() and the two
// protected helpers.
//
// NOTE(review): every line of this listing starts with a five-digit number
// that is part of the file text, not of the C++ code - looks like a
// documentation-generator source listing; confirm before trying to compile it.
// NOTE(review): the class stores many raw pointers and declares a destructor
// but no copy constructor or copy assignment, so the implicitly generated ones
// copy the pointers member-wise. Whether that is safe depends on which
// pointers the (out-of-view) destructor releases - confirm.
// NOTE(review): std::list is used below, but the standard 'list' header is not
// among the visible includes of this header; presumably it arrives through one
// of the project headers - confirm.
00046 class CVPolicyLearner : public CSemiMDPRewardListener
00047 {
00048 protected:
// A "state gradient" is a list of CFeatureList pointers. Presumably one
// feature list per state dimension - TODO confirm against the definitions.
00049 typedef std::list<CFeatureList *> CStateGradient;
00050
// Value function and its input-derivation calculator. Same names as the
// corresponding constructor parameters, so presumably stored from them.
00052 CGradientVFunction *vFunction;
00053 CVFunctionInputDerivationCalculator *vFunctionInputDerivation;
00054
// Policy and its input-derivation calculator. Same names as the corresponding
// constructor parameters, so presumably stored from them.
00055 CContinuousActionGradientPolicy *gradientPolicy;
00056 CCAGradientPolicyInputDerivationCalculator *policydInput;
00057
00058
00059
00060
// Vector/matrix work buffers. From the names these presumably hold the
// derivatives of the reward, of the value function, of the policy and of the
// model with respect to its input; dimensions and who allocates them are not
// visible here - TODO confirm.
00061 ColumnVector *dReward;
00062 ColumnVector *dVFunction;
00063 Matrix *dPolicy;
00064 Matrix *dModelInput;
00065
// Continuous-action data object; its role is defined in the implementation.
00066 CContinuousActionData *data;
00067
// Pointer to a list of CStateGradient pointers. Presumably a history of state
// gradients (possibly bounded by nForwardView) - TODO confirm.
00068 std::list<CStateGradient *> *stateGradients;
00069
// Three further CStateGradient pointers. The first two share their names with
// the first two parameters of getDNextState() below.
00070 CStateGradient *stateGradient1;
00071 CStateGradient *stateGradient2;
00072 CStateGradient *dModelGradient;
00073
// Reward function, dynamic model and the model's input-derivation calculator.
// Same names as the corresponding constructor parameters, so presumably
// stored from them.
00074 CStateReward *rewardFunction;
00075 CDynamicModel *dynModel;
00076 CDynamicModelInputDerivationCalculator *dynModeldInput;
00077
00078
// State collection; from the name presumably a scratch object. The constructor
// also receives a list of CStateModifier pointers that has no same-named
// member, so it may be consumed when building this collection - TODO confirm.
00079 CStateCollectionImpl *tempStateCol;
00080
// Feature list; from the name presumably the accumulated policy gradient.
00081 CFeatureList *policyGradient;
00082
// Helper declared here, defined elsewhere. From the name it presumably derives
// the gradient of the next state from the gradient of the current one.
// NOTE(review): the parameter names stateGradient1, stateGradient2 and data
// are identical to member names above; a definition that keeps these names
// hides the members inside the function body - confirm this is intended.
00083 void getDNextState(CStateGradient *stateGradient1, CStateGradient *stateGradient2, CStateCollection *currentState, CContinuousActionData *data);
// Helper declared here, defined elsewhere. From the signature it presumably
// combines `matrix` with `features` and writes the result into `newFeatures`;
// the meaning of `index` is not visible here - TODO confirm.
00084 void multMatrixFeatureList(Matrix *matrix, CFeatureList *features, int index, std::list<CFeatureList *> *newFeatures);
00085
00086
00087
// Integer setting with the same name as the last constructor parameter;
// presumably the number of steps looked ahead/kept - TODO confirm.
00088 int nForwardView;
00089
00090 public:
// Constructor. All arguments are passed as raw pointers except nForwardView.
// Eight of the nine parameter names match members of this class and are
// presumably stored there; stateModifiers has no matching member. Ownership
// of the pointed-to objects cannot be determined from this header.
00091 CVPolicyLearner(CStateReward *rewardFunction, CDynamicModel *dynModel, CDynamicModelInputDerivationCalculator *dynModeldInput,CGradientVFunction *vFunction, CVFunctionInputDerivationCalculator *vFunctionInputDerivation, CContinuousActionGradientPolicy *gradientPolicy, CCAGradientPolicyInputDerivationCalculator *policydInput, std::list<CStateModifier *> *stateModifiers, int nForwardView);
// Virtual destructor; what it releases is defined in the implementation.
00092 virtual ~CVPolicyLearner();
00093
// Virtual per-step callback receiving the old state, the action, the scalar
// reward and the next state. Presumably overrides the corresponding
// CSemiMDPRewardListener hook - the base class is not visible here.
00094 virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00095
// Virtual callback without arguments; from the name presumably invoked at the
// start of an episode to reset per-episode state - TODO confirm.
00096 virtual void newEpisode();
00097 };
00098
00099 #endif
00100