00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_CONTINUOUSTIME
00033 #define C_CONTINUOUSTIME
00034
00035
00036 #include "cpolicies.h"
00037 #include "ccontinuousactions.h"
00038 #include "ccontinuousactiongradientpolicy.h"
00039 #include "newmat/newmat.h"
00040
00041
00042
// Forward declarations. Each of these types is used in this header only
// through pointers (members, parameters or return values), so the full
// class definitions are not required here.
class CContinuousTimeQFunctionFromTransitionFunction;
class CContinuousTimeTransitionFunction;
class CGradientVFunction;
class CRewardFunction;
class CStateCollection;
class CTransitionFunction;
class CVFunctionInputDerivationCalculator;
00050
/// Holder for two static helper functions; the class declares no data
/// members and no instance methods.
///
/// NOTE(review): judging by the names only, these presumably convert
/// continuous-time parameters (sgamma, kappa) into their per-step
/// counterparts (gamma, lambda) for a step of length dt. The formulas live
/// in the implementation file - confirm there.
class CContinuousTimeParameters {
public:
    /// @param sgamma  continuous-time parameter to convert.
    /// @param dt      second input; presumably the time step - TODO confirm.
    /// @return a double derived from the two inputs (see implementation).
    static double getGammaFromSgamma(double sgamma, double dt);

    /// @param kappa   continuous-time parameter to convert.
    /// @param sgamma  same kind of value as passed to getGammaFromSgamma().
    /// @param dt      third input; presumably the time step - TODO confirm.
    /// @return a double derived from the three inputs (see implementation).
    static double getLambdaFromKappa(double kappa, double sgamma, double dt);
};
00057
00058 class CContinuousTimeVMPolicy : public CQStochasticPolicy
00059 {
00060 protected:
00061 CVFunctionInputDerivationCalculator *vfunction;
00062 CContinuousTimeTransitionFunction *model;
00063 public:
00064
00065 CContinuousTimeVMPolicy(CActionSet *actions, CActionDistribution *distribution, CVFunctionInputDerivationCalculator *vFunction, CContinuousTimeTransitionFunction *model, CRewardFunction *rewardFunction);
00066 ~CContinuousTimeVMPolicy();
00067
00068 CContinuousTimeQFunctionFromTransitionFunction *getQFunctionFromTransitionFunction();
00069
00070 };
00071
00072 class CContinuousTimeAndActionVMPolicy : public CContinuousActionController
00073 {
00074 protected:
00075 CVFunctionInputDerivationCalculator *dVFunction;
00076 CTransitionFunction *model;
00077
00078 ColumnVector *actionValues;
00079 ColumnVector *derivationX;
00080 Matrix *derivationU;
00081
00082 virtual void getActionValues(ColumnVector *actionValues, ColumnVector *noise) = 0;
00083 public:
00084 CContinuousTimeAndActionVMPolicy(CContinuousAction *action, CVFunctionInputDerivationCalculator *dVFunction, CTransitionFunction *model);
00085 ~CContinuousTimeAndActionVMPolicy();
00086
00087 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *contAction);
00088
00089
00090 };
00091
00092 class CContinuousTimeAndActionSigmoidVMPolicy : public CContinuousTimeAndActionVMPolicy
00093 {
00094 protected:
00095
00096 ColumnVector *c;
00097
00098 void getActionValues(ColumnVector *actionValues, ColumnVector *noise);
00099
00100 public:
00101 CContinuousTimeAndActionSigmoidVMPolicy(CContinuousAction *action, CVFunctionInputDerivationCalculator *vfunction, CTransitionFunction *model);
00102 ~CContinuousTimeAndActionSigmoidVMPolicy();
00103
00104 void setC(int index, double value);
00105 double getC(int index);
00106
00107 ColumnVector *getC() {return c;};
00108
00109 virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00110 };
00111
00112 class CContinuousTimeAndActionSigmoidVMGradientPolicy : public CContinuousActionGradientPolicy
00113 {
00114 protected:
00115 CGradientVFunction *vFunction;
00116 CStateCollectionImpl *derivationState;
00117
00118 CFeatureList *gradient1;
00119 CFeatureList *gradient2;
00120
00121 virtual void updateWeights(CFeatureList *dParams);
00122
00123
00124 CVFunctionInputDerivationCalculator *dVFunction;
00125 CTransitionFunction *model;
00126 ColumnVector *actionValues;
00127 ColumnVector *derivationX;
00128 Matrix *derivationU;
00129
00130 ColumnVector *c;
00131
00132 void getActionValues(ColumnVector *actionValues, ColumnVector *noise);
00133 virtual void getGradientActionValues(ColumnVector *, ColumnVector *) {};
00134
00135
00136
00137
00138 public:
00139 CContinuousTimeAndActionSigmoidVMGradientPolicy(CContinuousAction *action, CGradientVFunction *gradVFunction, CVFunctionInputDerivationCalculator *vfunction, CTransitionFunction *model, std::list<CStateModifier *> *modifiers);
00140 virtual ~CContinuousTimeAndActionSigmoidVMGradientPolicy();
00141
00142 virtual int getNumWeights();
00143
00144 virtual void getWeights(double *parameters);
00145 virtual void setWeights(double *parameters);
00146
00147 virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00148
00149 virtual void resetData();
00150
00151 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *contAction);
00152
00153 void setC(int index, double value);
00154 double getC(int index);
00155
00156 ColumnVector *getC() {return c;};
00157
00158 virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00159 };
00160
00161 class CContinuousTimeAndActionBangBangVMPolicy : public CContinuousTimeAndActionVMPolicy
00162 {
00163 protected:
00164 virtual void getActionValues(ColumnVector *actionValues, ColumnVector *noise);
00165
00166 public:
00167 CContinuousTimeAndActionBangBangVMPolicy(CContinuousAction *action, CVFunctionInputDerivationCalculator *vfunction, CTransitionFunction *model);
00168
00169 virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00170 };
00171
00172 class CContinuousActionSmoother : public CContinuousActionController
00173 {
00174 protected:
00175 CContinuousActionController *policy;
00176 double *actionValues;
00177
00178 double alpha;
00179 public:
00180 CContinuousActionSmoother(CContinuousAction *action, CContinuousActionController *policy, double alpha = 0.3);
00181 ~CContinuousActionSmoother();
00182
00183 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *contAction);
00184
00185 void setAlpha(double alpha);
00186 virtual double getAlpha();
00187
00188 };
00189
00190 #endif
00191