00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_CONTINUOUSACTIONGRADIENTPOLICY__H
00033 #define C_CONTINUOUSACTIONGRADIENTPOLICY__H
00034
00035 #include "cparameters.h"
00036 #include "cbaseobjects.h"
00037 #include "ccontinuousactions.h"
00038 #include "cgradientfunction.h"
00039
00040
00041 #include <list>
00042 #include <map>
00043 #include <vector>
00044
// Forward declarations. These types appear in this header only behind
// pointers, so their complete definitions are not needed here.
// (CStateProperties used to be listed twice; one declaration is enough.)
class CFeatureCalculator;
class CFeatureList;
class CFeatureVFunction;
class CStateCollection;
class CStateCollectionImpl;
class CStateProperties;
class CVFunctionInputDerivationCalculator;
00054
00055 class CCAGradientPolicyInputDerivationCalculator : virtual public CParameterObject
00056 {
00057 protected:
00058
00059 public:
00060
00061 virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector) = 0;
00062
00063 };
00064
00065 class CContinuousActionGradientPolicy : public CContinuousActionController, public CGradientFunction, public CStateObject
00066 {
00067 protected:
00068 CStateProperties *modelState;
00069
00070 virtual void updateWeights(CFeatureList *dParams) = 0;
00071
00072 public:
00073 CContinuousActionGradientPolicy(CContinuousAction *contAction, CStateProperties *modelState);
00074 ~CContinuousActionGradientPolicy();
00075
00076 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action) = 0;
00077
00078 virtual int getNumWeights() = 0;
00079
00080 virtual void getWeights(double *parameters) = 0;
00081 virtual void setWeights(double *parameters) = 0;
00082
00083 virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures) = 0;
00084 virtual void getGradientPre(ColumnVector *input, ColumnVector *outputErrors, CFeatureList *gradientFeatures);
00085
00086 virtual void getFunctionValuePre(ColumnVector *input, ColumnVector *output);
00087
00088 virtual void resetData() = 0;
00089 };
00090
00091 class CContinuousActionPolicyFromGradientFunction : public CContinuousActionGradientPolicy, public CCAGradientPolicyInputDerivationCalculator
00092 {
00093 protected:
00094 CGradientFunction *gradientFunction;
00095
00096 ColumnVector *outputError;
00097
00098
00099 virtual void updateWeights(CFeatureList *dParams);
00100
00101
00102
00103
00104
00105 public:
00106 CContinuousActionPolicyFromGradientFunction(CContinuousAction *contAction, CGradientFunction *gradientFunction, CStateProperties *modelState);
00107 ~CContinuousActionPolicyFromGradientFunction();
00108
00109 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00110 virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00111 virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00112
00113 virtual int getNumWeights();
00114
00115 virtual void getWeights(double *parameters);
00116 virtual void setWeights(double *parameters);
00117
00118
00119 virtual void resetData();
00120 };
00121
00122 class CContinuousActionFeaturePolicy : public CContinuousActionGradientPolicy, public CCAGradientPolicyInputDerivationCalculator
00123 {
00124 protected:
00125 std::list<CFeatureCalculator *> *featureCalculators;
00126 std::list<CFeatureVFunction *> *featureFunctions;
00127
00128 virtual void updateWeights(CFeatureList *dParams);
00129
00130 int numWeights;
00131 CFeatureList *localGradient;
00132
00133 ColumnVector *inputDerivation;
00134
00135 std::map<CFeatureVFunction *, CVFunctionInputDerivationCalculator *> *inputDerivationFunctions;
00136
00137
00138
00139
00140
00141 public:
00142 CContinuousActionFeaturePolicy(CContinuousAction *contAction, CStateProperties *modelState, std::list<CFeatureCalculator *> *featureCalcualtors);
00143 ~CContinuousActionFeaturePolicy();
00144
00145 virtual int getNumWeights();
00146
00147 virtual void getWeights(double *parameters);
00148 virtual void setWeights(double *parameters);
00149
00150 virtual void resetData();
00151
00152 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00153 virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00154 virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00155 };
00156
00157 class CContinuousActionSigmoidPolicy : public CContinuousActionGradientPolicy, public CCAGradientPolicyInputDerivationCalculator
00158 {
00159 protected:
00160 CContinuousActionGradientPolicy *policy;
00161 CCAGradientPolicyInputDerivationCalculator *inputDerivation;
00162
00163 CContinuousActionData *contData;
00164
00165 virtual void updateWeights(CFeatureList *dParams);
00166
00167 public:
00168 CContinuousActionSigmoidPolicy(CContinuousActionGradientPolicy *policy, CCAGradientPolicyInputDerivationCalculator *inputDerivation);
00169 ~CContinuousActionSigmoidPolicy();
00170
00171
00172 virtual int getNumWeights();
00173
00174 virtual void getWeights(double *parameters);
00175 virtual void setWeights(double *parameters);
00176
00177 virtual void resetData();
00178
00179 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00180 virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00181 virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00182
00183 virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *l_noise);
00184
00185 };
00186
00187 class CCAGradientPolicyNumericInputDerivationCalculator : public CCAGradientPolicyInputDerivationCalculator
00188 {
00189 protected:
00190 CContinuousActionGradientPolicy *policy;
00191
00192 CContinuousActionData *contDataPlus;
00193 CContinuousActionData *contDataMinus;
00194
00195 CStateCollectionImpl *stateBuffer;
00196 public:
00197 CCAGradientPolicyNumericInputDerivationCalculator(CContinuousActionGradientPolicy *policy, double stepSize, std::list<CStateModifier *> *modifiers);
00198 ~CCAGradientPolicyNumericInputDerivationCalculator();
00199
00200 virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00201 };
00202
00203 #endif
00204