00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef __CCONTINUOUSRL_H
00034 #define __CCONTINUOUSRL_H
00035
00036
00037
00038
00039 #include "caction.h"
00040 #include "cutility.h"
00041 #include "cqfunction.h"
00042 #include "cqetraces.h"
00043 #include "cagentcontroller.h"
00044 #include "cagentlistener.h"
00045
00046 class CAbstractQFunction;
00047 class CQFunction;
00048
00050
// Data object for a continuous action. It is both a CMultiStepActionData and
// a ColumnVector.
// NOTE(review): presumably the vector base holds one value per action
// dimension; the implementation is not visible in this header - confirm.
00053 class CContinuousActionData : public CMultiStepActionData, public ColumnVector
00054 {
00055 protected:
00056
00057 public:
// Constructs the data object for the given action properties.
00058 CContinuousActionData(CContinuousActionProperties *properties);
00059 virtual ~CContinuousActionData();
00060
// Publicly accessible pointer to an action-properties object.
// NOTE(review): presumably the constructor argument; ownership is not
// visible here - confirm.
00061 CContinuousActionProperties *properties;
00062
// Setter (virtual) and getter (non-virtual) for the action value of
// dimension `dim`.
00063 virtual void setActionValue(int dim, double value);
00064 double getActionValue(int dim);
00065
00066
00067
// NOTE(review): the name suggests the values are brought into the range
// described by `properties`; the exact rule lives in the implementation -
// TODO confirm.
00068 void normalizeAction();
00069
// Returns a distance between this action and `vector`.
// NOTE(review): the metric used is not visible here - confirm.
00070 double getDistance(ColumnVector *vector);
00071
// Text-format save/load on a stdio stream.
00072 virtual void saveASCII(FILE *stream);
00073 virtual void loadASCII(FILE *stream);
00074
// Binary-format save/load on a stdio stream.
00075 virtual void saveBIN(FILE *stream);
00076 virtual void loadBIN(FILE *stream);
00077
// Sets this object from another action-data object.
// NOTE(review): presumably expects `actionData` to be a
// CContinuousActionData - verify against the implementation.
00078 virtual void setData(CActionData *actionData);
// NOTE(review): presumably assigns `initVal` to every element - confirm.
00079 void initData(double initVal);
00080 };
00081
// Describes a continuous action: how many action values it has, plus a
// minimum and a maximum value that can be read and written per dimension.
00082 class CContinuousActionProperties
00083 {
00084 protected:
// Number of action values (dimensions).
00085 unsigned int numActionValues;
// Raw pointers to the minimum / maximum values.
// NOTE(review): presumably arrays of length numActionValues allocated by
// the constructor and released by the destructor - confirm in the .cpp.
00086 double *minValues;
00087 double *maxValues;
00088 public:
// Note: takes a signed int although the stored count is unsigned.
00089 CContinuousActionProperties(int numActionValues);
00090 virtual ~CContinuousActionProperties();
00091
00092 unsigned int getNumActionValues();
00093
// Per-dimension bound accessors.
// NOTE(review): whether `dim` is range-checked is not visible here.
00094 double getMinActionValue(int dim);
00095 double getMaxActionValue(int dim);
00096
00097 void setMinActionValue(int dim, double value);
00098 void setMaxActionValue(int dim, double value);
00099 };
00100
// Primitive action that carries a CContinuousActionData object and a pointer
// to the CContinuousActionProperties describing it.
00101 class CContinuousAction : public CPrimitiveAction
00102 {
00103 protected:
// Action data returned by getContinuousActionData().
00104 CContinuousActionData *continuousActionData;
00105 CContinuousActionProperties *properties;
00106
// Protected constructor that additionally receives the action-data object,
// available to derived classes only.
00107 CContinuousAction(CContinuousActionProperties *properties, CContinuousActionData *actionData);
00108 public:
// Public constructor taking only the properties.
// NOTE(review): presumably creates its own action data - confirm.
00109 CContinuousAction(CContinuousActionProperties *properties);
00110 virtual ~CContinuousAction();
00111
00112
00113 CContinuousActionProperties *getContinuousActionProperties();
00114
// Returns the stored continuousActionData pointer (no copy is made).
00115 virtual CContinuousActionData *getContinuousActionData() {return continuousActionData;};
00116
// NOTE(review): presumably returns a newly allocated action-data object
// that the caller must release - confirm ownership in the implementation.
00117 virtual CActionData *getNewActionData();
00118
// Value of dimension `dim` and the number of dimensions of this action.
00119 double getActionValue(int dim);
00120 unsigned int getNumDimensions();
00121
00122 virtual void loadActionData(CActionData *data);
00123
// Comparison hooks; the comparison criteria are defined in the
// implementation file, not in this header.
00124 virtual bool equals(CAction *action);
00125 virtual bool isSameAction(CAction *action, CActionData *data);
00126 };
00127
// Named values for the random-controller mode of CContinuousActionController.
// The constructor below defaults to EXTERN_RANDOM_CONTROLLER.
00128 #define NO_RANDOM_CONTROLLER 0
00129 #define EXTERN_RANDOM_CONTROLLER 1
00130 #define INTERN_RANDOM_CONTROLLER 2
00131
00132 class CContinuousActionRandomPolicy;
00133
// Abstract agent controller that produces continuous actions. Derived classes
// implement getNextContinuousAction(); an optional random policy and a noise
// buffer are kept alongside the controlled action.
00134 class CContinuousActionController : public CAgentController
00135 {
00136 protected:
// The continuous action this controller works on (see getContinuousAction()).
00137 CContinuousAction *contAction;
00138
// Optional random policy, accessed via set/getRandomController().
00139 CContinuousActionRandomPolicy *randomController;
// NOTE(review): presumably scratch storage for the noise passed to
// getNoise() - confirm in the implementation.
00140 CContinuousActionData *noise;
00141
// One of the *_RANDOM_CONTROLLER values defined above.
00142 int randomControllerMode;
00143 public:
// The default mode is spelled with its named constant instead of the bare
// literal 1; the value is unchanged, so existing callers are unaffected.
00144 CContinuousActionController(CContinuousAction *contAction, int randomControllerMode = EXTERN_RANDOM_CONTROLLER);
00145 virtual ~CContinuousActionController();
00146
// CAgentController-style entry point; `data` is optional (defaults to NULL).
// NOTE(review): presumably delegates to getNextContinuousAction() - confirm.
00147 virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL);
// Pure virtual: derived classes write the chosen action into `action`.
00148 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action) = 0;
00149
// Forwards to contAction->getContinuousActionProperties().
00150 virtual CContinuousActionProperties *getContinuousActionProperties() {return contAction->getContinuousActionProperties();};
// Returns the stored contAction pointer.
00151 virtual CContinuousAction *getContinuousAction() {return contAction;};
00152
00153 virtual void setRandomController(CContinuousActionRandomPolicy *randomController);
00154 virtual CContinuousActionRandomPolicy *getRandomController();
00155
00156 void setRandomControllerMode(int randomControllerMode);
00157 int getRandomControllerMode();
00158
// NOTE(review): presumably fills `noise` with the noise component belonging
// to `action` in `state` - the computation is not visible here, confirm.
00159 virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00160
00161
00162 };
00163
00164
// Continuous action that keeps a pointer to another CContinuousAction and a
// maximum distance value.
// NOTE(review): presumably a fixed action value set that can be written into
// or added onto other continuous action data - confirm in the .cpp.
00165 class CStaticContinuousAction : public CContinuousAction
00166 {
00167 protected:
// The continuous action handed to the constructor (see getContinuousAction()).
00168 CContinuousAction *contAction;
00169
// Value returned by getMaximumDistance(); the constructor default is 0.0.
00170 double maximumDistance;
00171 public:
// Note: the first parameter is named `properties` but is a CContinuousAction*.
// NOTE(review): `actionValues` presumably has one entry per action
// dimension - confirm.
00172 CStaticContinuousAction(CContinuousAction *properties, double *actionValues, double maximumDistance = 0.0);
00173 virtual ~CStaticContinuousAction();
00174
// NOTE(review): presumably overwrite `contAction` with this action's values,
// respectively add them scaled by `factor` - confirm in the implementation.
00175 virtual void setContinuousAction(CContinuousActionData *contAction);
00176 virtual void addToContinuousAction(CContinuousActionData *contAction, double factor);
00177
00178 CContinuousAction *getContinuousAction();
00179
// Deliberately empty: loading action data does nothing for this class.
00180 virtual void loadActionData(CActionData *) {};
// Must not be called: the body is assert(false).
00181 virtual void setData(CActionData *) {assert(false);};
00182
00183 virtual bool equals(CAction *action);
00184 virtual bool isSameAction(CAction *action, CActionData *data);
00185
00186 virtual double getMaximumDistance();
00187 };
00188
00189
// Abstract static continuous action that can report a factor for a given
// continuous action via getActionFactor().
00190 class CLinearFAContinuousAction : public CStaticContinuousAction
00191 {
00192 protected:
00193 public:
// Note: the first parameter is named `properties` but is a CContinuousAction*.
00194 CLinearFAContinuousAction(CContinuousAction *properties, double *actionValues);
// Empty inline destructor.
00195 virtual ~CLinearFAContinuousAction() {};
00196
// Pure virtual: returns the factor of this action for `contAction`.
// NOTE(review): the meaning/range of the factor is defined by subclasses.
00197 virtual double getActionFactor(CContinuousActionData *contAction) = 0;
00198
00199 };
00200
// Concrete CLinearFAContinuousAction constructed from an RBF centre and sigma.
// NOTE(review): presumably getActionFactor() evaluates a radial basis function
// around `rbfCenter` with widths `rbfSigma` - the formula is not visible here.
00201 class CContinuousRBFAction : public CLinearFAContinuousAction
00202 {
00203 protected:
// NOTE(review): presumably one sigma per action dimension; whether this is a
// copy of the constructor argument or the same pointer is not visible here.
00204 double *rbfSigma;
00205 public:
00206 CContinuousRBFAction(CContinuousAction *properties, double *rbfCenter, double *rbfSigma);
00207 virtual ~CContinuousRBFAction();
00208
// Implements the pure virtual declared in CLinearFAContinuousAction.
00209 virtual double getActionFactor(CContinuousActionData *contAction);
00210 };
00211
// Helper that works on a set of actions together with continuous action
// properties, converting between continuous action data and an array of
// per-action factors.
// NOTE(review): presumably the set contains CLinearFAContinuousAction objects
// and a continuous action is represented as a factor-weighted combination of
// them - confirm in the implementation.
00212 class CContinuousActionLinearFA
00213 {
00214 protected:
00215 CActionSet *contActions;
00216 CContinuousActionProperties *actionProperties;
00217
00218 public:
00219
00220 CContinuousActionLinearFA(CActionSet *contActions, CContinuousActionProperties *properties);
00221 virtual ~CContinuousActionLinearFA();
00222
// Writes the factors for `action` into the caller-provided `actionFactors`.
// NOTE(review): the required array length is presumably
// getNumContinuousActionFA() - confirm.
00223 void getActionFactors(CContinuousActionData *action, double *actionFactors);
00224
// Two overloads that fill `action`: one selected by index, one from an array
// of factors.
00225 void getContinuousAction(unsigned int index, CContinuousActionData *action);
00226 void getContinuousAction(CContinuousActionData *action, double *actionFactors);
00227
00228 int getNumContinuousActionFA();
00229 };
00230
00231
00232 class CCALinearFAQETraces;
00233
// Abstract gradient Q-function for a single continuous action. Alongside the
// CAction-based interface it declares "CA" variants that take
// CContinuousActionData directly; getBestContinuousAction() and getCAValue()
// are pure virtual and must be supplied by subclasses.
00234 class CContinuousActionQFunction : public CGradientQFunction
00235 {
00236 protected:
// The continuous action this Q-function is defined for.
00237 CContinuousAction *contAction;
00238 public:
00239 CContinuousActionQFunction(CContinuousAction *contAction);
00240 virtual ~CContinuousActionQFunction();
00241
// NOTE(review): presumably returns the continuous action with its data set to
// the result of getBestContinuousAction() - confirm in the implementation.
00242 virtual CAction *getMax(CStateCollection *, CActionSet *availableActions, CActionDataSet *actionDatas);
00243
// Pure virtual: writes the best action for `state` into `actionData`.
00244 virtual void getBestContinuousAction(CStateCollection *state, CContinuousActionData *actionData) = 0;
00245
// CAction-based interface; `data` is optional and defaults to NULL.
// NOTE(review): these presumably forward to the *CAValue functions below.
00246 virtual void updateValue(CStateCollection *state, CAction *action, double td, CActionData *data = NULL);
00248
00250 virtual void setValue(CStateCollection *state, CAction *action, double qValue, CActionData *data = NULL);
00252
00254 virtual double getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
00255
00256
// Continuous-action-data interface; only getCAValue() is pure virtual.
00257 virtual void updateCAValue(CStateCollection *state, CContinuousActionData *data, double td);
00258 virtual void setCAValue(CStateCollection *state, CContinuousActionData *data, double qValue);
00259 virtual double getCAValue(CStateCollection *state, CContinuousActionData *data) = 0;
00260
00261
// Gradient queries in both interface flavours; results go into `gradient`.
00262 virtual void getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradient);
00263 virtual void getCAGradient(CStateCollection *state, CContinuousActionData *data, CFeatureList *gradient);
00264
// Returns the stored contAction pointer.
00265 CContinuousAction *getContinuousActionObject() {return contAction;};
00266
// Base implementation reports zero weights.
00267 virtual int getNumWeights() {return 0;};
00268
00269 virtual void getWeights(double *parameters);
00270 virtual void setWeights(double *parameters);
00271
00272
00273 };
00274
// Continuous-action Q-function that combines CContinuousActionQFunction with
// CContinuousActionLinearFA and keeps a pointer to an ordinary CQFunction.
// NOTE(review): presumably the continuous Q-value is computed from the wrapped
// Q-function's values for the static actions, weighted by action factors -
// confirm in the implementation.
00275 class CCALinearFAQFunction : public CContinuousActionQFunction, public CContinuousActionLinearFA
00276 {
00277 protected:
// NOTE(review): presumably scratch arrays sized by the number of static
// actions; allocation is not visible in this header.
00278 double *actionFactors;
00279 double *CAactionValues;
// The Q-function passed to the constructor (see getQFunctionForCA()).
00280 CQFunction *qFunction;
00281
// NOTE(review): presumably a temporary feature list used while computing
// gradients - confirm.
00282 CFeatureList *tempGradient;
00283
00284 virtual void updateWeights(CFeatureList *features);
00285
00286 public:
00287
00288 CCALinearFAQFunction(CQFunction *qFunction, CContinuousAction *returnAction);
00289
00290 virtual ~CCALinearFAQFunction();
00291
// Implements the pure virtual from CContinuousActionQFunction.
00292 virtual void getBestContinuousAction(CStateCollection *state, CContinuousActionData *actionData);
00293
// Continuous-action-data value interface; getCAValue() implements the pure
// virtual of the base class.
00294 virtual void updateCAValue(CStateCollection *state, CContinuousActionData *data, double td);
00295 virtual void setCAValue(CStateCollection *state, CContinuousActionData *data, double qValue);
00296 virtual double getCAValue(CStateCollection *state, CContinuousActionData *data);
00297
00298
00299 CQFunction *getQFunctionForCA();
00300
// NOTE(review): presumably returns a newly created CCALinearFAQETraces for
// this Q-function - confirm type and ownership in the .cpp.
00301 virtual CAbstractQETraces* getStandardETraces();
00302 virtual void getCAGradient(CStateCollection *state, CContinuousActionData *action, CFeatureList *gradient);
00303
00304 virtual int getNumWeights();
00305
00306 virtual void getWeights(double *weights);
00307 virtual void setWeights(double *weights);
00308
// Always returns 0; the action argument is ignored.
00309 virtual int getWeightsOffset(CAction *) {return 0;};
00310 };
00311
// Eligibility traces (derived from CQETraces) constructed for a
// CCALinearFAQFunction.
00312 class CCALinearFAQETraces : public CQETraces
00313 {
00314 protected:
// NOTE(review): presumably a scratch array for the action factors obtained
// from contQFunc - allocation and size are not visible here.
00315 double *actionFactors;
// NOTE(review): presumably the Q-function passed to the constructor.
00316 CCALinearFAQFunction *contQFunc;
00317
00318 public:
00319
00320 CCALinearFAQETraces(CCALinearFAQFunction *qfunction);
00321 virtual ~CCALinearFAQETraces();
00322
// Adds an e-trace for the given state/action; `factor` defaults to 1.0 and
// `data` to NULL.
00323 virtual void addETrace(CStateCollection *State, CAction *action, double factor = 1.0, CActionData *data = NULL);
00324 };
00325
00326 class CActionDistribution;
00327
// Continuous-action controller built from an action distribution, a
// Q-function and a set of static continuous actions.
// NOTE(review): presumably the distribution is evaluated over the Q-values of
// the static actions to form the continuous action - confirm in the .cpp.
00328 class CContinuousActionPolicy : public CContinuousActionController
00329 {
00330 protected:
00331 CActionDistribution *distribution;
// NOTE(review): presumably a buffer with one entry per static action -
// allocation is not visible in this header.
00332 double *actionValues;
00333 CAbstractQFunction *continuousActionQFunc;
00334
00335 CActionSet *continuousStaticActions;
00336
00337 public:
00338 CContinuousActionPolicy(CContinuousAction *contAction, CActionDistribution *distribution, CAbstractQFunction *continuousActionQFunc, CActionSet *continuousStaticActions);
00339 virtual ~CContinuousActionPolicy();
00340
// Implements the pure virtual from CContinuousActionController.
00341 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00342
00343 };
00344
// Continuous-action controller that is also a CSemiMDPListener and keeps two
// noise vectors (last and current).
// NOTE(review): presumably generates random noise as the action, with `sigma`
// as the noise scale and `alpha` as a smoothing factor between successive
// noise vectors - confirm in the implementation.
00345 class CContinuousActionRandomPolicy : public CContinuousActionController, public CSemiMDPListener
00346 {
00347 protected:
// Noise vectors exposed through getLastNoise() / getCurrentNoise().
00348 ColumnVector *lastNoise;
00349 ColumnVector *currentNoise;
00350
00351 double sigma;
00352 double alpha;
00353 public:
00354 CContinuousActionRandomPolicy(CContinuousAction *action, double sigma, double alpha);
00355 virtual ~CContinuousActionRandomPolicy();
00356
// Listener callbacks.
// NOTE(review): presumably reset and advance the noise vectors - confirm.
00357 virtual void newEpisode();
00358 virtual void nextStep(CStateCollection *, CAction *, CStateCollection *);
00359
// Implements the pure virtual from CContinuousActionController.
00360 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00361
// NOTE(review): presumably re-reads sigma/alpha from a parameter store
// provided by a base class - not visible in this header.
00362 virtual void onParametersChanged();
00363
00364 ColumnVector *getCurrentNoise();
00365 ColumnVector *getLastNoise();
00366 };
00367
00368
// Continuous-action controller that holds a list of other continuous-action
// controllers and a weight per controller.
// NOTE(review): presumably the produced action is the weighted sum of the
// sub-controllers' actions - confirm in the implementation.
00369 class CContinuousActionAddController : public CContinuousActionController
00370 {
00371 protected:
// Registered sub-controllers.
00372 std::list<CContinuousActionController *> *controllers;
00373
// Weight associated with each registered controller.
00374 std::map<CContinuousActionController *,double> *controllerWeights;
00375
// NOTE(review): presumably a scratch vector for the accumulated action.
00376 ColumnVector *actionValues;
00377 public:
00378 CContinuousActionAddController(CContinuousAction *action);
00379 virtual ~CContinuousActionAddController();
00380
// Implements the pure virtual from CContinuousActionController.
00381 virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00382
// Registers a controller; its weight defaults to 1.0.
00383 void addContinuousActionController(CContinuousActionController *controller, double weight = 1.0);
// Weight accessors for an already registered controller.
// NOTE(review): behaviour for an unknown controller is not visible here.
00384 void setControllerWeight(CContinuousActionController *controller, double weight);
00385 double getControllerWeight(CContinuousActionController *controller);
00386
00387 };
00388
00389
00390 #endif
00391