00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_DYNAMICPROGRAMMING__H
00033 #define C_DYNAMICPROGRAMMING__H
00034
00035
00036 #include "cparameters.h"
00037
00038
00039 #include <map>
00040
00041
// Forward declarations. Every type named here is used in this header only
// through pointers (function parameters, return types and data members), so an
// incomplete type is sufficient and the defining headers are not included.
// NOTE(review): the five-digit number at the start of each line looks like
// line-number residue from a generated source listing; it is not valid C++
// and would have to be stripped before this header can compile -- confirm
// against the original file.
00042 class CTransition;
00043 class CAbstractFeatureStochasticModel;
00044 class CFeatureRewardFunction;
00045 class CAbstractVFunction;
00046 class CState;
00047 class CAction;
00048
00049
// Types referenced only by CValueIteration (Q-/V-function variants, action set).
00050 class CFeatureQFunction;
00051 class CFeatureVFunction;
00052 class CQFunctionFromStochasticModel;
00053 class CActionSet;
00054
// Types of the priority list and the optional policy held by CValueIteration.
00055 class CFeatureList;
00056 class CStochasticPolicy;
00057
00059
/// Groups three helper routines as public static member functions, so they are
/// called as CDynamicProgramming::name(...) without creating an object.
/// All three take the same model / reward function / value function / state
/// pointers and a `gamma` value, and all return a double.
/// NOTE(review): `gamma` is presumably the discount factor -- confirm in the
/// implementation file, which is not visible from this header.
00061 class CDynamicProgramming
00062 {
00063 public:
00065
/// Computes a double for one (discState, action) pair using the given model,
/// reward function and value function.
/// NOTE(review): by its name this is presumably the expected reward plus the
/// gamma-discounted value of the successor states -- verify against the .cpp.
/// @param model      stochastic model passed through to the computation
/// @param rewardFunc reward function passed through to the computation
/// @param vFunction  value function passed through to the computation
/// @param discState  state the value is computed for
/// @param action     action the value is computed for
/// @param gamma      see the class note above
00076 static double getActionValue(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *discState, CAction *action, double gamma);
00078
/// Same inputs as getActionValue() except that no action is supplied.
/// NOTE(review): presumably the best action value available in `discState`
/// (the Bellman backup) -- TODO confirm how the actions are enumerated.
00080 static double getBellmanValue(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *discState, double gamma);
00082
/// Same parameter list as getBellmanValue().
/// NOTE(review): presumably the difference between the Bellman value and the
/// current value of `discState` in `vFunction`; the sign convention cannot be
/// determined from this declaration -- verify against the .cpp.
00084 static double getBellmanError(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *discState, double gamma);
00085 };
00086
00088
/// Value-iteration object that inherits virtually from CParameterObject.
/// It can be constructed from either a CFeatureQFunction or a CFeatureVFunction,
/// each with or without a CStochasticPolicy, and always needs a stochastic
/// model and a feature reward function.
/// NOTE(review): the priority list together with addPriority() and
/// doUpdateBackwardStates() suggests a prioritized-sweeping scheme -- confirm in
/// the implementation file.
/// NOTE(review): the class holds raw pointers and declares a destructor but no
/// copy constructor or assignment operator; which pointers it owns is not
/// visible from this header -- check before copying instances.
00112 class CValueIteration : virtual public CParameterObject
00113 {
00114 protected:
/// Value function pointer (presumably the function being learned or read -- confirm).
00116 CAbstractVFunction *vFunction;
/// NOTE(review): presumably a V-function view derived from `qFunction` -- confirm.
00118 CAbstractVFunction *vFunctionFromQFunction;
/// Q-function pointer; two of the constructors receive a CFeatureQFunction.
00120 CFeatureQFunction *qFunction;
/// NOTE(review): presumably a Q-function derived from `vFunction` and the model -- confirm.
00122 CQFunctionFromStochasticModel *qFunctionFromVFunction;
/// Stochastic model; every constructor and init() take a parameter of this type.
00124 CAbstractFeatureStochasticModel *model;
/// Reward function; every constructor and init() take a parameter of this type.
00126 CFeatureRewardFunction *rewardModel;
/// Action set pointer (no constructor takes one, so it is presumably obtained
/// from the model or the Q-function -- TODO confirm).
00128 CActionSet *actions;
00129
/// NOTE(review): presumably true when constructed from a V-function and false
/// when constructed from a Q-function -- verify in the constructors.
00131 bool learnVFunction;
/// State object pointer (presumably scratch state reused between updates -- confirm).
00133 CState *discState;
00134
/// Feature list used for priorities; see addPriority() / addPriorities().
00136 CFeatureList *priorityList;
00137
/// Policy pointer; only two of the four constructors take a CStochasticPolicy,
/// so this may be unset/null for the others -- verify.
00139 CStochasticPolicy *stochPolicy;
00140
00142
/// Overridable hook that maps a transition and a `bellE` value to a priority.
/// NOTE(review): `bellE` is presumably a Bellman error -- confirm.
00144 virtual double getPriority(CTransition *trans, double bellE);
/// Non-virtual helper taking the model and reward model
/// (presumably the setup shared by all constructors -- confirm).
00145 void init(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel);
00146
00147
00148 public:
/// Construct from a Q-function, a model and a reward model.
00150 CValueIteration(CFeatureQFunction *qFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel);
/// As above, additionally taking a stochastic policy.
00152 CValueIteration(CFeatureQFunction *qFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel, CStochasticPolicy *stochPolicy);
/// Construct from a V-function, a model and a reward model.
00154 CValueIteration(CFeatureVFunction *vFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel);
/// As above, additionally taking a stochastic policy.
00156 CValueIteration(CFeatureVFunction *vFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel, CStochasticPolicy *stochPolicy);
/// Virtual destructor, so deleting through a base-class pointer is well defined.
00157 virtual ~CValueIteration();
00158
00160
/// Overridable update for the feature with index `feature`.
/// NOTE(review): presumably performs one value backup for that feature and
/// re-prioritizes related features -- confirm in the .cpp.
00169 virtual void updateFeature(int feature);
00170
/// NOTE(review): presumably updates the feature at the head of the priority
/// list -- behavior on an empty list is not visible here; verify.
00172 void updateFirstFeature();
00173
/// Registers a priority value for a single feature index.
00175 void addPriority(int feature, double priority);
/// Takes a whole feature list (presumably feature/priority pairs -- confirm).
00177 void addPriorities(CFeatureList *featList);
00178
/// Accessors returning pointers of the same types as the protected members
/// (presumably `model`, `vFunction`, `qFunction` and `stochPolicy` -- confirm).
00179 CAbstractFeatureStochasticModel *getTheoreticalModel();
00180 CAbstractVFunction *getVFunction();
00181 CFeatureQFunction *getQFunction();
00182 CStochasticPolicy *getStochasticPolicy();
00183
/// Getter/setter for a maximum list size. No matching data member is declared
/// in this class, so the value is presumably stored elsewhere (e.g. via
/// CParameterObject or inside the priority list) -- TODO confirm.
00184 int getMaxListSize();
00185 void setMaxListSize(int maxListSize);
00186
00188
/// NOTE(review): presumably performs `k` update steps -- confirm.
00189 void doUpdateSteps(int k);
/// NOTE(review): presumably performs up to `k` update steps and stops early
/// once the priority list is empty -- confirm.
00191 void doUpdateStepsUntilEmptyList(int k);
00192
00194
/// NOTE(review): presumably updates the predecessor ("backward") states of
/// `state` according to the model -- confirm.
00197 void doUpdateBackwardStates(int state);
00198 };
00199
00200
00201 #endif
00202