00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef CADVANTAGELEARNING_H
00033 #define CADVANTAGELEARNING_H
00034
00035 #include "ctdlearner.h"
00036
00037 class CAbstractVFunction;
00038 class CAbstractVETraces;
00039
00040 #include "ril_debug.h"
00041
// CAdvantageUpdating: learner declared as a public subclass of CTDLearner.
// On top of whatever the base class provides, it holds a pointer to a
// V-function and a pointer to V e-traces. Only declarations appear here; the
// member definitions live outside this header.
// NOTE(review): the name suggests the "advantage updating" scheme (an
// advantage/Q-function learned jointly with a state-value function) - confirm
// against the implementation file.
00042 class CAdvantageUpdating : public CTDLearner
00043 {
00044 protected:
00045
// V-function used by this learner; presumably the pointer passed to the
// constructor's vFunction parameter - TODO confirm. Ownership is not visible
// from this header.
00046 CAbstractVFunction *vFunction;
// E-traces object associated with the V-function. Where it is created and who
// releases it cannot be told from here - TODO confirm in the implementation.
00047 CAbstractVETraces *vETraces;
00048
// Returns a double computed from one transition (oldState, action, reward,
// nextState); by its name, the temporal-difference error.
// NOTE(review): presumably overrides a virtual of CTDLearner - base not visible.
00049 virtual double getTemporalDifference(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00050
// E-trace update hook taking the old state, the new state and the action.
// Note the parameter order here is (oldState, newState, action).
00051 virtual void addETraces(CStateCollection *oldState, CStateCollection *newState, CAction *action);
// Per-transition learning hook taking (oldState, action, reward, nextState).
00052 virtual void learnStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00053 public:
00054
// Builds the learner from a reward function, a Q-function, a V-function and a
// double dt.
// NOTE(review): dt is presumably the time-step length - confirm units and use.
00055 CAdvantageUpdating(CRewardFunction *rewardFunction, CAbstractQFunction *qfunction, CAbstractVFunction *vFunction, double dt);
// Virtual destructor; what it releases is defined in the implementation file.
00056 virtual ~CAdvantageUpdating();
00057
00058 };
00059
// CAdvantageLearner: learner declared as a public subclass of
// CTDResidualLearner. Its constructor takes a gradient Q-function and a beta
// calculator, and the class adds one extra action data set pointer. Only
// declarations appear here; the member definitions live outside this header.
// NOTE(review): the name suggests "advantage learning" - confirm against the
// implementation file.
00060 class CAdvantageLearner : public CTDResidualLearner
00061 {
00062
00063 protected:
// Additional action data set held by this class. The "2" suffix suggests the
// base class already has one - TODO confirm; creation and ownership are not
// visible from this header.
00064 CActionDataSet *actionDataSet2;
00065
// Returns a double computed from one transition (oldState, action, reward,
// nextState); by its name, the temporal-difference error.
// NOTE(review): presumably overrides a virtual of the base class - base not
// visible.
00066 virtual double getTemporalDifference(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00067
// E-trace update hook taking the old state, the new state, the action and a
// td value that defaults to 0.0. This signature has one more parameter than
// CAdvantageUpdating::addETraces in this same header.
// NOTE(review): default arguments on virtual functions are selected by the
// static type of the call expression, not the dynamic type - make sure any
// base-class declaration uses the same default.
00068 virtual void addETraces(CStateCollection *oldState, CStateCollection *newState, CAction *action, double td = 0.0);
00069
00070 public:
00071
// Builds the learner from a reward function, a gradient Q-function, a double
// dt and a beta calculator.
// NOTE(review): dt is presumably the time-step length - confirm units and use.
00072 CAdvantageLearner(CRewardFunction *rewardFunction, CGradientQFunction *qfunction, double dt, CAbstractBetaCalculator *betaCalc);
// Declared without the `virtual` keyword (unlike ~CAdvantageUpdating above);
// it is virtual only if a base-class destructor is virtual - base not visible.
00073 ~CAdvantageLearner();
00074 };
00075
00076 #endif