00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef C_EXPLORATION__H
00033 #define C_EXPLORATION__H
00034
00035 #include "cagentlistener.h"
00036 #include "cqfunction.h"
00037 #include "cpolicies.h"
00038
00039
00040
00041
// Forward declarations.
// Apart from CAbstractQFunction, which is also used as a base class further
// down (so its full definition has to come from an included header), these
// types are referenced in this header only through pointers.

// State and action types.
class CStateCollection;
class CAction;
class CActionData;

// Value-function types.
class CAbstractVFunction;
class CGradientVFunction;
class CFeatureVFunction;
class CAbstractQFunction;
00050
00051 class CVisitStateCounter : public CSemiMDPListener
00052 {
00053 protected:
00054 CGradientVFunction *visits;
00055 double *weights;
00056 int steps;
00057
00058 virtual void doDecay(double decay);
00059 public:
00060 CVisitStateCounter(CFeatureVFunction *visits, double decay = 1.0);
00061 virtual ~CVisitStateCounter();
00062
00063 virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00064 virtual void newEpisode();
00065 };
00066
00067 class CVisitStateActionCounter : public CSemiMDPListener
00068 {
00069 protected:
00070 CGradientQFunction *visits;
00071 double *weights;
00072 int steps;
00073
00074 virtual void doDecay(double decay);
00075
00076 public:
00077 CVisitStateActionCounter(CFeatureQFunction *visits, double decay = 1.0);
00078 virtual ~CVisitStateActionCounter();
00079
00080 virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00081 virtual void newEpisode();
00082 };
00083
00084 class CVisitStateActionEstimator : public CVisitStateCounter
00085 {
00086 protected:
00087 CGradientQFunction *actionVisits;
00088
00089 virtual void doDecay(double decay);
00090
00091 public:
00092 CVisitStateActionEstimator(CFeatureVFunction *stateVisits, CFeatureQFunction *actionVisits, double decay = 1.0);
00093 virtual ~CVisitStateActionEstimator();
00094
00095 virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00096 virtual void newEpisode();
00097 };
00098
00099 class CExplorationQFunction : public CAbstractQFunction
00100 {
00101 protected:
00102 CAbstractVFunction *stateVisitCounter;
00103 CAbstractQFunction *actionVisitCounter;
00104
00105 public:
00106 CExplorationQFunction(CAbstractVFunction *stateVisitCounter, CAbstractQFunction *actionVisitCounter);
00107
00108 virtual ~CExplorationQFunction();
00109
00110 virtual void updateValue(CStateCollection *state, CAction *action, double td, CActionData *data = NULL);
00111 virtual void setValue(CStateCollection *state, CAction *action, double qValue, CActionData *data = NULL);
00112 virtual double getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
00113
00114 virtual CAbstractQETraces *getStandardETraces();
00115 };
00116
00117
00118 class CQStochasticExplorationPolicy : public CQStochasticPolicy
00119 {
00120 protected:
00121 CAbstractQFunction *explorationFunction;
00122
00123 double *explorationValues;
00124
00125 public:
00126 CQStochasticExplorationPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractQFunction *qFunctoin, CAbstractQFunction *explorationFunction, double explorationFactor);
00127 ~CQStochasticExplorationPolicy();
00128
00129 virtual void getActionValues(CStateCollection *state, CActionSet *availableActions, double *actionValues, CActionDataSet *actionDataSet = NULL);
00130
00131 virtual CAbstractQFunction *getExplorationQFunction() {return explorationFunction;};
00132 };
00133
00134 class CSelectiveExplorationCalculator : public CSemiMDPListener
00135 {
00136 protected:
00137 CQStochasticExplorationPolicy *explorationPolicy;
00138
00139 double attention;
00140 public:
00141 CSelectiveExplorationCalculator(CQStochasticExplorationPolicy *explorationFunction);
00142 virtual ~CSelectiveExplorationCalculator();
00143
00144 virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00145 virtual void newEpisode();
00146 };
00147
00148
00149 #endif
00150