Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cexploration.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_EXPLORATION__H
00033 #define C_EXPLORATION__H
00034 
00035 #include "cagentlistener.h"
00036 #include "cqfunction.h"
00037 #include "cpolicies.h"
00038 
00039 
00040 
00041 
// Forward declarations of types that the classes in this header refer to
// through pointers.
// NOTE(review): CAbstractQFunction is additionally used as a base class by
// CExplorationQFunction in this header, which requires the complete type;
// presumably "cqfunction.h" provides it -- confirm.
00042 class CGradientVFunction;
00043 class CFeatureVFunction;
00044 class CStateCollection;
00045 class CAction;
00046 class CActionData;
00047 
00048 class CAbstractVFunction;
00049 class CAbstractQFunction;
00050 
// CVisitStateCounter: a CSemiMDPListener that holds a gradient V-function
// (`visits`), a raw `weights` buffer and a `steps` counter.
// NOTE(review): judging by the names, this presumably accumulates (optionally
// decayed) per-state visit counts into `visits`; the implementation is not
// part of this header -- confirm against the .cpp.
00051 class CVisitStateCounter : public CSemiMDPListener
00052 {
00053 protected:
              // Presumably the function handed to the constructor (declared
              // there as CFeatureVFunction*) -- TODO confirm.
00054         CGradientVFunction *visits;
              // Raw buffer; its size and who owns it are not visible in this header.
00055         double *weights;
              // Presumably the number of steps observed so far -- TODO confirm.
00056         int steps;
00057 
              // Hook that receives a decay factor. It is virtual and
              // CVisitStateActionEstimator (a subclass in this header) declares
              // its own version.
00058         virtual void doDecay(double decay);
00059 public:
              // `decay` defaults to 1.0. No member for it is declared in this
              // class, so how the value is retained is not visible here.
00060         CVisitStateCounter(CFeatureVFunction *visits, double decay = 1.0);
00061         virtual ~CVisitStateCounter();
00062 
              // Step / episode callbacks; presumably overrides of
              // CSemiMDPListener, which is declared elsewhere -- confirm.
00063         virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00064         virtual void newEpisode();
00065 };
00066 
// CVisitStateActionCounter: a CSemiMDPListener with the same member layout as
// CVisitStateCounter, except that `visits` is a gradient Q-function instead of
// a V-function.
// NOTE(review): presumably counts visits per (state, action) pair; the
// implementation is not part of this header -- confirm against the .cpp.
00067 class CVisitStateActionCounter : public CSemiMDPListener
00068 {
00069 protected:
              // Presumably the function handed to the constructor (declared
              // there as CFeatureQFunction*) -- TODO confirm.
00070         CGradientQFunction *visits;
              // Raw buffer; its size and who owns it are not visible in this header.
00071         double *weights;
              // Presumably the number of steps observed so far -- TODO confirm.
00072         int steps;
00073 
              // Hook that receives a decay factor; virtual, so a subclass may
              // replace it.
00074         virtual void doDecay(double decay);
00075 
00076 public:
              // `decay` defaults to 1.0. No member for it is declared in this
              // class, so how the value is retained is not visible here.
00077         CVisitStateActionCounter(CFeatureQFunction *visits, double decay = 1.0);
00078         virtual ~CVisitStateActionCounter();
00079 
              // Step / episode callbacks; presumably overrides of
              // CSemiMDPListener, which is declared elsewhere -- confirm.
00080         virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00081         virtual void newEpisode();
00082 };
00083 
// CVisitStateActionEstimator: extends CVisitStateCounter with an additional
// gradient Q-function (`actionVisits`). It re-declares doDecay, nextStep and
// newEpisode, all of which the base class declares virtual.
// NOTE(review): the name suggests it estimates state-action visits alongside
// the inherited state visits; the implementation is not part of this header
// -- confirm against the .cpp.
00084 class CVisitStateActionEstimator : public CVisitStateCounter
00085 {
00086 protected:
              // Presumably the constructor's `actionVisits` argument (declared
              // there as CFeatureQFunction*) -- TODO confirm.
00087         CGradientQFunction *actionVisits;
00088 
              // Overrides CVisitStateCounter::doDecay.
00089         virtual void doDecay(double decay);
00090 
00091 public:
              // Takes one function for state visits and one for action visits;
              // `decay` defaults to 1.0.
00092         CVisitStateActionEstimator(CFeatureVFunction *stateVisits, CFeatureQFunction *actionVisits, double decay = 1.0);
00093         virtual ~CVisitStateActionEstimator();
00094 
              // Override the step / episode callbacks of CVisitStateCounter.
00095         virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00096         virtual void newEpisode();
00097 };
00098 
// CExplorationQFunction: a CAbstractQFunction that holds a V-function named
// `stateVisitCounter` and a Q-function named `actionVisitCounter`.
// NOTE(review): presumably getValue() derives an exploration value from the
// two visit counters; the formula lives in the .cpp and is not visible here
// -- confirm.
00099 class CExplorationQFunction : public CAbstractQFunction
00100 {
00101 protected:
              // Presumably the two constructor arguments of the same names;
              // ownership is not visible in this header.
00102         CAbstractVFunction *stateVisitCounter;
00103         CAbstractQFunction *actionVisitCounter;
00104 
00105 public:
00106         CExplorationQFunction(CAbstractVFunction *stateVisitCounter, CAbstractQFunction *actionVisitCounter);
00107 
00108         virtual ~CExplorationQFunction();
00109 
              // Value accessors for a (state, action) pair. The optional
              // CActionData argument defaults to NULL in all three.
00110         virtual void updateValue(CStateCollection *state, CAction *action, double td, CActionData *data = NULL);
00111         virtual void setValue(CStateCollection *state, CAction *action, double qValue, CActionData *data = NULL); 
00112         virtual double getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
00113 
              // Returns a CAbstractQETraces pointer; what it points to and who
              // owns it is decided by the implementation, which is not shown.
00114         virtual CAbstractQETraces *getStandardETraces();
00115 };
00116 
00117 
// CQStochasticExplorationPolicy: a CQStochasticPolicy that additionally holds
// an exploration Q-function and a raw buffer of exploration values.
// NOTE(review): presumably getActionValues() combines the values of the
// regular Q-function with those of `explorationFunction`, scaled by the
// constructor's `explorationFactor`; the implementation is not part of this
// header -- confirm against the .cpp.
00118 class CQStochasticExplorationPolicy : public CQStochasticPolicy
00119 {
00120 protected:
              // Returned unchanged by getExplorationQFunction().
00121         CAbstractQFunction *explorationFunction;
00122 
              // Raw buffer; its size and who owns it are not visible in this header.
00123         double *explorationValues;
00124 
00125 public:
              // `qFunction` is the regular Q-function, `explorationFunction` the
              // additional one. No member for `explorationFactor` is declared
              // in this class, so how it is retained is not visible here.
00126         CQStochasticExplorationPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractQFunction *qFunction, CAbstractQFunction *explorationFunction, double explorationFactor);
              // Spelled `virtual` like every other destructor in this header;
              // this changes nothing if the base destructor is already virtual.
00127         virtual ~CQStochasticExplorationPolicy();
00128 
              // Writes into the caller-supplied `actionValues` array; the
              // optional `actionDataSet` defaults to NULL.
00129         virtual void getActionValues(CStateCollection *state, CActionSet *availableActions, double *actionValues, CActionDataSet *actionDataSet = NULL);
00130 
              // Accessor for the exploration Q-function held by this policy.
00131         virtual CAbstractQFunction *getExplorationQFunction() {return explorationFunction;}
00132 };
00133 
// CSelectiveExplorationCalculator: a CSemiMDPListener that holds a pointer to
// a CQStochasticExplorationPolicy and a double named `attention`.
// NOTE(review): presumably adjusts the policy's exploration from the observed
// steps; the implementation is not part of this header -- confirm against
// the .cpp.
00134 class CSelectiveExplorationCalculator : public CSemiMDPListener
00135 {
00136 protected:
              // Presumably the policy handed to the constructor -- TODO confirm.
00137         CQStochasticExplorationPolicy *explorationPolicy;
00138         
              // Meaning and valid range are not visible in this header.
00139         double attention;
00140 public:
              // NOTE(review): the parameter is named `explorationFunction`
              // although its type is a policy (CQStochasticExplorationPolicy*).
00141         CSelectiveExplorationCalculator(CQStochasticExplorationPolicy *explorationFunction);
00142         virtual ~CSelectiveExplorationCalculator();
00143 
              // Step / episode callbacks; presumably overrides of
              // CSemiMDPListener, which is declared elsewhere -- confirm.
00144         virtual void nextStep(CStateCollection *state, CAction *action, CStateCollection *nextState);
00145         virtual void newEpisode();
00146 };
00147 
00148 
00149 #endif
00150