Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cpolicies.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef CTDLEARNERPOLICIES_H
00033 #define CTDLEARNERPOLICIES_H
00034 
00035 #include "cagentcontroller.h"
00036 #include "cparameters.h"
00037 
// Forward declarations. Where this header uses these types at all it does so
// only through pointers, so the full class definitions are not required here.
// (CAbstractFeatureStochasticEstimatedModel is declared but not referenced
// anywhere else in this header.)
00038 class CAbstractFeatureStochasticEstimatedModel;
00039 class CTransitionFunction;
00040 class CAbstractQFunction;
00041 class CActionSet;
00042 class CFeatureList;
00043 class CActionStatistics;
00044 class CAbstractVFunction;
00045 class CQFunctionFromTransitionFunction;
00046 class CStateCollectionImpl;
00047 
00048 #include "newmat/newmat.h"
00049 
00051 
/// Agent controller that selects actions with the help of a Q-function.
///
/// Only declarations are visible in this header; the selection rule itself is
/// implemented elsewhere (presumably "pick the available action with the
/// highest Q-value", as the class name suggests -- TODO confirm).
00054 class CQGreedyPolicy : public CAgentController
00055 {
00056 protected:
              /// Q-function this policy refers to. Ownership is not visible
              /// from this header -- TODO confirm who deletes it.
00057         CAbstractQFunction *qFunction;
              /// Set of actions this policy refers to (same ownership caveat).
00058         CActionSet *availableActions;
00059 public:
              /// @param actions    action set for this policy
              /// @param qFunction  Q-function for this policy
00060         CQGreedyPolicy(CActionSet *actions, CAbstractQFunction *qFunction);
              // NOTE(review): not declared virtual here; it is virtual only if
              // CAgentController's destructor is (not visible in this file).
00061         ~CQGreedyPolicy();
00062 
              /// Returns the action to execute for the given state.
              /// @param state  state collection describing the current state
              /// @param data   optional action data set; defaults to NULL
              /// @return pointer to the chosen action
00064         virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL);
00065 
00066 };
00067 
00069 
/// Abstract interface for objects that turn per-action values into a
/// distribution over a set of actions.
///
/// getDistribution() is pure virtual, so only subclasses can be instantiated.
///
/// NOTE(review): no destructor is declared here. Deleting a subclass through a
/// CActionDistribution pointer is only well-defined if CParameterObject
/// (cparameters.h, not visible here) declares a virtual destructor -- confirm.
00073 class CActionDistribution : virtual public CParameterObject
00074 {
00075 public:
00077 
              /// Computes the distribution for the given state and action set.
              /// @param state             current state collection
              /// @param availableActions  actions the distribution ranges over
              /// @param actionFactors     array of doubles worked on by the
              ///        implementation; presumably one entry per available
              ///        action, values in and probabilities out -- TODO confirm
00080         virtual void getDistribution(CStateCollection *state, CActionSet *availableActions, double *actionFactors) = 0;
              /// Returns false by default; subclasses that provide gradient
              /// information (e.g. CSoftMaxDistribution) override it to return true.
00081         virtual bool isDifferentiable() {return false;};
00082 
00084 
              /// Gradient-related counterpart of getDistribution(); declared here and
              /// defined in the implementation file.
              /// @param gradientFactors  presumably receives the result -- TODO confirm
00085         virtual void getGradientFactors(CStateCollection *state, CAction *usedAction, CActionSet *actions, double *actionFactors, ColumnVector *gradientFactors);
00086 };
00087 
00089 
/// Action distribution that reports itself as differentiable and overrides
/// both getDistribution() and getGradientFactors().
///
/// Presumably a soft-max (Boltzmann) distribution over the action values with
/// the constructor argument beta as its sharpness parameter -- the formula is
/// not visible in this header, TODO confirm against the implementation.
00096 class CSoftMaxDistribution : public CActionDistribution
00097 {
      // The protected section below is empty: no data members are declared in
      // this class (beta is presumably kept via CParameterObject -- TODO confirm).
00098 protected:
00099 public:
00100 
              /// @param beta  distribution parameter.
              /// NOTE(review): single-argument constructor is not `explicit`,
              /// so a double converts implicitly to this type.
00101         CSoftMaxDistribution(double beta);
00102 
              /// Override of CActionDistribution::getDistribution(); the array
              /// parameter is named `values` here instead of `actionFactors`.
00103         virtual void getDistribution(CStateCollection *state, CActionSet *availableActions, double *values);
00104 
              /// Always true for this distribution.
00105         virtual bool isDifferentiable() {return true;};
00106 
              /// Override of CActionDistribution::getGradientFactors().
00107         virtual void getGradientFactors(CStateCollection *state, CAction *usedAction, CActionSet *actions, double *actionFactors, ColumnVector *gradientFactors);
00108 
00109 };
00110 
/// Action distribution constructed from a maximum absolute value.
///
/// Presumably a soft-max variant that scales the action values relative to
/// maxAbsValue -- the exact rule is not visible here, TODO confirm.
/// Unlike CSoftMaxDistribution it reports itself as non-differentiable and its
/// getGradientFactors() override is commented out.
00111 class CAbsoluteSoftMaxDistribution : public CActionDistribution
00112 {
      // Empty protected section: no data members are declared in this class.
00113 protected:
00114 public:
00115 
              /// @param maxAbsValue  distribution parameter (not `explicit`).
00116         CAbsoluteSoftMaxDistribution(double maxAbsValue);
00117 
              /// Override of CActionDistribution::getDistribution().
00118         virtual void getDistribution(CStateCollection *state, CActionSet *availableActions, double *values);
00119 
              /// Always false; restates the base-class default explicitly.
00120         virtual bool isDifferentiable() {return false;};
00121 
              // Gradient support is disabled: the override below is commented out,
              // so the CActionDistribution version is inherited.
00122         //virtual void getGradientFactors(CStateCollection *state, CAction *usedAction, CActionSet *actions, double *actionFactors, ColumnVector *gradientFactors);
00123 };
00124 
00126 
/// Action distribution that only overrides getDistribution().
///
/// It declares no constructor and no members, and inherits the base-class
/// isDifferentiable() (false). Presumably puts all probability on the
/// best-valued action -- TODO confirm against the implementation.
00130 class CGreedyDistribution : public CActionDistribution
00131 {
00132 public:
              /// Override of CActionDistribution::getDistribution().
00133         virtual void getDistribution(CStateCollection *state, CActionSet *availableActions, double *values);
00134 };
00135 
00137 
/// Action distribution constructed from an epsilon value.
///
/// Presumably epsilon-greedy: the best-valued action most of the time and a
/// random one with probability epsilon -- TODO confirm against the
/// implementation. Inherits the base-class isDifferentiable() (false).
00142 class CEpsilonGreedyDistribution : public CActionDistribution
00143 {
      // Empty protected section: no data members are declared in this class.
00144 protected:
00145 public:
      // The epsilon data member is commented out; the value is presumably kept
      // via CParameterObject instead -- TODO confirm.
00146 //      double epsilon;
00147 
              /// @param epsilon  distribution parameter (not `explicit`).
00148         CEpsilonGreedyDistribution(double epsilon);
              /// Override of CActionDistribution::getDistribution().
00149         virtual void getDistribution(CStateCollection *state, CActionSet *availableActions, double *values);
00150 };
00151 
00152 
00154 
/// Abstract base class for policies that combine per-action values with a
/// CActionDistribution.
///
/// Subclasses must implement getActionValues() (pure virtual). How the values
/// and the distribution are combined is defined in the implementation file;
/// presumably the values are turned into probabilities by the distribution and
/// an action is sampled from them -- TODO confirm.
00162 class CStochasticPolicy: public CAgentStatisticController
00163 {
00164 protected:
              /// Array of doubles; presumably a per-action buffer -- TODO
              /// confirm its size and who allocates/frees it.
00166         double *actionValues;
              /// Distribution object for this policy (ownership not visible here).
00167         CActionDistribution *distribution;
00168 
              /// Column vector related to CActionDistribution::getGradientFactors()
              /// -- presumably its output buffer, TODO confirm.
00169         ColumnVector *gradientFactors;
00170 
              /// Feature list used in the gradient computations (details not visible).
00171         CFeatureList *actionGradientFeatures;
00172         
              /// Action set for this policy.
00173         CActionSet *availableActions;
00174 
00176 
              /// Hook for filling in statistics about an action. The default
              /// implementation is empty and ignores all three (unnamed) arguments;
              /// CQStochasticPolicy overrides it.
00179         virtual void getActionStatistics(CStateCollection *, CAction *, CActionStatistics *) {};
00180 
00181 public:
              /// @param actions       action set for this policy
              /// @param distribution  distribution for this policy
00183         CStochasticPolicy(CActionSet *actions, CActionDistribution *distribution);
              // NOTE(review): not declared virtual here; it is virtual only if
              // CAgentStatisticController's destructor is (not visible here).
00184         ~CStochasticPolicy();
00185 
00187 
              /// Computes action probabilities for the given state.
              /// Note that the parameters availableActions and actionValues have
              /// the same names as, and therefore shadow, the protected members.
              /// @param actionValues   caller-provided array; presumably receives
              ///        one probability per available action -- TODO confirm
              /// @param actionDataSet  optional; defaults to NULL
00191         virtual void getActionProbabilities(CStateCollection *state, CActionSet *availableActions, double *actionValues, CActionDataSet *actionDataSet = NULL);
00193 
              /// Returns the action to execute for the given state.
              /// @param dataset  action data set (no default value in this overload)
              /// @param stat     action statistics object; presumably filled via
              ///        getActionStatistics() -- TODO confirm
00197         virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *dataset, CActionStatistics *stat);
00198 
              /// Pure virtual: subclasses supply the per-action values.
              /// The parameter names shadow the members, as in getActionProbabilities().
00200         virtual void getActionValues(CStateCollection *state, CActionSet *availableActions, double *actionValues, CActionDataSet *actionDataSet = NULL) = 0;
00201 
00202 
              /// Returns false by default; CQStochasticPolicy and
              /// CVMStochasticPolicy override it.
00203         virtual bool isDifferentiable() {return false;};
00204 
              /// Gradient of the action probability; the result is presumably
              /// written to gradientState -- TODO confirm.
00205         virtual void getActionProbabilityGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);
              /// As above, but presumably for the logarithm of the probability
              /// (going by the "Ln" in the name) -- TODO confirm.
00206         virtual void getActionProbabilityLnGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);
00207 
00209 
              /// Gradient for a single action; not pure virtual here and overridden
              /// by CQStochasticPolicy and CVMStochasticPolicy.
00212         virtual void getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);
00213 };
00214 
00216 
/// Stochastic policy backed by a Q-function.
///
/// Implements the pure virtual getActionValues() of CStochasticPolicy;
/// presumably by querying the Q-function for each available action -- the
/// implementation is not visible here, TODO confirm.
00222 class CQStochasticPolicy : public CStochasticPolicy
00223 {
00224 protected:
              /// Q-function for this policy; exposed through getQFunction().
00226         CAbstractQFunction *qfunction;
          /// Overrides the empty default hook of CStochasticPolicy.
00228     virtual void getActionStatistics(CStateCollection *state, CAction *action, CActionStatistics *stat);
00229 
00230 public:
              /// @param actions       action set for this policy
              /// @param distribution  distribution for this policy
              /// @param qfunction     Q-function for this policy
00231         CQStochasticPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractQFunction *qfunction);
00232         ~CQStochasticPolicy();
00233 
              /// Implementation of CStochasticPolicy::getActionValues().
00234         virtual void getActionValues(CStateCollection *state, CActionSet *availableActions, double *actionValues, CActionDataSet *actionDataSet = NULL);
00235 
              /// Override of CStochasticPolicy::getActionGradient().
00236         virtual void getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);
              /// Defined in the implementation file; the conditions under which it
              /// returns true are not visible from this header.
00237         virtual bool isDifferentiable();
00238 
              /// Returns the stored Q-function pointer.
00239         virtual CAbstractQFunction *getQFunction() {return qfunction;};
00240 };
00241 
// Repeated forward declaration: CQFunctionFromTransitionFunction is already
// forward-declared near the top of this header. The repetition is harmless.
00242 class CQFunctionFromTransitionFunction;
00243 
00244 
00246 
/// Stochastic policy constructed from a V-function, a transition function
/// (model) and a reward function instead of an explicit Q-function.
///
/// The constructor takes no CAbstractQFunction although the base class
/// CQStochasticPolicy requires one; presumably the
/// CQFunctionFromTransitionFunction member fills that role -- TODO confirm
/// against the implementation.
///
/// NOTE(review): this header uses std::list, CRewardFunction and
/// CStateModifier without including <list> or declaring those classes itself;
/// they must come in through the included headers -- confirm.
00257 class CVMStochasticPolicy : public CQStochasticPolicy
00258 {
00259 protected:
              /// State collections held by the policy; going by their names they
              /// are presumably scratch objects for successor/intermediate states
              /// -- TODO confirm.
00260         CStateCollectionImpl *nextState;
00261         CStateCollectionImpl *intermediateState;
00262 
              /// V-function for this policy.
00263         CAbstractVFunction *vFunction;
              /// Q-function derived from the transition function (see class comment).
00264         CQFunctionFromTransitionFunction *qFunctionFromTransitionFunction;
              /// Transition function (model) for this policy.
00265         CTransitionFunction *model;
              /// Reward function for this policy.
00266         CRewardFunction *reward;
00267 public:
00268         
              /// @param actions       action set for this policy
              /// @param distribution  distribution for this policy
              /// @param vFunction     V-function for this policy
              /// @param model         transition function for this policy
              /// @param reward        reward function for this policy
              /// @param modifiers     list of state modifiers; how they are used is
              ///        not visible from this header
00269         CVMStochasticPolicy(CActionSet *actions, CActionDistribution *distribution, CAbstractVFunction *vFunction, CTransitionFunction *model, CRewardFunction *reward, std::list<CStateModifier *> *modifiers);
00270         ~CVMStochasticPolicy();
00271 
              /// Override of CQStochasticPolicy::getActionGradient().
00272         virtual void getActionGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradientState);
00273 
              /// Defined in the implementation file; conditions not visible here.
00274         virtual bool isDifferentiable();
00275 };
00276 
00277 
00278 #endif
00279