Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

ccontinuoustime.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_CONTINUOUSTIME
00033 #define C_CONTINUOUSTIME
00034 
00035 
00036 #include "cpolicies.h"
00037 #include "ccontinuousactions.h"
00038 #include "ccontinuousactiongradientpolicy.h"
00039 #include "newmat/newmat.h"
00040 
00041 
00042 
// Forward declarations: each of these types is referenced only through
// pointers in this header, so a full definition is not required here.
00043 class CVFunctionInputDerivationCalculator;
00044 class CContinuousTimeTransitionFunction;
00045 class CContinuousTimeQFunctionFromTransitionFunction;
00046 class CRewardFunction;
00047 class CTransitionFunction;
00048 class CGradientVFunction;
00049 class CStateCollection;
00050 
// Collection of static parameter-conversion helpers; the class declares no
// data members and no instance methods.
00051 class CContinuousTimeParameters
00052 {
00053 public:
              // Returns a gamma value derived from sgamma and the time step dt.
              // NOTE(review): the formula is defined in the .cpp; presumably gamma is
              // the discrete-time discount factor -- confirm.
00054         static double getGammaFromSgamma(double sgamma, double dt);
              // Returns a lambda value derived from kappa, sgamma and the time step dt.
              // NOTE(review): presumably lambda is the discrete-time eligibility-trace
              // decay -- confirm against the implementation.
00055         static double getLambdaFromKappa(double kappa, double sgamma, double dt);
00056 };
00057 
// Stochastic policy (derives from CQStochasticPolicy) that is configured with
// a value-function derivative calculator and a continuous-time transition
// model.
// NOTE(review): presumably the Q-values used for action selection are built
// from the V-function and the model -- confirm in the .cpp.
00058 class CContinuousTimeVMPolicy : public CQStochasticPolicy
00059 {
00060 protected:
              // Pointers handed in through the constructor.
              // NOTE(review): ownership is not visible in this header -- confirm.
00061         CVFunctionInputDerivationCalculator *vfunction;
00062         CContinuousTimeTransitionFunction *model;
00063 public:
00064 
              // actions / distribution: action set and action distribution for the policy.
              // vFunction, model, rewardFunction: components supplied by the caller.
00065         CContinuousTimeVMPolicy(CActionSet *actions, CActionDistribution *distribution, CVFunctionInputDerivationCalculator *vFunction, CContinuousTimeTransitionFunction *model, CRewardFunction *rewardFunction);
00066         ~CContinuousTimeVMPolicy();
00067 
              // Accessor returning a CContinuousTimeQFunctionFromTransitionFunction pointer.
00068         CContinuousTimeQFunctionFromTransitionFunction *getQFunctionFromTransitionFunction();
00069 
00070 };
00071 
// Abstract continuous-action controller that holds a value-function
// derivative calculator (dVFunction) and a transition model. The class is
// abstract: subclasses must implement getActionValues().
00072 class CContinuousTimeAndActionVMPolicy : public CContinuousActionController
00073 {
00074 protected:
00075         CVFunctionInputDerivationCalculator *dVFunction;
00076         CTransitionFunction *model;
00077 
              // Vector/matrix work buffers.
              // NOTE(review): allocation and ownership are decided in the .cpp -- confirm.
00078         ColumnVector *actionValues;
00079         ColumnVector *derivationX;
00080         Matrix *derivationU;
00081 
              // Pure virtual hook implemented by each concrete policy.
              // NOTE(review): presumably fills actionValues and may take noise into
              // account -- confirm against the subclasses' implementations.
00082         virtual void getActionValues(ColumnVector *actionValues, ColumnVector *noise) = 0;
00083 public:
00084         CContinuousTimeAndActionVMPolicy(CContinuousAction *action, CVFunctionInputDerivationCalculator *dVFunction, CTransitionFunction *model);
              // Declared virtual explicitly: this is an abstract base and subclasses
              // declare their own destructors, so deletion through a pointer to this
              // type must dispatch to the most-derived destructor.
00085         virtual ~CContinuousTimeAndActionVMPolicy();
00086 
              // Writes the next action for the given state into contAction.
00087         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *contAction);
00088 
00089         
00090 };
00091 
// Concrete CContinuousTimeAndActionVMPolicy that adds a vector of per-index
// parameters `c` and implements the base class's getActionValues() hook.
// NOTE(review): the name suggests a sigmoid is applied to the action values,
// with c as its scaling parameters -- confirm in the .cpp.
00092 class CContinuousTimeAndActionSigmoidVMPolicy : public CContinuousTimeAndActionVMPolicy
00093 {
00094 protected:
00095 
              // Parameter vector read and written through setC()/getC().
00096         ColumnVector *c;
00097 
              // Implements the pure virtual declared in CContinuousTimeAndActionVMPolicy.
00098         void getActionValues(ColumnVector *actionValues, ColumnVector *noise);
00099 
00100 public:
00101         CContinuousTimeAndActionSigmoidVMPolicy(CContinuousAction *action, CVFunctionInputDerivationCalculator *vfunction, CTransitionFunction *model);
00102         ~CContinuousTimeAndActionSigmoidVMPolicy();
00103 
              // Element-wise accessors for c.
              // NOTE(review): index base (0 or 1) and bounds handling are not visible here -- confirm.
00104         void setC(int index, double value);
00105         double getC(int index);
00106 
              // Returns the internal pointer to the whole parameter vector (no copy is made).
00107         ColumnVector *getC() {return c;};
00108 
              // NOTE(review): presumably writes into `noise` the noise associated with
              // `action` in `state` -- confirm.
00109         virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00110 };
00111 
// Gradient-capable variant of the sigmoid value/model policy. It derives from
// CContinuousActionGradientPolicy rather than CContinuousTimeAndActionVMPolicy,
// and therefore re-declares the same members (dVFunction, model,
// actionValues, derivationX, derivationU, c) that the non-gradient classes
// in this header carry.
00112 class CContinuousTimeAndActionSigmoidVMGradientPolicy : public CContinuousActionGradientPolicy
00113 {
00114 protected:
00115         CGradientVFunction *vFunction;
00116         CStateCollectionImpl *derivationState;
00117 
              // Two feature lists used as gradient storage.
              // NOTE(review): their exact roles are only visible in the .cpp -- confirm.
00118         CFeatureList *gradient1;
00119         CFeatureList *gradient2;
00120 
00121         virtual void updateWeights(CFeatureList *dParams);
00122 
00123         
              // Same members as declared in CContinuousTimeAndActionVMPolicy.
00124         CVFunctionInputDerivationCalculator *dVFunction;
00125         CTransitionFunction *model;
00126         ColumnVector *actionValues;
00127         ColumnVector *derivationX;
00128         Matrix *derivationU;
00129 
              // Parameter vector read and written through setC()/getC().
00130         ColumnVector *c;
00131 
00132         void getActionValues(ColumnVector *actionValues, ColumnVector *noise);
              // Default implementation is an empty inline body (does nothing).
00133         virtual void getGradientActionValues(ColumnVector *, ColumnVector *) {};
00134 
00135 
00136 
00137 
00138 public:
              // gradVFunction: stored value function with gradient support.
              // vfunction / model: derivative calculator and transition model.
              // modifiers: list of state modifiers.
              // NOTE(review): how `modifiers` is used is not visible here -- confirm.
00139         CContinuousTimeAndActionSigmoidVMGradientPolicy(CContinuousAction *action, CGradientVFunction *gradVFunction, CVFunctionInputDerivationCalculator *vfunction, CTransitionFunction *model, std::list<CStateModifier *> *modifiers);
00140         virtual ~CContinuousTimeAndActionSigmoidVMGradientPolicy();
00141 
              // Weight interface.
              // NOTE(review): presumably `parameters` must hold at least
              // getNumWeights() doubles -- confirm.
00142         virtual int getNumWeights();
00143 
00144         virtual void getWeights(double *parameters);
00145         virtual void setWeights(double *parameters);
00146 
              // Fills gradientFeatures for the given input state and output dimension.
00147         virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00148 
00149         virtual void resetData();
00150 
              // Writes the next action for the given state into contAction.
00151         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *contAction);
00152 
              // Element-wise accessors for c.
              // NOTE(review): index base and bounds handling are not visible here -- confirm.
00153         void setC(int index, double value);
00154         double getC(int index);
00155 
              // Returns the internal pointer to the whole parameter vector (no copy is made).
00156         ColumnVector *getC() {return c;};
00157 
00158         virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00159 };
00160 
// Concrete CContinuousTimeAndActionVMPolicy that adds no data members and
// declares no destructor of its own; it only implements getActionValues()
// and getNoise().
// NOTE(review): the name suggests bang-bang control (actions at their
// limits) -- confirm in the .cpp.
00161 class CContinuousTimeAndActionBangBangVMPolicy : public CContinuousTimeAndActionVMPolicy
00162 {
00163 protected:
              // Implements the pure virtual declared in CContinuousTimeAndActionVMPolicy.
00164         virtual void getActionValues(ColumnVector *actionValues, ColumnVector *noise);
00165  
00166 public:
00167         CContinuousTimeAndActionBangBangVMPolicy(CContinuousAction *action, CVFunctionInputDerivationCalculator *vfunction, CTransitionFunction *model);
00168 
00169         virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00170 };
00171 
// Continuous-action controller that wraps another CContinuousActionController
// (`policy`) together with a coefficient `alpha` and a buffer of action values.
// NOTE(review): presumably blends the wrapped policy's output with the
// previously stored action values using alpha -- confirm in the .cpp.
00172 class CContinuousActionSmoother : public CContinuousActionController
00173 {
00174 protected:
              // Wrapped controller.
              // NOTE(review): ownership is not visible in this header -- confirm.
00175         CContinuousActionController *policy;
              // Raw buffer of doubles.
              // NOTE(review): length and lifetime are managed in the .cpp -- confirm.
00176         double *actionValues;
00177 
00178         double alpha;
00179 public:
              // alpha defaults to 0.3 when omitted.
00180         CContinuousActionSmoother(CContinuousAction *action, CContinuousActionController *policy, double alpha = 0.3);
00181         ~CContinuousActionSmoother();
00182 
              // Writes the next action for the given state into contAction.
00183         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *contAction);
00184 
              // Accessors for alpha; note that only the getter is declared virtual.
00185         void setAlpha(double alpha);
00186         virtual double getAlpha();
00187 
00188 };
00189 
00190 #endif
00191