Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

ccontinuousactiongradientpolicy.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_CONTINUOUSACTIONGRADIENTPOLICY__H
00033 #define C_CONTINUOUSACTIONGRADIENTPOLICY__H
00034 
00035 #include "cparameters.h"
00036 #include "cbaseobjects.h"
00037 #include "ccontinuousactions.h"
00038 #include "cgradientfunction.h"
00039 
00040 
00041 #include <list>
00042 #include <map>
00043 #include <vector>
00044 // Forward declarations: the classes below use these types only through pointers.
00045 class CStateProperties;
00046 class CStateCollection;
00047 class CStateCollectionImpl;
00048 class CFeatureList;
00049 class CFeatureCalculator;
00050 class CFeatureVFunction;
00051 class CVFunctionInputDerivationCalculator;
00052 
00053 class CStateModifier; // needed by the CCAGradientPolicyNumericInputDerivationCalculator constructor (std::list<CStateModifier *> *)
00054 // Abstract interface declaring getInputDerivation(); the policy classes later in this header implement it.
00055 class CCAGradientPolicyInputDerivationCalculator : virtual public CParameterObject
00056 {
00057 protected:
00058 
00059 public:
00060         virtual ~CCAGradientPolicyInputDerivationCalculator() {} // virtual: implementations are referenced through pointers to this interface
00061         virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector) = 0; // presumably writes the derivative w.r.t. inputState into targetVector -- confirm in the implementations
00062 
00063 };
00064 // Abstract base (it declares pure virtual members) for continuous-action policies that are also gradient functions.
00065 class CContinuousActionGradientPolicy : public CContinuousActionController, public CGradientFunction, public CStateObject
00066 {
00067 protected:
00068         CStateProperties *modelState; // presumably set from the constructor argument of the same name; ownership is not visible in this header
00069         // Weight-update hook; pure virtual, so every concrete policy must provide it.
00070         virtual void updateWeights(CFeatureList *dParams) = 0;
00071 
00072 public:
00073         CContinuousActionGradientPolicy(CContinuousAction *contAction, CStateProperties *modelState);
00074         virtual ~CContinuousActionGradientPolicy(); // virtual: other classes in this header hold policies as CContinuousActionGradientPolicy pointers
00075         // Pure virtual; presumably the implementation writes the action chosen for `state` into `action` -- confirm in subclasses.
00076         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action) = 0;
00077         // Pure virtual; presumably the number of entries read/written by getWeights()/setWeights() -- TODO confirm.
00078         virtual int getNumWeights() = 0;
00079         // Pure virtual weight accessors working on a caller-supplied array of doubles.
00080         virtual void getWeights(double *parameters) = 0;
00081         virtual void setWeights(double *parameters) = 0;
00082         // Gradient queries: the state-based overload is pure virtual, the ColumnVector-based one is declared non-pure (body not in this header).
00083         virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures) = 0;
00084         virtual void getGradientPre(ColumnVector *input, ColumnVector *outputErrors, CFeatureList *gradientFeatures);
00085         // Declared non-pure, body not in this header; presumably an override of a CGradientFunction hook -- verify against cgradientfunction.h.
00086         virtual void getFunctionValuePre(ColumnVector *input, ColumnVector *output);
00087         // Pure virtual reset hook.
00088         virtual void resetData() = 0;
00089 };
00090 // Policy that holds a CGradientFunction pointer; presumably it delegates to that function (the implementation is not in this header).
00091 class CContinuousActionPolicyFromGradientFunction : public CContinuousActionGradientPolicy, public CCAGradientPolicyInputDerivationCalculator
00092 {
00093 protected:
00094         CGradientFunction *gradientFunction; // same type as the constructor's gradientFunction argument; ownership is not visible here
00095         
00096         ColumnVector *outputError; // NOTE(review): purpose and allocation are defined in the .cpp -- not visible here
00097         
00098         // Declares the weight-update hook that CContinuousActionGradientPolicy leaves pure virtual.
00099         virtual void updateWeights(CFeatureList *dParams);
00100         // The three getInner* declarations below are commented out in the original source.
00101 //      virtual void getInnerContinuousAction(CStateCollection *state, ColumnVector *action);
00102 //      virtual void getInnerInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00103 //      virtual void getInnerGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00104 
00105 public:
00106         CContinuousActionPolicyFromGradientFunction(CContinuousAction *contAction, CGradientFunction *gradientFunction, CStateProperties *modelState);
00107         ~CContinuousActionPolicyFromGradientFunction();
00108         // Non-pure declarations of the base-class pure virtuals, plus getInputDerivation() from the calculator interface.
00109         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00110         virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00111         virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00112 
00113         virtual int getNumWeights();
00114         // Weight accessors working on a caller-supplied array of doubles.
00115         virtual void getWeights(double *parameters);
00116         virtual void setWeights(double *parameters);
00117 
00118 
00119         virtual void resetData();
00120 };
00121 // Policy built on lists of feature calculators and feature value functions (see the members below); implementation is not in this header.
00122 class CContinuousActionFeaturePolicy : public CContinuousActionGradientPolicy, public CCAGradientPolicyInputDerivationCalculator
00123 {
00124 protected:
00125         std::list<CFeatureCalculator *> *featureCalculators; // same type as the constructor's list argument; ownership is not visible here
00126         std::list<CFeatureVFunction *> *featureFunctions; // NOTE(review): presumably one function per feature calculator -- confirm in the .cpp
00127         // Declares the weight-update hook that CContinuousActionGradientPolicy leaves pure virtual.
00128         virtual void updateWeights(CFeatureList *dParams);
00129 
00130         int numWeights; // presumably the value reported by getNumWeights() -- TODO confirm
00131         CFeatureList *localGradient;
00132 
00133         ColumnVector *inputDerivation;
00134         // Associates a feature value function with an input-derivation calculator (key and value are both pointers).
00135         std::map<CFeatureVFunction *, CVFunctionInputDerivationCalculator *> *inputDerivationFunctions;
00136         // The three getInner* declarations below are commented out in the original source.
00137         //virtual void getInnerContinuousAction(CStateCollection *state, ColumnVector *action);
00138         //virtual void getInnerInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00139         //virtual void getInnerGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00140 
00141 public:
00142         CContinuousActionFeaturePolicy(CContinuousAction *contAction, CStateProperties *modelState, std::list<CFeatureCalculator *> *featureCalcualtors); // NOTE(review): parameter name is misspelled ("Calcualtors")
00143         ~CContinuousActionFeaturePolicy();
00144 
00145         virtual int getNumWeights();
00146         // Weight accessors working on a caller-supplied array of doubles.
00147         virtual void getWeights(double *parameters);
00148         virtual void setWeights(double *parameters);
00149 
00150         virtual void resetData();
00151         // Non-pure declarations of the base-class pure virtuals, plus getInputDerivation() from the calculator interface.
00152         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00153         virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00154         virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00155 };
00156 // Policy that holds another gradient policy and an input-derivation calculator; presumably applies a sigmoid to the inner policy's output -- confirm in the .cpp.
00157 class CContinuousActionSigmoidPolicy : public CContinuousActionGradientPolicy, public CCAGradientPolicyInputDerivationCalculator
00158 {
00159 protected:
00160         CContinuousActionGradientPolicy *policy; // same type as the constructor's policy argument; ownership is not visible here
00161         CCAGradientPolicyInputDerivationCalculator *inputDerivation; // same type as the constructor's inputDerivation argument
00162 
00163         CContinuousActionData *contData; // NOTE(review): presumably a scratch action buffer -- its use is defined in the .cpp
00164         // Declares the weight-update hook that CContinuousActionGradientPolicy leaves pure virtual.
00165         virtual void updateWeights(CFeatureList *dParams);
00166 
00167 public:
00168         CContinuousActionSigmoidPolicy(CContinuousActionGradientPolicy *policy, CCAGradientPolicyInputDerivationCalculator *inputDerivation);
00169         ~CContinuousActionSigmoidPolicy();
00170 
00171 
00172         virtual int getNumWeights();
00173         // Weight accessors working on a caller-supplied array of doubles.
00174         virtual void getWeights(double *parameters);
00175         virtual void setWeights(double *parameters);
00176 
00177         virtual void resetData();
00178         // Non-pure declarations of the base-class pure virtuals, plus getInputDerivation() from the calculator interface.
00179         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00180         virtual void getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures);
00181         virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00182         // Not declared in CContinuousActionGradientPolicy above; presumably overrides a CContinuousActionController method -- verify.
00183         virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *l_noise);
00184 
00185 };
00186 // Input-derivation calculator for a given policy; presumably uses finite differences with the given stepSize -- confirm in the .cpp.
00187 class CCAGradientPolicyNumericInputDerivationCalculator : public CCAGradientPolicyInputDerivationCalculator
00188 {
00189 protected:
00190         CContinuousActionGradientPolicy *policy; // same type as the constructor's policy argument; ownership is not visible here
00191         // NOTE(review): presumably hold the policy output at the positively / negatively perturbed input -- verify.
00192         CContinuousActionData *contDataPlus;
00193         CContinuousActionData *contDataMinus;
00194 
00195         CStateCollectionImpl *stateBuffer; // NOTE(review): presumably a working copy of the input state built with the constructor's modifiers -- verify
00196 public:
00197         CCAGradientPolicyNumericInputDerivationCalculator(CContinuousActionGradientPolicy *policy, double stepSize,  std::list<CStateModifier *> *modifiers);
00198         ~CCAGradientPolicyNumericInputDerivationCalculator();
00199         // Non-pure declaration of the interface's pure virtual getInputDerivation().
00200         virtual void getInputDerivation(CStateCollection *inputState, Matrix *targetVector);
00201 };
00202 
00203 #endif
00204