Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cresiduals.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_RESIDUALS__H
00033 #define C_RESIDUALS__H
00034 
00035 
00036 #include "cparameters.h"
00037 
00038 class CFeatureList;
00039 class CStateCollection;
00040 
00041 
// CResidualGradientFunction: abstract interface; its only member is pure
// virtual. Inherits virtually from CParameterObject (declared in
// cparameters.h).
//
// getResidualGradient(oldGradient, newGradient, duration, residualGradientFeatures)
//   Takes three CFeatureList pointers and a double, returns void.
//   NOTE(review): presumably combines the value-function gradients of the
//   old and the new state into residualGradientFeatures (used as the output
//   list); the implementation is not part of this header -- confirm in the
//   corresponding .cpp before relying on this.
00042 class CResidualGradientFunction : virtual public CParameterObject
00043 {
00044 public:
00045         virtual void getResidualGradient(CFeatureList *oldGradient, CFeatureList *newGradient, double duration, CFeatureList *residualGradientFeatures) = 0;
00046 };
00047 
// CResidualFunction: extends CResidualGradientFunction with a second pure
// virtual member, so it is still an abstract class (getResidualGradient is
// not overridden here).
//
// getResidual(oldV, reward, duration, newV)
//   Takes four doubles and returns a double.
//   NOTE(review): presumably oldV/newV are the value estimates before and
//   after a step, and the return value is the residual (error) for that
//   step -- the formula is defined by the subclasses; confirm in the .cpp.
00048 class CResidualFunction : public CResidualGradientFunction
00049 {
00050 public:
00051         virtual double getResidual(double oldV, double reward, double duration, double newV) = 0;
00052 };
00053 
00054 
// CDiscreteResidual: CResidualFunction subclass that declares overrides for
// both getResidual and getResidualGradient. The protected section is empty;
// no data members are declared in this class.
//
// CDiscreteResidual(double gamma)
//   NOTE(review): gamma is presumably the discount factor; since no member
//   is declared here it is presumably stored through CParameterObject --
//   confirm in the .cpp.
//   NOTE(review): the constructor takes a single argument and is not marked
//   explicit, so a double converts implicitly to CDiscreteResidual.
00055 class CDiscreteResidual : public CResidualFunction
00056 {
00057 protected: 
00058 public:
00059         CDiscreteResidual(double gamma);
00060 
00061         virtual double getResidual(double oldV, double reward, double duration, double newV);
00062         
00063         virtual void getResidualGradient(CFeatureList *oldGradient, CFeatureList *newGradient, double duration, CFeatureList *residualGradientFeatures);
00064 };
00065 
// CContinuousEulerResidual: CResidualFunction subclass that declares
// overrides for both getResidual and getResidualGradient. The protected
// section is empty; no data members are declared in this class.
//
// CContinuousEulerResidual(double dt, double sgamma)
//   NOTE(review): judging by the names, dt is presumably the time step and
//   sgamma a continuous-time discount parameter, with the residual based on
//   an Euler discretisation -- none of this is visible in the header;
//   confirm in the .cpp.
00066 class CContinuousEulerResidual : public CResidualFunction
00067 {
00068 protected:
00069 
00070 public:
00071 
00072         CContinuousEulerResidual(double dt, double sgamma);
00073         
00074         virtual double getResidual(double oldV, double reward, double duration,  double newV);
00075 
00076         virtual void getResidualGradient(CFeatureList *oldGradient, CFeatureList *newGradient, double duration, CFeatureList *residualGradientFeatures);
00077 };
00078 
// CContinuousCoulomResidual: CResidualFunction subclass that declares
// overrides for both getResidual and getResidualGradient. The protected
// section is empty; no data members are declared in this class.
//
// CContinuousCoulomResidual(double dt, double sgamma)
//   Same constructor signature as CContinuousEulerResidual.
//   NOTE(review): dt is presumably the time step and sgamma a
//   continuous-time discount parameter; how this residual differs from the
//   Euler variant is not visible in the header -- confirm in the .cpp.
00079 class CContinuousCoulomResidual : public CResidualFunction
00080 {
00081 protected:
00082         
00083 public:
00084         CContinuousCoulomResidual(double dt, double sgamma);
00085 
00086         virtual double getResidual(double oldV, double reward, double duration, double newV);
00087 
00088         virtual void getResidualGradient(CFeatureList *oldGradient, CFeatureList *newGradient, double duration, CFeatureList *residualGradientFeatures);
00089 };
00090 
// CAbstractBetaCalculator: abstract interface; its only member is pure
// virtual. Inherits virtually from CParameterObject.
//
// getBeta(directGradient, residualGradient)
//   Takes two CFeatureList pointers and returns a double.
//   NOTE(review): beta is presumably the weighting between the direct and
//   the residual gradient; its valid range is not stated in this header --
//   confirm in the .cpp.
00091 class CAbstractBetaCalculator : virtual public CParameterObject
00092 {
00093 public:
00094         virtual double getBeta(CFeatureList *directGradient, CFeatureList *residualGradient) = 0;
00095 };
00096 
// CConstantBetaCalculator: CAbstractBetaCalculator subclass that declares an
// override for getBeta. The protected section is empty; no data members are
// declared in this class.
//
// CConstantBetaCalculator(double beta)
//   NOTE(review): presumably getBeta returns this fixed beta regardless of
//   the two gradient arguments -- confirm in the .cpp.
//   NOTE(review): the constructor takes a single argument and is not marked
//   explicit, so a double converts implicitly to CConstantBetaCalculator.
00097 class CConstantBetaCalculator : public CAbstractBetaCalculator
00098 {
00099 protected:
00100 public:
00101         CConstantBetaCalculator(double beta);
00102         virtual double getBeta(CFeatureList *directGradient, CFeatureList *residualGradient);
00103 };
00104 
// CVariableBetaCalculator: CAbstractBetaCalculator subclass that declares an
// override for getBeta. The protected section is empty; no data members are
// declared in this class.
//
// CVariableBetaCalculator(double mu, double maxBeta)
//   NOTE(review): presumably beta is computed from the two gradients passed
//   to getBeta, with mu as a tuning constant and maxBeta as an upper bound;
//   the formula is not visible in the header -- confirm in the .cpp.
00105 class CVariableBetaCalculator : public CAbstractBetaCalculator
00106 {
00107 protected:
00108 
00109 public:
00110         CVariableBetaCalculator(double mu, double maxBeta);
00111         virtual double getBeta(CFeatureList *directGradient, CFeatureList *residualGradient);
00112 };
00113 
00114 
// CResidualBetaFunction: CResidualGradientFunction subclass that declares an
// override for getResidualGradient and holds three raw pointers:
//   betaCalculator   - a CAbstractBetaCalculator
//   residualGradient - another CResidualGradientFunction
//   tempResidual     - a CFeatureList
// The constructor takes the first two as arguments; tempResidual is not a
// constructor parameter.
//
// NOTE(review): presumably getResidualGradient asks residualGradient for its
// gradient (using tempResidual as scratch storage) and blends it with the
// direct gradient using the beta from betaCalculator -- confirm in the .cpp.
// NOTE(review): no destructor is declared in this class, so none of the
// three pointers is released by it. Ownership of betaCalculator and
// residualGradient is not visible here; if tempResidual is allocated in the
// constructor it looks like it is never freed -- verify.
00115 class CResidualBetaFunction : public CResidualGradientFunction
00116 {
00117 protected:
00118         CAbstractBetaCalculator *betaCalculator;
00119         CResidualGradientFunction *residualGradient;
00120         CFeatureList *tempResidual;
00121 public:
00122         CResidualBetaFunction(CAbstractBetaCalculator *betaCalculator, CResidualGradientFunction *residualGradient);
00123 
00124         virtual void getResidualGradient(CFeatureList *oldGradient, CFeatureList *newGradient, double duration, CFeatureList *residualGradientFeatures);
00125 
00126 };
00127 
// CDirectGradient: CResidualGradientFunction subclass that declares an
// override for getResidualGradient. It declares no constructor and no data
// members.
//
// NOTE(review): by its name this presumably yields the direct (TD-style)
// gradient, i.e. one that depends only on oldGradient -- the implementation
// is not in this header; confirm in the .cpp.
00128 class CDirectGradient : public CResidualGradientFunction
00129 {
00130 public:
00131         virtual void getResidualGradient(CFeatureList *oldGradient, CFeatureList *newGradient, double duration, CFeatureList *residualGradientFeatures);
00132 
00133 };
00134 
00135 
00136 
00137 
00138 #endif
00139