Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cgradientfunction.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_GRADIENTFUNCTION__H
00033 #define C_GRADIENTFUNCTION__H
00034 
00035 #include "cparameters.h"
00036 #include "clearndataobject.h"
00037 #include <newmat/newmat.h>
00038 
00039 class CFeatureList;
00040 
00042 
/// Abstract interface with a single operation, getWeightUpdates(), that
/// receives a list of weight updates. Inherits virtually from CParameterObject.
// NOTE(review): judging by the class name, implementations adapt the learning
// rate ("eta") applied to each update - confirm against the implementation.
00049 class CAdaptiveEtaCalculator : virtual public CParameterObject
00050 {
00051 public:
              /// Pure virtual hook that every concrete calculator must implement.
              /// @param updates list of weight updates handed to the calculator.
              // NOTE(review): presumably `updates` is modified in place (it is a
              // non-const pointer and nothing is returned) - TODO confirm.
00053         virtual void getWeightUpdates(CFeatureList *updates) = 0;
00054 };
00055 
/// Concrete CAdaptiveEtaCalculator that keeps an array of doubles (`etas`)
/// together with a weight count (`numWeights`).
// NOTE(review): presumably one eta (learning-rate factor) per weight, i.e.
// `etas` has `numWeights` entries - confirm against the implementation.
00057 class CIndividualEtaCalculator : public CAdaptiveEtaCalculator
00058 {
00059 protected:
              // Number of weights, as passed to the constructor.
00060         int numWeights;
              // Array of eta values. NOTE(review): whether this class owns the
              // array (and frees it in the destructor) is not visible in this header.
00061         double *etas;
00062 public:
              /// @param numWeights number of weights handled by this calculator.
              /// @param etas       optional array of initial eta values; defaults to NULL.
              // NOTE(review): behaviour for etas == NULL (e.g. default-initialised
              // values) and whether the array is copied are not visible here - confirm.
00063         CIndividualEtaCalculator(int numWeights, double *etas = NULL);
00064         virtual ~CIndividualEtaCalculator();
00065 
              /// Implements the CAdaptiveEtaCalculator interface.
              /// @param updates list of weight updates handed to the calculator.
00067         virtual void getWeightUpdates(CFeatureList *updates);
00068 
              /// Sets a single eta value.
              /// @param index index of the eta to set (presumably 0 <= index < numWeights - confirm).
              /// @param value new eta value.
00070         virtual void setEta(int index, double value);
00071 };
00072 
00074 
/// Concrete CAdaptiveEtaCalculator that keeps two double arrays (`eta_i`,
/// `v_i`) and a parameter count (`numParams`).
// NOTE(review): presumably an adaptive per-parameter learning-rate scheme where
// eta_i holds the current rates and v_i a running statistic - the algorithm is
// not visible in this header; confirm against the implementation.
00083 class CVarioEta : public CAdaptiveEtaCalculator
00084 {
00085 protected:
              // Per-parameter arrays; presumably `numParams` entries each - TODO confirm.
00086         double *eta_i;
00087         double *v_i;
00088 
              // The members below are disabled: beta, eta and epsilon are taken by the
              // constructor but are not stored as data members of this class.
              // NOTE(review): presumably they are kept via CParameterObject - confirm.
00089         /*double beta;
00090        double eta;
00091        double epsilon;*/
              // Number of parameters, as passed to the constructor.
00092         unsigned int numParams;
00093 public:
              /// @param numParams number of parameters handled by this calculator.
              /// @param eta       eta value (no default).
              /// @param beta      defaults to 0.01.
              /// @param epsilon   defaults to 0.0001.
00094         CVarioEta(unsigned int numParams, double eta, double beta = 0.01, double epsilon = 0.0001);
              // NOTE(review): declared without `virtual`, unlike the destructor of
              // CIndividualEtaCalculator. It is still virtual if a base-class destructor
              // is virtual; CParameterObject is not visible here - confirm.
00095         ~CVarioEta();
00096 
              /// Implements the CAdaptiveEtaCalculator interface.
              /// @param updates list of weight updates handed to the calculator.
00097         virtual void getWeightUpdates(CFeatureList *updates);
00098 };
00099 
00101 
/// Abstract base class for objects whose weights can be updated from a
/// gradient given as a CFeatureList. Inherits virtually from CParameterObject
/// and CLearnDataObject. Subclasses must implement updateWeights(),
/// getNumWeights(), getWeights(), setWeights() and resetData().
00115 class CGradientUpdateFunction : virtual public CParameterObject, virtual public CLearnDataObject
00116 {
00117 protected:
              // Feature-list buffer. NOTE(review): presumably scratch space used by
              // updateGradient() - confirm against the implementation.
00118         CFeatureList *localGradientFeatureBuffer;
00119 
00120         
              // Eta calculator accessed through getEtaCalculator()/setEtaCalculator().
              // NOTE(review): ownership is not visible in this header.
00121         CAdaptiveEtaCalculator *etaCalc;
00122 public:
00123         CGradientUpdateFunction();
00124         virtual ~CGradientUpdateFunction();
00125 
00127 
              /// Non-virtual entry point for applying a gradient.
              /// @param gradientFeatures gradient given as a feature list.
              /// @param factor           scaling factor; defaults to 1.0.
              // NOTE(review): presumably scales the gradient by `factor`, passes it
              // through `etaCalc` when one is set, and then calls updateWeights() -
              // TODO confirm against the implementation.
00131         void updateGradient(CFeatureList *gradientFeatures, double factor = 1.0);
00132 
              /// Pure virtual: apply the given parameter changes to the weights.
              /// @param dParams weight changes given as a feature list.
00134         virtual void updateWeights(CFeatureList *dParams) = 0;
00135 
              /// Pure virtual: returns the number of weights.
00137         virtual int getNumWeights() = 0;
00138 
              /// Returns the eta calculator.
00140         virtual CAdaptiveEtaCalculator* getEtaCalculator();
              /// Sets the eta calculator.
              /// @param etaCalc calculator to use.
00142         virtual void setEtaCalculator(CAdaptiveEtaCalculator *etaCalc);
00143 
00145 
              /// Pure virtual: read the weights into a caller-supplied array.
              /// @param parameters destination array.
              // NOTE(review): presumably must hold at least getNumWeights() doubles - confirm.
00148         virtual void getWeights(double *parameters) = 0;
00150 
              /// Pure virtual: set the weights from a caller-supplied array.
              /// @param parameters source array.
              // NOTE(review): presumably must hold at least getNumWeights() doubles - confirm.
00153         virtual void setWeights(double *parameters) = 0;
00154 
              /// Saves data to the given C stream.
              /// @param stream open FILE to write to.
00156         virtual void saveData(FILE *stream);
              /// Loads data from the given C stream.
              /// @param stream open FILE to read from.
00158         virtual void loadData(FILE *stream);
00159 
              /// Pure virtual: reset the data of this object.
00161         virtual void resetData() = 0;
00162 
              /// Copy operation taking a CLearnDataObject pointer.
              // NOTE(review): the copy direction (into or out of the argument) is not
              // visible in this header - confirm. The parameter name "gradientFuntion"
              // is a misspelling of "gradientFunction".
00163         virtual void copy(CLearnDataObject *gradientFuntion);
00164 };
00165 
00166 /*
00168 * The weight updates are forwarded to the original gradient function only when "updateOriginalGradientFunction" is called, so applying them can be deferred to an arbitrary later time.
00169 
00170 class CGradientDelayedUpdateFunction : virtual public CGradientUpdateFunction
00171 {
00172 protected:
00173        CGradientUpdateFunction *gradientFunction;
00174 
00175        double *weightsUpdate;
00176        
00177        
00178 public:
00179        virtual void updateWeights(CFeatureList *dParams);
00180 
00181 
00182        CGradientDelayedUpdateFunction(CGradientUpdateFunction *gradientFunction);
00183        virtual ~CGradientDelayedUpdateFunction();
00184 
00186        virtual int getNumWeights();
00187 
00188        virtual void getWeights(double *parameters);
00189        virtual void setWeights(double *parameters);
00190 
00192        virtual void resetData();
00193 
00194        virtual void updateOriginalGradientFunction();
00195 };
00196 
00197 class CDelayedFunctionUpdater : public CSemiMDPListener
00198 {
00199 protected:
00200        int nUpdateEpisodes;
00201        int nUpdateSteps;
00202 
00203        CGradientDelayedUpdateFunction * updateFunction;
00204 
00205        int nEpisodes;
00206        int nSteps;
00207 public:
00208 
00209        CDelayedFunctionUpdater(CGradientDelayedUpdateFunction * updateFunction, int nUpdateEpisodes, int nUpdateSteps);
00210        virtual ~CDelayedFunctionUpdater();
00211 
00212        virtual void newEpisode();
00213        virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
00214 
00215 };
00216 */
00217 
00219 
/// Gradient function with a fixed number of inputs and outputs, working on
/// newmat ColumnVector/Matrix arguments. Holds optional-looking mean/std
/// vectors for inputs and outputs plus protected preprocessInput() /
/// postprocessOutput() hooks. Subclasses must implement getGradientPre() and
/// getFunctionValuePre().
// NOTE(review): presumably getGradient()/getFunctionValue()/getInputDerivation()
// normalise the input with the mean/std vectors and delegate to the matching
// "...Pre" method - the bodies are not visible in this header; TODO confirm.
00228 class CGradientFunction : public CGradientUpdateFunction
00229 {
00230 protected:
              // Input and output counts, see getNumInputs()/getNumOutputs().
00231         int num_inputs;
00232         int num_outputs;
00233 
              // Mean / standard-deviation vectors for the inputs, set through
              // setInputMean()/setInputStd(). NOTE(review): ownership and the
              // behaviour when unset are not visible here.
00234         ColumnVector *input_mean;
00235         ColumnVector *input_std;
00236         
              // Mean / standard-deviation vectors for the outputs, set through
              // setOutputMean()/setOutputStd().
00237         ColumnVector *output_mean;
00238         ColumnVector *output_std;
00239         
00240         
              /// @param input      raw input vector.
              /// @param norm_input destination for the processed input.
00241         virtual void preprocessInput(ColumnVector *input, ColumnVector *norm_input);
              /// @param norm_output output before post-processing.
              /// @param output      destination for the post-processed output.
00242         virtual void postprocessOutput(Matrix *norm_output, Matrix *output);
00243 public:
              /// @param n_input  number of inputs.
              /// @param n_output number of outputs.
00244         CGradientFunction(int n_input, int n_output);
00245         virtual ~CGradientFunction();
00246 
              /// @param input            input vector.
              /// @param outputErrors     output error vector.
              /// @param gradientFeatures feature list receiving the gradient.
00247         virtual void getGradient(ColumnVector *input, ColumnVector *outputErrors, CFeatureList *gradientFeatures);
              /// @param input  input vector.
              /// @param output vector receiving the function value.
00249         virtual void getFunctionValue(ColumnVector *input, ColumnVector *output);
00250 
              /// @param input        input vector.
              /// @param targetVector matrix receiving the derivation with respect to the input.
00252         virtual void getInputDerivation(ColumnVector *input, Matrix *targetVector);
00253 
00254 
00255 
              /// Pure virtual counterpart of getGradient(); same parameter list.
00257         virtual void getGradientPre(ColumnVector *input, ColumnVector *outputErrors, CFeatureList *gradientFeatures) = 0;
              /// Pure virtual counterpart of getFunctionValue(); same parameter list.
00259         virtual void getFunctionValuePre(ColumnVector *input, ColumnVector *output) = 0;
00260 
              /// Counterpart of getInputDerivation(). The default implementation has an
              /// empty body and leaves both arguments untouched; override to provide it.
00262         virtual void getInputDerivationPre(ColumnVector *, Matrix *) {};
00263 
00264 
              /// Returns the number of inputs.
00266         virtual int getNumInputs();
              /// Returns the number of outputs.
00268         virtual int getNumOutputs();
00269 
              // Setters for the input/output mean vectors.
              // NOTE(review): whether the vectors are copied or the pointers are stored
              // is not visible in this header - confirm before freeing the arguments.
00270         void setInputMean(ColumnVector *input_mean);
00271         void setOutputMean(ColumnVector *output_mean);
00272         
              // Setters for the input/output standard-deviation vectors (same caveat).
00273         void setInputStd(ColumnVector *input_std);
00274         void setOutputStd(ColumnVector *output_std);
00275 };
00276 
00277 
00278 #endif
00279