Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

crewardfunction.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_REWARDFUNCTION_H
00033 #define C_REWARDFUNCTION_H
00034 
00035 #include "cbaseobjects.h"
00036 
00037 #include "newmat/newmat.h"
00038 
/// Forward declarations: within this header these types are only used through
/// pointers (CFeatureList* in CFeatureRewardFunction::getReward,
/// CAbstractVFunction* and CFeatureVFunction* as members/constructor
/// arguments), so their full definitions are not needed here.
00039 class CFeatureList;
00040 class CAbstractVFunction;
00041 class CFeatureVFunction;
00042 
00044 
/// Abstract interface for reward functions.
///
/// Declares a single pure virtual getReward() that maps an
/// (oldState, action, newState) triple to a double. The class has no data
/// members; concrete reward functions derive from it and implement getReward().
00052 class CRewardFunction
00053 {
00054 public:
              /// Virtual destructor with an empty body, so objects of derived
              /// classes can be deleted through a CRewardFunction pointer.
00055         virtual ~CRewardFunction() {};
              /// Returns the reward as a double.
              /// @param oldState state collection passed as the "old" state
              /// @param action   the action argument
              /// @param newState state collection passed as the "new" state
              /// Pure virtual: every concrete subclass must implement it.
00057         virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState) = 0;
00058 };
00059 
00061 
/// Reward function base class offering getReward() overloads for several
/// state representations (CStateCollection, int, CState, CFeatureList).
///
/// Only the int overload is pure virtual; subclasses must implement it.
/// The other three overloads are declared here and defined outside this
/// header. NOTE(review): presumably they reduce their arguments to the int
/// overload with the help of `discretizer` -- the implementation is not
/// visible in this file, confirm in the corresponding .cpp.
00066 class CFeatureRewardFunction : public CRewardFunction, public CStateObject
00067 {
00068 protected:
              /// State properties pointer received by the constructor
              /// (assumed to be stored from that argument; whether this class
              /// owns the pointee cannot be told from this header).
00069         CStateProperties *discretizer;
00070 
00071 public:
              /// @param discretizer state properties object used by this
              ///                    reward function
00073         CFeatureRewardFunction(CStateProperties *discretizer);
              /// Virtual destructor; defined outside this header.
00074         virtual ~CFeatureRewardFunction();
00075 
              /// Overload with the same signature as
              /// CRewardFunction::getReward (state collections).
00077         virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
              /// Overload taking the old and new state as plain ints.
              /// Pure virtual: this is the function subclasses have to provide.
00079         virtual double getReward(int oldState, CAction *action, int newState) = 0;
00081 
              /// Overload taking single CState objects.
00084         virtual double getReward(CState *oldState, CAction *action, CState *newState);
              /// Overload taking feature lists.
00086         virtual double getReward(CFeatureList *oldState, CAction *action, CFeatureList *newState);
00087 
00088 };
00089 
00091 
/// Reward function base class whose subclasses compute a reward from a
/// single CState via the pure virtual getStateReward().
///
/// NOTE(review): getReward() is only declared here; presumably it looks up
/// a state described by `properties` in one of the collections and forwards
/// it to getStateReward() -- confirm against the implementation file.
00095 class CStateReward : public CRewardFunction, public CStateObject
00096 {
00097 protected:
              /// State properties pointer received by the constructor
              /// (assumed to be stored from that argument; ownership is not
              /// visible in this header).
00098         CStateProperties *properties;
00099 public:
              /// @param properties state properties object used by this
              ///                   reward function
00100         CStateReward(CStateProperties *properties);
              /// Virtual destructor with an empty body.
00101         virtual ~CStateReward() {};
00102 
              /// Same signature as CRewardFunction::getReward; defined outside
              /// this header.
00103         virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00104         
              /// Returns the reward for the given state as a double.
              /// Pure virtual: subclasses must implement it.
00105         virtual double getStateReward(CState *modelState) = 0;
              /// Default implementation has an empty body: both arguments are
              /// ignored and the ColumnVector is left untouched. Subclasses may
              /// override it (presumably to write the derivative of the reward
              /// with respect to the state into the vector -- TODO confirm).
00106         virtual void getInputDerivation(CState *, ColumnVector *) {};
00107 };
00108 
/// Reward function that is constantly zero.
00109 class CZeroReward : public CRewardFunction
00110 {
00111 public:
              /// Ignores all three arguments (they are unnamed) and always
              /// returns 0.
00112         virtual double getReward(CStateCollection *, CAction *, CStateCollection *) {return 0;};
00113 };
00114 
/// Reward function associated with a value function (CAbstractVFunction).
///
/// NOTE(review): only the declarations are visible here. Presumably
/// getReward() returns the value of `vFunction` evaluated at newState when
/// `useNewState` is true and at oldState otherwise -- confirm in the .cpp.
00115 class CRewardFunctionFromValueFunction : public CRewardFunction
00116 {
00117 protected:
              /// Value function pointer received by the constructor (assumed
              /// to be stored from that argument; ownership is not visible here;
              /// this class declares no destructor of its own).
00118         CAbstractVFunction *vFunction;
              /// Flag received by the constructor; the constructor argument
              /// defaults to true.
00119         bool useNewState;
00120 public:
              /// @param vFunction   value function to use
              /// @param useNewState optional flag, defaults to true
00121         CRewardFunctionFromValueFunction(CAbstractVFunction *vFunction, bool useNewState = true);
00122 
              /// Same signature as CRewardFunction::getReward; defined outside
              /// this header.
00123         virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00124 };
00125 
/// Feature-based reward function associated with a CFeatureVFunction.
/// Implements the int overload of getReward() that its base class
/// CFeatureRewardFunction declares as pure virtual.
///
/// NOTE(review): declaring only the int overload in this class hides the
/// other getReward() overloads inherited from CFeatureRewardFunction for
/// calls made through an object/pointer of this exact type (calls through a
/// base-class pointer are unaffected). If direct calls are wanted, a
/// `using CFeatureRewardFunction::getReward;` declaration would re-expose them.
00126 class CFeatureRewardFunctionFromValueFunction : public CFeatureRewardFunction
00127 {
00128 protected:
              /// Feature value function pointer received by the constructor
              /// (assumed to be stored from that argument; ownership is not
              /// visible in this header).
00129         CFeatureVFunction *vFunction;
              /// Flag received by the constructor; the constructor argument
              /// defaults to true. Presumably selects whether newState or
              /// oldState is evaluated -- TODO confirm in the .cpp.
00130         bool useNewState;
00131 public:
              /// @param discretizer state modifier for this reward function.
              ///        NOTE(review): the base-class constructor is declared with
              ///        CStateProperties*, here the parameter is CStateModifier*;
              ///        the relationship between the two types is not visible in
              ///        this file.
              /// @param vFunction   feature value function to use
              /// @param useNewState optional flag, defaults to true
00132         CFeatureRewardFunctionFromValueFunction(CStateModifier *discretizer, CFeatureVFunction *vFunction, bool useNewState = true);
              /// Destructor; implicitly virtual because the base class
              /// CFeatureRewardFunction declares a virtual destructor.
              /// Defined outside this header.
00133         ~CFeatureRewardFunctionFromValueFunction();
00134 
              /// Overrides the pure virtual int overload of
              /// CFeatureRewardFunction::getReward; defined outside this header.
00135         virtual double getReward(int oldState, CAction *action, int newState);
00136 };
00137 
00138 
00139 #endif
00140 
00141