Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

crewardmodel.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_REWARDMODEL_H
00033 #define C_REWARDMODEL_H
00034 
#include <cstdio>   // FILE
#include <list>     // std::list (CRewardLogger::episodes)
#include <map>
#include <vector>
00037 
00038 #include "crewardfunction.h"
00039 #include "clearndataobject.h"
00040 #include "cbaseobjects.h"
00041 #include "cutility.h"
00042 #include "cagentlistener.h"
00043 
00044 class CAbstractFeatureStochasticEstimatedModel;
00045 
00046 class CFeatureStateRewardFunction : public CFeatureRewardFunction
00047 {
00048 protected:
00049         std::map<int, double> *rewards;
00050 public:
00051         CFeatureStateRewardFunction(CStateProperties *discretizer);
00052         virtual ~CFeatureStateRewardFunction();
00053 
00054         virtual double getReward(int oldState, CAction *action, int newState);
00055         virtual double getReward(int state);
00056 
00057         virtual void setReward(int state, double reward);
00058 };
00059 
00060 
00072 class CFeatureRewardModel : public CFeatureRewardFunction, public CSemiMDPRewardListener, public CActionObject, public CLearnDataObject
00073 {
00074 protected:
00076         CMyArray2D<CFeatureMap *> *rewardTable;
00078 
00080         CMyArray2D<CFeatureMap *> *visitTable;
00081 
00083         CAbstractFeatureStochasticEstimatedModel *model;
00084 
00085         bool bExternVisitSparse;
00086 
00088 
00091         double getTransitionVisits(int oldState, int action, int newState);
00092 
00093 public:
00095 
00097         CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *discretizer);
00099         CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CStateModifier *discretizer);
00100         virtual ~CFeatureRewardModel();
00101 
00103 
00106         virtual double getReward(int oldState, CAction *action, int newState);
00107         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
00108 
00110 
00111         virtual void saveData(FILE *stream);
00113 
00114         virtual void loadData(FILE *stream);
00115 
00116         virtual void resetData();
00117 };
00118 
00119 class CFeatureStateRewardModel : public CFeatureRewardFunction, public CSemiMDPRewardListener, public CLearnDataObject
00120 {
00121 protected:
00122         double *rewards;
00123         double *visits;
00124 
00125         double rewardMean;
00126         int numRewards;
00127 
00128 public:
00130 
00132         CFeatureStateRewardModel(CRewardFunction *function, CStateModifier *discretizer);
00134         virtual ~CFeatureStateRewardModel();
00135 
00137 
00140         virtual double getReward(CState *oldState, CAction *action, CState *newState);
00142 
00145         virtual double getReward(int oldState, CAction *action, int newState);
00146         virtual double getReward(int newState);
00147 
00148         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
00149 
00151 
00152         virtual void saveData(FILE *stream);
00154 
00155         virtual void loadData(FILE *stream);
00156 
00157         virtual void resetData();
00158 };
00159 
00161 
00166 class CRewardEpisode : public CSemiMDPRewardListener
00167 {
00168 protected:
00170         std::vector<double> *rewards;
00171 public:
00172         CRewardEpisode(CRewardFunction *rewardFunction);
00173         virtual ~CRewardEpisode();
00174 
00176         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
00178         virtual void newEpisode();
00179 
00180         int getNumRewards();
00182         double getReward(int index);
00183 
00184         double getMeanReward();
00185         double getLastStepsMeanReward(int Steps);
00186 
00187 
00188         virtual void saveBIN(FILE *stream);
00189         virtual void saveData(FILE *stream);
00190         virtual void loadBIN(FILE *stream);
00191         virtual void loadData(FILE *stream);
00192 };
00193 
00194 class CRewardHistory
00195 {
00196         protected:
00197 
00198         public:
00199                 CRewardHistory() {};
00200                 virtual ~CRewardHistory(){};
00201 
00202                 virtual CRewardEpisode* getEpisode(int index) = 0;
00203                 virtual int getNumEpisodes() = 0;
00204 };
00205 
00206 class CRewardHistorySubset : public CRewardHistory
00207 {
00208         protected:
00209                 CRewardHistory *episodes;
00210                 std::vector<int> *indices;
00211         public:
00212                 CRewardHistorySubset(CRewardHistory *episodes, std::vector<int> *indices);
00213                 virtual ~CRewardHistorySubset();
00214 
00216                 virtual int getNumEpisodes();
00218                 virtual CRewardEpisode* getEpisode(int index);
00219 };
00220 
00221 class CRewardLogger : public CSemiMDPRewardListener, public CLearnDataObject, public CRewardHistory
00222 {
00223 protected:
00225     char filename[512];
00227     FILE* file;
00228 
00229         char loadFileName[512];
00230 
00232         int holdMemory;
00233 
00235         std::list<CRewardEpisode *> *episodes;
00237         CRewardEpisode *currentEpisode;
00238         
00239         void init();
00240 public:
00241         
00242         CRewardLogger(CRewardFunction *rewardFunction, char* autoSavefile, int holdMemory);
00243         CRewardLogger(CRewardFunction *rewardFunction);
00244         CRewardLogger(char *loadFile, CRewardFunction *rewardFunction);
00245 
00246         virtual ~CRewardLogger();
00247 
00248         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
00249         virtual void newEpisode();
00250 
00251         void setAutoSaveFile(char *filename);
00252         virtual void saveBIN(FILE *stream);
00253         
00254         virtual void saveData(FILE *stream);
00255         void loadBIN(FILE *stream, int episodes = -1);
00256         virtual void loadData(FILE *stream, int episodes = -1);
00257         virtual void loadData(FILE *stream);
00258 
00259         virtual int getNumEpisodes();
00260         
00261         virtual CRewardEpisode* getCurrentEpisode();
00262         virtual CRewardEpisode* getEpisode(int index);
00263 
00264         void clearAutoSaveFile();
00265         void setLoadDataFile(char *loadData);
00266 
00267         virtual void resetData();
00268 };
00269 
00271 
00281 class CSemiMDPLastNRewardFunction : public CRewardFunction, public CRewardEpisode
00282 {
00283 protected:
00285         double gamma;
00286 public:
00288         CSemiMDPLastNRewardFunction(CRewardFunction *rewardFunction, double gamma);
00289         virtual ~CSemiMDPLastNRewardFunction();
00291 
00295         virtual double getReward(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00296 };
00297 
#endif // C_REWARDMODEL_H
00299