Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cmontecarlo.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_MONTECARLO__H
00033 #define C_MONTECARLO__H
00034 
00035 #include "cbatchlearning.h"
00036 #include "cevaluator.h"
00037 #include "cparameters.h"
00038 
// Integer selector constants for the Monte Carlo error measure.
// NOTE(review): presumably the values accepted by
// CMonteCarloError::errorFunction (MSE = mean squared error, MAE = mean
// absolute error) -- confirm against the implementation file.
00039 #define MC_MSE 0
00040 #define MC_MAE 1
00041 
// Forward declarations of the types that the classes in this header refer to
// only through pointers, so their full definitions are not needed here.
00042 class CAgent;
00043 class CEpisode;
00044 class CRewardFunction;
00045         
00046 class CStateCollectionImpl;
00047 class CStateCollectionImpl; // NOTE(review): repeats the declaration on the previous line (redundant); possibly another type was intended -- confirm.
00048         
00049 class CSemiMDPSender;
00050 class CRewardHistory;
00051 class CRewardEpisode;
00052 class CEpisodeHistory;
00053 
// Abstract evaluator for a Monte Carlo error measure. It is a CEvaluator
// (see evaluate()) and a CParameterObject. The class cannot be instantiated
// directly: subclasses must implement the pure virtual getValue().
// NOTE(review): ownership of the raw pointers held below cannot be determined
// from this header -- check the constructor/destructor in the implementation.
// NOTE(review): std::list is used in the constructor signature but <list> is
// not included directly by this header; presumably it arrives through one of
// the project includes -- confirm.
00054 class CMonteCarloError : public CEvaluator, public CParameterObject
00055 {
00056 protected:
              // Collaborators; agent and reward function share their names with the
              // constructor arguments (presumably stored from them -- confirm).
00057         CAgent *agent;
00058         CEpisode *episode;
00059         CRewardFunction *rewardFunction;
00060         
              // State collection buffers. NOTE(review): presumably the state before
              // and after a step -- confirm in the implementation.
00061         CStateCollectionImpl *oldState;
00062         CStateCollectionImpl *newState;
00063         
              // NOTE(review): presumably initialised from the constructor's
              // numEpisodes / numSteps arguments -- confirm.
00064         int nEpisodes;
00065         int nStepsPerEpisode;
00066         
              // Set through setSemiMDPSender() (by name; confirm).
00067         CSemiMDPSender *semiMDPSender;
00068 
              // Value estimate for the given state (and action), supplied by the
              // concrete subclass. Pure virtual.
00069         virtual double getValue(CStateCollection *state, CAction *action) = 0;
00070         
              // Logged rewards and episodes; see setEpisodeHistory().
00071         CRewardHistory *rewardLogger;
00072         CEpisodeHistory *episodeHistory;
00073 public: 
              // Publicly writable configuration flags.
              // NOTE(review): errorFunction presumably takes MC_MSE or MC_MAE;
              // the exact effect of useRewardEpisode is not visible here -- confirm.
00074         bool useRewardEpisode;
00075         int errorFunction;
00076 
              // Constructs the evaluator from the agent, reward function, model state
              // properties, action set, state modifiers, episode/step counts and a
              // discount factor. How each argument is used is defined in the
              // implementation file.
00077         CMonteCarloError(CAgent *agent, CRewardFunction *reward, CStateProperties *modelState, CActionSet *actions, std::list<CStateModifier *> *modifiers, int numEpisodes, int numSteps, double discountFactor);
              // Virtual so that subclasses are destroyed correctly through a base pointer.
00078         virtual ~CMonteCarloError();
00079         
              // Supplies an episode history together with its reward log.
00080         void setEpisodeHistory(CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger);
00081 
              // Returns the Monte Carlo error for a single episode and its rewards.
00082         double getMonteCarloError(CEpisode *episode, CRewardEpisode *rewardEpisode);
              // Returns the mean Monte Carlo error over an episode history
              // (per the name; the averaging details live in the implementation).
00083         double getMeanMonteCarloError(CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger);
00084 
              // Evaluator entry point; returns the evaluation result as a double.
00085         virtual double evaluate();
00086         
              // Sets the semi-MDP sender used by this evaluator.
00087         void setSemiMDPSender(CSemiMDPSender *sender);
00088         
00089 };
00090 
// CMonteCarloError specialisation for a V-function (CAbstractVFunction):
// it provides the getValue() implementation that the base class leaves
// pure virtual.
00091 class CMonteCarloVError : public CMonteCarloError
00092 {
00093 protected:
              // The V-function under evaluation (same name as the constructor's
              // first argument; presumably stored from it -- confirm).
00094         CAbstractVFunction *vFunction;
00095         
              // Implements the base-class hook. NOTE(review): presumably queries
              // vFunction for the state and ignores the action -- confirm.
00096         virtual double getValue(CStateCollection *state, CAction *action);
00097 public: 
              // Takes the V-function plus the same arguments as the
              // CMonteCarloError constructor.
00098         CMonteCarloVError(CAbstractVFunction *vFunction, CAgent *agent, CRewardFunction *reward, CStateProperties *modelState, CActionSet *actions, std::list<CStateModifier *> *modifiers, int numEpisodes, int numSteps, double discountFactor);
00099         virtual ~CMonteCarloVError();   
00100 };
00101 
// CMonteCarloError specialisation for a Q-function (CAbstractQFunction):
// it provides the getValue() implementation that the base class leaves
// pure virtual.
00102 class CMonteCarloQError : public CMonteCarloError
00103 {
00104 protected:
              // The Q-function under evaluation (presumably stored from the
              // constructor's first argument -- confirm in the implementation).
00105         CAbstractQFunction *qFunction;
00106         
              // Implements the base-class hook. NOTE(review): presumably queries
              // qFunction for the state/action pair -- confirm.
00107         virtual double getValue(CStateCollection *state, CAction *action);
00108 public: 
              // Takes the Q-function plus the same arguments as the
              // CMonteCarloError constructor. The first parameter is named
              // qFunction to match its type and the member above; parameter names
              // in a declaration do not affect callers or the out-of-line definition.
00109         CMonteCarloQError(CAbstractQFunction *qFunction, CAgent *agent, CRewardFunction *reward, CStateProperties *modelState, CActionSet *actions, std::list<CStateModifier *> *modifiers, int numEpisodes, int numSteps, double discountFactor);
00110         virtual ~CMonteCarloQError();   
00111 };
00112 
// Policy evaluation (CPolicyEvaluation) that works from a logged episode
// history, its reward log and a batch data generator.
// NOTE(review): judging by the name, it fits a function to Monte Carlo
// returns via supervised learning -- confirm in the implementation.
00113 class CMonteCarloSupervisedLearner : public CPolicyEvaluation
00114 {
00115 protected:
00116                 
              // Logged episodes and the matching reward log (same names as the
              // constructor arguments).
00117         CEpisodeHistory *episodeHistory;
00118         CRewardHistory *rewardLogger;
00119 
              // Batch data generator passed to the constructor. NOTE(review):
              // subclasses do not pass one in; whether this class owns it is not
              // visible here -- check the destructor.
00120         CBatchDataGenerator *dataGenerator;
00121 
00122 public:
00123         CMonteCarloSupervisedLearner(CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CBatchDataGenerator *dataGenerator);
00124 
00125         virtual ~CMonteCarloSupervisedLearner();
00126 
              // Runs the policy evaluation; the meaning of 'trials' is defined by
              // the implementation.
00127         virtual void evaluatePolicy(int trials);
00128 };
00129 
00130 
00131 
// CMonteCarloSupervisedLearner variant constructed from a V-function and a
// CSupervisedLearner. It declares no members of its own beyond the
// constructor and destructor.
// NOTE(review): presumably the constructor builds the base class's batch data
// generator from vFunction and learner -- confirm in the implementation.
00132 class CMonteCarloVLearner : public CMonteCarloSupervisedLearner
00133 {
00134 protected:
00135 public:
00136         CMonteCarloVLearner(CAbstractVFunction *vFunction,  CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CSupervisedLearner *learner);
00137 
00138         virtual ~CMonteCarloVLearner();
00139 };
00140 
// CMonteCarloSupervisedLearner variant constructed from state properties, a
// continuous-action Q-function (CContinuousActionQFunction) and a
// CSupervisedLearner. It declares no members of its own beyond the
// constructor and destructor.
// NOTE(review): presumably the constructor builds the base class's batch data
// generator from these arguments -- confirm in the implementation.
00141 class CMonteCarloCAQLearner : public CMonteCarloSupervisedLearner
00142 {
00143 protected:
00144         
00145 public:
00146         CMonteCarloCAQLearner(CStateProperties *properties, CContinuousActionQFunction *qFunction, CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CSupervisedLearner *learner);
00147 
00148         virtual ~CMonteCarloCAQLearner();
00149 };
00150 
00151 
// CMonteCarloSupervisedLearner variant constructed from a CQFunction and a
// CSupervisedQFunctionLearner. It declares no members of its own beyond the
// constructor and destructor.
// NOTE(review): presumably the constructor builds the base class's batch data
// generator from qFunction and learner -- confirm in the implementation.
00152 class CMonteCarloQLearner : public CMonteCarloSupervisedLearner
00153 {
00154 protected:
00155 public:
00156         CMonteCarloQLearner(CQFunction *qFunction, CEpisodeHistory *episodeHistory, CRewardHistory *rewardLogger, CSupervisedQFunctionLearner *learner);
00157 
00158         virtual ~CMonteCarloQLearner();
00159 };
00160 
00161 
00162 #endif
00163