Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cepisodehistory.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_EPISODEHISTORY_H
00033 #define C_EPISODEHISTORY_H
00034 
00035 #include "cagentlistener.h"
00036 #include "chistory.h"
00037 #include "cagentcontroller.h"
00038 #include "cbaseobjects.h"
00039 #include "cenvironmentmodel.h"
00040 
00041 #include <vector>
00042 
00043 class CEpisode;
00044 
00046 
00057 class CEpisodeHistory : virtual public CStateModifiersObject, public CStepHistory // Abstract history organised in episodes; subclasses must implement getNumEpisodes() and getEpisode().
00058 {
00059 protected:
00060         std::map<int, CEpisode*> *stepToEpisodeMap; // int -> episode lookup; presumably keyed by a global step index (TODO confirm in the .cpp). NOTE(review): <map> is not included directly by this header.
00061         std::map<CEpisode*, int> *episodeOffsetMap; // episode -> int lookup; presumably the episode's offset in the global step numbering (TODO confirm).
00062 public:
00063         CEpisodeHistory(CStateProperties *properties, CActionSet *actions); // Defined out of line; how the arguments reach the base classes is not visible here.
00064         virtual ~CEpisodeHistory(); // Defined out of line; ownership of the two maps is not visible from this header.
00065 
00067         virtual int getNumEpisodes() = 0; // Pure virtual; by its name, returns the number of episodes held by the concrete history.
00069         virtual CEpisode* getEpisode(int index) = 0; // Pure virtual; by its name, returns the episode at 'index' (valid range not specified here).
00070 
00071 
00072 
00073         virtual int getNumSteps(); // Presumably the step-level view required by CStepHistory, spanning all episodes (TODO confirm).
00074         virtual void getStep(int index, CStep *step); // Fills the caller-provided 'step'; presumably 'index' counts steps across all episodes (TODO confirm).
00075         
00076         virtual void createStepToEpisodeMap(); // By its name, (re)builds stepToEpisodeMap; when callers must invoke it is not visible here.
00077 };
00078 
00079 class CEpisodeHistorySubset : public CEpisodeHistory // Concrete CEpisodeHistory built from another history plus a list of integer indices.
00080 {
00081         protected:
00082                 CEpisodeHistory *episodes; // The underlying history passed to the constructor; ownership is not visible from this header.
00083                 std::vector<int> *indices; // Index list passed to the constructor; presumably positions of the selected episodes in 'episodes' (TODO confirm).
00084         public:
00085                 CEpisodeHistorySubset(CEpisodeHistory *episodes, std::vector<int> *indices); // Defined out of line.
00086                 virtual ~CEpisodeHistorySubset(); // Defined out of line; whether it deletes 'indices' is not visible here.
00087 
00089                 virtual int getNumEpisodes(); // Implements the pure virtual from CEpisodeHistory; presumably the size of 'indices' (TODO confirm).
00091                 virtual CEpisode* getEpisode(int index); // Implements the pure virtual from CEpisodeHistory; presumably maps 'index' through 'indices' (TODO confirm).
00092 
00093                 virtual void resetData() {}; // Intentionally empty: this class does nothing on reset.
00094                 virtual void loadData(FILE *) {}; // Intentionally empty: nothing is read from the stream. NOTE(review): FILE relies on a transitive <cstdio>/<stdio.h> include.
00095                 virtual void saveData(FILE *) {}; // Intentionally empty: nothing is written to the stream.
00096 };
00097 
00098 /*
00100 class CEpisodeToStepHistory : public CStepHistory
00101 {
00102 protected:
00103        CEpisodeHistory *episodes;
00104 public:
00105 
00106        CEpisodeToStepHistory(CEpisodeHistory *history);
00107        virtual ~CEpisodeToStepHistory() {};
00108 
00109        
00110 };*/
00111 
00113 
00128 class CStoredEpisodeModel : public CEnvironmentModel, public CAgentController // Acts as both an environment model and an agent controller on top of a CEpisodeHistory.
00129 {
00130 protected:
00132         CEpisodeHistory *history; // History accessed through getEpisodeHistory()/setEpisodeHistory(); ownership not visible here.
00134         CEpisode *currentEpisode; // Presumably the episode currently being traversed (TODO confirm in the .cpp).
00135         
00136         int numEpisode; // Presumably the index of the current episode within 'history' (TODO confirm).
00137         int numStep; // Presumably the index of the current step within 'currentEpisode' (TODO confirm).
00138 
00140 
00141         virtual void doNextState(CPrimitiveAction *action); // Protected hook, presumably invoked by CEnvironmentModel to advance one step (TODO confirm).
00143         virtual void doResetModel(); // Protected hook, presumably invoked by CEnvironmentModel on reset (TODO confirm).
00144 
00145 public:
00147         CStoredEpisodeModel(CEpisodeHistory *history); // Single-argument constructor, not marked explicit, so it also permits implicit conversion from CEpisodeHistory*.
00148 
00149         ~CStoredEpisodeModel(); // NOTE(review): not spelled 'virtual' here, unlike the other classes in this header; it is virtual only if a base class declares a virtual destructor.
00150 
00151         virtual CEpisodeHistory* getEpisodeHistory(); // Accessor for the history in use.
00152         virtual void setEpisodeHistory(CEpisodeHistory *hist); // Replaces the history in use; whether the position counters are reset is not visible here.
00153 
00155         virtual void getState(CState *state); // Writes into the caller-provided CState.
00159         virtual void getState(CStateCollectionImpl *stateCollection); // Overload that writes into a caller-provided state collection.
00161         virtual CAction* getNextAction(CStateCollection *state); // Presumably the CAgentController entry point returning the stored action for the current step (TODO confirm).
00162 };
00163 
00165 
00171 class CBatchEpisodeUpdate : public CSemiMDPListener // Listener that holds a second listener and an episode history and exposes methods to simulate stored episodes.
00172 {
00173 protected:
00175         CSemiMDPListener *listener; // Listener supplied to the constructor; ownership not visible here.
00177         CEpisodeHistory *logger; // Episode history supplied to the constructor; presumably the source of the simulated episodes (TODO confirm).
00178 
00179         int numEpisodes; // Value supplied to the constructor; presumably how many episodes are used per batch (TODO confirm).
00180         std::list<int> *episodeIndex; // List of ints; its exact role is not visible here. NOTE(review): <list> is not included directly by this header.
00181 
00182         CActionDataSet *dataSet; // Presumably scratch action data used while simulating steps (TODO confirm).
00183         CStep *step; // Presumably a scratch step buffer filled from the history (TODO confirm).
00184 public:
00186         CBatchEpisodeUpdate(CSemiMDPListener *listener, CEpisodeHistory *logger, int numEpisodes, std::list<CStateModifier *> *modifiers); // Defined out of line; how 'modifiers' is used is not visible here.
00187         ~CBatchEpisodeUpdate(); // Not spelled 'virtual' here; virtual only if CSemiMDPListener declares a virtual destructor.
00188 
00190 
00192         virtual void newEpisode(); // Presumably the CSemiMDPListener callback for the start of a new episode (TODO confirm).
00193 
00195         virtual void simulateEpisode(int episode, CSemiMDPListener *listener); // Takes an episode index and the listener to notify; note the parameter shadows the 'listener' member.
00197         virtual void simulateAllEpisodes(CSemiMDPListener *listener); // By its name, simulates every episode for the given listener.
00198 
00200         void simulateNRandomEpisodes(int numEpisodes, CSemiMDPListener *listener); // NOTE(review): non-virtual, unlike the other simulate* methods; 'numEpisodes' shadows the member of the same name.
00201 
00202 };
00203 
00204 
00205 #endif
00206 
00207 
00208