Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cagent.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef CAGENT_H
00033 #define CAGENT_H
00034 
00035 #include <list>
00036 
00037 
00038 #include "cagentcontroller.h"
00039 #include "caction.h"
00040 #include "cbaseobjects.h"
00041 #include "chierarchiccontroller.h"
00042 
00043 
// Forward declarations. Within this header these types are only used through
// pointers (members, parameters, return values), so their full definitions
// are not required here.
00044 class CSemiMDPListener;
00045 class CEpisode;
00046 class CEnvironmentModel;
00047 class CStateCollection;
00048 class CStateCollectionImpl;
00049 
00051 
/// Keeps a list of CSemiMDPListener pointers and declares the operations for
/// managing that list plus three virtual "event" entry points
/// (startNewEpisode, sendNextStep, sendIntermediateStep).
///
/// Only declarations are visible in this header; presumably the event
/// functions forward the call to every registered listener — confirm against
/// the implementation file.
00055 class CSemiMDPSender
00056 {
00057 protected:
              /// Pointer to the list of registered listeners.
              /// NOTE(review): who allocates and frees the list object is not
              /// visible here — presumably the constructor/destructor; confirm.
00058         std::list<CSemiMDPListener *> *SMDPListeners;
00059 public:
00060         CSemiMDPSender();
              /// Virtual, so derived senders can be destroyed through a
              /// CSemiMDPSender pointer.
00061         virtual ~CSemiMDPSender();
00062 
              /// Registers @p listener with this sender.
00064         void addSemiMDPListener(CSemiMDPListener *listener);
              /// Unregisters @p listener from this sender.
00066         void removeSemiMDPListener(CSemiMDPListener *listener);
00067 
              /// Going by the name, reports whether @p listener is currently
              /// registered (implementation not shown here).
00068         bool isListenerAdded(CSemiMDPListener *listener);
00069 
              /// Event hook for the start of a new episode. Overridable.
00071         virtual void startNewEpisode();
              /// Event hook for a completed step, described by the state before
              /// the step, the action taken, and the state after the step.
00073         virtual void sendNextStep(CStateCollection *lastState, CAction *Action,  CStateCollection *currentState);
              /// Event hook with the same argument triple as sendNextStep.
              /// NOTE(review): what distinguishes an "intermediate" step from a
              /// regular one is not visible in this header — see implementation.
00075         virtual void sendIntermediateStep(CStateCollection *lastState, CAction *Action, CStateCollection *currentState);
00076 };
00077 
00079 
/// Combines a CDeterministicController with a CSemiMDPSender and adds
/// bookkeeping members for the last action and for episode/step counters.
///
/// sendNextStep and startNewEpisode are redeclared here with the same
/// signatures as the virtual functions in CSemiMDPSender, i.e. they override
/// them. How the counters are updated is defined in the implementation file,
/// not in this header.
00090 class CSemiMarkovDecisionProcess : public CDeterministicController, public CSemiMDPSender
00091 {
00092 protected:
00093 
              /// Value handed out by getLastAction() (presumably the most
              /// recently executed action — confirm in the implementation).
00094         CAction *lastAction;
00095         
              /// Value returned by getCurrentEpisodeNumber().
00096         int currentEpisodeNumber;
              /// Value returned by getCurrentStep(); presumably the step count
              /// within the current episode.
00097         int currentSteps;
00098 
              /// Value returned by getTotalSteps(); presumably the step count
              /// accumulated over all episodes.
00099         int totalSteps;
00100         
              /// NOTE(review): usage not visible in this header; by name, flags
              /// that no step has been processed yet in the current episode.
00101         bool isFirstStep;
00102 
00103 
00104 public:
00105 
00106         CSemiMarkovDecisionProcess();
              /// Not marked virtual here, but implicitly virtual because
              /// CSemiMDPSender declares a virtual destructor.
00107         ~CSemiMarkovDecisionProcess();
00108 
              /// Overrides CSemiMDPSender::sendNextStep (same argument types).
00110         virtual void sendNextStep(CStateCollection *lastState, CAction *action, CStateCollection *currentState);
00111 
              /// Accessor for the last action; defined out of line.
00113         CAction* getLastAction();
00114 
              /// Overrides CSemiMDPSender::startNewEpisode.
00116         virtual void startNewEpisode();
00117 
              /// Returns currentEpisodeNumber.
00119         int getCurrentEpisodeNumber() {return this->currentEpisodeNumber;};
              /// Returns currentSteps.
00121         int getCurrentStep() {return this->currentSteps;};
00122 
              /// Returns totalSteps.
00123         int getTotalSteps() {return this->totalSteps;};
00124 
              /// Adds a single action. Virtual so subclasses can customise it.
              /// NOTE(review): presumably extends the action set inherited from
              /// CDeterministicController — confirm.
00126         virtual void addAction(CAction *action);
              /// Adds the actions of a whole CActionSet.
00127         virtual void addActions(CActionSet *action);
00128 };
00129 
00131 
/// A semi-MDP that additionally derives from CHierarchicalStackListener,
/// CExtendedAction and CStateModifiersObject.
///
/// NOTE(review): nextStep/newEpisode presumably implement the
/// CHierarchicalStackListener interface and isFinished/getNextHierarchyLevel
/// the CExtendedAction interface; those base classes are declared in other
/// headers, so confirm there.
00141 class CHierarchicalSemiMarkovDecisionProcess : public CSemiMarkovDecisionProcess, public CHierarchicalStackListener, public CExtendedAction, public CStateModifiersObject
00142 {
00143 protected:
              /// Selects, from a hierarchical action stack, the action to be
              /// treated as executed by this SMDP (selection rule is in the
              /// implementation, not visible here).
00145         virtual CAction *getExecutedAction(CHierarchicalStack *actionStack);
00146         
              /// Episode associated with this SMDP; can be supplied via the
              /// first constructor or setLoggedEpisode (by parameter type).
00150         CEpisode *currentEpisode;
00151 
              /// State-collection buffers.
              /// NOTE(review): allocation/ownership is not visible here.
00152         CStateCollectionImpl *pastState;
00153         CStateCollectionImpl *currentState;
00154 public:
              /// Constructs the SMDP from an existing episode object.
00159         CHierarchicalSemiMarkovDecisionProcess(CEpisode *currentEpisode);
              /// Constructs the SMDP from model state properties and an
              /// optional list of state modifiers (defaults to NULL).
00160         CHierarchicalSemiMarkovDecisionProcess(CStateProperties *modelProperties, std::list<CStateModifier *> *modifiers = NULL);
00161 
00162         ~CHierarchicalSemiMarkovDecisionProcess();
00163 
              /// Sets the episode to use (presumably replaces currentEpisode).
00164         virtual void setLoggedEpisode(CEpisode *loggedEpisode);
00165 
              /// Step notification carrying the old state, the hierarchical
              /// action stack and the new state.
00166         virtual void nextStep(CStateCollection *oldState, CHierarchicalStack *actionStack, CStateCollection *newState);
              /// New-episode notification.
00167         virtual void newEpisode();
00168 
00170 
              /// One-argument variant of sendNextStep.
              /// NOTE(review): because it shares its name with the inherited
              /// three-argument sendNextStep(CStateCollection*, CAction*,
              /// CStateCollection*), that overload is hidden when called
              /// through this class unless it is re-exposed — confirm intended.
00175         virtual void sendNextStep(CAction *action);
00176 
00177  
              /// Always returns false; both state arguments are ignored.
00178         virtual bool isFinished(CStateCollection *, CStateCollection *) {return false;};
00179 
              /// Returns the action chosen for @p stateCollection;
              /// @p actionDataSet is optional (defaults to NULL).
00180         virtual CAction *getNextHierarchyLevel(CStateCollection *stateCollection, CActionDataSet *actionDataSet = NULL);
00181 
              /// Adds a state modifier (redeclared from CStateModifiersObject,
              /// presumably to also update this class's own state collections).
00183         virtual void addStateModifier(CStateModifier *modifier);
              /// Removes a state modifier; counterpart of addStateModifier.
00185         virtual void removeStateModifier(CStateModifier *modifier);
00186 
00187 };
00188 
00190 
/// Agent bound to a CEnvironmentModel. Derives from
/// CSemiMarkovDecisionProcess and CStateModifiersObject and adds run control
/// (episode/step limits, resume, keyboard break) and optional episode logging.
///
/// Only declarations are visible here; the behavioural details noted below as
/// "presumably" must be confirmed against the implementation file.
00213 class CAgent : public CSemiMarkovDecisionProcess, public CStateModifiersObject
00214 {
00215 protected:
              /// State-collection buffers; getCurrentState() presumably exposes
              /// currentState. Ownership is not visible in this header.
00216         CStateCollectionImpl *currentState;
00217         CStateCollectionImpl *lastState;
00218         
              /// Run limits; settable through setParameters() and, by parameter
              /// name, doControllerEpisode().
00219     int maxEpisodes;
00220         int maxSteps;
00221 
              /// Flag accessed via setKeyboardBreak()/getKeyboardBreak().
00222         bool keyboardBreaks;
00223 
              /// Environment model passed to the constructor and returned by
              /// getEnvironmentModel().
00224         CEnvironmentModel *model;
00225 
              /// Flag controlled by setLogEpisode().
00226         bool bLogEpisode;
00227 
              /// Internal run loop shared by the public run functions.
              /// NOTE(review): by name, bContinue selects resuming versus
              /// starting fresh; meaning of the return value is not visible here.
00228         int doRun(bool bContinue);
00229 
              /// Episode object returned by getCurrentEpisode().
00230         CEpisode *currentEpisode;
00231 public:
00232 
              /// Creates an agent acting in @p model.
00233         CAgent(CEnvironmentModel *model);
00234         ~CAgent();
00235 
              /// Executes @p action (presumably on the environment model).
00237         virtual void doAction(CAction *action);
              /// Adds a state modifier.
00239         virtual void addStateModifier(CStateModifier *modifier);
              /// Removes a state modifier.
00241         virtual void removeStateModifier(CStateModifier *modifier);
00242 
00243 
00244 
              /// Runs the controller; defaults are 1 episode and 5000 steps.
              /// NOTE(review): meaning of the int result is not visible here.
00248         int doControllerEpisode(int maxEpisodes = 1, int maxSteps = 5000);
              /// Sets the episode and step limits.
00250         void setParameters(int maxEpisodes, int maxSteps);
              /// Continues a previous run (presumably via doRun(true)).
00254         int doResume();
00255 
              /// Overrides CSemiMarkovDecisionProcess::startNewEpisode.
00257         virtual void startNewEpisode();
00258 
00260 
              /// Performs a single controller step.
00263         void doControllerStep();
00264 
              /// Setter/getter for the keyboard-break flag.
00266         void setKeyboardBreak(bool keyboardBreak);
00267     bool getKeyboardBreak();
00268 
              /// Adds a primitive action.
              /// NOTE(review): the parameter type differs from the base-class
              /// addAction(CAction*), so unless CPrimitiveAction is the same
              /// type this hides the base function rather than overriding it
              /// — confirm that this is intended.
00270         virtual void addAction(CPrimitiveAction *action);
00271 
              /// Enables or disables episode logging.
00273         virtual void setLogEpisode(bool bLogEpisode);
00274 
              /// Accessor for the current episode object.
00276         virtual CEpisode *getCurrentEpisode();
00277 
              /// Accessor for the current state collection.
00278         virtual CStateCollection *getCurrentState();
00279 
              /// Accessor for the environment model.
00280         CEnvironmentModel *getEnvironmentModel();
00281 };
00282 
/// CAgent subclass that holds a pointer to another CAgent and to a
/// CHierarchicalSemiMarkovDecisionProcess, and redeclares doAction and
/// startNewEpisode (both virtual in CAgent).
///
/// NOTE(review): the class name is spelled "Hiearchical" (missing 'r'); it is
/// the public identifier, so it is kept as is.
/// NOTE(review): presumably doAction/startNewEpisode delegate to the wrapped
/// objects; the implementation is not visible in this header.
00283 class CHiearchicalAgent : public CAgent
00284 {
00285         protected:
                      /// Agent passed to the constructor. Ownership not
                      /// visible here.
00286                 CAgent *realAgent;
                      /// Hierarchical SMDP passed to the constructor.
00287                 CHierarchicalSemiMarkovDecisionProcess *hierarchicSMDP;
00288         public:
                      /// NOTE(review): CAgent's only visible constructor takes
                      /// a CEnvironmentModel*; how it is supplied from these
                      /// arguments is defined in the implementation file.
00289                 CHiearchicalAgent(CAgent *agent, CHierarchicalSemiMarkovDecisionProcess *hierarchicSMDP);
00290 
00291                 virtual ~CHiearchicalAgent();
00292 
                      /// Overrides CAgent::doAction.
00293                 virtual void doAction(CAction *action);
                      /// Overrides CAgent::startNewEpisode.
00294                 virtual void startNewEpisode();
00295 
00296 };
00297 
00298 #endif