Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

ctheoreticalmodel.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_DISCRETETHEORETICALMODEL_H
00033 #define C_DISCRETETHEORETICALMODEL_H
00034 
00035 #include "clearndataobject.h"
00036 #include "caction.h"
00037 #include "cbaseobjects.h"
00038 #include "cagentlistener.h"
00039 #include "cutility.h"
00040 
00041 #include <map>
00042 #include <list>
00043 
// Integer tags distinguishing kinds of transition objects.
// NOTE(review): presumably these are the values stored in CTransition::type and
// tested through CTransition::isType(int) -- confirm against the implementation file.
00044 #define TRANSITION 1
00045 #define SEMIMDPTRANSITION 2
00046 
// Forward declarations. In this header these types appear only as pointer
// members or pointer parameters, so their full definitions are not needed here.
// NOTE(review): CTransitionFunction is declared but not referenced anywhere in
// the visible part of this header.
00047 class CFeatureQFunction;
00048 class CAbstractStateDiscretizer;
00049 class CFeatureCalculator;
00050 class CTransitionFunction;
00051 class CFeatureList;
00052 
00053 class CStateCollection;
00054 class CState;
// Disabled forward declaration, kept as in the original. CMyArray2D is used
// below by CFeatureStochasticModel and must therefore come from an included header.
00055 //class CMyArray2D<CStateActionTransitions *>;
00056 
00058 
/// A single transition record: a start state, an end state and a probability
/// value (the identifiers spell it "propability"; that spelling is part of the
/// public interface and is kept).
///
/// Instances are stored by pointer in CTransitionList (declared further down).
00063 class CTransition
00064 {
00065 protected:
              /// Index of the state the transition starts in.
00066         int startState;
              /// Index of the state the transition ends in.
00067         int endState;
              /// Probability value of this transition (sic: "propability").
00068         double propability;
00069         
              /// Type tag queried by isType().
              /// NOTE(review): presumably TRANSITION or SEMIMDPTRANSITION -- confirm in the .cpp.
00070         int type;
00071 public:
              /// @param startState index of the start state
              /// @param endState   index of the end state
              /// @param prop       initial probability value
00072         CTransition(int startState, int endState, double prop);
              /// Virtual so that subclasses can be destroyed through a CTransition pointer.
00073         virtual ~CTransition() {};
00074 
              /// @return the stored start-state index.
00075         int getStartState();
              /// @return the stored end-state index.
00076         int getEndState();
00077 
              /// @return the probability value of this transition. Virtual, so a
              /// subclass may compute it differently.
00078         virtual double getPropability();
              /// Sets the probability value of this transition.
00079         virtual void setPropability(double prop);
00080 
              /// Reads this transition from a text stream.
              /// @param stream     open input stream
              /// @param fixedState NOTE(review): presumably the state that is implied by the
              ///                   enclosing list and therefore not stored in the file -- confirm.
              /// @param forward    whether the transition belongs to a forward list
00081         virtual void loadASCII(FILE *stream, int fixedState, bool forward);
              /// Writes this transition to a text stream.
              /// @param stream  open output stream
              /// @param forward whether the transition belongs to a forward list
00082         virtual void saveASCII(FILE *stream, bool forward);
00083 
              /// @return whether this transition is of the given type tag
              /// (see the TRANSITION / SEMIMDPTRANSITION macros).
00084         virtual bool isType(int Type);
00085 };
00086 
00088 
/// Transition that additionally keeps a table of durations, mapping an integer
/// duration to a double value (called "factor"/"Faktor" in the interface).
///
/// NOTE(review): the class holds a raw pointer (durations) and declares a
/// destructor, but copy construction/assignment are left implicit. If the
/// destructor frees the map (not visible here), copying an instance would lead
/// to a double delete -- confirm and treat instances as non-copyable.
00097 class CSemiMDPTransition : public CTransition
00098 {
00099 protected:
              /// Duration table: key = duration, value = factor for that duration.
              /// NOTE(review): presumably allocated in the constructor and released in
              /// the destructor -- confirm in the .cpp.
00101         std::map<int, double> *durations;
00102 public:
              /// @param startState index of the start state
              /// @param endState   index of the end state
              /// @param prop       initial probability value
00103         CSemiMDPTransition(int startState, int endState, double prop);
00104         virtual ~CSemiMDPTransition();
00105 
              /// @return pointer to the internal duration table (not a copy).
00106         std::map<int, double> *getDurations();
00107 
00109 
              /// Adds the given factor for a duration.
              /// NOTE(review): how this differs from setDuration() (accumulate vs.
              /// overwrite, renormalisation of other entries) is not visible here -- confirm.
00113         void addDuration(int duration, double factor);
              /// Sets the factor stored for a duration.
00115         void setDuration(int duration, double factor);
              /// @return the factor stored for the given duration.
00116         double getDurationFaktor(int duration);
00118 
              /// @return the probability value associated with the given duration.
              /// NOTE(review): presumably the duration factor combined with the
              /// transition probability -- confirm in the .cpp.
00119         double getDurationPropability(int duration);
00120 
              /// Reads the transition from a text stream; overrides CTransition::loadASCII.
00121         virtual void loadASCII(FILE *stream, int fixedState, bool forward);
              /// Writes the transition to a text stream; overrides CTransition::saveASCII.
00122         virtual void saveASCII(FILE *stream, bool forward);
              /// @param gamma NOTE(review): presumably the discount factor; the result is
              ///              presumably the duration-weighted discount -- confirm in the .cpp.
00124         double getSemiMDPFaktor(double gamma);
00125 };
00126 
00127 
00129 /** All transitions are stored in a CTransitionList object. The transition list records whether it is a forward or a backward list.
00130 The transitions are kept in an ordered list: forward lists are ordered by end state, backward lists by start state.
00131 The class provides functions for adding a transition, getting the transition for a given feature index, and determining whether a feature index is a member of the list.
00132 For a forward list the search criterion used by get and isMember is the end state of the transitions; for a backward list it is the start state.
00133 */
00134 class CTransitionList : public std::list<CTransition *>
00135 {
              // NOTE(review): std::list has no virtual destructor, so a CTransitionList
              // must never be deleted through a std::list<CTransition *> pointer.
              // The inherited std::list mutators (push_back, insert, ...) are also public
              // and can bypass addTransition().
00136 protected:
              /// true if this is a forward list, false if it is a backward list.
00138         bool forwardList;
00139 
00140 public:
              /// @param forwardList true to create a forward list, false for a backward list
00141         CTransitionList(bool forwardList);
00142 
00144 
              /// @return whether a transition for the given feature index is in the list.
00147         bool isMember(int featureIndex);
              /// @return the forward/backward flag given at construction.
00148         bool isForwardList();
00150 /* Adds a transition to the sorted list at the correct position. */
00151         void addTransition(CTransition *transition);
              /// @return the transition stored for the given feature index.
              /// NOTE(review): the result for an index that is not a member is not
              /// visible here (presumably NULL) -- confirm before dereferencing.
00153         CTransition *getTransition(int featureIndex);
00154 
              /// Iterator variant of getTransition().
              /// NOTE(review): presumably returns end() if the index is not a member -- confirm.
00155         CTransitionList::iterator getTransitionIterator(int featureIndex);
              /// NOTE(review): by its name, deletes the pointed-to CTransition objects and
              /// empties the list; the list's ownership policy is otherwise not visible
              /// in this header -- confirm in the .cpp.
00157         void clearAndDelete();
00158 };
00159 
00161 
/// Pair of transition lists, one forward and one backward.
/// CFeatureStochasticModel keeps pointers to these objects in a 2D array.
///
/// NOTE(review): the class holds raw pointers and declares a destructor but no
/// copy constructor / copy assignment. If the destructor deletes the lists
/// (not visible here), copying an instance would double-delete -- confirm and
/// treat instances as non-copyable.
00163 class CStateActionTransitions
00164 {
00165 protected:
              /// Forward transition list.
00166         CTransitionList *forwardList;
              /// Backward transition list.
00167         CTransitionList *backwardList;
00168 
00169 public:
00170         CStateActionTransitions();
              // NOTE(review): non-virtual destructor; do not derive from this class and
              // delete through a base pointer.
00171         ~CStateActionTransitions();
00172 
00173         
              /// @return pointer to the forward transition list (not a copy).
00174         CTransitionList* getForwardTransitions();
              /// @return pointer to the backward transition list (not a copy).
00175         CTransitionList* getBackwardTransitions();
00176 };
00177 
00178 
00180 
/// Abstract interface of a stochastic model over features and actions.
///
/// Subclasses must implement the three pure virtual functions with integer
/// indices: getPropability(int, int, int), getPropability(int, int, int, int)
/// and getForwardTransitions(int, int) / getBackwardTransitions(int, int).
/// The remaining overloads accept CAction, CFeatureList, CState or
/// CStateCollection objects.
/// NOTE(review): those object-based overloads are presumably convenience
/// wrappers that map their arguments to indices and call the pure virtual
/// versions -- confirm in the .cpp.
00192 class CAbstractFeatureStochasticModel : public CActionObject
00193 {
00194 protected:
              /// Number of features of the model; returned by getNumFeatures().
00195         unsigned int numFeatures;
              /// State modifier passed to the (CActionSet *, CStateModifier *) constructor.
              /// NOTE(review): presumably NULL when another constructor is used -- confirm.
00196         CStateModifier *discretizer;
00197 
              /// NOTE(review): presumably records that the (numActions, numFeatures)
              /// constructor created its own action set, which the destructor must then
              /// release -- confirm in the .cpp.
00198         bool createdActions;
00199 public:
              /// @param actions   action set of the model
              /// @param numStates number of states/features
00201         CAbstractFeatureStochasticModel(CActionSet *actions, int numStates);
              /// @param actions     action set of the model
              /// @param discretizer state modifier stored in the discretizer member
00202         CAbstractFeatureStochasticModel(CActionSet *actions, CStateModifier *discretizer);
              /// @param numActions  number of actions
              /// @param numFeatures number of features
00203         CAbstractFeatureStochasticModel(int numActions, int numFeatures);
00204         virtual ~CAbstractFeatureStochasticModel();
00205 
00206         
00207 
              /// Probability value for (oldFeature, action, newFeature) with the action
              /// given as an object.
00209         virtual double getPropability(int oldFeature, CAction *action, int newFeature);
00211 
              /// Probability value for (oldFeature, action, newFeature) with the action
              /// given as an index. Must be implemented by subclasses.
00214         virtual double getPropability(int oldFeature, int action, int newFeature) = 0;
00216 
              /// As above, additionally taking a duration. Must be implemented by subclasses.
00218         virtual double getPropability(int oldFeature, int action, int duration, int newFeature) = 0;
00219 
              /// Probability value between two feature lists for the given action.
              /// NOTE(review): how the features of both lists are combined is not
              /// visible here -- confirm in the .cpp.
00221         virtual double getPropability(CFeatureList *oldList, CAction *action, CFeatureList *newList);
00222 
              /// Probability value between two state collections for the given action.
00223         virtual double getPropability(CStateCollection *oldState, CAction *action, CStateCollection *newState);
              /// Probability value between two states for the given action.
00224         virtual double getPropability(CState *oldState, CAction *action, CState *newState);
00225 
00227 
              /// @return the forward transition list for the given action and state index.
              /// Must be implemented by subclasses.
00230         virtual CTransitionList* getForwardTransitions(int action, int state) = 0;
              /// Forward transition list with action and state given as objects.
00231         virtual CTransitionList* getForwardTransitions(CAction *action, CState *state);
              /// Forward transition list with the state given as a state collection.
00232         virtual CTransitionList* getForwardTransitions(CAction *action, CStateCollection *state);
00234 
              /// @return the backward transition list for the given action and state index.
              /// Must be implemented by subclasses.
00238         virtual CTransitionList* getBackwardTransitions(int action, int state) = 0;
              /// @return the number of features of the model.
00239         virtual unsigned int getNumFeatures();
00240 };
00241 
00242 
00243 
00245 
/// Concrete stochastic model that stores its transitions in a 2D array of
/// CStateActionTransitions pointers and can be saved to / loaded from a text
/// stream.
///
/// NOTE(review): this class declares getPropability(...) and
/// getForwardTransitions(...) overloads of its own, which hides every other
/// base-class overload of those names (the CAction / CFeatureList / CState /
/// CStateCollection versions) for calls made through a CFeatureStochasticModel.
/// Those overloads remain reachable through a CAbstractFeatureStochasticModel
/// pointer or reference. Using-declarations would make them visible again;
/// this was not changed here because the callers are outside this view.
00253 class CFeatureStochasticModel : public CAbstractFeatureStochasticModel
00254 {
00255 protected:
              /// 2D array of per-(state, action) transition containers.
              /// NOTE(review): which dimension is the action and which the feature is
              /// not visible in this header -- confirm in the .cpp.
00257         CMyArray2D<CStateActionTransitions *> *stateTransitions;
              /// Reads the model from a text stream.
              /// NOTE(review): presumably called by the constructor taking a FILE * -- confirm.
00259         void loadASCII(FILE *stream);
00260 
00262 
              /// Creates a transition object for the given states, action and probability.
              /// NOTE(review): presumably chooses between CTransition and
              /// CSemiMDPTransition depending on the action, and returns an object
              /// the caller must own -- confirm in the .cpp.
00265         CTransition *getNewTransition(int  startState, int endState, CAction *action, double propability);
00266 
00267         
00268 public:
              /// @param actions     action set of the model
              /// @param numFeatures number of features
              /// @param file        stream the model is read from
00270         CFeatureStochasticModel(CActionSet *actions, int numFeatures, FILE *file);
              /// @param actions     action set of the model
              /// @param numFeatures number of features
00272         CFeatureStochasticModel(CActionSet *actions, int numFeatures);
              /// @param numActions  number of actions
              /// @param numFeatures number of features
00273         CFeatureStochasticModel(int numActions, int numFeatures);
00274         virtual ~CFeatureStochasticModel();
00275 
00277 
              /// Implements the pure virtual base function.
              /// @return the probability value stored for (oldFeature, action, newFeature).
00280         virtual double getPropability(int oldFeature, int action, int newFeature);
00281 // Returns the probability of the transition for the given duration.
00285         virtual double getPropability(int oldFeature, int action, int duration, int newFeature);
              /// Sets the probability value for (oldFeature, action, newFeature).
              /// Note that the value is the FIRST parameter here.
00286         void setPropability(double propability, int oldFeature, int action, int newFeature);
              /// As above, for a specific duration.
00287         void setPropability(double propability, int oldFeature, int action, int duration, int newFeature);
00288 
              /// Implements the pure virtual base function.
00290         virtual CTransitionList* getForwardTransitions(int action, int state);
              /// Implements the pure virtual base function.
00292         virtual CTransitionList* getBackwardTransitions(int action, int state);
00293 
              /// Writes the model to a text stream (counterpart of loadASCII()).
00294         virtual void saveASCII(FILE *stream);
00295 };
00296 
/// Primitive action that keeps a pointer to a stochastic model.
/// NOTE(review): presumably isAvailable() consults the model to decide whether
/// the action can be taken in a state -- confirm in the .cpp.
00297 class CStochasticModelAction : public CPrimitiveAction
00298 {
00299 protected:
              /// Model given to the constructor. Nothing in this header indicates
              /// ownership; the inline destructor below does not delete it.
00300         CAbstractFeatureStochasticModel *model;
00301 public:
              // NOTE(review): single-argument constructor is not `explicit`, so a model
              // pointer converts implicitly to a CStochasticModelAction.
00302         CStochasticModelAction(CAbstractFeatureStochasticModel *model);
              /// Empty destructor.
00303         virtual ~CStochasticModelAction(){};
00304 
              /// @return whether the action is available in the given state.
00305         virtual bool isAvailable(CStateCollection *state);
00306 
00307 };
00308 
00309 
00310 /*
00311 class CFeatureStateVisitCounter : public CLearnDataObject, public CStateObject, public CSemiMDPListener
00312 {
00313 protected:
00314 
00315 };
00316 
00317 class CFeatureStateActionVisitCounter : public CLearnDataObject, public CStateObject, public CSemiMDPListener
00318 {
00319 protected:
00320 
00321 };*/
00322 
00324 
/// Base class of stochastic models that are estimated from observed steps.
///
/// Combines the stored model (CFeatureStochasticModel) with the
/// CSemiMDPListener, CStateObject and CLearnDataObject interfaces.
/// Subclasses must implement nextStep().
/// NOTE(review): nextStep()/intermediateStep() are presumably the
/// CSemiMDPListener callbacks and saveData()/loadData()/resetData() the
/// CLearnDataObject interface; those base classes are not visible here -- confirm.
00344 class CAbstractFeatureStochasticEstimatedModel : public CFeatureStochasticModel, public CSemiMDPListener, public CStateObject, public CLearnDataObject
00345 {
00346 protected:
00347 
              /// Q-function object passed to the constructor.
              /// NOTE(review): presumably used as a table of visit counts per
              /// (feature, action) pair -- confirm in the .cpp.
00348         CFeatureQFunction *stateActionVisits;
00350 
              /// Updates the estimate for one observed (oldFeature, action, newFeature) step.
              /// @param Faktor weight of the update.
              /// NOTE(review): the exact update rule is not visible here -- confirm in the .cpp.
00355         virtual void updateStep(int oldFeature, CAction *action, int newFeature, double Faktor);
00356 public:
              /// @param properties        state properties (presumably forwarded to CStateObject -- confirm)
              /// @param stateActionVisits visit table stored in the member of the same name
              /// @param actions           action set of the model
              /// @param numFeatures       number of features
00358         CAbstractFeatureStochasticEstimatedModel(CStateProperties *properties, CFeatureQFunction *stateActionVisits, CActionSet *actions, int numFeatures);
              /// As above, additionally taking a stream to read from.
00360         CAbstractFeatureStochasticEstimatedModel(CStateProperties *properties, CFeatureQFunction *stateActionVisits, CActionSet *actions, int numFeatures, FILE *file);
00361         
00362         virtual ~CAbstractFeatureStochasticEstimatedModel();
00363 
              /// Processes one observed step. Must be implemented by subclasses.
00365         virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState) = 0;
              /// Processes an intermediate step.
              /// NOTE(review): when intermediate steps occur and how they are handled
              /// is not visible here -- confirm in the .cpp.
00367         virtual void intermediateStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
00368 
              /// Writes the learned data to a stream.
00369         virtual void saveData(FILE *stream);
              /// Reads the learned data from a stream.
00370         virtual void loadData(FILE *stream);
00371 
              /// Resets the learned data.
00372         virtual void resetData();
00373 
00375 
              /// @return visit value for the transition (oldFeature, action, newFeature).
00377         double getTransitionsVisits(int oldFeature, CAction *action, int newFeature);
00379 
              /// @return visit value for the (Feature, action) pair. Non-virtual.
00381         double getStateActionVisits(int Feature, int action);
00383 
              /// @return visit value for the given feature. Non-virtual.
00385         double getStateVisits(int Feature);
00386 
00387 };
00388 
00390 
/// Estimated model that works with an abstract state discretizer.
00395 class CDiscreteStochasticEstimatedModel : public CAbstractFeatureStochasticEstimatedModel
00396 {
00397 protected:
              /// Discretizer given to the constructor.
              /// NOTE(review): this member shadows the inherited
              /// CAbstractFeatureStochasticModel::discretizer (a CStateModifier *), so
              /// code in this class and code in the base class refer to two different
              /// pointers under the same name.
00398         CAbstractStateDiscretizer *discretizer;
00399 
00400 public:
              /// @param discState         discretizer (presumably stored in the discretizer member -- confirm)
              /// @param stateActionVisits visit table passed on to the base class
              /// @param actions           action set of the model
00401         CDiscreteStochasticEstimatedModel(CAbstractStateDiscretizer *discState, CFeatureQFunction *stateActionVisits, CActionSet *actions);
              /// Empty destructor.
00402         virtual ~CDiscreteStochasticEstimatedModel() {};
00403 
              /// Implements the pure virtual nextStep() of the base class.
00405         virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
00406 
              // NOTE(review): the two getters below HIDE (they do not override) the
              // non-virtual base-class versions that return double. Which one runs
              // depends on the static type used for the call, and the return type
              // differs (int here, double in the base).
              /// @return visit count for the (Feature, action) pair as an int.
00407         int getStateActionVisits(int Feature, int action);
              /// @return visit count for the given feature as an int.
00408         int getStateVisits(int Feature);
00409 };
00410 
00412 
/// Estimated model that works with a feature calculator.
00418 class CFeatureStochasticEstimatedModel : public CAbstractFeatureStochasticEstimatedModel
00419 {
00420 protected:
              /// Feature calculator of the model.
              /// NOTE(review): presumably set from the constructor's `properties`
              /// argument -- confirm in the .cpp.
00421         CFeatureCalculator *featCalc;
00422 
00423 public:
              /// @param properties        feature calculator (note the parameter name differs from the member it presumably initialises)
              /// @param stateActionVisits visit table passed on to the base class
              /// @param actions           action set of the model
00424         CFeatureStochasticEstimatedModel(CFeatureCalculator *properties, CFeatureQFunction *stateActionVisits, CActionSet *actions);
              /// Empty destructor.
00425         virtual ~CFeatureStochasticEstimatedModel() {};
00427 
              /// Implements the pure virtual nextStep() of the base class.
              /// NOTE(review): how the active features of both states are weighted
              /// in the update is not visible here -- confirm in the .cpp.
00432         virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
00433 };
00434 
00435 
00436 
00437 #endif
00438