Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

chierarchicbehaviours.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_HIERARCHICBEHAVIOUR__H
00033 #define C_HIERARCHICBEHAVIOUR__H
00034 
00035 #include "cagent.h"
00036 #include "crewardfunction.h"
00037 #include "caction.h"
00038 #include "cagentcontroller.h"
00039 
00040 
// Forward declarations: within this header these four types are only used
// through pointers (members, parameters and container elements), so their
// full definitions are not needed here.
00041 class CRegion;
00042 class CStateProperties;
00043 class CStateCollection;
00044 class CState;
00045 
00046 
// CSubGoalBehaviour derives from both CHierarchicalSemiMarkovDecisionProcess
// and CStateReward. It stores a list of target regions, a list of fail
// regions, one "available" region, two reward parameters per region, a
// standard reward value and a sub-goal name.
// NOTE(review): only declarations are visible in this listing; how the reward
// is actually computed and who owns the pointed-to objects must be confirmed
// in the implementation file.
00047 class CSubGoalBehaviour : public CHierarchicalSemiMarkovDecisionProcess, public CStateReward
00048 {
00049 protected:
              // Two doubles stored per region. Presumably (rewardFactor, rewardTau) as
              // passed to addTargetRegion()/addFailRegion() - TODO confirm the order.
00050         std::map<CRegion *, std::pair<double, double> > *rewardFactors;
00051         
              // Region lists filled through addTargetRegion()/addFailRegion() (presumably).
              // Held by pointer; allocation and release are not visible in this header.
00052         std::list<CRegion *> *targetRegions;
00053         std::list<CRegion *> *failRegions;
00054 
              // Written by setStandardReward(). NOTE(review): presumably the reward used
              // when a state lies in neither a target nor a fail region - confirm.
00055         double standardReward;
00056 
              // Returned unchanged by getAvailAbleRegion().
00057         CRegion *availableRegion;
00058         CStateProperties *modelProperties;
00059 
              // NOTE(review): 'string' is unqualified, so a using-declaration/directive for
              // std::string has to come from one of the included project headers.
00060         string subgoalName;
00061 
00062 public:
              // NOTE(review): the default argument binds a string literal to a non-const
              // char*, which ISO C++11 and later reject. Changing the parameter to
              // const char* requires the same change in the out-of-line definition.
              // NOTE(review): the parameter is spelled 'avialableRegion' (typo for
              // 'availableRegion'); parameter names in a declaration do not affect callers.
00063         CSubGoalBehaviour(CStateProperties *modelProperties, CRegion *avialableRegion, char *subgoalName = "");
00064         virtual ~CSubGoalBehaviour();
00065 
              // Predicates on state collections; their exact criteria are defined in the
              // implementation file. NOTE(review): presumably tied to the target/fail
              // regions and to availableRegion respectively - confirm.
00066         virtual bool isFinished(CStateCollection *oldState, CStateCollection *newState);
00067         virtual bool isAvailable(CStateCollection *currentState);
00068 
              // Region membership tests for a single state.
00069         virtual bool isInGoalRegion(CState *state);
00070         virtual bool isInFailRegion(CState *state);
00071 
              // Reward for a model state, and a derivative-style companion that takes a
              // ColumnVector pointer. NOTE(review): targetState looks like an output
              // parameter - confirm against the implementation.
00072         virtual double getStateReward(CState *modelState);
00073         virtual void getInputDerivation(CState *modelState, ColumnVector *targetState);
00074 
              // Register a region together with its two reward parameters. Defaults:
              // rewardFactor is 1.0 for target regions and -1.0 for fail regions;
              // rewardTau is 10 in both cases.
00075         virtual void addTargetRegion(CRegion *target, double rewardFactor = 1.0, double rewardTau = 10);
00076         virtual void addFailRegion(CRegion *target, double rewardFactor = - 1.0, double rewardTau = 10);
00077 
              // Per-region setters for the two reward parameters.
              // NOTE(review): behaviour for a region that was never added is not visible here.
00078         void setRewardFactor(CRegion *region, double rewardFactor);
00079         void setRewardTau(CRegion *region, double rewardTau);
00080 
              // Stores l_standardReward in standardReward.
              // (The ';' after the inline body is a redundant empty declaration.)
00081         void setStandardReward(double l_standardReward) {this->standardReward = l_standardReward;};
00082 
              // Returns the stored availableRegion pointer (note the capital 'A' in the
              // method name; it is part of the public interface).
00083         virtual CRegion *getAvailAbleRegion() {return availableRegion;};
00084 
00085         virtual void sendNextStep(CAction *action);
00086 
              // Returns a copy of the sub-goal name.
00087         string getSubGoalName() {return subgoalName;};
00088 };
00089 
// CSubGoalController is a CAgentController constructed from a set of
// hierarchic actions. It declares no data members of its own in this header.
// NOTE(review): the selection rule used by getNextAction() lives in the
// implementation file and is not visible here.
00090 class CSubGoalController : public CAgentController
00091 {
      // The protected section is intentionally or accidentally empty.
00092 protected:
00093 public:
00094         CSubGoalController(CActionSet *hierarchicActions);
00095 
              // Returns the action chosen for the given state; 'data' is optional and
              // defaults to NULL.
00096         virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL);
00097 };
00098 
00099 
// CSubGoalOutput is a CSemiMDPListener that holds a controller pointer and a
// pointer to a CSubGoalBehaviour named lastAction.
// NOTE(review): judging by the name it presumably reports sub-goal changes as
// steps arrive - confirm in the implementation file.
00100 class CSubGoalOutput : public CSemiMDPListener
00101 {
00102 protected:
              // NOTE(review): initial value of lastAction is set (if at all) in the
              // out-of-line constructor; not visible here.
00103         CSubGoalBehaviour *lastAction;
00104         CAgentController *policy;
00105 
00106 public:
00107         CSubGoalOutput(CAgentController *policy);
00108 
              // Listener callbacks: one per executed step (state before, action, state
              // after) and one at the start of a new episode.
00109         virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00110         virtual void newEpisode();
00111 };
00112 /*
00113 class CSubGoalTrainer : public CTransitionFunctionEnvironment
00114 {
00115 protected:
00116        CSubGoalBehaviour *subGoal;
00117        CRegion *sampleRegion;
00118 public:
00119        CSubGoalTrainer(CTransitionFunction *transitionFunction, CSubGoalBehaviour *subGoal);
00120 
00121        virtual void doNextState(CPrimitiveAction *action);
00122        virtual void doResetModel();
00123 
00124        virtual void setSubGoal(CSubGoalBehaviour *subGoal);
00125        virtual void setSampleRegion(CRegion *l_sampleRegion);
00126 
00127 };
00128 */
00129 
// CExtendedPrimitiveAction is a CExtendedAction built from a single CAction
// and an integer duration.
// NOTE(review): presumably the wrapped action is repeated for
// extendedActionDuration steps - confirm in the implementation file.
00130 class CExtendedPrimitiveAction : public CExtendedAction
00131 {
00132 protected:
              // The wrapped action passed to the constructor (presumably not owned).
00133         CAction *primitiveAction;
00134 public:
              // Public data member, so callers can read and modify it directly.
00135         int extendedActionDuration;
00136 
00137         CExtendedPrimitiveAction(CAction *primitiveAction, int extendedActionDuration);
00138 
00139         virtual bool isFinished(CStateCollection *oldState, CStateCollection *newState);
00140 
              // Returns the action for the next lower hierarchy level; actionDataSet is
              // optional and defaults to NULL.
00141         virtual CAction* getNextHierarchyLevel(CStateCollection *state, CActionDataSet *actionDataSet = NULL);
00142 
00143 
00144 };
00145 
00147 
// CPrimitiveActionStateChange is a CExtendedAction built from a
// CPrimitiveAction and a CStateProperties pointer identifying the state to
// watch. According to the comments on isFinished()/setStateToChange() below,
// the action counts as finished once that state differs between the old and
// the new state collection.
00153 class CPrimitiveActionStateChange : public CExtendedAction
00154 {
00155 protected:
              // Properties of the state whose change is observed; replaced via
              // setStateToChange().
00157         CStateProperties *stateToChange;
00158 
              // The primitive action passed to the constructor.
00159         CPrimitiveAction *primitiveAction;
00160 
00161 public:
00162         CPrimitiveActionStateChange(CPrimitiveAction *action, CStateProperties *stateToChange);
00163 
              // Returns the action for the next lower hierarchy level; actionDataSet is
              // optional and defaults to NULL.
00165         virtual CAction* getNextHierarchyLevel(CStateCollection *state, CActionDataSet *actionDataSet = NULL);
00166 
00167         // Returns true if the 2 states are not equal
00168         virtual bool isFinished(CStateCollection *oldState, CStateCollection *newState);
00169 
00170         // Sets the state which has to change
00171         void setStateToChange(CStateProperties *stateToChange);
00172 
00173 };
00174 
00175 
00176 #endif
00177