Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cpegasus.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C__PEGASUS__H
00033 #define C__PEGASUS__H
00034 
00035 #include "cparameters.h"
00036 #include "cpolicygradient.h"
00037 #include "cagentlistener.h"
00038 
00039 class CContinuousTimeAndActionTransitionFunction;
00040 class CState;
00041 class CContinuousActionData;
00042 
00043 class CContinuousActionGradientPolicy;
00044 class CStateList;
00045 class CTransitionFunctionEnvironment;
00046 class CPolicySameStateEvaluator; 
00047 class CAgent;
00048 class CRewardFunction;
00049 class CCAGradientPolicyInputDerivationCalculator;
00050 class CFeatureList;
00051 class CStateReward;
00052 
00053 class CTransitionFunctionInputDerivationCalculator : virtual public CParameterObject
00054 {
00055 protected:
00056         CContinuousTimeAndActionTransitionFunction *dynModel;
00057         CState *nextState;
00058         CContinuousActionData *buffData;
00059 
00060 public:
00061         CTransitionFunctionInputDerivationCalculator(CContinuousTimeAndActionTransitionFunction *dynModel);
00062         ~CTransitionFunctionInputDerivationCalculator();
00063 
00064         virtual void getInputDerivation(CState *currentState, CContinuousActionData *data, Matrix *dModelInput) = 0;
00065 };
00066 
00067 class CTransitionFunctionNumericalInputDerivationCalculator : public CTransitionFunctionInputDerivationCalculator
00068 {
00069 protected:
00070         CState *buffState;
00071 
00072         CState *nextState1;
00073         CState *nextState2;
00074 public:
00075         CTransitionFunctionNumericalInputDerivationCalculator(CContinuousTimeAndActionTransitionFunction *dynModel, double stepsize);
00076         ~CTransitionFunctionNumericalInputDerivationCalculator();
00077 
00078         virtual void getInputDerivation(CState *currentState, CContinuousActionData *data, Matrix *dModelInput);
00079 };
00080 
00081 class CPEGASUSPolicyGradientCalculator : public CPolicyGradientCalculator
00082 {
00083 protected:
00084         CContinuousActionGradientPolicy *policy;
00085         
00086 
00087         CStateList *startStates;
00088 
00089         CTransitionFunctionEnvironment *dynModel;
00090         
00091         CPolicySameStateEvaluator *sameStateEvaluator; 
00092 
00093 public:
00094         CPEGASUSPolicyGradientCalculator(CAgent *agent, CRewardFunction *reward, CContinuousActionGradientPolicy *policy, CTransitionFunctionEnvironment *dynModel, int numStartStates,  int horizon, double gamma);
00095         ~CPEGASUSPolicyGradientCalculator();
00096 
00097         virtual void getGradient(CFeatureList *gradient);
00098         virtual void getPEGASUSGradient(CFeatureList *gradient, CStateList *startStates) = 0;
00099 
00100         virtual CStateList* getStartStates();
00101         virtual void setStartStates(CStateList *startStates);
00102 
00103         virtual void setRandomStartStates();
00104 };
00105 
00106 class CPEGASUSAnalyticalPolicyGradientCalculator : public CPEGASUSPolicyGradientCalculator, public CSemiMDPListener
00107 {
00108 protected:
00109         ColumnVector *dReward;
00110         Matrix *dPolicy;
00111         Matrix *dModelInput;
00112         std::list<CFeatureList *> *stateGradient1;
00113         std::list<CFeatureList *> *stateGradient2;
00114         std::list<CFeatureList *> *dModelGradient;
00115 
00116         CFeatureList *episodeGradient;
00117 
00118         CStateReward *rewardFunction;
00119         CTransitionFunctionInputDerivationCalculator *dynModeldInput;
00120         CCAGradientPolicyInputDerivationCalculator *policydInput;
00121 
00122         int steps;
00123 
00124         CAgent *agent;
00125 
00126         void multMatrixFeatureList(Matrix *matrix, CFeatureList *features, int index, std::list<CFeatureList *> *newFeatures);
00127 public:
00128         CPEGASUSAnalyticalPolicyGradientCalculator(CAgent *agent, CContinuousActionGradientPolicy *policy, CCAGradientPolicyInputDerivationCalculator *policyInputDerivation, CTransitionFunctionEnvironment *dynModel, CTransitionFunctionInputDerivationCalculator *dynModeldInput, CStateReward *reward, int numStartStates, int horizon, double gamma);
00129         ~CPEGASUSAnalyticalPolicyGradientCalculator();
00130 
00131         virtual void getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates);
00132         
00133         virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *newState);
00134         virtual void newEpisode();
00135 };
00136 
00137 class CPEGASUSNumericPolicyGradientCalculator : public CPEGASUSPolicyGradientCalculator
00138 {
00139 protected:
00140         CFeatureList *gradientFeatures;
00141         double *weights;
00142 
00143         CRewardFunction *rewardFunction;
00144         CAgent *agent;
00145 public:
00146         CPEGASUSNumericPolicyGradientCalculator(CAgent *agent, CContinuousActionGradientPolicy *policy, CTransitionFunctionEnvironment *dynModel, CRewardFunction *reward, double stepSize, int startStates, int horizon, double gamma);
00147         ~CPEGASUSNumericPolicyGradientCalculator();
00148 
00149         virtual void getPEGASUSGradient(CFeatureList *gradientFeatures, CStateList *startStates);
00150 };
00151 
00152 #endif
00153