Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

creinforce.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_REINFORCE__H
00033 #define C_REINFORCE__H
00034 
00035 #include "cparameters.h"
00036 #include "cagentlistener.h"
00037 
// Forward declarations: these types are only used through pointers in this
// header, so their full definitions are not needed here.
00038 class CGradientUpdateFunction;
00039 class CGradientVETraces;
00040 class CStochasticPolicy;
00041 class CFeatureList;
00042 
00043 
// Abstract interface for objects that supply a reinforcement baseline value.
// Inherits virtually from CParameterObject.
// NOTE(review): presumably the baseline is the term subtracted from the reward
// in REINFORCE-style updates -- confirm against the implementation file.
00044 class CReinforcementBaseLineCalculator : virtual public CParameterObject
00045 {
00046 public:
              // Returns the baseline (as a double) for the given feature.
              // Pure virtual: every concrete subclass must implement it.
00047         virtual double getReinforcementBaseLine(int feature) = 0;
00048 };
00049 
// Baseline calculator constructed from a single double value.
// NOTE(review): the class name suggests the same baseline is returned for
// every feature; no data member is declared here, so where `b` is kept cannot
// be seen from this header -- confirm in the implementation file.
00050 class CConstantReinforcementBaseLineCalculator : public CReinforcementBaseLineCalculator
00051 {
00052 public:
              // b: value handed to the calculator at construction
              // (presumably the constant baseline -- TODO confirm).
00053         CConstantReinforcementBaseLineCalculator(double b);
00054 
              // Implements the pure virtual declared in CReinforcementBaseLineCalculator.
00055         virtual double getReinforcementBaseLine(int feature);
00056 };
00057 
// Baseline calculator that is also a CSemiMDPRewardListener, so it receives
// per-step and per-episode callbacks in addition to answering baseline queries.
// NOTE(review): the name and members suggest it maintains an average of the
// observed rewards and uses it as the baseline -- confirm in the implementation.
00058 class CAverageReinforcementBaseLineCalculator : public CReinforcementBaseLineCalculator, public CSemiMDPRewardListener 
00059 {
00060 protected:
              // presumably the current reward average -- TODO confirm how it is updated
00061         double averageReward;
              // presumably a count of steps seen so far -- TODO confirm when it is reset
00062         int steps;
00063 public:
              // reward: reward function passed in by the caller
              // (presumably forwarded to the listener base -- verify).
              // minUpdateFactor: NOTE(review) looks like a lower bound on the
              // averaging update factor -- exact meaning not visible here.
00064         CAverageReinforcementBaseLineCalculator(CRewardFunction *reward, double minUpdateFactor);
00065 
              // Implements the pure virtual declared in CReinforcementBaseLineCalculator.
00066         virtual double getReinforcementBaseLine(int feature);
00067 
              // Step callback: receives the previous state, the action taken,
              // the reward and the resulting state.
00068         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState);
              // Episode-start callback; takes no arguments.
00069         virtual void newEpisode();
00070 };
00071 
// Learner that listens for rewards (CSemiMDPRewardListener) and works with a
// stochastic policy, a gradient update function and a baseline calculator.
// NOTE(review): the name indicates the REINFORCE policy-gradient algorithm;
// the actual update rule lives in the implementation file, not in this header.
// Ownership of the pointer members is not visible from this header.
00072 class CREINFORCELearner : public CSemiMDPRewardListener
00073 {
00074 protected:
              // Policy handed in at construction.
00075         CStochasticPolicy *policy;
              // Update function handed in at construction.
00076         CGradientUpdateFunction *updateFunction;
00077 
              // Baseline calculator handed in at construction.
00078         CReinforcementBaseLineCalculator *baseLine;
00079 
              // presumably scratch storage for the current gradient -- TODO confirm
00080         CFeatureList *gradient;
              // Traces object exposed through getETraces().
00081         CGradientVETraces *eTraces;
00082 public:
              // reward:         reward function passed in by the caller
              // policy:         stochastic policy used by the learner
              // updateFunction: gradient update function used by the learner
              // baseLine:       baseline calculator used by the learner
00083         CREINFORCELearner(CRewardFunction *reward, CStochasticPolicy *policy, CGradientUpdateFunction *updateFunction, CReinforcementBaseLineCalculator *baseLine);
00084         ~CREINFORCELearner();
00085 
              // Step callback: receives the previous state, the action taken,
              // the reward and the resulting state.
00086         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState); 
              // Episode-start callback; takes no arguments.
00087         virtual void newEpisode();
00088 
              // Returns a pointer to a CGradientVETraces object
              // (presumably the eTraces member -- verify in the implementation).
00089         CGradientVETraces *getETraces();
00090 
00091 };
00092 
00093 #endif
00094