Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

clstd.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_LSTD_H
00033 #define C_LSTD_H
00034 
00035 #include "clearndataobject.h"
00036 #include "cagentlistener.h"
00037 #include "csupervisedlearner.h"
00038 
00039 #include <stdlib.h>
00040 #include <stdio.h>
00041 
00042 
00043 // Forward declarations only: where this header uses these types at all, it is through pointers.
00044 class Matrix;
00045 class ColumnVector;
00046 // NOTE(review): Matrix, ColumnVector, CStateProperties and CFeatureQETraces are not referenced anywhere in this header.
00047 class CFeatureVFunction;
00048 class CFeatureQFunction;
00049 class CStateProperties;
00050 class CFeatureVETraces;
00051 class CFeatureQETraces;
00052 class CFeatureList;
00053 // Types used as members / constructor arguments of CQLSTDLambda below.
00054 class CGradientQETraces;
00055 class CAgentController;
00056 class CActionDataSet;
00057 
00058 // CLSTDLambda: abstract base class (declares five pure virtual hooks) shared by CVLSTDLambda and CQLSTDLambda. NOTE(review): the name suggests LSTD(lambda) learning - confirm in the implementation file.
00059 class CLSTDLambda : public CSemiMDPRewardListener, public CLearnDataObject, public CLeastSquaresLearner
00060 {
00061 protected:
00062         //CFeatureVFunction *vFunction;
00063         // NOTE(review): the two commented-out members around this line are declared for real in the subclass CVLSTDLambda.
00064         //CFeatureVETraces *vETraces;
00065         CFeatureList *oldStateGradient; // NOTE(review): presumably the buffer handed to getOldGradient() - confirm; ownership is not visible here
00066         CFeatureList *newStateGradient; // NOTE(review): presumably the buffer handed to getNewGradient() - confirm; ownership is not visible here
00067         
00068                 // NOTE(review): nEpisode looks like an episode counter, presumably related to nUpdateEpisode - confirm in the implementation.
00069         int nEpisode;
00070         
00071         // Hooks that subclasses must provide (all pure virtual); the gradient getters write into the caller-supplied CFeatureList.
00072         virtual void getOldGradient(CStateCollection *stateCol, CAction *action, CFeatureList *gradient) = 0;
00073         virtual void getNewGradient(CStateCollection *stateCol, CFeatureList *gradient) = 0;
00074         // Trace hooks: update for a (state, action) pair, read access to the trace list, and reset. Exact semantics are defined by the subclasses.
00075         virtual void updateETraces(CStateCollection *stateCol, CAction *action) = 0;
00076         virtual CFeatureList *getGradientETraces() = 0;
00077         virtual void resetETraces() = 0;
00078         
00079 public:
00080         int nUpdateEpisode; // public and writable by callers; NOTE(review): presumably initialised from the constructor's nUpdatePerEpisode argument - confirm
00081 
00082         CLSTDLambda(CRewardFunction *rewardFunction, CGradientUpdateFunction *updateFunction, int nUpdatePerEpisode);
00083         virtual ~CLSTDLambda();
00084         // Per-step and per-episode callbacks. NOTE(review): presumably overrides of the CSemiMDPRewardListener interface - confirm against that header.
00085         virtual void nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState);
00086         virtual void newEpisode();
00087         
00088         // Reset, load and save of the learner's data; load/save take a C stdio FILE stream. NOTE(review): presumably the CLearnDataObject interface - confirm.
00089         virtual void resetData();
00090         virtual void loadData(FILE *stream);
00091         virtual void saveData(FILE *stream);
00092 };
00093 // CVLSTDLambda: CLSTDLambda subclass that overrides all five hooks the base declares pure virtual; it holds a CFeatureVFunction and a CFeatureVETraces.
00094 class CVLSTDLambda : public CLSTDLambda
00095 {
00096         protected:
00097                 CFeatureVFunction *vFunction; // NOTE(review): presumably the constructor's updateFunction argument - confirm
00098                 CFeatureVETraces *vETraces; // NOTE(review): creation and ownership are not visible in this header - check the constructor/destructor
00099         // Overrides of the pure virtual hooks declared in CLSTDLambda.
00100                 virtual void getOldGradient(CStateCollection *stateCol, CAction *action, CFeatureList *gradient);
00101                 virtual void getNewGradient(CStateCollection *stateCol, CFeatureList *gradient);
00102         
00103                 virtual void updateETraces(CStateCollection *stateCol, CAction *action);
00104                 virtual CFeatureList *getGradientETraces();
00105                 virtual void resetETraces();
00106         public:
00107                 CVLSTDLambda(CRewardFunction *rewardFunction, CFeatureVFunction *updateFunction, int nUpdatePerEpisode);
00108                 virtual ~CVLSTDLambda();        
00109 };
00110 // CQLSTDLambda: CLSTDLambda subclass that overrides all five hooks the base declares pure virtual; it holds a CFeatureQFunction, a CGradientQETraces, a CAgentController and a CActionDataSet.
00111 class CQLSTDLambda : public CLSTDLambda
00112 {
00113         protected:
00114                 CFeatureQFunction *qFunction; // NOTE(review): presumably the constructor's updateFunction argument - confirm
00115                 CGradientQETraces *qETraces; // NOTE(review): creation and ownership are not visible in this header - check the constructor/destructor
00116                 // NOTE(review): policy presumably stores the constructor's policy argument; how it and actionDataSet are used is only visible in the implementation.
00117                 CAgentController *policy;
00118                 CActionDataSet *actionDataSet;
00119         // Overrides of the pure virtual hooks declared in CLSTDLambda.
00120                 virtual void getOldGradient(CStateCollection *stateCol, CAction *action, CFeatureList *gradient);
00121                 virtual void getNewGradient(CStateCollection *stateCol, CFeatureList *gradient);
00122         
00123                 virtual void updateETraces(CStateCollection *stateCol, CAction *action);
00124                 virtual CFeatureList * getGradientETraces();
00125                 virtual void resetETraces();
00126         public:
00127                 CQLSTDLambda(CRewardFunction *rewardFunction, CFeatureQFunction *updateFunction, CAgentController *policy,  int nUpdatePerEpisode);
00128                 virtual ~CQLSTDLambda();        
00129 };
00130 
00131 
00132 #endif
00133 
00134