Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cdynamicprogramming.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef C_DYNAMICPROGRAMMING__H
00033 #define C_DYNAMICPROGRAMMING__H
00034 
00035 
00036 #include "cparameters.h"
00037 
00038 
00039 #include <map>
00040 
00041 
00042 class CTransition;
00043 class CAbstractFeatureStochasticModel; 
00044 class CFeatureRewardFunction;
00045 class CAbstractVFunction;
00046 class CState;
00047 class CAction;
00048         
00049 
00050 class CFeatureQFunction;
00051 class CFeatureVFunction;
00052 class CQFunctionFromStochasticModel;
00053 class CActionSet;
00054 
00055 class CFeatureList;
00056 class CStochasticPolicy;
00057 
00059 
// CDynamicProgramming: a class that only declares three public static functions, so
// callers use them as CDynamicProgramming::getXxx(...) without creating an instance.
// NOTE(review): the original doc comments were stripped from this listing and the
// implementations are not visible here; semantic notes below are inferred from the
// names and signatures only and must be confirmed against the .cpp file.
00061 class CDynamicProgramming 
00062 {
00063 public:
00065 
              // Returns a double computed from the stochastic model, the reward function,
              // a value function, one state, one action and the factor gamma.
              // Presumably the expected discounted value of executing `action` in
              // `discState` (gamma looks like the discount factor) - TODO confirm.
              // NOTE(review): discState is presumably a discretized state - confirm.
00076         static double getActionValue(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *discState, CAction *action, double gamma);
00078 
              // Same inputs as getActionValue except that no action is passed.
              // Presumably the best action value over the model's actions for
              // `discState` - TODO confirm.
00080         static double getBellmanValue(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *discState, double gamma);
00082 
              // Same parameter list as getBellmanValue.
              // Presumably the difference between the Bellman value and the value
              // currently stored in `vFunction` for `discState` - TODO confirm
              // (including the sign convention).
00084         static double getBellmanError(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *discState, double gamma);
00085 };
00086 
00088 
// CValueIteration: derives virtually and publicly from CParameterObject (declared in
// "cparameters.h"). It can be constructed around either a CFeatureQFunction or a
// CFeatureVFunction, each with or without an explicit CStochasticPolicy.
// NOTE(review): the original doc comments were stripped from this listing and no
// implementation is visible here; notes about purpose are inferred from names only.
// NOTE(review): pointer ownership (who deletes what) cannot be determined from this
// header - check the constructors/destructor in the .cpp before relying on it.
00112 class CValueIteration : virtual public CParameterObject
00113 {
00114 protected:
              // Value-function and Q-function pointers. Judging by the names, each
              // representation has a counterpart derived from the other one
              // (vFunctionFromQFunction / qFunctionFromVFunction) - TODO confirm which
              // of these are set for each constructor overload.
00116         CAbstractVFunction *vFunction;
00118         CAbstractVFunction *vFunctionFromQFunction;
00120         CFeatureQFunction *qFunction;
00122         CQFunctionFromStochasticModel *qFunctionFromVFunction;
              // Model, reward function and action set used by the updates.
00124         CAbstractFeatureStochasticModel *model;
00126         CFeatureRewardFunction *rewardModel;
00128         CActionSet *actions;
00129 
              // Presumably true when the object was built from a V-function rather
              // than a Q-function - TODO confirm.
00131         bool learnVFunction;
              // NOTE(review): looks like a scratch state object reused between
              // updates - confirm.
00133         CState *discState;
00134 
              // Feature list holding priorities; see addPriority()/addPriorities().
              // NOTE(review): presumably ordered by priority - confirm.
00136         CFeatureList *priorityList;
00137 
00139         CStochasticPolicy *stochPolicy;
00140 
00142 
              // Overridable hook: maps a transition and a double `bellE` to a priority.
              // NOTE(review): `bellE` is presumably a Bellman error - confirm.
00144         virtual double getPriority(CTransition *trans, double bellE);
              // Non-virtual helper taking the model and reward model; presumably the
              // shared initialisation called by all constructors - TODO confirm.
00145         void init(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel);
00146 
00147   
00148 public:
              // Four overloads: {Q-function, V-function} x {without, with} a
              // caller-supplied stochastic policy.
00150         CValueIteration(CFeatureQFunction *qFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel);
00152         CValueIteration(CFeatureQFunction *qFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel, CStochasticPolicy *stochPolicy);
00154         CValueIteration(CFeatureVFunction *vFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel);
00156         CValueIteration(CFeatureVFunction *vFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel, CStochasticPolicy *stochPolicy);
              // Virtual destructor, so deleting through a base-class pointer is safe.
00157         virtual ~CValueIteration();
00158 
00160 
              // Overridable update of a single feature, identified by its integer index.
00169         virtual void updateFeature(int feature);
00170 
              // NOTE(review): presumably updates the feature at the head of the
              // priority list - confirm.
00172         void updateFirstFeature();
00173 
              // Register a priority for one feature, or for the entries of a feature list.
00175         void addPriority(int feature, double priority);
00177         void addPriorities(CFeatureList *featList);
00178 
              // Accessors returning raw pointers of the declared types.
              // NOTE(review): getTheoreticalModel() presumably returns `model` - confirm.
00179         CAbstractFeatureStochasticModel *getTheoreticalModel();
00180         CAbstractVFunction *getVFunction();
00181         CFeatureQFunction *getQFunction();
00182         CStochasticPolicy *getStochasticPolicy();
00183         
              // Getter/setter pair for a maximum list size.
              // NOTE(review): presumably bounds priorityList - confirm.
00184         int getMaxListSize();
00185         void setMaxListSize(int maxListSize);
00186 
00188 
              // Both take an int `k`; presumably the (maximum) number of update steps,
              // with the second variant also stopping once the priority list is
              // empty - TODO confirm.
00189         void doUpdateSteps(int k);
00191         void doUpdateStepsUntilEmptyList(int k);
00192 
00194 
              // Takes an int state index.
              // NOTE(review): presumably updates predecessor ("backward") states of
              // `state` - confirm.
00197         void doUpdateBackwardStates(int state);
00198 };
00199 
00200 
00201 #endif
00202