Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

ccontinuousactions.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 
00033 #ifndef __CCONTINUOUSRL_H
00034 #define __CCONTINUOUSRL_H
00035 
00036 
00037 
00038 
00039 #include "caction.h"
00040 #include "cutility.h"
00041 #include "cqfunction.h"
00042 #include "cqetraces.h"
00043 #include "cagentcontroller.h"
00044 #include "cagentlistener.h"
00045 
00046 class CAbstractQFunction;
00047 class CQFunction;
// Forward declaration: CContinuousActionData (below) takes and stores
// CContinuousActionProperties pointers before that class is defined further
// down in this header, so declare it here instead of relying on an included
// header to have done so.
class CContinuousActionProperties;

00050
/// Action data for a continuous action.
///
/// Derives from CMultiStepActionData and from ColumnVector, and carries a
/// public pointer to a CContinuousActionProperties object.
00053 class CContinuousActionData : public CMultiStepActionData, public ColumnVector
00054 {
00055 protected:
00056         
00057 public:
              /// @param properties  properties object for this action data
              ///                    (presumably kept in the `properties` member — confirm in the .cpp).
00058         CContinuousActionData(CContinuousActionProperties *properties);
00059         virtual ~CContinuousActionData();
00060
              /// Properties associated with this action data (public member).
00061         CContinuousActionProperties *properties;
00062
              /// Sets the action value at index @p dim to @p value.
00063         virtual void setActionValue(int dim, double value);
              /// Returns the action value at index @p dim.
00064         double getActionValue(int dim);
00065
00066 //      double *getActionValues();
00067
              /// NOTE(review): presumably normalizes the values using the min/max stored in
              /// `properties`; the exact rule lives in the .cpp — confirm.
00068         void normalizeAction();
00069         
              /// Returns a distance between this action data and @p vector
              /// (metric not visible from the header).
00070         double getDistance(ColumnVector *vector);
00071
              /// Text (ASCII) serialization to / from a C stdio stream.
00072         virtual void saveASCII(FILE *stream);
00073         virtual void loadASCII(FILE *stream);
00074
              /// Binary serialization to / from a C stdio stream.
00075         virtual void saveBIN(FILE *stream);
00076         virtual void loadBIN(FILE *stream);
00077
              /// Takes over the content of @p actionData
              /// (presumably expects another CContinuousActionData — confirm in the .cpp).
00078         virtual void setData(CActionData *actionData);
              /// Initializes the data from @p initVal (presumably every element — confirm).
00079         void initData(double initVal);
00080 };
00081 
/// Describes a continuous action: the number of action values and a minimum
/// and maximum value that can be read and written per dimension index.
///
/// NOTE(review): minValues / maxValues are raw pointers; they are presumably
/// arrays of numActionValues elements owned by this object — confirm in the
/// .cpp. No copy constructor / assignment operator is declared here.
00082 class CContinuousActionProperties
00083 {
00084 protected:
00085         unsigned int numActionValues;
00086         double *minValues;
00087         double *maxValues;
00088 public:
              /// @param numActionValues  number of action values.
              /// NOTE(review): the parameter is a signed int while the member and
              /// getNumActionValues() are unsigned.
00089         CContinuousActionProperties(int numActionValues);
00090         virtual ~CContinuousActionProperties();
00091
              /// Returns the number of action values.
00092         unsigned int getNumActionValues();
00093
              /// Return the minimum / maximum action value for index @p dim.
00094         double getMinActionValue(int dim);
00095         double getMaxActionValue(int dim);
00096
              /// Set the minimum / maximum action value for index @p dim.
00097         void setMinActionValue(int dim, double value);
00098         void setMaxActionValue(int dim, double value);
00099 };
00100 
/// Primitive action with continuous values.
///
/// Holds a CContinuousActionData pointer and a CContinuousActionProperties
/// pointer; ownership of either is not visible from this header.
00101 class CContinuousAction : public CPrimitiveAction
00102 {
00103 protected:
00104         CContinuousActionData *continuousActionData;
00105         CContinuousActionProperties *properties;
00106
              /// Protected constructor that additionally receives the action data object to use
              /// (presumably for subclasses that supply their own data — confirm in the .cpp).
00107         CContinuousAction(CContinuousActionProperties *properties, CContinuousActionData *actionData);
00108 public:
              /// @param properties  properties describing this action.
00109         CContinuousAction(CContinuousActionProperties *properties);
00110         virtual ~CContinuousAction();
00111
00112
              /// Returns the properties object of this action.
00113         CContinuousActionProperties *getContinuousActionProperties();
00114
              /// Returns the action data pointer held by this action.
00115         virtual CContinuousActionData *getContinuousActionData() {return continuousActionData;};
00116
              /// Returns an action data object for this action
              /// (presumably newly allocated, caller-owned — confirm in the .cpp).
00117         virtual CActionData *getNewActionData();
00118
              /// Returns the action value at index @p dim.
00119         double getActionValue(int dim);
              /// Returns the number of dimensions of this action.
00120         unsigned int getNumDimensions();
00121
              /// Loads @p data into this action.
00122         virtual void loadActionData(CActionData *data);
00123
              /// Comparison against another action, without (equals) or with (isSameAction)
              /// an explicit action data object; the criteria are defined in the .cpp.
00124         virtual bool equals(CAction *action);
00125         virtual bool isSameAction(CAction *action, CActionData *data);
00126 };
00127 
// Values for the `randomControllerMode` of CContinuousActionController.
// NOTE(review): how each mode is acted upon is implemented in the .cpp,
// which is not visible from this header.
00128 #define NO_RANDOM_CONTROLLER 0
00129 #define EXTERN_RANDOM_CONTROLLER 1
00130 #define INTERN_RANDOM_CONTROLLER 2
00131
00132 class CContinuousActionRandomPolicy;
00133
/// Abstract agent controller for a continuous action.
///
/// Subclasses implement getNextContinuousAction(). The controller also holds
/// an optional CContinuousActionRandomPolicy, a noise action-data buffer and
/// an integer mode selecting how the random controller is used.
00134 class CContinuousActionController : public CAgentController
00135 {
00136 protected:
00137         CContinuousAction *contAction;
00138
00139         CContinuousActionRandomPolicy *randomController;
00140         CContinuousActionData *noise;
00141
00142         int randomControllerMode;
00143 public:
              /// @param contAction            the continuous action this controller works on.
              /// @param randomControllerMode  one of the *_RANDOM_CONTROLLER constants above;
              ///                              defaults to EXTERN_RANDOM_CONTROLLER (value 1, the
              ///                              same default as before, now spelled by name).
00144         CContinuousActionController(CContinuousAction *contAction, int randomControllerMode = EXTERN_RANDOM_CONTROLLER);
00145         virtual ~CContinuousActionController();
00146
              /// CAgentController-style entry point returning the next action.
              /// NOTE(review): presumably delegates to getNextContinuousAction() — confirm in the .cpp.
00147         virtual CAction *getNextAction(CStateCollection *state, CActionDataSet *data = NULL);
              /// Computes the continuous action for @p state and writes it into @p action.
              /// Pure virtual: must be implemented by subclasses.
00148         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action) = 0;
00149
              /// Returns the properties of the controlled continuous action.
00150         virtual CContinuousActionProperties *getContinuousActionProperties() {return contAction->getContinuousActionProperties();};
              /// Returns the controlled continuous action.
00151         virtual CContinuousAction *getContinuousAction() {return contAction;};
00152
              /// Sets / returns the random controller associated with this controller.
00153         virtual void setRandomController(CContinuousActionRandomPolicy *randomController);
00154         virtual CContinuousActionRandomPolicy *getRandomController();
00155
              /// Sets / returns the random controller mode (see the *_RANDOM_CONTROLLER constants).
00156         void setRandomControllerMode(int randomControllerMode);
00157         int getRandomControllerMode();
00158
              /// Provides the noise for @p action in @p state through the @p noise output object
              /// (how it is computed is defined in the .cpp).
00159         virtual void getNoise(CStateCollection *state, CContinuousActionData *action, CContinuousActionData *noise);
00160
00161
00162 };
00163 
00164 
/// Continuous action that refers to another CContinuousAction and carries a
/// maximumDistance value.
///
/// NOTE(review): presumably represents one fixed point in the action space of
/// the referenced continuous action — confirm in the .cpp.
00165 class CStaticContinuousAction : public CContinuousAction
00166 {
00167 protected:
00168         CContinuousAction *contAction;
00169
00170         double maximumDistance;
00171 public:
              /// @param properties       despite its name this is a CContinuousAction pointer
              ///                         (presumably stored in `contAction` — confirm).
              /// @param actionValues     array of action values (length not visible here;
              ///                         presumably one per action dimension — confirm).
              /// @param maximumDistance  value returned by getMaximumDistance(); defaults to 0.0.
00172         CStaticContinuousAction(CContinuousAction *properties, double *actionValues, double maximumDistance = 0.0);
00173         virtual ~CStaticContinuousAction();
00174
              /// Write this action's values into @p contAction, either replacing them or adding
              /// them scaled by @p factor. NOTE(review): direction inferred from the names —
              /// confirm in the .cpp. The parameter shadows the `contAction` member.
00175         virtual void setContinuousAction(CContinuousActionData *contAction);
00176         virtual void addToContinuousAction(CContinuousActionData *contAction, double factor);
00177
              /// Returns the referenced continuous action.
00178         CContinuousAction *getContinuousAction();
00179
              /// Loading external action data is a no-op for this class.
00180         virtual void loadActionData(CActionData *) {};
              /// Must not be called: asserts unconditionally.
              /// NOTE(review): CContinuousAction as declared in this header has no setData();
              /// check whether this overrides a method from a base class declared elsewhere.
00181         virtual void setData(CActionData *) {assert(false);};
00182
              /// Comparison against another action, without / with an explicit action data object.
00183         virtual bool equals(CAction *action);
00184         virtual bool isSameAction(CAction *action, CActionData *data);
00185
              /// Returns the maximum distance associated with this action.
00186         virtual double getMaximumDistance();
00187 };
00188 
00189 
/// Abstract static continuous action that can report an "action factor" for a
/// given continuous action data object.
00190 class CLinearFAContinuousAction : public CStaticContinuousAction
00191 {
00192 protected:
00193 public:
              /// @param properties    a CContinuousAction pointer (passed on to the base class,
              ///                      presumably — confirm in the .cpp).
              /// @param actionValues  array of action values (length not visible here).
00194         CLinearFAContinuousAction(CContinuousAction *properties, double *actionValues);
00195         virtual ~CLinearFAContinuousAction() {};
00196
              /// Returns the factor of this action for @p contAction.
              /// Pure virtual: must be implemented by subclasses.
00197         virtual double getActionFactor(CContinuousActionData *contAction) = 0;
00198
00199 };
00200 
/// CLinearFAContinuousAction constructed from an RBF center and an RBF sigma
/// array; implements getActionFactor().
00201 class CContinuousRBFAction : public CLinearFAContinuousAction
00202 {
00203 protected:
              /// Sigma values. NOTE(review): whether this is a copy of, or the same pointer as,
              /// the constructor argument is decided in the .cpp — confirm ownership.
00204         double *rbfSigma;
00205 public:
              /// @param properties  a CContinuousAction pointer.
              /// @param rbfCenter   array of center values (presumably one per action dimension — confirm).
              /// @param rbfSigma    array of sigma values (presumably one per action dimension — confirm).
00206         CContinuousRBFAction(CContinuousAction *properties, double *rbfCenter, double *rbfSigma);
00207         virtual ~CContinuousRBFAction();
00208
              /// Returns the factor of this action for @p contAction
              /// (presumably a radial-basis activation — formula is in the .cpp).
00209         virtual double getActionFactor(CContinuousActionData *contAction);
00210 };
00211 
/// Helper that relates a continuous action to a set of actions via per-action
/// factors.
///
/// Holds an action set and a CContinuousActionProperties pointer.
/// NOTE(review): the set presumably contains CLinearFAContinuousAction
/// objects — confirm in the .cpp.
00212 class CContinuousActionLinearFA
00213 {
00214 protected:
00215         CActionSet *contActions;
00216         CContinuousActionProperties *actionProperties;
00217
00218 public:
00219
              /// @param contActions  the action set used for the factor computation.
              /// @param properties   properties of the continuous action.
00220         CContinuousActionLinearFA(CActionSet *contActions, CContinuousActionProperties *properties);
00221         virtual ~CContinuousActionLinearFA();
00222
              /// Writes the factors for @p action into the caller-provided array @p actionFactors
              /// (presumably one entry per action in the set — confirm required length).
00223         void getActionFactors(CContinuousActionData *action, double *actionFactors);
00224         
              /// Writes into @p action the continuous action for the set entry @p index.
00225         void getContinuousAction(unsigned int index, CContinuousActionData *action);
              /// Writes into @p action the continuous action obtained from @p actionFactors
              /// (combination rule is defined in the .cpp).
00226         void getContinuousAction(CContinuousActionData *action, double *actionFactors);
00227
              /// Returns the number of continuous actions used by this object.
00228         int getNumContinuousActionFA();
00229 };
00230 
00231 
00232 class CCALinearFAQETraces;
00233
/// Abstract gradient Q-function for a single continuous action.
///
/// Declares the CAction-based Q-function methods next to "CA" variants that
/// take a CContinuousActionData directly. getBestContinuousAction() and
/// getCAValue() are pure virtual.
/// NOTE(review): the CAction-based methods presumably forward to the CA
/// variants — confirm in the .cpp.
00234 class CContinuousActionQFunction : public CGradientQFunction
00235 {
00236 protected:
00237         CContinuousAction *contAction;
00238 public:
              /// @param contAction  the continuous action this Q-function is defined for.
00239         CContinuousActionQFunction(CContinuousAction *contAction);
00240         virtual ~CContinuousActionQFunction();
00241
              /// Returns the maximizing action; the first (state) parameter is unnamed in this declaration.
00242         virtual CAction *getMax(CStateCollection *, CActionSet *availableActions, CActionDataSet *actionDatas);
00243
              /// Writes the best continuous action for @p state into @p actionData. Pure virtual.
00244         virtual void getBestContinuousAction(CStateCollection *state, CContinuousActionData *actionData) = 0;
00245
              /// CAction-based update / set / get of the Q-value; @p data is optional (defaults to NULL).
00246         virtual void updateValue(CStateCollection *state, CAction *action, double td, CActionData *data = NULL);
00248
00250         virtual void setValue(CStateCollection *state, CAction *action, double qValue, CActionData *data = NULL); 
00252
00254         virtual double getValue(CStateCollection *state, CAction *action, CActionData *data = NULL);
00255
00256
              /// Continuous-action variants of update / set / get; getCAValue() is pure virtual.
00257         virtual void updateCAValue(CStateCollection *state, CContinuousActionData *data, double td);
00258         virtual void setCAValue(CStateCollection *state, CContinuousActionData *data, double qValue); 
00259         virtual double getCAValue(CStateCollection *state, CContinuousActionData *data) = 0;
00260
00261
              /// Write the gradient for the given state/action into @p gradient
              /// (CAction-based and continuous-action variants).
00262         virtual void getGradient(CStateCollection *state, CAction *action, CActionData *data, CFeatureList *gradient);
00263         virtual void getCAGradient(CStateCollection *state, CContinuousActionData *data, CFeatureList *gradient);
00264
              /// Returns the continuous action this Q-function was constructed for.
00265         CContinuousAction *getContinuousActionObject() {return contAction;};
00266
              /// This base implementation reports zero weights.
00267         virtual int getNumWeights() {return 0;};
00268
              /// Copy the weights to / from the caller-provided array @p parameters.
00269         virtual void getWeights(double *parameters);
00270         virtual void setWeights(double *parameters);
00271
00272         //virtual CAbstractQETraces* getStandardETraces() = 0;
00273 };
00274 
/// Continuous-action Q-function built on a CQFunction and the
/// CContinuousActionLinearFA helper (both base classes are inherited).
///
/// NOTE(review): actionFactors / CAactionValues are raw scratch arrays whose
/// sizes and ownership are set up in the .cpp — confirm before touching them.
00275 class CCALinearFAQFunction : public CContinuousActionQFunction, public CContinuousActionLinearFA
00276 {
00277 protected:
00278         double *actionFactors;
00279         double *CAactionValues;
00280         CQFunction *qFunction;
00281
00282         CFeatureList *tempGradient;
00283
              /// Applies @p features to the weights (implementation in the .cpp).
00284         virtual void updateWeights(CFeatureList *features);
00285
00286 public:
00287
              /// @param qFunction     the underlying Q-function.
              /// @param returnAction  the continuous action (presumably handed to the
              ///                      CContinuousActionQFunction base — confirm in the .cpp).
00288         CCALinearFAQFunction(CQFunction *qFunction, CContinuousAction *returnAction);
00289
00290         virtual ~CCALinearFAQFunction();
00291
              /// Writes the best continuous action for @p state into @p actionData.
00292         virtual void getBestContinuousAction(CStateCollection *state, CContinuousActionData *actionData);
00293
              /// Continuous-action update / set / get of the Q-value.
00294         virtual void updateCAValue(CStateCollection *state, CContinuousActionData *data, double td);
00295         virtual void setCAValue(CStateCollection *state, CContinuousActionData *data, double qValue); 
00296         virtual double getCAValue(CStateCollection *state, CContinuousActionData *data);
00297
00298
              /// Returns the underlying CQFunction.
00299         CQFunction *getQFunctionForCA();
00300
              /// Returns an e-traces object for this Q-function
              /// (presumably a new CCALinearFAQETraces — confirm in the .cpp).
00301         virtual CAbstractQETraces* getStandardETraces();
              /// Writes the gradient for @p state and @p action into @p gradient.
00302         virtual void getCAGradient(CStateCollection *state, CContinuousActionData *action, CFeatureList *gradient);
00303
              /// Number of weights, and copy of the weights to / from a caller-provided array.
00304         virtual int getNumWeights();
00305
00306         virtual void getWeights(double *weights);
00307         virtual void setWeights(double *weights);
00308
              /// Always returns 0, regardless of the action passed.
00309         virtual int getWeightsOffset(CAction *) {return 0;};
00310 };
00311 
/// Q e-traces for a CCALinearFAQFunction.
00312 class CCALinearFAQETraces : public CQETraces
00313 {
00314 protected:
              /// Scratch array for action factors (size and ownership are set up in the .cpp).
00315         double *actionFactors;
00316         CCALinearFAQFunction *contQFunc;
00317         
00318 public:
00319
              /// @param qfunction  the Q-function these traces belong to.
00320         CCALinearFAQETraces(CCALinearFAQFunction *qfunction);
00321         virtual ~CCALinearFAQETraces();
00322
              /// Adds an e-trace for the given state and action.
              /// @param factor  scaling of the added trace; defaults to 1.0.
              /// @param data    optional action data; defaults to NULL.
00323         virtual void addETrace(CStateCollection *State, CAction *action, double factor = 1.0, CActionData *data = NULL);
00324 };
00325 
00326 class CActionDistribution;
00327
/// Continuous-action controller configured with an action distribution, a
/// Q-function and a set of static continuous actions.
///
/// NOTE(review): how these are combined to produce the next action is
/// implemented in the .cpp and not visible here.
00328 class CContinuousActionPolicy : public CContinuousActionController
00329 {
00330 protected:
00331         CActionDistribution *distribution;
              /// Scratch array (size and ownership are set up in the .cpp).
00332         double *actionValues;
00333         CAbstractQFunction *continuousActionQFunc;
00334
00335         CActionSet *continuousStaticActions;
00336
00337 public:
              /// @param contAction               the continuous action to control.
              /// @param distribution             action distribution used by the policy.
              /// @param continuousActionQFunc    Q-function used by the policy.
              /// @param continuousStaticActions  set of static continuous actions.
00338         CContinuousActionPolicy(CContinuousAction *contAction, CActionDistribution *distribution, CAbstractQFunction *continuousActionQFunc, CActionSet *continuousStaticActions);
00339         virtual ~CContinuousActionPolicy();
00340
              /// Computes the continuous action for @p state and writes it into @p action.
00341         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00342
00343 };
00344 
/// Continuous-action controller that is also a semi-MDP listener and keeps
/// two noise vectors (last and current) plus the parameters sigma and alpha.
///
/// NOTE(review): presumably generates random noise with spread `sigma` that is
/// smoothed over steps by `alpha`; the formula is in the .cpp — confirm.
00345 class CContinuousActionRandomPolicy : public CContinuousActionController, public CSemiMDPListener
00346 {
00347 protected:
00348         ColumnVector *lastNoise;
00349         ColumnVector *currentNoise;
00350         
00351         double sigma;
00352         double alpha;
00353 public: 
              /// @param action  the continuous action this policy produces values for.
              /// @param sigma   noise parameter (stored meaning defined in the .cpp).
              /// @param alpha   noise parameter (stored meaning defined in the .cpp).
00354         CContinuousActionRandomPolicy(CContinuousAction *action, double sigma, double alpha);
00355         virtual ~CContinuousActionRandomPolicy();
00356
              /// Listener callbacks for the start of an episode and for each step;
              /// the nextStep parameters are unnamed in this declaration.
00357         virtual void newEpisode();
00358         virtual void nextStep(CStateCollection *, CAction *, CStateCollection *);
00359
              /// Writes the next continuous action for @p state into @p action.
00360         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00361         
              /// Hook called when parameters change
              /// (presumably re-reads sigma / alpha — confirm in the .cpp).
00362         virtual void onParametersChanged();
00363         
              /// Return the current / last noise vector.
00364         ColumnVector *getCurrentNoise();
00365         ColumnVector *getLastNoise();
00366 };
00367 
00368 
/// Continuous-action controller that holds a list of other continuous-action
/// controllers together with a weight per controller.
///
/// NOTE(review): presumably the produced action is the weighted sum of the
/// registered controllers' actions — confirm in the .cpp.
/// NOTE(review): std::list and std::map are used here, but <list> / <map> are
/// not included directly by this header; they presumably arrive through one of
/// the project headers — confirm.
00369 class CContinuousActionAddController : public CContinuousActionController
00370 {
00371 protected:
00372         std::list<CContinuousActionController *> *controllers;
00373
00374         std::map<CContinuousActionController *,double> *controllerWeights;
00375
              /// Scratch vector (size and ownership are set up in the .cpp).
00376         ColumnVector *actionValues;
00377 public:
              /// @param action  the continuous action this controller produces values for.
00378         CContinuousActionAddController(CContinuousAction *action);
00379         virtual ~CContinuousActionAddController();
00380
              /// Writes the next continuous action for @p state into @p action.
00381         virtual void getNextContinuousAction(CStateCollection *state, CContinuousActionData *action);
00382
              /// Registers @p controller with the given @p weight (default 1.0).
00383         void addContinuousActionController(CContinuousActionController *controller, double weight = 1.0);
              /// Sets / returns the weight associated with @p controller.
00384         void setControllerWeight(CContinuousActionController *controller, double weight);
00385         double getControllerWeight(CContinuousActionController *controller);
00386
00387 };
00388 
00389 
00390 #endif
00391