Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cacrobotmodel.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef __CACROBOT_H
00033 #define __CACROBOT_H
00034 
00035 #include "cqtconfig.h"
00036 
00037 #include "ctransitionfunction.h"
00038 #include "crewardfunction.h"
00039 #include "ril_debug.h"
00040 
00041 #ifdef RL_TOOLBOX_USE_QT
00042 #include "cqtmodelvisualizer.h"
00043 #endif
00044 
00045 class CAcroBotModel : public CLinearActionContinuousTimeTransitionFunction // Acrobot model exposed as a transition function; this header only declares it, the definitions live elsewhere.
00046 {
00047 protected:
00048         virtual void doSimulationStep(CState *state, double timestep, CAction *action, CActionData *data); // Virtual simulation-step routine; receives the state, a timestep and the action together with its data.
00049 
00050 public:
00051         double uMax; // NOTE(review): presumably the bound on the applied torque (constructor parameter uMax defaults to 2) -- confirm in the implementation.
00052         double g; // NOTE(review): presumably gravitational acceleration (constructor parameter g defaults to 9.8) -- confirm.
00053         double mass1; // NOTE(review): presumably the mass of the first link (constructor default 1.0); units are not stated here.
00054         double mass2; // NOTE(review): presumably the mass of the second link (constructor default 1.0); units are not stated here.
00055         double length1; // NOTE(review): presumably the length of the first link (constructor default 0.5); units are not stated here.
00056         double length2; // NOTE(review): presumably the length of the second link (constructor default 0.5); units are not stated here.
00057         double mu_1; // NOTE(review): presumably a friction coefficient for the first joint (constructor default 0.05) -- confirm.
00058         double mu_2; // NOTE(review): presumably a friction coefficient for the second joint (constructor default 0.05) -- confirm.
00059         // Constructor: only dt is required; every other parameter carries a default value.
00060         CAcroBotModel(double dt, double uMax = 2, double length1 = 0.5, double length2 = 0.5, double mass1 = 1.0, double mass2 = 1.0, double mu_1 = 0.05, double mu_2 = 0.05, double g = 9.8);
00061         virtual ~CAcroBotModel();
00062         // NOTE(review): getB/getA presumably return the B and a terms of dynamics that are linear in the action (cf. base class name); who owns the returned pointers is not visible in this header.
00063         virtual Matrix *getB(CState *state);
00064         virtual ColumnVector *getA(CState *state);
00065 
00066         // Predicate on a state; the failure criterion itself is defined in the implementation.
00067         virtual bool isFailedState(CState *state);
00068         // Returns nothing and takes the state by pointer, so it presumably writes a start state into *state -- confirm.
00069         virtual void getResetState(CState *state);
00070 };
00071 
00072 /*
00073 class CAcroBotModelSutton : public CAcroBotModel
00074 {
00075 protected:
00076 
00077 public:
00078 
00079        double I1; 
00080        double I2; 
00081 
00082        CAcroBotModelSutton(double dt, double uMax = 2, double length1 = 1.0, double length2 = 1.0, double mass1 = 1.0, double mass2 = 1.0, double I1 = 1.0, double I2 = 1.0, double g = 9.8);
00083        virtual ~CAcroBotModelSutton();
00084 
00085        virtual Matrix *getB(CState *state);
00086        virtual ColumnVector *getA(CState *state);
00087 
00088 };
00089 */
00090 
00091 class CAcroBotRewardFunction : public CStateReward // State-based reward (derives from CStateReward) that keeps a pointer to a CAcroBotModel.
00092 {
00093 protected:
00094         CAcroBotModel *model; // Not released by this class's destructor (its inline body is empty); presumably set from the constructor argument -- confirm.
00095 public:
00096         CAcroBotRewardFunction(CAcroBotModel *model, double segmentFactor = 0.5); // segmentFactor is optional and defaults to 0.5.
00097         virtual ~CAcroBotRewardFunction(){};
00098         
00099         double segmentFactor; // NOTE(review): presumably initialised from the constructor parameter of the same name; its effect on the reward is defined in the implementation.
00100         bool useHeighPeak; // NOTE(review): name looks like a typo for "useHeightPeak"; it is public, so renaming it would break existing callers.
00101 
00102         double power; // NOTE(review): usage and initial value are not visible in this header.
00103 
00104         virtual double getStateReward(CState *state); // Returns the reward for the given state as a double.
00105         virtual void getInputDerivation(CState *modelState, ColumnVector *targetState); // NOTE(review): presumably writes the reward's derivative w.r.t. the model state into targetState -- confirm.
00106 
00107 };
00108 
00109 class CAcroBotHeightRewardFunction : public CStateReward // State-based reward (derives from CStateReward); NOTE(review): presumably rewards the height reached by the acrobot -- confirm in the implementation.
00110 {
00111 protected:
00112         CAcroBotModel *model; // Not released by this class's destructor (its inline body is empty); presumably set from the constructor argument -- confirm.
00113         bool useHeighPeak; // NOTE(review): name looks like a typo for "useHeightPeak"; protected, so derived classes may still rely on the current spelling.
00114 public:
00115         CAcroBotHeightRewardFunction(CAcroBotModel *model);
00116         virtual ~CAcroBotHeightRewardFunction() {};
00117 
00118         virtual double getStateReward(CState *state); // Returns the reward for the given state as a double.
00119         virtual void getInputDerivation(CState *modelState, ColumnVector *targetState); // NOTE(review): presumably writes the reward's derivative w.r.t. the model state into targetState -- confirm.
00120 
00121 };
00122 
00123 class CAcroBotVelocityRewardFunction : public CStateReward // State-based reward (derives from CStateReward); NOTE(review): presumably based on the acrobot's velocities -- confirm in the implementation.
00124 {
00125 protected:
00126         CAcroBotModel *model; // Not released by this class's destructor (its inline body is empty); presumably set from the constructor argument -- confirm.
00127 public:
00128         bool invertVelocity; // NOTE(review): presumably flips the sign of the velocity term; initial value is not visible in this header.
00129 
00130         CAcroBotVelocityRewardFunction(CAcroBotModel *model);
00131         virtual ~CAcroBotVelocityRewardFunction(){};
00132 
00133         virtual double getStateReward(CState *state); // Returns the reward for the given state as a double.
00134         virtual void getInputDerivation(CState *modelState, ColumnVector *targetState); // NOTE(review): presumably writes the reward's derivative w.r.t. the model state into targetState -- confirm.
00135 };
00136 
00137 class CAcroBotExpRewardFunction : public CStateReward // State-based reward (derives from CStateReward); NOTE(review): presumably an exponentially shaped reward scaled by expFactor -- confirm in the implementation.
00138 {
00139 protected:
00140         CAcroBotModel *model; // Not released by this class's destructor (its inline body is empty); presumably set from the constructor argument -- confirm.
00141 public:
00142         double expFactor; // NOTE(review): presumably initialised from the constructor parameter of the same name (default 10.0) -- confirm.
00143 
00144         CAcroBotExpRewardFunction(CAcroBotModel *model, double expFactor = 10.0); // expFactor is optional and defaults to 10.0.
00145         virtual ~CAcroBotExpRewardFunction(){};
00146 
00147         virtual double getStateReward(CState *state); // Returns the reward for the given state as a double.
00148         virtual void getInputDerivation(CState *modelState, ColumnVector *targetState); // NOTE(review): presumably writes the reward's derivative w.r.t. the model state into targetState -- confirm.
00149 };
00150 
00151 #ifdef RL_TOOLBOX_USE_QT
00152 
00153 class CQTAcroBotVisualizer : public CQTModelVisualizer // Qt visualizer for a CAcroBotModel; only compiled when RL_TOOLBOX_USE_QT is defined.
00154 {
00155 protected:
00156         double phi1; // NOTE(review): phi1/phi2 are presumably the two joint angles cached for drawing -- confirm in the implementation.
00157         double dphi1; // NOTE(review): dphi1/dphi2 are presumably the corresponding angular velocities -- confirm.
00158         double phi2;
00159         double dphi2;
00160 
00161         CAcroBotModel *acroModel; // Not released by this class's destructor (its inline body is empty); presumably set from the constructor argument -- confirm.
00162 
00163         virtual void doDrawState( QPainter *painter); // Drawing routine that receives the QPainter to draw with.
00164 
00165 public:
00166         CQTAcroBotVisualizer( CAcroBotModel *acroModel, QWidget *parent=0, const char *name=0); // parent and name are optional and default to 0.
00167         virtual ~CQTAcroBotVisualizer() {};
00168 
00169         virtual void newDrawState(CStateCollection *state); // NOTE(review): presumably refreshes the cached phi/dphi values from the given state collection -- confirm.
00170 };
00171 
00172 #endif
00173 
00174 #endif
00175 
00176