Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

cpendulummodel.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 #ifndef __CPENDULUMMODEL_H
00033 #define __CPENDULUMMODEL_H
00034 
00035 #include "cqtconfig.h"
00036 // Fallback: define M_PI only if it is not already defined at this point.
00037 #ifndef M_PI
00038 #define M_PI 3.14159265358979323846
00039 #endif
00040 
00041 #include "ctransitionfunction.h"
00042 #include "crewardfunction.h"
00043 #include "cagentlistener.h"
00044 
00045 #ifdef RL_TOOLBOX_USE_QT
00046 #include "cqtmodelvisualizer.h"
00047 #endif
00048 // Pendulum model derived from CLinearActionContinuousTimeTransitionFunction; its physical parameters are public members.
00049 class CPendulumModel : public CLinearActionContinuousTimeTransitionFunction
00050 {
00051 protected:
00052         virtual void doSimulationStep(CState *state, double timestep, CAction *action, CActionData *data);
00053         // ^ simulation-step hook; its definition is not part of this header.
00054 public:
00055         double uMax; // presumably the bound on the control input -- TODO confirm
00056         double dPhiMax; // presumably the bound on the angular velocity -- TODO confirm
00057         double g; // presumably gravitational acceleration -- TODO confirm
00058         double mass;
00059         double length;
00060         double mu; // friction
00061         // Only dt is mandatory; the defaulted arguments presumably initialise the same-named members above (constructor body not shown here).
00062         CPendulumModel(double dt, double uMax = 5, double dPhiMax = 10, double length = 1, double mass = 1, double mu = 1.0, double g = 9.81);
00063         ~CPendulumModel();
00064         // NOTE(review): getA()/getB() look like the A(s) and B(s) terms of dynamics of the form A(s) + B(s)*u -- confirm against the base class.
00065         virtual Matrix *getB(CState *state);
00066         virtual ColumnVector *getA(CState *state);
00067         // Reports whether the given state counts as failed; the criterion lives in the implementation file.
00068         virtual bool isFailedState(CState *state);
00069 
00070 
00071         // Takes the state object to fill in; presumably writes the episode start state into resetState -- TODO confirm.
00072         virtual void getResetState(CState *resetState);
00073         // Sets the parameter called paramName to value; the accepted names are not visible in this header.
00074         virtual void setParameter(string paramName, double value);
00075 
00076 };
00077 // State reward (derived from CStateReward) that is constructed from a CPendulumModel.
00078 class CPendulumRewardFunction : public CStateReward
00079 {
00080 public:
00081         double rewardFactor; // presumably scales the returned reward -- TODO confirm in the implementation
00082         CPendulumRewardFunction(CPendulumModel *model);
00083         // getStateReward() returns the reward for a state; getInputDerivation() presumably writes the reward's derivative w.r.t. modelState into targetState -- TODO confirm.
00084         virtual double getStateReward(CState *state);
00085         virtual void getInputDerivation(CState *modelState, ColumnVector *targetState);
00086 
00087 };
00088 // CSemiMDPListener that exposes an up-step count and an up-time; presumably it counts steps spent within phi_up of upright -- TODO confirm.
00089 class CPendulumUpTimeCalculator : public CSemiMDPListener
00090 {
00091 protected:
00092         double phi_up; // NOTE(review): looks like the angle threshold for "up" -- confirm
00093         double dt; // NOTE(review): looks like the duration of one step -- confirm
00094         int up_steps; // presumably the value returned by getUpSteps() -- TODO confirm
00095 public:
00096         CPendulumUpTimeCalculator(double phi_up, double dt);
00097         // Listener callbacks: nextStep() receives one transition; newEpisode() presumably resets the counter -- TODO confirm.
00098         virtual void nextStep(CStateCollection *oldState, CAction *action, CStateCollection *nextState);
00099         virtual void newEpisode();
00100         // Accessors; getUpTime() presumably equals up_steps * dt -- TODO confirm.
00101         double getUpTime();
00102         int getUpSteps();
00103 };
00104 
00105 /*
00106 class CTestSuitePendulumUpTimeCalculatorEvaluator : public CTestSuiteEpisodesToLearnEvaluator
00107 {
00108 protected:
00109        int neededUpSteps;
00110        CPendulumUpTimeCalculator *upTimeCalc;
00111 
00112        virtual bool isEpisodeSuccessFull(FILE *stream);
00113 public:
00114        CTestSuitePendulumUpTimeCalculatorEvaluator(CAgent *agent, int neededSuccEpisodes, int maxEpisodes, int stepsPerEpisode, int neededUpSteps,double phi_up);
00115        ~CTestSuitePendulumUpTimeCalculatorEvaluator();
00116 };*/
00117 
00118 #ifdef RL_TOOLBOX_USE_QT
00119 // Visualizer for a CPendulumModel, derived from CQTModelVisualizer; only compiled when RL_TOOLBOX_USE_QT is defined.
00120 class CQTPendulumVisualizer : public CQTModelVisualizer
00121 {
00122 protected:
00123         double phi; // presumably the pendulum angle taken from the last newDrawState() call -- TODO confirm
00124         double dphi; // presumably the matching angular velocity -- TODO confirm
00125 
00126         CPendulumModel *pendModel;
00127         // Drawing hook that receives the QPainter to paint with; its definition is not part of this header.
00128         virtual void doDrawState( QPainter *painter);
00129 
00130 public:
00131         CQTPendulumVisualizer(CPendulumModel *pendModel, QWidget *parent=0, const char *name=0);
00132         // Receives the state collection to display; presumably caches phi/dphi for doDrawState() -- TODO confirm.
00133         virtual void newDrawState(CStateCollection *state);
00134 };
00135 
00136 #endif
00137 
00138 #endif