Reinforcement Learning Toolbox 2.0
last updated:
General
Documentation
Manual
Tutorial
Class Reference
Master Thesis
Examples
Related Papers
Downloads
Links
News
mailto:webmaster
Main Page     Class Hierarchy   Compound List   File List   Compound Members   File Members

ctestsuit.h

Go to the documentation of this file.
00001 // Copyright (C) 2003
00002 // Gerhard Neumann (gneumann@gmx.net)
00003 // Stephan Neumann (sneumann@gmx.net) 
00004 //                
00005 // This file is part of RL Toolbox.
00006 // http://www.igi.tugraz.at/ril_toolbox
00007 //
00008 // All rights reserved.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions
00012 // are met:
00013 // 1. Redistributions of source code must retain the above copyright
00014 //    notice, this list of conditions and the following disclaimer.
00015 // 2. Redistributions in binary form must reproduce the above copyright
00016 //    notice, this list of conditions and the following disclaimer in the
00017 //    documentation and/or other materials provided with the distribution.
00018 // 3. The name of the author may not be used to endorse or promote products
00019 //    derived from this software without specific prior written permission.
00020 // 
00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00024 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00025 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00026 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00030 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 
00032 
00033 #ifndef C_TESTSuite__H
00034 #define C_TESTSuite__H
00035 
00036 #include <time.h>
00037 #include <stdio.h>
00038 
00039 #include "cparameters.h"
00040 
00041 #include <map>
00042 #include <string>
00043 #include <iostream>
00044 
00045 //using namespace std;
00046 
// Three integer constants sharing the ARCF_ prefix (values 1, 2, 3).
// NOTE(review): nothing in the visible part of this header uses them, so their
// meaning cannot be confirmed from here — check the implementation file.
00047 #define ARCF_IDENTITY 1
00048 #define ARCF_LINEAR 2
00049 #define ARCF_AVERAGE 3
00050 
00051 
// Forward declarations. Each of these types is used in this header only through
// pointers (members, parameters, container elements), so an incomplete type is
// sufficient and the defining headers need not be included here.
// NOTE(review): CFeatureCalculator and CSemiMDPSender also appear in signatures
// further down but are not declared in this list — presumably they arrive via
// "cparameters.h"; confirm.
00052 class CEvaluator;
00053 class CVisitStateCounter;
00054 class CVisitStateActionCounter;
00055 
00056 class CAgent;
00057 class CStateProperties;
00058 class CActionSet;
00059 class CRewardFunction;
00060 
00061 class CAbstractVFunction;
00062 class CStateModifier;
00063 class CErrorSender;
00064 class CFeatureVFunction;
00065 class CFeatureQFunction;
00066                 
00067 class CStateList;
00068 
00069 class CVAverageTDErrorLearner;
00070 class CVAverageTDVarianceLearner;
00071 
00072 class CAgentController;
00073 class CLearnDataObject;
00074 class CAdaptiveParameterCalculator;
00075 class CSemiMDPListener;
00076 class CSemiMarkovDecisionProcess;       
00077 
00078 class CPolicyEvaluation;
00079 class CPolicyIteration;
00080 class CGradientLearner;
00081 
00082 class CGraphDynamicProgramming;
00083 
// Abstract base class of the logger classes declared in this header.
// evaluate() is pure virtual, so only subclasses can be instantiated.
// String types are spelled std::string explicitly: this header has
// `using namespace std;` commented out, so an unqualified `string` would only
// compile if another header leaked a using-directive.
00084 class CTestSuiteEvaluatorLogger
00085 {
00086         protected:
00087                 std::string outputDirectory;
00088         public:
00089                 int nEpisodesBeforeEvaluate; // public, directly writable; NOTE(review): presumably the evaluation interval in episodes — confirm
00090         
00091                 CTestSuiteEvaluatorLogger(std::string outputDirectory); 
00092                 virtual ~CTestSuiteEvaluatorLogger() {};
00093                 
00094                 void setOutputDirectory(std::string outputDirectory);
00095                 
00096                 virtual void evaluate(std::string evaluationDirectory, int trial, int numEpisodes) = 0;      // must be implemented by every concrete logger
00097                 virtual void startNewEvaluation(std::string evaluationDirectory, CParameters *parameters, int trial);
00098                 virtual void endEvaluation() {}; // empty by default; subclasses may override
00099 };
00100 
// Concrete logger that holds a CEvaluator pointer and an output file name.
// Implements the pure virtual evaluate() of CTestSuiteEvaluatorLogger and adds
// evaluateValue(), which takes the same arguments but returns a double.
// NOTE(review): whether the evaluator pointer is owned is not visible here
// (the destructor is empty, so nothing is released by this class itself).
// String types are written std::string because `using namespace std;` is
// commented out in this header.
00101 class CTestSuiteLoggerFromEvaluator : public CTestSuiteEvaluatorLogger
00102 {
00103         protected:
00104                 CEvaluator *evaluator;
00105                 std::string outputFileName;
00106         public:
00107                 CTestSuiteLoggerFromEvaluator(std::string outputDirectory, std::string outputFileName, CEvaluator *evaluator);
00108                 virtual ~CTestSuiteLoggerFromEvaluator() {};
00109                 
00110                 virtual void evaluate(std::string evaluationDirectory, int trial, int numEpisodes);
00111                 virtual double evaluateValue(std::string evaluationDirectory, int trial, int numEpisodes);
00112                 
00113                 virtual void startNewEvaluation(std::string evaluationDirectory, CParameters *parameters, int trial);
00114 };
00115 
// Logger that holds a state list and a CGraphDynamicProgramming object.
// The constructor takes no output directory although the base-class constructor
// requires one; what is passed to the base is decided in the implementation,
// which is not visible in this header.
// String types are written std::string because `using namespace std;` is
// commented out in this header.
00116 class CGraphLogger : public CTestSuiteEvaluatorLogger
00117 {
00118         protected:
00119                 CStateList *states;
00120                 CGraphDynamicProgramming *graph;
00121 
00122 
00123         public:
00124                 CGraphLogger(CStateList *states, CGraphDynamicProgramming *graph);
00125                 virtual ~CGraphLogger();
00126 
00127                 virtual void evaluate(std::string evaluationDirectory, int trial, int numEpisodes);  
00128                 virtual void startNewEvaluation(std::string evaluationDirectory, CParameters *parameters, int trial);
00129 };
00130 
00131 /*
00132 class CAdaptiveTargetGraphLogger : public CAdaptiveTargetGraphLogger
00133 {
00134        protected:
00135                CAdaptiveTargetGraphDynamicProgramming *graph;
00136 
00137 
00138        public:
00139                CAdaptiveTargetGraphLogger(CStateList *states, CGraphDynamicProgramming *graph);
00140                virtual ~CAdaptiveTargetGraphLogger();
00141 
00142                virtual void evaluate(string evaluationDirectory, int trial, int numEpisodes);  
00143                virtual void startNewEvaluation(string evaluationDirectory, CParameters *parameters, int trial);
00144 };*/
00145 
00146 
// Logger that holds an agent, a reward function, state properties, an action
// set and two counters (nEpisodes, nSteps), all supplied through the constructor.
// NOTE(review): the class name suggests it writes episode data in a
// Matlab-readable form; the output format is not visible here — confirm.
// String types are written std::string because `using namespace std;` is
// commented out in this header.
00147 class CMatlabEpisodeOutputLogger : public CTestSuiteEvaluatorLogger
00148 {
00149         protected:
00150                 CAgent *agent;
00151                 int nEpisodes;
00152                 int nSteps;     
00153 
00154                 CStateProperties *modifier; // named "modifier" but typed CStateProperties*
00155                 CActionSet *actions;
00156                 
00157                 CRewardFunction *rewardFunction;
00158         public:
00159                 CMatlabEpisodeOutputLogger( CAgent *agent, CRewardFunction *rewardFunction, CStateProperties *modifier, CActionSet *actions, int nEpisodes, int nSteps);
00160                 virtual ~CMatlabEpisodeOutputLogger();
00161                 
00162                 virtual void evaluate(std::string evaluationDirectory, int trial, int numEpisodes);  
00163                 virtual void startNewEvaluation(std::string evaluationDirectory, CParameters *parameters, int trial);
00164 };
00165 
// Logger built around a value function (CAbstractVFunction) plus three
// CFeatureVFunction tables (visit counter, average error, average variance) and
// three publicly exposed learner objects. Listeners can be attached to and
// detached from an agent via addListenersToAgent()/removeListenersToAgent().
// NOTE(review): std::list is used here but <list> is not included directly by
// this header, and CFeatureCalculator / CSemiMDPSender are not forward-declared
// in it — presumably all three arrive through "cparameters.h"; confirm.
// String types are written std::string because `using namespace std;` is
// commented out in this header.
00166 class CMatlabVAnalyzerLogger : public CTestSuiteEvaluatorLogger
00167 {
00168         protected:
00169                 CAbstractVFunction *vFunction;
00170                 
00171                 std::list<CStateModifier *> *modifiers;
00172 
00173                 CErrorSender *vLearner;
00174         
00175                 CFeatureVFunction *visitCounter;
00176                 CFeatureVFunction *averageError;
00177                 CFeatureVFunction *averageVariance;
00178 
00179                 // NOTE(review): dim1/dim2 and part1/part2 presumably select two state
00180                 // dimensions and their partition counts — confirm in the implementation.
00179                 int dim1;
00180                 int dim2;               
00181 
00182                 int part1;
00183                 int part2;
00184         
00185                 CStateList *states;
00186 
00187         public:
00188                 int nTrialEvaluate;             
00189         
00190                 CVisitStateCounter *visitCounterLearner;
00191                         
00192                 CVAverageTDErrorLearner *errorLearner;
00193                 CVAverageTDVarianceLearner *varianceLearner;
00194         
00195                 CMatlabVAnalyzerLogger(CAbstractVFunction *l_vFunction, CFeatureCalculator *featCalc, CErrorSender *l_vLearner, CStateList *l_States, int l_dim1, int l_dim2, int l_part1, int l_part2, std::list<CStateModifier *> *l_modifiers);
00196                 virtual ~CMatlabVAnalyzerLogger();
00197                 
00198                 virtual void evaluate(std::string evaluationDirectory, int trial, int numEpisodes);  
00199                 virtual void startNewEvaluation(std::string evaluationDirectory, CParameters *parameters, int trial);
00200                 
00201                 virtual void addListenersToAgent(CSemiMDPSender *agent);
00202                 virtual void removeListenersToAgent(CSemiMDPSender *agent);
00203 };
00204 
// Extension of CMatlabVAnalyzerLogger that additionally holds a Q-function, a
// state-action visit table and a publicly exposed state-action visit counter.
// Two constructors exist: one takes only the Q-function, the other also takes a
// CFeatureVFunction.
// String types are written std::string because `using namespace std;` is
// commented out in this header.
00205 class CMatlabQAnalyzerLogger : public CMatlabVAnalyzerLogger
00206 {
00207         protected:
00208                 CFeatureQFunction *qFunction;
00209                 CFeatureQFunction *saVisits;
00210                 
00211                 bool delVFunction; // NOTE(review): presumably marks a V-function created internally that the destructor must delete — confirm
00212         public:
00213                 CVisitStateActionCounter *visitStateActionCounterLearner;
00214                 
00215                 CMatlabQAnalyzerLogger(CFeatureQFunction *l_qFunction, CFeatureCalculator *featCalc, CErrorSender *l_vLearner, CStateList *l_States, int l_dim1, int l_dim2, int l_part1, int l_part2, std::list<CStateModifier *> *l_modifiers);
00216                 CMatlabQAnalyzerLogger(CFeatureVFunction *vFunction, CFeatureQFunction *l_qFunction, CFeatureCalculator *featCalc, CErrorSender *l_vLearner, CStateList *l_States, int l_dim1, int l_dim2, int l_part1, int l_part2, std::list<CStateModifier *> *l_modifiers);
00217                 virtual ~CMatlabQAnalyzerLogger();
00218                 
00219                 virtual void evaluate(std::string evaluationDirectory, int trial, int numEpisodes);  
00220                 virtual void startNewEvaluation(std::string evaluationDirectory, CParameters *parameters, int trial);
00221                 
00222                 virtual void addListenersToAgent(CSemiMDPSender *agent);
00223                 virtual void removeListenersToAgent(CSemiMDPSender *agent);
00224 };
00225 
// Abstract base class of the test suites in this header. It bundles an agent,
// a controller, a separate evaluation controller, a list of learn-data objects
// (with a per-object flag map) and a list of adaptive parameter calculators.
// learn() is pure virtual and is supplied by the subclasses.
// Inherits virtually from CParameterObject.
// String types are written std::string because `using namespace std;` is
// commented out in this header.
00226 class CTestSuite :  virtual public CParameterObject
00227 {
00228 protected:
00229 
00230         CAgentController *controller;
00231         CAgentController *evaluationController;
00232         std::list<CLearnDataObject *> *learnDataObjects;
00233         
00234         std::map<CLearnDataObject *, bool> *saveLearnData; // NOTE(review): presumably the per-object flag passed to addLearnDataObject() — confirm
00235         
00236         std::list<CAdaptiveParameterCalculator *> *paramCalculators;
00237 
00238         CAgent *agent;
00239 
00240         std::string testSuiteName;
00241         std::string learnDataFileName;
00242 
00243 public:
00244         // NOTE(review): testSuiteName is a non-const char*, so a string literal cannot
00245         // be passed directly under C++11 and later; changing it to const char* would
00246         // also require changing the out-of-view definitions.
00244         CTestSuite(CAgent *agent, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00245         CTestSuite(CAgent *agent, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName);
00246         virtual ~CTestSuite();
00247         
00248         virtual void addParamCalculator(CAdaptiveParameterCalculator *paramCalculator);
00249         virtual void resetParamCalculators();
00250         
00251         virtual void saveLearnedData(FILE *stream);
00252         virtual void loadLearnedData(FILE *stream);
00253 
00254         virtual void resetLearnedData();
00255         
00256         void addLearnDataObject(CLearnDataObject *learnDataObject, bool saveLearnData = true);
00257 
00258         virtual void learn(int nEpisodes, int nStepsPerEpisode) = 0; // implemented by each concrete test suite
00259 
00260         virtual CAgentController *getController();
00261         virtual void setController(CAgentController *controller);
00262         virtual CAgentController *getEvaluationController();
00263         virtual void setEvaluationController(CAgentController *evaluationController);
00264 
00265         virtual void deleteObjects();
00266 
00267         std::string getTestSuiteName();
00268         void setTestSuiteName(std::string name);
00269 };
00270 
00271 
00272 
// Test suite that keeps a list of CSemiMDPListener objects and implements the
// pure virtual CTestSuite::learn(). Listeners can be attached to and removed
// from the agent via addLearnersToAgent()/removeLearnersFromAgent().
00273 class CListenerTestSuite : public CTestSuite
00274 {
00275 protected:
00276         std::list<CSemiMDPListener *> *learnerObjects;
00277         std::map<CSemiMDPListener *, CSemiMarkovDecisionProcess *> *addToAgent; // NOTE(review): presumably maps each listener to the process it is attached to — confirm
00278 public:
00279         CListenerTestSuite(CAgent *agent, CSemiMDPListener *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00280         CListenerTestSuite(CAgent *agent, CSemiMDPListener *learner, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName);
00281 
00282         virtual ~CListenerTestSuite();
00283 
00284         virtual void addLearnersToAgent();
00285         virtual void removeLearnersFromAgent();
00286 
00287         void addLearnerObject(CSemiMDPListener *listener, bool addParams = true, bool addBack = true, CSemiMarkovDecisionProcess *remove = NULL); // all arguments after the listener are optional
00288 
00289         virtual void learn(int nEpisodes, int nStepsPerEpisode);
00290         virtual void deleteObjects();
00291         
00292         std::list<CSemiMDPListener *> *getLearnerList() {return learnerObjects;}; // returns the internal list pointer itself, not a copy
00293 };
00294 
00295 class CPolicyEvaluation; // repeated forward declaration; the same name is already declared earlier in this header
00296 
// Test suite that holds a CPolicyEvaluation object. It implements the pure
// virtual CTestSuite::learn() and overrides resetLearnedData().
// Unlike CTestSuite it offers no constructor taking a separate evaluation controller.
00297 class CPolicyEvaluationTestSuite : public CTestSuite
00298 {
00299 protected:
00300         CPolicyEvaluation *evaluation;
00301 public:
00302         CPolicyEvaluationTestSuite(CAgent *agent, CPolicyEvaluation *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00303 
00304         virtual ~CPolicyEvaluationTestSuite();
00305 
00306         virtual void learn(int nEpisodes, int nStepsPerEpisode);
00307         
00308         virtual void resetLearnedData();
00309 };
00310 
00311 class CPolicyIteration; // repeated forward declaration; the same name is already declared earlier in this header
00312 
// Test suite that holds a CPolicyIteration object. It implements the pure
// virtual CTestSuite::learn() and overrides resetLearnedData().
// Unlike CTestSuite it offers no constructor taking a separate evaluation controller.
00313 class CPolicyIterationTestSuite : public CTestSuite
00314 {
00315 protected:
00316         CPolicyIteration *policyIteration;
00317 public:
00318         CPolicyIterationTestSuite(CAgent *agent, CPolicyIteration *policyIteration, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00319 
00320         virtual ~CPolicyIterationTestSuite();
00321 
00322         virtual void learn(int nEpisodes, int nStepsPerEpisode);
00323 
00324         virtual void resetLearnedData();
00325 };
00326 
// Test suite that holds a CGradientLearner. It implements the pure virtual
// CTestSuite::learn() and overrides deleteObjects() and resetLearnedData().
// Both constructors accept nMaxGradientUpdates (default 1); no data member
// declared in this class stores it, so where the value ends up is decided in
// the implementation, which is not visible here.
00327 class CPolicyGradientTestSuite : public CTestSuite
00328 {
00329 protected:
00330         CGradientLearner *learner;
00331 
00332 public:
00333         CPolicyGradientTestSuite(CAgent *agent, CGradientLearner *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName, int nMaxGradientUpdates = 1);
00334         CPolicyGradientTestSuite(CAgent *agent, CGradientLearner *learner, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName, int nMaxGradientUpdates = 1);
00335 
00336         virtual ~CPolicyGradientTestSuite();
00337         virtual void deleteObjects();
00338 
00339         virtual void learn(int nEpisodes, int nStepsPerEpisode);
00340         virtual void resetLearnedData();
00341 };
00342 
// Registry of test suites stored in a map keyed by a string; a suite can be
// looked up either by name or by integer index. The class also keeps a list of
// untyped pointers registered through addObjectToDelete().
// The map key is written std::string because `using namespace std;` is
// commented out in this header.
00343 class CTestSuiteCollection 
00344 {
00345 protected:
00346         std::map<std::string, CTestSuite *> *testSuiteMap;
00347         std::list<void *> *objectsToDelete; // NOTE(review): applying delete to a void* is undefined behaviour — check what deleteObjects() does with these entries
00348 public:
00349         CTestSuiteCollection();
00350         virtual ~CTestSuiteCollection();
00351 
00352         void addTestSuite(CTestSuite *testSuite);
00353         void removeTestSuite(CTestSuite *testSuite);
00354 
00355         void removeAllTestSuites();
00356 
00357         int getNumTestSuites();
00358         CTestSuite *getTestSuite(std::string testSuiteName);
00359         CTestSuite *getTestSuite(int index);
00360         
00361         void addObjectToDelete(void *object);
00362         void deleteObjects();
00363 };
00364 
// Plain record holding the figures of one evaluation trial: two double values
// (average and best), the trial number, a time and a date string.
// NOTE(review): the unit of evaluationTime and the format of evaluationDate are
// not visible in this header — confirm in the implementation.
// The date is written std::string because `using namespace std;` is commented
// out in this header.
00365 typedef struct 
00366 {
00367         double averageValue;
00368         double bestValue;
00369         unsigned int trialNumber;
00370         double evaluationTime;
00371         std::string evaluationDate;
00372 } EvaluationValue;
00373 
// List of EvaluationValue records, stored by value.
00374 typedef std::list<EvaluationValue> EvaluationValues;
00375 
// Abstract base class for evaluating a CTestSuite. It stores the agent, a base
// directory, the test suite, trial counters, a list of loggers and a map from
// CParameters objects to their EvaluationValues lists. Subclasses supply the
// four protected pure virtual hooks: newEvaluationTrial(), doEpisode(),
// getEvaluationValue() and isFinished().
// Inherits virtually from CParameterObject.
// String types are written std::string because `using namespace std;` is
// commented out in this header.
00376 class CTestSuiteEvaluator : virtual public CParameterObject
00377 {
00378 protected:
00379         CAgent *agent;
00380         
00381         std::list<CTestSuiteEvaluatorLogger *> *evaluators;
00382         
00383         std::string baseDirectory;
00384         CTestSuite *testSuite;
00385 
00386         unsigned int nTrials;
00387         unsigned int trialNumber;
00388         
00389         bool exception; // NOTE(review): presumably set when a trial failed — confirm
00390 
00391         std::list<CParameters *> *parameterList;
00392         std::map<CParameters *, EvaluationValues *> *evaluations;
00393 
00394         virtual void newEvaluationTrial(CTestSuite *testSuite, EvaluationValue *evaluationData) = 0;
00395         virtual void doEpisode(CTestSuite *testSuite, int nEpisode) = 0;
00396         virtual void getEvaluationValue(EvaluationValue *evaluationData) = 0;
00397         virtual bool isFinished(unsigned int nEpisode) = 0; // same type as before (`int unsigned`), now spelled like the override in CAverageRewardTestSuiteEvaluator
00398         
00399         CParameters *getParametersObject(CParameters *);
00400         
00401 public:
00402         CTestSuiteEvaluator(CAgent *agent, std::string baseDirectory, CTestSuite *testSuite, int nTrials);
00403         virtual ~CTestSuiteEvaluator();
00404         
00405         std::string getEvaluationDirectory();
00406         int getNewTrialNumber();
00407         
00408         std::string getLearnDataFileName(int trialNumber);
00409 
00410         void checkDirectories();
00411 
00412         virtual void loadEvaluationData(std::string filename);
00413         virtual void saveEvaluationData(std::string filename);
00414         virtual void saveEvaluationDataMatlab(std::string filename);
00415         
00416         
00417         // Note: in the four functions below the parameter is named `testSuite`
00418         // but its type is CParameters*, not CTestSuite*.
00417         virtual void doEvaluationTrial(CParameters *testSuite, EvaluationValue *evaluationData);
00418         virtual void evaluateParameters(CParameters *testSuite);
00419 
00420         virtual double getAverageValue(CParameters *testSuite);
00421         virtual double getBestValue(CParameters *testSuite);
00422         
00423         virtual EvaluationValues *getEvaluationValues();
00424         
00425         virtual void addPolicyEvaluator(CTestSuiteEvaluatorLogger *evaluator);
00426 };
00427 
00428 /*
00429 class CTestSuiteNeededStepsEvaluator : public CTestSuiteEvaluator
00430 {
00431 protected:
00432        std::list<double *> *succeded;
00433 
00434        unsigned int totalLearnEpisodes;
00435        unsigned int stepsLearnEpisode;
00436        unsigned int nTrials;
00437        unsigned int episodesBeforeEvaluate;
00438 
00439        unsigned int nValues;
00440 
00441        bool maxStepsSucceded;
00442 public: 
00443        CTestSuiteNeededStepsEvaluator(CAgent *agent, string testSuiteCollectionName, int totalLearnEpisodes, int stepsLearnEpisode, int episodesBeforeEvaluate, int nTrials, bool maxStepsSucceded = false);
00444        virtual ~CTestSuiteNeededStepsEvaluator();
00445 
00446        virtual void loadEvaluationData(CParameters *testSuite, FILE *file);
00447 
00448        virtual void doEvaluationTrial(CTestSuite *testSuiteName, FILE *evaluationFile, const char *learnDataFileName);
00449 
00450        virtual double getEvaluationValue(std::list<double *> *values);
00451 
00452        double getPercentageSucceded();
00453 
00454        virtual void clearValues();
00455 
00456 };
00457 */
00458 
// Concrete CTestSuiteEvaluator: implements the four pure virtual hooks of the
// base class and holds a CEvaluator, running best/average values, an evaluation
// counter and a FILE handle.
// The three episode/step settings are public unsigned members, while the
// constructor receives them as plain int.
// The directory is written std::string because `using namespace std;` is
// commented out in this header.
00459 class CAverageRewardTestSuiteEvaluator : public CTestSuiteEvaluator
00460 {
00461 protected:
00462         int numEvals;
00463         
00464         double bestValue;
00465         double averageValue;
00466         
00467         CEvaluator *evaluator;
00468                         
00469         virtual void newEvaluationTrial(CTestSuite *testSuite, EvaluationValue *evaluationData);
00470         virtual void doEpisode(CTestSuite *testSuite, int nEpisode);
00471         virtual void getEvaluationValue(EvaluationValue *evaluationData);
00472         virtual bool isFinished(unsigned int nEpisode);
00473 
00474         FILE *evaluationFile; // NOTE(review): where this handle is opened and closed is not visible in this header — confirm
00475 public:
00476         unsigned int episodesBeforeEvaluate;
00477         unsigned int totalLearnEpisodes;
00478         unsigned int stepsLearnEpisode;
00479 
00480         CAverageRewardTestSuiteEvaluator(CAgent *agent, std::string baseDirectory, CTestSuite *testSuite, CEvaluator *evaluator, int totalLearnEpisodes, int episodesBeforeEvaluate, int stepsLearnEpisode, int nTrials);
00481         virtual ~CAverageRewardTestSuiteEvaluator();
00482 };
00483 
00484 
00485 
00486 #endif
00487