00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef C_TESTSuite__H
00034 #define C_TESTSuite__H
00035
#include <time.h>
#include <stdio.h>

#include "cparameters.h"

#include <iostream>
#include <list>
#include <map>
#include <string>
00044
00045
00046
// Integer selector constants sharing the ARCF_ prefix.
// NOTE(review): nothing in this header consumes them; the consumer and the
// exact meaning of "ARCF" are not visible here - confirm before changing values.
#define ARCF_IDENTITY   1
#define ARCF_LINEAR     2
#define ARCF_AVERAGE    3
00050
00051
// Forward declarations. Every type below is used in this header only through
// pointers, so an incomplete type is sufficient and the defining headers do
// not have to be included here.

// Evaluation and visit counting
class CEvaluator;
class CVisitStateCounter;
class CVisitStateActionCounter;

// Agent, state/action description and rewards
class CAgent;
class CStateProperties;
class CActionSet;
class CRewardFunction;

// Value functions, state modifiers and error reporting
class CAbstractVFunction;
class CStateModifier;
class CErrorSender;
class CFeatureVFunction;
class CFeatureQFunction;

// Referenced by the signatures of the Matlab analyzer loggers. Declared here
// so that this header does not rely on another header having been included
// first.
// NOTE(review): assumes both are class types (not typedefs) - confirm against
// their defining headers.
class CFeatureCalculator;
class CSemiMDPSender;

class CStateList;

class CVAverageTDErrorLearner;
class CVAverageTDVarianceLearner;

// Controllers, learn data and listeners
class CAgentController;
class CLearnDataObject;
class CAdaptiveParameterCalculator;
class CSemiMDPListener;
class CSemiMarkovDecisionProcess;

// Learning algorithms wrapped by the concrete test suites
class CPolicyEvaluation;
class CPolicyIteration;
class CGradientLearner;

class CGraphDynamicProgramming;
00083
00084 class CTestSuiteEvaluatorLogger
00085 {
00086 protected:
00087 string outputDirectory;
00088 public:
00089 int nEpisodesBeforeEvaluate;
00090
00091 CTestSuiteEvaluatorLogger(string outputDirectory);
00092 virtual ~CTestSuiteEvaluatorLogger() {};
00093
00094 void setOutputDirectory(string outputDirectory);
00095
00096 virtual void evaluate(string evaluationDirectory, int trial, int numEpisodes) = 0;
00097 virtual void startNewEvaluation(string evaluationDirectory, CParameters *parameters, int trial);
00098 virtual void endEvaluation() {};
00099 };
00100
00101 class CTestSuiteLoggerFromEvaluator : public CTestSuiteEvaluatorLogger
00102 {
00103 protected:
00104 CEvaluator *evaluator;
00105 string outputFileName;
00106 public:
00107 CTestSuiteLoggerFromEvaluator(string outputDirectory, string outputFileName, CEvaluator *evaluator);
00108 virtual ~CTestSuiteLoggerFromEvaluator() {};
00109
00110 virtual void evaluate(string evaluationDirectory, int trial, int numEpisodes);
00111 virtual double evaluateValue(string evaluationDirectory, int trial, int numEpisodes);
00112
00113 virtual void startNewEvaluation(string evaluationDirectory, CParameters *parameters, int trial);
00114 };
00115
00116 class CGraphLogger : public CTestSuiteEvaluatorLogger
00117 {
00118 protected:
00119 CStateList *states;
00120 CGraphDynamicProgramming *graph;
00121
00122
00123 public:
00124 CGraphLogger(CStateList *states, CGraphDynamicProgramming *graph);
00125 virtual ~CGraphLogger();
00126
00127 virtual void evaluate(string evaluationDirectory, int trial, int numEpisodes);
00128 virtual void startNewEvaluation(string evaluationDirectory, CParameters *parameters, int trial);
00129 };
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147 class CMatlabEpisodeOutputLogger : public CTestSuiteEvaluatorLogger
00148 {
00149 protected:
00150 CAgent *agent;
00151 int nEpisodes;
00152 int nSteps;
00153
00154 CStateProperties *modifier;
00155 CActionSet *actions;
00156
00157 CRewardFunction *rewardFunction;
00158 public:
00159 CMatlabEpisodeOutputLogger( CAgent *agent, CRewardFunction *rewardFunction, CStateProperties *modifier, CActionSet *actions, int nEpisodes, int nSteps);
00160 virtual ~CMatlabEpisodeOutputLogger();
00161
00162 virtual void evaluate(string evaluationDirectory, int trial, int numEpisodes);
00163 virtual void startNewEvaluation(string evaluationDirectory, CParameters *parameters, int trial);
00164 };
00165
00166 class CMatlabVAnalyzerLogger : public CTestSuiteEvaluatorLogger
00167 {
00168 protected:
00169 CAbstractVFunction *vFunction;
00170
00171 std::list<CStateModifier *> *modifiers;
00172
00173 CErrorSender *vLearner;
00174
00175 CFeatureVFunction *visitCounter;
00176 CFeatureVFunction *averageError;
00177 CFeatureVFunction *averageVariance;
00178
00179 int dim1;
00180 int dim2;
00181
00182 int part1;
00183 int part2;
00184
00185 CStateList *states;
00186
00187 public:
00188 int nTrialEvaluate;
00189
00190 CVisitStateCounter *visitCounterLearner;
00191
00192 CVAverageTDErrorLearner *errorLearner;
00193 CVAverageTDVarianceLearner *varianceLearner;
00194
00195 CMatlabVAnalyzerLogger(CAbstractVFunction *l_vFunction, CFeatureCalculator *featCalc, CErrorSender *l_vLearner, CStateList *l_States, int l_dim1, int l_dim2, int l_part1, int l_part2, std::list<CStateModifier *> *l_modifiers);
00196 virtual ~CMatlabVAnalyzerLogger();
00197
00198 virtual void evaluate(string evaluationDirectory, int trial, int numEpisodes);
00199 virtual void startNewEvaluation(string evaluationDirectory, CParameters *parameters, int trial);
00200
00201 virtual void addListenersToAgent(CSemiMDPSender *agent);
00202 virtual void removeListenersToAgent(CSemiMDPSender *agent);
00203 };
00204
00205 class CMatlabQAnalyzerLogger : public CMatlabVAnalyzerLogger
00206 {
00207 protected:
00208 CFeatureQFunction *qFunction;
00209 CFeatureQFunction *saVisits;
00210
00211 bool delVFunction;
00212 public:
00213 CVisitStateActionCounter *visitStateActionCounterLearner;
00214
00215 CMatlabQAnalyzerLogger(CFeatureQFunction *l_qFunction, CFeatureCalculator *featCalc, CErrorSender *l_vLearner, CStateList *l_States, int l_dim1, int l_dim2, int l_part1, int l_part2, std::list<CStateModifier *> *l_modifiers);
00216 CMatlabQAnalyzerLogger(CFeatureVFunction *vFunction, CFeatureQFunction *l_qFunction, CFeatureCalculator *featCalc, CErrorSender *l_vLearner, CStateList *l_States, int l_dim1, int l_dim2, int l_part1, int l_part2, std::list<CStateModifier *> *l_modifiers);
00217 virtual ~CMatlabQAnalyzerLogger();
00218
00219 virtual void evaluate(string evaluationDirectory, int trial, int numEpisodes);
00220 virtual void startNewEvaluation(string evaluationDirectory, CParameters *parameters, int trial);
00221
00222 virtual void addListenersToAgent(CSemiMDPSender *agent);
00223 virtual void removeListenersToAgent(CSemiMDPSender *agent);
00224 };
00225
00226 class CTestSuite : virtual public CParameterObject
00227 {
00228 protected:
00229
00230 CAgentController *controller;
00231 CAgentController *evaluationController;
00232 std::list<CLearnDataObject *> *learnDataObjects;
00233
00234 std::map<CLearnDataObject *, bool> *saveLearnData;
00235
00236 std::list<CAdaptiveParameterCalculator *> *paramCalculators;
00237
00238 CAgent *agent;
00239
00240 string testSuiteName;
00241 string learnDataFileName;
00242
00243 public:
00244 CTestSuite(CAgent *agent, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00245 CTestSuite(CAgent *agent, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName);
00246 virtual ~CTestSuite();
00247
00248 virtual void addParamCalculator(CAdaptiveParameterCalculator *paramCalculator);
00249 virtual void resetParamCalculators();
00250
00251 virtual void saveLearnedData(FILE *stream);
00252 virtual void loadLearnedData(FILE *stream);
00253
00254 virtual void resetLearnedData();
00255
00256 void addLearnDataObject(CLearnDataObject *learnDataObject, bool saveLearnData = true);
00257
00258 virtual void learn(int nEpisodes, int nStepsPerEpisode) = 0;
00259
00260 virtual CAgentController *getController();
00261 virtual void setController(CAgentController *controller);
00262 virtual CAgentController *getEvaluationController();
00263 virtual void setEvaluationController(CAgentController *evaluationController);
00264
00265 virtual void deleteObjects();
00266
00267 string getTestSuiteName();
00268 void setTestSuiteName(string name);
00269 };
00270
00271
00272
00273 class CListenerTestSuite : public CTestSuite
00274 {
00275 protected:
00276 std::list<CSemiMDPListener *> *learnerObjects;
00277 std::map<CSemiMDPListener *, CSemiMarkovDecisionProcess *> *addToAgent;
00278 public:
00279 CListenerTestSuite(CAgent *agent, CSemiMDPListener *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00280 CListenerTestSuite(CAgent *agent, CSemiMDPListener *learner, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName);
00281
00282 virtual ~CListenerTestSuite();
00283
00284 virtual void addLearnersToAgent();
00285 virtual void removeLearnersFromAgent();
00286
00287 void addLearnerObject(CSemiMDPListener *listener, bool addParams = true, bool addBack = true, CSemiMarkovDecisionProcess *remove = NULL);
00288
00289 virtual void learn(int nEpisodes, int nStepsPerEpisode);
00290 virtual void deleteObjects();
00291
00292 std::list<CSemiMDPListener *> *getLearnerList() {return learnerObjects;};
00293 };
00294
00295 class CPolicyEvaluation;
00296
00297 class CPolicyEvaluationTestSuite : public CTestSuite
00298 {
00299 protected:
00300 CPolicyEvaluation *evaluation;
00301 public:
00302 CPolicyEvaluationTestSuite(CAgent *agent, CPolicyEvaluation *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00303
00304 virtual ~CPolicyEvaluationTestSuite();
00305
00306 virtual void learn(int nEpisodes, int nStepsPerEpisode);
00307
00308 virtual void resetLearnedData();
00309 };
00310
00311 class CPolicyIteration;
00312
00313 class CPolicyIterationTestSuite : public CTestSuite
00314 {
00315 protected:
00316 CPolicyIteration *policyIteration;
00317 public:
00318 CPolicyIterationTestSuite(CAgent *agent, CPolicyIteration *policyIteration, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName);
00319
00320 virtual ~CPolicyIterationTestSuite();
00321
00322 virtual void learn(int nEpisodes, int nStepsPerEpisode);
00323
00324 virtual void resetLearnedData();
00325 };
00326
00327 class CPolicyGradientTestSuite : public CTestSuite
00328 {
00329 protected:
00330 CGradientLearner *learner;
00331
00332 public:
00333 CPolicyGradientTestSuite(CAgent *agent, CGradientLearner *learner, CAgentController *controller, CLearnDataObject *vFunction, char *testSuiteName, int nMaxGradientUpdates = 1);
00334 CPolicyGradientTestSuite(CAgent *agent, CGradientLearner *learner, CAgentController *controller, CAgentController *evaluationController, CLearnDataObject *vFunction, char *testSuiteName, int nMaxGradientUpdates = 1);
00335
00336 virtual ~CPolicyGradientTestSuite();
00337 virtual void deleteObjects();
00338
00339 virtual void learn(int nEpisodes, int nStepsPerEpisode);
00340 virtual void resetLearnedData();
00341 };
00342
00343 class CTestSuiteCollection
00344 {
00345 protected:
00346 std::map<string, CTestSuite *> *testSuiteMap;
00347 std::list<void *> *objectsToDelete;
00348 public:
00349 CTestSuiteCollection();
00350 virtual ~CTestSuiteCollection();
00351
00352 void addTestSuite(CTestSuite *testSuite);
00353 void removeTestSuite(CTestSuite *testSuite);
00354
00355 void removeAllTestSuites();
00356
00357 int getNumTestSuites();
00358 CTestSuite *getTestSuite(string testSuiteName);
00359 CTestSuite *getTestSuite(int index);
00360
00361 void addObjectToDelete(void *object);
00362 void deleteObjects();
00363 };
00364
/**
 * Result record of one evaluation trial.
 *
 * Plain aggregate without constructors: the numeric members are left
 * uninitialized unless the creator sets them.
 */
struct EvaluationValue
{
    double averageValue;
    double bestValue;
    unsigned int trialNumber;
    /// NOTE(review): unit (seconds?) is not stated in this header - confirm.
    double evaluationTime;
    std::string evaluationDate;
};

/// List of trial results.
typedef std::list<EvaluationValue> EvaluationValues;
00375
00376 class CTestSuiteEvaluator : virtual public CParameterObject
00377 {
00378 protected:
00379 CAgent *agent;
00380
00381 std::list<CTestSuiteEvaluatorLogger *> *evaluators;
00382
00383 string baseDirectory;
00384 CTestSuite *testSuite;
00385
00386 unsigned int nTrials;
00387 unsigned int trialNumber;
00388
00389 bool exception;
00390
00391 std::list<CParameters *> *parameterList;
00392 std::map<CParameters *, EvaluationValues *> *evaluations;
00393
00394 virtual void newEvaluationTrial(CTestSuite *testSuite, EvaluationValue *evaluationData) = 0;
00395 virtual void doEpisode(CTestSuite *testSuite, int nEpisode) = 0;
00396 virtual void getEvaluationValue(EvaluationValue *evaluationData) = 0;
00397 virtual bool isFinished(int unsigned nEpisode) = 0;
00398
00399 CParameters *getParametersObject(CParameters *);
00400
00401 public:
00402 CTestSuiteEvaluator(CAgent *agent, string baseDirectory, CTestSuite *testSuite, int nTrials);
00403 virtual ~CTestSuiteEvaluator();
00404
00405 string getEvaluationDirectory();
00406 int getNewTrialNumber();
00407
00408 string getLearnDataFileName(int trialNumber);
00409
00410 void checkDirectories();
00411
00412 virtual void loadEvaluationData(string filename);
00413 virtual void saveEvaluationData(string filename);
00414 virtual void saveEvaluationDataMatlab(string filename);
00415
00416
00417 virtual void doEvaluationTrial(CParameters *testSuite, EvaluationValue *evaluationData);
00418 virtual void evaluateParameters(CParameters *testSuite);
00419
00420 virtual double getAverageValue(CParameters *testSuite);
00421 virtual double getBestValue(CParameters *testSuite);
00422
00423 virtual EvaluationValues *getEvaluationValues();
00424
00425 virtual void addPolicyEvaluator(CTestSuiteEvaluatorLogger *evaluator);
00426 };
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459 class CAverageRewardTestSuiteEvaluator : public CTestSuiteEvaluator
00460 {
00461 protected:
00462 int numEvals;
00463
00464 double bestValue;
00465 double averageValue;
00466
00467 CEvaluator *evaluator;
00468
00469 virtual void newEvaluationTrial(CTestSuite *testSuite, EvaluationValue *evaluationData);
00470 virtual void doEpisode(CTestSuite *testSuite, int nEpisode);
00471 virtual void getEvaluationValue(EvaluationValue *evaluationData);
00472 virtual bool isFinished(unsigned int nEpisode);
00473
00474 FILE *evaluationFile;
00475 public:
00476 unsigned int episodesBeforeEvaluate;
00477 unsigned int totalLearnEpisodes;
00478 unsigned int stepsLearnEpisode;
00479
00480 CAverageRewardTestSuiteEvaluator(CAgent *agent, string baseDirectory, CTestSuite *testSuite, CEvaluator *evaluator, int totalLearnEpisodes, int episodesBeforeEvaluate, int stepsLearnEpisode, int nTrials);
00481 virtual ~CAverageRewardTestSuiteEvaluator();
00482 };
00483
00484
00485
00486 #endif
00487