00001 // 00002 // C++ Interface: cinputdata 00003 // 00004 // Description: 00005 // 00006 // 00007 // Author: Neumann Gerhard <gerhard@tu-graz.ac.at>, (C) 2006 00008 // 00009 // Copyright: See COPYING file that comes with this distribution 00010 // 00011 // 00012 00013 #ifndef C_INPUTDATASET__H 00014 #define C_INPUTDATASET__H 00015 00016 #include <newmat/newmat.h> 00017 #include <list> 00018 #include <vector> 00019 #include <set> 00020 #include <stdio.h> 00021 00022 class CDataSet; 00023 00024 class CDataPreprocessor 00025 { 00026 protected: 00027 00028 public: 00029 virtual ~CDataPreprocessor() {}; 00030 00031 virtual void preprocessInput(ColumnVector *input, ColumnVector *preInput) = 0; 00032 00033 00034 void preprocessDataSet(CDataSet *dataSet); 00035 00036 }; 00037 00038 class CMeanStdPreprocessor : public CDataPreprocessor 00039 { 00040 protected: 00041 ColumnVector *mean; 00042 ColumnVector *std; 00043 00044 public: 00045 CMeanStdPreprocessor(ColumnVector *mean, ColumnVector *std); 00046 CMeanStdPreprocessor(CDataSet *dataSet); 00047 00048 virtual ~CMeanStdPreprocessor(); 00049 00050 virtual void preprocessInput(ColumnVector *input, ColumnVector *preInput); 00051 00052 void setMean(ColumnVector *mean); 00053 void setStd(ColumnVector *std); 00054 }; 00055 00056 00057 template <typename OutputValue> class CMapping 00058 { 00059 protected: 00060 CDataPreprocessor *preprocessor; 00061 ColumnVector *buffVector; 00062 00063 virtual OutputValue doGetOutputValue(ColumnVector *vector) = 0; 00064 00065 int numDim; 00066 00067 public: 00068 00069 CMapping(int numDim); 00070 00071 virtual ~CMapping(); 00072 00073 00074 virtual OutputValue getOutputValue(ColumnVector *vector); 00075 00076 virtual void saveASCII(FILE *) {}; 00077 00078 void setPreprocessor(CDataPreprocessor *preprocessor); 00079 CDataPreprocessor *getPreprocessor() {return preprocessor;}; 00080 00081 int getNumDimensions() {return numDim;}; 00082 00083 ColumnVector *getPreprocessedInput(ColumnVector *input); 00084 }; 00085 00086 template <typename OutputValue> CMapping<OutputValue>::CMapping(int l_numDim) 00087 { 00088 numDim = l_numDim; 00089 buffVector = new ColumnVector(numDim); 00090 preprocessor = NULL; 00091 } 00092 00093 template <typename OutputValue> CMapping<OutputValue>::~CMapping() 00094 { 00095 delete buffVector; 00096 } 00097 00098 00099 template <typename OutputValue> ColumnVector * CMapping<OutputValue>::getPreprocessedInput(ColumnVector *input) 00100 { 00101 if (preprocessor) 00102 { 00103 preprocessor->preprocessInput(input, buffVector); 00104 } 00105 else 00106 { 00107 *buffVector = *input; 00108 } 00109 return buffVector; 00110 } 00111 00112 00113 template <typename OutputValue> OutputValue CMapping<OutputValue>::getOutputValue(ColumnVector *vector) 00114 { 00115 return doGetOutputValue( getPreprocessedInput( vector)); 00116 } 00117 00118 template <typename OutputValue> void CMapping<OutputValue>::setPreprocessor(CDataPreprocessor *l_preprocessor) 00119 { 00120 preprocessor = l_preprocessor; 00121 } 00122 00123 class DataSubset : public std::set<int> 00124 { 00125 public: 00126 DataSubset() {}; 00127 virtual ~DataSubset(){}; 00128 00129 void addElements(std::list<int> *subsetList); 00130 }; 00131 00132 class CDataSet : public std::vector<ColumnVector *> 00133 { 00134 protected: 00135 int numDimensions; 00136 00137 ColumnVector *buffVector1; 00138 ColumnVector *buffVector2; 00139 public: 00140 CDataSet(int numDimensions); 00141 CDataSet(CDataSet &dataset); 00142 virtual ~CDataSet(); 00143 00144 int getNumDimensions(); 00145 virtual void addInput(ColumnVector *input); 00146 00147 void saveCSV(FILE *stream); 00148 void loadCSV(FILE *stream); 00149 00150 virtual void getSubSet(DataSubset *subSet, CDataSet *newSet); 00151 00152 virtual void clear(); 00153 00154 double getVarianceNorm(DataSubset *dataSubset); 00155 void getVariance(DataSubset *dataSubset, ColumnVector *variance); 00156 00157 void getMean(DataSubset *dataSubset, ColumnVector *mean); 00158 }; 00159 00160 00161 class CDataSet1D : public std::vector<double> 00162 { 00163 public: 00164 CDataSet1D(CDataSet1D &dataset); 00165 CDataSet1D(); 00166 00167 void loadCSV(FILE *stream); 00168 void saveCSV(FILE *stream); 00169 00170 double getVariance(DataSubset *dataSubset, CDataSet1D *weight = NULL); 00171 double getMean(DataSubset *dataSubset, CDataSet1D *weighting = NULL); 00172 }; 00173 00174 00175 00176 00177 #endif