static const char* szModule = "ProjectG.cpp";
//------------------------------------------------------------------------------
// module ProjectG.cpp //
// //
// Class TProjectG encapsulates project settings. Derived from TProject. //
// See below or http://www.newty.de/pnc2/sdocu.html for more information. //
// //
// copyright (c) 2001-2003 by Lars Haendel //
// home: www.newty.de //
// //
// This program is free software and can be used under the terms of the //
// GNU licence. See header file for further information and disclaimer. //
// //
//------------------------------------------------------------------------------
// //
// CREATE: Create an empty instance and call Load() or Initialize(). //
// The former will load a project file from file and the latter gets //
// a TData object and initializes itself with default/standard values //
// depending on the number of data tuples, variable types etc. //
//------------------------------------------------------------------------------
#include <iomanip> // due to: setiosflags()
#include "projectG.h"
//----------------------------------------------------------------------------------------------------------------------
// Restore every setting to its default value: the inherited settings via TProject::Reset() and afterwards the
// additional parameters of section '[Gui]' which are introduced by this class.
void TProjectG::Reset()
{
   // inherited settings first
   TProject::Reset();

   // flags stating that a model resp. tuning results belong to the project
   f_Model         = false;
   f_TuningResults = false;

   // simulation output: no file name, default for writing predictions
   szSimulationOutputFile[0] = '\0';
   f_WritePredictions        = DEF_WRITE_PREDICTIONS;

   // pruning/cuboid removal behaviour
   f_PruneAnyway = DEF_PRUNE_ANYWAY;
   f_KillCuboids = DEF_KILL_CUBOIDS;

   // alternate parameters 1 (switched off)
   f_Modify1 = false;
   _p_min    = DEF_P_MIN;
   _W_Kernel = DEF_W_KERNEL;
   _Sigma    = DEF_SIGMA;
   f_Prune   = DEF_PRUNE;

   // alternate parameters 2 (switched off)
   f_Modify2 = false;
   f_Weights = DEF_WEIGHTS;
   f_Euclid  = DEF_EUCLID;
}
//----------------------------------------------------------------------------------------------------------------------
// Associate the passed learn data object and derive from it those project settings which depend on the data file
// (number of variables and tuples) by means of heuristics.
//    _data1   learn data object to associate (is dereferenced, thus must not be NULL)
void TProjectG::Initialize(TData*const& _data1)
{
   // note: Reset() is intentionally NOT called here anymore (the reason is not documented)

   // general: attach the data and regard the settings as checked
   SetData1(_data1);
   f_Checked = true;

   // section 'Basic': a non-symbolic output column means regression
   f_Regression = data1->IsSymbolic(outcol) ? false : true;

   // section 'Tuning': data splitting first, afterwards # repetitions (which is computed from the splitting)
   DefDataSplitting();
   Def_N_R_Tune();
}
//----------------------------------------------------------------------------------------------------------------------
// Heuristic default for parameter 'DataSplitting': start with DEF_SPLIT percent and lower the percentage if the
// resulting number of tuning learn tuples would exceed MAX_N_L_TUNE. The value is stored via SetDataSplitting().
void TProjectG::DefDataSplitting()
{
   int percentage = DEF_SPLIT;                                    // candidate, named so as not to shadow member 'split'

   // would the default percentage yield more tuning learn tuples than allowed?
   const bool f_TooManyLearnTuples = data1->nTup()*percentage/100>MAX_N_L_TUNE;
   if(f_TooManyLearnTuples)
   {
      percentage = MAX_N_L_TUNE*100/data1->nTup();                // largest percentage that respects the threshold
   }

   SetDataSplitting(percentage);
}
//----------------------------------------------------------------------------------------------------------------------
// Heuristic default for parameter 'N_R_Tune' (# repetitions for tuning type repetition): aim at an overall sum of
// 2000 tuning test data tuples, but never exceed MAX_N_R_TUNE and never fall below MIN_HEURISTIC_N_R_TUNE.
void TProjectG::Def_N_R_Tune()
{
   // # test tuples of one repetition: the share of the data which is not used for learning
   const double nTestPerRepetition = data1->nTup()*(100.0-split)/100.0;

   // # repetitions needed to reach 2000 test tuples, limited from above ...
   const double wanted = 2000.0/nTestPerRepetition;
   const double capped = min(static_cast<double>(MAX_N_R_TUNE), wanted);

   // ... and from below (the lower bound is applied last and therefore wins)
   N_R_Tune = max(capped, static_cast<double>(MIN_HEURISTIC_N_R_TUNE));
}
//----------------------------------------------------------------------------------------------------------------------
// Load project settings from file: the base class reads its own sections first, afterwards the additional
// parameters of section '[Gui]' are read into the members of this class.
//    file                 opened project file stream
//    _szProjectFilePath   passed on unchanged to TProject::Load()
// The stream position found after TProject::Load() is restored once the '[Gui]' section was read successfully.
// If reading fails with an int exception, an error text is composed and propagated via ThrowTypeU(); in that
// case the stream position is NOT restored because the restoring seekg() is the last statement of the try block.
void TProjectG::Load(ifstream& file, const char*const& _szProjectFilePath)
{
// a) call base class version
TProject::Load(file, _szProjectFilePath);
// b) read additional GUI parameters
try
{
streampos curPos = file.tellg(); // preserve actual stream position
// section '[Gui]'
file.seekg(curPos); // restore stream position (no-op here: nothing was read since tellg())
SearchKey(file, "[Gui]"); // position to section
// NOTE(review): the third argument of the ReadKey...() calls is presumably the default used when the key is
// missing; 'PruneAnyway' and 'KillCuboids' are read without such an argument - confirm against the helpers.
f_Model = ReadKeyBool(file, "Model", false); // there is a model to load
f_TuningResults = ReadKeyBool(file, "TuningResults", false); // there are tuning results to load
ReadKeyString(file, "Results", szSimulationOutputFile, STS); // simulation output filename
CorrectPathDelimiter(szSimulationOutputFile);
f_PruneAnyway = ReadKeyBool(file, "PruneAnyway"); // flag: prune even if parameter is not set
f_KillCuboids = ReadKeyBool(file, "KillCuboids"); // flag: kill (permanently remove) cuboids with
// mass less than TParameter::K
f_Modify1 = ReadKeyBool (file, "Modify1", false); // flag: use alternate parameters
_p_min = ReadKeyValue(file, SZ_P_MIN, DEF_P_MIN);
_W_Kernel = ReadKeyValue(file, SZ_W_KERNEL, DEF_W_KERNEL);
_Sigma = ReadKeyValue(file, SZ_SIGMA, DEF_SIGMA);
f_Prune = ReadKeyBool (file, SZ_PRUNE, DEF_PRUNE);
f_Modify2 = ReadKeyBool(file, "Modify2", false); // flag: use alternate parameters
f_Weights = ReadKeyBool(file, SZ_WEIGHTS, DEF_WEIGHTS);
f_Euclid = ReadKeyBool(file, SZ_EUCLID, DEF_EUCLID);
f_WritePredictions = ReadKeyBool(file, "WritePredictions", DEF_WRITE_PREDICTIONS);
file.seekg(curPos); // restore stream position
}
catch(int errNo) // exception handling
{
char szText[STS];
// compose error text: if the last key processed is the section name itself, the section was not found;
// otherwise report the key that was being read together with the error description
if(strcmp("[Gui]", GetLastKey())==0)
sprintf(szText, "Section '%s' not found!", "[Gui]");
else
sprintf(szText, "Section '%s' reading key '%s': %s", "[Gui]", GetLastKey(), GetLastError(errNo));
ThrowTypeU(szText); // 'propagate' exception
}
}
//----------------------------------------------------------------------------------------------------------------------
// Save project settings to file: the base class writes its own sections first, afterwards section '[Gui]' with
// the additional parameters of this class is appended.
//    file                  opened project file stream
//    f_WriteTuningAnyway   passed on unchanged to TProject::Save()
// Keys are written left justified with a name column width of WNAME; right justified output is restored at the end.
void TProjectG::Save(ofstream& file, const bool& f_WriteTuningAnyway/*=true*/)
{
// a) call base class version
TProject::Save(file, f_WriteTuningAnyway);
// b) save additional parameters in section '[Gui]'
file << setiosflags(ios::left) << resetiosflags(ios::right); // set left justified output
file << endl << endl << "[Gui]" << endl;
file << setw(WNAME) << "Model" << " = " << FlagToString(f_Model) << endl;
file << setw(WNAME) << "TuningResults" << " = " << FlagToString(f_TuningResults) << endl;
if(szSimulationOutputFile[0]!='\0') // simulation output file: key is omitted if no file name is set
file << setw(WNAME) << "Results" << " = " << szSimulationOutputFile << endl;
file << setw(WNAME) << "PruneAnyway" << " = " << FlagToString(f_PruneAnyway) << endl;
file << setw(WNAME) << "KillCuboids" << " = " << FlagToString(f_KillCuboids) << endl;
file << setw(WNAME) << "WritePredictions" << " = " << FlagToString(f_WritePredictions) << endl;
// write alternate parameters 1 (whole group is omitted if the flag is not set)
if(f_Modify1)
{
file << setw(WNAME) << "Modify1" << " = " << FlagToString(f_Modify1) << endl;
if(f_Regression)
file << setw(WNAME) << SZ_SIGMA << " = " << _Sigma << endl; // regression only parameter
else
file << setw(WNAME) << SZ_W_KERNEL << " = " << _W_Kernel << endl; // classification only parameters
file << setw(WNAME) << SZ_P_MIN << " = " << _p_min << endl;
file << setw(WNAME) << SZ_PRUNE << " = " << FlagToString(f_Prune) << endl;
}
// write alternate parameters 2 (whole group is omitted if the flag is not set)
if(f_Modify2)
{
file << setw(WNAME) << "Modify2" << " = " << FlagToString(f_Modify2) << endl;
file << setw(WNAME) << SZ_WEIGHTS << " = " << FlagToString(f_Weights) << endl;
file << setw(WNAME) << SZ_EUCLID << " = " << FlagToString(f_Euclid) << endl;
}
file << resetiosflags(ios::left) << setiosflags(ios::right); // restore right justified output
}
//----------------------------------------------------------------------------------------------------------------------
// Check the project parameters against their constraints and the given data file. The base class version does the
// main work (and generates the returned list); this version additionally validates the parameters of section
// '[Gui]' and the data splitting needed by tuning type repetition.
//    _data1                passed on unchanged to TProject::Synchronize()
//    f_CheckTuningAnyway   passed on unchanged to TProject::Synchronize()
// Returns the parameter set list obtained from the base class - WARNING: caller has to release it !!
// If one of the additional checks fails, the list is released here and an error text is propagated via ThrowTypeU().
TParaSetList* /*cr*/ TProjectG::Synchronize(TData*const& _data1, const bool& f_CheckTuningAnyway/*=true*/)
{
   // base class checks; the list generated while checking is handed back to us
   TParaSetList* sets = TProject::Synchronize(_data1, f_CheckTuningAnyway);

   try
   {
      // section '[Gui]': lower bound of 'p_min'
      if(_p_min<MIN_P_MIN)
         throw 300;

      // section '[Gui]': range of kernel width
      if(_W_Kernel<MIN_W_KERNEL)
         throw 301;
      if(_W_Kernel>MAX_W_KERNEL)
         throw 301;

      // section '[Gui]': range of sigma
      if(_Sigma<MIN_SIGMA)
         throw 303;
      if(_Sigma>MAX_SIGMA)
         throw 303;

      // section '[Tuning]': repetition needs data splitting, specification of 'N_L' and 'N_T' is not accepted
      if(tuneType==Rep)
      {
         if(split==0)
            throw 508;
      }
   }
   catch(int errNo)
   {
      // translate the error number into an error text
      char szText[STS];
      if(errNo==300)
         sprintf(szText, "Section '[Gui]': Parameter '%s>%d' must yield!", SZ_P_MIN, MIN_P_MIN);
      else if(errNo==301)
         sprintf(szText, "Section '[Gui]': Parameter '%s' must be e[%s..%s]!", SZ_W_KERNEL, ValueToText1(MIN_W_KERNEL), ValueToText2(MAX_W_KERNEL));
      else if(errNo==303)
         sprintf(szText, "Section '[Gui]': Parameter '%s' must be e[%s..%s]!", SZ_SIGMA, ValueToText1(MIN_SIGMA, 0, PREC_PARA), ValueToText2(MAX_SIGMA, 0, PREC_PARA));
      else if(errNo==508)
         sprintf(szText, "Section '[Tuning]': Data splitting not given to calculate number of learn and test tuples! Please specify key 'DataSplitting'!");

      delete sets;                  // the caller will never see the list, so release it here
      ThrowTypeU(szText);           // 'propagate' exception
   }

   // everything is checked now: if no data splitting was given, initialize it with the heuristic default so that an
   // appropriate value is available in case the tuning type is changed later on
   if(split==0)
   {
      DefDataSplitting();
   }

   // hand back the list generated from the settings in section [Parameter] (it was built while checking anyway)
   return sets;
}
//----------------------------------------------------------------------------------------------------------------------
// Correct the values of 'N_Int' for classification tasks: the text field 'szN_Int' of every entry of the task list
// is overwritten with the value of nClasses(). Nothing is done for regression tasks.
void TProjectG::CorrectTasksIntervals()
{
   if(!f_Regression)                                     // classification only
   {
      int iTask = 0;
      while(iTask<tasks.Size())                          // walk through the task list
      {
         sprintf(tasks.Get(iTask).szN_Int, "%d", nClasses());
         iTask++;
      }
   }
}
//----------------------------------------------------------------------------------------------------------------------
// Set learn (SetData1) and test (SetData2) data. Note: strictly speaking the settings would have to be re-checked
// afterwards, which is not done here so far - thus be careful and call Synchronize() after using these functions.
//
// SetData1: associate the learn data object and take over the name of the file it was loaded from.
//    _data1   learn data object (is dereferenced, thus must not be NULL)
void TProjectG::SetData1(TData*const& _data1)
{
   data1 = _data1;                                 // remember the object ...
   strcpy(szData1, _data1->LoadFileName());        // ... and the name of its file
}
// SetData2: associate the test data object and take over the name of the file it was loaded from.
//    _data2   test data object (is dereferenced, thus must not be NULL)
void TProjectG::SetData2(TData*const& _data2)
{
   data2 = _data2;                                 // remember the object ...
   strcpy(szData2, data2->LoadFileName());         // ... and the name of its file
   // toDo: check if data objects match
}
//----------------------------------------------------------------------------------------------------------------------
// Set tuning type, ensure # repetitions/cross-validations is legal for the new type and set # learn and test data
// tuples.
//    type   new tuning type
// The current value of 'N_R_Tune' is passed through Set_N_R_Tune() again so that it is re-checked against the bounds
// of the new tuning type; Set_N_R_Tune() throws if no learn data object is associated.
void TProjectG::SetTuneType(const TTestType& type)
{
tuneType = type; // set
Set_N_R_Tune(N_R_Tune); // ensure bounds (this call already ends with IniTuneCounts())
IniTuneCounts(); // actualize # learn and test data tuples
}
//----------------------------------------------------------------------------------------------------------------------
// Set # repetitions/cross-validations. The passed value is forced into the range given by Min_N_R_Tune() and
// Max_N_R_Tune() before it is stored; afterwards the # learn and test data tuples are actualized.
//    _N_R_Tune   wanted # repetitions/cross-validations
// Throws (via IfTrueThrowTypeA) if no learn data object is associated.
void TProjectG::Set_N_R_Tune(int _N_R_Tune)
{
   IfTrueThrowTypeA(!data1, "No learn data object associated!", "TProjectG::Set_N_R_Tune", szModule);

   // force into bounds (with respect to the tuning type): lower bound first, upper bound last
   int nRepetitions = _N_R_Tune;
   if(nRepetitions<Min_N_R_Tune())
   {
      nRepetitions = Min_N_R_Tune();
   }
   if(nRepetitions>Max_N_R_Tune())
   {
      nRepetitions = Max_N_R_Tune();
   }

   N_R_Tune = nRepetitions;            // store
   IniTuneCounts();                    // actualize # learn and test data tuples
}
//----------------------------------------------------------------------------------------------------------------------
// Return the learn data tuple count, i.e. the number of tuples of the associated learn data object.
// Throws (via IfTrueThrowTypeA) if no learn data object is associated. The function name reported with the
// exception names this class (TProjectG), in line with the other checks of this module.
int TProjectG::Peek_N_L()
{
   IfTrueThrowTypeA(!data1, "No learn data object associated!", "TProjectG::Peek_N_L", szModule);   // check
   return data1->nTup();
}