#
# Author: Ing. Jiri Petrlik (ipetrlik@fit.vutbr.cz)
# This software was supported by IT4Innovations Centre of Excellence CZ.1.05/1.1.00/02.0070.
#
# License:
# BUT OPEN SOURCE LICENCE
# Version 1.
# Copyright (c) 2013, Brno University of Technology, Antoninska 548/1, 601 90, Czech Republic
# --------------------------------------------------------------------------
# BY INSTALLING, COPYING OR OTHER USES OF SOFTWARE YOU ARE DECLARING
# THAT YOU AGREE WITH THE TERMS AND CONDITIONS OF THIS LICENCE
# AGREEMENT. IF YOU DO NOT AGREE WITH THE TERMS AND CONDITIONS, DO
# NOT INSTAL, COPY OR USE THE SOFTWARE.
# 
# IF YOU DO NOT POSESS A VALID LICENCE, YOU ARE NOT AUTHORISED TO
# INSTAL, COPY OR OTHERWISE USE THE SOTWARE.
# 
# Definitions:
# 
# For the purpose of this agreement, Software shall mean a computer
# program (a group of computer programs functional as a unit) capable
# of copyright protection and accompanying documentation.
# 
# Work based on Software shall mean a work containing Software or a
# portion of it, either verbatim or with modifications and/or
# translated into another language, or a work based on Software.
# Portions of work not containing a portion of Software or not based
# on Software are not covered by this definition, if it is capable
# of independent use and distributed separately.
# 
# Source code shall mean all the source code for all modules of
# Software, plus any associated interface definition files, plus the
# scripts used to control compilation and installation of the executable
# program. Source code distributed with Software need not include
# anything that is normally distributed (in either source or binary
# form) with the major components (compiler, kernel, and so on) of
# the operating system on which the executable program runs.
# 
# Anyone who uses Software becomes User. User shall abide by this
# licence agreement.
# 
# BRNO UNIVERSITY OF TECHNOLOGY GRANTS TO USER A LICENCE TO USE
# SOFTWARE ON THE FOLLOWING TERMS AND CONDITIONS:
# 
# 1. User may use Software for any purpose, commercial or noncommercial,
# without a need to pay any licence fee.
# 
# 2. User may copy and distribute verbatim copies of executable
# Software with source code as he/she received it, in any medium,
# provided that User conspicuously and appropriately publishes on
# each copy an appropriate copyright notice and disclaimer of warranty;
# keeps intact all the notices that refer to this licence and to the
# absence of any warranty; and give any other recipients of Software
# a copy of this licence along with Software. User may charge a fee
# for the physical act of transferring a copy, and may offer warranty
# protection in exchange for a fee.
# 
# 3. User may modify his/her copy or copies of Software or any portion
# of it, thus forming a work based on Software, and copy and distribute
# such modifications or work, provided that User clearly states this
# work is modified Software. These modifications or work based on
# software may be distributed only under the terms of section 2 of
# this licence agreement, regardless if it is distributed alone or
# together with other work. Previous sentence does not apply to mere
# aggregation of another work not based on software with Software (or
# with a work based on software) on a volume of a storage or distribution
# medium.
# 
# 4. User shall accompany copies of Software or work based on software
# in object or executable form with:
# 
# a) the complete corresponding machine-readable source code, which
# must be distributed on a medium customarily used for software
# interchange; or,
# b) written offer, valid for at least three years, to give any third
# party, for a charge no more than actual cost of physically performing
# source distribution, a complete machine-readable copy of the
# corresponding source code, to be distributed on a medium customarily
# used for software interchange; or,
# c) the information User received as to the offer to distribute
# corresponding source code. (This alternative is allowed only for
# noncommercial distribution and only if User received the program
# in objects code or executable form with such an offer, in accord
# with subsection b above.)
# 
# 5. User may not copy, modify, grant sublicences or distribute
# Software in any other way than expressly provided for in this
# licence agreement. Any other copying, modifying, granting of
# sublicences or distribution of Software is illegal and will
# automatically result in termination of the rights granted by this
# licence. This does not affect rights of third parties acquired in
# good faith, as long as they abide by the terms and conditions of
# this licence agreement.
# 
# 6. User may not use and/or distribute Software, if he/she cannot
# satisfy simultaneously obligations under this licence and any other
# pertinent obligations.
# 
# 7. User is not responsible for enforcing terms of this agreement
# by third parties.
# 
# 8. BECAUSE SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
# FOR SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT
# WHEN OTHERWISE STATED IN WRITING, BUT PROVIDES SOFTWARE "AS IS"
# WITHOUT WARRANTY OF ANY KIND,EITHER EXPRESSED OR IMPLIED,INCLUDING,BUT
# NOT LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE.THE ENTIRE RISK AS TO THE QUALITY AND
# PERFORMANCE OF SOFTWARE IS WITH USER. SHOULD SOFTWARE PROVE
# DEFECTIVE, USER SHALL ASSUME THE COST OF ALL NECESSARY SERVICING,
# REPAIR OR CORRECTION.
# IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
# WILL BRNO UNIVERSITY OF TECHNOLOGY BE LIABLE FOR DAMAGES, INCLUDING
# ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
# OUT OF THE USE OR INABILITY TO USE SOFTWARE (INCLUDING BUT NOT
# LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES
# SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF SOFTWARE TO OPERATE
# WITH ANY OTHER PROGRAMS).
# 
# Final provisions:
# Any provision of this licence agreement that is prohibited,
# unenforceable, or not authorized in any jurisdiction shall, as to
# such jurisdiction, be ineffective to the extent of such prohibition,
# unenforceability, or non-authorization without invalidating or
# affecting the remaining provisions.
# 
# This licence agreement provides in essentials the same extent of
# rights as the terms of GNU GPL version 2 and Software fulfils the
# requirements of Open Source software.
# 
# This agreement is governed by law of the Czech Republic. In case
# of a dispute, the jurisdiction shall be that of courts in the Czech
# Republic.
# 
# By installing, copying or other use of Software User declares he/she
# has read this terms and conditions, understands them and his/her
# use of Software is a demonstration of his/her free will absent of
# any duress.
###############################################################################

# Repair operator for binary feature-selection chromosomes.
#
# Ensures that at least one feature is selected: when no position of
# `selectedFeatures` is TRUE, one position is drawn uniformly at random and
# switched on. A chromosome that already selects something is returned as is.
#
# Args:
#   selectedFeatures: logical vector with one flag per candidate feature.
# Returns:
#   The (possibly repaired) logical vector, same length as the input.
fsReapair <- function(selectedFeatures) {
	featureCount <- length(selectedFeatures)

	# An empty chromosome has no position that could be switched on.
	if (featureCount > 0 && sum(selectedFeatures) == 0) {
		# sample.int() draws from 1..featureCount inclusive. The former
		# floor(runif(1, 1, n)) could never produce n, so the last feature
		# was never chosen by the repair.
		index <- sample.int(featureCount, 1)
		selectedFeatures[index] <- TRUE
	}

	return(selectedFeatures)
}

# Multi-objective, GA-driven feature selection for an SVM classifier.
#
# Runs multimodalNsga2Algorithm() over binary feature masks. Each mask is
# scored on three objectives (presumably minimised by the GA; infeasible
# masks score Inf on all of them):
#   1. misclassification rate of an svm() model on the test set,
#   2. number of selected features,
#   3. fraction of test rows that contain NA in a selected feature.
# Afterwards one SVM is trained for every individual of the final population.
#
# Args:
#   dataTrain, dataTest: data sets with named columns; both must contain
#     `predictedVariable` and every name in `features`.
#   predictedVariable: name of the response column.
#   features: character vector of candidate feature column names.
#   popSize, generations: handed to multimodalNsga2Algorithm().
#   ...: forwarded to multimodalNsga2Algorithm() and to the svm() calls that
#     build the final models.
# Returns:
#   A list with
#     results     - data.frame (solution, front, features, missclasified,
#                   featureCount, unavailable), one row per individual;
#     modelList   - one trained model per individual, or NA when fewer than
#                   10 complete training rows were available;
#     resultsList - per individual a list(trainResults, testResults) holding
#                   real and predicted classes, or NA (see modelList);
#     statistic   - gaResult$statistic as returned by the GA.
# Stops with an error when the response or a feature column is missing.
featureSelectionClassificationGA <- function(dataTrain, dataTest, predictedVariable, features,
		popSize = 40, generations = 100, ...) {

	# Fitness of one chromosome (logical mask over `features`).
	# Returns c(misclassification rate, feature count, unavailable fraction).
	multiobjectiveFitness <- function(chromosome, trainData, testData, ...) {
		featureNames <- features[chromosome]
		selection <- c(predictedVariable, featureNames)
		trainData <- trainData[, selection]
		testData <- testData[, selection]

		# Share of test rows unusable because a selected feature is NA.
		# drop = FALSE keeps a single selected column two-dimensional, so
		# nrow() also works when that column is a factor.
		rowNumber <- nrow(testData)
		usableRows <- na.omit(testData[, -1, drop = FALSE])
		fitness3 <- 1 - (nrow(usableRows) / rowNumber)

		trainData <- na.omit(trainData)
		testData <- na.omit(testData)

		# Too little complete data to train or evaluate: mark as infeasible.
		if ((nrow(trainData) < 10) || (nrow(testData) < 10)) {
			return(c(Inf, Inf, Inf))
		}

		f <- paste(featureNames, collapse = "+")
		f <- paste(predictedVariable, f, sep = "~")

		model <- svm(as.formula(f), data = trainData, ...)
		tmpData <- testData[, featureNames, drop = FALSE]
		predicted <- predict(model, as.matrix(tmpData))

		real <- testData[, predictedVariable]
		missed <- predicted != real
		fitness1 <- sum(missed) / length(real)

		fitness2 <- sum(chromosome)

		result <- c(fitness1, fitness2, fitness3)
		# Sanity check: every objective must have produced exactly one value.
		if (length(result) != 3) {
			stop("problem")
		}
		return(result)
	}

	# Input validation (messages: predicted variable / some input variable is
	# missing in the training / test set).
	if (!(predictedVariable %in% colnames(dataTrain))) {
		stop("Predikovana velicina neni v trenovaci sade")
	}
	if (!(predictedVariable %in% colnames(dataTest))) {
		stop("Predikovana velicina neni v testovaci sade")
	}

	if (!(all(features %in% colnames(dataTrain)))) {
		stop("Nektera vstupni promenna neni v trenovaci mnozine")
	}
	if (!(all(features %in% colnames(dataTest)))) {
		stop("Nektera vstupni promenna neni v testovaci mnozine")
	}

	# Keep only the columns the search can use, response first.
	dataTrain <- dataTrain[, c(predictedVariable, features)]
	dataTest <- dataTest[, c(predictedVariable, features)]

	gaResult <- multimodalNsga2Algorithm(initBinaryChromosome(length(features), 0.1),
			multiobjectiveFitness,
			uniformBinnaryCrossover(),
			binaryMutation(),
			popSize = popSize,
			generations = generations,
			statistic = multiobjectiveStatistic,
			repair = fsReapair,
			list(trainData = dataTrain, testData = dataTest), ...)

	population <- gaResult$population

	# Comma-separated feature names of every individual.
	features2 <- vapply(population, function(chromosome) {
		paste(features[chromosome], collapse = ",")
	}, character(1), USE.NAMES = FALSE)

	solution <- seq_along(population)
	front <- nonDominatedSorting(gaResult$fitnessMatrix)

	results <- data.frame(
			solution = solution,
			front = front,
			features = features2,
			missclasified = gaResult$fitnessMatrix[, 1],
			featureCount = gaResult$fitnessMatrix[, 2],
			unavailable = gaResult$fitnessMatrix[, 3]
	)

	# Train the final model of every individual. Both lists are indexed by the
	# individual's position so that they stay aligned with `results`.
	modelList <- vector("list", length(population))
	resultsList <- vector("list", length(population))
	for (i in seq_along(population)) {
		chromosome <- population[[i]]
		selection <- c(predictedVariable, features[chromosome])
		tmpTrain <- dataTrain[, selection]
		tmpTrain <- na.omit(tmpTrain)

		if (nrow(tmpTrain) < 10) {
			# Not enough complete rows: placeholder instead of a model.
			modelList[[i]] <- NA
			resultsList[[i]] <- NA
		}
		else {
			featureNames <- features[chromosome]
			f <- paste(featureNames, collapse = "+")
			f <- paste(predictedVariable, f, sep = "~")

			model <- svm(as.formula(f), data = tmpTrain, ...)
			modelList[[i]] <- model

			trainResults <- predict(model, tmpTrain)
			trainResults <- list(real = tmpTrain[, predictedVariable], classification = trainResults)
			names(trainResults[[1]]) <- rownames(tmpTrain)
			names(trainResults[[2]]) <- rownames(tmpTrain)

			tmpTest <- dataTest[, selection]
			tmpTest <- na.omit(tmpTest)
			testResults <- predict(model, tmpTest)
			testResults <- list(real = tmpTest[, predictedVariable], classification = testResults)
			names(testResults[[1]]) <- rownames(tmpTest)
			names(testResults[[2]]) <- rownames(tmpTest)

			resultsList[[i]] <- list(trainResults = trainResults, testResults = testResults)
		}
	}

	return(list(results = results, modelList = modelList, resultsList = resultsList, statistic = gaResult$statistic))
}

# Reduce a feature-selection result to the solutions kept by
# filterParetoOptimal().
#
# Args:
#   results: list with a `results` data.frame (columns missclasified,
#     featureCount, unavailable) plus `modelList` and `resultsList`.
# Returns:
#   The same list with `results`, `modelList` and `resultsList` subset to the
#   indexes returned by filterParetoOptimal(); other elements are untouched.
filterParetoOptimalClassification <- function(results) {
	objectiveColumns <- c("missclasified", "featureCount", "unavailable")
	keep <- filterParetoOptimal(results$results[, objectiveColumns])

	# Apply the same selection to the table and to both parallel lists.
	results$results <- results$results[keep, ]
	results$modelList <- results$modelList[keep]
	results$resultsList <- results$resultsList[keep]

	return(results)
}

# Plot the three pairwise projections of the objectives on a 2x2 panel grid.
#
# The misclassification rate and the unavailable fraction are multiplied by
# 100 so that the axes read as percentages.
#
# Args:
#   results: list whose `results` element has the columns missclasified,
#     featureCount and unavailable.
# Returns:
#   invisible NULL; called for its plotting side effect.
plotParetoFrontClassification <- function(results) {
	fitnessMatrix <- results$results[, c("missclasified", "featureCount", "unavailable")]
	fitnessMatrix[, "missclasified"] <- fitnessMatrix[, "missclasified"] * 100
	fitnessMatrix[, "unavailable"] <- fitnessMatrix[, "unavailable"] * 100

	# Switch to a 2x2 layout and hand the caller's layout back on exit, also
	# when one of the plot calls fails.
	previousPar <- par(mfrow = c(2, 2))
	on.exit(par(previousPar), add = TRUE)

	plotSingleParetoFront(fitnessMatrix, "missclasified", "featureCount", c("missclasified %", "featureCount"))
	plotSingleParetoFront(fitnessMatrix, "missclasified", "unavailable", c("missclasified %", "unavailable %"))
	plotSingleParetoFront(fitnessMatrix, "featureCount", "unavailable", c("featureCount", "unavailable %"))

	invisible(NULL)
}

# Plot the convergence of the three objectives on a 2x2 panel grid.
#
# minFitness and meanFitness of objectives 1 and 3 are multiplied by 100 so
# that they are shown as percentages; objective 2 (number of features) is
# plotted unchanged. Only a local copy of the statistics is rescaled.
#
# Args:
#   results: list with `statistic$fitnessStatistic`, a list holding one entry
#     per objective, each with `minFitness` and `meanFitness`.
# Returns:
#   invisible NULL; called for its plotting side effect.
plotConvergenceClassification <- function(results) {
	# Switch to a 2x2 layout and hand the caller's layout back on exit, also
	# when one of the plot calls fails.
	previousPar <- par(mfrow = c(2, 2))
	on.exit(par(previousPar), add = TRUE)

	fitnessStatistic <- results$statistic$fitnessStatistic
	for (objective in c(1, 3)) {
		fitnessStatistic[[objective]]$minFitness <- fitnessStatistic[[objective]]$minFitness * 100
		fitnessStatistic[[objective]]$meanFitness <- fitnessStatistic[[objective]]$meanFitness * 100
	}

	plotConvergence(fitnessStatistic[[1]], "Misclassified %")
	plotConvergence(fitnessStatistic[[2]], "Number of features")
	plotConvergence(fitnessStatistic[[3]], "Unavailable %")

	invisible(NULL)
}

# Classify new data with one of the models found by the feature selection.
#
# Args:
#   results: list with `modelList` and a `results` table whose `features`
#     column holds comma-separated feature names.
#   number: index of the solution (position in modelList / row of results).
#   data: data set containing the feature columns of the chosen solution.
# Returns:
#   Whatever predict() returns for the stored model on the selected columns.
# Stops with "Model is not trained" when the stored entry is not an SVM model
# (for example an NA placeholder).
predictClassification <- function(results, number, data) {
	model <- results$modelList[[number]]
	if (!inherits(model, c("svm.formula", "svm"))) {
		stop("Model is not trained")
	}

	# NOTE(review): feature names that themselves contain "," cannot be
	# recovered from this encoding.
	features <- strsplit(as.character(results$results[number, "features"]), ",")[[1]]

	# drop = FALSE keeps a single selected feature as a named column instead
	# of collapsing it to a bare vector.
	data <- data[, features, drop = FALSE]
	results <- predict(model, data)

	return(results)
}
