##
# Defines modelfit function which 
# fits following distributins and stores fitted model into .model_{modelname}
#	power law  
#	power law  with cutoff 
#	exponential  
#	log-normal  the “DGX” distribution
#	DPLN distribution
#	PLN distribution

modelfit <- function(expendedDataCCDF,saveSteps=FALSE,datasetName,doPlot=FALSE,doSkipExisting=TRUE,showTitle=TRUE){
outFile=paste("results/",datasetName,".expendedDataCCDF",sep="");
if(!(doSkipExisting && saveSteps && file.exists(outFile)) ){
	if(saveSteps){
	save(expendedDataCCDF,file=outFile)
	}
}
reducedSizePlot=FALSE;
x=1:length(expendedDataCCDF)
y=expendedDataCCDF

# y<-y/sum(y)
ds<-data.frame(x=x,y=y)
str(ds)
s=1:max(x)

plotds=ds;

if(reducedSizePlot){
DF2 <- data.frame(x=round(ds$x,3),y=round(ds$y,3))
DF2 <- DF[!duplicated(DF2),]
nrow(DF2)
#[1] 373429
#plot(y~x,data=DF2,pch=".",cex=4)
plotds=DF2;
}


if(doPlot){
	 
	plot_colors <- c("yellow","blue", "red", "forestgreen","magenta",rgb(r=0.0,g=0.9,b=0.9),rgb(r=0.9,g=0.9,b=0.0))
	plot_names <- c("Power law distribution", "Power law  with cutoff distribution", "Exponential distribution","Log-normal (the DGX) distribution","DPLN distribution","PLN distribution"   )
	plot_colors_index=1;

	#set plot destination
# 	pdf(paste("results/",datasetName,".pdf",sep=""),compress=TRUE)
	png(paste("results/",datasetName,".png",sep=""),width=400,height=350,res=72)
# 	png(paste("results/",datasetName,".png",sep=""))
# work on:
#	png(file="animals72.png",width=400,height=350,res=72)

	#par(mar=c(4.2, 3.8, 0.2, 0.2))
	if(showTitle){
	plot(y ~ x, main = paste("Fitted model for",datasetName),xlab = "degree", ylab = "complementary CDF", 
	sub = "Dataset and Statistical fits",log="xy")
	}else{
	plot(y ~ x, xlab = "degree", ylab = "complementary CDF",log="xy")
	}
}


#power law  
outFile=paste("results/",datasetName,".model_power_law",sep="");
if((doSkipExisting && file.exists(outFile)) ){
		load(file=outFile);
		m<-model_power_law;
}else{
	m <- nls(y ~ I(C*x^(-power)), data = ds, start = list(C=1e+6,power = 2),trace = T)
}
class(m)
summary(m)
summary(m)$coefficients
C <- round(summary(m)$coefficients[1], 3)
C.se <- round(summary(m)$coefficients[3], 3)
power <- round(summary(m)$coefficients[2], 3)
power.se <- round(summary(m)$coefficients[4], 3)
if(doPlot)
lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index])

plot_colors_index=plot_colors_index+1;
model_power_law<-m;
rm(m)
if(saveSteps){
	if(!(doSkipExisting && file.exists(outFile))) 
		save(model_power_law,file=outFile) 
	rm(model_power_law)
}

#power law  with cutoff 
outFile=paste("results/",datasetName,".model_power_law_cutoff",sep="");
if((doSkipExisting && file.exists(outFile)) ){
		load(file=outFile);
		m<-model_power_law_cutoff;
}else{ 
	m <- nls(y ~ I( C*x^(-power)*exp(-rate*x) ), data = ds, start = list(C=1,power = 0.01,rate=0.003),trace = T);
}
if(doPlot)
lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index])
plot_colors_index=plot_colors_index+1;
model_power_law_cutoff<-m;
rm(m);
if(saveSteps){
	if(!(doSkipExisting && file.exists(outFile))) 
		save(model_power_law_cutoff,file=outFile)
	rm(model_power_law_cutoff)
}

#exponential  
outFile=paste("results/",datasetName,".model_exponential",sep="");
if((doSkipExisting && file.exists(outFile)) ){
		load(file=outFile);
		m<-model_exponential;
}else{  
m <- nls(y ~ I( C*exp(-rate*x) ), data = ds, start = list(C=1,rate=0.003),trace = T);
}
if(doPlot)
lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index])
plot_colors_index=plot_colors_index+1;
model_exponential<-m;
rm(m)
if(saveSteps ){
	if(!(doSkipExisting && file.exists(outFile))) 
		save(model_exponential,file=outFile)
	rm(model_exponential)
}


#log-normal  the “DGX” distribution
dln <- function(x,mean,sigma){
(1/x)*exp(-( (log(x)-mean)^2) / (2*sigma^2))
}

outFile=paste("results/",datasetName,".model_lognormal",sep="");
if((doSkipExisting && file.exists(outFile)) ){
		load(file=outFile);
		m<-model_lognormal;
}else{  
	m <- nls(y ~ I( C*dln(x,mean,sigma  ) ), data = ds, start = list(C=1,mean=1,sigma=-1),trace = T);
}

if(doPlot)
	lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index])
plot_colors_index=plot_colors_index+1;
model_lognormal<-m;
rm(m)
if(saveSteps ){
	if(!(doSkipExisting && file.exists(outFile))) 
		save(model_lognormal,file=outFile)
	rm(model_lognormal)
}
 
#DPLN distribution
ddpln <- function(x,alpha,beta,vi,teta){
(alpha*beta)/(alpha+beta)*(
exp(alpha*vi+alpha^2*teta^2/2)*x^(-alpha-1)*  pnorm((log(x)-vi-alpha*teta^2)/teta)+
x^(beta-1)*exp(-beta*teta+beta^2*teta^2/2)*(1-pnorm((log(x)-vi+beta *teta^2)/teta))
)
} 

outFile=paste("results/",datasetName,".model_dpln",sep="");
if((doSkipExisting && file.exists(outFile)) ){
		load(file=outFile);
		m<-model_dpln;
}else{   
	m<-nls(y ~ I( C*ddpln(x,alpha,beta,vi,teta)), data = ds, start = list(C=55,alpha=.5,beta=.16,vi=.4,teta=1), upper = list(C=1500,alpha=10,beta=30,vi=10,teta=20),lower = list(C=-150,alpha=-30,beta=-30,vi=-10,teta=0), trace = T,algorithm = "port", control=list(maxiter = 5000,  minFactor = 1/10240,tol=1e-06, eval=10000) )
}

	 
if(doPlot){
	lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index])
	plot_colors_index=plot_colors_index+1;

# 	par(mar=c(5, 4, 4, 2) + 0.1)
}
model_dpln<-m;
rm(m)
if(saveSteps ){
	if(!(doSkipExisting && file.exists(outFile))) 
		save(model_dpln,file=outFile)
	rm(model_dpln)
}
 

#PLN distribution
dpln <- function(x,beta,vi,teta){
(beta)*( 
x^(beta-1)*exp(-beta*vi+beta^2*teta^2/2)*(1-pnorm((log(x)-vi+beta *teta^2)/teta))
)
} 

outFile=paste("results/",datasetName,".model_pln",sep="");
if((doSkipExisting && file.exists(outFile)) ){
		load(file=outFile);
		m<-model_pln;
}else{    
	m<-nls(y ~ I( C*dpln(x,beta,vi,teta)), data = ds, start = list(C=55,beta=.8,vi=2.9,teta=.2), upper = list(C=1500,beta=30,vi=10,teta=20),lower = list(C=-150,beta=-30,vi=-10,teta=0), trace = T,algorithm = "port", control=list(maxiter = 5000,  minFactor = 1/10240,tol=1e-06, eval=10000) )
}

if(doPlot){
	lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index])
	plot_colors_index=plot_colors_index+1;

# 	par(mar=c(5, 4, 4, 2) + 0.1)
}
model_pln<-m;
rm(m)
if(saveSteps ){
	if(!(doSkipExisting && file.exists(outFile))) 
		save(model_pln,file=outFile)
	rm(model_pln)
}
 

#end part
if(doPlot){
	legend("bottomleft", plot_names, cex=0.8, col=plot_colors, lty=1:plot_colors_index, lwd=2, bty="n");

 	dev.off()
	
#	png(paste("results/",datasetName,".png",sep=""))
#	replot();
 #	dev.off()
	
# 	par(mar=c(5, 4, 4, 2) + 0.1)
}

#eval
# TSS=sum(y^2)
#1-sum(residuals(model_power_law)^2)/TSS
#1-sum(residuals(model_power_law_cutoff)^2)/TSS
#1-sum(residuals(model_lognormal)^2)/TSS
#1-sum(residuals(model_exponential)^2)/TSS
#1-sum(residuals(model_dpln)^2)/TSS


#fits = list();
#fits$gsm_0c_all$dataset=datasetName;
#fits$gsm_0c_all$powerlaw=model_power_law;
#fits$gsm_0c_all$powerlawcuttof=model_power_law_cutoff;
#fits$gsm_0c_all$lognormal=model_lognormal;
#fits$gsm_0c_all$exponential=model_exponential;
#fits$gsm_0c_all$dpln=model_dpln;

#fits;
}