## # Defines modelfit function which # fits following distributins and stores fitted model into .model_{modelname} # power law # power law with cutoff # exponential # log-normal the “DGX” distribution # DPLN distribution # PLN distribution modelfit <- function(expendedDataCCDF,saveSteps=FALSE,datasetName,doPlot=FALSE,doSkipExisting=TRUE,showTitle=TRUE){ outFile=paste("results/",datasetName,".expendedDataCCDF",sep=""); if(!(doSkipExisting && saveSteps && file.exists(outFile)) ){ if(saveSteps){ save(expendedDataCCDF,file=outFile) } } reducedSizePlot=FALSE; x=1:length(expendedDataCCDF) y=expendedDataCCDF # y<-y/sum(y) ds<-data.frame(x=x,y=y) str(ds) s=1:max(x) plotds=ds; if(reducedSizePlot){ DF2 <- data.frame(x=round(ds$x,3),y=round(ds$y,3)) DF2 <- DF[!duplicated(DF2),] nrow(DF2) #[1] 373429 #plot(y~x,data=DF2,pch=".",cex=4) plotds=DF2; } if(doPlot){ plot_colors <- c("yellow","blue", "red", "forestgreen","magenta",rgb(r=0.0,g=0.9,b=0.9),rgb(r=0.9,g=0.9,b=0.0)) plot_names <- c("Power law distribution", "Power law with cutoff distribution", "Exponential distribution","Log-normal (the DGX) distribution","DPLN distribution","PLN distribution" ) plot_colors_index=1; #set plot destination # pdf(paste("results/",datasetName,".pdf",sep=""),compress=TRUE) png(paste("results/",datasetName,".png",sep=""),width=400,height=350,res=72) # png(paste("results/",datasetName,".png",sep="")) # work on: # png(file="animals72.png",width=400,height=350,res=72) #par(mar=c(4.2, 3.8, 0.2, 0.2)) if(showTitle){ plot(y ~ x, main = paste("Fitted model for",datasetName),xlab = "degree", ylab = "complementary CDF", sub = "Dataset and Statistical fits",log="xy") }else{ plot(y ~ x, xlab = "degree", ylab = "complementary CDF",log="xy") } } #power law outFile=paste("results/",datasetName,".model_power_law",sep=""); if((doSkipExisting && file.exists(outFile)) ){ load(file=outFile); m<-model_power_law; }else{ m <- nls(y ~ I(C*x^(-power)), data = ds, start = list(C=1e+6,power = 2),trace = T) } class(m) summary(m) summary(m)$coefficients C <- round(summary(m)$coefficients[1], 3) C.se <- round(summary(m)$coefficients[3], 3) power <- round(summary(m)$coefficients[2], 3) power.se <- round(summary(m)$coefficients[4], 3) if(doPlot) lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index]) plot_colors_index=plot_colors_index+1; model_power_law<-m; rm(m) if(saveSteps){ if(!(doSkipExisting && file.exists(outFile))) save(model_power_law,file=outFile) rm(model_power_law) } #power law with cutoff outFile=paste("results/",datasetName,".model_power_law_cutoff",sep=""); if((doSkipExisting && file.exists(outFile)) ){ load(file=outFile); m<-model_power_law_cutoff; }else{ m <- nls(y ~ I( C*x^(-power)*exp(-rate*x) ), data = ds, start = list(C=1,power = 0.01,rate=0.003),trace = T); } if(doPlot) lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index]) plot_colors_index=plot_colors_index+1; model_power_law_cutoff<-m; rm(m); if(saveSteps){ if(!(doSkipExisting && file.exists(outFile))) save(model_power_law_cutoff,file=outFile) rm(model_power_law_cutoff) } #exponential outFile=paste("results/",datasetName,".model_exponential",sep=""); if((doSkipExisting && file.exists(outFile)) ){ load(file=outFile); m<-model_exponential; }else{ m <- nls(y ~ I( C*exp(-rate*x) ), data = ds, start = list(C=1,rate=0.003),trace = T); } if(doPlot) lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index]) plot_colors_index=plot_colors_index+1; model_exponential<-m; rm(m) if(saveSteps ){ if(!(doSkipExisting && file.exists(outFile))) save(model_exponential,file=outFile) rm(model_exponential) } #log-normal the “DGX” distribution dln <- function(x,mean,sigma){ (1/x)*exp(-( (log(x)-mean)^2) / (2*sigma^2)) } outFile=paste("results/",datasetName,".model_lognormal",sep=""); if((doSkipExisting && file.exists(outFile)) ){ load(file=outFile); m<-model_lognormal; }else{ m <- nls(y ~ I( C*dln(x,mean,sigma ) ), data = ds, start = list(C=1,mean=1,sigma=-1),trace = T); } if(doPlot) lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index]) plot_colors_index=plot_colors_index+1; model_lognormal<-m; rm(m) if(saveSteps ){ if(!(doSkipExisting && file.exists(outFile))) save(model_lognormal,file=outFile) rm(model_lognormal) } #DPLN distribution ddpln <- function(x,alpha,beta,vi,teta){ (alpha*beta)/(alpha+beta)*( exp(alpha*vi+alpha^2*teta^2/2)*x^(-alpha-1)* pnorm((log(x)-vi-alpha*teta^2)/teta)+ x^(beta-1)*exp(-beta*teta+beta^2*teta^2/2)*(1-pnorm((log(x)-vi+beta *teta^2)/teta)) ) } outFile=paste("results/",datasetName,".model_dpln",sep=""); if((doSkipExisting && file.exists(outFile)) ){ load(file=outFile); m<-model_dpln; }else{ m<-nls(y ~ I( C*ddpln(x,alpha,beta,vi,teta)), data = ds, start = list(C=55,alpha=.5,beta=.16,vi=.4,teta=1), upper = list(C=1500,alpha=10,beta=30,vi=10,teta=20),lower = list(C=-150,alpha=-30,beta=-30,vi=-10,teta=0), trace = T,algorithm = "port", control=list(maxiter = 5000, minFactor = 1/10240,tol=1e-06, eval=10000) ) } if(doPlot){ lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index]) plot_colors_index=plot_colors_index+1; # par(mar=c(5, 4, 4, 2) + 0.1) } model_dpln<-m; rm(m) if(saveSteps ){ if(!(doSkipExisting && file.exists(outFile))) save(model_dpln,file=outFile) rm(model_dpln) } #PLN distribution dpln <- function(x,beta,vi,teta){ (beta)*( x^(beta-1)*exp(-beta*vi+beta^2*teta^2/2)*(1-pnorm((log(x)-vi+beta *teta^2)/teta)) ) } outFile=paste("results/",datasetName,".model_pln",sep=""); if((doSkipExisting && file.exists(outFile)) ){ load(file=outFile); m<-model_pln; }else{ m<-nls(y ~ I( C*dpln(x,beta,vi,teta)), data = ds, start = list(C=55,beta=.8,vi=2.9,teta=.2), upper = list(C=1500,beta=30,vi=10,teta=20),lower = list(C=-150,beta=-30,vi=-10,teta=0), trace = T,algorithm = "port", control=list(maxiter = 5000, minFactor = 1/10240,tol=1e-06, eval=10000) ) } if(doPlot){ lines(s, predict(m, list(x = s)), lty = plot_colors_index, lwd=2, col = plot_colors[plot_colors_index]) plot_colors_index=plot_colors_index+1; # par(mar=c(5, 4, 4, 2) + 0.1) } model_pln<-m; rm(m) if(saveSteps ){ if(!(doSkipExisting && file.exists(outFile))) save(model_pln,file=outFile) rm(model_pln) } #end part if(doPlot){ legend("bottomleft", plot_names, cex=0.8, col=plot_colors, lty=1:plot_colors_index, lwd=2, bty="n"); dev.off() # png(paste("results/",datasetName,".png",sep="")) # replot(); # dev.off() # par(mar=c(5, 4, 4, 2) + 0.1) } #eval # TSS=sum(y^2) #1-sum(residuals(model_power_law)^2)/TSS #1-sum(residuals(model_power_law_cutoff)^2)/TSS #1-sum(residuals(model_lognormal)^2)/TSS #1-sum(residuals(model_exponential)^2)/TSS #1-sum(residuals(model_dpln)^2)/TSS #fits = list(); #fits$gsm_0c_all$dataset=datasetName; #fits$gsm_0c_all$powerlaw=model_power_law; #fits$gsm_0c_all$powerlawcuttof=model_power_law_cutoff; #fits$gsm_0c_all$lognormal=model_lognormal; #fits$gsm_0c_all$exponential=model_exponential; #fits$gsm_0c_all$dpln=model_dpln; #fits; }