#!/usr/bin/Rscript




#--------------------------------------
#  FUNCTIONS DECLARATION
#--------------------------------------

# function zscore calculates z-scores of a given vector x
# rm.outlier logical flag if to remove outliers; default-value FALSE
# c.outlier - cutOff for outliers, default = -2
# rp.negative - logical flag if to replace negative z-scores with 0
# NEW: now we replace the z-scores for outliers not with O but with value of c.outlier
zscore <- function (x, rm.outlier=FALSE, rp.negative=FALSE, c.outlier=-2, filter.zero=FALSE){

    # calculate mean and standard deviation
    m <- mean(x, na.rm=TRUE);
    sdx <- sdx<-sd(x, na.rm=TRUE);
    # calculate z-scores
    z<-(x-m)/sdx;
    # remove 0
    if (filter.zero){
        y <- x[x>0 & x<Inf];
        m <-mean(y, na.rm=TRUE);
        sdx<-sd(y, na.rm=TRUE);
        z<-(x-m)/sdx;
        # remove outliers
        if (rm.outlier){
            y <- x[z>=c.outlier & x<Inf & x>0];
            m <-mean(y, na.rm=TRUE);
            sdx<-sd(y, na.rm=TRUE);
            z<-(x-m)/sdx;
        }
        # replace  z-scores for outliers
        if(rp.negative){
            zscout <- max(c.outlier,(0-m)/sdx);
            z[z<c.outlier | is.na(z)]<-zscout;
        }
    } else {
	# remove outliers
	if (rm.outlier){
	   y <- x[z>=c.outlier & x<Inf];
           m <-mean(y, na.rm=TRUE);
           sdx<-sd(y, na.rm=TRUE);
           z<-(x-m)/sdx;
        } 
        # replace negative z-scores with 0
        if(rp.negative){
           zscout <- max(c.outlier,(0-m)/sdx);
           z[z<c.outlier | is.na(z)]<-zscout;
        }
    }
    # return final vector
    z
}


zscoreByRef <- function(x, y, rm.outlier=FALSE, rp.negative=FALSE, c.outlier=-2){
    m <- mean(x[!is.na(y)], na.rm=TRUE);
    sdx<-sd(x[!is.na(y)], na.rm=TRUE);
    z <- (x-m)/sdx;
    # remove outliers
    if (rm.outlier){
       xx <- x[z>=c.outlier & x<Inf & !(is.na(y))];
       m <-mean(xx, na.rm=TRUE);
       sdx<-sd(xx, na.rm=TRUE);
       z<-(x-m)/sdx;
    }
    # replace negative z-scores with 0
    if(rp.negative){
       z[z<c.outlier | is.na(z)]<-c.outlier;
    }
    # return final vector
    z
}


# function calculates Grishin's tenScore
tenZscore <- function(data){
  z1<-zscore(data$tm,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z2<-zscore(data$tm_n_align,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z3<-zscore(data$dali,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z4<-zscore(data$dali_n_align,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z5<-zscore(data$lga_gdt_ts,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z6<-zscore(data$lga_n_align,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z7<-zscore(data$mammoth_ln_e,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z8<-zscore(data$mammoth_n_align,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z9<-zscore(data$contSintra,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  z10<-zscore(data$sov,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  #z<-z1+z2+z3+z4+z5+z6+z7+z8+z9+z10
  z <- rep(0, length(z1)) 
  if(sum(!is.na(z1))>0){
	z <- z + z1
  }
  if(sum(!is.na(z2))>0){
        z <- z + z2
  }
  if(sum(!is.na(z3))>0){
        z <- z + z3
  }
  if(sum(!is.na(z4))>0){
        z <- z + z4
  }
  if(sum(!is.na(z5))>0){
        z <- z + z5
  }
  if(sum(!is.na(z6))>0){
        z <- z + z6
  }
  if(sum(!is.na(z7))>0){
        z <- z + z7
  }
  if(sum(!is.na(z8))>0){
        z <- z + z8
  }
  if(sum(!is.na(z9))>0){
        z <- z + z9
  }
  if(sum(!is.na(z10))>0){
        z <- z + z10
  }
  data$Zten<-zscoreByRef(z, data$missing, rm.outlier=TRUE, rp.negative=TRUE)
  data
}

# function calculates Grishin's qcsZscore
qcsZscore <- function(data){
  z<-zscore(data$qcs,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  data$Zqcs<-z
  data
}

# function calculates Grishin's contSZscore
contSZscore <- function(data){
  z<-zscore(data$contS,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  data$ZcontS<-z
  data
}

# function calculates Grishin's lga_gdt_tsZscore
gdt_tsZscore <- function(data){
  z<-zscore(data$lga_gdt_ts,rm.outlier=TRUE, rp.negative=TRUE, filter.zero=TRUE)
  data$Zlga_gdt_ts<-z
  data
}

# calc all Zscores
allZscores <- function(data){
  data <- tenZscore(data)
  data <- qcsZscore(data)
  data <- contSZscore(data)
  data <- gdt_tsZscore(data)
  data

}

# the function select the best score out of all 5 models for every group
getBest <- function(data){
  target <- data$Target[1]
  data <- aggregate(data[4:length(data)], by=list(GrCode = data$GrCode), max, na.rm=TRUE)
  data$Target <- rep(target, nrow(data))
  data <- data[,c(length(data),1:length(data)-1)]
  data[data == -Inf] <- NA
  data
}

# the function select the score of the first model submitted by groups
getFirst <- function(data){
  data <- data[data$Model == 1 | data$Model == '1_1' | data$Model == '1_2' | data$Model == '1_3' | data$Model == '1_4', ]
  data <- getBest(data)
  data
}

getBestModelID <- function(data){
  df <- data
  # create argMax indexes
  bestIndex <- aggregate(df[4:length(df)], by=list(GrCode = df$GrCode), which.max)
  bestModel <- bestIndex
  for (gr in bestIndex$GrCode){   
    for (k in 2:length(bestIndex)){
        j <- bestIndex[bestIndex$GrCode == gr,k][[1]]
        if (length(j)==0){
            bestModel[bestModel$GrCode==gr,k] <- NA
        } else{
            i <- as.character(df[df$GrCode == gr, "Model"])[[j]]
            bestModel[bestModel$GrCode==gr,k] <- i  
        }
    }
  }
  target <- df$Target[1]
  bestModel$Target <- rep(target, nrow(bestModel))
  bestModel <- bestModel[,c(length(bestModel),1:length(bestModel)-1)]
  as.matrix(bestModel)
}

#--------------------------------------
# END OF FUNCTIONS DECLARATION
#--------------------------------------






# enable reading args
args <- commandArgs(trailingOnly = TRUE)
# read arg[1] it should be target name
infile <- args[1]
outfile <- args[2]
flag <- args[3]
if (flag != 'best' & flag != 'all'){
    flag <- 'first';
} else {
    if (flag != 'best'){
    	flag <- 'all';
    } else {
	flag <- 'best';
    }
}


# read data
data <- read.csv(infile, header=TRUE)
if (flag == 'best'){
   bestModelID <- getBestModelID(data); 
   outfile_modelID <- paste(outfile, ".bestModelID.csv", sep='');
   write.table(bestModelID, outfile_modelID, quote=FALSE, na="NA", sep="\t", row.names=FALSE, col.names=TRUE);
   data <- getBest(data);
   outfile <- paste(outfile, ".best.csv", sep='');
} 
if (flag == 'first') {
   data <- getFirst(data);
   outfile <- paste(outfile, ".first.csv", sep='');
}
if (flag == 'all') {
   # do nothing
   # data <- data
}

# calc zscore of ten scores
data <- allZscores(data)


# gen output file
write.table(data, outfile, quote=FALSE, na="NA", sep="\t", row.names=FALSE, col.names=TRUE)

