#!/usr/bin/perl

use strict;
#use warnings;

use lib qw(/local/Projects/Perl/casp13/src/scripts/evaluation_scripts/Parsers);
use LocalConfig; 
use EvaluationResult;
use ResultsParser;
use Getopt::Long;

use lib qw(Classes);
use TargetsManager;

###########################################################
#
# The script parses the per-program result directories for one
# target and creates a file containing a table of scores
# (one file per target). These files are used to perform
# analysis in R.
#
###########################################################



# VARS
my $DEBUG = 0;      # when true: print the model list and exit before R is run
my $headerDone = 0; # flag to print header just once
my $TARGET;         # target name (--target / -t), required
my $OUTFILE;        # output file name handed to the R script (--output / -o)
my $TMP_FILE = "/tmp/tmp_".$$.".csv"; # tmp_file for parsing results from outputs; it is used as input to Rscript
my @models; # array of models
my @preds;  # array of members of class Evaluator() - one evaluator for every model (appears unused)

my $Rscript = "/local/Projects/Perl/casp13/src/scripts/evaluation_scripts/calcZscores.R";

# read arguments
# GetOptions() returns false when the command line could not be parsed
# (unknown option, missing option value); show the usage text instead of
# carrying on with a half-understood command line.
GetOptions (
	"target|t=s" => \$TARGET,
	"output|o=s" => \$OUTFILE
) or usage();

# check args: the target is mandatory (usage() prints the help and exits)
usage() unless defined($TARGET);

# default output location is derived from the target name
$OUTFILE = sprintf("/local/CASP13/tmp_Grishin/%s.GrishinScores", $TARGET)
	unless defined($OUTFILE);


# Lookup tables of three-digit group codes (code => 1).  One of them is
# selected further down as the set of groups expected for the target.

# Used for targets that are neither T[acspx]* nor server-only.
my %ALGROUPS = map { $_ => 1 } qw(
    006 008 011 014 022 023 024 026 032 034 038 040 041
    042 044 049 050 054 056 063 064 065 067 073 076 080
    092 097 106 110 111 116 117 118 120 128 132 133 144
    145 153 155 156 157 160 162 169 171 173 184 186 192
    193 197 203 204 206 210 212 216 228 230 235 237 241
    251 258 260 263 268 269 276 277 279 281 282 290 296
    300 301 310 311 317 322 326 328 333 335 336 338 340
    342 345 346 347 349 357 358 360 361 362 364 368 381
    385 386 391 401 403 404 410 414 417 419 420 425 428
    430 433 434 436 437 438 439 442 445 448 452 454 457
    460 465 466 476 479 482 483 490 492 493 495 499 077 486
);

# Used for targets flagged IS_SERVER_ONLY.
my %SERVERS = map { $_ => 1 } qw(
    008 011 022 038 041 050 073 110
    117 133 145 156 160 171 184 193
    206 210 212 216 228 237 251 263
    268 277 279 300 335 345 346 349
    381 410 414 420 436 448 452 454
    466 479 492 499
);

# Used for targets whose name starts with "T" followed by one of a, c, s, p, x.
my %TCGROUPS = map { $_ => 1 } qw(
    032 038 040 041 042
    044 064 065 080 155
    157 162 169 186 219
    276 287 300 310 329
    342 345 357 361 420
    428 476 479 490
);

my %GROUPS;       # group codes expected for this target (copy of one table above)
my %DONE_GROUPS;  # group codes for which at least one model row was written

my $targetManager = TargetsManager->new();

# Split a domain-level name ("<target>-D<digit>") into target and domain;
# anything else is taken as the target name as-is.  $domain is captured
# here but not read anywhere else in this script.
my ($target_name, $domain);
if ($TARGET =~ m/^(T[acspx0-9][0-9]{3})-D([1-9]$)/) {
    ($target_name, $domain) = ($1, $2);
}
else {
    $target_name = $TARGET;
}

my $targ_id = $targetManager->get_id_by_name($target_name);
my %info = $targetManager->info($targ_id);

# Pick the expected groups: the name prefix wins over the server-only flag.
%GROUPS = ($target_name =~ m/^T[acspx]/) ? %TCGROUPS
        : $info{IS_SERVER_ONLY}          ? %SERVERS
        :                                  %ALGROUPS;


# Collect the names of all models to process (sorted list from getModels()).
my @models = getModels();

# Debug aid: echo every model name on its own line.
if ($DEBUG) {
    print "$_\n" for @models;
}
# end read predictions

# Number of score columns; assigned while the records are written and
# reused afterwards to pad the rows of groups without a model.
my $no_scores;



# Write the score table (CSV) into the temporary file that is later used as
# input to the R script: one row per model, followed by one row of NA values
# for every expected group that has no model.
#
# The open check needs the low-precedence "or": with "||" the die() is bound
# to the (always true) file name string, so a failed open went unnoticed.
open(my $tmp_fh, '>', $TMP_FILE)
    or die("Can not open output file '$TMP_FILE' for writing: $!\n");

# parse scores of all predictions and write to output file
foreach my $m (@models) {
    my $parser = ResultsParser->new();
    my $evaluator;
    # Each parse* call receives the evaluator built so far and returns the
    # evaluator to continue with (presumably extended with the scores of
    # that program - see ResultsParser).
    # parse QCS results file
    $evaluator = $parser->parseQCS($m);
    # parse SOV result file
    $evaluator = $parser->parseSOV($evaluator);
    # parse CE result file : obsolete score
#   $evaluator = $parser->parseCE($evaluator);
    # parse TMscore
    $evaluator = $parser->parseTMscore($evaluator);
    # parse Mammoth result file
    $evaluator = $parser->parseMammoth($evaluator);
    # parse LGA_SDA result file
    $evaluator = $parser->parseLGA_SDA($evaluator);
    # parse LGA_SIA result file
    $evaluator = $parser->parseLGA_SIA($evaluator);
    # parse DaliLite result file
    $evaluator = $parser->parseDali($evaluator);
    # parse grishin conts result file
    $evaluator = $parser->parseGrContS($evaluator);
    if ($DEBUG) {
#       printf("%s,%s%s,%s\n", $evaluator->createTargetName(), $evaluator->{_pfrmat}, $evaluator->{_gr_code}, $evaluator->{_model});
#       print "contS\t".$evaluator->{_scores}{contSintra}."\n";
#       print "handedness\t".$evaluator->{_scores}{handedness}."\n";
#       print "qcs\t".$evaluator->{_scores}{qcs}."\n";
#       print "sov\t".$evaluator->{_scores}{sov}."\n";
#       print "tm\t".$evaluator->{_scores}{tm}."\n";
#       print "tm_n_align\t".$evaluator->{_scores}{tm_n_align}."\n";
#       print "mammoth_ln_e\t".$evaluator->{_scores}{mammoth_ln_e}."\n";
#       print "mammoth_n_align\t".$evaluator->{_scores}{mammoth_n_align}."\n";
#       print "lga_gdt_ts\t".$evaluator->{_scores}{lga_gdt_ts}."\n";
#       print "lga_n_align\t".$evaluator->{_scores}{lga_n_align}."\n";
#       print "dali\t".$evaluator->{_scores}{dali}."\n";
#       print "dali_n_align\t".$evaluator->{_scores}{dali_n_align}."\n";
        # last;
    }

    # Score columns are emitted in sorted key order so that header and
    # records line up.
    my @score_names = sort keys %{$evaluator->{_scores}};
    $no_scores = scalar(@score_names);

    # The header is taken from the first model only.
    # NOTE(review): if @models is empty no header is written at all - confirm
    # that the R script copes with that.
    if ($headerDone == 0) {
        # Plain concatenation: score names are data and must not be used as
        # a sprintf format string.
        print {$tmp_fh} join(',', 'Target,GrCode,Model', @score_names, 'missing'), "\n";
        $headerDone = 1;
    }

    # Identification columns: target, format+group code, model (+ "_parent"
    # when a parent is set).
    my $parent_suffix = defined($evaluator->{_parent}) ? '_'.$evaluator->{_parent} : '';
    my $record = sprintf("%s,%s%s,%s%s",
        $evaluator->createTargetName(),
        $evaluator->{_pfrmat},
        $evaluator->{_gr_code},
        $evaluator->{_model},
        $parent_suffix);
    foreach my $score (@score_names) {
        my $value = $evaluator->{_scores}{$score};
        $record .= sprintf(",%s", defined($value) ? $value : 'NA');
    }
    $record .= ",0\n"; # last column "missing": 0 for a model that was evaluated
    print {$tmp_fh} $record;

    # remember that this group has at least one row
    $DONE_GROUPS{sprintf("%03d", $evaluator->{_gr_code})} = 1;
}
# end read predictions and write output
# write missing groups
foreach my $key (sort keys %GROUPS) {
    next if defined($DONE_GROUPS{$key});
    printf {$tmp_fh} "%s,TS%03d,1", $TARGET, $key;
    # one NA per score column plus one NA for the "missing" column
    # NOTE(review): "missing" is NA here but 0 for evaluated models - confirm
    # this is what the R script expects.
    print {$tmp_fh} ",NA" x (($no_scores || 0) + 1), "\n";
}

# Buffered write errors only show up at close time; do not feed a truncated
# table to R.
close($tmp_fh)
    or die("Can not close output file '$TMP_FILE': $!\n");

# In debug mode stop here; the temporary CSV file is left in place.
if ($DEBUG){exit;}

# run Rscript and calculate z scores: once for the first models and once for
# the best models (the "all" models mode is disabled).
# The list form of system() bypasses the shell, so blanks or shell
# metacharacters in the user-supplied output name cannot break or alter the
# command.  A failing run is reported but does not stop the script.
foreach my $mode ('first', 'best') {
    my $status = system($Rscript, $TMP_FILE, $OUTFILE, $mode);
    if ($status == -1) {
        warn "Can not run $Rscript: $!\n";
    }
    elsif ($status != 0) {
        warn "$Rscript ($mode models) failed, wait status $status\n";
    }
}

# clean
if (-e $TMP_FILE) {
    unlink($TMP_FILE) or warn "Can not remove $TMP_FILE: $!\n";
}

# change group and permissions of every file whose name starts with $OUTFILE
require File::Glob;
my @out_files = File::Glob::bsd_glob("$OUTFILE*");
if (@out_files) {
    system('chgrp', 'casp', @out_files) == 0
        or warn "chgrp casp failed for $OUTFILE*\n";
    system('chmod', 'g+w', @out_files) == 0
        or warn "chmod g+w failed for $OUTFILE*\n";
}
exit;



#----------------------
# SUBROUTINES
#----------------------

# Returns the sorted list of models for which the results should be
# retrieved.
# It reads the directory <RESULTS_DIR>/QCS/<TARGET>/ (built from
# $LOCAL_CONFIG->{RESULTS_DIR} and the file-level $TARGET): every entry whose
# name ends in ".qcs" yields one model name, i.e. the entry name without
# that extension.
# If the directory cannot be opened, a warning is printed and an empty list
# is returned.
sub getModels{
    my @result;
    my $dir = sprintf("%s/QCS/%s", $LOCAL_CONFIG->{RESULTS_DIR}, $TARGET);
    my $dh;
    unless (opendir($dh, $dir)) {
        warn "Can not read directory $dir: $!\n";
        return @result;
    }
    while (defined(my $f = readdir($dh))) {
        # Strip the extension at the end of the name only; an unanchored
        # substitution would remove the first ".qcs" found anywhere.
        next unless $f =~ s/\.qcs$//;
        push @result, $f;
    }
    closedir($dh);
    @result = sort @result;
    return @result;
}


# Prints the help text to standard output and terminates the script.
# Takes no arguments and never returns.
sub usage{
print "
======
USAGE: $0 --target <TARGET> [--output <OUTFILE>]\n
\t<TARGET>  - required argument - target name; 
\t<OUTFILE> - optional argument - output file name passed to the R script
\t            (default: /local/CASP13/tmp_Grishin/<TARGET>.GrishinScores); 
The script  parses result files in directory /local/CASP13/RESULTS/;
and creates file <TARGET>.GrishinScores.csv.
The output file is used to perform statistical analysis in R.
The list of programs for which the script expects the results to exist
in the corresponding directories:
lga_sda -  <RESULTS_DIR>/LGA/SDA/DATA/<TARGET>/, 
mammoth -  <RESULTS_DIR>/MAMMOTH/<TARGET>/, 
daliLite - <RESULTS_DIR>/DaliLite/<TARGET>/
ce -       <RESULTS_DIR>/CE/<TARGET>/ (obsolete score, no longer parsed), 
sov -      <RESULTS_DIR>/SOV/<TARGET>/, 
qcs -      <RESULTS_DIR>/QCS/<TARGET>/. 
The list of all models is parsed from <RESULTS_DIR>/QCS/<TARGET>/.\n
=========\n";
exit;
}

1;
