package ZscoresRRCManager;

use strict;
use warnings;

use DBI;

use lib qw(Core);

use Database;


use Configuration;
use LocalConfiguration;

my $zscores_manager = undef;
my $OUTLIER_CUTOFF = -2.0;
#my @SCORES = qw/auc_pr mcc f1 prec recall rrces mcc_ss f1_ss prec_ss recall_ss mcc_ss_tp prec_ss_tp recall_ss_tp rrces_ext f1_prob prec_prob recall_prob auc_pr_java prec_fdr_pw prec_pwa prec_fdr_pwa/;
my @SCORES = qw/auc_pr mcc f1 prec recall rrces rrces_ext/; 

sub new {
    my ($class) = @_;
    return $zscores_manager if(defined($zscores_manager));

    my $self = {
        _database => Database->new($CONFIG->{HOSTNAME}, $CONFIG->{PORT}, $CONFIG->{DATABASE}, $CONFIG->{USERNAME}, $CONFIG->{PASSWORD}),
    };

    $zscores_manager = bless $self, $class;
    return $zscores_manager;
}

#######################################
#
# Methods to upload data to database
#
########################################

sub get_new_record{
    my ($self) = @_;
    my %record = (
	id => '',
	predictions_id => '',
	domains_id => '',
	rr_range => '',
	list_size => '',
	prob_filter => '',
	auc_pr => '',
	mcc => '', 
	f1 => '',
	prec => '',
	recall => '',
	rrces => '',
	rrces_ext => '',
	mcc_ss => '',
	f1_ss => '',
	prec_ss => '',
	recall_ss => '',
	mcc_ss_tp => '', 
	prec_ss_tp => '',
	recall_ss_tp => '',
	prec_prob => '',
	recall_prob => '',
	f1_prob => '',
	auc_pr_java => '',
	prec_fdr_pw => '',
	prec_pwa => '',
	prec_fdr_pwa => ''
    );
    return %record;
}

sub is_valid_columname {
    my ($self, $colname) = @_;
    if ('id' eq $colname ||
        'predictions_id' eq $colname ||
	'domains_id' eq $colname ||
        'rr_range' eq $colname ||
        'list_size' eq $colname ||
        'prob_filter' eq $colname ||
        'auc_pr' eq $colname ||
        'mcc' eq $colname ||
	'f1' eq $colname ||
        'prec' eq $colname ||
        'recall' eq $colname  ||
        'rrces' eq $colname ||
	'rrces_ext' eq $colname #||
#        'mcc_ss' eq $colname ||
#	'f1_ss' eq $colname ||
#        'prec_ss' eq $colname ||
#	'recall_ss' eq $colname ||  
#        'mcc_ss_tp' eq $colname ||
#        'prec_ss_tp' eq $colname ||
#        'recall_ss_tp' eq $colname ||
#	'prec_prob' eq $colname ||
#        'recall_prob' eq $colname ||
#        'f1_prob' eq $colname ||
#        'auc_pr_java' eq $colname ||
#	'prec_fdr_pw' eq $colname ||
#	'prec_pwa' eq $colname ||
#	'prec_fdr_pwa' eq $colname 
    ){
	return 1;
    } else {
	return 0;
    }
}

sub getRawScores{
    my ($self, $param ) = @_;
    my $query = "
        SELECT  
        re.predictions_id, re.domains_id, 
	re.rr_range, re.list_size, re.prob_filter,
	re.auc_pr, re.mcc, re.f1, re.prec, re.recall, re.rrces, re.rrces_ext,
	re.mcc_ss,
	re.f1_ss, re.prec_ss, re.recall_ss, re.mcc_ss_tp, re.prec_ss_tp, re.recall_ss_tp,
	re.prec_prob, re.recall_prob, re.f1_prob, re.auc_pr_java, re.prec_fdr_pw, 
	re.prec_pwa, re.prec_fdr_pwa
        FROM casp13.rrc_results re 
	JOIN casp13.predictions pr ON re.predictions_id=pr.id
	JOIN casp13.domains d ON re.domains_id=d.id
	JOIN casp13.groups g ON pr.groups_id=g.id		
" ;

    my $subquery = " WHERE 1=1 ";
    if (defined($param->{target})){
	my $target; my $dom ;
	if ($param->{target} =~ m/(T[0-9]{4}(s\d){0,1})-D(\d+)/){
		$target = $1; 
		$dom = $3;
	} else {
		$target = $param->{target};
		$dom = 0;
	}
	$subquery .= sprintf (" AND pr.target=\'%s\' ", $target);
	$subquery .= sprintf (" AND d.index=%d ", $dom);
    } else {
	return undef;
    }
    $subquery .= sprintf (" AND re.list_size=\'%s\' ", $param->{list_size});
    $subquery .= sprintf (" AND re.rr_range=\'%s\' ", $param->{rr_range});
    $subquery .= sprintf (" AND re.prob_filter=\'%s\' ", $param->{prob_filter});
    
    # TODO: skip groups (223, 112, 342, 48)
    #$subquery .= " AND g.code not in (223, 112, 342, 48) "; 

    $query .= $subquery;

    my $sth = $self->{_database}->query($query);

    if (defined($sth) && ($sth->rows() > 0)){
        my @data; 
        while(my (
		$predictions_id, $domains_id,
		$rr_range, $list_size, $prob_filter,
		$auc_pr, $mcc, $f1, $prec, $recall, $rrces, $rrces_ext, $mcc_ss,
		$f1_ss, $prec_ss, $recall_ss, $mcc_ss_tp, $prec_ss_tp, $recall_ss_tp,
		$prec_prob, $recall_prob, $f1_prob, $auc_pr_java, $prec_fdr_pw,
		$prec_pwa, $prec_fdr_pwa
        ) = $sth->fetchrow_array()){
                push @data, {
			id => '',
			predictions_id => $predictions_id, 
			domains_id => $domains_id,
			rr_range => $rr_range, 
			list_size => $list_size, 
			prob_filter => $prob_filter,
			auc_pr => $auc_pr, 
			mcc => $mcc, 
			f1 => $f1,
			prec => $prec, 
			recall => $recall, 
			rrces => $rrces, 
			rrces_ext => $rrces_ext,
			mcc_ss => $mcc_ss,
			f1_ss => $f1_ss,
			prec_ss => $prec_ss, 
			recall_ss => $recall_ss, 
			mcc_ss_tp => $mcc_ss_tp, 
			prec_ss_tp => $prec_ss_tp, 
			recall_ss_tp => $recall_ss_tp,
			prec_prob => $prec_prob, 
			recall_prob => $recall_prob, 
			f1_prob => $f1_prob, 
			auc_pr_java => $auc_pr_java,
			prec_fdr_pw => $prec_fdr_pw,
			prec_pwa => $prec_pwa,
			prec_fdr_pwa => $prec_fdr_pwa
                };
	}
        return \@data;
   }

   return undef;
}

sub calcZScore{
  my ($self, $refData) = @_;
  foreach my $score (@SCORES){
    my @arr ; # array to store raw scores
    foreach my $el (@{$refData}){
	if (defined($el->{$score})){
	   push @arr, $el->{$score};
	}
    }
    # calculate mean and standard deviation
    my ($m, $std) = $self->calcMeanStdWithoutOutliers(\@arr);
    # calculate z-score
    foreach my $el (@{$refData}){
	my $zscore;
                if (defined($el->{$score}) && ($el->{$score} >= 0.0 || $score eq 'prec_fdr_pw' || $score eq 'prec_fdr_pwa')){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        } 
	         }

	if (defined($zscore) && $zscore < $OUTLIER_CUTOFF){
		$zscore = $OUTLIER_CUTOFF;
	}
	if (defined($zscore)) {
		$el->{$score} = $zscore;
	} else {
		$el->{$score} = $OUTLIER_CUTOFF;
	}
    }
  }
  return $refData;
}


sub upload_all{
    my ($self, $param, $refData) = @_;
    my @data = @$refData;
    my $table = "zscores_rrc_1m";
    foreach my $record (@data) {
	$self->upload($record, $table);
    }
}

sub upload{
    my ($self, $record, $table) = @_;
    my $id = $self->get_id_record($record, $table);
    if ($id) {
	$record->{id} = $id;
	$self->update($record, $table);
    } else {
	$self->add($record, $table);
    }
}

# check if the record exists in the table 
# if it exists return id else 0
sub get_id_record{
    my ($self, $record, $table) = @_;
    my $query = sprintf("SELECT id FROM casp13.%s WHERE predictions_id=%d AND domains_id=%d AND list_size=\'%s\' AND prob_filter=\'%s\' AND rr_range=\'%s\' ", $table, $record->{predictions_id}, $record->{domains_id}, $record->{list_size}, $record->{prob_filter}, $record->{rr_range});
    my $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows() > 0)){
        while(my ($id) = $sth->fetchrow_array()) {
		if ($id eq '' || $id == 0){
			return 0;
		} else {
			return $id;
		}
	}
    }
}

# add one record to table
sub add{
    my ($self, $record, $table) = @_;
    my %hash = %$record;
    my $result = 0;
    my $column_names = '';
    my $column_values = '';
    #my $values_count = 0;
    while(my ($key, $value) = each(%hash)){
	if ($key eq 'id'){
		next;
	}
        if ($self->is_valid_columname($key)){
                $column_names .= sprintf(" %s,", $key);
                $column_values .= sprintf(" \'%s\',", $value);
        }
    }
    $column_names =~ s/,$//;
    $column_values =~ s/,$//;
    my $query = sprintf("INSERT INTO casp13.$table ( %s ) VALUES ( %s )", $column_names, $column_values);
   # print $query."\n";
   # return $result;

    my $sth = $self->{_database}->query($query);
    if(defined($sth)) {
        my $query = sprintf("SELECT MAX(id) FROM casp13.$table");
        my $sth = $self->{_database}->query($query);
        # add logger
        ($result) = $sth->fetchrow_array();
    }
    return $result;
}

# update one record to table
sub update{
    my ($self, $record, $table) = @_;
    my %hash = %$record;
    my $result = 0;
    my $set_query = '';
    while ( my ($key, $value) = each(%hash) ) {
	if ($key eq 'id'){
		next;
	}
	if ($self->is_valid_columname($key)){
		$set_query .= sprintf(" %s = \'%s\',", $key, $value);
	}
    }
    $set_query =~ s/,$//; 

    my $query = sprintf("UPDATE casp13.$table SET %s WHERE id=%d ", $set_query, $hash{id});

   # print $query."\n";
   # return $result;

    my $sth = $self->{_database}->query($query);
    if(defined($sth)) {
        $result = 1;
    }

    return $result;
}

sub mean{
    my ($self, $refArr) = @_;
    my $res = 0.0;
    my @arr = @{$refArr};
    if (scalar(@arr) == 0){
	return undef;
    }
    foreach my $el (@arr){
	$res += $el;
    }
    return $res/scalar(@arr);
}

sub std{
    my ($self, $refArr) = @_;
    my $res = 0.0;
    my @arr = @{$refArr};
    if (scalar(@arr) == 0 ){
	return undef;
    }
    if (scalar(@arr) == 1){
        return $res;
    }
    my $m = $self->mean($refArr);
    foreach my $el (@arr) {
	$res += ($el-$m)*($el-$m);
    }
    $res = sqrt($res/(scalar(@arr) - 1));
    return $res;
}  

sub calcMeanStdWithoutOutliers{
    my ($self, $refArr) = @_;
    my @arr = @{$refArr};
    my @nArr ;
    my $m = $self->mean($refArr);
    my $std = $self->std($refArr);
    foreach my $el (@arr){
	if ($el - $m >= $OUTLIER_CUTOFF * $std){
		push @nArr, $el;
	}
    }
    $m = $self->mean(\@nArr);
    $std = $self->std(\@nArr);
    return ($m, $std);

}

##################################################
#
# methods for retrieving data from database
#
###################################################

sub get_rows{
    my ($self, $param) = @_;
    my $table = "zscores_rrc_1m";
    my $subque = '';
    if ($param->{tbm_hard} eq 'on'){
        $subque .= "1,";
    }
    if ($param->{tbm} eq 'on'){
        $subque .= "2,";
    }
    if ($param->{tbmfm} eq 'on'){
        $subque .= "4,";
    }
    if ($param->{fm} eq 'on'){
        $subque .= "3,";
    }
    $subque =~ s/,$//;
    if ($subque ne ''){
	$subque = " WHERE  d.domain_classifications_id IN ($subque)";
    }
    $subque .= sprintf " AND list_size=\'%s\' AND prob_filter=\'%s\' AND rr_range=\'%s\' ", $param->{list_size}, $param->{prob_filter}, $param->{rr_range};
    # query to get no_domains
    my $query = "SELECT COUNT(DISTINCT d.id) FROM casp13.$table z JOIN casp13.domains d ON z.domains_id=d.id  $subque ";
    my $sth = $self->{_database}->query($query);
    my $no_targets;
    if (defined($sth) && ($sth->rows() > 0)){
	($no_targets) = $sth->fetchrow_array();
    }

    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';

    foreach my $score (@SCORES){
	if ($score eq 'prec_pwa') {next;}
	$sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
	$sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;

    $query = "SELECT g.code, g.name, count(z.*) as count_domains, 
	SUM($sum_minus2_query) as sum_minus2,
	SUM($sum_0_query) as sum_0,
	AVG($avrg_minus2_query) as avg_mimus2,
	AVG($avrg_0_query) as avg_0
	FROM casp13.$table z 
	JOIN casp13.domains d ON z.domains_id=d.id
	JOIN casp13.predictions pr ON z.predictions_id=pr.id
	JOIN casp13.groups g ON pr.groups_id=g.id
	$subque GROUP BY g.code, g.name ORDER BY sum_minus2 ";
    
    $sth = $self->{_database}->query($query);

    if (defined($sth) && ($sth->rows() > 0)){
	my @data;
        while(my ($gr_code, $gr_name, $count_domains, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
			GR_NAME => $gr_name,
			COUNT_DOMAINS => $count_domains,
			SUM_MINUS2 => $sum_minus2 + ($no_targets - $count_domains) * $OUTLIER_CUTOFF,
			SUM_0 => $sum_0,
			AVG_MINUS2 => $avg_minus2,
			AVG_0 => $avg_0
                };
        }
        return $self->calcFinalRanks(\@data);
    }

}


sub calcFinalRanks{
    my ($self, $refData) = @_;
    my @data = @{$refData};
    my @KEYS = qw(SUM_MINUS2 AVG_MINUS2 AVG_0 SUM_0);
    foreach my $key (@KEYS){
    	@data = sort {$b->{$key}<=>$a->{$key}} @data;
	my $cur_rank = 1;
	my $prev_value;
        for (my $i = 0; $i < scalar(@data); $i++){
		if ($i == 0){
			$prev_value = $data[$i]->{$key};
			$data[$i]->{"RANK_$key"} = $cur_rank;
			next;
		}
		if ($prev_value != $data[$i]->{$key}){
			$prev_value = $data[$i]->{$key};
			$cur_rank = $i + 1;
		}
		$data[$i]->{"RANK_$key"} = $cur_rank;
	}
    }
    for(my $index = 0; $index < scalar(@data); $index++){
	$data[$index]->{"INDEX"} = $index + 1;
    }
    return @data;
}

=head
sub get_refinement_rows{
    my ($self, $param) = @_;
    my $table;
    if ($param->{model_type} eq 'first'){
	$table = 'zscores_1m';
    } else {
	$table = 'zscores_bm';
    }
    # count TR targets
    my $subque = "WHERE t.name similar to \'TR%\'";
    my $query = "SELECT COUNT(DISTINCT t.id) FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
         $subque ";
    my $sth = $self->{_database}->query($query);
    my $no_domains;
    if (defined($sth) && ($sth->rows() > 0)){
        ($no_domains) = $sth->fetchrow_array();
    }
    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';
    foreach my $score (@SCORES){
	if ($score eq 'tens'){next;}
	if ($score eq 'rf'){next;}
	if ($score eq 'qse'){next;}
	#if ($score eq 'ace_rms_all'){next;}
        $sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
        $sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;
    $query = "SELECT gr.code, gr.name, count(z.*) as count_domains, 
        SUM($sum_minus2_query) as sum_minus2,
        SUM($sum_0_query) as sum_0,
        AVG($avrg_minus2_query) as avg_mimus2,
        AVG($avrg_0_query) as avg_0
        FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
        JOIN casp13.groups gr ON z.groups_id=gr.id $subque GROUP BY gr.code, gr.name ORDER BY sum_minus2 ";

    $sth = $self->{_database}->query($query);

    if (defined($sth) && ($sth->rows()>0)){
        my @data;
        while(my ($gr_code, $gr_name, $count_domains, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        COUNT_DOMAINS => $count_domains,
                        SUM_MINUS2 => $sum_minus2 + ($no_domains - $count_domains) * $OUTLIER_CUTOFF,
                        SUM_0 => $sum_0,
                        AVG_MINUS2 => $avg_minus2,
                        AVG_0 => $avg_0
                };
        }
        return $self->calcFinalRanks(\@data);
    }

}

# the method handles with both whole targets and domains
# if a domain(s) is defined for a target te results for a domain is picked up
# if not the results for the whole target are taken
sub get_refinement_domain_rows{
    my ($self, $param) = @_;
    my $table;
    if ($param->{model_type} eq 'first'){
        $table = 'zscores_1m';
    } else {
        $table = 'zscores_bm';
    }
    # count TR targets
#    my $subque = "WHERE t.name similar to \'TR%\'";
    my $subque = "WHERE  z.id in (SELECT z.id FROM casp13.$table z JOIN casp13.targets t ON t.id=z.targets_id WHERE t.name SIMILAR TO 'TR%' AND  z.domain<>0 
		UNION 
		SELECT  z.id FROM casp13.$table z JOIN casp13.targets t ON t.id=z.targets_id WHERE t.name SIMILAR TO 'TR%' AND z.domain=0 AND t.name NOT IN (
		   SELECT DISTINCT t.name FROM casp13.$table z JOIN  casp13.targets t ON t.id=z.targets_id WHERE t.name SIMILAR TO 'TR%' AND z.domain<>0)
		   )";
    my $query = "SELECT COUNT(DISTINCT (t.id, z.domain)) FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
         $subque ";
    my $sth = $self->{_database}->query($query);
    my $no_domains;
    if (defined($sth) && ($sth->rows() > 0)){
        ($no_domains) = $sth->fetchrow_array();
    }
    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';
    foreach my $score (@SCORES){
        if ($score eq 'tens'){next;}
        if ($score eq 'rf'){next;}
        if ($score eq 'qse'){next;}
        #if ($score eq 'ace_rms_all'){next;}
        $sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
        $sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;
    $query = "SELECT gr.code, gr.name, count(z.*) as count_domains, 
        SUM($sum_minus2_query) as sum_minus2,
        SUM($sum_0_query) as sum_0,
        AVG($avrg_minus2_query) as avg_minus2,
        AVG($avrg_0_query) as avg_0
        FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
        JOIN casp13.groups gr ON z.groups_id=gr.id $subque GROUP BY gr.code, gr.name ORDER BY sum_minus2 ";

    $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows()>0)) {
        my @data;
        while(my ($gr_code, $gr_name, $count_domains, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        COUNT_DOMAINS => $count_domains,
                        SUM_MINUS2 => $sum_minus2 + ($no_domains - $count_domains) * $OUTLIER_CUTOFF,
                        SUM_0 => $sum_0,
                        AVG_MINUS2 => $avg_minus2,
                        AVG_0 => $avg_0
                };
        }
        return $self->calcFinalRanks(\@data);
    }

}


sub get_Tacspx_rows{
    my ($self, $param) = @_;
    my $table;
    if ($param->{model_type} eq 'first'){
        $table = 'zscores_1m';
    } else {
        $table = 'zscores_bm';
    }
    # count targets
    my $subque ; 
    if ($param->{target_flag} eq 'Ta') {
      $subque = "WHERE t.name similar to \'Ta%\'";
    } elsif ($param->{target_flag} eq 'Tc') {
      $subque = "WHERE t.name similar to \'Tc%\'";
    } elsif ($param->{target_flag} eq 'Tp') {
      $subque = "WHERE t.name similar to \'Tp%\'";
    } elsif ($param->{target_flag} eq 'Ts') {
      $subque = "WHERE t.name similar to \'Ts%\'";
    } elsif ($param->{target_flag} eq 'Tx') {
      $subque = "WHERE t.name similar to \'Tx%\'";
    }else {
      $subque = "WHERE t.name similar to \'T[acspx]%\'";
    }
    $subque .= " AND t.cancellation_status=0 ";
    if (defined($param->{perdomain_flag}) && $param->{perdomain_flag} eq 'true' ) {
	$subque .= " AND z.domain<>0 "; 
    } else {
	$subque .= " AND z.domain=0 ";
    }
    my $query = "SELECT COUNT(DISTINCT t.id) FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
         $subque ";
    my $sth = $self->{_database}->query($query);
    my $no_domains;
    if (defined($sth) && ($sth->rows() > 0)){
        ($no_domains) = $sth->fetchrow_array();
    }
    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';
    foreach my $score (@SCORES){
        if ($score eq 'tens'){next;}
	if ($score eq 'rf'){next;}
	if ($score eq 'qse'){next;}
	if ($score eq 'ace_rms_all'){next;}
        $sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
        $sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;
    $query = "SELECT gr.code, gr.name, count(z.*) as count_domains, 
        SUM($sum_minus2_query) as sum_minus2,
        SUM($sum_0_query) as sum_0,
        AVG($avrg_minus2_query) as avg_mimus2,
        AVG($avrg_0_query) as avg_0
        FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
        JOIN casp13.groups gr ON z.groups_id=gr.id $subque GROUP BY gr.code, gr.name ORDER BY sum_minus2 ";

    $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows()>0)){
        my @data;
        while(my ($gr_code, $gr_name, $count_domains, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        COUNT_DOMAINS => $count_domains,
                        SUM_MINUS2 => $sum_minus2 + ($no_domains - $count_domains) * $OUTLIER_CUTOFF,
                        SUM_0 => $sum_0,
                        AVG_MINUS2 => $avg_minus2,
                        AVG_0 => $avg_0
                };
        }
        return $self->calcFinalRanks(\@data);
    }

}

# the method generates a list of targets to be uploaded into db
sub getTargetsWaitingToBeProcessed{
   my ($self) = @_;
   my @result;
   my $query = "SELECT t.name || '-D' || d.index AS full_target FROM casp13.domains d JOIN casp13.targets t ON t.id=d.targets_id 
		   WHERE index>=1 AND index<=6 AND t.is_final_eval = 1 AND 
			(t.id,d.index) NOT IN (SELECT DISTINCT tt.id, z.domain FROM casp13.targets tt JOIN casp13.zscores_bms z ON z.targets_id=tt.id
						UNION
						SELECT DISTINCT tt.id, z.domain FROM casp13.targets tt JOIN casp13.zscores_bm z ON z.targets_id=tt.id
						) 
		UNION  
		SELECT t.name AS full_target FROM casp13.targets t JOIN casp13.results_targets rt ON rt.targets_id=t.id 
		   WHERE t.name SIMILAR TO 'T[Racspx]%' AND t.id NOT IN (SELECT DISTINCT targets_id FROM casp13.zscores_bm) ;";
   my $sth = $self->{_database}->query($query);
   if (defined($sth) && ($sth->rows()>0)){
        while(my ($target) = $sth->fetchrow_array()){
                push @result, $target;
        }
        @result = sort @result;
   }
   return @result;
}


###########################################################
#  methods for bootstraping and ttest
###########################################################

sub get_data_for_two_groups{
   my ($self, $param, $gr_code1, $gr_code2) = @_;
    my $table;
    if ($param->{gr_type} eq 'server_only'){
        if ($param->{model_type} eq 'first'){
                $table = 'zscores_1ms';
        } else {
                $table = 'zscores_bms';
        }
    } else {
        if ($param->{model_type} eq 'first'){
                $table = 'zscores_1m';
        } else {
                $table = 'zscores_bm';
        }
    }
    my $subque = '';
    if (defined($param->{tbm_hard}) && $param->{tbm_hard} eq 'on'){
        $subque .= "1,";
    }
    if (defined($param->{tbm}) && $param->{tbm} eq 'on'){
        $subque .= "2,";
    }
    if (defined($param->{fm}) && $param->{fm} eq 'on'){
        $subque .= "3,";
    }
    if (defined($param->{tbmfm}) && $param->{tbmfm} eq 'on'){
        $subque .= "4,";
    }
    $subque =~ s/,$//;
    if ($subque ne ''){
        $subque = " AND  d.domain_classifications_id IN ($subque) ";
    }
    my $subque2 = '';
    if (defined($param->{target_flag})){
     if ($param->{target_flag} eq 'Ta') {
       $subque2 = " AND t.name similar to \'Ta%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Tc') {
       $subque2 = " AND t.name similar to \'Tc%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Tp') {
       $subque2 = " AND t.name similar to \'Tp%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Ts') {
       $subque2 = " AND t.name similar to \'Ts%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Tx') {
       $subque2 = " AND t.name similar to \'Tx%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'TR') {
       $subque2 = " AND t.name similar to \'TR%\'";
        $subque = '';
     } 
#	elsif ($param->{target_flag} eq 'TaD') {
#       $subque2 = " AND t.name similar to \'Ta%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TcD') {
#       $subque2 = " AND t.name similar to \'Tc%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TpD') {
#       $subque2 = " AND t.name similar to \'Tp%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TsD') {
#       $subque2 = " AND t.name similar to \'Ts%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TxD') {
#       $subque2 = " AND t.name similar to \'Tx%\' AND z.domain<>0";
#        $subque = '';
#     }
     if ($param->{target_flag} =~ m/^T[acspx]/){
	if (defined($param->{perdomain_flag}) && $param->{perdomain_flag} eq 'true' ) {
        	$subque2 .= " AND z.domain<>0 ";
     	} else {
        	$subque2 .= " AND z.domain=0 ";
     	}
     }
     $subque2 .= " AND t.cancellation_status=0 ";
    }
    # main query
    my %weights = %{$param->{weights}};
    my $score_query = '';
    foreach my $score (@SCORES){
	if ($score eq 'rf'){next;}
	if ($score eq 'ace_rms_all'){next;}
	if ($score eq 'tens'){next;}
	if (defined($param->{target_flag}) && $score eq 'qse'){next;}
        $score_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
    }
    $score_query =~ s/\+$//;
    my $intersect_query = "SELECT DISTINCT targets_id||'_'||domain as dom_id FROM casp13.$table z JOIN casp13.groups gr ON gr.id=z.groups_id WHERE gr.code=$gr_code1
        INTERSECT
                SELECT DISTINCT targets_id||'_'||domain as dom_id FROM casp13.$table z JOIN casp13.groups gr ON gr.id=z.groups_id WHERE gr.code=$gr_code2";
    my $query = "SELECT gr.code, gr.name, z.targets_id || \'_\' || domain AS dom_id, $score_query AS score
        FROM casp13.$table z JOIN casp13.groups gr ON z.groups_id=gr.id ".
	($subque eq '' ? '' : " JOIN casp13.domains d ON (d.targets_id=z.targets_id AND z.domain=d.index)").
	" JOIN casp13.targets t ON z.targets_id=t.id
        WHERE z.targets_id||\'_\'||domain IN ($intersect_query) AND gr.code IN ($gr_code1, $gr_code2) 
	$subque 
	$subque2
        ORDER BY dom_id DESC, gr.code ASC";
#    print $query."\n";
    my $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows() > 0)){
        my @data1; my @data2; my %tmp_hash;
        while(my ($gr_code, $gr_name, $dom_id, $score) = $sth->fetchrow_array()){
		if ($param->{target_flag} =~ m/^TR/){
			my ($target_id, $dom) = split(/_/, $dom_id);
			if ($dom == 0 && exists $tmp_hash{$target_id}){
				next;
			}
			if ($dom > 0){
				$tmp_hash{$target_id} = 1;
			}
		}
                if($gr_code1 == $gr_code){
                   push @data1, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        DOM_ID => $dom_id,
                        SCORE => $score
                   };
                }
                if ($gr_code2 == $gr_code) {
                   push @data2, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        DOM_ID => $dom_id,
                        SCORE => $score
                   };
                }
        }
        return (\@data1,\@data2);
     } else {
        return (undef,undef);
     }
}

sub stat_significance_test{
    my ($self, $param, $gr_code1, $gr_code2) = @_;
    my ($refArr1, $refArr2) = $self->get_data_for_two_groups($param, $gr_code1, $gr_code2);
    if (!defined($refArr1) || !defined($refArr2)){ return (0,'-','-');}
    my $no_common = scalar(@$refArr1);
    if ( $no_common == 1){ return (0,'-','-');}
    my @arr1; my @arr2;
    for (my $i = 0; $i<scalar(@$refArr1); $i++){
        if ($$refArr1[$i]->{DOM_ID} eq $$refArr2[$i]->{DOM_ID}){
                push @arr1, $$refArr1[$i]->{SCORE};
                push @arr2, $$refArr2[$i]->{SCORE};
        }
    }
    # ttest 
    my ($t, $prob) = Statistics::ttest_matched(\@arr1,\@arr2);
    # bootstraping
    my $N = 1000; # number of repetitions
    my $count = 0 ;
    my $count_1_over_2 = 0;
    while(($count++) < $N){
        my @indexes = Statistics::sample(scalar(@arr1), scalar(@arr2));
        my $sum_1 = 0;
        my $sum_2 = 0;
        foreach my $index (@indexes){
                $sum_1 += $arr1[$index];
                $sum_2 += $arr2[$index];
        }
        if ($sum_1 > $sum_2){
                $count_1_over_2++;
        }
    }
    return ($no_common, sprintf("%5.4f",$prob), sprintf("%5.4f",$count_1_over_2/$N));
}

sub head2head_test{
    my ($self, $param, $gr_code1, $gr_code2) = @_;
    my ($refArr1, $refArr2) = $self->get_data_for_two_groups($param, $gr_code1, $gr_code2);
    if (!defined($refArr1) || !defined($refArr2)){ return (0,'-','-','-','-');}
    my $no_common = scalar(@$refArr1);
    if ( $no_common == 1){ return (0,'-','-','-','-');}
    my @arr1; my @arr2;
    for (my $i = 0; $i<scalar(@$refArr1); $i++){
        if ($$refArr1[$i]->{DOM_ID} eq $$refArr2[$i]->{DOM_ID} && defined($$refArr1[$i]->{SCORE}) && defined($$refArr2[$i]->{SCORE})){
                push @arr1, $$refArr1[$i]->{SCORE};
                push @arr2, $$refArr2[$i]->{SCORE};
        }
    }
    my $count_1_over_2 = 0;
    my $count_2_over_1 = 0;
    for (my $i = 0; $i < scalar(@arr1); $i++){
	if ($arr1[$i] > $arr2[$i]) {
		$count_1_over_2++;
	}
	if ($arr2[$i] > $arr1[$i]) {
                $count_2_over_1++;
        }
    }
    if (scalar(@arr1) == 0){
	return (0, '-', '-', '-', '-');
    } else {
	return (scalar(@arr1), sprintf("%4.3f", ($count_1_over_2)/scalar(@arr1)), sprintf("%4.3f",($count_2_over_1)/scalar(@arr1)), $count_1_over_2, $count_2_over_1 );
    }
}

=cut

1;
