package ZscoresMultimerManager;

use strict;
use warnings;

use DBI;

use lib qw(Core);

use Database;

#use PDBUtils;

use Configuration;
use LocalConfiguration;
#use TargetsManager;
#use GroupsManager;
#use Statistics;

my $zscores_manager = undef;
my $OUTLIER_CUTOFF = -2.0;
my @SCORES = qw/f1 qs_glob qs_best jaccard_d global_rmsd local_rmsd iface_rmsd orient oligo_lddt w_lddt prec_iface recall_iface dens_corr tm_score gdt_ts/;

sub new {
    my ($class) = @_;
    return $zscores_manager if(defined($zscores_manager));

    my $self = {
        _database => Database->new($CONFIG->{HOSTNAME}, $CONFIG->{PORT}, $CONFIG->{DATABASE}, $CONFIG->{USERNAME}, $CONFIG->{PASSWORD}),
    };

    $zscores_manager = bless $self, $class;
    return $zscores_manager;
}

#######################################
#
# Methods to upload data to database
#
########################################

sub get_new_record{
    my ($self) = @_;
    my %record = (
	id => '',
	target => '',
	gr_code => '',
	f1 => '',
	qs_glob => '',
	qs_best => '',
	jaccard_d => '', # jaccard_coefficient
	global_rmsd =>'',
	local_rmsd =>'', 
	iface_rmsd => '',
	orient => '',
	oligo_lddt => '',
	w_lddt => '',
	prec_iface => '',
	recall_iface => '',
	dens_corr => '',
	tm_score => '',
	gdt_ts => '',
    );
    return %record;
}

sub is_valid_columname {
    my ($self, $colname) = @_;
    if ('id' eq $colname ||
        'gr_code' eq $colname ||
	'target' eq $colname ||
        'f1' eq $colname ||
        'qs_glob' eq $colname ||
	'qs_best' eq $colname ||
        'jaccard_d' eq $colname || # jaccard_coefficient
        'global_rmsd' eq $colname || # gdc_sc
        'local_rmsd' eq $colname ||
        'iface_rmsd' eq $colname ||
        'orient' eq $colname ||
        'oligo_lddt' eq $colname ||
	'w_lddt' eq $colname ||
        'prec_iface' eq $colname ||
        'recall_iface' eq $colname  ||	
	'dens_corr' eq $colname  ||
	'tm_score' eq $colname  ||
	'gdt_ts' eq $colname 
    ){
	return 1;
    } else {
	return 0;
    }
}

sub getRawScores{
    my ($self, $param ) = @_;
    my $query = "
        SELECT  
        re.gr_code, re.target, 
        max(re.f1) as f1, max(re.qs_glob) as qs_glob, max(re.qs_best) as qs_best,
        max(re.jaccard_d) as jaccard_d, max(-re.global_rmsd) as global_rmsd, 
        max(-re.local_rmsd) as local_rmsd, max(-re.iface_rmsd) as iface_rmsd,
        max(-re.orient) as orient, max(re.oligo_lddt) as oligo_lddt, max(re.w_lddt) as w_lddt,
        max(re.prec_iface) as prec_iface, max(re.recall_iface) as recall_iface,
	max(dens_corr) as dens_corr, max(tm_score) as tm_score, max(gdt_ts) as gdt_ts
        FROM casp13.multimer_results re 
" ;

    my $subquery = " WHERE 1=1 "; #no_conts>no_clashes AND no_clashes<20 ";
    if (defined($param->{target})){
	$subquery .= sprintf (" AND target=\'%s\' ", $param->{target});
    } else {
	return undef;
    }
    if ($param->{model_type} eq "first"){
        $subquery .= " AND re.model=1 ";
    }
    $subquery .= "  GROUP BY re.gr_code, re.target";

    $query .= $subquery;

    my $sth = $self->{_database}->query($query);

    if (defined($sth) && ($sth->rows() > 0)){
        my @data; 
        while(my ($gr_code, $target,
		$f1, $qs_glob, $qs_best, $jaccard_d, $global_rmsd, $local_rmsd, $iface_rmsd, $orient, $oligo_lddt, $w_lddt, $prec_iface, $recall_iface,
		$dens_corr, $tm_score, $gdt_ts
        ) = $sth->fetchrow_array()){
                push @data, {
			id => '',
                        gr_code => $gr_code,
                        target => $target,
                        f1 => $f1,
                        qs_glob => $qs_glob,
			qs_best => $qs_best,
                        jaccard_d => $jaccard_d,
			global_rmsd => $global_rmsd,
			local_rmsd => $local_rmsd,
			iface_rmsd => $iface_rmsd,
			orient => $orient,
			oligo_lddt => $oligo_lddt, 
			w_lddt => $w_lddt,
			prec_iface => $prec_iface,
			recall_iface => $recall_iface,
			dens_corr => $dens_corr,
			tm_score => $tm_score,
			gdt_ts => $gdt_ts
                };
	}
        return \@data;
   }

   return undef;
}

sub calcZScore{
  my ($self, $refData) = @_;
  foreach my $score (@SCORES){
    my @arr ; # array to store raw scores
    foreach my $el (@{$refData}){
	if ($score eq "global_rmsd") {
		if (defined($el->{$score}) && $el->{$score} <= 0){
			push @arr, $el->{$score};
		}
	} elsif ($score eq "local_rmsd") {
                if (defined($el->{$score}) && $el->{$score} <= 0){
                        push @arr, $el->{$score};
                }
        } elsif ($score eq "iface_rmsd"){
		if (defined($el->{$score}) && $el->{$score} <= 0 ){
                        push @arr, $el->{$score};
                }
	} elsif ($score eq "jaccard_d") { # jaccard_coefficient
		if (defined($el->{$score}) && $el->{$score} >= 0){
                        push @arr, $el->{$score};
                }
        } elsif ($score eq "orient") {
                if (defined($el->{$score}) && $el->{$score} <= 0){
                        push @arr, $el->{$score};
                }
        } elsif ($score eq "f1") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        push @arr, $el->{$score};
                }
        } elsif ($score eq "qs_glob") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        push @arr, $el->{$score};
                }
        } elsif ($score eq "qs_best") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        push @arr, $el->{$score};
                }
        } elsif ($score eq "prec_iface") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        push @arr, $el->{$score};
                }
        } elsif ($score eq "recall_iface") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        push @arr, $el->{$score};
                }
        } else {
		if (defined($el->{$score}) && $el->{$score} >= 0.0 ){
                        push @arr, $el->{$score};
                }
	}
    }
    # calculate mean and standard deviation
    my ($m, $std) = $self->calcMeanStdWithoutOutliers(\@arr);
    # calculate z-score
    foreach my $el (@{$refData}){
	my $zscore;
	if ($score eq "global_rmsd") {
                if (defined($el->{$score}) && $el->{$score} <= 0){
			if (defined($std)){
			    if ($std != 0){
				$zscore = ($el->{$score} - $m)/$std;
			    } else {
				$zscore = 0.0;
			    }
			}
                }
        } elsif ($score eq "local_rmsd") {
                if (defined($el->{$score}) && $el->{$score} <= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        } 
                }
        } elsif ($score eq "iface_rmsd"){
                if (defined($el->{$score}) && $el->{$score} <= 0 ){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        } 
                }
        } elsif ($score eq "jaccard_d") { # jaccard_coefficient
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        }
                }
        } elsif ($score eq "orient") {
                if (defined($el->{$score}) && $el->{$score} <= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        }
                }
        } elsif ($score eq "f1") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        }
                }
        } elsif ($score eq "qs_best") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        }
                }
        } elsif ($score eq "qs_glob") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        }
                }
        } elsif ($score eq "prec_iface") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        }
                }
        } elsif ($score eq "recall_iface") {
                if (defined($el->{$score}) && $el->{$score} >= 0){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        }
                }
        } else {
                if ($el->{$score} >= 0.0 ){
                        if (defined($std)){
                            if ($std != 0){
                                $zscore = ($el->{$score} - $m)/$std;
                            } else {
                                $zscore = 0.0;
                            }
                        } 
                }
        }

	if (defined($zscore) && $zscore < $OUTLIER_CUTOFF){
		$zscore = $OUTLIER_CUTOFF;
	}
	if (defined($zscore)) {
		$el->{$score} = $zscore;
	} else {
		$el->{$score} = $OUTLIER_CUTOFF;
	}
    }
  }
  return $refData;
}


sub upload_all{
    my ($self, $param, $refData) = @_;
    my @data = @$refData;
    my $table;
    if ($param->{model_type} eq 'first'){
            $table = 'zscores_multimer_1m';
    } else {
            $table = 'zscores_multimer_bm';
    }
    foreach my $record (@data) {
	$self->upload($record, $table);
    }
}

sub upload{
    my ($self, $record, $table) = @_;
    my $id = $self->get_id_record($record, $table);
    if ($id) {
	$record->{id} = $id;
	$self->update($record, $table);
    } else {
	$self->add($record, $table);
    }
}

# check if the record exists in the table 
# if it exists return id else 0
sub get_id_record{
    my ($self, $record, $table) = @_;
    my $query = sprintf("SELECT id FROM casp13.%s WHERE gr_code=%d AND target=\'%s\'", $table, $record->{gr_code}, $record->{target});
    my $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows() > 0)){
        while(my ($id) = $sth->fetchrow_array()) {
		if ($id eq '' || $id == 0){
			return 0;
		} else {
			return $id;
		}
	}
    }
}

# add one record to table
sub add{
    my ($self, $record, $table) = @_;
    my %hash = %$record;
    my $result = 0;
    my $column_names = '';
    my $column_values = '';
    #my $values_count = 0;
    while(my ($key, $value) = each(%hash)){
	if ($key eq 'id'){
		next;
	}
        if ($self->is_valid_columname($key)){
                $column_names .= sprintf(" %s,", $key);
                $column_values .= sprintf(" \'%s\',", $value);
        }
    }
    $column_names =~ s/,$//;
    $column_values =~ s/,$//;
    my $query = sprintf("INSERT INTO casp13.$table ( %s ) VALUES ( %s ) RETURNING id", $column_names, $column_values);
   # print $query."\n";
   # return $result;

    my $sth = $self->{_database}->query($query);
    if(defined($sth)) {
        #my $query = sprintf("SELECT MAX(id) FROM casp13.$table");
        #my $sth = $self->{_database}->query($query);
        # add logger
        ($result) = $sth->fetchrow_array();
    }
    return $result;
}

# update one record to table
sub update{
    my ($self, $record, $table) = @_;
    my %hash = %$record;
    my $result = 0;
    my $set_query = '';
    while ( my ($key, $value) = each(%hash) ) {
	if ($key eq 'id'){
		next;
	}
	if ($self->is_valid_columname($key)){
		$set_query .= sprintf(" %s = \'%s\',", $key, $value);
	}
    }
    $set_query =~ s/,$//; 

    my $query = sprintf("UPDATE casp13.$table SET %s WHERE id=%d ", $set_query, $hash{id});

   # print $query."\n";
   # return $result;

    my $sth = $self->{_database}->query($query);
    if(defined($sth)) {
        $result = 1;
    }

    return $result;
}

sub mean{
    my ($self, $refArr) = @_;
    my $res = 0.0;
    my @arr = @{$refArr};
    if (scalar(@arr) == 0){
	return undef;
    }
    foreach my $el (@arr){
	$res += $el;
    }
    return $res/scalar(@arr);
}

sub std{
    my ($self, $refArr) = @_;
    my $res = 0.0;
    my @arr = @{$refArr};
    if (scalar(@arr) == 0 ){
	return undef;
    }
    if (scalar(@arr) == 1){
        return $res;
    }
    my $m = $self->mean($refArr);
    foreach my $el (@arr) {
	$res += ($el-$m)*($el-$m);
    }
    $res = sqrt($res/(scalar(@arr) - 1));
    return $res;
}  

sub calcMeanStdWithoutOutliers{
    my ($self, $refArr) = @_;
    my @arr = @{$refArr};
    my @nArr ;
    my $m = $self->mean($refArr);
    my $std = $self->std($refArr);
    foreach my $el (@arr){
	if ($el - $m >= $OUTLIER_CUTOFF * $std){
		push @nArr, $el;
	}
    }
    $m = $self->mean(\@nArr);
    $std = $self->std(\@nArr);
    return ($m, $std);

}

##################################################
#
# methods for retrieving data from database
#
###################################################

sub get_rows{
    my ($self, $param) = @_;
    my $table;
    if ($param->{model_type} eq 'first'){
        $table = 'zscores_multimer_1m';
    } else {
        $table = 'zscores_multimer_bm';
    }
    my $subque = ' WHERE 1=1 ';
    if (defined($param->{t_capri_only}) && $param->{t_capri_only} eq 'on'){
        $subque .= " AND z.is_capri=1 ";
    } else {
      my $subque2 = '';
      if (defined($param->{t_easy}) && $param->{t_easy} eq 'on'){
	if ($subque2 ne '') {
	  $subque2 .=  ",";
	}
	$subque2 .=  "\'easy\'";
      }
      if (defined($param->{t_medium}) && $param->{t_medium} eq 'on'){
	if ($subque2 ne '') {
          $subque2 .=  ",";
        }
        $subque2 .= "\'medium\'";
      }
      if (defined($param->{t_hard}) && $param->{t_hard} eq 'on'){
	if ($subque2 ne '') {
          $subque2 .=  ",";
        }
        $subque2 .=  " \'hard\'";
      }
      if ($subque2 ne '') {
	$subque .= " AND z.target_class IN ($subque2) " ;
      }
    }
    # groups_only
    my $subque3 = ""; 
    if (defined($param->{capri_groups_only}) && $param->{capri_groups_only} eq 'on') {
	$subque3 .= " AND z.is_capri=1 " ;
	$subque .= " AND z.is_capri=1 "
    } elsif (defined($param->{capri_groups_only}) && $param->{capri_groups_only} eq 'off') {
        $subque3 .= " AND gr.eval_capri=0 " ;
    } 
    # targets classes (xray, nmr, cryoem)
    my $subque4 = '';
    if ($param->{xray} eq 'on'){
        $subque4 .= "0,";
    }
    if ($param->{nmr} eq 'on'){
        $subque4 .= "1,";
    }
    if ($param->{cryoem} eq 'on'){
        $subque4 .= "2,";
    }
    $subque4 =~ s/,$//;
    if ($subque4 ne ''){
       $subque4 = " AND  ss.is_nmr IN ($subque4)";
    }

    
    # query to get no_domains
    my $query = "SELECT COUNT(DISTINCT z.target) FROM casp13.$table z 
	JOIN casp13.targets t ON substring(z.target from 1 for 5)::text=t.name::text
	JOIN casp13.submitted_sequences ss ON t.submitted_sequences_id=ss.id
	$subque $subque4";
    my $sth = $self->{_database}->query($query);
    my $no_targets;
    if (defined($sth) && ($sth->rows() > 0)){
	($no_targets) = $sth->fetchrow_array();
    }

    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';

    foreach my $score (@SCORES){
	$sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
	$sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;

    $query = "SELECT gr.name, gr.eval_capri, gr_code, count(z.*) as count_targets, 
	SUM($sum_minus2_query) as sum_minus2,
	SUM($sum_0_query) as sum_0,
	AVG($avrg_minus2_query) as avg_mimus2,
	AVG($avrg_0_query) as avg_0
	FROM casp13.$table z 
	JOIN CASP13.groups gr ON gr.code=z.gr_code
	JOIN casp13.targets t ON substring(z.target from 1 for 5)::text=t.name::text
	JOIN casp13.submitted_sequences ss ON t.submitted_sequences_id=ss.id
	$subque $subque3 $subque4
	GROUP BY gr_code, gr.name, gr.eval_capri  ORDER BY sum_minus2 ";
    
    $sth = $self->{_database}->query($query);
    my @data;
    if (defined($sth) && ($sth->rows() > 0)){
	@data;
        while(my ($gr_name, $eval_capri, $gr_code, $count_targets, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
#	     if ($count_targets > 2){ # count of targets should be at least 3
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
			EVAL_CAPRI => $eval_capri,
			GR_NAME => $gr_name,
			COUNT_TARGETS => $count_targets,
			SUM_MINUS2 => $sum_minus2 + ($no_targets - $count_targets) * $OUTLIER_CUTOFF,
			SUM_0 => $sum_0,
			AVG_MINUS2 => $avg_minus2,
			AVG_0 => $avg_0
                };
#	     }
        }
        return $self->calcFinalRanks(\@data);
    }
    return @data;
}


sub calcFinalRanks{
    my ($self, $refData) = @_;
    my @data = @{$refData};
    my @KEYS = qw(SUM_MINUS2 AVG_MINUS2 AVG_0 SUM_0);
    foreach my $key (@KEYS){
    	@data = sort {$b->{$key}<=>$a->{$key}} @data;
	my $cur_rank = 1;
	my $prev_value;
        for (my $i = 0; $i < scalar(@data); $i++){
		if ($i == 0){
			$prev_value = $data[$i]->{$key};
			$data[$i]->{"RANK_$key"} = $cur_rank;
			next;
		}
		if ($prev_value != $data[$i]->{$key}){
			$prev_value = $data[$i]->{$key};
			$cur_rank = $i + 1;
		}
		$data[$i]->{"RANK_$key"} = $cur_rank;
	}
    }
    for(my $index = 0; $index < scalar(@data); $index++){
	$data[$index]->{"INDEX"} = $index + 1;
    }
    return @data;
}

=head
sub get_refinement_rows{
    my ($self, $param) = @_;
    my $table;
    if ($param->{model_type} eq 'first'){
	$table = 'zscores_1m';
    } else {
	$table = 'zscores_bm';
    }
    # count TR targets
    my $subque = "WHERE t.name similar to \'TR%\'";
    my $query = "SELECT COUNT(DISTINCT t.id) FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
         $subque ";
    my $sth = $self->{_database}->query($query);
    my $no_domains;
    if (defined($sth) && ($sth->rows() > 0)){
        ($no_domains) = $sth->fetchrow_array();
    }
    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';
    foreach my $score (@SCORES){
	if ($score eq 'tens'){next;}
	if ($score eq 'rf'){next;}
	if ($score eq 'qse'){next;}
	#if ($score eq 'ace_rms_all'){next;}
        $sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
        $sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;
    $query = "SELECT gr.code, gr.name, count(z.*) as count_domains, 
        SUM($sum_minus2_query) as sum_minus2,
        SUM($sum_0_query) as sum_0,
        AVG($avrg_minus2_query) as avg_mimus2,
        AVG($avrg_0_query) as avg_0
        FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
        JOIN casp13.groups gr ON z.groups_id=gr.id $subque GROUP BY gr.code, gr.name ORDER BY sum_minus2 ";

    $sth = $self->{_database}->query($query);

    if (defined($sth) && ($sth->rows()>0)){
        my @data;
        while(my ($gr_code, $gr_name, $count_domains, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        COUNT_DOMAINS => $count_domains,
                        SUM_MINUS2 => $sum_minus2 + ($no_domains - $count_domains) * $OUTLIER_CUTOFF,
                        SUM_0 => $sum_0,
                        AVG_MINUS2 => $avg_minus2,
                        AVG_0 => $avg_0
                };
        }
        return $self->calcFinalRanks(\@data);
    }

}

# the method handles with both whole targets and domains
# if a domain(s) is defined for a target te results for a domain is picked up
# if not the results for the whole target are taken
sub get_refinement_domain_rows{
    my ($self, $param) = @_;
    my $table;
    if ($param->{model_type} eq 'first'){
        $table = 'zscores_1m';
    } else {
        $table = 'zscores_bm';
    }
    # count TR targets
#    my $subque = "WHERE t.name similar to \'TR%\'";
    my $subque = "WHERE  z.id in (SELECT z.id FROM casp13.$table z JOIN casp13.targets t ON t.id=z.targets_id WHERE t.name SIMILAR TO 'TR%' AND  z.domain<>0 
		UNION 
		SELECT  z.id FROM casp13.$table z JOIN casp13.targets t ON t.id=z.targets_id WHERE t.name SIMILAR TO 'TR%' AND z.domain=0 AND t.name NOT IN (
		   SELECT DISTINCT t.name FROM casp13.$table z JOIN  casp13.targets t ON t.id=z.targets_id WHERE t.name SIMILAR TO 'TR%' AND z.domain<>0)
		   )";
    my $query = "SELECT COUNT(DISTINCT (t.id, z.domain)) FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
         $subque ";
    my $sth = $self->{_database}->query($query);
    my $no_domains;
    if (defined($sth) && ($sth->rows() > 0)){
        ($no_domains) = $sth->fetchrow_array();
    }
    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';
    foreach my $score (@SCORES){
        if ($score eq 'tens'){next;}
        if ($score eq 'rf'){next;}
        if ($score eq 'qse'){next;}
        #if ($score eq 'ace_rms_all'){next;}
        $sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
        $sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;
    $query = "SELECT gr.code, gr.name, count(z.*) as count_domains, 
        SUM($sum_minus2_query) as sum_minus2,
        SUM($sum_0_query) as sum_0,
        AVG($avrg_minus2_query) as avg_minus2,
        AVG($avrg_0_query) as avg_0
        FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
        JOIN casp13.groups gr ON z.groups_id=gr.id $subque GROUP BY gr.code, gr.name ORDER BY sum_minus2 ";

    $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows()>0)) {
        my @data;
        while(my ($gr_code, $gr_name, $count_domains, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        COUNT_DOMAINS => $count_domains,
                        SUM_MINUS2 => $sum_minus2 + ($no_domains - $count_domains) * $OUTLIER_CUTOFF,
                        SUM_0 => $sum_0,
                        AVG_MINUS2 => $avg_minus2,
                        AVG_0 => $avg_0
                };
        }
        return $self->calcFinalRanks(\@data);
    }

}


sub get_Tacspx_rows{
    my ($self, $param) = @_;
    my $table;
    if ($param->{model_type} eq 'first'){
        $table = 'zscores_1m';
    } else {
        $table = 'zscores_bm';
    }
    # count targets
    my $subque ; 
    if ($param->{target_flag} eq 'Ta') {
      $subque = "WHERE t.name similar to \'Ta%\'";
    } elsif ($param->{target_flag} eq 'Tc') {
      $subque = "WHERE t.name similar to \'Tc%\'";
    } elsif ($param->{target_flag} eq 'Tp') {
      $subque = "WHERE t.name similar to \'Tp%\'";
    } elsif ($param->{target_flag} eq 'Ts') {
      $subque = "WHERE t.name similar to \'Ts%\'";
    } elsif ($param->{target_flag} eq 'Tx') {
      $subque = "WHERE t.name similar to \'Tx%\'";
    }else {
      $subque = "WHERE t.name similar to \'T[acspx]%\'";
    }
    $subque .= " AND t.cancellation_status=0 ";
    if (defined($param->{perdomain_flag}) && $param->{perdomain_flag} eq 'true' ) {
	$subque .= " AND z.domain<>0 "; 
    } else {
	$subque .= " AND z.domain=0 ";
    }
    my $query = "SELECT COUNT(DISTINCT t.id) FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
         $subque ";
    my $sth = $self->{_database}->query($query);
    my $no_domains;
    if (defined($sth) && ($sth->rows() > 0)){
        ($no_domains) = $sth->fetchrow_array();
    }
    # main query
    my %weights = %{$param->{weights}};

    my $sum_minus2_query = '';
    my $sum_0_query = '';
    my $avrg_minus2_query = '';
    my $avrg_0_query = '';
    foreach my $score (@SCORES){
        if ($score eq 'tens'){next;}
	if ($score eq 'rf'){next;}
	if ($score eq 'qse'){next;}
	if ($score eq 'ace_rms_all'){next;}
        $sum_minus2_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
        $sum_0_query .= sprintf("%.3f*(CASE WHEN z.%s > 0 THEN z.%s ELSE 0.0 END)\+", $weights{"w_$score"}, $score, $score);
    }
    $sum_minus2_query =~ s/\+$//;
    $sum_0_query =~ s/\+$//;
    $avrg_minus2_query = $sum_minus2_query;
    $avrg_0_query = $sum_0_query;
    $query = "SELECT gr.code, gr.name, count(z.*) as count_domains, 
        SUM($sum_minus2_query) as sum_minus2,
        SUM($sum_0_query) as sum_0,
        AVG($avrg_minus2_query) as avg_mimus2,
        AVG($avrg_0_query) as avg_0
        FROM casp13.$table z JOIN casp13.targets t ON z.targets_id=t.id 
        JOIN casp13.groups gr ON z.groups_id=gr.id $subque GROUP BY gr.code, gr.name ORDER BY sum_minus2 ";

    $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows()>0)){
        my @data;
        while(my ($gr_code, $gr_name, $count_domains, $sum_minus2, $sum_0, $avg_minus2, $avg_0) = $sth->fetchrow_array()){
                push @data, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        COUNT_DOMAINS => $count_domains,
                        SUM_MINUS2 => $sum_minus2 + ($no_domains - $count_domains) * $OUTLIER_CUTOFF,
                        SUM_0 => $sum_0,
                        AVG_MINUS2 => $avg_minus2,
                        AVG_0 => $avg_0
                };
        }
        return $self->calcFinalRanks(\@data);
    }

}

# the method generates a list of targets to be uploaded into db
sub getTargetsWaitingToBeProcessed{
   my ($self) = @_;
   my @result;
   my $query = "SELECT t.name || '-D' || d.index AS full_target FROM casp13.domains d JOIN casp13.targets t ON t.id=d.targets_id 
		   WHERE index>=1 AND index<=6 AND t.is_final_eval = 1 AND 
			(t.id,d.index) NOT IN (SELECT DISTINCT tt.id, z.domain FROM casp13.targets tt JOIN casp13.zscores_bms z ON z.targets_id=tt.id
						UNION
						SELECT DISTINCT tt.id, z.domain FROM casp13.targets tt JOIN casp13.zscores_bm z ON z.targets_id=tt.id
						) 
		UNION  
		SELECT t.name AS full_target FROM casp13.targets t JOIN casp13.results_targets rt ON rt.targets_id=t.id 
		   WHERE t.name SIMILAR TO 'T[Racspx]%' AND t.id NOT IN (SELECT DISTINCT targets_id FROM casp13.zscores_bm) ;";
   my $sth = $self->{_database}->query($query);
   if (defined($sth) && ($sth->rows()>0)){
        while(my ($target) = $sth->fetchrow_array()){
                push @result, $target;
        }
        @result = sort @result;
   }
   return @result;
}


###########################################################
#  methods for bootstraping and ttest
###########################################################

sub get_data_for_two_groups{
   my ($self, $param, $gr_code1, $gr_code2) = @_;
    my $table;
    if ($param->{gr_type} eq 'server_only'){
        if ($param->{model_type} eq 'first'){
                $table = 'zscores_1ms';
        } else {
                $table = 'zscores_bms';
        }
    } else {
        if ($param->{model_type} eq 'first'){
                $table = 'zscores_1m';
        } else {
                $table = 'zscores_bm';
        }
    }
    my $subque = '';
    if (defined($param->{tbm_hard}) && $param->{tbm_hard} eq 'on'){
        $subque .= "1,";
    }
    if (defined($param->{tbm}) && $param->{tbm} eq 'on'){
        $subque .= "2,";
    }
    if (defined($param->{fm}) && $param->{fm} eq 'on'){
        $subque .= "3,";
    }
    if (defined($param->{tbmfm}) && $param->{tbmfm} eq 'on'){
        $subque .= "4,";
    }
    $subque =~ s/,$//;
    if ($subque ne ''){
        $subque = " AND  d.domain_classifications_id IN ($subque) ";
    }
    my $subque2 = '';
    if (defined($param->{target_flag})){
     if ($param->{target_flag} eq 'Ta') {
       $subque2 = " AND t.name similar to \'Ta%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Tc') {
       $subque2 = " AND t.name similar to \'Tc%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Tp') {
       $subque2 = " AND t.name similar to \'Tp%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Ts') {
       $subque2 = " AND t.name similar to \'Ts%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'Tx') {
       $subque2 = " AND t.name similar to \'Tx%\'";
	$subque = '';
     } elsif ($param->{target_flag} eq 'TR') {
       $subque2 = " AND t.name similar to \'TR%\'";
        $subque = '';
     } 
#	elsif ($param->{target_flag} eq 'TaD') {
#       $subque2 = " AND t.name similar to \'Ta%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TcD') {
#       $subque2 = " AND t.name similar to \'Tc%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TpD') {
#       $subque2 = " AND t.name similar to \'Tp%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TsD') {
#       $subque2 = " AND t.name similar to \'Ts%\' AND z.domain<>0";
#        $subque = '';
#     } elsif ($param->{target_flag} eq 'TxD') {
#       $subque2 = " AND t.name similar to \'Tx%\' AND z.domain<>0";
#        $subque = '';
#     }
     if ($param->{target_flag} =~ m/^T[acspx]/){
	if (defined($param->{perdomain_flag}) && $param->{perdomain_flag} eq 'true' ) {
        	$subque2 .= " AND z.domain<>0 ";
     	} else {
        	$subque2 .= " AND z.domain=0 ";
     	}
     }
     $subque2 .= " AND t.cancellation_status=0 ";
    }
    # main query
    my %weights = %{$param->{weights}};
    my $score_query = '';
    foreach my $score (@SCORES){
	if ($score eq 'rf'){next;}
	if ($score eq 'ace_rms_all'){next;}
	if ($score eq 'tens'){next;}
	if (defined($param->{target_flag}) && $score eq 'qse'){next;}
        $score_query .= sprintf("%.3f*z.%s\+", $weights{"w_$score"}, $score);
    }
    $score_query =~ s/\+$//;
    my $intersect_query = "SELECT DISTINCT targets_id||'_'||domain as dom_id FROM casp13.$table z JOIN casp13.groups gr ON gr.id=z.groups_id WHERE gr.code=$gr_code1
        INTERSECT
                SELECT DISTINCT targets_id||'_'||domain as dom_id FROM casp13.$table z JOIN casp13.groups gr ON gr.id=z.groups_id WHERE gr.code=$gr_code2";
    my $query = "SELECT gr.code, gr.name, z.targets_id || \'_\' || domain AS dom_id, $score_query AS score
        FROM casp13.$table z JOIN casp13.groups gr ON z.groups_id=gr.id ".
	($subque eq '' ? '' : " JOIN casp13.domains d ON (d.targets_id=z.targets_id AND z.domain=d.index)").
	" JOIN casp13.targets t ON z.targets_id=t.id
        WHERE z.targets_id||\'_\'||domain IN ($intersect_query) AND gr.code IN ($gr_code1, $gr_code2) 
	$subque 
	$subque2
        ORDER BY dom_id DESC, gr.code ASC";
#    print $query."\n";
    my $sth = $self->{_database}->query($query);
    if (defined($sth) && ($sth->rows() > 0)){
        my @data1; my @data2; my %tmp_hash;
        while(my ($gr_code, $gr_name, $dom_id, $score) = $sth->fetchrow_array()){
		if ($param->{target_flag} =~ m/^TR/){
			my ($target_id, $dom) = split(/_/, $dom_id);
			if ($dom == 0 && exists $tmp_hash{$target_id}){
				next;
			}
			if ($dom > 0){
				$tmp_hash{$target_id} = 1;
			}
		}
                if($gr_code1 == $gr_code){
                   push @data1, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        DOM_ID => $dom_id,
                        SCORE => $score
                   };
                }
                if ($gr_code2 == $gr_code) {
                   push @data2, {
                        GR_CODE => sprintf("%03d", $gr_code),
                        GR_NAME => $gr_name,
                        DOM_ID => $dom_id,
                        SCORE => $score
                   };
                }
        }
        return (\@data1,\@data2);
     } else {
        return (undef,undef);
     }
}

sub stat_significance_test{
    my ($self, $param, $gr_code1, $gr_code2) = @_;
    my ($refArr1, $refArr2) = $self->get_data_for_two_groups($param, $gr_code1, $gr_code2);
    if (!defined($refArr1) || !defined($refArr2)){ return (0,'-','-');}
    my $no_common = scalar(@$refArr1);
    if ( $no_common == 1){ return (0,'-','-');}
    my @arr1; my @arr2;
    for (my $i = 0; $i<scalar(@$refArr1); $i++){
        if ($$refArr1[$i]->{DOM_ID} eq $$refArr2[$i]->{DOM_ID}){
                push @arr1, $$refArr1[$i]->{SCORE};
                push @arr2, $$refArr2[$i]->{SCORE};
        }
    }
    # ttest 
    my ($t, $prob) = Statistics::ttest_matched(\@arr1,\@arr2);
    # bootstraping
    my $N = 1000; # number of repetitions
    my $count = 0 ;
    my $count_1_over_2 = 0;
    while(($count++) < $N){
        my @indexes = Statistics::sample(scalar(@arr1), scalar(@arr2));
        my $sum_1 = 0;
        my $sum_2 = 0;
        foreach my $index (@indexes){
                $sum_1 += $arr1[$index];
                $sum_2 += $arr2[$index];
        }
        if ($sum_1 > $sum_2){
                $count_1_over_2++;
        }
    }
    return ($no_common, sprintf("%5.4f",$prob), sprintf("%5.4f",$count_1_over_2/$N));
}

sub head2head_test{
    my ($self, $param, $gr_code1, $gr_code2) = @_;
    my ($refArr1, $refArr2) = $self->get_data_for_two_groups($param, $gr_code1, $gr_code2);
    if (!defined($refArr1) || !defined($refArr2)){ return (0,'-','-','-','-');}
    my $no_common = scalar(@$refArr1);
    if ( $no_common == 1){ return (0,'-','-','-','-');}
    my @arr1; my @arr2;
    for (my $i = 0; $i<scalar(@$refArr1); $i++){
        if ($$refArr1[$i]->{DOM_ID} eq $$refArr2[$i]->{DOM_ID} && defined($$refArr1[$i]->{SCORE}) && defined($$refArr2[$i]->{SCORE})){
                push @arr1, $$refArr1[$i]->{SCORE};
                push @arr2, $$refArr2[$i]->{SCORE};
        }
    }
    my $count_1_over_2 = 0;
    my $count_2_over_1 = 0;
    for (my $i = 0; $i < scalar(@arr1); $i++){
	if ($arr1[$i] > $arr2[$i]) {
		$count_1_over_2++;
	}
	if ($arr2[$i] > $arr1[$i]) {
                $count_2_over_1++;
        }
    }
    if (scalar(@arr1) == 0){
	return (0, '-', '-', '-', '-');
    } else {
	return (scalar(@arr1), sprintf("%4.3f", ($count_1_over_2)/scalar(@arr1)), sprintf("%4.3f",($count_2_over_1)/scalar(@arr1)), $count_1_over_2, $count_2_over_1 );
    }
}

=cut

1;
