package GroupsAnalysisManager;
use strict;
use warnings;

use DBI;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use DateTime;
use lib qw(Core);
use Database;
use PDBUtils;
use Configuration;
use LocalConfiguration;
use ResultsManager;
use CGI qw(:all);
use BO;
our @ISA = qw(BO);
sub new {
    my ($class) = @_;
    my $self = $class->SUPER::new("casp13.groups_analysis");

    bless $self, $class;
    return $self;
}

sub get_new_model {
    my ($self) = @_;
    my %model = (
########################### system
	id => '',
	groups_id => '',
    domain_classifications_id => '',
	target_type => '',
########################## data
    sum_z_gdt_ts_all_pos => '',
	sum_z_gdt_ts_server_pos => '',
	sum_z_gdt_ha_all_pos => '',
	sum_z_gdt_ha_server_pos => '',
	sum_z_gdt_al0p_all_pos => '',
	sum_z_gdt_al0p_server_pos => '',

	avg_gdt_ts_all => '',
	avg_gdt_ts_server => '',
	avg_gdt_ha_all => '',
	avg_gdt_ha_server => '',
	avg_gdt_al0p_all => '',
	avg_gdt_al0p_server => '',

	avg_mom_z => '',
	avg_dali_z => '',
	avg_dal_4 => '',

	domains_count => '',
######################### OTHER
	INDEX => 0
    );
    return %model;
}


sub is_table_column {
my ($self, $colunm_name) = @_;
my $result = 0;
if(	$colunm_name eq 'id' ||
	$colunm_name eq 'groups_id' ||
	$colunm_name eq 'domain_classifications_id' ||
	$colunm_name eq 'target_type' ||
	########################### data
	$colunm_name eq 'sum_z_gdt_ts_all_pos' ||
	$colunm_name eq 'sum_z_gdt_ts_server_pos' ||
	$colunm_name eq 'sum_z_gdt_ha_all_pos' ||
	$colunm_name eq 'sum_z_gdt_ha_server_pos' ||
	$colunm_name eq 'sum_z_gdt_al0p_all_pos' ||
	$colunm_name eq 'sum_z_gdt_al0p_server_pos' ||

	$colunm_name eq 'avg_gdt_ts_all' ||
	$colunm_name eq 'avg_gdt_ts_server' ||
	$colunm_name eq 'avg_gdt_ha_all' ||
	$colunm_name eq 'avg_gdt_ha_server' ||
	$colunm_name eq 'avg_gdt_al0p_all' ||
	$colunm_name eq 'avg_gdt_al0p_server' ||

	$colunm_name eq 'avg_mom_z' ||
	$colunm_name eq 'avg_dali_z' ||
	$colunm_name eq 'avg_dal_4' ||

	$colunm_name eq 'domains_count'

  ) {
	$result = 1;
	}
    return $result;
}

sub exist_by_parameters {
    my ($self, %model) = @_;
    my $result = 0;
    my $query = sprintf("SELECT id FROM %s WHERE groups_id=%d and domain_classifications_id=%d and target_type=%d",
	$self->{_table}, $model{groups_id}, $model{domain_classifications_id}, $model{target_type});
    my $sth = $self->{_database}->query($query);
    if(defined($sth) && ($sth->rows() > 0)) {
        my ($id) = $sth->fetchrow_array();
	$result = $id;
    }
    return $result;
}

sub get_rows {
    my ($self,  $type, $tbm, $tbmfm, $fm, $class_other, $exclude, $tbm_hard) = @_;

    my @results = ();
	my @types = ();
	my $subque = '';
	if ($tbm eq 'on' ) {  push (@types, 2); } else {$subque .= " and dci<>2 "; }
	if ($tbmfm eq 'on') {push (@types, 4); } else {$subque .= " and dci<>4 ";}
 	if ($fm eq 'on') { push (@types, 3); } else {$subque .= " and dci<>3 ";}
	if ($class_other eq 'on') { push (@types, 5); } else {$subque .= " and dci<>5 ";}
	if ($tbm_hard eq 'on' ) {  push (@types, 1); } else {$subque .= " and dci<>1 "; }
#	if (!@types) { push (@types, 2) ; }
	if (scalar(@types) == 0){$subque = '';}
	my $model = join(',',@types);

 	my $q = "
 	  select
 		gr_id,
 		count(domains_count) as domains_count,
 		gr_name as gr_name,
 		type,

                count(case when rank>0 and rank=1 then rank end) as no_rank_1,
                count(case when rank>0 and rank<=3 then rank end) as no_rank_3,
                count(case when rank>0 and rank<=10 then rank end) as no_rank_10,

                count(case when gdt_ts_4>80 then gdt_ts_4 end) as no_gdt_ts_80,
                count(case when gdt_ts_4>50 then gdt_ts_4 end) as no_gdt_ts_50,
                count(case when gdt_ts_4>40 then gdt_ts_4 end) as no_gdt_ts_40,
		count(case when gdt_ts_4>30 then gdt_ts_4 end) as no_gdt_ts_30,
		
		sum(case when sum_gdt_ts_z>0 then sum_gdt_ts_z else 0 end) as sum_gdt_ts_z,
		avg(case when sum_gdt_ts_z>0 then sum_gdt_ts_z else 0 end) as avg_gdt_ts_z,
		avg(gdt_ts_4) as avg_gdt_ts_4,
		avg(case when sum_al0p_z>0 then sum_al0p_z else 0 end) as sum_al0p_z,
		avg(avg_al0p) as avg_al0p,
		sum(case when sum_gdt_ha_z>0 then sum_gdt_ha_z else 0 end) as sum_gdt_ha_z,
		avg(case when sum_gdt_ha_z>0 then sum_gdt_ha_z else 0 end) as avg_gdt_ha_z,
		avg(case when avg_gdt_ha_4>0 then avg_gdt_ha_4 else 0 end) as avg_gdt_ha_4,
		avg(case when avg_mammoth>0 then avg_mammoth else 0 end ) as avg_mammoth, avg(mean) as ca, avg(avg_dali) as dali
".
",
		median(response_time) as response_time".
"
		from "  .(($exclude eq 'old') ? "casp13.groups_analysis_distinct_old": "casp13.groups_analysis_distinct" )."
 	  where dci in (1,2,3,4,5) $subque and ".(($type eq 'all') ? " is_server_only=0 and group_type='h/s'" : " type>0  and group_type='s'")."
 	  group by gr_id, gr_name, type
 	  order by  sum_gdt_ts_z desc";

    my $sth = $self->{_database}->query($q);

    if(defined($sth) && ($sth->rows() > 0)) {
        my $index = 0;
        while(my($gr_id, $domains_count, $gr_name, $gr_type, $no_rank1, $no_rank3, $no_rank10, $no_80, $no_50, $no_40, $no_30, $sum_gdt_ts_z, $avg_gdt_ts_z, $avg_gdt_ts, $sum_al0p_z, $avg_al0p, $sum_gdt_ha_z, $avg_gdt_ha_z, $avg_gdt_ha_4, $avg_mammoth_z, $ca, $dali, $response_time) = $sth->fetchrow_array()) {
#	while(my($gr_id, $domains_count, $gr_name, $gr_type, $sum_gdt_ts_z, $avg_gdt_ts_z, $avg_gdt_ts, $sum_al0p_z, $avg_al0p,$avg_gdt_ha_4, $avg_mammoth_z, $ca, $dali) = $sth->fetchrow_array()) {
                if (!defined($response_time)){
                        $response_time = -1.0;
                }

		push(@results, {
		INDEX => ++$index,
		NAME => $gr_name,
		TYPE => $gr_type,
		CODE => sprintf('%03d', $gr_id),
		no_top1 => sprintf('%d', $no_rank1), # number of domains for which the group was the best
		no_top3 => sprintf('%d', $no_rank3), # number of domains for which the group was in top3
		no_top10 => sprintf('%d', $no_rank10), # number of domains for which the group was in top10
		no_80 => sprintf('%d', $no_80), # number of models with gdt_ts > 80
		no_50 => sprintf('%d', $no_50), # number of models with gdt_ts > 50
		no_40 => sprintf('%d', $no_40), # number of models with gdt_ts > 40
		no_30 => sprintf('%d', $no_30), # number of models with gdt_ts > 30
		sum_gdt_ts_z 	=> sprintf("%.3f", $sum_gdt_ts_z),
		avg_gdt_ts_z 	=> sprintf("%.3f", $avg_gdt_ts_z),
 		avg_gdt_ts 	=> sprintf("%.3f", $avg_gdt_ts),
		sum_z_gdt_al0p 	=> sprintf("%.3f", $sum_al0p_z),
 		avg_gdt_al0p 	=> sprintf("%.3f", $avg_al0p),
		sum_gdt_ha_z    => sprintf("%.3f", $sum_gdt_ha_z),
		avg_gdt_ha_z	=> sprintf("%.3f", $avg_gdt_ha_z),
 		avg_gdt_ha 	=> sprintf("%.3f", $avg_gdt_ha_4),
 		avg_mom_z 	=> sprintf("%.3f", $avg_mammoth_z),
		avg_dal_4	=> sprintf("%.3f", $ca),
		dali 		=> sprintf("%.3f", $dali),
		response_time   => sprintf("%.3f", $response_time),
#		response_time   => sprintf("%.3f", 1.00),
		domains_count => $domains_count
		}
	    );
        }
    }
    return @results;
}

# return rows with results of analysis of performance of th egroups
# based on the best model (of up to 5 submitted)
sub get_rows_best {
    my ($self,  $type, $tbm, $tbmfm, $fm, $class_other, $exclude, $tbm_hard) = @_;

    my @results = ();
        my @types = ();
        my $subque = '';
        if ($tbm eq 'on' ) {  push (@types, 2); } else {$subque .= " and dci<>2 "; }
        if ($tbmfm eq 'on') {push (@types, 4); } else {$subque .= " and dci<>4 ";}
        if ($fm eq 'on') { push (@types, 3); } else {$subque .= " and dci<>3 ";}
        if ($class_other eq 'on') { push (@types, 5); } else {$subque .= " and dci<>5 ";}
        if ($tbm_hard eq 'on' ) {  push (@types, 1); } else {$subque .= " and dci<>1 "; }
        if (scalar(@types) == 0){$subque = '';}
        my $model = join(',',@types);

        my $q = "
          select
                g.code,
		g.name,
		g.type,
		count(m.best_gdt_ts),
		avg(m.best_gdt_ts) as avg_best_gdt_ts,
		sum(m.z_gdt_ts_4_a) as sum_z_gdt_ts_4,
                avg(m.z_gdt_ts_4_a) as avg_z_gdt_ts_4,
		count(CASE WHEN m.best_gdt_ts >25 THEN m.best_gdt_ts END) AS no_25,
		count(CASE WHEN m.best_gdt_ts >30 THEN m.best_gdt_ts END) AS no_30,
		count(CASE WHEN m.best_gdt_ts >35 THEN m.best_gdt_ts END) AS no_35,
		count(CASE WHEN m.best_gdt_ts >40 THEN m.best_gdt_ts END) AS no_40,
		count(CASE WHEN m.best_gdt_ts >45 THEN m.best_gdt_ts END) AS no_45,
                count(CASE WHEN m.best_gdt_ts >50 THEN m.best_gdt_ts END) AS no_50
                from  casp13.max_gdt_ts_4_per_domain_per_group m join casp13.groups g on g.code=m.gr_code 
		where dci in (1,2,3,4,5) $subque and  "
		.(($type eq 'all') ? " is_server_only=0 and group_type='h/s'" : " g.type>0  and group_type='s'")." 
          group by g.code, g.name, g.type
          order by sum_z_gdt_ts_4 desc, avg_best_gdt_ts desc";

       my $sth = $self->{_database}->query($q);

    if(defined($sth) && ($sth->rows() > 0)) {
        my $index = 0;
        while(my($gr_code, $gr_name, $gr_type, $domains_count, $avg_best_gdt_ts, $sum_z_gdt_ts, $avg_z_gdt_ts, $no_25, $no_30, $no_35, $no_40, $no_45, $no_50) = $sth->fetchrow_array()) {

                push(@results, {
                INDEX => ++$index,
                NAME => $gr_name,
                TYPE => $gr_type,
                CODE => sprintf('%03d', $gr_code),
		avg_best_gdt_ts => sprintf("%.3f",$avg_best_gdt_ts),
		sum_z_gdt_ts => sprintf("%.3f",$sum_z_gdt_ts),
		avg_z_gdt_ts => sprintf("%.3f",$avg_z_gdt_ts),
                no_25 => sprintf('%d', $no_25),
		no_30 => sprintf('%d', $no_30),
		no_35 => sprintf('%d', $no_35),
		no_40 => sprintf('%d', $no_40),
		no_45 => sprintf('%d', $no_45),
                no_50 => sprintf('%d', $no_50), 
		domains_count => $domains_count,
                }
            );
        }
    }
    return @results;

}



1;
