package ContactGuidedTargetsManager;

use strict;
use warnings;

use lib qw(Core);
use TargetsManager;
use LocalConfiguration;
use GroupsManager;

# list of RR group codes, e.g.: '050,123,237'
my $rr_gr_codes = $LOCAL_CONFIG->{FOR_TC_RR_GROUPS};

# multiplier to find how many contacts should be taken 
# default value L/5
my $factor_len = 0.2;

#----------------------------------------------------------
# S U B R O U T I N E S
#----------------------------------------------------------

sub new {
    my ($class) = @_;
    my $self = {

    };
    bless $self, $class;
    return $self;
}



# the method parses the RR prediction file
# return reference to array of long range contacts
# sorted in descending order by probability
sub parseRRpredictionFile {
	my ($self, $target, $grcode) = @_;
	my $file =sprintf("%s/%s/%sRR%03d_1", $LOCAL_CONFIG->{DATA_RR_MODELS_DIR}, $target->{NAME}, $target->{NAME}, $grcode);
	my @contacts;
	my %hash_done; # hash to identify the pairs of residues that have been processed
	if (! -e $file){
		return \@contacts;
	}
        open FILE, "< $file";
	while(defined(my $l = <FILE>)){
		# parse line 
		# 1 174 0 8 0.945904081632653
		
		if ($l =~ /^\s*(\d+)\s+(\d+)\s+\S+\s+\S+\s+(\S+)/){
			my $res1 = $1; my $res2 = $2; my $prb = $3;
			# check if probability is greater then 0.5
			if ($prb < 0.5){
#				next;
			}
			# check the order of residues 
			# res1 should be less than res2
			if ($res2 < $res1) { # switch them without allocating memory for new variables
				# res1 = a, res2 = b
				$res2 = $res1 + $res2; # res1 = a res2 = a + b
				$res1 = $res2 - $res1; # res1 = b res2 = a + b
				$res2 = $res2 - $res1; # res1 = b res2 = a
			}
			# check for min separation along the sequence
			# at least 24 residues of separation (for long contacts)
			if ($res2 - $res1 < 24){
				next;
			}
			if (exists ($hash_done{$res1}{$res2})){
				next;
			} else {
				$hash_done{$res1}{$res2} = $prb;
				push @contacts, {
					_res1 => $res1,
					_res2 => $res2,
					_prb => $prb
				};
			}
		}

	}
	close FILE;
	# sort contacts with respect to the assigned probability
	@contacts = sort {$b->{_prb} <=> $a->{_prb}} @contacts ;
	# left only L/5 contacts
	my $last_index = sprintf("%d", $factor_len * ($target->{NUMBER_OF_AA})) - 1;
#	if ($last_index < $#contacts){
#		$#contacts = $last_index ;
#	}
	return \@contacts;
}

# the method truncs the list of contacts keeping only N ones from the top of the list
# all contacts with probability of the N-th contact are included
sub keepTopNContacts {
    my ($self, $refConts, $topN) = @_;
    my @arr = @{$refConts};
    # check if length of array less than $topN
    if ($topN > $#arr){
	# do nothing 
    } else {
	my $last_prb = $arr[$topN]->{_prb};# the probability of the N-th contact
	my $last_index = $topN;
	while(($last_index + 1 <= $#arr) && ($arr[$last_index + 1]->{_prb} == $last_prb)){
		$last_index++;
	}
	$#arr = $last_index;
    } 
    return \@arr;
}

# The method parses target's structure contacts
sub parseTargetContsFile{
    my ($self, $target) = @_;
    my $file = sprintf("%s/%s.pdb.conts", $LOCAL_CONFIG->{FOR_TC_TARGET_CONTS_DIR}, $target->{NAME});
    # if file doesn't exist or it cannot be read, return undef;
    if (! -f $file || ! -r $file){
	return undef;
    }
    my %CONTS;
    open F, "< $file";
	while(defined(my $l = <F>)){
		chomp $l;
		if ($l =~ m/^\s*$/){last;}
		if ($l =~ m/^(\d+)\s+(\d+)[A-Z]:(\d+)[A-Z]/){
			my $diff = $1;
			my $res1 = $2; 
			my $res2 = $3;
			if ($diff <= 23){last;}
			$CONTS{$res1}{$res2} = 1;
			$CONTS{$res2}{$res1} = 1;
		}
	}
    close F;
    return \%CONTS;
}


sub filterContacts{
    my ($self, $refConts, $refTargConts) = @_;
    my @arr = @{$refConts};
    if (! defined($refTargConts)){return $refConts;}
    my %CONTS = %{$refTargConts};
    my @result;
    foreach my $el (@arr){
	if (exists $CONTS{$el->{_res1}}{$el->{_res2}}){
		push @result, $el;
	}
    }
    return \@result;
}



# create tarball of filterd predictions for target TpXXX
# these tarball contains RR predictions from pre-selected groups without checking their correctness
# the conatcts are filtered to L/5 long-range contacts 
sub createTarBallTp{
    my ($self, $target) = @_;
    my $Tp_name = $target->{NAME};
    $Tp_name =~ s/^T0/Tp/;
    my $tmp_dir = sprintf("%s/%s", $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $Tp_name);
    if(-d $tmp_dir){
	system("rm -rf $tmp_dir");
    }
    system("mkdir -p $tmp_dir");
    system("chgrp casp $tmp_dir");
    system("chmod g+rwx $tmp_dir");
    my $topN = sprintf("%d", $factor_len * ($target->{NUMBER_OF_AA})) - 1;
    my @gr_codes = split(/,/, $rr_gr_codes);
    my $count = 0;
    foreach my $code (@gr_codes){
	# get long-range contacts from predictions
	my $refConts = $self->keepTopNContacts( $self->parseRRpredictionFile($target, $code), $topN);
	my @conts = @{$refConts};
	# if empty list of contacts - skip group
	if (scalar(@conts) == 0) {next;}
	# write file with predictions
	my $outfile = sprintf("%s/%sRR%03d_1", $tmp_dir, $Tp_name, $code);
	open OUT, "> $outfile";
	print OUT "TARGET $Tp_name\n";
	printf OUT "REMARK RR predictions of group %03d\n", $code;
	printf OUT "REMARK L/5 long-range contacts\n";
	foreach my $c (@conts){
		printf OUT "%d %d 0 8 %s\n", $c->{_res1}, $c->{_res2}, $c->{_prb};
	}
	print OUT "TER";
	close OUT;
	$count++;
	if ($count >= 10){last;}
    }
    # create tarball
    my $tarballFile = sprintf("%s/%s.tar.gz", $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $Tp_name);
    my $command = sprintf("tar -czf %s -C %s %s", $tarballFile , $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $Tp_name);
    system("$command");
    # chgrp
    system("chgrp casp $tarballFile");
    # clean remove tmp_dir
    system("rm -rf $tmp_dir");
}


# create tarball of filtered correct predictions for target TcXXX 
# these tarball contains correct RR predictions from pre-selected groups
# the contacts are filtered to L/5 long-range contacts 
sub createTarBallTc{
    my ($self, $target) = @_;
    my $Tc_name = $target->{NAME};
    $Tc_name =~ s/^T0/Tc/;
    my $TARG_CONTS = $self->parseTargetContsFile($target);
    if (! defined ($TARG_CONTS )){
	return; # file with target's contacts doesn't exist
    }
    my $tmp_dir = sprintf("%s/%s", $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $Tc_name);
    if(-d $tmp_dir){
        system("rm -rf $tmp_dir");
    }
    system("mkdir -p $tmp_dir");
    system("chgrp casp $tmp_dir");
    system("chmod g+rwx $tmp_dir");
    my $topN = sprintf("%d", $factor_len * ($target->{NUMBER_OF_AA})) - 1;
    my @gr_codes = split(/,/, $rr_gr_codes);
    my $count = 0;
    foreach my $code (@gr_codes){
        # get long-range contacts from predictions
        my $refConts = $self->keepTopNContacts($self->filterContacts($self->parseRRpredictionFile($target, $code), $TARG_CONTS), $topN);
        my @conts = @{$refConts};
        # if empty list of contacts - skip group
        if (scalar(@conts) == 0) {next;}
        # write file with predictions
        my $outfile = sprintf("%s/%sRR%03d_1", $tmp_dir, $Tc_name, $code);
        open OUT, "> $outfile";
        print OUT "TARGET $Tc_name\n";
        printf OUT "REMARK RR correct predictions of group %03d\n", $code;
        printf OUT "REMARK L/5 long-range contacts\n";
        foreach my $c (@conts){
                printf OUT "%d %d 0 8 %s\n", $c->{_res1}, $c->{_res2}, $c->{_prb};
        }
        print OUT "TER";
        close OUT;
	$count++;
        if ($count >= 10){last;}
    }
    # create tarball
    my $tarballFile = sprintf("%s/%s.tar.gz", $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $Tc_name);
    my $command = sprintf("tar -czf %s -C %s %s", $tarballFile , $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $Tc_name);
    system("$command");
    # chgrp
    system("chgrp casp $tarballFile");
    # clean remove tmp_dir
    system("rm -rf $tmp_dir");
}


# method returns array of human targets expired today for server prediction,
# for Tp targets we use RR predictions from servers only
sub get_targets_for_today {
    my ($self) = @_;
    my $targetsmanager = new TargetsManager();
    my $params = {
        view_targets => 'human',
        view => 'regular'
    };
    my @targets = ();
    my $current = DateTime->now(time_zone => 'America/Los_Angeles');
    foreach my $target ($targetsmanager->public_targets($params)){
        if($target->{IS_CANCELED} == 1){
                next;
        }
	# human expiration date 
        my $date_line = $target->{HUMAN_EXPIRATION_DATE};
        my $day = 0; my $month = 0; my $year = 0;
        if($date_line =~ m/(\d+)\-(\d+)\-(\d+)/){
                $year = $1; $month = $2; $day = $3;
        }
        my $date = new DateTime (
            year => $year,
            month => $month,
            day => $day,
            hour => $LOCAL_CONFIG->{EXPIRED_HOURS},
            minute => $LOCAL_CONFIG->{EXPIRED_MINUTES},
            time_zone => 'America/Los_Angeles'
        );
        my $date_new = new DateTime (
            year => $year,
            month => $month,
            day => $day,
            hour => $LOCAL_CONFIG->{EXPIRED_HOURS},
            minute => $LOCAL_CONFIG->{EXPIRED_MINUTES},
            time_zone => 'America/Los_Angeles'
        );
        $date_new->add(days => 1);
        if(DateTime->compare_ignore_floating($date, $current) <= 0 &&
                DateTime->compare_ignore_floating($current, $date_new) < 0) {
                push(@targets, $target);
        }
    }
    return @targets;
}

# the method calculates the consensus of predicted contacts from all  predictors;
# sort them with respect to the consensus score
# 
sub calcConsensus {
    my ($self, $target) = @_;
    my @gr_codes = split(/,/, $rr_gr_codes);
    my $HASH_CONTACTS; # consensus contacts 
    foreach my $code (@gr_codes){
    	my $refConts = $self->parseRRpredictionFile($target, $code);
	my @conts = @{$refConts};
	foreach my $c (@conts){
		if (defined $HASH_CONTACTS->{$c->{_res1}}->{$c->{_res2}}){
			$HASH_CONTACTS->{$c->{_res1}}->{$c->{_res2}} += 1;
		} else {
			$HASH_CONTACTS->{$c->{_res1}}->{$c->{_res2}} = 1;
		}
	}
    }
    my @ARR_CONTACTS;
    foreach my $res1 (keys %$HASH_CONTACTS){
	foreach my $res2 (keys %{$HASH_CONTACTS->{$res1}}){
		push @ARR_CONTACTS, {
			_res1 => $res1,
			_res2 => $res2,
			_consScore => $HASH_CONTACTS->{$res1}->{$res2}
		};
	}
    }
    @ARR_CONTACTS = sort  {$b->{_consScore}<=>$a->{_consScore}} @ARR_CONTACTS;
    # left only L/5 contacts
    my $last_index = sprintf("%d", $factor_len * ($target->{NUMBER_OF_AA})) - 1;
    if ($last_index < $#ARR_CONTACTS){
	$#ARR_CONTACTS = $last_index ;
    }
    return \@ARR_CONTACTS;
}

sub processPerTarget {
    my ($self, $target) = @_;
    my $refConts = $self->calcConsensus($target);
    my $targetName_new = $target->{NAME};
    $targetName_new =~ s/T0/Tp/;
    my $outFile = sprintf("%s/%s.stage1.RR", $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $targetName_new);
    open OUT, "> $outFile";
    printf OUT "TARGET %s\n", $targetName_new;
    printf OUT "REMARK consensus of contacts\n";
    printf OUT "REMARK submitted by predictors in category RR\n";
    #printf OUT $self->split50($target->{SEQUENCE_DATABASE});
    foreach my $c (@{$refConts}){
	printf OUT "%d %d 0 8 1.0\n", $c->{_res1}, $c->{_res2};
    }
    print OUT "TER";
    close OUT;
}

sub process {
    my ($self) = @_;
    my @targets = $self->get_targets_for_today();
    foreach my $target (@targets){
	$self->processPerTarget($target);
    }
}


sub createTarballAllPredsForTp{
    my ($self, $target) = @_;
    my $Tp_name = $target->{NAME};
    $Tp_name =~ s/T0/Tp/;
    #my $command = sprintf("tar -czf -C %s/%s   %s/%s/%sRR*_1", $LOCAL_CONFIG->{DATA_RR_MODELS_DIR}, $target->{NAME}, $LOCAL_CONFIG->{DATA_RR_MODELS_DIR}, $target->{NAME}, $target->{NAME});
    # create tarball
    my $tarballFile = sprintf("%s/%s.tar.gz", $LOCAL_CONFIG->{FOR_TC_FILE_LOCATION}, $Tp_name);
    my $command = sprintf("tar -czf %s -C %s/%s %sRR*_1", $tarballFile , $LOCAL_CONFIG->{DATA_RR_MODELS_DIR}, $target->{NAME}, $target->{NAME});
    system("$command");
    # chgrp
    system("chgrp casp $tarballFile");
}



sub split50 {
   my ($self, $str) = @_;
   my $n = length(uc($str))/50;
   my $res = "";
   for (my $i = 0; $i < $n; $i++){
        $res .= substr($str, $i*50 , 50)."\n";
   }
   return $res;
}

1;
