#!/usr/bin/perl
use strict;
use warnings;

#use Digest::MD5 qw(md5 md5_hex md5_base64);
#use MIME::Parser;
#use MIME::Entity;
#use MIME::Body;
#use Getopt::Long;


use lib qw(Core);
use lib qw(Classes);

#use DomainsManager;
use Logger;
use LocalConfiguration;
#use Email;

#use DateTime;

#use ResultsManager;
#use RRResultsManager;
#use ResultsTargetManager;
#use ResultsTemplatesManager;

# Script name constant; not referenced elsewhere in the visible code.
my $SCRIPTNAME = 'create_domains.pl';

# Report state. The live code below does not use these; they are read by the
# disabled legacy code further down the file.
my $subject = "REPORT from Create Domains Script";
my $body = '';
my $damains_list = '';
#my $logger = new Logger();


my @eval_servers = ("pinot2"); #"malbec21", "malbec22", "malbec23");

# Touch (NOT remove) the T9999-D1 target PDB path on every evaluation server.
# NOTE(review): this looks like a connectivity/permission check for the remote
# cleanup done by the disabled code -- confirm before relying on it.
my $reg_exp = sprintf("%s/%s-D1.pdb",$LOCAL_CONFIG->{DATA_TARGETS_DIR}, 'T9999');
print "$reg_exp\n";
foreach my $server (@eval_servers) {
#   my $json =  `ssh -n www-data\@bystra 'ssh -f -n $_ /bin/rm -rf $reg_exp'`;
    # List-form system() keeps the local shell out of the picture and lets us
    # see whether ssh actually succeeded; the old backticks captured output
    # that was never used and silently ignored failures.
    my $status = system('ssh', '-f', '-n', $server, 'touch', $reg_exp);
    if ($status != 0) {
        warn "ssh touch on $server failed (wait status $status)\n";
    }
}


# Stop here: nothing below this point is meant to run.
exit;
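# DON'T RUN: everything below, down to the closing "=cut" line, is legacy code
# that has been disabled by wrapping it in a POD block.

=pod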
# NOTE(review): the `use` lines for DateTime, DomainsManager, Email and the
# results managers are commented out at the top of the file; they must be
# restored before this code can be re-enabled.

# Current time in the America/Los_Angeles zone; its date is embedded in the
# name of the obsolete-data tarball.
my $current = DateTime->now( time_zone => 'America/Los_Angeles' );

# Managers whose delete_results_for_all_domains_for_target() is called for the
# target being re-split. Direct Class->new() calls replace the ambiguous
# indirect object syntax (`new Class()`).
my $results_manager = ResultsManager->new();
my $rr_results_manager = RRResultsManager->new();
my $results_target_manager = ResultsTargetManager->new();
my $results_template_manager = ResultsTemplatesManager->new();

my $d_m = DomainsManager->new();


# All known domains. The 'field' => 'd.date' parameter is passed straight to
# the manager (presumably a sort column -- confirm in DomainsManager).
my $parameters = {'field' => 'd.date'};
my @domains = $d_m->get_all_domains($parameters);

my $hasDone = 0; # flag to indicate that the deletion of obsolete files and records from database and split into domains have been done

# Flag: create test domains only. According to the original note, only domains
# designated 7, 8 or 9 are created in this mode and domains 1-6 are not deleted.
# Enabled by passing the literal word 'testdom_only' as the second argument.
my $testdom_only = 0;
if (defined($ARGV[1]) && $ARGV[1] eq 'testdom_only'){
	$testdom_only = 1;
}

# EVALUATION SERVERS
#my @eval_servers = ("prediction2", "prediction3", "prediction4", "prediction5", "prediction8", "prediction9", "prediction10",  "malbec21", "malbec22", "malbec23");
# NOTE(review): @eval_servers is also declared with `my` in the live code at
# the top of the file; if both stay active this line triggers a
# "masks earlier declaration" warning.
my @eval_servers = ("malbec21", "malbec22", "malbec23");

# VARIABLES OF TARBALLS
my $tarballPredictionsDir = "/local/CASP13/TARBALLS/predictions_trimmed_to_domains"; # directory with tarballs of trimmed predictions
my $tarballTargetsName = "/local/CASP13/TARBALLS/targets/Targets_trimmed_to_domains.tar.gz";  # tarball of trimmed targets


# Main re-split pass (disabled): walk every known domain and act only on the
# entries whose TARGET_NAME equals the first command-line argument.
# NOTE(review): the indentation below is inconsistent; the closing braces near
# the end are annotated to show which block each one terminates.
for (my $i = 0; $i < scalar(@domains); $i++) {
	if ($domains[$i]->{TARGET_NAME} eq $ARGV[0]) {
          # One-time work, guarded by $hasDone so that it runs only for the
          # first matching domain of the requested target.
          if ($hasDone == 0) {
		$hasDone = 1;
		# Test-domain mode: run only the two split steps with the flag
		# set, then terminate the whole script.
		# NOTE(review): $tmp collects the returned messages but is never used.
		if ( $testdom_only == 1){
			my $tmp = $d_m->split_targets_to_domains($domains[$i]->{TARGET_NAME}, $testdom_only);
			$tmp .= $d_m->split_models_to_domains($domains[$i]->{TARGET_NAME}, $testdom_only);
			exit;
		}
		# Capture the ranges of the existing domain files before they are
		# deleted; the string becomes part of the archive name below.
		# NOTE(review): toStringRangeDomainsFromPDBFiles has a bare
		# `return` path, so $dom_ranges can be undef here.
		my $dom_ranges = &toStringRangeDomainsFromPDBFiles($LOCAL_CONFIG->{DATA_TARGETS_DIR},$domains[$i]->{TARGET_NAME});
		sleep 1;
		# remove target pdb file 
		# $reg_exp is a shell glob (not a regular expression) matching the
		# target's domain PDB files; $tar_file is named after the target,
		# the current date, this process id ($$) and the domain ranges.
		my $reg_exp = sprintf("%s/%s-D*.pdb",$LOCAL_CONFIG->{DATA_TARGETS_DIR}, $domains[$i]->{TARGET_NAME});
		my $tar_file = sprintf("/data/CASP13/RESULTS/OBSOLETE_DOMAINS/%s_%s_%s_%s.tar", $domains[$i]->{TARGET_NAME},$current->ymd('-'), $$, $dom_ranges);

                print $tar_file."\n";
		# Append the matching regular files to the archive, then the
		# matching symlinks (tar -h archives what the links point to).
		system(sprintf("find %s -type f | xargs --no-run-if-empty tar -rf %s --absolute-names", $reg_exp, $tar_file));
		system(sprintf("find %s -type l | xargs --no-run-if-empty tar -rhf %s --absolute-names", $reg_exp, $tar_file));

		# Delete the local copies once they have been archived.
		system("/bin/rm -rf $reg_exp");
		#print $reg_exp."\n";
	        sleep 1;
	        # remove from evaluation servers
	        # Each server is reached through an ssh hop via www-data@bystra.
	        # NOTE(review): the captured output ($json) is never used and
	        # failures are not detected.
                foreach(@eval_servers){
                        my $json =  `ssh -n www-data\@bystra 'ssh -f -n $_ /bin/rm -rf $reg_exp'`;
#			my $json =  `ssh -n www-data\@bystra 'ssh -f -n $_ /bin/ls -1 $reg_exp '`;
#			print "ssh -n www-data\@bystra 'ssh -f -n $_ /bin/ls -1 $reg_exp '\n";
#			print "$_:\n";
#			print $json;
#			print "===============\n";	
                        sleep 1;
                }
                sleep 1;

		#remove models
		# $reg_exp is reused: it now globs the target's per-domain entries
		# in the models directory, removed locally and then remotely.
                $reg_exp = sprintf("%s/%s-D*",$LOCAL_CONFIG->{DATA_MODELS_DIR}, $domains[$i]->{TARGET_NAME});
                system("/bin/rm -rf $reg_exp");
                sleep 1;

                foreach(@eval_servers){
                        my $json =  `ssh -n www-data\@bystra 'ssh -f -n $_ /bin/rm  -rf $reg_exp'`;
#                        my $json =  `ssh -n www-data\@bystra 'ssh -f -n $_ /bin/ls -1 $reg_exp'`;
#			print "$_:\n";
#			print $json;
#			print "===============\n";
                        sleep 1;
                }
                sleep 1;
		
		# delete plots
		# Files are removed from the PLOTS tree, directories from the
		# EQV/LACC/MD/TC/TM subtrees. Note that the GDT_domains pattern is
		# "<target>*", i.e. it is not restricted to "-D" names.
		print "\ndelete plots:\n";
		system(sprintf("find /data/CASP13/PLOTS/ -name \'%s*-D*\' -type f | xargs --no-run-if-empty /bin/rm -f ", $domains[$i]->{TARGET_NAME}));
		system(sprintf("find /data/CASP13/PLOTS/EQV/ -name \'%s-D*\' -type d | xargs --no-run-if-empty /bin/rm -rf ", $domains[$i]->{TARGET_NAME}));
		system(sprintf("find /data/CASP13/PLOTS/LACC/ -name \'%s-D*\' -type d | xargs --no-run-if-empty /bin/rm -rf ", $domains[$i]->{TARGET_NAME}));
		system(sprintf("find /data/CASP13/PLOTS/MD/ -name \'%s-D*\' -type d | xargs --no-run-if-empty /bin/rm -rf ", $domains[$i]->{TARGET_NAME}));
		system(sprintf("find /data/CASP13/PLOTS/TC/ -name \'%s-D*\' -type d | xargs --no-run-if-empty /bin/rm -rf ", $domains[$i]->{TARGET_NAME}));
		system(sprintf("find /data/CASP13/PLOTS/TM/ -name \'%s-D*\' -type d | xargs --no-run-if-empty /bin/rm -rf ", $domains[$i]->{TARGET_NAME}));
		system(sprintf("find /data/CASP13/PLOTS/GDT_domains -name \'%s*\' -type f | xargs --no-run-if-empty /bin/rm -f ", $domains[$i]->{TARGET_NAME}));


                # delete Grishin scores files
                system(sprintf("find /data/CASP13/tmp_Grishin/ -name \'%s-D*\' -type f | xargs --no-run-if-empty /bin/rm -f ", $domains[$i]->{TARGET_NAME}));

                # delete results tables files
                system(sprintf("find /data/CASP13/TARBALLS/results/ -name \'%s-D*\' -type f | xargs --no-run-if-empty /bin/rm -f ", $domains[$i]->{TARGET_NAME}));




		# tar obsolete results 
		# TODO: set permission 775 for group casp 
		# The per-domain result files and directories are appended to the
		# same $tar_file created above, then removed from RESULTS_DIR after
		# a 5 second pause. The archive itself is given group casp and
		# mode 664 (not the 775 mentioned in the TODO).
		print "\ncreate tar of obsolete data results\n";
#		my $tar_file = sprintf("/data/CASP13/RESULTS/OBSOLETE_DOMAINS/%s_%s_%s_%s.tar", $domains[$i]->{TARGET_NAME},$current->ymd('-'), $$, $dom_ranges);
#		print $tar_file."\n";
		system(sprintf("find %s -name \'%s-D*\' -type f | xargs --no-run-if-empty tar -rf %s --absolute-names",$LOCAL_CONFIG->{RESULTS_DIR},$domains[$i]->{TARGET_NAME},$tar_file));
		system(sprintf("find %s -name \'%s-D*\' -type d | xargs --no-run-if-empty tar -rf %s --absolute-names ",$LOCAL_CONFIG->{RESULTS_DIR},$domains[$i]->{TARGET_NAME},$tar_file));
		sleep 5;
		system(sprintf("find %s -name \'%s-D*\' -type f | xargs --no-run-if-empty /bin/rm -f ",$LOCAL_CONFIG->{RESULTS_DIR},$domains[$i]->{TARGET_NAME}));
		system(sprintf("find %s -name \'%s-D*\' -type d | xargs --no-run-if-empty /bin/rm -rf ",$LOCAL_CONFIG->{RESULTS_DIR},$domains[$i]->{TARGET_NAME}));
		system(sprintf("chgrp casp %s", $tar_file));
		system(sprintf("chmod 664 %s", $tar_file));


		# delete results records from database
		print "\ndelete records from database\n";
		$results_manager->delete_results_for_all_domains_for_target($domains[$i]->{TARGET_NAME});
		$rr_results_manager->delete_results_for_all_domains_for_target($domains[$i]->{TARGET_NAME});
		$results_target_manager->delete_results_for_all_domains_for_target($domains[$i]->{TARGET_NAME});
		$results_template_manager->delete_results_for_all_domains_for_target($domains[$i]->{TARGET_NAME});


		# split target to domains
		# Any non-empty message returned by the manager is appended to the
		# report body under a "TARGET:" heading.
		my $tmp_msg = '';
		$tmp_msg =  $d_m->split_targets_to_domains($domains[$i]->{TARGET_NAME});
		if($tmp_msg ne '') {
			$body .= "\n TARGET: ". $domains[$i]->{TARGET_NAME} ."\n";
			$body .= $tmp_msg;
		}

		# split models to domains
		$tmp_msg = $d_m->split_models_to_domains($domains[$i]->{TARGET_NAME});
		if($tmp_msg ne '') {
			$body .= "\n TARGET: ". $domains[$i]->{TARGET_NAME} ."\n";
			$body .= $tmp_msg;
		}

	}
	# ^ end of the one-time `if ($hasDone == 0)` block.

	# Runs for EVERY domain of the requested target: add one line to the
	# domain listing. When the domain LENGTH is at least the amino-acid count
	# reported for the target, a "SIMILARITY DOMAIN TO TARGET" note with both
	# lengths is appended to the line.
	$damains_list .= sprintf("TARGET: %s DOMAIN: %s RANGE: %s DATE: %s      %s\n" , $domains[$i]->{TARGET_NAME}, $domains[$i]->{DOMAIN_INDEX}, $domains[$i]->{RANGE}, $domains[$i]->{DATE}, (($domains[$i]->{LENGTH} >= $d_m->get_aa_count_for_target($domains[$i]->{TARGET_ID}))?sprintf("SIMILARITY DOMAIN TO TARGET -- domain length: %s sequence length: %s ", $domains[$i]->{LENGTH}, $d_m->get_aa_count_for_target($domains[$i]->{TARGET_ID})):""));

        # end of `if (TARGET_NAME eq $ARGV[0])`
        }
# end of the loop over @domains
}

# Compose the final report: a placeholder when no split step produced a
# message, followed by the listing of available domains.
$body = "No new domains were found. \n" if $body eq '';
$body = join('', $body, "\n\nAvailable domains list:\n", $damains_list);

# Echo the report to standard output before mailing it.
print "BODY:$body\n";

# Mail the report. The argument meaning is presumably
# (to, from, cc, bcc, subject, body) -- confirm against Email.pm.
# Alternative recipient kept from the original code:
#Email::send_email('akryshtafovych@ucdavis.edu', 'casp@predictioncenter.org', '', '', $subject, $body);
Email::send_email(
    'bmonast@gmail.com',
    'casp@predictioncenter.org',
    '',
    '',
    $subject,
    $body,
);


# BM: this line is no longer needed, since the check for short models (for domains) is implemented in DomainsManager.pm (see sub split_models_to_domains).
#system("/data/CASPROL/predictions/remove_short_models.sh $ARGV[0]");


# CREATE TARBALLS OF TRIMMED PREDICTIONS
# For the target named by the first command-line argument: clear its old
# per-domain tarballs once, then rebuild one tarball for each domain whose
# index is a single digit 1-9.
my $one_time = 0;
sleep 1;
foreach my $domain (@domains) {
    next unless $domain->{TARGET_NAME} eq $ARGV[0];

    # Remove the stale tarballs only when the first matching domain is seen.
    unless ($one_time) {
        my $cleanup = sprintf(
            "find %s -name \'%s-D*\' -type f | xargs --no-run-if-empty /bin/rm -f  \n",
            $tarballPredictionsDir,
            $domain->{TARGET_NAME},
        );
        system($cleanup);
        $one_time = 1;
    }

    createPredictionsTarball($domain->{TARGET_NAME}, $domain->{DOMAIN_INDEX})
        if $domain->{DOMAIN_INDEX} =~ m/^[1-9]$/;
}

# CREATE TARBALL OF TRIMMED TARGETS
sleep 1;
createTargetsTarball();

# Refresh the links of all models in /local/CASP13/MODELS_PDB.
system("/local/Projects/Perl/casp13/src/scripts/linkModels.pl");

exit(0);


#----------------------------------------------------------
# SUBROUTINES
#----------------------------------------------------------

# Build a string describing the residue range of every domain PDB file of a
# target; the string is used in the name of a tarball.
#
# Parameters:
#   $inDir  - directory containing the domain PDB files
#   $target - target name; files named "<target>-D<c>.pdb", where <c> is any
#             single character, are examined in name order
#
# Returns:
#   For each file, "D<index>_<first>-<last>." is appended, where <first> and
#   <last> are the residue numbers (columns 23-26) of the first and the last
#   line starting with "ATOM". A file without such lines contributes only
#   "D<index>_". The result is "" when no file matches or the directory cannot
#   be read. A bare return (undef / empty list) is issued as soon as a matching
#   file name does not carry a domain index 1-9.
sub toStringRangeDomains {
    my ($inDir, $target) = @_;

    # Read the directory directly instead of parsing `ls` output through an
    # unchecked shell pipe; this also copes with unusual characters in paths.
    opendir(my $dir_handle, $inDir) or do {
        warn "toStringRangeDomains: cannot read directory $inDir: $!\n";
        return "";
    };
    my @domainsFiles = sort grep { m/\A\Q$target\E-D.\.pdb\z/ } readdir($dir_handle);
    closedir($dir_handle);

    # Extract the trimmed residue-number field of an ATOM record; lines too
    # short to hold the field yield an empty string.
    my $residue_number = sub {
        my ($atom_line) = @_;
        return '' if length($atom_line) <= 22;
        my $number = substr($atom_line, 22, 4);
        $number =~ s/^\s+//;
        $number =~ s/\s+$//;
        return $number;
    };

    my $result = "";
    foreach my $file (@domainsFiles) {
        if ($file =~ m/^\S+-D([1-9]{1})/) {
            $result .= "D$1" . "_";
        } else {
            return;
        }

        # A single pass over the file finds both the first and the last ATOM
        # record (previously two grep|head / grep|tail pipelines per file).
        my ($first_atom, $last_atom);
        if (open(my $pdb_handle, '<', "$inDir/$file")) {
            while (defined(my $line = <$pdb_handle>)) {
                next unless $line =~ m/^ATOM/;
                $first_atom = $line unless defined $first_atom;
                $last_atom = $line;
            }
            close($pdb_handle);
        } else {
            warn "toStringRangeDomains: cannot open $inDir/$file: $!\n";
        }

        if (defined $first_atom) {
            $result .= $residue_number->($first_atom) . "-";
            $result .= $residue_number->($last_atom) . ".";
        }
    }
    return $result;
}


# Build a string describing the residue ranges of every domain PDB file of a
# target, delegating the per-file range detection to getRangeFromPDBFile().
#
# Parameters:
#   $inDir  - directory containing the domain PDB files
#   $target - target name; files named "<target>-D<c>.pdb", where <c> is any
#             single character, are examined in name order
#
# Returns:
#   The concatenation, per file, of "D<index>_" and whatever
#   getRangeFromPDBFile() returns for that file. The result is "" when no file
#   matches or the directory cannot be read. A bare return (undef / empty
#   list) is issued as soon as a matching file name does not carry a domain
#   index 1-9, so callers must be prepared for an undefined value.
sub toStringRangeDomainsFromPDBFiles {
    my ($inDir, $target) = @_;

    # Read the directory directly instead of parsing `ls` output through an
    # unchecked shell pipe on a global bareword handle.
    opendir(my $dir_handle, $inDir) or do {
        warn "toStringRangeDomainsFromPDBFiles: cannot read directory $inDir: $!\n";
        return "";
    };
    my @domainsFiles = sort grep { m/\A\Q$target\E-D.\.pdb\z/ } readdir($dir_handle);
    closedir($dir_handle);

    my $result = "";
    foreach my $file (@domainsFiles) {
        if ($file =~ m/^\S+-D([1-9]{1})/) {
            $result .= "D$1" . "_";
        } else {
            return;
        }
        $result .= getRangeFromPDBFile("$inDir/$file");
    }
    return $result;
}


# Describe the residue numbering of a PDB file as a list of contiguous
# segments.
#
# Only lines that start with "ATOM" and contain "CA" are considered; the
# residue number is read from columns 23-26. A jump of more than 20 between
# consecutive residue numbers closes the current segment and opens a new one.
#
# Parameters:
#   $pdbFile - path of the PDB file to scan
#
# Returns:
#   A string of "<start>-<end>_" segments, e.g. "1-30_60-100_". The result is
#   "" when the file cannot be opened or holds no usable records.
#   NOTE(review): as in the original code, the final segment is closed only
#   when it spans more than 20 residue numbers; a shorter final segment is
#   dropped when it follows another segment, and is left as a dangling
#   "<start>-" when it is the only one. Confirm whether that is intended.
sub getRangeFromPDBFile {
    my ($pdbFile) = @_;
    my $result = "";
    my $start = '';
    my $end = '';

    # Read the file in Perl instead of an unchecked `grep | grep` shell pipe.
    open(my $pdb_handle, '<', $pdbFile) or do {
        warn "getRangeFromPDBFile: cannot open $pdbFile: $!\n";
        return $result;
    };
    while (defined(my $line = <$pdb_handle>)) {
        next unless $line =~ m/^ATOM/ && $line =~ m/CA/;
        next if length($line) <= 22;

        my $current = substr($line, 22, 4);
        $current =~ s/^\s+//;
        $current =~ s/\s+$//;
        # Skip records whose residue field is not an integer, so the
        # arithmetic below never operates on garbage.
        next unless $current =~ m/^-?\d+$/;

        if ($start eq '') {
            # First usable record: open the first segment.
            $start = $current;
            $end = $current;
            $result .= $start . '-';
            next;
        }

        if (($current - $end) > 20) { # gap in numeration larger than 20
            # Close the running segment and open the next one AT the current
            # residue. The old code reset its state without recording this
            # residue, so every segment after a gap started one residue late.
            $result .= $end . "_";
            $start = $current;
            $end = $current;
            $result .= $start . '-';
        } else {
            $end = $current;
        }
    }
    close($pdb_handle);

    # Guard against the no-records case, where $start and $end are still ''.
    if ($start ne '' && ($end - $start) > 20) {
        $result .= $end . "_";
    }
    if ($result =~ m/-$/) {
        $result =~ s/_(\d+)-$/_/g;
    }
    return $result;
}



# Create "<target>-D<index>.tar.gz" in $tarballPredictionsDir from the
# directory "<target>-D<index>" inside $LOCAL_CONFIG->{DATA_MODELS_DIR}.
#
# Parameters:
#   $target - target name
#   $index  - domain index
#
# Does nothing when the domain's model directory does not exist. An existing
# tarball of the same name is replaced. The archive is created with symlinks
# dereferenced (tar -h), then given group "casp" and mode 664. Failures are
# reported with warn() and do not abort the script.
sub createPredictionsTarball {
    my ($target, $index) = @_;
    my $models_dir = $LOCAL_CONFIG->{DATA_MODELS_DIR};
    my $domain_name = sprintf("%s-D%s", $target, $index);
    if (! -d "$models_dir/$domain_name") {
        return;
    }

    my $tar_name = sprintf("%s/%s.tar.gz", $tarballPredictionsDir, $domain_name);
    if (-e $tar_name && !unlink($tar_name)) {
        warn "createPredictionsTarball: cannot remove old $tar_name: $!\n";
    }

    # `tar -C` replaces the old "cd DIR ; tar ..." shell string, which still
    # ran tar (from the wrong directory) when the cd failed. List-form
    # system() also keeps the shell away from the path names.
    my $status = system('tar', '-czhf', $tar_name, '-C', $models_dir, $domain_name);
    if ($status != 0) {
        warn "createPredictionsTarball: tar failed for $tar_name (wait status $status)\n";
    }

    # Fix group and permissions only if an archive was actually produced.
    if (-e $tar_name) {
        system('chgrp', 'casp', $tar_name) == 0
            or warn "createPredictionsTarball: chgrp casp failed for $tar_name\n";
        chmod(0664, $tar_name)
            or warn "createPredictionsTarball: chmod 664 failed for $tar_name: $!\n";
    }
#    printf("cd %s ; tar -czhf $tar_name %s-D%s ", $LOCAL_CONFIG->{DATA_MODELS_DIR} ,$target, $index);

    return;
}


# Rebuild the tarball $tarballTargetsName from every "*-D?.pdb" file found in
# $LOCAL_CONFIG->{DATA_TARGETS_DIR}.
#
# Takes no parameters. An existing tarball is replaced. The archive is created
# with symlinks dereferenced (tar -h), then given group "casp" and mode 664.
# Failures are reported with warn() and do not abort the script.
sub createTargetsTarball {
    if (-e $tarballTargetsName && !unlink($tarballTargetsName)) {
        warn "createTargetsTarball: cannot remove old $tarballTargetsName: $!\n";
    }

    # The shell is still needed to expand the *-D?.pdb glob, but "&&" makes
    # sure tar never runs from the wrong directory when the cd fails (the old
    # "cd DIR; tar ..." form ran it regardless).
    my $command = sprintf("cd %s && tar -czhf %s *-D?.pdb", $LOCAL_CONFIG->{DATA_TARGETS_DIR}, $tarballTargetsName);
    my $status = system($command);
    if ($status != 0) {
        warn "createTargetsTarball: '$command' failed (wait status $status)\n";
    }

    # Fix group and permissions only if an archive was actually produced.
    if (-e $tarballTargetsName) {
        system('chgrp', 'casp', $tarballTargetsName) == 0
            or warn "createTargetsTarball: chgrp casp failed for $tarballTargetsName\n";
        chmod(0664, $tarballTargetsName)
            or warn "createTargetsTarball: chmod 664 failed for $tarballTargetsName: $!\n";
    }
    return;
}
=cut
