package MultimerCapriResultsUploadManager;

use strict;
use warnings;

use DBI;
#use Digest::MD5 qw(md5 md5_hex md5_base64);
#use DateTime;

use lib qw(Core);
use lib qw(Classes);

use Database;

#use PDBUtils;

use Configuration;
use LocalConfiguration;

use MultimerCapriResultsManager;

# Singleton instance handed out by new(); stays undef until the first call.
my $resultsupload_manager = undef;
# When true, load_AssemblyAlignment stops after the first data line.
my $DEBUG = 0;

# Regexp definitions for parsing. They are kept as plain strings and
# interpolated into matches by the parser subs. Single quotes are used on
# purpose: inside double quotes "\." collapses to a bare "." and the dots
# around "brk" would match any character instead of a literal dot.

# Model name: $1 is "P" followed by two digits (stored as gr_code),
# $2 is the trailing run of digits (stored as model).
my $MODEL_NAME_FORMAT1 = '^T\d{3}_(P\d{2})\.brk\.\d+_(\d+)';
# my $MODEL_NAME_FORMAT2 = "^([A-Z]{2})(\\d{3})_(\\d{1})(o{0,1})"; # regexp doesn't contain info regarding the target - just model
my $TARGET_NAME_FORMAT1 = '^(T[0-9acspxR]\d{3}o?-.*T[0-9acspxR]\d{3}o?)'; # multimeric complexes of targets
my $TARGET_NAME_FORMAT2 = '^(T[0-9acspxR]\d{3}o?)'; # oligomeric targets

# Constructor with singleton semantics: the first call builds the manager
# (including its Database handle configured from $CONFIG) and caches it in
# $resultsupload_manager; every later call returns that cached object.
sub new {
    my ($class) = @_;
    unless (defined($resultsupload_manager)) {
        my $database = Database->new(
            $CONFIG->{HOSTNAME},
            $CONFIG->{PORT},
            $CONFIG->{DATABASE},
            $CONFIG->{USERNAME},
            $CONFIG->{PASSWORD},
        );
        my %fields = (
            _id       => undef,
            _database => $database,
        );
        $resultsupload_manager = bless \%fields, $class;
    }
    return $resultsupload_manager;
}

# Parse a model name taken from a line record.
#
# Arguments: the text to match and the current model as a flattened hash.
# When the text matches $MODEL_NAME_FORMAT1 the first capture is stored as
# gr_code, the second as model, and target is reset to undef (callers test
# for an undefined target and then fill it in with parse_target_name).
# When nothing matches the model is handed back unchanged.
# Returns the model as a flattened hash.
sub parse_model_name{
    my ($self, $line, %model) = @_;
    if (my ($group_code, $model_number) = $line =~ m/$MODEL_NAME_FORMAT1/) {
        @model{qw(target gr_code model)} = (undef, $group_code, $model_number);
    }
    return %model;
}

# Parse the target name out of a file (or directory) name.
#
# The multimeric-complex pattern ($TARGET_NAME_FORMAT1) is tried first and
# the single-target pattern ($TARGET_NAME_FORMAT2) second; the first one
# that matches supplies $model{target}. With no match the model is returned
# unchanged. Returns the model as a flattened hash.
sub parse_target_name{
    my ($self, $filename, %model) = @_;
    foreach my $format ($TARGET_NAME_FORMAT1, $TARGET_NAME_FORMAT2) {
        if ($filename =~ /$format/) {
            $model{target} = $1;
            last;
        }
    }
    return %model;
}

# Load AssemblyAlignment.jar results from "$dir/$file" into the database.
#
# The first line is skipped as a header. Every following line is split on
# whitespace and read with this column layout:
#   Query Target Relation ChainLength RMSD ResidueLength
#   [Aligned-Query] [Aligned-Target] [Query-Target:OrientationAngle]
# Column 0 is parsed as the model name; if that leaves the target undefined
# it is parsed from column 1. Columns 3, 4 and 5 are stored as align_size,
# local_rmsd and align_length; the largest angle found in column 8 is stored
# as orient. Each line is then added, or updated when exist_by_parameters
# reports an existing id.
#
# A fresh model is built for every line so that the id and parsed fields of
# one line cannot leak into the next. Prints a message when the file cannot
# be opened. When $DEBUG is set only the first data line is processed.
sub load_AssemblyAlignment {
    my ($self, $file, $dir) = @_;
    my $manager = MultimerCapriResultsManager->new();
    my $infile = sprintf("%s/%s", $dir, $file);
    my $in;
    if (open($in, '<', $infile)) {
        my $line = <$in>; # skip header
        while ($line = <$in>) {
            my %model = $manager->get_new_model();
            # fix format - remove extra spaces inside brackets and around
            # commas so that each bracketed list becomes a single token
            $line =~ s/\s+\]/\]/g;
            $line =~ s/\[\s+/\[/g;
            $line =~ s/,\s+/,/g;
            $line =~ s/\s+,/,/g;
            my @tokens = split(/\s+/, $line);
            my $model_name = $tokens[0];
            %model = $self->parse_model_name($model_name, %model);
            if (! defined $model{target}) {
                my $target_name = $tokens[1];
                %model = $self->parse_target_name($target_name, %model);
            }
            # Orientation list, e.g. "[A-A:0.03,C-C:0.02]". A missing column
            # is treated as an empty list instead of triggering warnings.
            my $txt_orient = defined $tokens[8] ? $tokens[8] : '';
            $txt_orient =~ s/[\[\]]//g; # remove extra square brackets
            # Keep the largest angle. The entries are split on commas as well
            # as whitespace: after the normalisation above a comma-separated
            # list is one token, and matching it as a whole would only ever
            # capture its last angle.
            my $orient = undef;
            foreach my $el (split(/[\s,]+/, $txt_orient)) {
                if ($el =~ m/\S+:([0-9]+\.[0-9]+)/) {
                    my $angle = $1;
                    if (!defined($orient) || $orient < $angle) {
                        $orient = $angle;
                    }
                }
            }
            $model{align_size} = $tokens[3];
            $model{local_rmsd} = $tokens[4];
            $model{align_length} = $tokens[5];
            $model{orient} = $orient;
            # upload to database
            my $id = $manager->exist_by_parameters(%model);
            if (!defined($id) || $id eq '' || $id == 0) {
                $manager->add_results(%model);
            } else {
                $model{id} = $id;
                $manager->update_results(%model);
            }
            last if $DEBUG;
        }
        close $in;
    } else {
        print "Failed to open file $infile\n";
    }
}

# Load AssemblyProperties.jar results from "$dir/$file" into the database.
#
# The first line is skipped as a header. Every following line is split on
# whitespace and read with this column layout:
#   Name Size Subunits Stoichiometry Pseudostoichiometry Symmetry Local
#   Method SymmRMSD SymmTMscore
# Column 0 is parsed as the model name; if that leaves the target undefined
# it is parsed from $file. Columns 5, 1 and 8 are stored as symm, symm_size
# and symm_rmsd. Each line is then added, or updated when
# exist_by_parameters reports an existing id.
#
# A fresh model is built for every line so that the id and parsed fields of
# one line cannot leak into the next. Prints a message when the file cannot
# be opened.
sub load_AssemblyProperties {
    my ($self, $file, $dir) = @_;
    my $manager = MultimerCapriResultsManager->new();
    my $infile = sprintf("%s/%s", $dir, $file);
    my $in;
    if (open($in, '<', $infile)) {
        my $line = <$in>; # skip header
        while ($line = <$in>) {
            my %model = $manager->get_new_model();
            # fix format - remove extra spaces
            $line =~ s/\s+\]/\]/g;
            $line =~ s/\[\s+/\[/g;
            $line =~ s/,\s+/,/g;
            $line =~ s/\s+,/,/g;
            my @tokens = split(/\s+/, $line);
            my $model_name = $tokens[0];
            %model = $self->parse_model_name($model_name, %model);
            if (! defined $model{target}) {
                %model = $self->parse_target_name($file, %model);
            }
#           my $mm_size = $tokens[2] =~ tr/,//; # count separators (comma)
#           $mm_size = $mm_size + 1;
#           my $stoich =  $tokens[3];
#           $model{mm_size} = $mm_size;
#           $model{stoich} = $stoich;
            $model{symm} = $tokens[5];
            $model{symm_size} = $tokens[1];
            $model{symm_rmsd} = $tokens[8];
            # upload to database
            my $id = $manager->exist_by_parameters(%model);
            if (!defined($id) || $id eq '' || $id == 0) {
                $manager->add_results(%model);
            } else {
                $model{id} = $id;
                $manager->update_results(%model);
            }
        }
        close $in;
    } else {
        print "Failed to open file $infile\n";
    }
}

# Load oligomer size information for one model file and store it.
#
# $dir is the directory holding $file; its last path component is taken as
# the target directory name. A name without "-" is handled as a
# homooligomer: the number of distinct chains reported by
# parse_OligoSingleModel becomes mm_size, stoich is "A" followed by that
# number, and the record is added, or updated when exist_by_parameters
# reports an existing id. Names containing "-" (heterooligomers) are not
# handled yet.
#
# Nothing is uploaded when the chain count cannot be determined; a message
# is printed instead of storing a stoichiometry of "A" with no size.
sub load_OligoModels {
    my ($self, $file, $dir) = @_;
    my $manager = MultimerCapriResultsManager->new();
    my %model = $manager->get_new_model();
    $dir =~ s/\/+$//;
    # Without a usable "parent/name" split the whole path is used as the
    # directory name, so the checks below never see an undefined value.
    my $sub_dir = $dir;
    my $parent_dir = undef; # only referenced by the disabled code below
    if ($dir =~ /^(.*\/)(\S+)$/) {
        $parent_dir = $1;
        $sub_dir = $2;
    }
    if ($sub_dir !~ m/.*-.*/) { # homooligomeric
        my $mm_size = $self->parse_OligoSingleModel($file, $dir);
        if (!defined($mm_size)) {
            print "Failed to count chains in $dir/$file\n";
            return;
        }
        $model{mm_size} = $mm_size;
        $model{stoich} = 'A'.$mm_size;
        %model = $self->parse_model_name($file, %model);
        if (! defined $model{target}) {
            %model = $self->parse_target_name($sub_dir, %model);
        }
        my $id = $manager->exist_by_parameters(%model);
        if (!defined($id) || $id eq '' || $id == 0) {
            $manager->add_results(%model);
        } else {
            $model{id} = $id;
            $manager->update_results(%model);
        }
    } else { # heterooligomeric
        # TODO: not implemented; the earlier attempt is kept for reference.
#	my @sub_dirs = split(/-/, $sub_dir);
#	my $mm_size = 0 ;
#	my $stoich = '';
#	my @letters = qw/A B C D E F G H I J K L M N O P Q R S T U V W X Y Z/;
#	my $index = 0;

#	foreach my $sd (@sub_dirs){
#	  my $f = $sd; $f =~ s/o$//;
#	  $f = $f.$file;
#	  print $f."\n" if $DEBUG;
#	  my $tmp_size = $self->parse_OligoSingleModel($f, "$parent_dir/$sd");
#	  if (defined($tmp_size) && $tmp_size > 0) {
#	    $mm_size += $tmp_size ;
#	    $stoich .= $letters[$index].$tmp_size ;
#	  }
#	  $index++;
#	}
#	$model{mm_size} = $mm_size;
#       $model{stoich} = $stoich;
#	print "$sub_dir\n" if $DEBUG;
#       %model = $self->parse_model_name($file, %model);
#	%model = $self->parse_target_name($sub_dir, %model);
#       my $id = $manager->exist_by_parameters(%model);
#       if ( !defined($id) || $id eq '' || $id == 0 ) {
#               $manager->add_results(%model);
#       } else {
#		$model{id} = $id;
#               $manager->update_results(%model);
#       }
    }
}

# Count the distinct chain identifiers in a model file.
#
# The file is "$dir/$file". If that is not a regular file, a second name is
# tried with a trailing "o" removed and every "o/" in the path replaced by
# "/". Every line containing the text "ATOM" contributes the character in
# column 22; the number of distinct characters is returned.
#
# The file is read directly rather than through a shell pipeline, so names
# with spaces or shell metacharacters are handled safely. Counting matches
# the former "grep ATOM | cut -c22 | sort -u": a matching line shorter than
# 22 characters contributes an empty value.
# NOTE(review): lines such as HETATM also contain "ATOM" and are counted,
# exactly as before - confirm whether that is intended.
#
# Returns undef when no file is found or it cannot be opened (a message is
# printed in the latter case).
sub parse_OligoSingleModel {
    my ($self, $file, $dir) = @_;
    my $infile = sprintf("%s/%s", $dir, $file);
    if (! -f $infile) {
        $infile =~ s/o$//;
        $infile =~ s/o\//\//g;
    }
    if (! -f $infile) {
        return undef;
    }
    my $in;
    if (open($in, '<', $infile)) {
        my %seen_chain;
        while (my $line = <$in>) {
            next unless $line =~ /ATOM/;
            chomp $line;
            my $chain = length($line) >= 22 ? substr($line, 21, 1) : '';
            $seen_chain{$chain} = 1;
        }
        close $in;
        return scalar(keys %seen_chain);
    } else {
        print "Failed to open file $infile\n";
        return undef;
    }
}

# Load group names from "$dir/$file" and store them.
#
# The target is parsed from $file. Every line of the form
# "<code> <name...>" (code = first run of non-blank characters, name = the
# rest of the line) is passed to update_group_name together with that
# target; other lines are ignored. Prints a message when the file cannot be
# opened instead of reading from an unopened handle.
sub load_group_name{
    my ($self, $file, $dir) = @_;
    my $infile = "$dir/$file";
    my $manager = MultimerCapriResultsManager->new();
    my %model = $manager->get_new_model();
    %model = $self->parse_target_name($file, %model);
    my $target = $model{target};
    my $in;
    if (open($in, '<', $infile)) {
        while (defined(my $line = <$in>)) {
            if ($line =~ m/^(\S+)\s+(\S+.+)$/) {
                # copy the captures before calling out, so the callee's own
                # regex use cannot clobber them
                my $gr_code = $1;
                my $gr_name = $2;
                $manager->update_group_name($gr_code, $gr_name, $target);
            }
        }
        close $in;
    } else {
        print "Failed to open file $infile\n";
    }
}




# Load QSscore results from "$dir/$file" into the database.
#
# The first line is skipped as a header; lines containing "ERR" are
# ignored. Every other line is split on ";" and read with this layout:
#   model; best_score; global_score; rmsd; lddt_score; chain_correspondence
# Field 0 is parsed as the model name; if that leaves the target undefined
# or empty it is parsed from $file. Fields 2, 3 and 4 are stored as
# qs_score, global_rmsd and lddt. Each line gets its own fresh model and is
# added, or updated when exist_by_parameters reports an existing id.
# Prints a message when the file cannot be opened.
sub load_QSscore {
    my ($self, $file, $dir) = @_;
    my $manager = MultimerCapriResultsManager->new();
    my $infile = sprintf("%s/%s", $dir, $file);
    my $in;
    if (open($in, '<', $infile)) {
        my $line = <$in>; # skip header
        while ($line = <$in>) {
            next if $line =~ m/ERR/;
            my %model = $manager->get_new_model();
            my @tokens = split(/;/, $line);
            my $model_name = $tokens[0];
            %model = $self->parse_model_name($model_name, %model);
            if (!defined $model{target} || $model{target} eq '') {
                %model = $self->parse_target_name($file, %model);
            }
            $model{qs_score} = $tokens[2];
            $model{global_rmsd} = $tokens[3];
            $model{lddt} = $tokens[4];

            my $id = $manager->exist_by_parameters(%model);
            if (!defined($id) || $id eq '' || $id == 0) {
                $manager->add_results(%model);
            } else {
                $model{id} = $id;
                $manager->update_results(%model);
            }
        }
        close $in;
    } else {
        print "Failed to open file $infile\n";
    }
}

# Load cont_clash results from "$dir/$file" into the database.
#
# The first line is skipped as a header. Only lines containing the
# whitespace-delimited word "ALL" are used; they are split on whitespace.
# Column 0 is parsed as the model name; if that leaves the target undefined
# it is parsed from $file. Columns 2 and 3 are stored as no_conts and
# no_clashes. Each line gets its own fresh model and is added, or updated
# when exist_by_parameters reports an existing id.
# The file handle is closed once all lines are read. Prints a message when
# the file cannot be opened.
sub load_cont_clash {
    my ($self, $file, $dir) = @_;
    my $manager = MultimerCapriResultsManager->new();
    my $infile = sprintf("%s/%s", $dir, $file);
    my $in;
    if (open($in, '<', $infile)) {
        my $line = <$in>; # skip header
        while ($line = <$in>) {
            next unless $line =~ m/\s+ALL\s+/;
            my %model = $manager->get_new_model();
            my @tokens = split(/\s+/, $line);
            %model = $self->parse_model_name($tokens[0], %model);
            if (! defined $model{target}) {
                %model = $self->parse_target_name($file, %model);
            }
            $model{no_conts} = $tokens[2];
            $model{no_clashes} = $tokens[3];

            my $id = $manager->exist_by_parameters(%model);
            if (!defined($id) || $id eq '' || $id == 0) {
                $manager->add_results(%model);
            } else {
                $model{id} = $id;
                $manager->update_results(%model);
            }
        }
        close $in;
    } else {
        print "Failed to open file $infile\n";
    }
}


# Load Interface results from "$dir/$file" into the database.
#
# The file is first scanned with parseNoContsAllInterfaces to obtain the
# total contact count used as the weighting denominator. It is then read
# again: the first line is skipped as a header, lines starting with "="
# separate groups, and empty lines are ignored. The lines collected for
# each group are handed to processInterfaceLinesForGroup, including the
# group that is still open when the file ends.
#
# Groups without any data line (for example two separator lines in a row,
# or a file that ends on a separator) are skipped rather than processed.
# Prints a message when the file cannot be opened.
sub load_Interface {
    my ($self, $file, $dir) = @_;
    my $manager = MultimerCapriResultsManager->new();
    my $infile = sprintf("%s/%s", $dir, $file);
    my $sum_tconts = $self->parseNoContsAllInterfaces($infile);
    my $in;
    if (open($in, '<', $infile)) {
        my $line = <$in>; # skip header
        my $seen_separator = 0;
        my @linesPerGroup = ();
        while ($line = <$in>) {
            if ($line =~ m/^=/) {
                # Lines seen before the very first separator are not
                # flushed here; they stay in the buffer for the first group.
                if ($seen_separator) {
                    # process a bunch of lines corresponding to the same group
                    if (@linesPerGroup) {
                        $self->processInterfaceLinesForGroup(\@linesPerGroup, $file, $manager, $sum_tconts);
                    }
                    @linesPerGroup = ();
                }
                $seen_separator = 1;
                next;
            }
            next if $line =~ m/^$/;
            push @linesPerGroup, $line;
        }
        close $in;
        # process a bunch of lines corresponding to the last group
        if (@linesPerGroup) {
            $self->processInterfaceLinesForGroup(\@linesPerGroup, $file, $manager, $sum_tconts);
        }
    } else {
        print "Failed to open file $infile\n";
    }
}

# Aggregate the interface lines of one group into a single result record.
#
# Arguments:
#   $refLines   - reference to the raw lines of the group
#   $file       - file name, used to parse the target when the model name
#                 does not provide one
#   $manager    - results manager used to create and store the model
#   $sum_tconts - denominator for the weighted averages (load_Interface
#                 passes the value from parseNoContsAllInterfaces)
#
# The lines are first reduced by processInterfaceLinesPerInterface. Column
# 0 of the first remaining line is parsed as the model name. Per line,
# column 2 is the weight, and columns 7, 11, 5 and 6 are f1, Jaccard
# distance, precision and recall. Stored values:
#   f1, prec_iface, recall_iface = sum(weight * value) / $sum_tconts
#   jaccard_d   = 1 - sum(weight * (1 - distance)) / $sum_tconts
#   iface_rmsd  = smallest column-10 value over the lines
# The record is added, or updated when exist_by_parameters reports an id.
#
# Nothing is stored when there are no lines or when $sum_tconts is zero or
# undefined; the latter prints a message instead of dying on a division by
# zero.
sub processInterfaceLinesForGroup{
    my ($self, $refLines, $file, $manager, $sum_tconts) = @_;
    my @lines = $self->processInterfaceLinesPerInterface($refLines);
    return if !@lines;
    if (!$sum_tconts) {
        print "No interface contacts counted for $file, skipping group\n";
        return;
    }
    my %model = $manager->get_new_model();
    my $is_first = 1;
    my $rmsd = undef;
    my $f1 = 0.0;
    my $jaccard = 0;
    my $prec = 0;
    my $recall = 0;
    foreach my $line (@lines) {
        $line =~ s/\|//g; # remove extra symbols
        my @tokens = split(/\s+/, $line);
        if ($is_first) {
            $is_first = 0;
            %model = $self->parse_model_name($tokens[0], %model);
            if (! defined $model{target}) {
                %model = $self->parse_target_name($file, %model);
            }
            $rmsd = $tokens[10];
        }
        # keep the smallest rmsd of the group
        if ($rmsd > $tokens[10]) {
            $rmsd = $tokens[10];
        }
        my $weight = $tokens[2];
        $f1 += $weight*$tokens[7];
        # the Jaccard distance is averaged as a similarity (1 - distance)
        # and converted back to a distance after the loop
        $jaccard += $weight*(1 - $tokens[11]);
        $prec += $weight*$tokens[5];
        $recall += $weight*$tokens[6];
    }
    $model{f1} = $f1/$sum_tconts;
    $model{jaccard_d} = 1 - $jaccard/$sum_tconts;
    $model{iface_rmsd} = $rmsd;
    $model{prec_iface} = $prec/$sum_tconts;
    $model{recall_iface} = $recall/$sum_tconts;
    my $id = $manager->exist_by_parameters(%model);
    if (!defined($id) || $id eq '' || $id == 0) {
        $manager->add_results(%model);
    } else {
        $model{id} = $id;
        $manager->update_results(%model);
    }
}

# Keep only the best line for each target interface.
#
# $refLines is a reference to the raw lines of one group; the caller's
# array is not modified. "|" characters are stripped from each line, which
# is then split on whitespace: column 1 up to the first ":" names the
# target interface, column 7 is f1 and column 11 the Jaccard distance.
#
# Lines are ranked by f1 (highest first), then by Jaccard distance (lowest
# first), then by their original position. The ranking is a single sort
# with explicit tie-breakers, so the result does not depend on sort
# stability. The first line seen for each target interface is kept.
#
# Returns the kept lines (with "|" removed) in ranking order.
sub processInterfaceLinesPerInterface{
    my ($self, $refLines) = @_;
    my @lines = @{$refLines};
    my @candidates;
    for my $index (0 .. $#lines) {
        $lines[$index] =~ s/\|//g;
        my @tokens = split(/\s+/, $lines[$index]);
        my $t_interface = $tokens[1];
        $t_interface =~ s/:\S+//;
        push @candidates, {
            index       => $index,
            t_interface => $t_interface,
            f1          => $tokens[7],
            jaccard     => $tokens[11],
        };
    }
    my @ranked = sort {
        $b->{f1} <=> $a->{f1}                   # best f1 first
            || $a->{jaccard} <=> $b->{jaccard}  # then smallest Jaccard distance
            || $a->{index} <=> $b->{index}      # then original file order
    } @candidates;
    my %done;
    my @result;
    foreach my $el (@ranked) {
        next if $done{$el->{t_interface}}++;
        push @result, $lines[$el->{index}];
    }
    return @result;
}

# Sum the contact counts over all target interfaces found in $infile.
#
# Lines are expected to look like
#   T0889TS005_1o   T_AB:M_AB    124    83    20  80.6  66.9  73.1 |    86 100.00   1.84 0.19
# The second column up to the ":" ("T_" plus two letters) identifies the
# target interface and the following integer is its contact count. When an
# interface occurs on several lines the last count seen is used. Lines that
# do not match are ignored.
#
# Returns the sum of the per-interface counts (0 when nothing matched), or
# undef after printing a message when the file cannot be opened. The file
# is opened read-only with the name taken literally.
sub parseNoContsAllInterfaces{
    my ($self, $infile) = @_;
    my $in;
    if (open($in, '<', $infile)) {
        my %conts_for;
        while (defined(my $line = <$in>)) {
            if ($line =~ m/^\S+\s+(T_[A-Za-z]{2}):\S+\s+(\d+)/) {
                $conts_for{$1} = $2;
            }
        }
        close $in;
        my $result = 0;
        foreach my $t_iface (keys %conts_for) {
            $result += $conts_for{$t_iface};
        }
        return $result;
    } else {
        print "Failed to open file $infile\n";
        return undef;
    }
}

1;
