package TargetSequencesManager;

use strict;
use warnings;

use DBI;
use Digest::MD5 qw(md5 md5_hex md5_base64);

use lib qw(Core);
use CGI::Carp qw(fatalsToBrowser);
use Database;

use PDBUtils;
use Configuration;

### METHODS (last update 05/25/2010 M. Wojciechowski):
#   add (adds sequence to submitted_sequences table)
#   target_sequences (returns all submitted sequences info containing 
#                    submitter info)
#   sequences_targets_prepared (returns all suquences for regular targets
#		     released or prepared for release)
#   update (changes sequence data)
#   update2 (changes only comments and target name in sequence)
#   delete (removes submitted sequence)
#   sequence (returns full info about the sequence)
#   exist (returns 1 if the sequence with given id exists)
#   get_all_sgi_centers (returns all sgi centers registered in database)
#   get_sgi_center (returns sgi center by given id)
######################################################

my $target_sequences_manager = undef;

sub new {
    my ($class) = @_;
    
    return $target_sequences_manager if(defined($target_sequences_manager));
    
    my $self = {
        _id => undef,
        _database => Database->new($CONFIG->{HOSTNAME}, $CONFIG->{PORT}, $CONFIG->{DATABASE}, $CONFIG->{USERNAME}, $CONFIG->{PASSWORD})
    };
    
    $target_sequences_manager = bless $self, $class;
    return $target_sequences_manager;
}

sub add {
    my ($self, $accounts_id, $protein_name, $organism_name, $number_of_aa, $sequence, $binding_site, $sgi_center_id, $sequence_family, $is_nmr, $has_map_or_spectra, $current_state_description, $chain_tracing_date, $release_date, $oligomeric_state, $stoichiometry, $ligands, $disorder, $solvent, $protein_material, $expression) = @_;
    
    my $result = 0;
    
    $current_state_description =~ s/'/ /g;    
    if ($is_nmr eq 'YES' || $is_nmr eq 'NMR'){ # NMR
	$is_nmr = 1;
    } elsif ($is_nmr eq 'EM') { # EM
	$is_nmr = 2;
    } else { # deafault XRAY
	$is_nmr = 0;
    }
    my $query = sprintf("INSERT INTO casp13.submitted_sequences (
                        accounts_id, protein_name, organism_name, number_of_aa, sequence, binding_site, sgi_center_id, sequence_family, is_nmr, has_map_or_spectra, current_state_description, chain_tracing_date, release_date, oligomeric_state, stoichiometry, ligands, disorder, solvent, protein_material, expression)
                        VALUES (%d, '%s', '%s', %d, '%s', '%s', %d, '%s', %d, %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d, '%s') RETURNING id",
                        $accounts_id, quotemeta($protein_name), quotemeta($organism_name), $number_of_aa, quotemeta($sequence), quotemeta($binding_site), $sgi_center_id, quotemeta($sequence_family), $is_nmr, (($has_map_or_spectra eq 'YES') ? 1 : 0), quotemeta($current_state_description), quotemeta($chain_tracing_date), quotemeta($release_date), 
				quotemeta($oligomeric_state), quotemeta($stoichiometry), quotemeta($ligands), quotemeta($disorder), quotemeta($solvent), 
				quotemeta($protein_material), quotemeta($expression)
			);
    
    my $sth = $self->{_database}->query($query);
    
    if(defined($sth)) {
        #my $query = sprintf("SELECT MAX(id) FROM casp13.submitted_sequences");
        #my $sth = $self->{_database}->query($query);
        
        # add logger
        ($result) = $sth->fetchrow_array();
    }
    
    return $result;
}

sub target_sequences {
    my ($self, $field, $order) = @_;

    my @sequences = ();
    
    $field = (!defined($field) || ($field eq '')) ? 'ss.date' : $field;
    $order = (!defined($order) || ($order eq '')) ? 'ASC' : $order;

    if ($field eq 'ss.target') {$field = 'targ2';}
    if ($order =~ m/asc/i) {
	$order .= " NULLS FIRST ";
    } elsif ($order =~ m/desc/i) {
	$order .= " NULLS LAST ";
    }
    
    my $query = sprintf("SELECT ss.id, ss.protein_name, ss.organism_name, ss.number_of_aa, ss.sequence, ss.binding_site, sc.name, ss.sequence_family, ss.is_nmr, ss.has_map_or_spectra, ss.current_state_description, ss.chain_tracing_date, ss.release_date, ss.comments, ss.target, ss.date, ss.stoichiometry, a.lastname, a.firstname, a.institution, sc.id, a.email, ss.contact_email, substring(ss.target for 100 from 2 ) AS targ2 FROM casp13.submitted_sequences ss LEFT JOIN  public.accounts a ON (a.id = ss.accounts_id) LEFT JOIN casp13.sgi_center sc ON (ss.sgi_center_id = sc.id) ORDER BY %s %s", $field, $order);
    my $sth = $self->{_database}->query($query);

    if(defined($sth) && ($sth->rows() > 0)) {
        my $index = 1;
        while(my($id, $protein_name, $organism_name, $number_of_aa, $sequence, $binding_site, $sgi_center, $sequence_family, $is_nmr, $has_map_or_spectra, $current_state_description, $chain_tracing_date, $release_date, $comments, $target, $date, $stoichiometry, $lastname, $firstname, $institution, $sgi_center_id, $email, $contact_email, $targ2) = $sth->fetchrow_array()) {
            if($date =~ /(\d+)-(\d+)-(\d+)/) {
                $date = sprintf("%d-%d-%d", $1, $2, $3);
            }
            
            if($release_date =~ /(\d+)-(\d+)-(\d+)/) {
                $release_date = sprintf("%d-%d-%d", $1, $2, $3);
            }
            
            my $real_number_of_aa = sequence_length($sequence);

	    my $account_name = '';
            if (!defined($contact_email) || $contact_email eq '' || $contact_email eq $email){
		if (defined($email) && $email ne ''){
			$contact_email = $email;
		} else {
			$contact_email = '';
		}
		if (defined($firstname)) {
	                $account_name .= $firstname." ";
        	}
	        if (defined($lastname)) {
        	        $account_name .= $lastname;
            	}
	        $account_name =~ s/\s+$//;
	    }
 	    #$contact_email =~ s/\@/ AT /;
            push(@sequences, {
                    ID => $id,
                    INDEX => $index,
                    PROTEIN_NAME => $protein_name,
                    ORGANISM_NAME => $organism_name,
                    NUMBER_OF_AA => $number_of_aa,
                    REAL_NUMBER_OF_AA => $real_number_of_aa,
                    SEQUENCE => $sequence,
                    SGI_CENTER => $sgi_center,
                    BINDING_SITE => $binding_site,
                    SEQUENCE_FAMILY => $sequence_family,
                    IS_NMR => $is_nmr,
                    HAS_MAP_OR_SPECTRA => $has_map_or_spectra,
                    CURRENT_STATE_DESCRIPTION => $current_state_description,
                    CHAIN_TRACING_DATE => $chain_tracing_date,
                    RELEASE_DATE => $release_date,
                    COMMENTS => $comments,
                    TARGET => $target,
		    HAS_PDB_FILE => (-e sprintf("exper_struct/structure_%d.pdb", $id) ? 1 : 0),
                    PDB_FILE => (sprintf("exper_struct/structure_%d.pdb", $id)),
                    DATE => $date,
		    STOICHIOMETRY => $stoichiometry,
                    ACCOUNT_NAME => $account_name,
                    INSTITUTION => sprintf("%s", (defined($institution)? $institution: '')),
		    SGI_CENTER_ID => $sgi_center_id,
		    CONTACT_EMAIL => sprintf("%s", $contact_email),
            	}
            );
            $index++;
        }
    }
    
    return @sequences;
}

# The method returns list of sequences for which hte targets were prepared (and released).
sub sequences_targets_prepared {
    my ($self, $field, $order) = @_;

    my @sequences = ();

    $field = (!defined($field) || ($field eq '')) ? 't.release_date' : $field;
    $order = (!defined($order) || ($order eq '')) ? 'DESC' : $order;

    my $query = sprintf("SELECT ss.id, ss.protein_name, ss.organism_name, ss.number_of_aa, ss.sequence, ss.binding_site, sc.name, ss.sequence_family, ss.is_nmr, ss.has_map_or_spectra, ss.current_state_description, ss.chain_tracing_date, ss.release_date, ss.comments, ss.target, ss.date, a.lastname, a.firstname, a.institution, sc.id, t.release_date, ss.blast_info, ss.hhsearch_info, ss.oligomeric_state FROM casp13.submitted_sequences ss LEFT JOIN  public.accounts a ON (a.id = ss.accounts_id) LEFT JOIN casp13.sgi_center sc ON (ss.sgi_center_id = sc.id) JOIN casp13.targets t ON ss.id=t.submitted_sequences_id WHERE t.name SIMILAR TO '(H|O|T)%%'  ORDER BY %s %s", $field, $order);
    my $sth = $self->{_database}->query($query);

    if(defined($sth) && ($sth->rows() > 0)) {
        my $index = 1;
        while(my($id, $protein_name, $organism_name, $number_of_aa, $sequence, $binding_site, $sgi_center, $sequence_family, $is_nmr, $has_map_or_spectra, $current_state_description, $chain_tracing_date, $release_date, $comments, $target, $date, $lastname, $firstname, $institution, $sgi_center_id, $target_release_date, $blast_info, $hhsearch_info, $oligomeric_state) = $sth->fetchrow_array()) {
            if($date =~ /(\d+)-(\d+)-(\d+)/) {
                $date = sprintf("%d-%d-%d", $1, $2, $3);
            }

            if($release_date =~ /(\d+)-(\d+)-(\d+)/) {
                $release_date = sprintf("%d-%d-%d", $1, $2, $3);
            }
	    if($target_release_date =~ /(\d+)-(\d+)-(\d+)/) {
                $target_release_date = sprintf("%d-%d-%d", $1, $2, $3);
            }	    

            my $real_number_of_aa = sequence_length($sequence);

            push(@sequences, {
                    ID => $id,
                    INDEX => $index,
                    PROTEIN_NAME => $protein_name,
                    ORGANISM_NAME => $organism_name,
                    NUMBER_OF_AA => $number_of_aa,
                    REAL_NUMBER_OF_AA => $real_number_of_aa,
                    SEQUENCE => $sequence,
                    SGI_CENTER => $sgi_center,
                    BINDING_SITE => $binding_site,
                    SEQUENCE_FAMILY => $sequence_family,
                    IS_NMR => $is_nmr,
		    HAS_PDB_FILE => (-e sprintf("exper_struct/structure_%d.pdb", $id) ? 1 : 0),
		    PDB_FILE => (sprintf("exper_struct/structure_%d.pdb", $id)),
                    HAS_MAP_OR_SPECTRA => $has_map_or_spectra,
                    CURRENT_STATE_DESCRIPTION => $current_state_description,
                    CHAIN_TRACING_DATE => $chain_tracing_date,
                    RELEASE_DATE => $release_date,
                    COMMENTS => $comments,
                    TARGET => $target,
                    DATE => $date,
                    ACCOUNT_NAME => sprintf("%s %s", $lastname, $firstname),
                    INSTITUTION => $institution,
                    SGI_CENTER_ID => $sgi_center_id,
		    TARGET_RELEASE_DATE => $target_release_date,
		    BLAST_INFO => $blast_info,
		    HHSEARCH_INFO => $hhsearch_info,
		    OLIGOMERIC_STATE => $oligomeric_state,
                }
            );
            $index++;
        }
    }

    return @sequences;
}


sub update {
    my ($self, $id, $protein_name, $organism_name, $number_of_aa, $sequence, $binding_site, $sgi_center_id, $sequence_family, $is_nmr, $has_map_or_spectra, $current_state_description, $chain_tracing_date, $release_date, $comments, $target, $oligomeric_state, $blast_info, $hhsearch_info, $difficulty, $ligands, $stoichiometry, $disorder, $solvent, $protein_material, $expression, $contact_email) = @_;
    
    my $result = 0;

    if (!defined($oligomeric_state) ){
	$oligomeric_state = '1';
    }   
    if (!defined($blast_info)){
	$blast_info = '';
    }
    if (!defined($hhsearch_info)){
	$hhsearch_info = '';
    }
    if (!defined($difficulty)){
	$difficulty = '';
    }
    $current_state_description =~ s/'/ /g;
    if (!defined($ligands)){
	$ligands = '';
    }
    if (!defined($stoichiometry)){
        $stoichiometry = '';
    }
    if (!defined($disorder)){
        $disorder = '';
    }
    if (!defined($solvent)){
        $solvent = '';
    }
    if (!defined($protein_material)){
	$protein_material = '';
    }
    if (!defined($expression)){
	$expression = '';
    }
    if (!defined($contact_email)){
	$contact_email = '';
    }
    my $query = sprintf("UPDATE casp13.submitted_sequences SET protein_name = '%s', organism_name = '%s', number_of_aa = %d, sequence = '%s', binding_site = '%s', sgi_center_id = %d, sequence_family = '%s', is_nmr = %d, has_map_or_spectra = %d, current_state_description = '%s', chain_tracing_date = '%s', release_date = '%s', comments = '%s', target = '%s', 
	oligomeric_state = '%s', ligands = '%s', stoichiometry = '%s', disorder = '%s', solvent = '%s', protein_material = %d, expression = '%s',
	blast_info = '%s', hhsearch_info = '%s', difficulty = '%s', contact_email='%s'  WHERE (id = %d)", 
	$protein_name, $organism_name, $number_of_aa, $sequence, $binding_site, $sgi_center_id, $sequence_family, $is_nmr, $has_map_or_spectra, $current_state_description, $chain_tracing_date, $release_date, $comments, $target, $oligomeric_state, 
	$ligands, $stoichiometry, $disorder, $solvent, $protein_material, $expression,
	$blast_info, $hhsearch_info, $difficulty, $contact_email, $id);
    my $sth = $self->{_database}->query($query);
    
    if(defined($sth)) {
        $result = 1;
    }
    
    return $result;
}

sub update2 {
    my ($self, $id, $comments, $target) = @_;
    
    my $result = 0;
    
    my $query = sprintf("UPDATE casp13.submitted_sequences SET comments = '%s', target = '%s' WHERE (id = %d)", $comments, $target, $id);
    my $sth = $self->{_database}->query($query);
    
    if(defined($sth)) {
        $result = 1;
    }
    
    return $result;
}

sub delete {
    my ($self, $id) = @_;
    
    my $result = 0;
    
    return $result if(! $self->exist($id));
    
    my $query = sprintf("DELETE FROM casp13.submitted_sequences WHERE (id = %d)", $id);
    my $sth = $self->{_database}->query($query);
    
    if(defined($sth)) {
        # add logger
        $result = defined($sth) ? 1 : 0;
    }
    
    return $result;
}

sub exist {
    my ($self, $id) = @_;
    
    my $result = 0;
    
    my $query = sprintf("SELECT COUNT(id) FROM casp13.submitted_sequences WHERE (id = %d)", $id);
    my $sth = $self->{_database}->query($query);
    
    if(defined($sth) && ($sth->rows() > 0)) {
        my ($count) = $sth->fetchrow_array();
        
        $result = ($count == 1) ? 1 : 0;
    }
    
    return $result;
}

sub get_all_sgi_centers {
    my ($self) = @_;
    my @result = ();
   
    my $query = sprintf("SELECT sc.id, sc.name, sc.full_name FROM casp13.sgi_center sc");
    my $sth = $self->{_database}->query($query);
    
    if(defined($sth) && ($sth->rows() > 0)) {
        while(my($id, $name, $full_name) = $sth->fetchrow_array()) {
                       
            push(@result, {
                     ID => $id,
        	     NAME => $name,
	             FULL_NAME => $full_name
            	}
            );
        }       
    }
    
    return @result;
}

sub get_sgi_center {
	my ($self, $id) = @_;

	my %result = (
		ID => '',
		NAME => '',
		FULL_NAME => ''
	);
    
	return %result if(! $self->exist($id));
	my $query = sprintf("SELECT sc.id, sc.name, sc.full_name FROM casp13.sgi_center sc WHERE sc.id = %d", $id);
	
	my $sth = $self->{_database}->query($query);
    
	if(defined($sth) && ($sth->rows() > 0)) {
		(
			$result{ID},
			$result{NAME},
			$result{FULL_NAME}
		) = $sth->fetchrow_array();
        
		$result{REAL_NUMBER_OF_AA} = sequence_length($result{SEQUENCE});
	}
    
	return %result;
}

sub sequence {
    my ($self, $id) = @_;
    
    my %result = (
        ID => '',
        TARGET => '',
        PROTEIN_NAME => '',
        ORGANISM_NAME => '',
        NUMBER_OF_AA => '',
        REAL_NUMBER_OF_AA => '',
        SEQUENCE => '',
        BINDING_SITE => '',
        SGI_CENTER => '',
        SEQUENCE_FAMILY => '',
        IS_NMR => '',
        CURRENT_STATE_DESCRIPTION => '',
        HAS_MAP_OR_SPECTRA => '',
        CHAIN_TRACING_DATE => '',
        RELEASE_DATE => '',
        DATE => '',
        COMMENTS => '',
        ACCOUNTS_ID => '',
        TARGET => '',
	SGI_CENTER_ID => '',
	OLIGOMERIC_STATE => '',
	LIGANDS => '',
	STOICHIOMETRY => '',
	DISORDER => '',
	SOLVENT => '',
	PROTEIN_MATERIAL => '',
	EXPRESSION => '',
	BLAST_INFO => '',
	HHSEARCH_INFO => '',
	DIFFICULTY => '',
	CONTACT_EMAIl => ''
    );
    
    return %result if(! $self->exist($id));
    
    my $query = sprintf("SELECT ss.id, ss.protein_name, ss.organism_name, ss.number_of_aa, ss.sequence, ss.binding_site, sc.name, ss.sequence_family, ss.is_nmr, ss.has_map_or_spectra, ss.current_state_description, ss.chain_tracing_date, ss.release_date, ss.date, ss.comments, ss.accounts_id, ss.target, sc.id, 
		ss.oligomeric_state, ss.ligands, ss.stoichiometry, ss.disorder, ss.solvent, ss.protein_material, ss.expression,
		ss.blast_info, ss.hhsearch_info, ss.difficulty, ss.contact_email, a.email
		FROM casp13.submitted_sequences ss LEFT JOIN casp13.sgi_center sc ON (ss.sgi_center_id = sc.id) 
		LEFT JOIN public.accounts a  ON ss.accounts_id=a.id
		WHERE ss.id = %d", $id);
    my $sth = $self->{_database}->query($query);
    
    if(defined($sth) && ($sth->rows() > 0)) {
        (
        $result{ID},
        $result{PROTEIN_NAME},
        $result{ORGANISM_NAME},
        $result{NUMBER_OF_AA},
        $result{SEQUENCE},
        $result{BINDING_SITE},
        $result{SGI_CENTER},
        $result{SEQUENCE_FAMILY},
        $result{IS_NMR},
        $result{HAS_MAP_OR_SPECTRA},
        $result{CURRENT_STATE_DESCRIPTION},
        $result{CHAIN_TRACING_DATE},
        $result{RELEASE_DATE},
        $result{DATE},
        $result{COMMENTS},
        $result{ACCOUNTS_ID},
        $result{TARGET},
	$result{SGI_CENTER_ID},
	$result{OLIGOMERIC_STATE},
	$result{LIGANDS},
        $result{STOICHIOMETRY},
        $result{DISORDER},
        $result{SOLVENT},
        $result{PROTEIN_MATERIAL},
        $result{EXPRESSION},
	$result{BLAST_INFO},
	$result{HHSEARCH_INFO},
	$result{DIFFICULTY}, 
	$result{CONTACT_EMAIL},
	$result{ACCOUNT_EMAIL}
        ) = $sth->fetchrow_array();
        
        $result{REAL_NUMBER_OF_AA} = sequence_length($result{SEQUENCE});
	if (!defined($result{CONTACT_EMAIL}) || $result{CONTACT_EMAIL} eq '') {
		if (defined($result{ACCOUNT_EMAIL}) && $result{ACCOUNT_EMAIL} ne '') {
			$result{CONTACT_EMAIL} = $result{ACCOUNT_EMAIL};
		} else {
			$result{CONTACT_EMAIL} = '';
		}
	}
    }
    
    return %result;
}

sub summary {
    my ($self, $id) = @_;
    my %seq = $self->sequence($id);
    my $result = sprintf("Sequence TSID: %d\n", $seq{ID});
    $result .= sprintf("Target: %s\n", $seq{TARGET});
    $result .= sprintf("SEQUENCE :\n%s\n", $seq{SEQUENCE});
    $result .= sprintf("NUMBER_OF_AA : %s\n", $seq{NUMBER_OF_AA});
    $result .= sprintf("REAL_NUMBER_OF_AA : %s\n", $seq{REAL_NUMBER_OF_AA});
    $result .= sprintf("SGI_CENTER : %s\n", $seq{SGI_CENTER});
    $result .= sprintf("OLIGOMERIC_STATE : %s\n", $seq{OLIGOMERIC_STATE});
    $result .= sprintf("BINDING_SITES : %s\n", $seq{BINDING_SITE});
    $result .= sprintf("LIGANDS : %s\n", $seq{LIGANDS});
    $result .= sprintf("STOICHIOMETRY : %s\n", $seq{STOICHIOMETRY});
    $result .= sprintf("DISORDER : %s\n", $seq{DISORDER});
    $result .= sprintf("SOLVENT : %s\n", $seq{SOLVENT});
    $result .= sprintf("PROTEIN_MATERIAL : %s\n", ($seq{PROTEIN_MATERIAL} == 1 ? 'YES' : 'NO') );
    $result .= sprintf("EXPRESSION : %s\n", $seq{EXPRESSION});
    $result .= sprintf("BLAST_INFO : %s\n", $seq{BLAST_INFO});
    $result .= sprintf("HHSEARCH_INFO : %s\n", $seq{HHSEARCH_INFO});
    $result .= sprintf("DIFFICULTY : %s\n\n", $seq{DIFFICULTY});
    $result .= sprintf("Other info :\n%s\n", $seq{SEQUENCE_FAMILY});
    return $result;
}
=head
sub splitSequences{
    my ($self, $SEQUENCE) = @_;
    my @result;
#    my %seq = $self->sequence($id);
#    my $SEQUENCE = $seq{SEQUENCE};
    my @lines = split(/\n+/, $SEQUENCE);
    my $i = -1;
    for my $l (@lines) {
        chomp $l;
        if ($l =~ m/^$/){next;}
        if ($l =~ m/^>/) {
          $i++;
          $result[$i] = "$l\n";
        } elsif ($i == -1) {
          $i = 0;
          $result[$i] = "$l\n";
        } else {
          $result[$i] .= "$l\n";
        }
    }
    return @result;
}
=cut

1;
