# class PDBParser
package PDBParser;

use strict;
use warnings;

use String;

use PDBResidue;

use Parser;
use Common;
our @ISA = qw(Parser);

my %AMINOACIDS_LONG_TO_SHORT = (
    'ALA' => 'A',
    'ARG' => 'R',
    'ASN' => 'N',
    'ASP' => 'D',
    'CYS' => 'C',
    'GLU' => 'E',
    'GLN' => 'Q',
    'GLY' => 'G',
    'HIS' => 'H',
    'ILE' => 'I',
    'LEU' => 'L',
    'LYS' => 'K',
    'MET' => 'M',
    'PHE' => 'F',
    'PRO' => 'P',
    'SER' => 'S',
    'THR' => 'T',
    'TRP' => 'W',
    'TYR' => 'Y',
    'VAL' => 'V'
);

my %AMINOACIDS_SHORT_TO_LONG = (
    'A' => 'ALA',
    'R' => 'ARG',
    'N' => 'ASN',
    'D' => 'ASP',
    'C' => 'CYS',
    'E' => 'GLU',
    'Q' => 'GLN',
    'G' => 'GLY',
    'H' => 'HIS',
    'I' => 'ILE',
    'L' => 'LEU',
    'K' => 'LYS',
    'M' => 'MET',
    'F' => 'PHE',
    'P' => 'PRO',
    'S' => 'SER',
    'T' => 'THR',
    'W' => 'TRP',
    'Y' => 'TYR',
    'V' => 'VAL'
);

my %AMINOACIDS_ATOMS = (
    'ALA' => "N CA C O CB",
    'ARG' => "N CA C O CB CG CD NE CZ NH1 NH2",
    'ASN' => "N CA C O CB CG OD1 ND2",
    'ASP' => "N CA C O CB CG OD1 OD2",
    'CYS' => "N CA C O CB SG",
    'GLU' => "N CA C O CB CG CD OE1 OE2",
    'GLN' => "N CA C O CB CG CD OE1 NE2",
    'GLY' => "N CA C O",
    'HIS' => "N CA C O CB CG ND1 CD2 CE1 NE2",
    'ILE' => "N CA C O CB CG1 CG2 CD1",
    'LEU' => "N CA C O CB CG CD1 CD2",
    'LYS' => "N CA C O CB CG CD CE NZ",
    'MET' => "N CA C O CB CG SD CE",
    'PHE' => "N CA C O CB CG CD1 CD2 CE1 CE2 CZ",
    'PRO' => "N CA C O CB CG CD",
    'SER' => "N CA C O CB OG",
    'THR' => "N CA C O CB OG1 CG2",
    'TRP' => "N CA C O CB CG CD1 CD2 NE1 CE2 CE3 CZ2 CZ3 CH2",
    'TYR' => "N CA C O CB CG CD1 CD2 CE1 CE2 CZ OH",
    'VAL' => "N CA C O CB CG1 CG2"
);

#constructor
sub new {
    my ($class) = @_;
    
    my $self = $class->SUPER::new();
    
    bless $self, $class;
    return $self;
}

sub cut_chain {
    my ($self, $OUPUT_HANDLE, $chain) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    my $input_file = $self->{_file};
    
    while(<$HANDLE>) {
        my $line = $_;
            
        if(($chain eq '') || ($chain eq '#')) {
            print($OUPUT_HANDLE $line);
        } else {
            if(($line =~ /^ATOM/) || ($line =~ /^HETATM/) || ($line =~ /^ANISOU/)) {
                my @atom_line = split(//, $line);
                if($atom_line[21] eq $chain) {
                    my $current_line = sprintf("%s  %s", substr($line, 0, 20), substr($line, 22, 100));
                    print($OUPUT_HANDLE $current_line);
                } else {
                    next;
                }
            } else {
                print($OUPUT_HANDLE $line);
            }
        }
    }
}

sub change_chain_into_space_leave_others {
    my ($self, $OUPUT_HANDLE) = @_;
    my $chain = " ";
    my $HANDLE = $self->{_FILEHANDLE};
    my $input_file = $self->{_file};
    
    while(<$HANDLE>) {
        my $line = $_;
            
        if(($chain eq '') || ($chain eq '#')) {
            print($OUPUT_HANDLE $line);
        } else {
            if(($line =~ /^ATOM/) || ($line =~ /^HETATM/) || ($line =~ /^ANISOU/)) {
                my @atom_line = split(//, $line);
                if($atom_line[21] ne $chain) {
                    my $current_line = sprintf("%s  %s", substr($line, 0, 20), substr($line, 22, 100));
                    print($OUPUT_HANDLE $current_line);
                } else {
                    print($OUPUT_HANDLE $line);
                }
            } else {
                print($OUPUT_HANDLE $line);
            }
        }
    }
}

sub transform_chains_into_1000 {
    my ($self, $OUPUT_HANDLE) = @_;
    my $chain = " ";
    my $HANDLE = $self->{_FILEHANDLE};
    my $input_file = $self->{_file};
    my %map_chains = (
	'A' => 0,
	'B' => 1000,
	'C' => 2000,
	'D' => 3000,
	'E' => 4000,
	'F' => 5000,
	'G' => 6000,
	'H' => 7000,
	'I' => 8000,
	'J' => 9000
    );
    while(<$HANDLE>) {
        my $line = $_;

        if(($chain eq '') || ($chain eq '#')) {
            print($OUPUT_HANDLE $line);
        } else {
            if(($line =~ /^ATOM/) || ($line =~ /^HETATM/) || ($line =~ /^ANISOU/)) {
                my @atom_line = split(//, $line);
                if($atom_line[21] ne $chain) {
		    my $resNo = substr($line, 22, 4); $resNo =~ s/\s+//g;
		    if (exists($map_chains{$atom_line[21]})){
			$resNo += $map_chains{$atom_line[21]} ;
		    }
                    my $current_line = sprintf("%s  %4d%s", substr($line, 0, 20), $resNo, substr($line, 26, 100));
                    print($OUPUT_HANDLE $current_line);
                } else {
                    print($OUPUT_HANDLE $line);
                }
            } else {
                print($OUPUT_HANDLE $line);
            }
        }
    }
}

sub chains {
    my ($self) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    my @results = ();
    
    while(<$HANDLE>) {
        my $line = $_;
            
        if($line =~ /^ATOM/) {
            my @atom_line = split(//, $line);
            
            my $chain = trim($atom_line[21]);
            
            if(defined($chain) && ($chain ne '') && (!in_array(@results))) {
                push(@results, $chain);
            }
        }
    }
    return @results;
}

# HOTFIX for modeller ... Remove in future ..
sub cut_chain_and_remove_HETATM {
    my ($self, $OUPUT_HANDLE, $chain) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    my $input_file = $self->{_file};
    
    while(<$HANDLE>) {
        my $line = $_;
            
        if($line =~ /^HETATM/) {
            next;
        }
            
        if(($chain eq '') || ($chain eq '#')) {
            print($OUPUT_HANDLE $line);
        } else {
            if(($line =~ /^ATOM/) || ($line =~ /^HETATM/) || ($line =~ /^ANISOU/)) {
                my @atom_line = split(//, $line);
                if($atom_line[21] eq $chain) {
                    print($OUPUT_HANDLE $line);
                } else {
                    next;
                }
            } else {
                print($OUPUT_HANDLE $line);
            }
        }
    }
}

sub remove_HETATM_residues {
    my ($self, $OUPUT_HANDLE) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    while(<$HANDLE>) {
        my $line = $_;
            
        if($line !~ /^HETATM/) {
            print($OUPUT_HANDLE $line);
        }
    }
}

sub residues_range {
    my ($self) = @_;
    
    my $result = '';
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my @residues = ();
    while(<$HANDLE>) {
        my $line = $_;
        
        if($line =~ /^ATOM\s+\d+\s+CA\s+\S+\s+(\d+)/) {
            push(@residues, $1);
        }
    }
    
    my %ranges = ();
    
    my @current_range = ($residues[0]);
    my $index = 0;
    my $counter = 1;
    for(my $i = 1; $i < scalar(@residues); $i++) {
        if($residues[$i] == ($residues[$i - 1] + 1)) {
            push (@current_range, $residues[$i]);
        } else {
            $ranges{$index} = [@current_range];
            @current_range = ($residues[$i]);
            $index++;
        }
        $counter++;
    }
    $ranges{$index} = [@current_range];
    
    my $ranges_string = '';
    
    my $flag = 1;
    for(my $i = 0; $i < scalar(keys %ranges); $i++) {
        my $delimiter = ',';
        if($flag) {
            $delimiter = '';
        }
        my @range = $ranges{$i};
        my $length = scalar(@{$range[0]});
        
        if($length == 1) {
            $ranges_string .= $delimiter . $range[0];
        } elsif($length > 1) {
            $ranges_string .= $delimiter . $range[0][0] . '-' . $range[0][$length - 1];
        }
        
        $flag = 0;
    }
    
    $result = $ranges_string;
    
    return $result;
}

# get clashes from the pdb structure file
# input - clashes distance
# output - array of hash {index1, index2, distance}
sub clashes {
    my ($self, $CLASHES_LIMIT) = @_;
    
    my @results = ();
    my @residues = ();
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my $counter = 0;
    while(<$HANDLE>) {
        my $line = $_;
        
        if($line !~ /^ATOM/) { next; }
        
        my $atom_name = substr($line, 12, 4);
        my $residue_index = substr($line, 22, 4);
        
        $atom_name = trim($atom_name);
        
        if($atom_name ne 'CA' ) { next; }
        
    	my $x = trim(substr($line, 30, 8));
    	my $y = trim(substr($line, 38, 8));
    	my $z = trim(substr($line, 46, 8));

        my %residue = ();
        $residue{index} = $residue_index;
        $residue{x} = $x;
        $residue{y} = $y;
        $residue{z} = $z;
        
        push(@residues, {%residue});
        $counter++;
    }
    
    for(my $i = 0; $i < $counter - 1; $i++) {
        for(my $j = $i + 1; $j < $counter; $j++) {
            my $distance = sqrt(
                                ($residues[$i]{x} - $residues[$j]{x})*($residues[$i]{x} - $residues[$j]{x}) + 
                                ($residues[$i]{y} - $residues[$j]{y})*($residues[$i]{y} - $residues[$j]{y}) +
                                ($residues[$i]{z} - $residues[$j]{z})*($residues[$i]{z} - $residues[$j]{z})
            );
            
            if($distance < $CLASHES_LIMIT) {
                my %clashe = ();
                $clashe{index1} = $residues[$i]{index};
                $clashe{index2} = $residues[$j]{index};
                $clashe{distance} = $distance;
                
                push(@results, {%clashe});
            }
        }
    }
    
    return @results;
}

sub chain_breaks {
    my ($self, $CHAIN_BREAK_LIMIT) = @_;
    
    my @results = ();
    my @residues = ();
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my $counter = 0;
    while(<$HANDLE>) {
        my $line = $_;
        
        if($line !~ /^ATOM/) { next; }
        
        my $atom_name = substr($line, 12, 4);
        my $residue_index = substr($line, 22, 4);
        
        $atom_name = trim($atom_name);
        
        if($atom_name ne 'CA' ) { next; }
        
    	my $x = trim(substr($line, 30, 8));
    	my $y = trim(substr($line, 38, 8));
    	my $z = trim(substr($line, 46, 8));

        my %residue = ();
        $residue{index} = $residue_index;
        $residue{x} = $x;
        $residue{y} = $y;
        $residue{z} = $z;
        
        push(@residues, {%residue});
        $counter++;
    }
    
    for(my $i = 0; $i < $counter - 1; $i++) {
        my $distance = sqrt(
                ($residues[$i]{x} - $residues[$i + 1]{x})*($residues[$i]{x} - $residues[$i + 1]{x}) + 
                ($residues[$i]{y} - $residues[$i + 1]{y})*($residues[$i]{y} - $residues[$i + 1]{y}) +
                ($residues[$i]{z} - $residues[$i + 1]{z})*($residues[$i]{z} - $residues[$i + 1]{z})
        );
            
        if($distance > $CHAIN_BREAK_LIMIT) {
            my %chain_break = ();
            $chain_break{index1} = $residues[$i]{index};
            $chain_break{index2} = $residues[$i + 1]{index};
            $chain_break{distance} = $distance;
                
            push(@results, {%chain_break});
        }
    }
    
    return @results;
}

sub residues_count {
    my ($self) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my $result = 0;
    while(<$HANDLE>) {
        my $line = $_;
        
        if($line !~ /^ATOM/) { next; }
        
        my $atom_name = substr($line, 12, 4);
        $atom_name = trim($atom_name);
        
        if($atom_name ne 'CA' ) { next; }
        
        $result++;
    }
    
    return $result;
}

sub is_ca_atoms_only {
    my ($self) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my $result = 1;
    while(<$HANDLE>) {
        my $line = $_;
        
        if($line !~ /^ATOM/) { next; }
        
        my $atom_name = substr($line, 12, 4);
        $atom_name = trim($atom_name);
        
        if($atom_name ne 'CA' ) {
            $result = 0;
            last;
        }
    }
    
    return $result;
}

# N, C, O, CA - only
# $allowance_percentage - percent of (0.9 by default)
sub full_backbone_residues {
    my ($self) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my $backbone_CA_exist = 0;
    my $backbone_C_exist = 0;
    my $backbone_N_exist = 0;
    my $backbone_O_exist = 0;
    my $previous_residue = -1000;
    
    my $backbone_residues_count = 0;
    
    while(<$HANDLE>) {
        my $line = $_;
        
        if($line !~ /^ATOM/) { next; }
        my $atom_name = substr($line, 12, 4);
        my $residue_index = substr($line, 22, 4);
        
        $atom_name = trim($atom_name);
        
        if(($previous_residue != -1000) && ($previous_residue != $residue_index)) {
            if(($backbone_CA_exist == 1) && ($backbone_C_exist == 1) && ($backbone_N_exist == 1) && ($backbone_O_exist == 1)) {
                $backbone_residues_count++;
            }
            $backbone_CA_exist = 0;
            $backbone_C_exist = 0;
            $backbone_N_exist = 0;
            $backbone_O_exist = 0;
        }
        
        $previous_residue = $residue_index;
        
        if($atom_name eq 'CA') { $backbone_CA_exist = 1; }
        if($atom_name eq 'C') { $backbone_C_exist = 1; }
        if($atom_name eq 'N') { $backbone_N_exist = 1; }
        if($atom_name eq 'O') { $backbone_O_exist = 1; }
    }
    
    if($previous_residue != -1000) {
        if(($backbone_CA_exist == 1) && ($backbone_C_exist == 1) && ($backbone_N_exist == 1) && ($backbone_O_exist == 1)) {
            $backbone_residues_count++;
        }
    }
    
    return $backbone_residues_count;
}

# N, C, O, CA, CB at least this atoms are presents
sub extended_CB_backbone_residues {
    my ($self) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my $backbone_CA_exist = 0;
    my $backbone_C_exist = 0;
    my $backbone_N_exist = 0;
    my $backbone_O_exist = 0;
    my $previous_residue = -1000;
    
    my $CB_sidechain_residues_count = 0;
    
    my @extra_atoms = ();
    
    my $residue_name = '';

    while(<$HANDLE>) {
        my $line = $_;
        
        if($line !~ /^ATOM/) { next; }
        
        my $atom_name = substr($line, 12, 4);
        my $residue_index = substr($line, 22, 4);
        
        $atom_name = trim($atom_name);
        
        if(($previous_residue != -1000) && ($previous_residue != $residue_index)) {
            if(($backbone_CA_exist == 1) && ($backbone_C_exist == 1) && ($backbone_N_exist == 1) && ($backbone_O_exist == 1)) {
                if($residue_name eq 'GLY') {
                    $CB_sidechain_residues_count++;
                } else {
                    if(in_array('CB', @extra_atoms)) {
                        $CB_sidechain_residues_count++;
                    }
                }
            }
            
            @extra_atoms = ();
            
            $backbone_CA_exist = 0;
            $backbone_C_exist = 0;
            $backbone_N_exist = 0;
            $backbone_O_exist = 0;
        }

        $residue_name = substr($line, 17, 3);
        
        if(($atom_name ne 'CA') && ($atom_name ne 'C') && ($atom_name ne 'O') && ($atom_name ne 'N')) {
            push(@extra_atoms, $atom_name);
        }
        
        $previous_residue = $residue_index;
        
        if($atom_name eq 'CA') { $backbone_CA_exist = 1; }
        if($atom_name eq 'C') { $backbone_C_exist = 1; }
        if($atom_name eq 'N') { $backbone_N_exist = 1; }
        if($atom_name eq 'O') { $backbone_O_exist = 1; }
    }

    if($previous_residue != -1000) {
        if(($backbone_CA_exist == 1) && ($backbone_C_exist == 1) && ($backbone_N_exist == 1) && ($backbone_O_exist == 1)) {
            if($residue_name eq 'GLY') {
                $CB_sidechain_residues_count++;
            } else {
                if(in_array('CB', @extra_atoms)) {
                    $CB_sidechain_residues_count++;
                }
            }
        }
    }
    
    return $CB_sidechain_residues_count;
}

# N, C, O, CA - only
# more than 50% of sidechains residues
sub extended_backbone_residues {
    my ($self) = @_;
    
    my $HANDLE = $self->{_FILEHANDLE};
    
    seek($HANDLE, 0, 0);
    
    my $backbone_CA_exist = 0;
    my $backbone_C_exist = 0;
    my $backbone_N_exist = 0;
    my $backbone_O_exist = 0;
    my $previous_residue = -1000;
    
    my $sidechain_residues_count = 0;
    
    my @extra_atoms = ();
    
    my $residue_name = '';

    while(<$HANDLE>) {
        my $line = $_;
        
        if($line !~ /^ATOM/) { next; }
        
        my $atom_name = substr($line, 12, 4);
        my $residue_index = substr($line, 22, 4);
        
        $atom_name = trim($atom_name);
        
        if(($previous_residue != -1000) && ($previous_residue != $residue_index)) {
            if(($backbone_CA_exist == 1) && ($backbone_C_exist == 1) && ($backbone_N_exist == 1) && ($backbone_O_exist == 1)) {
                my @default_atoms = $self->get_sidechain_atoms($residue_name);
                
                my $valid_atoms_count = 0;
                for(my $i = 0; $i < scalar(@default_atoms); $i++) {
                    if(in_array($default_atoms[$i], @extra_atoms)) {
                        $valid_atoms_count++;
                    }
                }
                
                if($valid_atoms_count >= 0.5 * scalar(@default_atoms)) {
                    $sidechain_residues_count++;
                }
            }
            
            @extra_atoms = ();
            
            $backbone_CA_exist = 0;
            $backbone_C_exist = 0;
            $backbone_N_exist = 0;
            $backbone_O_exist = 0;
        }

        $residue_name = substr($line, 17, 3);
        
        if(($atom_name ne 'CA') && ($atom_name ne 'C') && ($atom_name ne 'O') && ($atom_name ne 'N')) {
            push(@extra_atoms, $atom_name);
        }
        
        $previous_residue = $residue_index;
        
        if($atom_name eq 'CA') { $backbone_CA_exist = 1; }
        if($atom_name eq 'C') { $backbone_C_exist = 1; }
        if($atom_name eq 'N') { $backbone_N_exist = 1; }
        if($atom_name eq 'O') { $backbone_O_exist = 1; }
    }

    if($previous_residue != -1000) {
        if(($backbone_CA_exist == 1) && ($backbone_C_exist == 1) && ($backbone_N_exist == 1) && ($backbone_O_exist == 1)) {
            my @default_atoms = $self->get_sidechain_atoms($residue_name);
            
            my $valid_atoms_count = 0;
            for(my $i = 0; $i < scalar(@default_atoms); $i++) {
                if(in_array($default_atoms[$i], @extra_atoms)) {
                    $valid_atoms_count++;
                }
            }
            
            if($valid_atoms_count >= scalar(@default_atoms)) {
                $sidechain_residues_count++;
            }
        }
    }
    
    return $sidechain_residues_count;
}

sub get_sidechain_atoms{
    my ($self, $residue) = @_;
    
    return $AMINOACIDS_ATOMS{$residue};
}

sub check_complete_set_atoms_per_residue {
    my ($self, $residue) = @_;
    my @atoms = @{$residue->{_atoms}};
    my $result = 1; 
    my $info = "";
    my $tmp_str = "";
    foreach my $atom (@atoms) {
	$tmp_str .= " ".$atom->{_name}." ";
    }
#    print $tmp_str."\n";
    foreach my $atom_name (split(/\s+/,$AMINOACIDS_ATOMS{$residue->{_name}})){
	if ($tmp_str !~ m/ $atom_name /) {
		$result = 0;
		$info .= sprintf("Residue %s %s : Atom %s is missed\n",  $residue->{_name}, $residue->{_res_no}, $atom_name);
	}
    }
    if ($result == 0){
	return (0, $info);
    }
    return (1, sprintf("Residue %s %s : is O.K.\n",  $residue->{_name}, $residue->{_res_no}));
}

sub parseStructure {
    my ($self) = @_;
    my $HANDLE = $self->{_FILEHANDLE};
    seek($HANDLE, 0, 0);
    my %result = (); # hash with keys - res_number : value - PDBresidue;
    my $prev_res_no = -1000;
    my $res_no;
    my $lines_per_res = "";
    while(<$HANDLE>) {
        my $line = $_;
	chomp $line;
        if($line !~ /^ATOM/) { next; }
	$res_no = trim(substr($line, 22, 4));
	if ($prev_res_no == -1000 || $prev_res_no == $res_no){
	    $lines_per_res .= $line."\n";
	} else {
	    my $residue = new PDBResidue();
	    $residue->parseLines($lines_per_res);
	    $result{$prev_res_no} = $residue;
	    $lines_per_res = $line."\n";
	}
	$prev_res_no = $res_no;
    }
    # add last residue
    my $residue = new PDBResidue();
    $residue->parseLines($lines_per_res);
    if (defined($residue->{_atoms})){
         $result{$res_no} = $residue;
    }
    return %result;
}

sub check_complete_set_atoms {
    my ($self) = @_;
    my %residues = $self->parseStructure();
    foreach my $res_no (sort {$a<=>$b} keys %residues){
	my ($result, $info) = $self->check_complete_set_atoms_per_residue($residues{$res_no});
#	if ($result == 0){
	    print $info;
#	}
    }
}


1;
