#!/usr/bin/perl
use warnings;
use strict;

#STANDARD FORMAT TOKENS:
my $PFRMAT      ="^PFRMAT";
my $TARGET      ="^TARGET";
my $AUTHOR      ="^AUTHOR";
my $MODEL       ="^MODEL";
my $METHOD      ="^METHOD";
my $SCORE       ="^SCORE";
my $REMARK      ="^REMARK";
my $RMODE       ="^RMODE";
my $END         ="^END";
my $HEADER_END  ="^\$";
my $Mrecord     ="^[A-Z]*[0-9]+\\s+[A-Z]*[0-9]+\\s+";

#STATES OF THE LEXICAL ANALYZER
my $m_section=0; #1 - if in MODEL section

#OBLIGATORY FIELDS COUNTERS
my %fields_count=(
 PFRMAT => 0,
 AUTHOR => 0,
 TARGET => 0,
 METHOD => 0,
 MODEL => 0,
 RMODE => 0,
 MRECORD => 0,
 END => 0
);
#ADDITIONAL VARIABLES

my $status=0;
my $method=0;
my $num_line=0;
my $targ_val=0;
my $qmode_val=0;
my $error_num=0;
my $skip_lines=0;

while (<>){ 

my $error=0;

# skip lines if model section consist with more than one line
if($skip_lines > 0) {$skip_lines--; next;}

$num_line++;
chomp;
$status=0;
 SWITCH: {
       /$PFRMAT/i && (!$m_section)  && do {
                                               $status=check_pfrmat($_);
                                               $fields_count{PFRMAT}++;
                                               last SWITCH;
                                       };
       /$AUTHOR/i && (!$m_section)     && do {
                                               $status=check_author($_);
                                               $fields_count{AUTHOR}++;
                                               last SWITCH;
                                       };
       /$TARGET/i && (!$m_section)     && do { $targ_val=check_target($_);
                                               if ($targ_val =~ /T\d{4}\s*$/i)
                                                       {$status=0;}
                                               $fields_count{TARGET}++;
                                               last SWITCH;
                                       };
       /$METHOD/i && (!$m_section)     && do { $method++;
                                               $fields_count{METHOD}++;
                                               last SWITCH;
                                       };
       /$RMODE/i  && (!$m_section)      && do { $fields_count{RMODE}++;
                                               if ($fields_count{RMODE}>1)
                       {$status="ERROR!: Too many $RMODE records in the prediction!";}
                                               else {  $status=check_qmode($_);
                                                       $qmode_val=$status; }
                                               last SWITCH;
                                       };
       /$MODEL/i  && (!$m_section)     && do { $m_section=1;
                                               $fields_count{MODEL}++;
                                               $status=check_oheadf(%fields_count);
                                               if ($status eq 0) { $status=check_model($_);}
                                               last SWITCH;
                                       };
       /$Mrecord/i  && ($m_section) && do {
            my @m_section_data = read_m_section(tell(ARGV) - length($_) - 1); 
            $fields_count{MRECORD}++;
            strip_linebreaks($m_section_data[0]);

	    $status=check_mrecord(strip_linebreaks($m_section_data[0]));
            $skip_lines = $m_section_data[1];
            last SWITCH;
      };
       /$REMARK/i                      && do { $status=check_remark($_);
                                               last SWITCH;
                                       };
       /$END/i    && ($m_section)      && do { $m_section=0;
                                               $fields_count{END}++;
                                               last SWITCH;
                                       };
       /^\s*$/                         && do {
                                               last SWITCH;
                                       };
     $error=1;
  }
 if ($status !~ /^[012]$/) {
  #print STDERR "# ERROR! (Line no. $num_line) $status\n";
  print "# ERROR! (Line no. $num_line) $status\n";
  if ($error_num++ > 10){ print "# ERROR! Too many errors. Exiting...\n"; exit;}
 }
 if ($error) {
  print "# ERROR! (Line no. $num_line): Unknown or misplaced record in the following line: $_\n";
  if ($error_num++ > 10){ print "# ERROR! Too many errors. Exiting...\n"; exit;}
 }
}
check_eof(%fields_count);


#------------------------------------
#   Subroutines for parsing tokens:
#____________________________________


##
# read multiline m_section
# return array = (string m_section, integer skiplines_count)
##
sub read_m_section {
      my ($position) = @_;

      open(FH, $ARGV);
      seek(FH, $position, 0);
      my $m_section_string = '';
      $skip_lines = 0;
      my $find1st = 0;
      while(<FH>) {
            if($_ =~ /$Mrecord/) {
                 if ($find1st++) {return($m_section_string, $skip_lines);}
                  $m_section_string  = $_;
            } elsif ($_ =~ /$END/) {
#                  $skip_lines++;
                  return($m_section_string, $skip_lines);
            } else {
                  $m_section_string .= $_;
                  $skip_lines++;
            }
      }
      return ($m_section_string, $skip_lines);
}

sub check_pfrmat {
 my @params=@_;
 my @fields=split /\s+/, $params[0];
 my $length=scalar @fields;
 if ($length > 2){
 return "ERROR! Too many fields after PFRMAT";
 } elsif ($length < 2) { return "ERROR! Please specify type of prediction after PFRMAT";}
 if ($fields[1] =~ /^RR$/) {return 0;}
 else { return "ERROR! Check the type of prediction after PFRMAT - it must be RR";}
 return 0;
}

sub check_author {
 my @params=@_;
 my $param=$params[0];
 #This functionality movet to Submission.pm
 #if ($param =~ /$AUTHOR\s+\d{4}-\d{4}-\d{4}\s*$/i) { return 0;}
 #return "ERROR! Please check the line:  $param";
 return 0;
}

sub check_target {
 my @params=@_;
 my $param=$params[0];
 (my $keyword, my $value)= split (/\s+/, $param);
 if (($keyword eq 'TARGET') && ($value =~ /T[0-9]{4}\s*$/i)) {return $value;}
 return " Please check the line:  $param";
}

sub check_model {
 my @params=@_;
 my $param=$params[0];
 if ($param =~ /$MODEL\s*[0-9]+\s*$/i) { return 0;}
 return "ERROR! Please check the line:  $param";
}

sub check_remark {
 return 0;
}

sub check_qmode {
 my @params=@_;
 my $param=$params[0];
 (my $keyword, my $value)= split (/\s+/, $param);
 if (($keyword eq "RMODE") && ($value =~ /^[12]$/)) {return $value;}
 return " Please check the line:  $param";
}

sub check_mrecord {
 my @params=@_;
 my $param=$params[0];
 my @fields=split (/\s+/, $param) ;
 my $model = $fields[0];

 if  ($#fields < 2){
    return " Each record in an RR prediction must contain at least three columns: two indeces of residues in a contact pair and a probability of the residues being in contact. Check line: $param";
 } elsif ($#fields > 1){
   unless ($fields[0] =~ /^\d+$/  || $fields[0] =~ /^[A-Z]\d+$/i ){
    return " The first column should be a residue number (integer) or a chain name (letter) immediately followed by a number. Please check first column in this line: $param";
   }
   unless ($fields[1] =~ /^\d+$/ ||  $fields[1] =~ /^[A-Z]\d+$/i ){
    return " The second column should be a residue number (integer) or a chain name (letter) immediately followed by a number. Please check second column in this line: $param";
   }
   my $prob = $fields[2];
   unless ($prob =~ /^[01]$/i || $prob =~ /^0\.\d+$/ || $prob =~ /^\.\d+$/ || $prob =~/^1\.0*$/){
    return " Probability score should be a real number between 0 and 1. Please check third column in this line: $param";
   }
 }

 if ($qmode_val eq 1){
  if ($#fields > 2){
    return " In RMODE 1 each record should contain exactly three columns: two numbers of residues in a contact pair and a probability of the residues being in contac. Check line: $param";
  }
 } elsif ($qmode_val eq 2) {
   if ($#fields != 12){
    return " Check data for $model *** Number of columns in the line: $param. In RMODE 2 each line should contain exactly 13 columns: two first columns with residue numbers, third column with a contact probability, and columns 4-13 with probabilties of inter-residue distance falling within ten predefined distance bins (see RR format).";
   }

   my @res = @fields[3 .. $#fields];
   my $i=3;
   my $message=' Distance probabilities should be real numbers in format 0.123 or .123 . Please check: '; 
   my $nfe=0; 
   foreach my $record (@res) {
     $i++; 
     unless ($record =~ /^\d+\.\d+\s*$/ || $record =~ /^\.\d+\s*$/ || $record =~ /^[01]$/i){
         $nfe++;
         $message.=" value $record in the column $i * ";         
     }
   }
   if ( abs($fields[3]+$fields[4]+$fields[5]-$fields[2])>0.005 ){
	$nfe++;
   	$message=" Sum of the first three distance probabilities ($fields[3] + $fields[4] + $fields[5], columns 4-6) should sum up to the contact probability ($fields[2], 3rd column)"; 
   }
   if ($nfe>0){return "$message";}
 } else {
  return " Can not check the data if RMODE parameter is not defined or set to something different from 1 or 2.\n Please check the line:  $param";
 }
 return 0;
}

sub check_rr {
 return 0;
}

sub check_oheadf {
 my %fields_max_count=(
 PFRMAT => 1,
 AUTHOR => 1,
 TARGET => 1,
 METHOD => 20000000, #some large number
 MODEL => 1,
 MRECORD => 0,
 END => 0,
 RMODE => 1
 );
 my %fields_min_count=(
 PFRMAT => 1,
 AUTHOR => 1,
 TARGET => 1,
 METHOD => 1,
 MODEL => 1,
 MRECORD => 0,
 END => 0,
 RMODE => 1
 );
 my %params=@_;
 my $item;
 for  (keys %params) {
  if ($fields_max_count{$_} < $params{$_})
   {return "ERROR!: Too many $_ records in a prediction!";}
  elsif ($fields_min_count{$_} > $params{$_}) { return "ERROR! Record  $_ is missing - at least $fields_min_count{$_} must be present!";}
 }
 return 0;
}

sub check_eof {
 my %fields_max_count=(
 PFRMAT => 1,
 AUTHOR => 1,
 TARGET => 1,
 METHOD => 50, #some large number
 MODEL => 1,
 MRECORD => 2000000,  
 END => 1,
 RMODE => 1
 );
 my %fields_min_count=(
 PFRMAT => 1,
 AUTHOR => 1,
 TARGET => 1,
 METHOD => 1,
 MODEL => 1,
 MRECORD => 1,
 END => 1,
 RMODE => 1
 );
 my %params=@_;
 my $item;
 foreach my $e (keys %params) {
  if ($fields_max_count{$e} < $params{$e})
   {print "# ERROR! Too many $e records in the prediction!\n";}
  elsif ($fields_min_count{$e} > $params{$e}) {
    if ($e eq 'MRECORD')
     {print "# ERROR! Your prediction is empty\n";}
    else
    {print "# ERROR! Record  $e is missing - at least $fields_min_count{$e} must be present!\n";}
  }
 }
 return 0;
}

##
# Remove line breaks characters from string
##
sub strip_linebreaks {
      my $string = shift;
      while($string =~ /\n+/m) {
            $string =~ s/\n+/ /m; #Changed to replace newline for whitespace
      }
      return $string;
}
