#!/usr/bin/perl 

use strict; 
use warnings;

# this script was used to filter parent models of target T0713
# the sequence released has 719 residues
# the pdb structure has residues in range 33-406 
# while the original models were split by parent frames
# it turned out that for some models the overlap with the target range is empty or only a few residues
# this caused the failure of programs 
# to avoid this problem such split models were deleted
# only models which have an overlap of no less than 20 residues were kept

# Models scheduled for deletion (filled in by the overlap check further down).
my @models_2del = ();
# Names of all split model files found for target T0713.
my @models = ();

# Collect every split model file of T0713. Only names of the form
# T0713TS<3 digits>_<1-5>_<1-9> are considered; anything else in the
# directory is ignored.
my $models_dir = "/local/CASP13/MODELS/T0713/";
# Abort early if the directory is unreadable instead of silently
# continuing with an empty model list.
opendir(my $models_dh, $models_dir)
	or die "cannot open directory $models_dir: $!\n";
while(defined(my $file = readdir($models_dh))){
	next if ($file !~ m/^T0713TS[0-9]{3}_[1-5]_[1-9]$/ );
#	print $file."\n";
	push @models, $file;
}
# A directory handle has to be released with closedir(); close() only
# works on file handles.
closedir($models_dh);

@models = sort @models;

# Decide which models have to be deleted.
# For every model the residue numbers (PDB columns 23-26) of the first and
# of the last ATOM record are taken as the residue range of the model. The
# overlap of that range with the range covered by the target structure
# (33-406) is computed, and models with an overlap shorter than 20 residues
# are appended to @models_2del.
# A model without any usable ATOM record has an overlap of 0 and is
# therefore scheduled for deletion as well. A model file that cannot be
# read is skipped with a warning: nothing is deleted on an I/O error.
foreach my $model (@models){
   my $model_path = "/local/CASP13/MODELS/T0713/$model";
   my $model_fh;
   if ( !open($model_fh, '<', $model_path) ){
     warn "cannot read $model_path: $!; model skipped\n";
     next;
   }
   my ($start, $end);
   while (my $line = <$model_fh>){
     # only genuine ATOM records; a REMARK that merely mentions ATOM must not count
     next if ($line !~ m/^ATOM/);
     # too short to hold the residue number field
     next if (length($line) < 26);
     my $resseq = substr($line, 22, 4);
     $resseq =~ s/^\s+//; $resseq =~ s/\s+$//;
     next if ($resseq !~ m/^-?[0-9]+$/);
     $start = $resseq if (!defined $start);
     $end = $resseq;
   }
   close($model_fh);

   my $overlap = 0;
   if ( defined $start ){
     $overlap = min(406, $end) - max(33,$start) + 1;
   }
#   print "$model: $start - $end ; $overlap\n";
   if ( $overlap < 20 ){
     push @models_2del, $model; 
   }
#   last;
}


# Remove every model that was scheduled for deletion.
# The name of each model is printed, followed by the shell command that is
# then executed. The -name pattern is single-quoted so that the shell hands
# it to find unchanged; unquoted, the shell could expand the glob against
# the current working directory before find ever sees it.
foreach my $model (@models_2del){
 print $model."\n";
 # delete results files
#  print("find /local/CASP13/RESULTS/ -name '$model*' | xargs --no-run-if-empty rm -rf \n");
#  system ("find /local/CASP13/RESULTS/ -name '$model*' | xargs --no-run-if-empty rm -rf ");
  sleep 1;
 #delete models
  my $delete_cmd = "find /local/CASP13/MODELS/ -name '$model*' | xargs --no-run-if-empty rm -rf ";
  print($delete_cmd."\n");
  my $status = system ($delete_cmd);
  # report a failed deletion instead of silently going on
  if ( $status != 0 ){
    warn "deletion command for $model failed (wait status $status)\n";
  }
#  last;
}


###########################################################
# SUBROUTINES
###########################################################

# Return the smaller of two numbers.
# The arguments are compared numerically; if neither is smaller than the
# other, the second argument is returned.
sub min {
   my ($first, $second) = @_;
   if ($first < $second) {
      return $first;
   }
   return $second;
}

# Return the larger of two numbers.
# The arguments are compared numerically; if neither is larger than the
# other, the second argument is returned.
sub max {
   my ($first, $second) = @_;
   if ($first > $second) {
      return $first;
   }
   return $second;
}

