/*-----------------------------------------------------------
/
/   ts_ver.c 
/
/   Program checks the TS format for CASP7 submissions
/   (3D atomic coordinates - Tertiary Structure prediction)
/
/   Changes from casp6:
/     - added CA-CA geometry checking (errors and warnings)
/     - CA-CA check is performed within the same PARENT-TER block only
/     - additional check for oligomers
/
/------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>

#define MAXRES           10000
#define MAXATM           32000
#define MAXCHN               4

#define N_AA                21
#define N_AA1               22
#define N_AT                38

char *letter="ARNDCQEGHILKMFPSTWYVX-";                  /* 22 chars             */
char *lowmat="abcdefghijklmnopqrstuvwxyz1234567890";    /* 36 chars             */
char *uppmat="ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";    /* 36 chars             */

typedef struct {
  float   x;
  float   y;
  float   z;
} vector;

typedef struct {
  int     serial;
  char    name[10];
  char    alt_loc[10];
  char    res_name[10];
  char    chain_id[10];
  int     res_seq;
  char    i_code[10];
  vector  R;
  float   occupancy;
  float   temp_factor;
  char    seg_id[10];
  char    element[10];
  char    charge[10];
} atom_struct;

typedef struct {
  atom_struct atom[MAXATM];
  int         n_atoms;
  int         n_mol;
  int         n_aa;
} pdb_struct;

typedef struct {
  char        res;
  char        res_3[10];
  int         all;
  int         main;
  int         numb[N_AT];
  vector      CA;
  int         res_seq;
  char        res_name[10];
  int         n_fragm;   
} res_struct;

typedef struct {
  res_struct  aa[MAXRES];
  atom_struct atom[MAXATM];
  int         n_atoms;
  int         n_aa;
  int         n_fragments;
  int         n_occupancy;
  int         parent_none;
} data_tsp;

typedef struct {
  char      name[6];
} res_atoms;

typedef struct {
  char      name_chain;
  char      name[10];
  int       n_aa;
  int       aa_num[MAXRES];
  char      aa[MAXRES];
} t_chain;

typedef struct {
  int       AA[N_AA][14];
  res_atoms atom[N_AT];
  char      target_name[10];
  char      target_type[200]; 
  t_chain   target[MAXCHN];
  int       target_n_chains;
  data_tsp  tsp[MAXCHN];
  int       model;
  int       u_model;
  char      parent_name[10];
  int       n_fragments;
  int       end;
  int       n_method;
  int       errors;
} ts_f;

int check_aa(char, char*, int);
void read_ts_f(ts_f *, char*);
void read_data_tsp(ts_f *, FILE*);
void check_ts_f(ts_f *);
void clean_ts_f(ts_f *);
void read_seq(char*, ts_f *);
void escape(int);
void C_321(char*, char*);
int pdb_align(char*, char, pdb_struct *);
void read_pdb_orig(char, pdb_struct *, FILE*);
void clean_pdb(pdb_struct *);
int CA_dist(ts_f *);

main(int argc, char *argv[]) 
{
  int  i, CLN=0;
  char ts_file[120];
  ts_f ts_data;
  
  if(argc<2){
    printf(" Usage: ts_ver <ts_file>\n");
    exit(0);
  }
  
  strcpy(ts_file,argv[1]);

  clean_ts_f(&ts_data);

  printf("# Reading prediction format TS\n\n");
  read_ts_f(&ts_data,ts_file);
  printf("\n# Reading prediction format TS             (DONE)\n");



  check_ts_f(&ts_data);

  if(ts_data.errors==0) {
    if(ts_data.u_model==1) {
      printf("\n# MODEL index: %d UNREFINED ",ts_data.model);
    }
    else if(ts_data.u_model==2) {
      printf("\n# MODEL index: %d OLIGOMER ",ts_data.model);
    }
    else {
      printf("\n# MODEL index: %d ",ts_data.model);
    }
    for(i=0;i<ts_data.target_n_chains;i++) {
      printf("\n\n# Target name: %-7s ",ts_data.target[i].name);
      if(ts_data.tsp[i].parent_none>0) {
        printf("\n# THREADING/FOLD RECOGNITION MODEL:       PARENT NONE ");
      }
      printf("\n# Total number of residues in target:            %4d ",ts_data.target[i].n_aa);
      printf("\n# Total number of residues in model:             %4d ",ts_data.tsp[i].n_aa);
      printf("\n# Total number of atoms in model:                %4d ",ts_data.tsp[i].n_atoms);
      printf("\n# Number of atoms with 1.0 occupancy:            %4d ",ts_data.tsp[i].n_occupancy);
    }
    if(ts_data.target_n_chains>1) {
      printf("\n ");
    }
    printf("\n# Number of fragments in model:                  %4d ",ts_data.n_fragments);
    printf("\n# Number of METHOD records:                      %4d ",ts_data.n_method);
    printf("\n\n# No errors in format. Checking prediction geometry ...\n\n");
  }
  else {
    printf("\n# Number of errors in format = %d. Checking prediction geometry ...\n\n",ts_data.errors);
  }
  
  CLN=CA_dist(&ts_data);
  if (CLN==3) {
     printf("\n# ERROR! Too many steric clashes in the prediction. It can not be accepted. You might consider correction and resubmission of the prediction before the target deadline.\n");
  } else if (CLN==2) {
     printf("\n# WARNING! There is a severe CA-CA clash or/and substantial number of geometrical inconsistencies in the prediction. Assessors will be notified about this fact and may penalize the prediction in the assessment. You might consider correction and resubmission of the prediction before the target deadline. \n");
  } else if (CLN==1) {
     printf("\n# WARNING! There are 5 or more disjoint segments (CAs adjacent in sequence separated by more than 4.5 Angstroms) in the prediction. Assessors will be notified about this fact and may penalize the prediction in the assessment. You might consider correction and resubmission of the prediction before the target deadline. \n");
  } 
  printf("\n# Checking prediction geometry             (DONE)\n");

}

void escape(int error)
{
  if(error>25) {
    printf("\n# Too many ERRORS ...");
    printf("\n#    Please check format for TS predictions.\n");
    exit(0);
  }
  return;
}

/*-----------------------------------------------------------
/
/   clean ts_f structure
/
/------------------------------------------------------------*/
void clean_ts_f(ts_f *ts_data)
{
  int i,j,k;
  static int AA[N_AA][14] = {
               {0, 1, 2, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1},    /* A  ALA */
               {0, 1, 2, 3, 4,  5,  8, 21, 15, 35, 36, -1, -1, -1},    /* R  ARG */
               {0, 1, 2, 3, 4,  5, 26, 25, -1, -1, -1, -1, -1, -1},    /* N  ASN */
               {0, 1, 2, 3, 4,  5, 26, 27, -1, -1, -1, -1, -1, -1},    /* D  ASP */
               {0, 1, 2, 3, 4, 31, -1, -1, -1, -1, -1, -1, -1, -1},    /* C  CYS */
               {0, 1, 2, 3, 4,  5,  8, 28, 23, -1, -1, -1, -1, -1},    /* Q  GLN */
               {0, 1, 2, 3, 4,  5,  8, 28, 29, -1, -1, -1, -1, -1},    /* E  GLU */
               {0, 1, 2, 3,-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},    /* G  GLY */
               {0, 1, 2, 3, 4,  5, 24, 10, 12, 23, -1, -1, -1, -1},    /* H  HIS */
               {0, 1, 2, 3, 4,  6,  7,  9, -1, -1, -1, -1, -1, -1},    /* I  ILE */
               {0, 1, 2, 3, 4,  5,  9, 10, -1, -1, -1, -1, -1, -1},    /* L  LEU */
               {0, 1, 2, 3, 4,  5,  8, 11, 34, -1, -1, -1, -1, -1},    /* K  LYS */
               {0, 1, 2, 3, 4,  5, 30, 11, -1, -1, -1, -1, -1, -1},    /* M  MET */
               {0, 1, 2, 3, 4,  5,  9, 10, 12, 13, 15, -1, -1, -1},    /* F  PHE */
               {0, 1, 2, 3, 4,  5,  8, -1, -1, -1, -1, -1, -1, -1},    /* P  PRO */
               {0, 1, 2, 3, 4, 18, -1, -1, -1, -1, -1, -1, -1, -1},    /* S  SER */
               {0, 1, 2, 3, 4, 19,  7, -1, -1, -1, -1, -1, -1, -1},    /* T  THR */
               {0, 1, 2, 3, 4,  5,  9, 10, 22, 13, 14, 16, 17, 33},    /* W  TRP */
               {0, 1, 2, 3, 4,  5,  9, 10, 12, 13, 15, 32, -1, -1},    /* Y  TYR */
               {0, 1, 2, 3, 4,  6,  7, -1, -1, -1, -1, -1, -1, -1},    /* V  VAL */
             };

  strcpy(ts_data->atom[0].name,"CA");
  strcpy(ts_data->atom[1].name,"N");
  strcpy(ts_data->atom[2].name,"C");
  strcpy(ts_data->atom[3].name,"O");
  strcpy(ts_data->atom[4].name,"CB");
  strcpy(ts_data->atom[5].name,"CG");
  strcpy(ts_data->atom[6].name,"CG1");
  strcpy(ts_data->atom[7].name,"CG2");
  strcpy(ts_data->atom[8].name,"CD");
  strcpy(ts_data->atom[9].name,"CD1");
  strcpy(ts_data->atom[10].name,"CD2");
  strcpy(ts_data->atom[11].name,"CE");
  strcpy(ts_data->atom[12].name,"CE1");
  strcpy(ts_data->atom[13].name,"CE2");
  strcpy(ts_data->atom[14].name,"CE3");
  strcpy(ts_data->atom[15].name,"CZ");
  strcpy(ts_data->atom[16].name,"CZ2");
  strcpy(ts_data->atom[17].name,"CZ3");
  strcpy(ts_data->atom[18].name,"OG");
  strcpy(ts_data->atom[19].name,"OG1");
  strcpy(ts_data->atom[20].name,"OG2");
  strcpy(ts_data->atom[21].name,"NE");
  strcpy(ts_data->atom[22].name,"NE1");
  strcpy(ts_data->atom[23].name,"NE2");
  strcpy(ts_data->atom[24].name,"ND1");
  strcpy(ts_data->atom[25].name,"ND2");
  strcpy(ts_data->atom[26].name,"OD1");
  strcpy(ts_data->atom[27].name,"OD2");
  strcpy(ts_data->atom[28].name,"OE1");
  strcpy(ts_data->atom[29].name,"OE2");
  strcpy(ts_data->atom[30].name,"SD");
  strcpy(ts_data->atom[31].name,"SG");
  strcpy(ts_data->atom[32].name,"OH");
  strcpy(ts_data->atom[33].name,"CH2");
  strcpy(ts_data->atom[34].name,"NZ");
  strcpy(ts_data->atom[35].name,"NH1");
  strcpy(ts_data->atom[36].name,"NH2");
  strcpy(ts_data->atom[37].name,"OE");

  for(i=0;i<N_AA;i++) {
    for(j=0;j<14;j++) {
      ts_data->AA[i][j]=AA[i][j];
    }
  }

  strcpy(ts_data->target_name,"   ");
  ts_data->target_n_chains=0;
  ts_data->model=0;
  ts_data->u_model=0;
  strcpy(ts_data->parent_name,"   ");
  ts_data->n_fragments=0;
  ts_data->end=0;
  ts_data->n_method=0;
  ts_data->errors=0;
  for(j=0;j<MAXCHN;j++) {
    ts_data->target[j].name_chain=' ';
    strcpy(ts_data->target[j].name,"   ");
    ts_data->target[j].n_aa=0;
    for(i=0;i<MAXRES;i++) {
      ts_data->target[j].aa[i]=' ';
      ts_data->target[j].aa_num[i]=-9999;
      ts_data->tsp[j].aa[i].res=' ';
      strcpy(ts_data->tsp[j].aa[i].res_3,"     ");
      ts_data->tsp[j].aa[i].all=0;
      ts_data->tsp[j].aa[i].main=0;
      ts_data->tsp[j].aa[i].CA.x=0.0;
      ts_data->tsp[j].aa[i].CA.y=0.0;
      ts_data->tsp[j].aa[i].CA.z=0.0;
      for(k=0;k<N_AT;k++) {
        ts_data->tsp[j].aa[i].numb[k]=0;
      }
    }
    for(i=0;i<MAXATM;i++) {
      ts_data->tsp[j].atom[i].serial=0;
      strcpy(ts_data->tsp[j].atom[i].name,"   ");
      strcpy(ts_data->tsp[j].atom[i].alt_loc," ");
      strcpy(ts_data->tsp[j].atom[i].res_name,"   ");
      strcpy(ts_data->tsp[j].atom[i].chain_id," ");
      ts_data->tsp[j].atom[i].res_seq=0;
      strcpy(ts_data->tsp[j].atom[i].i_code," ");
      ts_data->tsp[j].atom[i].R.x=0.0; 
      ts_data->tsp[j].atom[i].R.y=0.0; 
      ts_data->tsp[j].atom[i].R.z=0.0;
      ts_data->tsp[j].atom[i].occupancy=0.0;
      ts_data->tsp[j].atom[i].temp_factor=0.0;
      strcpy(ts_data->tsp[j].atom[i].seg_id,"    ");
      strcpy(ts_data->tsp[j].atom[i].element,"  ");
      strcpy(ts_data->tsp[j].atom[i].charge,"  ");
    }
    ts_data->tsp[j].n_atoms=0;
    ts_data->tsp[j].n_aa=0;
    ts_data->tsp[j].n_fragments=0;
    ts_data->tsp[j].n_occupancy=0;
    ts_data->tsp[j].parent_none=0;
  }

  return;
}

/*-----------------------------------------------------------
/
/   read_ts_f - read the TS predictions format
/
/------------------------------------------------------------*/
void read_ts_f(ts_f *ts_data, char* fname)
{
  int i, begflag, authflag;
  char keyword[500], line[500], name[200], model_nb[100], unrefined[100];
  char target_type[20], tname[35];
  FILE *fp;

  if((fp = fopen(fname,"r"))==NULL) {
    printf("\n# error opening file %s for read\n\n",fname);
    exit(0);
  }

  /* Read in the ts_data
  -------------------------------------------*/
  begflag=0;
  authflag=0;
  while(fgets(line,500,fp)!=NULL) {
    strcpy(keyword,"   ");
    strcpy(name,"   ");
    strcpy(model_nb,"#   ");
    strcpy(unrefined,"#   ");
    strcpy(target_type,"#   ");
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword,"PFRMAT\0",7)) {

      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);

      begflag=1;
      sscanf(line,"%s %s",keyword,name);
      if(strncmp(name,"TS\0",3)) {
        printf("\n# ERROR! Wrong specification of the TS format category");
        printf("\nPFRMAT TS      # was expected\n\n");
        exit(0);
      }
    }
    else if(!strncmp(keyword,"TARGET\0",7)) {

      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);

      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the TS prediction data records");
        printf("\nPFRMAT TS                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      begflag=2;
      sscanf(line,"%s %s %s",keyword,name,target_type);
      strcpy(ts_data->target_name,name);
      strcpy(ts_data->target_type,target_type);
      if(target_type[0]!='#') {
        if ((!strncmp(target_type,"OLIGOMER\0",9))||(!strncmp(target_type,"oligomer\0",8))) {
          strcat(name,"_oligomer");
          ts_data->u_model=2;
        } else {
          printf("\n# ERROR! Please check TARGET record.");
          printf("\n#        Only OLIGOMER label is acceptable.\n\n");
          ts_data->errors++;
          escape(ts_data->errors);
        }
      }

      read_seq(name,ts_data);
    }
    else if(!strncmp(keyword,"AUTHOR\0",7)) {
      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the TS prediction data records");
        printf("\nPFRMAT TS                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);
      authflag=1;
    }
    else if(!strncmp(keyword,"REMARK\0",7)) {
      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the TS prediction data records");
        printf("\nPFRMAT TS                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);
    }
    else if(!strncmp(keyword,"METHOD\0",7)) {
      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the TS prediction data records");
        printf("\nPFRMAT TS                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      ts_data->n_method++;
    }
    else if(!strncmp(keyword,"MODEL\0",6)) {
      if(begflag<2 || authflag!=1) {
        printf("\n# ERROR! Unacceptable order of the TS prediction data records");
        printf("\nPFRMAT TS                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      i=0;
      sscanf(line,"%s %s %s",keyword,model_nb,unrefined);
      sscanf(model_nb,"%d",&i);
      if(model_nb[1]!=' ' && model_nb[1]!='\0' && model_nb[1]!='\n') {
        printf("\n# ERROR! Unacceptable index of the prediction MODEL\n\n");
        ts_data->errors++;
        escape(ts_data->errors);
      }
      if(unrefined[0]!='#') {
        if(strncmp(unrefined,"UNREFINED\0",10) &&
           strncmp(unrefined,"REFINED\0",8)) {
          printf("\n# ERROR! Please check MODEL record.");
          printf("\n#        Only REFINED or UNREFINED labels are acceptable.\n\n");
          ts_data->errors++;
          escape(ts_data->errors);
        }
      }
      if(i>0 && i<=5) {
        begflag=3;
        if(!strncmp(unrefined,"UNREFINED\0",10)) ts_data->u_model=1;
        ts_data->model=i;
        printf("\n# Reading MODEL %2d\n",i);
        read_data_tsp(ts_data,fp);


        break;
      }
      else {
        printf("\n# ERROR! Unacceptable index of the TS prediction MODEL\n\n");
        ts_data->errors++;
        escape(ts_data->errors);
      }
    }
    else if(!strncmp(keyword,"END\0",4)) {
      ts_data->end=1;
      break;
    }
    else if(strncmp(keyword," ",1)) {

      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);

      printf("\n# ERROR! Unknown record keyword in this section of the prediction.");
      printf("\n#        Check TS format description.\n\n");
      ts_data->errors++;
      escape(ts_data->errors);
    }
  }
  if(begflag!=3) {
    printf("\n# ERROR! There is no TS prediction data in this file\n\n");
    exit(0);
  }
  if(ts_data->end==0) {
    printf("\n# ERROR! There is no END record in this file\n\n");
    exit(0);
  }
  if(authflag!=1) {
    printf("\n# ERROR! Check AUTHOR record in this file\n\n");
    exit(0);
  }
  if(ts_data->n_method==0) {
    printf("\n# ERROR! There is no METHOD records in this file\n\n");
    exit(0);
  }
  fclose(fp);

  return;
}

/*-----------------------------------------------------------
/
/   read_seq - read a sequence file
/
/------------------------------------------------------------*/
void read_seq(char* tname, ts_f *ts_data)
{
  int i, k, n_aa, old_res, new_res;
  char chain, sub[10];
  FILE *fp;
  char line[MAXRES], fname[20], lname[20], keyword[10];

  chain=' ';
/* adjusting for refinement targets  - should be in format TR333*/
  strcpy(lname,tname);
  if (lname[1]=='R') lname[1]='0'; 
  /*strcpy(fname,"TARGETS/");*/
  strcpy(fname,"");
  strcat(fname,lname);
  strcat(fname,".seq.txt");
  if((fp = fopen(fname,"r"))==NULL) {
    for(i=0;i<20;i++) if(lname[i]=='t') lname[i]='T';
    /*strcpy(fname,"TARGETS/");*/
    strcpy(fname,"");
    strcat(fname,lname);
    strcat(fname,".seq.txt");
    if((fp = fopen(fname,"r"))==NULL) {
      for(i=0;i<20;i++) if(lname[i]=='T') lname[i]='t';
       /*strcpy(fname,"TARGETS/");*/
      strcpy(fname,"");
      strcat(fname,lname);
      strcat(fname,".seq.txt");
      if((fp = fopen(fname,"r"))==NULL) {
        printf("\n# ERROR! There is no target name:  %s",tname);
        printf("\n# TARGET Txxxx      # was expected\n\n");
        exit(0);
      }
    }
  }

  k=0;
  n_aa=0;
  while ((fgets(line, MAXRES, fp) != NULL)) {    
    if (strncmp(line, ">", 1) == 0) {
      chain=' ';
      if(line[6]=='_' && isalnum(line[7])!=0) {
        chain=line[7];
        if(isalpha(chain)==0) {
          printf("\n# ERROR! Wrong chain %c specification in the target:  %s",chain,tname);
          printf("\n#        Please send a message to the CASP organizers. \n\n");
          exit(0);
        }
      }
      ts_data->target[k].n_aa=n_aa;
      k=ts_data->target_n_chains;
      ts_data->target[k].name_chain=chain;
      for(i=0;i<10;i++) sub[i]=0;
      strncpy(sub,&line[1],8);
      sscanf(sub,"%s",ts_data->target[k].name);
      ts_data->target_n_chains++;
      if(ts_data->target_n_chains>MAXCHN) {
        printf("\n# ERROR! Too many chains in the target:  %s",tname);
        printf("\n#        Please send a message to the CASP organizers. \n\n");
        exit(0);
      }
      n_aa=0;
    }
    else {
      i=0;
      while (line[i]!='\n') {
        if(line[i]!='\0' && line[i]!=' ') {
          if(check_aa(line[i],letter,N_AA1)<N_AA) {
            ts_data->target[k].aa[n_aa]=line[i];
            n_aa++;
          }
          else {
            printf("\n# ERROR! Wrong amino acid code %c in the target:  %s ",line[i],tname);
            printf("\n#        Please send a message to the CASP organizers. \n\n");
            fclose(fp);
            exit(0);
          }            
        }
        i++;
      }
      ts_data->target[k].n_aa=n_aa;
    }
  }
  fclose(fp);

  strcpy(lname,tname);
  if (lname[1]=='R') lname[1]='0'; 
  /*strcpy(fname,"TARGETS/");*/
  strcpy(fname,"");
  strcat(fname,lname);
  strcat(fname,".pdb.txt");
  if((fp = fopen(fname,"r"))==NULL) {
    for(i=0;i<20;i++) if(lname[i]=='t') lname[i]='T';
    /*strcpy(fname,"TARGETS/");*/
    strcpy(fname,"");
    strcat(fname,lname);
    strcat(fname,".pdb.txt");
    if((fp = fopen(fname,"r"))==NULL) {
      for(i=0;i<20;i++) if(lname[i]=='T') lname[i]='t';
        /*strcpy(fname,"TARGETS/");*/
        strcpy(fname,"");

      strcat(fname,lname);
      strcat(fname,".pdb.txt");
      if((fp = fopen(fname,"r"))==NULL) {
        printf("\n# ERROR! There is no target name:  %s",tname);
        printf("\n# TARGET Txxxx      # was expected\n\n");
        exit(0);
      }
    }
  }

  chain='-';
  k=-1;
  n_aa=0;
  old_res=-9999;
  while ((fgets(line, MAXRES, fp) != NULL)) {    
    for(i=0;i<10;i++) keyword[i]=0;
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword,"ATOM\0",5))   {
      if(chain != line[21]) {
        chain=line[21];
        k++;
        n_aa=0;
      }
      for(i=0;i<10;i++) keyword[i]=0;
      strncpy(keyword,&line[22],4);
      sscanf(keyword,"%d",&new_res);
      if(new_res != old_res) {
        ts_data->target[k].aa_num[n_aa]=new_res;
        old_res=new_res;
        n_aa++;
      }
    }
  }
  fclose(fp);

  return;
}

/*-----------------------------------------------------------
/
/   check_ts_f - checks the TS prediction file format
/
/------------------------------------------------------------*/
void check_ts_f(ts_f *ts_data)
{
  int ch, i, j, k, kr, l, n_aa, ok, imp_note, imp_note_main_chain;
  char res;

  imp_note=0;
  printf("\n# Checking the TS prediction MODEL %2d\n",ts_data->model);

  for(ch=0;ch<ts_data->target_n_chains;ch++) {
    n_aa=0;
    for(i=1;i<=ts_data->tsp[ch].n_atoms;i++) {
      C_321(ts_data->tsp[ch].atom[i].res_name,&res);
      j=ts_data->tsp[ch].atom[i].serial;
      kr=ts_data->tsp[ch].atom[i].res_seq;
      k=-9999;
      for(l=0;l<ts_data->target[ch].n_aa;l++) {
        if(kr==ts_data->target[ch].aa_num[l]) {
          k=l+1;
        }
      }
      if(k==-9999) {
        printf("# ERROR! ATOM number: %d   Check residue:  %c %c%d\n",
                         j,res,ts_data->target[ch].name_chain,kr);
        printf("#        There is no such a residue in TARGET\n");
        printf("#        Check residue numbering in template PDB file for TARGET: %s\n\n",ts_data->target_name);

        if(ts_data->target[ch].aa_num[0]!=1) {
          l=ts_data->target[ch].n_aa-1;
          if(ts_data->target_n_chains>1) {
            printf("# The residue numbering of the  TARGET:  %s (chain: %c)  is not standard: ",
                    ts_data->target_name,ts_data->target[ch].name_chain);
          }
          else {
            printf("# The residue numbering of the  TARGET:  %s  is not standard: ",
                    ts_data->target_name);
          }
          printf("\n#   the first residue in the TARGET is:  %c %c%4d ",
                    ts_data->target[ch].aa[0],ts_data->target[ch].name_chain,ts_data->target[ch].aa_num[0]);
          printf("\n#   the last  residue in the TARGET is:  %c %c%4d ",
                    ts_data->target[ch].aa[l],ts_data->target[ch].name_chain,ts_data->target[ch].aa_num[l]);
          printf("\n# Please check if the numbering of residues in your ");
          printf("\n# prediction corresponds to this numbering scheme. \n\n");
        }
        exit(0);
      }
      for(l=0;l<N_AT;l++) {
        if(!strcmp(ts_data->tsp[ch].atom[i].name,ts_data->atom[l].name) && ts_data->tsp[ch].atom[i].occupancy==1.0) {
          ts_data->tsp[ch].aa[k-1].numb[l]++;
          ts_data->tsp[ch].aa[k-1].all++;
          if(l<4) {
            ts_data->tsp[ch].aa[k-1].main++;
          }
        }
      }
      ts_data->tsp[ch].aa[k-1].res=res;
      strcpy(ts_data->tsp[ch].aa[k-1].res_3,ts_data->tsp[ch].atom[i].res_name);
      if(ts_data->target[ch].aa[k-1]!=ts_data->tsp[ch].aa[k-1].res) {
        printf("# ERROR! Check atom number %d residue:  %c %c%d (In TARGET:  %c %c%d)\n",
                  j,
                  ts_data->tsp[ch].aa[k-1].res,ts_data->target[ch].name_chain,kr,
                  ts_data->target[ch].aa[k-1],ts_data->target[ch].name_chain,kr);
        ts_data->errors++;
        escape(ts_data->errors);
      }
      if(ts_data->tsp[ch].atom[i].occupancy==1.0) {
        ts_data->tsp[ch].n_occupancy++;
      }
    }
    ts_data->tsp[ch].n_aa=0;
    for(i=0;i<ts_data->target[ch].n_aa;i++) {
      if(ts_data->tsp[ch].aa[i].all>0) {
        if(ts_data->tsp[ch].aa[i].main>0) {
          ts_data->tsp[ch].n_aa++;
        }
        imp_note_main_chain=0;
        if(ts_data->tsp[ch].aa[i].numb[0]==0) {
          printf("# ERROR! Check residue:  %c %c%d  (! There is no CA atom !)\n",
                    ts_data->tsp[ch].aa[i].res,
                    ts_data->target[ch].name_chain,
                    ts_data->target[ch].aa_num[i]);
          ts_data->errors++;
          escape(ts_data->errors);
          imp_note_main_chain=1;
        }
        for(l=0;l<4;l++) {
          if(ts_data->tsp[ch].aa[i].numb[l]<1) {
            if(imp_note_main_chain==0) {
              printf("# IMPORTANT NOTE! Not complete main chain atoms for residue  %c %c%d \n",
                      ts_data->tsp[ch].aa[i].res,
                      ts_data->target[ch].name_chain,
                      ts_data->target[ch].aa_num[i]);
              imp_note_main_chain=1;
            }
            if(imp_note==0) {
              printf("\n# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #");
              printf("\n# It is requested  that coordinate data be supplied for  at least #"); 
              printf("\n# all non-hydrogen main chain atoms, i.e. the  N,  CA,  C and  O  #");
              printf("\n# atoms of every residue.  Specifically,   if only  CA  atoms are #"); 
              printf("\n# predicted by the method,  predictors  are  encouraged  to build #"); 
              printf("\n# the main chain  atoms  for  every  residue before submission to #"); 
              printf("\n# CASP. If only CA atoms were submitted it would not be possible  #");
              printf("\n# to run  most of the analysis  software,  which  would  severely #"); 
              printf("\n# limit the evaluation of that prediction.                        #"); 
              printf("\n# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #\n\n");
              imp_note=1;
            }
          }
        }
        for(l=0;l<N_AT;l++) {
          if(ts_data->tsp[ch].aa[i].numb[l]>1) {
            printf("# ERROR! Check residue:  %c %c%d  (! atom %s REPEATED !)\n",
                      ts_data->tsp[ch].aa[i].res,
                      ts_data->target[ch].name_chain,
                      ts_data->target[ch].aa_num[i],
                      ts_data->atom[l].name);
            ts_data->errors++;
            escape(ts_data->errors);
          }
          if(ts_data->tsp[ch].aa[i].numb[l]>0) {
            k=check_aa(ts_data->tsp[ch].aa[i].res,letter,N_AA);
            ok=0;
            for(j=0;j<14;j++) {
              if(ts_data->AA[k][j]==l) {
                ok=1;
                break;
              }
            }
            if(ok==0 && l!=4) {
              printf("# ERROR!   Check residue:  %c %c%d  (! atom %s INCONSISTENT with residue %s !)\n",
                      ts_data->tsp[ch].aa[i].res,
                      ts_data->target[ch].name_chain,
                      ts_data->target[ch].aa_num[i],
                      ts_data->atom[l].name,
                      ts_data->tsp[ch].aa[i].res_3);
              ts_data->errors++;
              escape(ts_data->errors);
            }
            if(ok==0 && l==4) {
              printf("# WARNING! Check residue:  %c %c%d  (! atom %s INCONSISTENT with residue %s !)\n",
                      ts_data->tsp[ch].aa[i].res,
                      ts_data->target[ch].name_chain,
                      ts_data->target[ch].aa_num[i],
                      ts_data->atom[l].name,
                      ts_data->tsp[ch].aa[i].res_3);
            }
          }
        }
      }
    }
  }

  k=0;
  l=0;
  n_aa=0;
  for(ch=0;ch<ts_data->target_n_chains;ch++) {
    if(ts_data->tsp[ch].parent_none>0) {
      k++;
    }
    l=l+ts_data->tsp[ch].n_occupancy;
    n_aa=n_aa+ts_data->tsp[ch].n_aa;
    if(ts_data->tsp[ch].n_aa>ts_data->target[ch].n_aa) {
      printf("# ERROR! Check the number %d of residues in the model. (In TARGET %s: %d)\n",
                ts_data->tsp[ch].n_aa,ts_data->target[ch].name,ts_data->target[ch].n_aa);
      ts_data->errors++;
      escape(ts_data->errors);
    }
  }
  if(k==0 && n_aa==0) {
    printf("# ERROR! The number of predicted residues in the model:  0 \n");
    ts_data->errors++;
    escape(ts_data->errors);
  }
  if(k==0 && l==0) {
    printf("# ERROR! Check the number of predicted atoms (with occupancy 1.0) in the model.\n");
    ts_data->errors++;
    escape(ts_data->errors);
  }
  printf("# Checking the TS prediction MODEL %2d      (DONE)\n",ts_data->model);
  return;
}

/*-----------------------------------------------------------
/
/   check_aa - checks an amino acid
/
/------------------------------------------------------------*/
int check_aa(char token, char* letter, int n)
{
  int i;

  for(i=0;i<n;i++) {
    if(letter[i]==token)
      return i;
  }
  return n;
}

/*-----------------------------------------------------------
/
/   C_321 - converts an amino acid code3 to code1
/
/------------------------------------------------------------*/
void C_321(char* code3, char* code1)
{
  *code1='#';
  if(!strncmp(code3, "ALA", 3) || !strncmp(code3, "ala", 3)) *code1='A';
  else if(!strncmp(code3, "VAL", 3) || !strncmp(code3, "val", 3)) *code1='V';
  else if(!strncmp(code3, "LEU", 3) || !strncmp(code3, "leu", 3)) *code1='L';
  else if(!strncmp(code3, "ILE", 3) || !strncmp(code3, "ile", 3)) *code1='I';
  else if(!strncmp(code3, "PRO", 3) || !strncmp(code3, "pro", 3)) *code1='P';
  else if(!strncmp(code3, "MET", 3) || !strncmp(code3, "met", 3)) *code1='M';
  else if(!strncmp(code3, "PHE", 3) || !strncmp(code3, "phe", 3)) *code1='F';
  else if(!strncmp(code3, "TRP", 3) || !strncmp(code3, "trp", 3)) *code1='W';
  else if(!strncmp(code3, "GLY", 3) || !strncmp(code3, "gly", 3)) *code1='G';
  else if(!strncmp(code3, "SER", 3) || !strncmp(code3, "ser", 3)) *code1='S';
  else if(!strncmp(code3, "THR", 3) || !strncmp(code3, "thr", 3)) *code1='T';
  else if(!strncmp(code3, "CYS", 3) || !strncmp(code3, "cys", 3)) *code1='C';
  else if(!strncmp(code3, "TYR", 3) || !strncmp(code3, "tyr", 3)) *code1='Y';
  else if(!strncmp(code3, "ASN", 3) || !strncmp(code3, "asn", 3)) *code1='N';
  else if(!strncmp(code3, "GLN", 3) || !strncmp(code3, "gln", 3)) *code1='Q';
  else if(!strncmp(code3, "ASP", 3) || !strncmp(code3, "asp", 3)) *code1='D';
  else if(!strncmp(code3, "GLU", 3) || !strncmp(code3, "glu", 3)) *code1='E';
  else if(!strncmp(code3, "LYS", 3) || !strncmp(code3, "lys", 3)) *code1='K';
  else if(!strncmp(code3, "ARG", 3) || !strncmp(code3, "arg", 3)) *code1='R';
  else if(!strncmp(code3, "HIS", 3) || !strncmp(code3, "his", 3)) *code1='H';

  return;
}

/*-----------------------------------------------------------
/
/   read_data_tsp - read the TS predictions file format.
/                   ATOM records from PDB format.
/
/------------------------------------------------------------*/
void read_data_tsp(ts_f *ts_data, FILE* fp)
{
  typedef struct {
    char      name[10];
  } parent_names;

  int i, j, k, l, n_aa, n_atoms, bug, n_fragments, n_occupancy, n_ch;
  int parter, n1, n2, ok, was_none, two_chains;
  float occupancy;
  char keyword[200], line[200], sub[100], parent[200];
  char ch, chain, pdb_id[10], sn[10], sn1[10], sn2[10];
  parent_names p_n[5];
  pdb_struct molecule;

  /* Read in the molecule:  ts_data->tsp.*****
  -------------------------------------------*/
  ch='#';
  parter=0;
  k=-9999;
  n_atoms=0;
  n_aa=0;
  n_ch=0;
  two_chains=0;
  while(fgets(line,200,fp)!=NULL) {
    strcpy(keyword,"   ");
    strcpy(parent,"   ");
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword,"#",1))      {}
    else if(!strncmp(keyword,"REMARK\0",7)) {}
    else if(!strncmp(keyword,"METHOD\0",7)) {
      ts_data->n_method++;
    }
    else if(!strncmp(keyword,"TER\0",4))    {
      was_none=0;
      if(parter!=1) {
        printf("\n# ERROR! Check the number of PARENT and TER records.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.\n");
        ts_data->errors++;
        escape(ts_data->errors);
        parter=1;
      }
      parter--;
      if (strncmp(ts_data->target_type,"OLIGOMER\0",7)&&strncmp(ts_data->target_type,"oligomer\0",7)){ 
      if(two_chains!=2 && ts_data->target_n_chains>1) {
        printf("\n# ERROR! For any given multichain prediction, target residues from more");
        printf("\n#        then one chain should be reported in every independent segment"); 
        printf("\n#        of structure included in submission.");
        printf("\n#        Please check TS format description for multichain targets.\n");
        ts_data->errors++;
        escape(ts_data->errors);
      }}
      two_chains=0;
      ch='#';
    }
    else if(!strncmp(keyword,"PARENT\0",7)) {
      for(i=0;i<200;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='\\') line[i]=' ';
      printf(line);
      if(parter!=0) {
        printf("\n# ERROR! Check the number of PARENT and TER records.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.\n");
        ts_data->errors++;
        escape(ts_data->errors);
        parter=0;
      }
      for(i=0;i<10;i++) parent[i]=' ';
      sscanf(line,"%s %s",keyword,parent);
      if(!strncmp(parent,"NONE\0",5)) {
        was_none=1;
        if(ts_data->target_n_chains>1) {
          printf("\n# ERROR! PARENT NONE specification can be used only");
          printf("\n#        for targets containing single chain.\n\n");
          exit(0);
        }
        ts_data->tsp[n_ch].parent_none++;
        i=0;
        bug=0;
        while (line[i]!='\n' && bug<1) {
          if(isdigit(line[i])!=0) bug=1;
          i++;
        }
        if(bug==1) {
          n1=-9999;
          n2=-9999;
          strcpy(sn1,"   ");
          strcpy(sn2,"   ");
          sscanf(line,"%s %s %s %s",keyword,parent,sn1,sn2);
          if(ts_data->target[n_ch].name_chain==' ') {
            sscanf(sn1,"%d",&n1);
            sscanf(sn2,"%d",&n2);
          }
          else {
            strncpy(sn,&sn1[1],6);
            sscanf(sn,"%d",&n1);
            strncpy(sn,&sn2[1],6);
            sscanf(sn,"%d",&n2);
            if(sn1[0]!=ts_data->target[n_ch].name_chain ||
               sn2[0]!=ts_data->target[n_ch].name_chain) {
              n1=-9999;
            }
          }
          i=ts_data->target[n_ch].n_aa-1;
          if(n1<ts_data->target[n_ch].aa_num[0] || n2>ts_data->target[n_ch].aa_num[i] || n1>n2) {
            printf("\n# ERROR! Check the syntax of the line:  PARENT NONE n1 n2");
            printf("\n#        Delimiters n1 n2 indicate the range of the target"); 
            printf("\n#        sequence predicted as having no homologue in the");
            printf("\n#        current PDB. Omission of n1 n2 indicates the entire");
            printf("\n#        target.");
            n1=0;
            n2=ts_data->target[n_ch].n_aa-1;
            if(ts_data->target[n_ch].name_chain==' ') {
              printf("\n#        BY DEFAULT:  n1 = %d  n2 = %d\n",
                                 ts_data->target[n_ch].aa_num[n1],
                                 ts_data->target[n_ch].aa_num[n2]);
            }
            else {
              printf("\n#        BY DEFAULT:  n1 = %c%d  n2 = %c%d\n",
                                 ts_data->target[n_ch].name_chain,
                                 ts_data->target[n_ch].aa_num[n1],
                                 ts_data->target[n_ch].name_chain,
                                 ts_data->target[n_ch].aa_num[n2]);
            }
            ts_data->errors++;
            escape(ts_data->errors);
          }
          else {
            j=-9999;
            k=-9999;
            for(l=0;l<ts_data->target[n_ch].n_aa;l++) {
              if(n1==ts_data->target[n_ch].aa_num[l]) {
                j=l;
              }
              if(n2==ts_data->target[n_ch].aa_num[l]) {
                k=l;
              }
            }
            n1=j;
            n2=k;
          }
        }
        else {
          n1=0;
          n2=ts_data->target[n_ch].n_aa-1;
        }
        for(i=n1;i<=n2;i++) {
          ts_data->tsp[n_ch].aa[i].numb[0]++;
          ts_data->tsp[n_ch].aa[i].numb[1]++;
          ts_data->tsp[n_ch].aa[i].numb[2]++;
          ts_data->tsp[n_ch].aa[i].numb[3]++;
/*
          ts_data->tsp[n_ch].aa[i-1].main++;
*/
        }
      }
      else if((strncmp(parent,"N/A\0",4))&&(strncmp(parent,"n/a\0",4))) {
        for(i=0;i<5;i++) {
          strcpy(p_n[i].name,"#        ");
        }
        sscanf(line,"%s %s %s %s %s %s",keyword,p_n[0].name,p_n[1].name,p_n[2].name,p_n[3].name,p_n[4].name);
        for(i=0;i<5;i++) {
          if(p_n[i].name[0]!='#') {
            chain=' ';
            strcpy(parent,"#        ");
            strcpy(parent,p_n[i].name);
            strcpy(ts_data->parent_name,parent);
            pdb_id[0]=parent[0];
            pdb_id[1]=parent[1];
            pdb_id[2]=parent[2];
            pdb_id[3]=parent[3];
            pdb_id[4]='\0';
            pdb_id[5]=' ';
            if(isalnum(parent[4])!=0) {
              chain=parent[4];
            }
            else { 
              chain=parent[5];
            }
            ok=pdb_align(pdb_id, chain, &molecule);
            if(ok==0) {
              printf("\n# ERROR! Check PARENT record. PDB ID: %s\n\n",
                          ts_data->parent_name);
              ts_data->errors++;
              escape(ts_data->errors);
            }
          }
          else if(i==0) {
            printf("\n# ERROR! Check PARENT record. PDB ID, N/A or NONE");
            printf("\n#        should be used. Check TS format description.\n\n");
            ts_data->errors++;
            escape(ts_data->errors);
          }
        }
      }
      ts_data->n_fragments++;
      ts_data->tsp[n_ch].n_fragments++;
/*
      printf("-%d- \n",ts_data->n_fragments);
*/
      parter++;
    }
    else if(!strncmp(keyword,"END\0",4))    {
      if(parter!=0) {
        printf("\n# ERROR! Check the TER record before END.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.\n");
        ts_data->errors++;
        escape(ts_data->errors);
        parter=0;
      }
      ts_data->tsp[n_ch].n_atoms=n_atoms;
      ts_data->tsp[n_ch].n_aa=n_aa;
      ts_data->end=1;
      return;
    }
    else if(!strncmp(keyword,"ATOM\0",5))   {
      if(was_none==1) {
        printf("\n# ERROR! Check the TER record.");
        printf("\n#        After PARENT NONE the TER record is expected.\n");
        ts_data->errors++;
        escape(ts_data->errors);
        was_none=0;
      }
      if(parter!=1) {
        printf("\n# ERROR! Check the number of PARENT and TER records.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.\n");
        ts_data->errors++;
        escape(ts_data->errors);
        parter=1;
      }
      i=0;
      while (line[i]!='\n') i++;
      ok=0;
      n_ch=0;
      if(i>21) {
        for(j=0;j<ts_data->target_n_chains;j++) {
          if(ts_data->target[j].name_chain==line[21]) {
            ok=1;
            n_ch=j;
          }
        }
      }
      n_atoms=ts_data->tsp[n_ch].n_atoms;
      n_aa=ts_data->tsp[n_ch].n_aa;
      n_atoms++;
      if(n_atoms>=MAXATM) {
        printf("\n# ERROR! Check the number of ATOM records. (Limit MAX = %d)\n\n",MAXATM);
        fclose(fp);
        exit(0);
      }
      if(i>6) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[6],5);
        sscanf(sub,"%d",&ts_data->tsp[n_ch].atom[n_atoms].serial);
      }
      if(i>12) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[12],4);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].name);
      }
      if(i>16) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[16],1);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].alt_loc);
      }
      if(i>17) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[17],3);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].res_name);
      }
      if(i>21) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[21],1);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].chain_id);
        if(ch!=ts_data->tsp[n_ch].atom[n_atoms].chain_id[0]) {
          ch=ts_data->tsp[n_ch].atom[n_atoms].chain_id[0];
          k=-9999;
          two_chains++;
        }
        if(ok==0) {
          printf("\n# ERROR! Check chain ID: %c for ATOM number %d (in TARGET: %c)\n",
                             ch,ts_data->tsp[n_ch].atom[n_atoms].serial,ts_data->target[n_ch].name_chain);
          ts_data->errors++;
          escape(ts_data->errors);
        }
      }
      if(i>22) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[22],4);
        sscanf(sub,"%d",&ts_data->tsp[n_ch].atom[n_atoms].res_seq);
        if(k!=ts_data->tsp[n_ch].atom[n_atoms].res_seq) {
          k=ts_data->tsp[n_ch].atom[n_atoms].res_seq;
          n_aa++;
        }
      }
      if(i>26) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[26],1);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].i_code);
      }
      if(i>30) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[30],8);
        sscanf(sub,"%f",&ts_data->tsp[n_ch].atom[n_atoms].R.x);
      }
      if(i>38) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[38],8);
        sscanf(sub,"%f",&ts_data->tsp[n_ch].atom[n_atoms].R.y);
      }
      if(i>46) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[46],8);
        sscanf(sub,"%f",&ts_data->tsp[n_ch].atom[n_atoms].R.z);
      }
      if(i>54) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[54],6);
        occupancy=-1.0;
        sscanf(sub,"%f",&occupancy);
        if(occupancy!=0.0 && occupancy!=1.0) {
          printf("# ERROR! Check occupancy: ATOM number %d\n",ts_data->tsp[n_ch].atom[n_atoms].serial);
          ts_data->errors++;
          escape(ts_data->errors);
          occupancy=0.0;
        }
        ts_data->tsp[n_ch].atom[n_atoms].occupancy=occupancy;
      }
      if(i>60) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[60],6);
        sscanf(sub,"%f",&ts_data->tsp[n_ch].atom[n_atoms].temp_factor);
        if(ts_data->tsp[n_ch].atom[n_atoms].temp_factor<0.0) {
          printf("# ERROR! Check temperature factor: ATOM number %d\n",ts_data->tsp[n_ch].atom[n_atoms].serial);
          ts_data->errors++;
          escape(ts_data->errors);
        }
      }
      if(i>72) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[72],4);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].seg_id);
      }
      if(i>76) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[76],2);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].element);
      }
      if(i>78) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[78],2);
        sscanf(sub,"%s",ts_data->tsp[n_ch].atom[n_atoms].charge);
      }
      if(i<30) {
        printf("\n# ERROR! Check ATOM number %d\n",n_atoms);
        fclose(fp);
        exit(0);
      }
      if(i>=30 && i<54) {
        printf("# ERROR! Check coordinates and occupancy factor: ATOM number %d\n",ts_data->tsp[n_ch].atom[n_atoms].serial);
        ts_data->errors++;
        escape(ts_data->errors);
      }
      if(i>=54 && i<60) {
        printf("# ERROR! Check occupancy and temperature factors: ATOM number %d\n",ts_data->tsp[n_ch].atom[n_atoms].serial);
        ts_data->errors++;
        escape(ts_data->errors);
      }
      if(i>=60 && i<65) {
        printf("# ERROR! Check PDB format, temperature factor: ATOM number %d\n",ts_data->tsp[n_ch].atom[n_atoms].serial);
        ts_data->errors++;
        escape(ts_data->errors);
      }
      ts_data->tsp[n_ch].n_atoms=n_atoms;
      ts_data->tsp[n_ch].n_aa=n_aa;

      if(!strcmp(ts_data->tsp[n_ch].atom[n_atoms].name,"CA")){
          ts_data->tsp[n_ch].aa[n_aa].res_seq=ts_data->tsp[n_ch].atom[n_atoms].res_seq;
          ts_data->tsp[n_ch].aa[n_aa].n_fragm=ts_data->tsp[n_ch].n_fragments;
          strcpy(ts_data->tsp[n_ch].aa[n_aa].res_name,ts_data->tsp[n_ch].atom[n_atoms].res_name);
          ts_data->tsp[n_ch].aa[n_aa].CA.x=ts_data->tsp[n_ch].atom[n_atoms].R.x;
          ts_data->tsp[n_ch].aa[n_aa].CA.y=ts_data->tsp[n_ch].atom[n_atoms].R.y;
          ts_data->tsp[n_ch].aa[n_aa].CA.z=ts_data->tsp[n_ch].atom[n_atoms].R.z;
      }
    }
    else {
      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);

      printf("\n# ERROR! Unacceptable record in the MODEL - END section.");
      printf("\n#        Each structure segment should begin with a single");
      printf("\n#        PARENT record and terminate with a TER record.");
      printf("\n#        Each record must begins with a standard keyword,");
      printf("\n#        and blank records are not allowed.");
      printf("\n#        Submission should end with END record.");
      printf("\n#        Check TS format description.\n\n");
      exit(0);
    }
  }

  return ;
}

/*-----------------------------------------------------------
/
/   pdb_align - read a pdb file (code and chain from AL format)
/
/------------------------------------------------------------*/
int pdb_align(char* pdb_code, char chain, pdb_struct* structure)
{
  int i, ok;
  FILE *tmp;
  char name[20], sub1[500], sub2[500], sub3[500], pdb_dir[100];

  strcpy(pdb_dir,"/PDB/structures");
  strcpy(name,pdb_code);
  for(i=0;i<4;i++) {
    ok=check_aa(name[i],uppmat,36);
    if(ok<36) {
      name[i]=lowmat[ok];
    }
  }
  ok=0;
  if(name[0]!='\0' && name[0]!='\n' && name[0]!='\t' && name[0]!=' ') {
    strcpy(sub1,"         ");
    sub1[0]=name[1];
    sub1[1]=name[2];
    sub1[2]='\0';
    sub1[3]=' ';
    sub1[4]=' ';
    strcpy(sub2,name);
    sub2[4]='\0';
    sub2[5]=' ';
    sub2[6]=' ';
    if(chain=='\0' || chain=='\n' || chain=='\t' || chain=='\r' || chain=='-' || chain=='_') chain=' ';
    strcat(sub2,".ent");
    strcpy(sub3,pdb_dir);
    strcat(sub3,"/");
    strcat(sub3,sub1);
    strcat(sub3,"/pdb");
    strcat(sub3,sub2);

    printf("\n# Loading PARENT structure: %4s (chain: %c)",name,chain);

    if((tmp = fopen(sub3,"r"))==NULL) {
      printf("\n# ERROR! The PDB entry %4s cannot be found in the current release.\n",name);
      ok=0;
    }
    else {
      ok=1;
      clean_pdb(structure);
      read_pdb_orig(chain, structure, tmp);
      if(structure->n_atoms==0) {
        printf("\n# ERROR! Check chain %c in the PDB entry %4s\n",chain,name);
        ok=0;
      }
      printf("\n# Number of residues in PARENT structure: %d\n",structure->n_aa);
    }
    fclose(tmp);
  }

  return ok;
}

/*-----------------------------------------------------------
/
/   read_pdb_orig - read a pdb file (without correction)
/                   (called by pdb_align to extract the threading 
/                   coordinates from pdb file)
/
/------------------------------------------------------------*/
void read_pdb_orig(char chain, pdb_struct *molecule, FILE* fp)
{
  int  i, j, k, n_aa, n_atoms, n_mol, endflag, gap, gapl, gapr;
  char keyword[100], line[120], sub[10], sub1[50], sub2[50], sub3[50], ch;
  int  alt_loc_ok;
  char alt_loc;

  /* Read in the molecule
  -------------------------------------------*/
  k=0;
  n_atoms=0;
  n_aa=0;
  n_mol=0;
  ch='#';
  endflag=0;
  alt_loc=' ';
  alt_loc_ok=0;
  while(fgets(line,120,fp)!=NULL && !endflag) {
    for(i=0;i<10;i++) keyword[i]=0;
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword,"HEADER",6)) {}
    else if(!strncmp(keyword,"COMPND",6)) {}
    else if(!strncmp(keyword,"SOURCE",6)) {}
    else if(!strncmp(keyword,"AUTHOR",6)) {}
    else if(!strncmp(keyword,"REVDAT",6)) {}
    else if(!strncmp(keyword,"JRNL",4))   {}
    else if(!strncmp(keyword,"REMARK",6)) {}
    else if(!strncmp(keyword,"SEQRES",6)) {}
    else if(!strncmp(keyword,"FTNOTE",6)) {}
    else if(!strncmp(keyword,"FORMUL",6)) {}
    else if(!strncmp(keyword,"HELIX",5))  {}
    else if(!strncmp(keyword,"SHEET",5))  {}
    else if(!strncmp(keyword,"TURN",4))   {}
    else if(!strncmp(keyword,"SITE",4))   {}
    else if(!strncmp(keyword,"CRYST",5))  {}
    else if(!strncmp(keyword,"SCALE",5))  {}
    else if(!strncmp(keyword,"TER",3))    {}
    else if(!strncmp(keyword,"CONECT",6)) {}
    else if(!strncmp(keyword,"MASTER",6)) {}
    else if(!strncmp(keyword,"END",3))    {endflag=1;}
    else if(!strncmp(keyword,"ATOM",4))   {

/* AK: check chain ID - no respect to case */
     if(toupper(line[21])==toupper(chain)) { 

/*     if(line[21]==chain) { */
      i=0;
      while (line[i]!='\n') i++;
      n_atoms++;
      if(n_atoms>=MAXATM) {
        printf("\n# ERROR! Check the number of ATOM records. (Limit MAX = %d)\n\n",MAXATM);
        fclose(fp);
        exit(0);
      }
      if(i>6) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[6],5);
        sscanf(sub,"%d",&molecule->atom[n_atoms].serial);
      }
      if(i>12) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[12],4);
        sscanf(sub,"%s",molecule->atom[n_atoms].name);
      }
      if(i>17) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[17],3);
        sscanf(sub,"%s",molecule->atom[n_atoms].res_name);
      }
      if(i>21) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[21],1);
        sscanf(sub,"%s",molecule->atom[n_atoms].chain_id);
        if(ch!=molecule->atom[n_atoms].chain_id[0]) {
          ch=molecule->atom[n_atoms].chain_id[0];
          n_mol++;
          alt_loc=' ';
          alt_loc_ok=0;
        }
      }
      if(i>22) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[22],4);
        sscanf(sub,"%d",&molecule->atom[n_atoms].res_seq);
        if(k!=molecule->atom[n_atoms].res_seq) {
          k=molecule->atom[n_atoms].res_seq;
          n_aa++;
          alt_loc=' ';
          alt_loc_ok=0;
        }
      }
      if(i>16) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[16],1);
        sscanf(sub,"%s",molecule->atom[n_atoms].alt_loc);
        if(alt_loc_ok==1 && line[16]!=alt_loc) {
          alt_loc_ok=2;
        }
        if(alt_loc_ok<2) {
          alt_loc_ok=1;
          alt_loc=line[16];
        }
      }
      if(i>26) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[26],1);
        sscanf(sub,"%s",molecule->atom[n_atoms].i_code);
      }
      if(i>30) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[30],8);
        sscanf(sub,"%f",&molecule->atom[n_atoms].R.x);
      }
      if(i>38) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[38],8);
        sscanf(sub,"%f",&molecule->atom[n_atoms].R.y);
      }
      if(i>46) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[46],8);
        sscanf(sub,"%f",&molecule->atom[n_atoms].R.z);
      }
      if(i>54) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[54],6);
        sscanf(sub,"%f",&molecule->atom[n_atoms].occupancy);
      }
      if(i>60) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[60],6);
        sscanf(sub,"%f",&molecule->atom[n_atoms].temp_factor);
      }
      if(i>72) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[72],4);
        sscanf(sub,"%s",molecule->atom[n_atoms].seg_id);
      }
      if(i>76) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[76],2);
        sscanf(sub,"%s",molecule->atom[n_atoms].element);
      }
      if(i>78) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[78],2);
        sscanf(sub,"%s",molecule->atom[n_atoms].charge);
      }
      if(i<30) {
        printf("\n# ERROR! Check ATOM number %d\n",n_atoms);
        fclose(fp);
        exit(0);
      }
      if(i>=30 && i<54) {
        printf("\n# ERROR! Check coordinates and occupancy factor: ATOM number %d",molecule->atom[n_atoms].serial);
        fclose(fp);
        exit(0);
      }
     }      
    }
    else if(!strncmp(keyword,"HETATM",6)) {}
    else if(!strncmp(keyword,"HET",3))    {}
    else {}
  }
  molecule->n_atoms=n_atoms;
  molecule->n_aa=n_aa;
  molecule->n_mol=n_mol;
  fclose(fp);
  return;
}

/*-----------------------------------------------------------
/
/   clean pdb structure
/
/------------------------------------------------------------*/
void clean_pdb(pdb_struct *molecule)
{
  int i;

    molecule->n_atoms=0;
    molecule->n_aa=0;
    for(i=0;i<MAXATM;i++) {
      molecule->atom[i].serial=0;
      strcpy(molecule->atom[i].name,"   ");
      strcpy(molecule->atom[i].alt_loc," ");
      strcpy(molecule->atom[i].res_name,"   ");
      strcpy(molecule->atom[i].chain_id," ");
      molecule->atom[i].res_seq=0;
      strcpy(molecule->atom[i].i_code," ");
      molecule->atom[i].R.x=0.0; 
      molecule->atom[i].R.y=0.0; 
      molecule->atom[i].R.z=0.0;
      molecule->atom[i].occupancy=0.0;
      molecule->atom[i].temp_factor=0.0;
      strcpy(molecule->atom[i].seg_id,"    ");
      strcpy(molecule->atom[i].element,"  ");
      strcpy(molecule->atom[i].charge,"  ");
    }
  return;
}

/*-------------------------------------------------------------------
/
/   CA_dist -    calculates distances between CAs
/
/--------------------------------------------------------------------*/
int CA_dist(ts_f *ts_data)
{
  int i, j, ii, jj; 
  int n_ch=0, severe_num=0, clash_num=0, break_num=0;
  float dist, xx, yy, zz, n_aa, clash_perc;
  float CAsevere=1.9, CAlow=0.1, CAclash=3.5, CAbreak=4.5;
  char *iRN, *jRN;

  dist=0.0;
  n_aa=ts_data->tsp[n_ch].n_aa;
  for(i=1; i<n_aa; i++){
    for(j=i+1; j<=n_aa; j++){
      if (ts_data->tsp[n_ch].aa[i].n_fragm != ts_data->tsp[n_ch].aa[j].n_fragm) {continue;}
      xx=ts_data->tsp[n_ch].aa[i].CA.x-ts_data->tsp[n_ch].aa[j].CA.x;
      yy=ts_data->tsp[n_ch].aa[i].CA.y-ts_data->tsp[n_ch].aa[j].CA.y;
      zz=ts_data->tsp[n_ch].aa[i].CA.z-ts_data->tsp[n_ch].aa[j].CA.z;
      dist=sqrt(xx*xx+yy*yy+zz*zz);
      ii= ts_data->tsp[n_ch].aa[i].res_seq;
      jj= ts_data->tsp[n_ch].aa[j].res_seq;
      if (dist<CAclash){
        iRN=ts_data->tsp[n_ch].aa[i].res_name;
        jRN=ts_data->tsp[n_ch].aa[j].res_name;
        clash_num++;
        printf ("# Warning! CA-CA clash (%4.2fA) between residues %s%d and %s%d\n",dist,iRN,ii,jRN,jj);
        if (dist<CAsevere){
          severe_num++;
          printf ("# Warning! Severe CA-CA clash (%4.2fA) between residues %s%d and %s%d\n",dist,iRN,ii,jRN,jj);
        }
      }
      if ((jj-ii==1) && dist>CAbreak){
        iRN=ts_data->tsp[n_ch].aa[i].res_name;
        jRN=ts_data->tsp[n_ch].aa[j].res_name;
        break_num++;
        printf ("# Warning! Too big distance (%5.2fA) between residues %s%d and %s%d\n",dist,iRN,ii,jRN,jj);
      } 
    }
  }
  clash_perc=(float)clash_num/n_aa;
  if ( (clash_perc>0.25) || ((float)severe_num/n_aa>0.05) ) { return 3; }
  else if (clash_perc>0.1 || severe_num>0) { return 2; } 
  else if (break_num>4) { return 1; } 
  else {return 0;}
}


