/*-----------------------------------------------------------
/
/   al_ver.c 
/
/   Program checks the AL format for CASP3 submissions
/   (unambiguous ALignments to PDB entries)
/
/   Copyright by Adam Zemla (06/05/1998)
/   Modified by AK (2004, 2006)
/
/------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXRES           10000
#define MAXATM           32000
#define N_AA                23
#define N_AA1               23

char *numbers="0123456789.";                           /* 11 chars             */
/* added B and Z */
char *letter="ARNDCQEGHILKMFPSTWYVXBZ";                  /* 23 chars             */
char *lowmat="abcdefghijklmnopqrstuvwxyz1234567890";   /* 36 chars             */
char *uppmat="ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";   /* 36 chars             */

typedef struct {
  float   x;
  float   y;
  float   z;
} vector;

typedef struct {
  int     serial;
  char    name[10];
  char    alt_loc[10];
  char    res_name[10];
  char    chain_id[10];
  int     res_seq;
  char    i_code[10];
  vector  R;
  float   occupancy;
  float   temp_factor;
  char    seg_id[10];
  char    element[10];
  char    charge[10];
} atom_struct;

typedef struct {
  char        res;
  int         c;
  int         ca;
  int         n;
  int         o;
  int         main;
} res_struct;

typedef struct {
  atom_struct atom[MAXATM];
  int         n_atoms;
  int         n_mol;
  int         n_aa;
} pdb_struct;

typedef struct {
  char    t_res_name[10];
  int     t_res_num;
  char    p_res_name[10];
  int     p_res_num;
  char    p_ins[10];
  int     n_fragment;
  char    parent_name[10];
} al_struct;

typedef struct {
  res_struct  aa[MAXRES];
  atom_struct atom[MAXATM];
  al_struct   al[MAXRES];
  int         n_atoms;
  int         n_aa;
  int         n_fragments;
  int         n_occupancy;
  int         parent_none;
} data_alp;

typedef struct {
  char     target_name[10];
  char     target_chain;
  char     target_aa[MAXRES];
  int      target_aa_num[MAXRES];
  int      target_n_aa;
  int      model;
  int      end;
  int      n_method;
  int      errors;
  data_alp alp;
} al_f;

int check_aa(char, char*, int);
void read_al_f(al_f *, char*);
void read_data_alp(al_f *, FILE*);
void convert_al_3d(al_f *);
int pdb_align(char*, char, pdb_struct *);
void read_pdb_orig(char, pdb_struct *, FILE*);
void check_al_f(al_f *);
void clean_al_f(al_f *);
void clean_pdb(pdb_struct *);
void read_seq(char*, al_f *);
void escape(int);
void C_123(char, char*);
void C_321(char*, char*);

main(int argc, char *argv[]) 
{
  char al_file[120];
  al_f al_data;
  
  if(argc<2){
    printf(" Usage: al_ver <al_file>\n");
    exit(0);
  }
  
  strcpy(al_file,argv[1]);

  clean_al_f(&al_data);

  printf("# Reading prediction format AL\n\n");
  read_al_f(&al_data,al_file);
  printf("\n# Reading prediction format AL              (DONE)\n");

  printf("\n# Converting alignments into a 3D structure\n");
  convert_al_3d(&al_data);
  printf("\n# Converting alignments into a 3D structure (DONE)\n");

  check_al_f(&al_data);

  if(al_data.errors==0) {
    printf("\n# MODEL index: %d ",al_data.model);
    printf("\n\n# Target name: %-7s ",al_data.target_name);
    if(al_data.alp.parent_none>0) {
      printf("\n# THREADING/FOLD RECOGNITION MODEL:       PARENT NONE ");
    }
    printf("\n# Total number of residues in target:            %4d ",al_data.target_n_aa);
    printf("\n# Total number of residues in model:             %4d ",al_data.alp.n_aa);
    printf("\n# Total number of atoms in model:                %4d ",al_data.alp.n_atoms);
    printf("\n# Number of fragments in model:                  %4d ",al_data.alp.n_fragments);
    printf("\n# Number of METHOD records:                      %4d \n",al_data.n_method);
    printf("\n# No errors.\n\n");
  }
  else {
    printf("\n# Number of errors = %d.\n\n",al_data.errors);
  }
}

void escape(int error)
{
  if(error>25) {
    printf("\n# Too many ERRORS ...");
    printf("\n#    Please check format for AL predictions.\n");
    exit(0);
  }
  return;
}

/*-----------------------------------------------------------
/
/   clean al_f structure
/
/------------------------------------------------------------*/
void clean_al_f(al_f *al_data)
{
  int j;

  strcpy(al_data->target_name,"   ");
  al_data->target_chain=' ';
  al_data->target_n_aa=0;
  al_data->model=0;
  al_data->end=0;
  al_data->n_method=0;
  al_data->errors=0;
  al_data->alp.n_aa=0;
  al_data->alp.n_atoms=0;
  al_data->alp.n_fragments=0;
  al_data->alp.parent_none=0;
  for(j=0;j<MAXRES;j++) {
    al_data->target_aa[j]=' ';
    al_data->target_aa_num[j]=-9999;
    al_data->alp.aa[j].res=' ';
    al_data->alp.aa[j].c=0;
    al_data->alp.aa[j].ca=0;
    al_data->alp.aa[j].n=0;
    al_data->alp.aa[j].o=0;
    al_data->alp.aa[j].main=0;
    strcpy(al_data->alp.al[j].t_res_name,"   ");
    strcpy(al_data->alp.al[j].p_res_name,"   ");
    strcpy(al_data->alp.al[j].p_ins,"   ");
    strcpy(al_data->alp.al[j].parent_name,"   ");
    al_data->alp.al[j].t_res_num=0;
    al_data->alp.al[j].p_res_num=0;
    al_data->alp.al[j].n_fragment=0;
  }
  for(j=0;j<MAXATM;j++) {
    al_data->alp.atom[j].serial=0;
    strcpy(al_data->alp.atom[j].name,"   ");
    strcpy(al_data->alp.atom[j].alt_loc," ");
    strcpy(al_data->alp.atom[j].res_name,"   ");
    strcpy(al_data->alp.atom[j].chain_id," ");
    al_data->alp.atom[j].res_seq=0;
    strcpy(al_data->alp.atom[j].i_code," ");
    al_data->alp.atom[j].R.x=0.0; 
    al_data->alp.atom[j].R.y=0.0; 
    al_data->alp.atom[j].R.z=0.0;
    al_data->alp.atom[j].occupancy=0.0;
    al_data->alp.atom[j].temp_factor=0.0;
    strcpy(al_data->alp.atom[j].seg_id,"    ");
    strcpy(al_data->alp.atom[j].element,"  ");
    strcpy(al_data->alp.atom[j].charge,"  ");
  }
  return;
}

/*-----------------------------------------------------------
/
/   clean pdb structure
/
/------------------------------------------------------------*/
void clean_pdb(pdb_struct *molecule)
{
  int i;

    molecule->n_atoms=0;
    molecule->n_aa=0;
    for(i=0;i<MAXATM;i++) {
      molecule->atom[i].serial=0;
      strcpy(molecule->atom[i].name,"   ");
      strcpy(molecule->atom[i].alt_loc," ");
      strcpy(molecule->atom[i].res_name,"   ");
      strcpy(molecule->atom[i].chain_id," ");
      molecule->atom[i].res_seq=0;
      strcpy(molecule->atom[i].i_code," ");
      molecule->atom[i].R.x=0.0; 
      molecule->atom[i].R.y=0.0; 
      molecule->atom[i].R.z=0.0;
      molecule->atom[i].occupancy=0.0;
      molecule->atom[i].temp_factor=0.0;
      strcpy(molecule->atom[i].seg_id,"    ");
      strcpy(molecule->atom[i].element,"  ");
      strcpy(molecule->atom[i].charge,"  ");
    }
  return;
}

/*-----------------------------------------------------------
/
/   read_al_f - read the AL predictions format
/
/------------------------------------------------------------*/
void read_al_f(al_f *al_data, char* fname)
{
  int i, begflag, authflag;
  char keyword[500], line[500], name[30], model_nb[100], unrefined[100];
  FILE *fp;

  if((fp = fopen(fname,"r"))==NULL) {
    printf("\n# error opening file %s for read\n\n",fname);
    exit(0);
  }

  /* Read in the al_data
  -------------------------------------------*/
  begflag=0;
  authflag=0;
  while(fgets(line,500,fp)!=NULL) {
    strcpy(keyword,"   ");
    strcpy(name,"   ");
    strcpy(model_nb,"#   ");
    strcpy(unrefined,"#   ");
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword,"PFRMAT\0",7)) {

      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);

      begflag=1;
      sscanf(line,"%s %s",keyword,name);
      if(strncmp(name,"AL\0",3)) {
        printf("\n# ERROR! Wrong specification of the AL format category");
        printf("\nPFRMAT AL      # was expected\n\n");
        exit(0);
      }
    }
    else if(!strncmp(keyword,"TARGET\0",7)) {

      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);

      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the AL prediction data records");
        printf("\nPFRMAT AL                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      begflag=2;
      sscanf(line,"%s %s",keyword,name);
      strcpy(al_data->target_name,name);
      read_seq(name,al_data);
    }
    else if(!strncmp(keyword,"AUTHOR\0",7)) {
      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the AL prediction data records");
        printf("\nPFRMAT AL                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);
      authflag=1;
    }
    else if(!strncmp(keyword,"REMARK\0",7)) {
      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the AL prediction data records");
        printf("\nPFRMAT AL                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);
    }
    else if(!strncmp(keyword,"METHOD\0",7)) {
      if(begflag<1) {
        printf("\n# ERROR! Unacceptable order of the AL prediction data records");
        printf("\nPFRMAT AL                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      al_data->n_method++;
    }
    else if(!strncmp(keyword,"MODEL\0",6)) {
      if(begflag<2 || authflag!=1) {
        printf("\n# ERROR! Unacceptable order of the AL prediction data records");
        printf("\nPFRMAT AL                # the first line");
        printf("\nTARGET Txxxx             # the second line");
        printf("\nAUTHOR xxxx-xxxx-xxxx    # the third line was expected\n\n");
        exit(0);
      }
      i=0;
      sscanf(line,"%s %s %s",keyword,model_nb,unrefined);
      sscanf(model_nb,"%d",&i);
      if(model_nb[1]!=' ' && model_nb[1]!='\0' && model_nb[1]!='\n') {
        printf("\n# ERROR! Unacceptable index of the prediction MODEL\n\n");
        al_data->errors++;
        escape(al_data->errors);
      }
      if(unrefined[0]!='#') {
        if(strncmp(unrefined,"UNREFINED\0",10) &&
           strncmp(unrefined,"REFINED\0",8)) {
          printf("\n# ERROR! Please check MODEL record.");
          printf("\n#        Only REFINED or UNREFINED labels are acceptable.\n\n");
          al_data->errors++;
          escape(al_data->errors);
        }
      }
      if(i>0 && i<=5) {
        begflag=3;
        al_data->model=i;
        printf("\n# Reading MODEL %2d\n",i);
        read_data_alp(al_data,fp);
        break;
      }
      else {
        printf("\n# ERROR! Unacceptable index of the AL prediction MODEL\n\n");
        al_data->errors++;
        escape(al_data->errors);
      }
    }
    else if(!strncmp(keyword,"END\0",4)) {
      al_data->end=1;
      break;
    }
    else if(strncmp(keyword," ",1)) {

      for(i=0;i<500;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);

      printf("\n# ERROR! Unknown record keyword in this section of the prediction.");
      printf("\n#        Check AL format description.\n\n");
      al_data->errors++;
      escape(al_data->errors);
    }
  }
  if(begflag!=3) {
    printf("\n# ERROR! There is no AL prediction data in this file\n\n");
    exit(0);
  }
  if(al_data->end==0) {
    printf("\n# ERROR! There is no END record in this file\n\n");
    exit(0);
  }
  if(authflag!=1) {
    printf("\n# ERROR! Check AUTHOR record in this file\n\n");
    exit(0);
  }
  if(al_data->n_method==0) {
    printf("\n# ERROR! There is no METHOD records in this file\n\n");
    exit(0);
  }
  fclose(fp);

  return;
}

/*-----------------------------------------------------------
/
/   read_seq - read a sequence file
/
/------------------------------------------------------------*/
void read_seq(char* tname, al_f *al_data)
{
  int i, n_aa, k, old_res, new_res;
  char chain;
  FILE *fp;
  char line[MAXRES], fname[20], lname[20], keyword[20];

  strcpy(lname,tname);
  strcpy(fname,"TARGETS/");
  strcat(fname,lname);
  strcat(fname,".seq.txt");
  if((fp = fopen(fname,"r"))==NULL) {
    for(i=0;i<20;i++) if(lname[i]=='t') lname[i]='T';
    strcpy(fname,"TARGETS/");
    strcat(fname,lname);
    strcat(fname,".seq.txt");
    if((fp = fopen(fname,"r"))==NULL) {
      for(i=0;i<20;i++) if(lname[i]=='T') lname[i]='t';
      strcpy(fname,"TARGETS/");
      strcat(fname,lname);
      strcat(fname,".seq.txt");
      if((fp = fopen(fname,"r"))==NULL) {
        printf("\n# ERROR! There is no target name:  %s",tname);
        printf("\n# TARGET Txxxx      # was expected\n\n");
        exit(0);
      }
    }
  }

  k=0;
  n_aa=0;
  chain=' ';
  while ((fgets(line, MAXRES, fp) != NULL)) {    
    if (strncmp(line, ">", 1) == 0) {
      if(line[6]=='_' && isalnum(line[7])!=0) {
        chain=line[7];
        if(isalpha(chain)==0) {
          printf("\n# ERROR! Wrong chain %c specification in the target:  %s",chain,tname);
          printf("\n#        Please send a message to the CASP organizers. \n\n");
          exit(0);
        }
      }
      k++;
      if(k>1) {
        printf("\n# ERROR! Format AL can be used for the prediction");
        printf("\n#        of target containing single chain only.\n\n");
        exit(0);
      }
    }
    else {
      i=0;
      while (line[i]!='\n') {
        if(line[i]!='\0' && line[i]!=' ') {
          if(check_aa(line[i],letter,N_AA1)<N_AA) {
            al_data->target_aa[n_aa]=line[i];
            n_aa++;
          }
          else {
            printf("\n# ERROR! Wrong amino acid code %c in the target:  %s",line[i],tname);
            printf("\n#        Please send a message to the CASP organizers. \n\n");
            fclose(fp);
            exit(0);
          }            
        }
        i++;
      }
    }
  }
  fclose(fp);
  al_data->target_chain=chain;
  al_data->target_n_aa=n_aa;

  strcpy(lname,tname);
  strcpy(fname,"TARGETS/");
  strcat(fname,lname);
  strcat(fname,".pdb.txt");
  if((fp = fopen(fname,"r"))==NULL) {
    for(i=0;i<20;i++) if(lname[i]=='t') lname[i]='T';
    strcpy(fname,"TARGETS/");
    strcat(fname,lname);
    strcat(fname,".pdb.txt");
    if((fp = fopen(fname,"r"))==NULL) {
      for(i=0;i<20;i++) if(lname[i]=='T') lname[i]='t';
      strcpy(fname,"TARGETS/");
      strcat(fname,lname);
      strcat(fname,".pdb.txt");
      if((fp = fopen(fname,"r"))==NULL) {
        printf("\n# ERROR! There is no target name:  %s",tname);
        printf("\n# TARGET Txxxx      # was expected\n\n");
        exit(0);
      }
    }
  }

  k=0;
  n_aa=0;
  chain='-';
  old_res=-9999;
  while ((fgets(line, MAXRES, fp) != NULL)) {    
    for(i=0;i<10;i++) keyword[i]=0;
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword,"ATOM\0",5))   {
      if(chain != line[21]) {
        chain=line[21];
        k++;
        n_aa=0;
        if(k>1) {
          printf("\n# ERROR! Wrong chain %c specification in the target:  %s",chain,tname);
          printf("\n#        Please send a message to the CASP organizers. \n\n");
          exit(0);
        }
      }
      for(i=0;i<10;i++) keyword[i]=0;
      strncpy(keyword,&line[22],4);
      sscanf(keyword,"%d",&new_res);
      if(new_res != old_res) {
        al_data->target_aa_num[n_aa]=new_res;
        old_res=new_res;
        n_aa++;
      }
    }
  }
  fclose(fp);

  return;
}

/*-----------------------------------------------------------
/
/   check_al_f - checks the AL prediction file format
/
/------------------------------------------------------------*/
void check_al_f(al_f *al_data)
{
  int i, j, k, kr, l, n_aa, imp_note;
  char res;

  imp_note=0;
  printf("\n# Checking the AL prediction MODEL %2d\n",al_data->model);

  n_aa=0;
  for(i=0;i<al_data->alp.n_atoms;i++) {
    C_321(al_data->alp.atom[i].res_name,&res);
    kr=al_data->alp.atom[i].res_seq;
    k=-9999;
    for(l=0;l<al_data->target_n_aa;l++) {
      if(kr==al_data->target_aa_num[l]) {
        k=l+1;
      }
    }
    if(k==-9999) {
      printf("\n# ERROR! Check the residue number:  %c %c%d",
                       res,al_data->target_chain,kr);
      printf("\n#        There is no such a residue in TARGET");
      printf("\n#        Check residue numbering in template PDB file for TARGET: %s\n\n",al_data->target_name);
      if(al_data->target_aa_num[0]!=1) {
        j=al_data->target_n_aa-1;
        printf("# The residue numbering of the  TARGET:  %s  is not standard: ",
                    al_data->target_name);
        printf("\n#   the first residue in the TARGET is:  %c %c%4d ",
                    al_data->target_aa[0],al_data->target_chain,al_data->target_aa_num[0]);
        printf("\n#   the last  residue in the TARGET is:  %c %c%4d ",
                    al_data->target_aa[j],al_data->target_chain,al_data->target_aa_num[j]);
        printf("\n# Please check if the numbering of residues in your ");
        printf("\n# prediction corresponds to this numbering scheme. \n\n");
      }
      exit(0);
    }

/* Added 
     if (al_data->alp.aa[k-1].c>0 && al_data->alp.atom[i].alt_loc[0] != ' '){
     } else {
     }
   to skip alternative location
*/
    if(!strncmp(al_data->alp.atom[i].name,"C\0",2)) {
     if (al_data->alp.aa[k-1].c>0 && al_data->alp.atom[i].alt_loc[0] != ' '){
     } else {     
        al_data->alp.aa[k-1].c++;
        al_data->alp.aa[k-1].main++;
     } 
    }
    if(!strncmp(al_data->alp.atom[i].name,"CA\0",3)) {
     if (al_data->alp.aa[k-1].ca>0 && al_data->alp.atom[i].alt_loc[0] != ' '){
     } else {
      al_data->alp.aa[k-1].ca++;
      al_data->alp.aa[k-1].main++;
     }
    }
    if(!strncmp(al_data->alp.atom[i].name,"N\0",2)) {
     if (al_data->alp.aa[k-1].n>0 && al_data->alp.atom[i].alt_loc[0] != ' '){
     } else {
      al_data->alp.aa[k-1].n++;
      al_data->alp.aa[k-1].main++;
     }
    }
    if(!strncmp(al_data->alp.atom[i].name,"O\0",2)) {
     if (al_data->alp.aa[k-1].o>0 && al_data->alp.atom[i].alt_loc[0] != ' '){
     } else {
      al_data->alp.aa[k-1].o++;
      al_data->alp.aa[k-1].main++;
     }
    }
    if(k>n_aa) n_aa=k;
    al_data->alp.aa[k-1].res=res;
    if(al_data->target_aa[k-1]!=al_data->alp.aa[k-1].res) {
      printf("# ERROR! Check residue:  %c %c%d  (In TARGET:  %c %c%d)\n",
                al_data->alp.aa[k-1].res,
                al_data->target_chain,
                kr,
                al_data->target_aa[k-1],
                al_data->target_chain,
                kr);
      al_data->errors++;
      escape(al_data->errors);
    }
    if(al_data->alp.atom[i].occupancy==1.0) {
      al_data->alp.n_occupancy++;
    }
  }
  al_data->alp.n_aa=0;
  for(i=0;i<n_aa;i++) {
    if(al_data->alp.aa[i].res!=' ') {
      al_data->alp.n_aa++;
      if(al_data->alp.aa[i].main<4) {
        printf("# IMPORTANT NOTE! Not complete main chain atoms for residue: %c %c%d \n",
                  al_data->alp.aa[i].res,al_data->target_chain,al_data->target_aa_num[i]);
        if(imp_note==0) {
          printf("\n# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #");
          printf("\n# It is requested  that coordinate data be supplied for  at least #"); 
          printf("\n# all non-hydrogen main chain atoms, i.e. the  N,  CA,  C and  O  #");
          printf("\n# atoms of every residue.  Specifically,   if only  CA  atoms are #"); 
          printf("\n# predicted by the method,  predictors  are  encouraged  to build #"); 
          printf("\n# the main chain  atoms  for  every  residue before submission to #"); 
          printf("\n# CASP. If only CA atoms were submitted it would not be possible  #");
          printf("\n# to run  most of the analysis  software,  which  would  severely #"); 
          printf("\n# limit the evaluation of that prediction.                        #"); 
          printf("\n# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #\n\n");
          imp_note=1;
        }
      }
      if(al_data->alp.aa[i].c>1 ||
         al_data->alp.aa[i].ca>1 ||
         al_data->alp.aa[i].n>1 ||
         al_data->alp.aa[i].o>1) {
        printf("# ERROR! Check residue:  %c %c%d  (! main chain atoms REPEATED !)\n",
                  al_data->alp.aa[i].res,al_data->target_chain,al_data->target_aa_num[i]);
        al_data->errors++;
        escape(al_data->errors);
      }
      if(al_data->alp.aa[i].ca==0) {
        printf("# ERROR! Check residue:  %c %c%d  (! There is no CA atom !)\n",
                  al_data->alp.aa[i].res,al_data->target_chain,al_data->target_aa_num[i]);
        al_data->errors++;
        escape(al_data->errors);
      }
    }
  }

  if(al_data->alp.n_aa>al_data->target_n_aa || 
     (al_data->alp.n_aa==0 && al_data->alp.parent_none==0)) {
    printf("# ERROR! Check the number %d of residues in the model. (In TARGET: %d)\n",
              al_data->alp.n_aa,al_data->target_n_aa);
    al_data->errors++;
    escape(al_data->errors);
  }
  if(al_data->alp.n_occupancy==0 && al_data->alp.parent_none==0) {
    printf("# ERROR! The number of predicted atoms in the model:  0 \n");
    al_data->errors++;
    escape(al_data->errors);
    return;
  }
  printf("# Checking the AL prediction MODEL %2d       (DONE)\n",al_data->model);
  return;
}

/*-----------------------------------------------------------
/
/   check_aa - checks an amino acid
/
/------------------------------------------------------------*/
int check_aa(char token, char* letter, int n)
{
  int i;

  for(i=0;i<n;i++) {
    if(letter[i]==token)
      return i;
  }
  return n;
}

/*-----------------------------------------------------------
/
/   C_123 - converts an amino acid code1 to code3
/
/------------------------------------------------------------*/
void C_123(char code1, char* code3)
{
  strcpy(code3,"###");
  if(code1=='A') strcpy(code3,"ALA");
  else if(code1=='V') strcpy(code3,"VAL");
  else if(code1=='L') strcpy(code3,"LEU");
  else if(code1=='I') strcpy(code3,"ILE");
  else if(code1=='P') strcpy(code3,"PRO");
  else if(code1=='M') strcpy(code3,"MET");
  else if(code1=='F') strcpy(code3,"PHE");
  else if(code1=='W') strcpy(code3,"TRP");
  else if(code1=='G') strcpy(code3,"GLY");
  else if(code1=='S') strcpy(code3,"SER");
  else if(code1=='T') strcpy(code3,"THR");
  else if(code1=='C') strcpy(code3,"CYS");
  else if(code1=='Y') strcpy(code3,"TYR");
  else if(code1=='N') strcpy(code3,"ASN");
  else if(code1=='Q') strcpy(code3,"GLN");
  else if(code1=='D') strcpy(code3,"ASP");
  else if(code1=='E') strcpy(code3,"GLU");
  else if(code1=='K') strcpy(code3,"LYS");
  else if(code1=='R') strcpy(code3,"ARG");
  else if(code1=='H') strcpy(code3,"HIS");

  return;
}


/*-----------------------------------------------------------
/
/   C_321 - converts an amino acid code3 to code1
/     including many non-standard AA 
/------------------------------------------------------------*/
void C_321(char* code3, char* code1){

  char up3[4];

  up3[0]=toupper(code3[0]);
  up3[1]=toupper(code3[1]);
  up3[2]=toupper(code3[2]);
  up3[3]='\0';

  if(!strcmp(up3, "ALA" )) {*code1='A';}
  else if(!strcmp(up3, "VAL" )) {*code1='V';}
  else if(!strcmp(up3, "LEU" )) {*code1='L';}
  else if(!strcmp(up3, "ILE" )) {*code1='I';}
  else if(!strcmp(up3, "PRO" )) {*code1='P';}
  else if(!strcmp(up3, "MET" ) || !strcmp(up3, "MSE")) {*code1='M';}
  else if(!strcmp(up3, "PHE" )) {*code1='F';}
  else if(!strcmp(up3, "TRP" )) {*code1='W';}
  else if(!strcmp(up3, "GLY" )) {*code1='G';}
  else if(!strcmp(up3, "SER" )) {*code1='S';}
  else if(!strcmp(up3, "THR" )) {*code1='T';}
  else if(!strcmp(up3, "CYS" )) {*code1='C';}
  else if(!strcmp(up3, "TYR" )) {*code1='Y';}
  else if(!strcmp(up3, "ASN" )) {*code1='N';}
  else if(!strcmp(up3, "GLN" )) {*code1='Q';}
  else if(!strcmp(up3, "ASP" )) {*code1='D';}
  else if(!strcmp(up3, "GLU" )) {*code1='E';}
  else if(!strcmp(up3, "LYS" )) {*code1='K';}
  else if(!strcmp(up3, "ARG" )) {*code1='R';}
  else if(!strcmp(up3, "HIS" )) {*code1='H';}
  else if(!strcmp(up3, "ASX" ))
    {*code1='B';} /* B stands for Asp or Asn */
   else if(!strcmp(up3, "GLX" ))
    {*code1='Z';} /* Z stands for Glu or Gln */
  else
    {*code1='X';}
  return;
}

/*-----------------------------------------------------------
/
/   read_data_alp - read the AL predictions file format.
/                   Four column format: T_aa T_n  P_aa P_n
/
/------------------------------------------------------------*/
void read_data_alp(al_f *al_data, FILE* fp)
{
  int i, j, k, l, n_aa, n_fragments, too_short;
  int parter, n1, n2, bug, was_none;
  char keyword[200], line[200], parent[100], sn[10], sn1[10], sn2[10];
  char t_aa[10], t_n[10], p_aa[10], p_n[10], p_ins[10], ch, chain;

  /* Read in the molecule:  al_data->alp.*****
  -------------------------------------------*/
  ch='#';
  n_aa=0;
  parter=0;
  strcpy(parent,"NONE");
  while(fgets(line,200,fp)!=NULL) {
    strcpy(keyword,"   ");
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword," ",1) ||
       !strncmp(keyword,"ATOM",4) ||
       !strncmp(keyword,"HETATM",6))      {
      for(i=0;i<200;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='/' || line[i]=='\\') line[i]=' ';
      printf(line);
      printf("\n# ERROR! Unknown record in the MODEL - END section.");
      printf("\n#        Submission should end with END record.");
      printf("\n#        The blank records are not allowed.");
      printf("\n#        Check four column AL format.\n\n");
      al_data->errors++;
      escape(al_data->errors);
    }
    else if(!strncmp(keyword,"#",1))      {}
    else if(!strncmp(keyword,"REMARK\0",7)) {}
    else if(!strncmp(keyword,"METHOD\0",7)) {
      al_data->n_method++;
    }
    else if(!strncmp(keyword,"TER\0",4))    {
      if(was_none==0 && too_short<3) {
        printf("\n# ERROR! Check the number of residues inside the PARENT - TER section.");
        printf("\n#        The independent segment cannot be shorter than 3 residues.\n");
        printf("\n#        Maybe PARENT NONE record should be used. \n");
        printf("\n#        Please check AL format description.\n");
        al_data->errors++;
        escape(al_data->errors);
      }
      was_none=0;
      if(parter!=1) {
        printf("\n# ERROR! Check the number of PARENT and TER records.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.\n");
        al_data->errors++;
        escape(al_data->errors);
        parter=1;
      }
      parter--;
    }
    else if(!strncmp(keyword,"PARENT\0",7)) {
      too_short=0;
      for(i=0;i<200;i++)
        if(line[i]=='%' || line[i]=='"' || 
           line[i]=='\\') line[i]=' ';
      printf(line);
      if(parter!=0) {
        printf("\n# ERROR! Check the number of PARENT and TER records.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.\n");
        al_data->errors++;
        escape(al_data->errors);
        parter=0;
      }
      for(i=0;i<10;i++) parent[i]=' ';
      sscanf(line,"%s %s",keyword,parent);
      if(!strncmp(parent,"NONE\0",5)) {
        was_none=1;
        al_data->alp.parent_none++;
        i=0;
        bug=0;
        while (line[i]!='\n' && bug<1) {
          if(isdigit(line[i])!=0) bug=1;
          i++;
        }
        if(bug==1) {
          n1=-9999;
          n2=-9999;
          strcpy(sn1,"   ");
          strcpy(sn2,"   ");
          sscanf(line,"%s %s %s %s",keyword,parent,sn1,sn2);
          if(al_data->target_chain==' ') {
            sscanf(sn1,"%d",&n1);
            sscanf(sn2,"%d",&n2);
          }
          else {
            strncpy(sn,&sn1[1],6);
            sscanf(sn,"%d",&n1);
            strncpy(sn,&sn2[1],6);
            sscanf(sn,"%d",&n2);
            if(sn1[0]!=al_data->target_chain ||
               sn2[0]!=al_data->target_chain) {
              n1=-9999;
            }
          }
          i=al_data->target_n_aa-1;
          if(n1<al_data->target_aa_num[0] || n2>al_data->target_aa_num[i] || n1>n2) {
            printf("\n# ERROR! Check the syntax of the line:  PARENT NONE n1 n2");
            printf("\n#        Delimiters n1 n2 indicate the range of the target"); 
            printf("\n#        sequence predicted as having no homologue in the");
            printf("\n#        current PDB. Omission of n1 n2 indicates the entire");
            printf("\n#        target.");
            n1=0;
            n2=al_data->target_n_aa-1;
            if(al_data->target_chain==' ') {
              printf("\n#        BY DEFAULT:  n1 = %d  n2 = %d\n",
                                 al_data->target_aa_num[n1],
                                 al_data->target_aa_num[n2]);
            }
            else {
              printf("\n#        BY DEFAULT:  n1 = %c%d  n2 = %c%d\n",
                                 al_data->target_chain,
                                 al_data->target_aa_num[n1],
                                 al_data->target_chain,
                                 al_data->target_aa_num[n2]);
            }
            al_data->errors++;
            escape(al_data->errors);
          }
          else {
            j=-9999;
            k=-9999;
            for(l=0;l<al_data->target_n_aa;l++) {
              if(n1==al_data->target_aa_num[l]) {
                j=l;
              }
              if(n2==al_data->target_aa_num[l]) {
                k=l;
              }
            }
            n1=j;
            n2=k;
          }
        }
        else {
          n1=0;
          n2=al_data->target_n_aa-1;
        }
        for(i=n1;i<=n2;i++) {
          al_data->alp.aa[i].c++;
          al_data->alp.aa[i].ca++;
          al_data->alp.aa[i].n++;
          al_data->alp.aa[i].o++;
        }
      }
      al_data->alp.n_fragments++;
      parter++;
    }
    else if(!strncmp(keyword,"END\0",4))    {
      if(parter!=0) {
        printf("\n# ERROR! Check the TER record before END.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.\n");
        al_data->errors++;
        escape(al_data->errors);
        parter=0;
      }
      al_data->alp.n_aa=n_aa;
      al_data->end=1;
      break;
    }
    else {
      too_short++;
      if(was_none==1) {
        printf("\n# ERROR! Check the TER record.");
        printf("\n#        After PARENT NONE the TER record is expected.\n");
        al_data->errors++;
        escape(al_data->errors);
        was_none=0;
      }
      if(parter!=1) {
        printf("\n# ERROR! Check the number of PARENT and TER records.");
        printf("\n#        Each structure segment should begin with a single");
        printf("\n#        PARENT record and terminate with a TER record.");
        printf("\n#        Model should be terminated with the END record.\n");
        al_data->errors++;
        escape(al_data->errors);
        parter=1;
      }
      strcpy(t_aa,"   ");
      strcpy(t_n,"    ");
      strcpy(p_aa,"    ");
      strcpy(p_n,"    ");
      strcpy(p_ins,"    ");
      sscanf(line,"%s %s %s %s",t_aa,t_n,p_aa,p_n);
      if(t_aa[0]==' ' || t_n[0]==' ' || p_aa[0]==' ' || p_n[0]==' ') {
        for(i=0;i<200;i++)
          if(line[i]=='%' || line[i]=='"' || 
             line[i]=='/' || line[i]=='\\') line[i]=' ';
        printf(line);
        printf("\n# ERROR! Unknown record in the PARENT - TER section.");
        if(t_aa[0]==' ' && t_n[0]==' ' && p_aa[0]==' ' && p_n[0]==' ') {
          printf("\n#        The blank records are not allowed.");
        }
        printf("\n#        Check four column AL format.\n\n");
        fclose(fp);
        exit(0);
      }
      else {
        i=0;
        if(al_data->target_chain!=' ') {
          if(t_n[0]!=al_data->target_chain) {
            printf("\n# ERROR! Check the chain specification in model residue: %s %s",t_aa,t_n);
            printf("\n#        The TARGET chain ID is %c\n",al_data->target_chain);
            al_data->errors++;
            escape(al_data->errors);
          }
          if(isdigit(t_n[1])==0) {
            printf("\n# ERROR! Check model residue: %s %s \n\n",t_aa,t_n);
            fclose(fp);
            exit(0);
          }
          t_n[0]=' ';
        }
        else if(isdigit(t_n[0])==0 && t_n[0]!='-') {
          printf("\n# ERROR! Wrong chain specification. Check model residue: %s %s \n\n",t_aa,t_n);
          fclose(fp);
          exit(0);
        }
        sscanf(t_n,"%d",&i);
        al_data->alp.al[n_aa].t_res_num=i;
        al_data->alp.al[n_aa].t_res_name[0]=t_aa[0];
        chain=' ';
        if(isalnum(parent[4])!=0) {
          chain=parent[4];
        }
        else { 
          chain=parent[5];
        }
        i=0;
        j=0;
        if(isalpha(chain)!=0) {
          if(chain==p_n[0]) {
            j=1;
            p_n[0]=' ';
          }
          else if(isalpha(p_n[0])!=0) {
            printf("\n# ERROR! Check four column AL format.");
            printf("\n#        Check the chain specification in parent residue: %s %s",p_aa,p_n);
            printf("\n#        The PARENT chain ID is %c\n\n",chain);
            fclose(fp);
            exit(0);
          }
        }
        if(isalnum(chain)==0 && isalpha(p_n[0])!=0) {
          printf("\n# ERROR! Check four column AL format.");
          printf("\n#        Check the chain specification in parent residue: %s %s",p_aa,p_n);
          printf("\n#        The PARENT chain is %c (if SPACE - single chain protein)\n\n",chain);
          fclose(fp);
          exit(0);
        }
        while(p_n[j]!='\0' && j<10) {
          if(isalpha(p_n[j])!=0) i=j;
          j++;
        }
        if(i>0 && i<10) {
          al_data->alp.al[n_aa].p_ins[0]=p_n[i];
          p_n[i]='\0';
        }
        i=0;
        sscanf(p_n,"%d",&i);
        al_data->alp.al[n_aa].p_res_num=i;
        al_data->alp.al[n_aa].p_res_name[0]=p_aa[0];
        al_data->alp.al[n_aa].n_fragment=al_data->alp.n_fragments;
        for(i=0;i<10;i++) al_data->alp.al[n_aa].parent_name[i]=' ';
        strcpy(al_data->alp.al[n_aa].parent_name,parent);
        n_aa++;
      }
    }      
  }
  al_data->alp.n_aa=n_aa;

  for(i=0;i<al_data->alp.n_aa;i++) {
  /*  
    printf(" %s %d %s %d %s %d %s \n",
             al_data->alp.al[i].t_res_name,
             al_data->alp.al[i].t_res_num,
             al_data->alp.al[i].p_res_name,
             al_data->alp.al[i].p_res_num,
             al_data->alp.al[i].p_ins,
             al_data->alp.al[i].n_fragment,
             al_data->alp.al[i].parent_name);
  */
    k=al_data->alp.al[i].t_res_num;
    j=-9999;
    for(l=0;l<al_data->target_n_aa;l++) {
      if(k==al_data->target_aa_num[l]) {
        j=l+1;
      }
    }
    if(j==-9999) {
      printf("\n# ERROR! Check the residue number:  %c %c%d",
                       al_data->alp.al[i].t_res_name[0],al_data->target_chain,k);
      printf("\n#        There is no such a residue in TARGET");
      printf("\n#        Check residue numbering in template PDB file for TARGET: %s\n\n",al_data->target_name);
      if(al_data->target_aa_num[0]!=1) {
        l=al_data->target_n_aa-1;
        printf("# The residue numbering of the  TARGET:  %s  is not standard: ",
                    al_data->target_name);
        printf("\n#   the first residue in the TARGET is:  %c %c%4d ",
                    al_data->target_aa[0],al_data->target_chain,al_data->target_aa_num[0]);
        printf("\n#   the last  residue in the TARGET is:  %c %c%4d ",
                    al_data->target_aa[l],al_data->target_chain,al_data->target_aa_num[l]);
        printf("\n# Please check if the numbering of residues in your ");
        printf("\n# prediction corresponds to this numbering scheme. \n\n");
      }
      exit(0);
    }
    if(al_data->alp.al[i].t_res_name[0]!=al_data->target_aa[j-1]) {
      printf("\n# ERROR! Check model residue: %c %c%d  (In TARGET: %c %c%d)\n",
             al_data->alp.al[i].t_res_name[0],
             al_data->target_chain,
             k,
             al_data->target_aa[j-1],
             al_data->target_chain,
             al_data->target_aa_num[j-1]);
      al_data->errors++;
      escape(al_data->errors);
    }
  }

  return ;
}

/*-----------------------------------------------------------
/
/   convert_al_3d - Converting alignments into a 3D structure
/                   extracting the coordinates from pdb file
/
/------------------------------------------------------------*/
void convert_al_3d(al_f *al_data)
{
  int i, j, j_ins, k, ok, fragment;
  int donothing;
  int n_aa, n_atoms;
  char pdb_id[10], chain, res[10], i_code[10], res1;
  pdb_struct molecule;

  ok=0;
  fragment=0;
  n_aa=0;
  n_atoms=0;
  i=0;
  if(al_data->alp.parent_none==0 && al_data->alp.n_aa<3) {
    printf("\n# ERROR! Check PARENT record and AL format.\n");
    al_data->errors++;
    escape(al_data->errors);
  }
  while(i<al_data->alp.n_aa) {
    if(al_data->alp.al[i].n_fragment!=fragment) {
      if(!strncmp(al_data->alp.al[i].parent_name,"NONE\0",5)) {
        printf("\n# ERROR! Check PARENT NONE record and AL format");
        printf("\n#        inside the PARENT - TER section.\n\n");
        exit(0);
      }
      fragment=al_data->alp.al[i].n_fragment;
      pdb_id[0]=al_data->alp.al[i].parent_name[0];
      pdb_id[1]=al_data->alp.al[i].parent_name[1];
      pdb_id[2]=al_data->alp.al[i].parent_name[2];
      pdb_id[3]=al_data->alp.al[i].parent_name[3];
      pdb_id[4]='\0';
      pdb_id[5]=' ';
      if(isalnum(al_data->alp.al[i].parent_name[4])!=0) {
        chain=al_data->alp.al[i].parent_name[4];
      }
      else { 
        chain=al_data->alp.al[i].parent_name[5];
      }
      ok=pdb_align(pdb_id, chain, &molecule);
      /* 
      for(k=1;k<=molecule.n_atoms;k++){
       printf ("==%d %s \n", molecule.atom[k].serial, molecule.atom[k].name);
      }    
      */
      if(ok==0) {
        printf("\n# ERROR! Check PARENT record. PDB code: %s\n\n",
                    al_data->alp.al[i].parent_name);
        exit(0);
      }
    }
    if(ok==1) {
      j_ins=0;
      j=1;
      while(j_ins<j) {
        j_ins=j;
        while(molecule.atom[j].res_seq!=al_data->alp.al[i].p_res_num &&
              j<=molecule.n_atoms) j++;
        while(molecule.atom[j].res_seq==al_data->alp.al[i].p_res_num &&
              molecule.atom[j].i_code[0]!=al_data->alp.al[i].p_ins[0] &&
              j<=molecule.n_atoms) j++;
      }
      C_321(molecule.atom[j].res_name,&res1);
      donothing=1;
      if(molecule.atom[j].res_seq!=al_data->alp.al[i].p_res_num ||
         res1!=al_data->alp.al[i].p_res_name[0] ||
         molecule.atom[j].i_code[0]!=al_data->alp.al[i].p_ins[0]) {
        if(res1=='X'){ 
	  if (molecule.atom[j].res_seq==0) {
             printf("\n# ERROR! Check parent residue %c %d%s (In PDB: none)\n",
                    al_data->alp.al[i].p_res_name[0],
                    al_data->alp.al[i].p_res_num,
                    al_data->alp.al[i].p_ins);
          } else { /* added to allow UNKNOWN residues be substituted by anything */
             donothing=0;
          } 
	} else {
          printf("\n# ERROR! Check parent residue %c %d%s (In PDB: %c %d%s)\n",
                    al_data->alp.al[i].p_res_name[0],
                    al_data->alp.al[i].p_res_num,
                    al_data->alp.al[i].p_ins,
                    res1,
                    molecule.atom[j].res_seq,
                    molecule.atom[j].i_code);
        }
        if(donothing){
          al_data->errors++;
          escape(al_data->errors);
	}
      }
      while(molecule.atom[j].res_seq==al_data->alp.al[i].p_res_num &&
            molecule.atom[j].i_code[0]==al_data->alp.al[i].p_ins[0] &&
            j<=molecule.n_atoms) {
        if(!strncmp(molecule.atom[j].name,"N\0",2) ||
           !strncmp(molecule.atom[j].name,"CA\0",3) ||
           !strncmp(molecule.atom[j].name,"C\0",2) ||
           !strncmp(molecule.atom[j].name,"O\0",2)) {
          n_aa=al_data->alp.al[i].t_res_num;
          C_123(al_data->alp.al[i].t_res_name[0],res);
          al_data->alp.atom[n_atoms].serial=molecule.atom[j].serial;
          strcpy(al_data->alp.atom[n_atoms].name,molecule.atom[j].name);
          strcpy(al_data->alp.atom[n_atoms].alt_loc,molecule.atom[j].alt_loc);
          strcpy(al_data->alp.atom[n_atoms].res_name,res);
          strcpy(al_data->alp.atom[n_atoms].chain_id," ");
          al_data->alp.atom[n_atoms].res_seq=n_aa;
          strcpy(al_data->alp.atom[n_atoms].i_code," ");
          al_data->alp.atom[n_atoms].R.x=molecule.atom[j].R.x; 
          al_data->alp.atom[n_atoms].R.y=molecule.atom[j].R.y; 
          al_data->alp.atom[n_atoms].R.z=molecule.atom[j].R.z;
          al_data->alp.atom[n_atoms].occupancy=1.0;
          al_data->alp.atom[n_atoms].temp_factor=0.0;
          strcpy(al_data->alp.atom[n_atoms].seg_id,molecule.atom[j].seg_id);
          strcpy(al_data->alp.atom[n_atoms].element,molecule.atom[j].element);
          strcpy(al_data->alp.atom[n_atoms].charge,molecule.atom[j].charge);
          n_atoms++;
        }
        j++; 
      }
    }
    i++;
  }
  al_data->alp.n_atoms=n_atoms;
  return;
}

/*-----------------------------------------------------------
/
/   pdb_align - read a pdb file (code and chain from AL format)
/
/------------------------------------------------------------*/
int pdb_align(char* pdb_code, char chain, pdb_struct* structure)
{
  int i, ok;
  FILE *tmp;
  char name[20], sub1[500], sub2[500], sub3[500], pdb_dir[100];

  strcpy(pdb_dir,"/PDB/structures");
  strcpy(name,pdb_code);
  for(i=0;i<4;i++) {
    ok=check_aa(name[i],uppmat,36);
    if(ok<36) {
      name[i]=lowmat[ok];
    }
  }
  ok=0;
  if(name[0]!='\0' && name[0]!='\n' && name[0]!='\t' && name[0]!=' ') {
    strcpy(sub1,"         ");
    sub1[0]=name[1];
    sub1[1]=name[2];
    sub1[2]='\0';
    sub1[3]=' ';
    sub1[4]=' ';
    strcpy(sub2,name);
    sub2[4]='\0';
    sub2[5]=' ';
    sub2[6]=' ';
    if(chain=='\0' || chain=='\n' || chain=='\t' || chain=='\r' || chain=='-' || chain=='_') chain=' ';
    strcat(sub2,".ent");
    strcpy(sub3,pdb_dir);
    strcat(sub3,"/");
    strcat(sub3,sub1);
    strcat(sub3,"/pdb");
    strcat(sub3,sub2);
    printf("\n# Loading structure PDB: %4s (chain: %c)",name,chain);
    if((tmp = fopen(sub3,"r"))==NULL) {
      printf("\n# ERROR! The PDB entry %4s cannot be found in the current release.\n",name);
      ok=0;
    }
    else {
      ok=1;
      clean_pdb(structure);
      read_pdb_orig(chain, structure, tmp);
      if(structure->n_atoms==0) {
        printf("\n# ERROR! Check chain %c in the PDB entry %4s\n",chain,name);
        ok=0;
      }
    }
    fclose(tmp);
  }

  return ok;
}

/*-----------------------------------------------------------
/
/   read_pdb_orig - read a pdb file (without correction)
/                   (called by pdb_align to extract the threading 
/                   coordinates from pdb file)
/
/------------------------------------------------------------*/
void read_pdb_orig(char chain, pdb_struct *molecule, FILE* fp)
{
  int  i, j, k, n_aa, n_atoms, n_mol, endflag, gap, gapl, gapr, ok;
  char keyword[100], line[120], sub[10], sub_res[10], sub1[50], sub2[50], sub3[50], ch;
  int  alt_loc_ok;
  char alt_loc;

  /* Read in the molecule
  -------------------------------------------*/
  k=0;
  n_atoms=0;
  n_aa=0;
  n_mol=0;
  ch='#';
  endflag=0;
  alt_loc=' ';
  alt_loc_ok=0;
  while(fgets(line,120,fp)!=NULL && endflag!=1) {
    for(i=0;i<10;i++) keyword[i]=0;
    sscanf(line,"%s",keyword);
    if(!strncmp(keyword,"HEADER",6)) {}
    else if(!strncmp(keyword,"COMPND",6)) {}
    else if(!strncmp(keyword,"SOURCE",6)) {}
    else if(!strncmp(keyword,"AUTHOR",6)) {}
    else if(!strncmp(keyword,"REVDAT",6)) {}
    else if(!strncmp(keyword,"JRNL",4))   {}
    else if(!strncmp(keyword,"REMARK",6)) {}
    else if(!strncmp(keyword,"SEQRES",6)) {}
    else if(!strncmp(keyword,"FTNOTE",6)) {}
    else if(!strncmp(keyword,"FORMUL",6)) {}
    else if(!strncmp(keyword,"HELIX",5))  {}
    else if(!strncmp(keyword,"SHEET",5))  {}
    else if(!strncmp(keyword,"TURN",4))   {}
    else if(!strncmp(keyword,"SITE",4))   {}
    else if(!strncmp(keyword,"CRYST",5))  {}
    else if(!strncmp(keyword,"SCALE",5))  {}
    else if(!strncmp(keyword,"TER\0",4))  {}
    else if(!strncmp(keyword,"CONECT",6)) {}
    else if(!strncmp(keyword,"MASTER",6)) {}
    else if(!strncmp(keyword,"END",3))    {endflag=1;}
    else if(!strncmp(keyword,"END\0",4))  {endflag=1;}
    else if(!strncmp(keyword,"ENDMDL",6)) {endflag=1;}
    else if(!strncmp(keyword,"ATOM",4) ||
            !strncmp(keyword,"HETATM",6)) {
     for(j=0;j<10;j++) sub_res[j]=0;
     strncpy(sub_res,&line[17],3);
     ok=1;

/* AK: check chain ID - no respect to case ### removed after causing errors 
     if(toupper(line[21])==toupper(chain) && ok==1) {
*/     
     if(line[21]==chain && ok==1) { 
      i=0;
      while (line[i]!='\n') i++;
      n_atoms++;
      if(n_atoms>=MAXATM) {
        printf("\n# ERROR! Check the number of ATOM records. (Limit MAX = %d)\n\n",MAXATM);
        fclose(fp);
        exit(0);
      }
      if(i>6) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[6],5);
        sscanf(sub,"%d",&molecule->atom[n_atoms].serial);
      }
      if(i>12) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[12],4);
        sscanf(sub,"%s",molecule->atom[n_atoms].name);
      }
      if(i>17) {
        sscanf(sub_res,"%s",molecule->atom[n_atoms].res_name);
      }
      if(i>21) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[21],1);
        sscanf(sub,"%s",molecule->atom[n_atoms].chain_id);
        if(ch!=molecule->atom[n_atoms].chain_id[0]) {
          ch=molecule->atom[n_atoms].chain_id[0];
          n_mol++;
          alt_loc=' ';
          alt_loc_ok=0;
        }
      }
      if(i>22) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[22],4);
        sscanf(sub,"%d",&molecule->atom[n_atoms].res_seq);
        if(k!=molecule->atom[n_atoms].res_seq) {
          k=molecule->atom[n_atoms].res_seq;
          n_aa++;
          alt_loc=' ';
          alt_loc_ok=0;
        }
      }
      if(i>16) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[16],1);
        sscanf(sub,"%s",molecule->atom[n_atoms].alt_loc);
        if(alt_loc_ok==1 && line[16]==' ') {
          alt_loc=' ';
          alt_loc_ok=0;
        }
        if(alt_loc_ok==1 && line[16]!=alt_loc) {
          alt_loc_ok=2;
        }
        if(alt_loc_ok==0 && line[16]!=alt_loc) {
          alt_loc_ok=1;
          alt_loc=line[16];
        }
      }
      if(i>26) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[26],1);
        sscanf(sub,"%s",molecule->atom[n_atoms].i_code);
      }
      if(i>30) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[30],8);
        sscanf(sub,"%f",&molecule->atom[n_atoms].R.x);
      }
      if(i>38) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[38],8);
        sscanf(sub,"%f",&molecule->atom[n_atoms].R.y);
      }
      if(i>46) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[46],8);
        sscanf(sub,"%f",&molecule->atom[n_atoms].R.z);
      }
      if(i>54) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[54],6);
        sscanf(sub,"%f",&molecule->atom[n_atoms].occupancy);
      }
      if(i>60) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[60],6);
        sscanf(sub,"%f",&molecule->atom[n_atoms].temp_factor);
      }
      if(i>72) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[72],4);
        sscanf(sub,"%s",molecule->atom[n_atoms].seg_id);
      }
      if(i>76) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[76],2);
        sscanf(sub,"%s",molecule->atom[n_atoms].element);
      }
      if(i>78) {
        for(j=0;j<10;j++) sub[j]=0;
        strncpy(sub,&line[78],2);
        sscanf(sub,"%s",molecule->atom[n_atoms].charge);
      }
      if(i<30) {
        printf("\n# ERROR! Check ATOM number %d\n",n_atoms);
        fclose(fp);
        exit(0);
      }
      if(i>=30 && i<54) {
        printf("\n# ERROR! Check coordinates: ATOM number %d",molecule->atom[n_atoms].serial);
        fclose(fp);
        exit(0);
      }
     }
    }
    else if(!strncmp(keyword,"HET\0",4))    {}
    else {}
  }
  printf("\n");
  molecule->n_atoms=n_atoms;
  molecule->n_aa=n_aa;
  molecule->n_mol=n_mol;
  fclose(fp);
  return;
}

