{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import nglview\n",
    "import rna_tools.Seq as Seq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "seq = Seq.RNASequence(\"GGCGCGGCACCGUCCGCGGAACAAACGG\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GGCGCGGCACCGUCCGCGGAACAAACGG"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "seq"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Secondary structure prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ">rna_seq\n",
      "GGCGCGGCACCGUCCGCGGAACAAACGG\n",
      "..(((((......))))).......... ( -8.00)\n"
     ]
    }
   ],
   "source": [
    "print(seq.predict_ss())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ">rna_seq [100]\n",
      "GGCGCGGCACCGUCCGCGGAACAAACGG  -8.00   1.00\n",
      "..(((((......)))))..........  -8.00\n",
      "..((((((...).)))))..........  -7.10\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print seq.predict_ss(method='RNAsubopt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# print seq.predict_ss(method='ipknot')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# print seq.predict_ss(method='centroid_fold')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((((......))))..((.......)).\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(seq.predict_ss(method='contextfold'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dot2ct /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUtK9Bu /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUtK9Bu.ct\n",
      "Converting dot bracket file...Dot bracket file conversion complete. Created: <closed file '/var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUtK9Bu', mode 'w' at 0x10adb3540>.ct\n",
      "28  ss\n",
      "    1 G       0    2   14    1\n",
      "    2 G       1    3   13    2\n",
      "    3 C       2    4   12    3\n",
      "    4 G       3    5   11    4\n",
      "    5 C       4    6    0    5\n",
      "    6 G       5    7    0    6\n",
      "    7 G       6    8    0    7\n",
      "    8 C       7    9    0    8\n",
      "    9 A       8   10    0    9\n",
      "   10 C       9   11    0   10\n",
      "   11 C      10   12    4   11\n",
      "   12 G      11   13    3   12\n",
      "   13 U      12   14    2   13\n",
      "   14 C      13   15    1   14\n",
      "   15 C      14   16    0   15\n",
      "   16 G      15   17    0   16\n",
      "   17 C      16   18   27   17\n",
      "   18 G      17   19   26   18\n",
      "   19 G      18   20    0   19\n",
      "   20 A      19   21    0   20\n",
      "   21 A      20   22    0   21\n",
      "   22 C      21   23    0   22\n",
      "   23 A      22   24    0   23\n",
      "   24 A      23   25    0   24\n",
      "   25 A      24   26    0   25\n",
      "   26 C      25   27   18   26\n",
      "   27 G      26   28   17   27\n",
      "   28 G      27    0    0   28\n"
     ]
    }
   ],
   "source": [
    "from rna_tools.rna_dot2ct import dot2ct\n",
    "ss = seq.predict_ss(method='contextfold')\n",
    "print(dot2ct(seq.seq, ss))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RFAM search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCCGGCAUCGAUAGCCGCCCGCCUGGGC\n",
      "# cmscan :: search sequence(s) against a CM database\n",
      "# INFERNAL 1.1.2 (July 2016)\n",
      "# Copyright (C) 2016 Howard Hughes Medical Institute.\n",
      "# Freely distributed under a BSD open source license.\n",
      "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
      "# query sequence file:                   /tmp/ss.fa\n",
      "# target CM database:                    Rfam.cm\n",
      "# sequence reporting threshold:          E-value <= 1\n",
      "# number of worker threads:              4\n",
      "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
      "\n",
      "Query:       test  [L=71]\n",
      "Hit scores:\n",
      " rank     E-value  score  bias  modelname  start    end   mdl trunc   gc  description\n",
      " ----   --------- ------ -----  --------- ------ ------   --- ----- ----  -----------\n",
      "  (1) !   1.3e-06   44.6   0.1  pfl            1     71 +  cm 5'&3' 0.72  -\n",
      "\n",
      "\n",
      "Hit alignments:\n",
      ">> pfl  \n",
      " rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc\n",
      " ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----\n",
      "  (1) !   1.3e-06   44.6   0.1  cm        6       84 ~~           1          71 + ~~ 0.97 5'&3' 0.72\n",
      "\n",
      "                                                                        ???      ??                     NC\n",
      "          ~~~~~(((((((,,,,,,,,,,,,,<<<___>>>,,<<<<_____>>>>.,,,,,))))))))))---...<<<<<_________>>>~~~~~ CS\n",
      "   pfl  1 <[5]*cccgcgcgACUGGCGaaaacggcuuagccguGUGGAucuACCAC.GGGgAgcgcgggggguaa...cuGCCGauCGCCUGGGC*[7]> 91\n",
      "               ::::CG:GACUGGCGAA A            GUGG ++ ACCAC GGGGA:CG:::: GG A+     GCCG +CGCCUGGGC     \n",
      "  test  1 <[0]*GGGUCGUGACUGGCGAACAG-----------GUGGGAA-ACCACcGGGGAGCGACCCCGGCAUcgaUAGCCGCCCGCCUGGGC*[0]> 71\n",
      "          .....*****************966...........******9.*********************888********************..... PP\n",
      "\n",
      "\n",
      "\n",
      "Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)\n",
      "--------------------------------------------------------------------------------------\n",
      "Query sequence(s):                                               1  (142 residues searched)\n",
      "Target model(s):                                               347  (40911 consensus positions)\n",
      "Windows   passing  local HMM SSV           filter:               5  (0.007205); expected (0.02)\n",
      "Windows   passing  local HMM MSV      bias filter:               3  (0.004323); expected (0.02)\n",
      "Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)\n",
      "Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)\n",
      "Total HMM hits reported:                                         0  (0)\n",
      "\n",
      "Internal CM pipeline statistics summary:\n",
      "----------------------------------------\n",
      "Query sequence(s):                                               1  (142 residues searched)\n",
      "Query sequences re-searched for truncated hits:                  1  (410.5 residues re-searched, avg per model)\n",
      "Target model(s):                                              2126  (267652 consensus positions)\n",
      "Windows   passing  local HMM SSV           filter:            1029  (0.05685); expected (0.35)\n",
      "Windows   passing  local HMM Viterbi       filter:                  (off)\n",
      "Windows   passing  local HMM Viterbi  bias filter:                  (off)\n",
      "Windows   passing  local HMM Forward       filter:             414  (0.02386); expected (0.02)\n",
      "Windows   passing  local HMM Forward  bias filter:             350  (0.02025); expected (0.02)\n",
      "Windows   passing glocal HMM Forward       filter:             104  (0.005853); expected (0.02)\n",
      "Windows   passing glocal HMM Forward  bias filter:              92  (0.005128); expected (0.02)\n",
      "Envelopes passing glocal HMM envelope defn filter:              86  (0.004435); expected (0.02)\n",
      "Envelopes passing  local CM  CYK           filter:              11  (0.0004274); expected (0.0001)\n",
      "Total CM hits reported:                                          1  (0.0001209); includes 1 truncated hit(s)\n",
      "\n",
      "Total CM and HMM hits reported:                                  1\n",
      "\n",
      "# CPU time: 4.04u 0.58s 00:00:04.62 Elapsed: 00:00:03.25\n",
      "//\n",
      "[ok]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import rna_pdb_tools.RfamSearch as rf\n",
    "reload(rf)\n",
    "\n",
    "seq = Seq.Seq(\"GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCCGGCAUCGAUAGCCGCCCGCCUGGGC\")\n",
    "rs = rf.RfamSearch()\n",
    "print rs.cmscan(seq)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3D structure analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The RNARNAStructure report: rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb \n",
      "A:1-43 B:1-10\n",
      "ATOM      1  P     G A   1     -12.509  18.639  13.726  1.00  0.00\n",
      "ATOM      2  OP1   G A   1     -13.934  18.507  14.168  1.00  0.00\n",
      "ATOM      3  OP2   G A   1     -11.541  17.557  14.097  1.00  0.00\n",
      "ATOM      4  O5'   G A   1     -12.604  18.683  12.146  1.00  0.00\n",
      "ATOM      5  C5'   G A   1     -13.512  19.569  11.525  1.00  0.00\n"
     ]
    }
   ],
   "source": [
    "from rna_pdb_tools.pdb_parser_lib import RNAStructure\n",
    "\n",
    "fn = \"rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb\"\n",
    "\n",
    "s = RNAStructure(fn)\n",
    "print s.get_report()\n",
    "print s.get_info_chains()\n",
    "print s.get_head()\n",
    "#print s.view() # image paste here :-)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> 260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb A:1-43\n",
      "GGCGGAACCGGUGAGUACACCGGAAUCCGAAAGGAUUUGGGCG\n",
      "> 260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb B:1-10\n",
      "UGCCCCCGCC\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "cd rna_pdb_tools\n",
    "./rna-pdb-tools.py --no_hr --get_seq data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RNA 3D structure prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# model using SimRNA\n",
    "#res = SimRNA(ss,seq.get_ss())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# fake import, should be \n",
    "res = \"rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb\"\n",
    "# view\n",
    "view = nglview.show_structure_file(res)\n",
    "view.add_representation(repr_type='cartoon')\n",
    "view"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# rna_pdb_tools --get_seq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> 5k7c.pdb A:1-47\n",
      "CGUGGUUAGGGCCACGUUAAAUAGUUGCUUAAGCCCUAAGCGUUGAU\n",
      "> 5k7c.pdb B:48-58\n",
      "AUCAGgUGCAA\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "cd rna_pdb_tools\n",
    "./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> 5k7c.pdb A:1-47\n",
      "CGUGGUUAGGGCCACGUUAAAUAGUUGCUUAAGCCCUAAGCGUUGAU\n",
      "> 5k7c.pdb B:48-58\n",
      "AUCAGgUGCAA\n",
      "> tetraloop.pdb A:4-7\n",
      "GCAA\n",
      "> 1xjr.pdb A:1-47\n",
      "GGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "cd rna_pdb_tools\n",
    "./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb\n",
    "./rna-pdb-tools.py --no_hr --get_seq input/tetraloop.pdb\n",
    "./rna-pdb-tools.py --get_seq input/1xjr.pdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "101px",
    "width": "253px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
