{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RNA-Puzzle 18"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Init the library and needed functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nglview\n",
    "import rna_pdb_tools.Seq as Seq\n",
    "import rna_pdb_tools.BlastPDB\n",
    "from rna_pdb_tools.BlastPDB import BlastPDB\n",
    "reload(rna_pdb_tools.BlastPDB);\n",
    "reload(Seq);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a RNASeqence object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG\n"
     ]
    }
   ],
   "source": [
    "seq = Seq.RNASequence(\"GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG\")\n",
    "print(seq)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Secondary structure prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ">rna_seq\n",
      "GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG\n",
      "(((((((((.(((((...)))))(((((((.....)))))))...)))))..)))).(((((....))))) (-33.10)\n"
     ]
    }
   ],
   "source": [
    "print(seq.predict_ss())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ">rna_seq [100]\n",
      "GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG -33.10   1.00\n",
      "(((((((((.((((.....))))(((((((.....)))))))...)))))..)))).(((((....))))) -32.40\n",
      "(((((((((.(((((...)))))(((((((.....)))))))...)))))..)))).(((((....))))) -33.10\n",
      "(((((((((((((....)))).((((((((.....))))))))..)))))..)))).(((((....))))) -32.30\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(seq.predict_ss(method='RNAsubopt'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(seq.predict_ss(method='ipknot'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PDB Blast search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<HTML>\n",
      "<TITLE>BLAST Search Results</TITLE>\n",
      "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" VLINK=\"#660099\" ALINK=\"#660099\">\n",
      "<PRE>\n",
      "<b>BLASTN 2.2.18 [Mar-02-2008]</b>\n",
      "\n",
      "\n",
      "<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids\n",
      "=9254694&dopt=Citation\">Reference</a>:</b>\n",
      "Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch&auml;ffer, \n",
      "Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), \n",
      "\"Gapped BLAST and PSI-BLAST: a new generation of protein database search\n",
      "programs\",  Nucleic Acids Res. 25:3389-3402.\n",
      "\n",
      "<b>Query=</b> UNKNOWN_SEQUENCE\n",
      "         (71 letters)\n",
      "\n",
      "<b>Database:</b> pdb_nucleotide \n",
      "           19,357 sequences; 3,678,086 total letters\n",
      "\n",
      "Searching..................................................done\n",
      "\n",
      "<PRE>\n",
      "\n",
      "\n",
      "                                                                 Score    E\n",
      "Sequences producing significant alignments:                      (bits) Value\n",
      "\n",
      "5TPY:1:A|pdbid|entity|chain(s)|sequence                               <a href = #16642>105</a>   3e-24\n",
      "4PQV:1:A|pdbid|entity|chain(s)|sequence                               <a href = #11431> 34</a>   0.010\n",
      "</PRE>\n",
      "<PRE>\n",
      "><a name = 16642></a>5TPY:1:A|pdbid|entity|chain(s)|sequence\n",
      "          Length = 71\n",
      "\n",
      " Score =  105 bits (53), Expect = 3e-24\n",
      " Identities = 53/53 (100%)\n",
      " Strand = Plus / Plus\n",
      "\n",
      "                                                               \n",
      "Query: 1  gggtcaggccggcgaaagtcgccacagtttggggaaagctgtgcagcctgtaa 53\n",
      "          |||||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "Sbjct: 1  gggtcaggccggcgaaagtcgccacagtttggggaaagctgtgcagcctgtaa 53\n",
      "</PRE>\n",
      "\n",
      "\n",
      "<PRE>\n",
      "><a name = 11431></a>4PQV:1:A|pdbid|entity|chain(s)|sequence\n",
      "          Length = 68\n",
      "\n",
      " Score = 34.2 bits (17), Expect = 0.010\n",
      " Identities = 23/25 (92%)\n",
      " Strand = Plus / Plus\n",
      "\n",
      "                                   \n",
      "Query: 1  gggtcaggccggcgaaagtcgccac 25\n",
      "          |||||||  ||||||||||||||||\n",
      "Sbjct: 1  gggtcagatcggcgaaagtcgccac 25\n",
      "</PRE>\n",
      "\n",
      "\n",
      "<PRE>\n",
      "  Database: pdb_nucleotide\n",
      "    Posted date:  Sep 28, 2018 10:56 PM\n",
      "  Number of letters in database: 3,678,086\n",
      "  Number of sequences in database:  19,357\n",
      "  \n",
      "Lambda     K      H\n",
      "    1.37    0.711     1.31 \n",
      "\n",
      "Gapped\n",
      "Lambda     K      H\n",
      "    1.37    0.711     1.31 \n",
      "\n",
      "\n",
      "Matrix: blastn matrix:1 -3\n",
      "Gap Penalties: Existence: 5, Extension: 2\n",
      "Number of Sequences: 19357\n",
      "Number of Hits to DB: 1840\n",
      "Number of extensions: 12\n",
      "Number of successful extensions: 7\n",
      "Number of sequences better than 10.0: 2\n",
      "Number of HSP's gapped: 7\n",
      "Number of HSP's successfully gapped: 2\n",
      "Length of query: 71\n",
      "Length of database: 3,678,086\n",
      "Length adjustment: 14\n",
      "Effective length of query: 57\n",
      "Effective length of database: 3,407,088\n",
      "Effective search space: 194204016\n",
      "Effective search space used: 194204016\n",
      "X1: 10 (19.8 bits)\n",
      "X2: 15 (29.7 bits)\n",
      "X3: 50 (99.1 bits)\n",
      "S1: 10 (20.3 bits)\n",
      "S2: 12 (24.3 bits)\n",
      "</PRE>\n",
      "</BODY>\n",
      "</HTML>\n"
     ]
    }
   ],
   "source": [
    "p = BlastPDB(seq.seq)\n",
    "p.search()\n",
    "print p.result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "RfamSearchError",
     "evalue": "Error: File existence/permissions problem in trying to open CM file /home/magnus/work/db/rfamdb/Rfam.cm.\nCM file /home/magnus/work/db/rfamdb/Rfam.cm not found (nor an .i1m binary of it); also looked in RFAMDB",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mRfamSearchError\u001b[0m                           Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-30-fc7568030162>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m#seq = Seq.Seq(\"GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mrs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRfamSearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mrs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcmscan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/Users/magnus/work-src/rna-pdb-tools/rna_pdb_tools/RfamSearch.pyc\u001b[0m in \u001b[0;36mcmscan\u001b[0;34m(self, seq)\u001b[0m\n\u001b[1;32m     62\u001b[0m         \u001b[0merr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mRfamSearchError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     65\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mof\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     66\u001b[0m         \u001b[0;31m# os.chdir(old_pwd)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mRfamSearchError\u001b[0m: Error: File existence/permissions problem in trying to open CM file /home/magnus/work/db/rfamdb/Rfam.cm.\nCM file /home/magnus/work/db/rfamdb/Rfam.cm not found (nor an .i1m binary of it); also looked in RFAMDB"
     ]
    }
   ],
   "source": [
    "import rna_pdb_tools.RfamSearch as rf\n",
    "#reload(rf)\n",
    "\n",
    "#seq = Seq.Seq(\"GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG\")\n",
    "rs = rf.RfamSearch()\n",
    "print rs.cmscan(seq)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3D structure analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rna_pdb_tools.pdb_parser_lib import RNAStructure\n",
    "\n",
    "fn = \"rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb\"\n",
    "\n",
    "s = RNAStructure(fn)\n",
    "print s.get_report()\n",
    "print s.get_info_chains()\n",
    "print s.get_head()\n",
    "#print s.view() # image paste here :-)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "cd rna_pdb_tools\n",
    "./rna-pdb-tools.py --no_hr --get_seq data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RNA 3D structure prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# model using SimRNA\n",
    "#res = SimRNA(ss,seq.get_ss())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fake import, should be \n",
    "res = \"rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb\"\n",
    "# view\n",
    "view = nglview.show_structure_file(res)\n",
    "view.add_representation(repr_type='cartoon')\n",
    "view"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# rna_pdb_tools --get_seq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "cd ~/rna-bench/opt/xxxcx rna_pdb_tools\n",
    "./rna-pdb-tools.py --no_hr --get_seq ~/rna-bench/examples/5k7c.pdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "cd rna_pdb_tools\n",
    "./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb\n",
    "./rna-pdb-tools.py --no_hr --get_seq input/tetraloop.pdb\n",
    "./rna-pdb-tools.py --get_seq input/1xjr.pdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.16"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "120px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false,
   "widenNotebook": false
  },
  "widgets": {
   "state": {
    "6ceeeae558b24cccb2f19b5ce9e7a1bf": {
     "views": [
      {
       "cell_index": 17
      }
     ]
    },
    "ba29d5caff4446039cffaf8a7429154f": {
     "views": [
      {
       "cell_index": 17
      }
     ]
    }
   },
   "version": "1.2.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
