{
 "metadata": {
  "name": "",
  "signature": "sha256:8a9edae870851327562c4e01d6fc9a2c2a00319e9c6353ac21dada578d06d83f"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "GC percentage"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def calc_gc_percent(seq):\n",
      "    # Tally A/T and G/C characters in a single pass. Anything else,\n",
      "    # lower-case letters included, is skipped without complaint.\n",
      "    gc_total = 0\n",
      "    at_total = 0\n",
      "    for base in seq:\n",
      "        if base in ('A', 'T'):\n",
      "            at_total += 1\n",
      "            continue\n",
      "        if base in ('G', 'C'):\n",
      "            gc_total += 1\n",
      "\n",
      "    # Scaling by 100.0 first keeps the division in floating point.\n",
      "    counted = gc_total + at_total\n",
      "    return gc_total * 100.0 / counted\n",
      "\n",
      "# Sanity check on a short sequence: three of its four bases are G or C.\n",
      "# The parenthesised print gives the same output on Python 2 and is also\n",
      "# valid on Python 3.\n",
      "print(\"The sequence 'CAGG' has a %GC of {:.2f}\".format(\n",
      "    calc_gc_percent(\"CAGG\")))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "The sequence 'CAGG' has a %GC of 75.00\n"
       ]
      }
     ],
     "prompt_number": 47
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Adding user input"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sequence that the following cell reads as its input.\n",
      "user_input = 'CACG'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 48
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#!/usr/bin/env python\n",
      "import sys\n",
      "\n",
      "def calc_gc_percent(seq):\n",
      "    # Running totals for the two base classes, filled in one pass.\n",
      "    # Characters that are not upper-case A, C, G or T are not counted.\n",
      "    totals = {'AT': 0, 'GC': 0}\n",
      "    for nucleotide in seq:\n",
      "        if nucleotide in ('A', 'T'):\n",
      "            totals['AT'] += 1\n",
      "        elif nucleotide in ('G', 'C'):\n",
      "            totals['GC'] += 1\n",
      "\n",
      "    # The 100.0 factor makes this a floating-point division.\n",
      "    return totals['GC'] * 100.0 / (totals['GC'] + totals['AT'])\n",
      "\n",
      "# The sequence to analyse comes from the user_input variable.\n",
      "input_seq = user_input\n",
      "# Parenthesised print: same output on Python 2, also valid on Python 3.\n",
      "print(\"The sequence '{}' has a %GC of {:.2f}\".format(\n",
      "    input_seq, calc_gc_percent(input_seq)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "ZeroDivisionError",
       "evalue": "float division by zero",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     15\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 16\u001b[1;33m           input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
        "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m     10\u001b[0m             \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
       ]
      }
     ],
     "prompt_number": 51
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Adding .upper() for convenience:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Lower-case sequence, used to try out the .upper() handling below.\n",
      "user_input = 'cacg'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 42
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def calc_gc_percent(seq):\n",
      "    # Upper-case the input once so that 'cacg' and 'CACG' count alike.\n",
      "    normalized = seq.upper()\n",
      "    n_at = n_gc = 0\n",
      "    for letter in normalized:\n",
      "        if letter in ('A', 'T'):\n",
      "            n_at += 1\n",
      "        elif letter in ('G', 'C'):\n",
      "            n_gc += 1\n",
      "\n",
      "    # The 100.0 factor keeps the division in floating point.\n",
      "    return n_gc * 100.0 / (n_gc + n_at)\n",
      "\n",
      "# The sequence to analyse comes from the user_input variable.\n",
      "input_seq = user_input\n",
      "# Parenthesised print: same output on Python 2, also valid on Python 3.\n",
      "print(\"The sequence '{}' has a %GC of {:.2f}\".format(\n",
      "    input_seq, calc_gc_percent(input_seq)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "The sequence 'cacg' has a %GC of 75.00\n"
       ]
      }
     ],
     "prompt_number": 43
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Adding a sensible docstring"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#!/usr/bin/env python\n",
      "import sys\n",
      "\n",
      "def calc_gc_percent(seq):\n",
      "    \"\"\"\n",
      "    Return the GC content of a sequence as a percentage.\n",
      "\n",
      "    Arguments:\n",
      "        - seq - the sequence to analyse (string); letter case\n",
      "          does not matter.\n",
      "\n",
      "    Returns:\n",
      "        - share of G and C among the A, C, G and T characters\n",
      "          of the sequence (float, never above 100.0).\n",
      "\n",
      "    Characters other than A, C, G and T are skipped. A sequence\n",
      "    without any A, C, G or T raises ZeroDivisionError.\n",
      "    \"\"\"\n",
      "    gc_bases, at_bases = 0, 0\n",
      "    # Compare in upper case so that lower-case input is counted too.\n",
      "    for base in seq.upper():\n",
      "        if base in ('A', 'T'):\n",
      "            at_bases += 1\n",
      "        elif base in ('G', 'C'):\n",
      "            gc_bases += 1\n",
      "\n",
      "    acgt_bases = gc_bases + at_bases\n",
      "    return gc_bases * 100.0 / acgt_bases\n",
      "\n",
      "# Script usage: the sequence is the first command-line argument.\n",
      "# NOTE(review): inside a notebook kernel sys.argv[1] is presumably a\n",
      "# kernel launch option rather than a sequence -- confirm before\n",
      "# running this cell as is.\n",
      "input_seq = sys.argv[1]\n",
      "# Parenthesised print: same output on Python 2, also valid on Python 3.\n",
      "print(\"The sequence '{}' has a %GC of {:.2f}\".format(\n",
      "    input_seq, calc_gc_percent(input_seq)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "ZeroDivisionError",
       "evalue": "float division by zero",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     28\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 29\u001b[1;33m           input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
        "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m     23\u001b[0m             \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 25\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     26\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
       ]
      }
     ],
     "prompt_number": 44
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Display the docstring of calc_gc_percent.\n",
      "help(calc_gc_percent)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Trying unexpected input"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sequence that mixes the four bases with digits.\n",
      "user_input = 'ACTG123'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def calc_gc_percent(seq):\n",
      "    # Count in upper case. Anything that is not A, C, G or T (the\n",
      "    # digits in 'ACTG123', for instance) is silently skipped.\n",
      "    at_hits = 0\n",
      "    gc_hits = 0\n",
      "    for symbol in seq.upper():\n",
      "        if symbol in ('A', 'T'):\n",
      "            at_hits += 1\n",
      "            continue\n",
      "        if symbol in ('G', 'C'):\n",
      "            gc_hits += 1\n",
      "\n",
      "    # The 100.0 factor keeps the division in floating point.\n",
      "    return 100.0 * gc_hits / (gc_hits + at_hits)\n",
      "\n",
      "# The sequence to analyse comes from the user_input variable.\n",
      "input_seq = user_input\n",
      "# Parenthesised print: same output on Python 2, also valid on Python 3.\n",
      "print(\"The sequence '{}' has a %GC of {:.2f}\".format(\n",
      "    input_seq, calc_gc_percent(input_seq)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Adding input checks"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sequence that contains digits in addition to the four bases.\n",
      "user_input = 'ACTG123'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 52
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Empty sequence; when the cells are run in order this replaces the\n",
      "# value assigned in the previous cell.\n",
      "user_input = ''"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 54
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def calc_gc_percent(seq):\n",
      "    \"\"\"\n",
      "    Calculates the GC percentage of the given sequence.\n",
      "\n",
      "    Arguments:\n",
      "        - seq - the input sequence (string); upper- and lower-case\n",
      "          letters are treated alike.\n",
      "\n",
      "    Returns:\n",
      "        - GC percentage (float).\n",
      "\n",
      "    Raises:\n",
      "        - ValueError if the sequence contains a character other\n",
      "          than A, C, T or G.\n",
      "        - ZeroDivisionError if the sequence is empty.\n",
      "\n",
      "    The returned value is always <= 100.0\n",
      "    \"\"\"\n",
      "    at_count, gc_count = 0, 0\n",
      "    # Change input to all caps to allow for non-capital\n",
      "    # input sequence.\n",
      "    for char in seq.upper():\n",
      "        if char in ('A', 'T'):\n",
      "            at_count += 1\n",
      "        elif char in ('G', 'C'):\n",
      "            gc_count += 1\n",
      "        else:\n",
      "            # Show the repr of the offending character so that blanks\n",
      "            # and line breaks stay visible in the message.\n",
      "            raise ValueError(\n",
      "                \"Unexpected character found: {!r}. Only \"\n",
      "                \"ACTGs are allowed.\".format(char))\n",
      "\n",
      "    # Multiplying by 100.0 first keeps the division in floating point.\n",
      "    return gc_count * 100.0 / (gc_count + at_count)\n",
      "\n",
      "# The sequence to analyse comes from the user_input variable.\n",
      "input_seq = user_input\n",
      "# Parenthesised print: same output on Python 2, also valid on Python 3.\n",
      "print(\"The sequence '{}' has a %GC of {:.2f}\".format(\n",
      "    input_seq, calc_gc_percent(input_seq)))\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "ZeroDivisionError",
       "evalue": "float division by zero",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     29\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 30\u001b[1;33m           input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
        "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m     24\u001b[0m                 \"ACTGs are allowed.\".format(char))\n\u001b[0;32m     25\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 26\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
       ]
      }
     ],
     "prompt_number": 55
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Adding corner cases"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Empty sequence: the corner case that the next cell handles.\n",
      "user_input = ''"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 56
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def calc_gc_percent(seq):\n",
      "    \"\"\"\n",
      "    Calculates the GC percentage of the given sequence.\n",
      "\n",
      "    Arguments:\n",
      "        - seq - the input sequence (string); upper- and lower-case\n",
      "          letters are treated alike.\n",
      "\n",
      "    Returns:\n",
      "        - GC percentage (float); 0.0 for an empty sequence.\n",
      "\n",
      "    Raises:\n",
      "        - ValueError if the sequence contains a character other\n",
      "          than A, C, T or G.\n",
      "\n",
      "    The returned value is always <= 100.0\n",
      "    \"\"\"\n",
      "    at_count, gc_count = 0, 0\n",
      "    # Change input to all caps to allow for non-capital\n",
      "    # input sequence.\n",
      "    for char in seq.upper():\n",
      "        if char in ('A', 'T'):\n",
      "            at_count += 1\n",
      "        elif char in ('G', 'C'):\n",
      "            gc_count += 1\n",
      "        else:\n",
      "            # Show the repr of the offending character so that blanks\n",
      "            # and line breaks stay visible in the message.\n",
      "            raise ValueError(\n",
      "                \"Unexpected character found: {!r}. Only \"\n",
      "                \"ACTGs are allowed.\".format(char))\n",
      "\n",
      "    # Corner case handling: empty input sequence. Both counters are\n",
      "    # then zero and the division below raises ZeroDivisionError.\n",
      "    try:\n",
      "        return gc_count * 100.0 / (gc_count + at_count)\n",
      "    except ZeroDivisionError:\n",
      "        return 0.0\n",
      "\n",
      "# The sequence to analyse comes from the user_input variable.\n",
      "input_seq = user_input\n",
      "# Parenthesised print: same output on Python 2, also valid on Python 3.\n",
      "print(\"The sequence '{}' has a %GC of {:.2f}\".format(\n",
      "    input_seq, calc_gc_percent(input_seq)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "The sequence '' has a %GC of 0.00\n"
       ]
      }
     ],
     "prompt_number": 57
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}