From 2075d6334449b880b442d2286b57bfb093a7a7ea Mon Sep 17 00:00:00 2001
From: Michiel van Galen <m.van_galen@lumc.nl>
Date: Mon, 14 Jul 2014 16:17:23 +0200
Subject: [PATCH] Examples more goodness prepared in a notebook.

---
 solutions/More_goodness_progression.ipynb | 430 ++++++++++++++++++++++
 1 file changed, 430 insertions(+)
 create mode 100644 solutions/More_goodness_progression.ipynb

diff --git a/solutions/More_goodness_progression.ipynb b/solutions/More_goodness_progression.ipynb
new file mode 100644
index 0000000..4119b84
--- /dev/null
+++ b/solutions/More_goodness_progression.ipynb
@@ -0,0 +1,430 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:8a9edae870851327562c4e01d6fc9a2c2a00319e9c6353ac21dada578d06d83f"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "GC percentage"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def calc_gc_percent(seq):\n",
+      "    at_count, gc_count = 0, 0\n",
+      "    for char in seq:\n",
+      "        if char in ('A', 'T'):\n",
+      "            at_count += 1\n",
+      "        elif char in ('G', 'C'):\n",
+      "            gc_count += 1\n",
+      "            \n",
+      "    return gc_count * 100.0 / (gc_count + at_count)       \n",
+      "\n",
+      "print \"The sequence 'CAGG' has a %GC of {:.2f}\".format(\n",
+      "          calc_gc_percent(\"CAGG\"))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "The sequence 'CAGG' has a %GC of 75.00\n"
+       ]
+      }
+     ],
+     "prompt_number": 47
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Adding user input"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "user_input = 'CACG'"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 48
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#!/usr/bin/env python\n",
+      "import sys\n",
+      "\n",
+      "def calc_gc_percent(seq):\n",
+      "    at_count, gc_count = 0, 0\n",
+      "    for char in seq:\n",
+      "        if char in ('A', 'T'):\n",
+      "            at_count += 1\n",
+      "        elif char in ('G', 'C'):\n",
+      "            gc_count += 1\n",
+      "\n",
+      "    return gc_count * 100.0 / (gc_count + at_count)       \n",
+      "\n",
+      "input_seq = user_input\n",
+      "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
+      "          input_seq, calc_gc_percent(input_seq))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "ename": "ZeroDivisionError",
+       "evalue": "float division by zero",
+       "output_type": "pyerr",
+       "traceback": [
+        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
+        "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     15\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 16\u001b[1;33m           input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
+        "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m     10\u001b[0m             \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+        "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
+       ]
+      }
+     ],
+     "prompt_number": 51
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Adding .upper() for convenience:"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "user_input = 'cacg'"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 42
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def calc_gc_percent(seq):\n",
+      "    at_count, gc_count = 0, 0\n",
+      "    for char in seq.upper():\n",
+      "        if char in ('A', 'T'):\n",
+      "            at_count += 1\n",
+      "        elif char in ('G', 'C'):\n",
+      "            gc_count += 1\n",
+      "\n",
+      "    return gc_count * 100.0 / (gc_count + at_count)  \n",
+      "\n",
+      "input_seq = user_input\n",
+      "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
+      "          input_seq, calc_gc_percent(input_seq))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "The sequence 'cacg' has a %GC of 75.00\n"
+       ]
+      }
+     ],
+     "prompt_number": 43
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Adding a sensible docstring"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#!/usr/bin/env python\n",
+      "import sys\n",
+      "\n",
+      "def calc_gc_percent(seq):\n",
+      "    \"\"\"\n",
+      "    Calculates the GC percentage of the given sequence.\n",
+      "\n",
+      "    Arguments:\n",
+      "        - seq - the input sequence (string).\n",
+      "\n",
+      "    Returns:\n",
+      "        - GC percentage (float).\n",
+      "\n",
+      "    The returned value is always <= 100.0\n",
+      "    \"\"\"\n",
+      "    at_count, gc_count = 0, 0\n",
+      "    # Change input to all caps to allow for non-capital\n",
+      "    # input sequence.\n",
+      "    for char in seq.upper():\n",
+      "        if char in ('A', 'T'):\n",
+      "            at_count += 1\n",
+      "        elif char in ('G', 'C'):\n",
+      "            gc_count += 1\n",
+      "\n",
+      "    return gc_count * 100.0 / (gc_count + at_count)       \n",
+      "\n",
+      "input_seq = sys.argv[1]\n",
+      "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
+      "          input_seq, calc_gc_percent(input_seq))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "ename": "ZeroDivisionError",
+       "evalue": "float division by zero",
+       "output_type": "pyerr",
+       "traceback": [
+        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
+        "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     28\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 29\u001b[1;33m           input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
+        "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m     23\u001b[0m             \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 25\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     26\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+        "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
+       ]
+      }
+     ],
+     "prompt_number": 44
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "help(calc_gc_percent)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Trying unexpected input"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "user_input = 'ACTG123'"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def calc_gc_percent(seq):\n",
+      "    at_count, gc_count = 0, 0\n",
+      "    for char in seq.upper():\n",
+      "        if char in ('A', 'T'):\n",
+      "            at_count += 1\n",
+      "        elif char in ('G', 'C'):\n",
+      "            gc_count += 1\n",
+      "\n",
+      "    return gc_count * 100.0 / (gc_count + at_count)  \n",
+      "\n",
+      "input_seq = user_input\n",
+      "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
+      "          input_seq, calc_gc_percent(input_seq))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Adding input checks"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "user_input = 'ACTG123'"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 52
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "user_input = ''"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 54
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def calc_gc_percent(seq):\n",
+      "    \"\"\"\n",
+      "    Calculates the GC percentage of the given sequence.\n",
+      "\n",
+      "    Arguments:\n",
+      "        - seq - the input sequence (string).\n",
+      "\n",
+      "    Returns:\n",
+      "        - GC percentage (float).\n",
+      "\n",
+      "    The returned value is always <= 100.0\n",
+      "    \"\"\"\n",
+      "    at_count, gc_count = 0, 0\n",
+      "    # Change input to all caps to allow for non-capital\n",
+      "    # input sequence.\n",
+      "    for char in seq.upper():\n",
+      "        if char in ('A', 'T'):\n",
+      "            at_count += 1\n",
+      "        elif char in ('G', 'C'):\n",
+      "            gc_count += 1\n",
+      "        else:\n",
+      "            raise ValueError(\n",
+      "                \"Unexpected character found: {}. Only \"\n",
+      "                \"ACTGs are allowed.\".format(char))\n",
+      "         \n",
+      "    return gc_count * 100.0 / (gc_count + at_count)\n",
+      "\n",
+      "input_seq = user_input\n",
+      "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
+      "          input_seq, calc_gc_percent(input_seq))\n"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "ename": "ZeroDivisionError",
+       "evalue": "float division by zero",
+       "output_type": "pyerr",
+       "traceback": [
+        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
+        "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     29\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 30\u001b[1;33m           input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
+        "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m     24\u001b[0m                 \"ACTGs are allowed.\".format(char))\n\u001b[0;32m     25\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 26\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+        "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
+       ]
+      }
+     ],
+     "prompt_number": 55
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Adding corner cases"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "user_input = ''"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 56
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def calc_gc_percent(seq):\n",
+      "    \"\"\"\n",
+      "    Calculates the GC percentage of the given sequence.\n",
+      "\n",
+      "    Arguments:\n",
+      "        - seq - the input sequence (string).\n",
+      "\n",
+      "    Returns:\n",
+      "        - GC percentage (float).\n",
+      "\n",
+      "    The returned value is in [0.0, 100.0]; an empty sequence yields 0.0.\n",
+      "    \"\"\"\n",
+      "    at_count, gc_count = 0, 0\n",
+      "    # Change input to all caps to allow for non-capital\n",
+      "    # input sequence.\n",
+      "    for char in seq.upper():\n",
+      "        if char in ('A', 'T'):\n",
+      "            at_count += 1\n",
+      "        elif char in ('G', 'C'):\n",
+      "            gc_count += 1\n",
+      "        else:\n",
+      "            raise ValueError(\n",
+      "                \"Unexpected character found: {}. Only \"\n",
+      "                \"ACTGs are allowed.\".format(char))\n",
+      "\n",
+      "    # Corner case handling: empty input sequence.\n",
+      "    try:\n",
+      "        return gc_count * 100.0 / (gc_count + at_count)\n",
+      "    except ZeroDivisionError:\n",
+      "        return 0.0\n",
+      "\n",
+      "input_seq = user_input\n",
+      "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
+      "          input_seq, calc_gc_percent(input_seq))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "The sequence '' has a %GC of 0.00\n"
+       ]
+      }
+     ],
+     "prompt_number": 57
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
-- 
GitLab