diff --git a/solutions/More_goodness_progression.ipynb b/solutions/More_goodness_progression.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4119b843f9592d1b5adaecee2c6a171848d8f944 --- /dev/null +++ b/solutions/More_goodness_progression.ipynb @@ -0,0 +1,430 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:8a9edae870851327562c4e01d6fc9a2c2a00319e9c6353ac21dada578d06d83f" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "GC percentage" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq:\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " \n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "print \"The sequence 'CAGG' has a %GC of {:.2f}\".format(\n", + " calc_gc_percent(\"CAGG\"))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "The sequence 'CAGG' has a %GC of 75.00\n" + ] + } + ], + "prompt_number": 47 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Adding user input" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "user_input = 'CACG'" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 48 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#!/usr/bin/env python\n", + "import sys\n", + "\n", + "def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq:\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "input_seq = user_input\n", + "print \"The sequence '{}' has a %GC of 
{:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "ename": "ZeroDivisionError", + "evalue": "float division by zero", + "output_type": "pyerr", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 16\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m", + "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero" + ] + } + ], + "prompt_number": 51 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Adding .upper() for convenience:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "user_input = 'cacg'" + ], + "language": "python", + 
"metadata": {}, + "outputs": [], + "prompt_number": 42 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "input_seq = user_input\n", + "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "The sequence 'cacg' has a %GC of 75.00\n" + ] + } + ], + "prompt_number": 43 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Adding a sensible docstring" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#!/usr/bin/env python\n", + "import sys\n", + "\n", + "def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "input_seq = sys.argv[1]\n", + "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "ename": "ZeroDivisionError", + "evalue": "float division by zero", + "output_type": "pyerr", + "traceback": [ + 
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 29\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m", + "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 25\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 26\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero" + ] + } + ], + "prompt_number": 44 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "help(calc_gc_percent)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Trying unexpected input" + ] + }, + { + "cell_type": 
"code", + "collapsed": false, + "input": [ + "user_input = 'ACTG123'" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "input_seq = user_input\n", + "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Adding input checks" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "user_input = 'ACTG123'" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 52 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "user_input = ''" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 54 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " else:\n", + " raise ValueError(\n", + " \"Unexpeced character found: {}. 
Only \"\n", + " \"ACTGs are allowed.\".format(char))\n", + " \n", + " return gc_count * 100.0 / (gc_count + at_count)\n", + "\n", + "input_seq = user_input\n", + "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "ename": "ZeroDivisionError", + "evalue": "float division by zero", + "output_type": "pyerr", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 29\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 30\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m", + "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 24\u001b[0m \"ACTGs are allowed.\".format(char))\n\u001b[0;32m 25\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 26\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero" + ] + } + ], + "prompt_number": 55 + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Adding corner cases" + ] + }, + 
{ + "cell_type": "code", + "collapsed": false, + "input": [ + "user_input = ''" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 56 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " else:\n", + " raise ValueError(\n", + " \"Unexpected character found: {}. Only \"\n", + " \"ACTGs are allowed.\".format(char))\n", + "\n", + " # Corner case handling: empty input sequence.\n", + " try:\n", + " return gc_count * 100.0 / (gc_count + at_count)\n", + " except ZeroDivisionError:\n", + " return 0.0\n", + "\n", + "input_seq = user_input\n", + "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "The sequence '' has a %GC of 0.00\n" + ] + } + ], + "prompt_number": 57 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file