{ "metadata": { "name": "", "signature": "sha256:8a9edae870851327562c4e01d6fc9a2c2a00319e9c6353ac21dada578d06d83f" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "GC percentage" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def calc_gc_percent(seq):\n", " at_count, gc_count = 0, 0\n", " for char in seq:\n", " if char in ('A', 'T'):\n", " at_count += 1\n", " elif char in ('G', 'C'):\n", " gc_count += 1\n", " \n", " return gc_count * 100.0 / (gc_count + at_count) \n", "\n", "print \"The sequence 'CAGG' has a %GC of {:.2f}\".format(\n", " calc_gc_percent(\"CAGG\"))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "The sequence 'CAGG' has a %GC of 75.00\n" ] } ], "prompt_number": 47 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Adding user input" ] }, { "cell_type": "code", "collapsed": false, "input": [ "user_input = 'CACG'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 48 }, { "cell_type": "code", "collapsed": false, "input": [ "#!/usr/bin/env python\n", "import sys\n", "\n", "def calc_gc_percent(seq):\n", " at_count, gc_count = 0, 0\n", " for char in seq:\n", " if char in ('A', 'T'):\n", " at_count += 1\n", " elif char in ('G', 'C'):\n", " gc_count += 1\n", "\n", " return gc_count * 100.0 / (gc_count + at_count) \n", "\n", "input_seq = user_input\n", "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", " input_seq, calc_gc_percent(input_seq))" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "ZeroDivisionError", "evalue": "float division by zero", "output_type": "pyerr", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in 
\u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 16\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m", "\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero" ] } ], "prompt_number": 51 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Adding .upper() for convenience:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "user_input = 'cacg'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "def calc_gc_percent(seq):\n", " at_count, gc_count = 0, 0\n", " for char in seq.upper():\n", " if char in ('A', 'T'):\n", " at_count += 1\n", " elif char in ('G', 'C'):\n", " gc_count += 1\n", "\n", " return gc_count * 100.0 / (gc_count + at_count) \n", "\n", "input_seq = user_input\n", "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", " input_seq, calc_gc_percent(input_seq))" ], "language": "python", 
"metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "The sequence 'cacg' has a %GC of 75.00\n" ] } ], "prompt_number": 43 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Adding a sensible docstring" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#!/usr/bin/env python\n", "import sys\n", "\n", "def calc_gc_percent(seq):\n", " \"\"\"\n", " Calculates the GC percentage of the given sequence.\n", "\n", " Arguments:\n", " - seq - the input sequence (string).\n", "\n", " Returns:\n", " - GC percentage (float).\n", "\n", " The returned value is always <= 100.0\n", " \"\"\"\n", " at_count, gc_count = 0, 0\n", " # Change input to all caps to allow for non-capital\n", " # input sequence.\n", " for char in seq.upper():\n", " if char in ('A', 'T'):\n", " at_count += 1\n", " elif char in ('G', 'C'):\n", " gc_count += 1\n", "\n", " return gc_count * 100.0 / (gc_count + at_count) \n", "\n", "input_seq = sys.argv[1]\n", "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", " input_seq, calc_gc_percent(input_seq))" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "ZeroDivisionError", "evalue": "float division by zero", "output_type": "pyerr", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 29\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m", "\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in 
\u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 25\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 26\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero" ] } ], "prompt_number": 44 }, { "cell_type": "code", "collapsed": false, "input": [ "help(calc_gc_percent)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Trying unexpected input" ] }, { "cell_type": "code", "collapsed": false, "input": [ "user_input = 'ACTG123'" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "def calc_gc_percent(seq):\n", " at_count, gc_count = 0, 0\n", " for char in seq.upper():\n", " if char in ('A', 'T'):\n", " at_count += 1\n", " elif char in ('G', 'C'):\n", " gc_count += 1\n", "\n", " return gc_count * 100.0 / (gc_count + at_count) \n", "\n", "input_seq = user_input\n", "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", " input_seq, calc_gc_percent(input_seq))" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Adding input checks" ] }, { "cell_type": "code", "collapsed": false, "input": [ "user_input = 
'ACTG123'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 52 }, { "cell_type": "code", "collapsed": false, "input": [ "user_input = ''" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 54 }, { "cell_type": "code", "collapsed": false, "input": [ "def calc_gc_percent(seq):\n", " \"\"\"\n", " Calculates the GC percentage of the given sequence.\n", "\n", " Arguments:\n", " - seq - the input sequence (string).\n", "\n", " Returns:\n", " - GC percentage (float).\n", "\n", " The returned value is always <= 100.0\n", " \"\"\"\n", " at_count, gc_count = 0, 0\n", " # Change input to all caps to allow for non-capital\n", " # input sequence.\n", " for char in seq.upper():\n", " if char in ('A', 'T'):\n", " at_count += 1\n", " elif char in ('G', 'C'):\n", " gc_count += 1\n", " else:\n", " raise ValueError(\n", " \"Unexpected character found: {}. Only \"\n", " \"ACTGs are allowed.\".format(char))\n", " \n", " return gc_count * 100.0 / (gc_count + at_count)\n", "\n", "input_seq = user_input\n", "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", " input_seq, calc_gc_percent(input_seq))\n" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "ZeroDivisionError", "evalue": "float division by zero", "output_type": "pyerr", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 29\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 30\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m", "\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in 
\u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 24\u001b[0m \"ACTGs are allowed.\".format(char))\n\u001b[0;32m 25\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 26\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero" ] } ], "prompt_number": 55 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Adding corner cases" ] }, { "cell_type": "code", "collapsed": false, "input": [ "user_input = ''" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 56 }, { "cell_type": "code", "collapsed": false, "input": [ "def calc_gc_percent(seq):\n", " \"\"\"\n", " Calculates the GC percentage of the given sequence.\n", "\n", " Arguments:\n", " - seq - the input sequence (string).\n", "\n", " Returns:\n", " - GC percentage (float).\n", "\n", " The returned value is always <= 100.0\n", " \"\"\"\n", " at_count, gc_count = 0, 0\n", " # Change input to all caps to allow for non-capital\n", " # input sequence.\n", " for char in seq.upper():\n", " if char in ('A', 'T'):\n", " at_count += 1\n", " elif char in ('G', 'C'):\n", " gc_count += 1\n", " else:\n", " raise ValueError(\n", " \"Unexpected character found: {}. 
Only \"\n", " \"ACTGs are allowed.\".format(char))\n", "\n", " # Corner case handling: empty input sequence.\n", " try:\n", " return gc_count * 100.0 / (gc_count + at_count)\n", " except ZeroDivisionError:\n", " return 0.0\n", "\n", "input_seq = user_input\n", "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", " input_seq, calc_gc_percent(input_seq))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "The sequence '' has a %GC of 0.00\n" ] } ], "prompt_number": 57 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }