Skip to content
Snippets Groups Projects
Commit 2075d633 authored by Michiel van Galen's avatar Michiel van Galen
Browse files

Examples more goodness prepared in a notebook.

parent 607d95b1
No related branches found
No related tags found
No related merge requests found
{
"metadata": {
"name": "",
"signature": "sha256:8a9edae870851327562c4e01d6fc9a2c2a00319e9c6353ac21dada578d06d83f"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"GC percentage"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def calc_gc_percent(seq):\n",
" at_count, gc_count = 0, 0\n",
" for char in seq:\n",
" if char in ('A', 'T'):\n",
" at_count += 1\n",
" elif char in ('G', 'C'):\n",
" gc_count += 1\n",
" \n",
" return gc_count * 100.0 / (gc_count + at_count) \n",
"\n",
"print \"The sequence 'CAGG' has a %GC of {:.2f}\".format(\n",
" calc_gc_percent(\"CAGG\"))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"The sequence 'CAGG' has a %GC of 75.00\n"
]
}
],
"prompt_number": 47
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Adding user input"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user_input = 'CACG'"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 48
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#!/usr/bin/env python\n",
"import sys\n",
"\n",
"def calc_gc_percent(seq):\n",
" at_count, gc_count = 0, 0\n",
" for char in seq:\n",
" if char in ('A', 'T'):\n",
" at_count += 1\n",
" elif char in ('G', 'C'):\n",
" gc_count += 1\n",
"\n",
" return gc_count * 100.0 / (gc_count + at_count) \n",
"\n",
"input_seq = user_input\n",
"print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
" input_seq, calc_gc_percent(input_seq))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ZeroDivisionError",
"evalue": "float division by zero",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 16\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
"\u001b[1;32m<ipython-input-51-0c3101a450ef>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
]
}
],
"prompt_number": 51
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Adding .upper() for convenience:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user_input = 'cacg'"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def calc_gc_percent(seq):\n",
" at_count, gc_count = 0, 0\n",
" for char in seq.upper():\n",
" if char in ('A', 'T'):\n",
" at_count += 1\n",
" elif char in ('G', 'C'):\n",
" gc_count += 1\n",
"\n",
" return gc_count * 100.0 / (gc_count + at_count) \n",
"\n",
"input_seq = user_input\n",
"print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
" input_seq, calc_gc_percent(input_seq))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"The sequence 'cacg' has a %GC of 75.00\n"
]
}
],
"prompt_number": 43
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Adding a sensible docstring"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#!/usr/bin/env python\n",
"import sys\n",
"\n",
"def calc_gc_percent(seq):\n",
" \"\"\"\n",
" Calculates the GC percentage of the given sequence.\n",
"\n",
" Arguments:\n",
" - seq - the input sequence (string).\n",
"\n",
" Returns:\n",
" - GC percentage (float).\n",
"\n",
" The returned value is always <= 100.0\n",
" \"\"\"\n",
" at_count, gc_count = 0, 0\n",
" # Change input to all caps to allow for non-capital\n",
" # input sequence.\n",
" for char in seq.upper():\n",
" if char in ('A', 'T'):\n",
" at_count += 1\n",
" elif char in ('G', 'C'):\n",
" gc_count += 1\n",
"\n",
" return gc_count * 100.0 / (gc_count + at_count) \n",
"\n",
"input_seq = sys.argv[1]\n",
"print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
" input_seq, calc_gc_percent(input_seq))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ZeroDivisionError",
"evalue": "float division by zero",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 29\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
"\u001b[1;32m<ipython-input-44-42591751c28f>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 25\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 26\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margv\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
]
}
],
"prompt_number": 44
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"help(calc_gc_percent)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Trying unexpected input"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user_input = 'ACTG123'"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def calc_gc_percent(seq):\n",
" at_count, gc_count = 0, 0\n",
" for char in seq.upper():\n",
" if char in ('A', 'T'):\n",
" at_count += 1\n",
" elif char in ('G', 'C'):\n",
" gc_count += 1\n",
"\n",
" return gc_count * 100.0 / (gc_count + at_count) \n",
"\n",
"input_seq = user_input\n",
"print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
" input_seq, calc_gc_percent(input_seq))"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Adding input checks"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user_input = 'ACTG123'"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user_input = ''"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 54
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def calc_gc_percent(seq):\n",
" \"\"\"\n",
" Calculates the GC percentage of the given sequence.\n",
"\n",
" Arguments:\n",
" - seq - the input sequence (string).\n",
"\n",
" Returns:\n",
" - GC percentage (float).\n",
"\n",
" The returned value is always <= 100.0\n",
" \"\"\"\n",
" at_count, gc_count = 0, 0\n",
" # Change input to all caps to allow for non-capital\n",
" # input sequence.\n",
" for char in seq.upper():\n",
" if char in ('A', 'T'):\n",
" at_count += 1\n",
" elif char in ('G', 'C'):\n",
" gc_count += 1\n",
" else:\n",
" raise ValueError(\n",
" \"Unexpected character found: {}. Only \"\n",
" \"ACTGs are allowed.\".format(char))\n",
" \n",
" return gc_count * 100.0 / (gc_count + at_count)\n",
"\n",
"input_seq = user_input\n",
"print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
" input_seq, calc_gc_percent(input_seq))\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ZeroDivisionError",
"evalue": "float division by zero",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 29\u001b[0m print \"The sequence '{}' has a %GC of {:.2f}\".format(\n\u001b[1;32m---> 30\u001b[1;33m input_seq, calc_gc_percent(input_seq))\n\u001b[0m",
"\u001b[1;32m<ipython-input-55-3d66cef1b29a>\u001b[0m in \u001b[0;36mcalc_gc_percent\u001b[1;34m(seq)\u001b[0m\n\u001b[0;32m 24\u001b[0m \"ACTGs are allowed.\".format(char))\n\u001b[0;32m 25\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 26\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgc_count\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m100.0\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mgc_count\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mat_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0minput_seq\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0muser_input\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
]
}
],
"prompt_number": 55
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Adding corner cases"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user_input = ''"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 56
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def calc_gc_percent(seq):\n",
" \"\"\"\n",
" Calculates the GC percentage of the given sequence.\n",
"\n",
" Arguments:\n",
" - seq - the input sequence (string).\n",
"\n",
" Returns:\n",
" - GC percentage (float).\n",
"\n",
" The returned value is always <= 100.0 (0.0 for an empty sequence).\n",
" \"\"\"\n",
" at_count, gc_count = 0, 0\n",
" # Change input to all caps to allow for non-capital\n",
" # input sequence.\n",
" for char in seq.upper():\n",
" if char in ('A', 'T'):\n",
" at_count += 1\n",
" elif char in ('G', 'C'):\n",
" gc_count += 1\n",
" else:\n",
" raise ValueError(\n",
" \"Unexpected character found: {}. Only \"\n",
" \"ACTGs are allowed.\".format(char))\n",
"\n",
" # Corner case handling: empty input sequence.\n",
" try:\n",
" return gc_count * 100.0 / (gc_count + at_count)\n",
" except ZeroDivisionError:\n",
" return 0.0\n",
"\n",
"input_seq = user_input\n",
"print \"The sequence '{}' has a %GC of {:.2f}\".format(\n",
" input_seq, calc_gc_percent(input_seq))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"The sequence '' has a %GC of 0.00\n"
]
}
],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment