From be3451dc932089be9ca72cfe1632af36666bd21f Mon Sep 17 00:00:00 2001 From: mlefter <m.lefter@lumc.nl> Date: Tue, 19 Sep 2017 08:12:13 +0200 Subject: [PATCH] More python small changes --- more_python/03_more_python_goodness_1.ipynb | 260 ++++++++++---------- more_python/03_more_python_goodness_2.ipynb | 114 ++++++--- 2 files changed, 218 insertions(+), 156 deletions(-) diff --git a/more_python/03_more_python_goodness_1.ipynb b/more_python/03_more_python_goodness_1.ipynb index fed127b..48d1949 100644 --- a/more_python/03_more_python_goodness_1.ipynb +++ b/more_python/03_more_python_goodness_1.ipynb @@ -140,7 +140,9 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import sys" @@ -156,7 +158,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sys?" @@ -228,22 +232,22 @@ "To use `sys.argv` in our script, open a text editor and edit the script by adding an import statement, capturing the `sys.argv` value, and editing our last `print` line:\n", "\n", "```python\n", - " #!/usr/bin/env python\n", - " import sys\n", - "\n", - " def calc_gc_percent(seq):\n", - " at_count, gc_count = 0, 0\n", - " for char in seq:\n", - " if char in ('A', 'T'):\n", - " at_count += 1\n", - " elif char in ('G', 'C'):\n", - " gc_count += 1\n", - "\n", - " return gc_count * 100.0 / (gc_count + at_count) \n", - "\n", - " input_seq = sys.argv[1]\n", - " print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", - " input_seq, calc_gc_percent(input_seq))\n", + "#!/usr/bin/env python\n", + "import sys\n", + "\n", + "def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq:\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "input_seq = sys.argv[1]\n", + "print \"The 
sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))\n", "```\n", "To test it, you can run the following command in your shell:\n", "\n", @@ -267,7 +271,9 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "my_str = 'Hello again, ipython!'" @@ -441,15 +447,15 @@ "\n", "Let's use `upper()` to fortify our function. It should now look something like this:\n", "```python\n", - " def calc_gc_percent(seq):\n", - " at_count, gc_count = 0, 0\n", - " for char in seq.upper():\n", - " if char in ('A', 'T'):\n", - " at_count += 1\n", - " elif char in ('G', 'C'):\n", - " gc_count += 1\n", - "\n", - " return gc_count * 100.0 / (gc_count + at_count) \n", + "def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", "```\n", "And run it (in whichever way you prefer). Do you get the expected output?" 
] @@ -484,35 +490,35 @@ "\n", "Open your script again in a text editor, and add the following comments and docstrings: \n", "```python\n", - " #!/usr/bin/env python\n", - " import sys\n", - "\n", - " def calc_gc_percent(seq):\n", - " \"\"\"\n", - " Calculates the GC percentage of the given sequence.\n", - "\n", - " Arguments:\n", - " - seq - the input sequence (string).\n", - "\n", - " Returns:\n", - " - GC percentage (float).\n", - "\n", - " The returned value is always <= 100.0\n", - " \"\"\"\n", - " at_count, gc_count = 0, 0\n", - " # Change input to all caps to allow for non-capital\n", - " # input sequence.\n", - " for char in seq.upper():\n", - " if char in ('A', 'T'):\n", - " at_count += 1\n", - " elif char in ('G', 'C'):\n", - " gc_count += 1\n", - "\n", - " return gc_count * 100.0 / (gc_count + at_count) \n", - "\n", - " input_seq = sys.argv[1]\n", - " print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", - " input_seq, calc_gc_percent(input_seq))\n", + "#!/usr/bin/env python\n", + "import sys\n", + "\n", + "def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "input_seq = sys.argv[1]\n", + "print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))\n", "```" ] }, @@ -633,13 +639,14 @@ "We don't always want to let exceptions stop program flow, sometimes we want to provide alternative flow. The `try ... 
except` block allows you to do this.\n", "\n", "The syntax is:\n", - "\n", - " try:\n", - " # Statements that may raise exceptions.\n", - " # [...]\n", - " except {exception type}:\n", - " # What to do when the exceptionis raised.\n", - " # [...]" + "```python\n", + "try:\n", + " # Statements that may raise exceptions.\n", + " # [...]\n", + "except {exception type}:\n", + " # What to do when the exception is raised.\n", + " # [...]\n", + "```" ] }, { @@ -651,36 +658,36 @@ "Let's change our script by adding a `try ... except` block:\n", "\n", "```python\n", - " def calc_gc_percent(seq):\n", - " \"\"\"\n", - " Calculates the GC percentage of the given sequence.\n", - "\n", - " Arguments:\n", - " - seq - the input sequence (string).\n", - "\n", - " Returns:\n", - " - GC percentage (float).\n", - "\n", - " The returned value is always <= 100.0\n", - " \"\"\"\n", - " at_count, gc_count = 0, 0\n", - " # Change input to all caps to allow for non-capital\n", - " # input sequence.\n", - " for char in seq.upper():\n", - " if char in ('A', 'T'):\n", - " at_count += 1\n", - " elif char in ('G', 'C'):\n", - " gc_count += 1\n", - " else:\n", - " raise ValueError(\n", - " \"Unexpeced character found: {}. 
Only \"\n", - " \"ACTGs are allowed.\".format(char))\n", - "\n", - " # Corner case handling: empty input sequence.\n", - " try:\n", - " return gc_count * 100.0 / (gc_count + at_count)\n", - " except ZeroDivisionError:\n", - " return 0.0\n", + "def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " else:\n", + " raise ValueError(\n", + " \"Unexpeced character found: {}. Only \"\n", + " \"ACTGs are allowed.\".format(char))\n", + "\n", + " # Corner case handling: empty input sequence.\n", + " try:\n", + " return gc_count * 100.0 / (gc_count + at_count)\n", + " except ZeroDivisionError:\n", + " return 0.0\n", "```" ] }, @@ -696,19 +703,20 @@ "\n", "Example of code that violates this principle:\n", "```python\n", - " try:\n", - " my_function()\n", - " my_other_function()\n", - " except ValueError:\n", - " my_fallback_function()\n", + "try:\n", + " my_function()\n", + " my_other_function()\n", + "except ValueError:\n", + " my_fallback_function()\n", "```\n", "A better way would be:\n", "```python\n", - " try:\n", - " my_function()\n", - " except ValueError:\n", - " my_fallback_function()\n", - " my_other_function()" + "try:\n", + " my_function()\n", + "except ValueError:\n", + " my_fallback_function()\n", + "my_other_function()\n", + "```" ] }, { @@ -719,21 +727,21 @@ "\n", "The following code is syntactically valid, but *never* use it in your real scripts / programs:\n", "```python\n", - " try:\n", - " my_function()\n", - " except:\n", - " my_fallback_function()\n", 
+ "try:\n", + " my_function()\n", + "except:\n", + " my_fallback_function()\n", "```\n", "*Always* use the full exception name when handling exceptions, to make for a much cleaner code:\n", "```python\n", - " try:\n", - " my_function()\n", - " except ValueError:\n", - " my_fallback_function()\n", - " except TypeError:\n", - " my_other_fallback_function()\n", - " except IndexError:\n", - " my_final_function()\n", + "try:\n", + " my_function()\n", + "except ValueError:\n", + " my_fallback_function()\n", + "except TypeError:\n", + " my_other_fallback_function()\n", + "except IndexError:\n", + " my_final_function()\n", "```" ] }, @@ -745,9 +753,9 @@ " \n", "We could have written our last exception block like so:\n", "```python\n", - " if gc_count + at_count == 0:\n", - " return 0.0\n", - " return gc_count * 100.0 / (gc_count + at_count)\n", + "if gc_count + at_count == 0:\n", + " return 0.0\n", + "return gc_count * 100.0 / (gc_count + at_count)\n", "```\n", "Both approaches are correct and have their own plus and minuses in general. However in this case, I would argue that EAFP is better since it makes the code more readable." 
] @@ -766,7 +774,9 @@ { "cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from IPython.core.display import HTML\n", @@ -919,21 +929,21 @@ "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, diff --git a/more_python/03_more_python_goodness_2.ipynb b/more_python/03_more_python_goodness_2.ipynb index 0cf23e7..4c45b26 100644 --- a/more_python/03_more_python_goodness_2.ipynb +++ b/more_python/03_more_python_goodness_2.ipynb @@ -130,7 +130,9 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import seq_toolbox" @@ -166,7 +168,9 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "seq_toolbox.calc_gc_percent?" 
@@ -202,7 +206,9 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from seq_toolbox import calc_gc_percent" @@ -238,7 +244,9 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from seq_toolbox import calc_gc_percent as gc_calc" @@ -305,7 +313,9 @@ { "cell_type": "code", "execution_count": 9, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import os" @@ -354,7 +364,9 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "my_filename = 'input.fastq'" @@ -475,7 +487,9 @@ { "cell_type": "code", "execution_count": 18, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import sys" @@ -595,7 +609,9 @@ { "cell_type": "code", "execution_count": 23, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import math" @@ -715,7 +731,9 @@ { "cell_type": "code", "execution_count": 29, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import random" @@ -817,7 +835,9 @@ { "cell_type": "code", "execution_count": 34, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import re" @@ -826,7 +846,9 @@ { "cell_type": "code", "execution_count": 35, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "my_seq = 'CAGTCAGT'" @@ -835,7 +857,9 @@ { "cell_type": "code", "execution_count": 36, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "results1 = re.search(r'CA.+CA', my_seq)" @@ -864,7 +888,9 @@ { "cell_type": "code", "execution_count": 38, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "results2 = re.search(r'CCC..', my_seq)" @@ -955,7 +981,9 @@ { 
"cell_type": "code", "execution_count": 40, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "open?" @@ -973,7 +1001,9 @@ { "cell_type": "code", "execution_count": 41, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fh = open('data/short_file.txt')" @@ -1043,7 +1073,9 @@ { "cell_type": "code", "execution_count": 44, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fh.seek(0)" @@ -1079,7 +1111,9 @@ { "cell_type": "code", "execution_count": 46, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fh.seek(0)" @@ -1155,7 +1189,9 @@ { "cell_type": "code", "execution_count": 50, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fh.seek(0)" @@ -1246,7 +1282,9 @@ { "cell_type": "code", "execution_count": 54, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fh.close()" @@ -1284,7 +1322,9 @@ { "cell_type": "code", "execution_count": 58, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fw = open('data/my_file.txt', 'w')" @@ -1327,7 +1367,9 @@ { "cell_type": "code", "execution_count": 60, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fw.write('This is my first line ')" @@ -1336,7 +1378,9 @@ { "cell_type": "code", "execution_count": 61, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fw.write('Still on my first line\\n')" @@ -1345,7 +1389,9 @@ { "cell_type": "code", "execution_count": 62, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fw.write('Now on my second line')" @@ -1361,7 +1407,9 @@ { "cell_type": "code", "execution_count": 63, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "fw.close()" @@ -1399,7 +1447,9 @@ { "cell_type": "code", "execution_count": 65, - 
"metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "os.remove('data/my_file.txt')" @@ -1676,7 +1726,9 @@ { "cell_type": "code", "execution_count": 71, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from IPython.core.display import HTML\n", @@ -1824,21 +1876,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, -- GitLab