diff --git a/README.md b/README.md index 7345fecf93ace469895810a4f2627e2e1e90e635..83da5b6e753d8185beb74eaea5f2104118b1e140 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ be done in your own time (i.e., during the afternoons). Please note that the above mentioned date and location are subject to change. -Program +Program and Materials ------- - Mornings: presentations. @@ -31,13 +31,13 @@ Program | Day | Time | Lesson | Teacher | |-----------------|-------|------------------------------------ |----------| -| Monday, 18/9 | 9-10 | Welcome, Introduction to Python (1) | Mihai | -| | 10-11 | Introduction to Python (2) | Jeroen | -| | 11-12 | Introduction to Python (3) | Mihai | +| Monday, 18/9 | 9-10 | [Welcome][lesson_01], [Introduction to Python (1)][lesson_02_01] | Mihai | +| | 10-11 | [Introduction to Python (2)][lesson_02_02] | Jeroen | +| | 11-12 | [Introduction to Python (3)][lesson_02_03] | Mihai | | | 12-13 | Practical help | | | Tuesday, 19/9 | 9-10 | Assignments review | | -| | 10-11 | More Python Goodness (1) | Mihai | -| | 11-12 | More Python Goodness (2) | Mihai | +| | 10-11 | [More Python Goodness (1)][lesson_03_01] | Mihai | +| | 11-12 | [More Python Goodness (2)][lesson_03_02] | Mihai | | | 12-13 | IPython Notebook | Mark | | Wednesday, 20/9 | 9-10 | Assignments review | | | | 10-11 | Data manipulation (NumPy, Pandas) | Mark | @@ -48,29 +48,22 @@ Program | | 11-12 | Biopython | Guy | | | 12-13 | Putting everything together | Jeroen | +Some of the lessons are slideshows, whereas others are just +notebooks we scroll through during class. The links above are all one-page +static renderings on [IPython Notebook Viewer](http://nbviewer.ipython.org/). -Materials ---------- -The top-level directory contains materials for the following lessons: +Assignments +------- +- [First day](https://classroom.github.com/a/QU2iPYKn). -1. Welcome ([slides][lesson_01]) -2. 
Introduction to Python ([slides 1][lesson_02_01], [slides 2][lesson_02_02], - [slides 3][lesson_02_03]) -3. More Python Goodness ([notebook 1][lesson_03_01], [notebook 2][lesson_03_02]) -4. Working with NumPy arrays ([slides][lesson_04]) -5. IPython Notebook ([notebook][lesson_05]) -6. Plotting with matplotlib ([notebook][lesson_06]) -7. Painting Pandas ([slides][lesson_07]) -8. Object-oriented programming ([slides][lesson_08]) -9. A sip of Biopython ([notebook 1][lesson_09_01], [notebook 2][lesson_09_02]) [lesson_01]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/introduction/01_welcome.ipynb -[lesson_02_01]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/02%20-%20Introduction%20to%20Python%20(1).ipynb -[lesson_02_02]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/02%20-%20Introduction%20to%20Python%20(2).ipynb -[lesson_02_03]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/02%20-%20Introduction%20to%20Python%20(3).ipynb -[lesson_03_01]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/03%20-%20More%20Python%20goodness%20(1).ipynb -[lesson_03_02]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/03%20-%20More%20Python%20goodness%20(2).ipynb +[lesson_02_01]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/introduction/02_introduction_to_python_1.ipynb +[lesson_02_02]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/introduction/02_introduction_to_python_2.ipynb +[lesson_02_03]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/introduction/02_introduction_to_python_3.ipynb +[lesson_03_01]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/more_python/03_more_python_goodness_1.ipynb +[lesson_03_02]: 
http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/more_python/03_more_python_goodness_2.ipynb [lesson_04]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/04%20-%20Working%20with%20NumPy%20arrays.ipynb [lesson_05]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/05%20-%20IPython%20Notebook.ipynb [lesson_06]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/06%20-%20Plotting%20with%20matplotlib.ipynb @@ -79,17 +72,10 @@ The top-level directory contains materials for the following lessons: [lesson_09_01]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/09%20-%20A%20sip%20of%20Biopython%20(1).ipynb [lesson_09_02]: http://nbviewer.ipython.org/urls/git.lumc.nl/courses/programming-course/raw/master/09%20-%20A%20sip%20of%20Biopython%20(2).ipynb -As indicated, some of the lessons are slideshows, whereas others are just -notebooks we scroll through during class. The links above are all one-page -static renderings on [IPython Notebook Viewer](http://nbviewer.ipython.org/). - -We also have a -[repository with material for the assignments](https://git.lumc.nl/courses/programming-course-assignments). - Software installation --------------------- -See the instructions in [INSTALL.md](/INSTALL.md). +See the instructions [here](https://docs.anaconda.com/anaconda/install/). 
Notebooks --------- diff --git a/images/python_logo.svg b/images/python_logo.svg new file mode 100644 index 0000000000000000000000000000000000000000..2f45aadacef503e2ab4777e848211675c6276412 --- /dev/null +++ b/images/python_logo.svg @@ -0,0 +1,65 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + version="1.1" + viewBox="0 0 1052.4 368.09" + id="svg2" + inkscape:version="0.91 r13725" + sodipodi:docname="python_logo.svg" + width="1052.4" + height="368.09003"> + <metadata + id="metadata14"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> + <defs + id="defs12" /> + <sodipodi:namedview + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1" + objecttolerance="10" + gridtolerance="10" + guidetolerance="10" + inkscape:pageopacity="0" + inkscape:pageshadow="2" + inkscape:window-width="2495" + inkscape:window-height="1416" + id="namedview10" + showgrid="false" + inkscape:zoom="0.8009039" + inkscape:cx="589.44223" + inkscape:cy="227.21656" + inkscape:window-x="65" + inkscape:window-y="24" + inkscape:window-maximized="1" + inkscape:current-layer="svg2" /> + <g + transform="translate(324.63321,-501.97596)" + id="g4"> + <path + d="m 524.02,846.67 c -33.839,-1.2297 -54.784,-11.264 -60.113,-28.8 l -0.6194,-2.0382 -0.0869,-34.378 c -0.0994,-39.346 -0.0645,-40.143 2.0299,-46.408 3.8218,-11.432 12.859,-18.803 26.993,-22.016 l 3.1253,-0.71039 38.862,-0.17214 c 27.843,-0.12334 39.388,-0.26278 40.716,-0.49179 7.7818,-1.3418 12.887,-3.3508 
17.679,-6.9578 5.7253,-4.309 10.451,-11.685 12.563,-19.607 1.5862,-5.9503 1.5436,-5.3002 1.715,-26.202 l 0.15935,-19.431 27.992,0 2.7176,0.75892 c 14.561,4.0662 25.133,17.862 29.633,38.671 1.7311,8.0037 1.7492,8.2998 1.7524,28.718 0.003,19.87 -0.006,20.044 -1.3943,26.562 -1.3645,6.4064 -3.1506,11.619 -5.8196,16.985 -5.3828,10.822 -13.32,18.146 -23.357,21.553 -5.7335,1.9464 -0.93423,1.7836 -57.342,1.9449 l -50.82,0.14528 0,9.24 33.699,0.13588 33.699,0.13589 0.13588,16.578 c 0.0747,9.1177 0.0421,17.36 -0.0725,18.316 -0.27725,2.3131 -1.2585,5.0581 -2.6625,7.4485 -1.4458,2.4616 -5.8352,6.8642 -8.8149,8.8417 -9.0505,6.0063 -22.849,9.6675 -41.036,10.888 -6.1192,0.41068 -14.785,0.52882 -21.333,0.29086 z m 46.743,-21.481 c 6.4954,-1.053 11.145,-7.4099 10.074,-13.773 -0.88336,-5.2488 -4.9357,-9.3086 -10.155,-10.173 -7.9672,-1.3202 -15.213,5.9958 -13.795,13.929 1.0313,5.7708 5.6585,9.8336 11.701,10.274 0.2242,0.0163 1.2026,-0.0992 2.1741,-0.25667 z m -144.8,-49.035 c -7.6439,-1.3898 -15.171,-6.0227 -20.491,-12.611 -10.075,-12.479 -15.162,-33.284 -14.523,-59.406 0.3979,-16.278 2.8315,-28.362 7.782,-38.641 6.3813,-13.25 15.916,-21.108 28.787,-23.724 2.8193,-0.57313 2.9606,-0.57462 52.615,-0.55479 27.384,0.0109 49.989,-0.0572 50.235,-0.1514 0.38956,-0.14949 0.44634,-0.75521 0.44634,-4.7622 0,-2.525 -0.0306,-4.6203 -0.0679,-4.6562 -0.0374,-0.0359 -15.232,-0.12763 -33.767,-0.20383 l -33.699,-0.13854 -0.07,-17.514 c -0.0766,-19.183 -0.092,-18.96 1.5503,-22.483 5.6933,-12.213 22.465,-19.633 49.339,-21.829 1.9431,-0.1588 7.2017,-0.36064 11.686,-0.44852 29.753,-0.58311 51.579,4.8575 63.052,15.717 1.271,1.2031 2.9273,2.9988 3.6807,3.9906 1.7132,2.255 3.8458,6.5603 4.6624,9.4125 l 0.62243,2.1741 0,36.145 c 0,33.739 -0.0327,36.316 -0.49158,38.726 -0.59212,3.1098 -2.0144,7.4792 -3.1997,9.8301 -4.1039,8.1393 -11.665,14.003 -21.689,16.82 -6.5086,1.8288 -3.5525,1.7086 -46.637,1.8956 -42.787,0.18564 -40.201,0.086 -46.2,1.7812 -11.651,3.2927 -20.21,12.367 -23.687,25.113 -1.5631,5.7299 
-1.5284,5.1946 -1.6996,26.225 l -0.15925,19.567 -13.045,0.0447 c -9.8474,0.0338 -13.532,-0.0439 -15.034,-0.31707 z m 71.232,-162.58 c 4.9456,-2.3234 7.924,-7.7698 7.0286,-12.853 -0.82592,-4.6888 -4.0455,-8.4308 -8.4148,-9.7804 -6.983,-2.1568 -14.269,2.2939 -15.558,9.5032 -1.0478,5.8633 2.3432,11.571 8.0784,13.598 1.2817,0.45295 2.1059,0.54476 4.303,0.47938 2.3937,-0.0712 2.9375,-0.18417 4.5622,-0.94747 z" + id="path6" + inkscape:connector-curvature="0" + style="fill:#fed142" /> + <path + d="m 425.96,776.15 c -7.6439,-1.3898 -15.171,-6.0227 -20.491,-12.611 -10.075,-12.479 -15.162,-33.284 -14.523,-59.406 0.3979,-16.278 2.8315,-28.362 7.782,-38.641 6.3813,-13.25 15.916,-21.108 28.787,-23.724 2.8193,-0.57313 2.9606,-0.57462 52.615,-0.55479 27.384,0.0109 49.989,-0.0572 50.235,-0.1514 0.38956,-0.14949 0.44634,-0.75521 0.44634,-4.7622 0,-2.525 -0.0306,-4.6203 -0.0679,-4.6562 -0.0374,-0.0359 -15.232,-0.12763 -33.767,-0.20383 l -33.699,-0.13854 -0.07,-17.514 c -0.0766,-19.183 -0.092,-18.96 1.5503,-22.483 5.6933,-12.213 22.465,-19.633 49.339,-21.829 1.9431,-0.1588 7.2017,-0.36064 11.686,-0.44852 29.753,-0.58311 51.579,4.8575 63.052,15.717 1.271,1.2031 2.9273,2.9988 3.6807,3.9906 1.7132,2.255 3.8458,6.5603 4.6624,9.4125 l 0.62243,2.1741 0,36.145 c 0,33.739 -0.0327,36.316 -0.49158,38.726 -0.59212,3.1098 -2.0144,7.4792 -3.1997,9.8301 -4.1039,8.1393 -11.665,14.003 -21.689,16.82 -6.5086,1.8288 -3.5525,1.7086 -46.637,1.8956 -42.787,0.18564 -40.201,0.086 -46.2,1.7812 -11.651,3.2927 -20.21,12.367 -23.687,25.113 -1.5631,5.7299 -1.5284,5.1946 -1.6996,26.225 l -0.15925,19.567 -13.045,0.0447 c -9.8474,0.0338 -13.532,-0.0439 -15.034,-0.31707 z m 71.232,-162.58 c 4.9456,-2.3234 7.924,-7.7698 7.0286,-12.853 -0.82592,-4.6888 -4.0455,-8.4308 -8.4148,-9.7804 -6.983,-2.1568 -14.269,2.2939 -15.558,9.5032 -1.0478,5.8633 2.3432,11.571 8.0784,13.598 1.2817,0.45295 2.1059,0.54476 4.303,0.47938 2.3937,-0.0712 2.9375,-0.18417 4.5622,-0.94747 z" + id="path8" + inkscape:connector-curvature="0" + 
style="fill:#3571a3" /> + </g> +</svg> diff --git a/introduction/01_welcome.ipynb b/introduction/01_welcome.ipynb index dd1d9299576d53ebcf61b793e928a0822ca8b724..305472de6764392057fff89e667941bf13a6fa4c 100644 --- a/introduction/01_welcome.ipynb +++ b/introduction/01_welcome.ipynb @@ -11,8 +11,7 @@ "<span style=\"font-size: 200%\">Programming in Python</span>\n", "===\n", "\n", - "<br>\n", - "\n" + "<br>" ] }, { @@ -27,12 +26,36 @@ "===\n", "\n", "* Aimed at PhD students, Postdocs, researchers, analysts, ...\n", - "* Programming as a tool to do your research.\n", - "* Slightly focussed on bioinformatics.\n", + "* Focus on:\n", + " - Basic understanding of Python.\n", + " - Programming as a tool to do your research.\n", + " - Slightly biased towards bioinformatics.\n", + "\n", "\n", "**Note:** This is its third session." ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Hands on!\n", + "===\n", + "\n", + "**Programming is fun!**\n", + "* You only learn programming by doing it.\n", + "* Lecture format:\n", + " - Blended teaching + exercising.\n", + "* Have your laptop open during the lessons.\n", + "* Repeat the code from the slides, play around with it.\n", + "* Do the session exercises.\n", + "* There will be a few assignments to submit." 
+ ] + }, { "cell_type": "markdown", "metadata": { @@ -75,24 +98,37 @@ "Program\n", "===\n", "\n", - "| Day | Time | Lesson | Teacher | \n", - "|-----------------|-------|------------------------------------ |----------|\n", - "| Monday, 18/9 | 9-10 | Welcome, Introduction to Python (1) | Mihai | \n", - "| | 10-11 | Introduction to Python (2) | Jeroen | \n", - "| | 11-12 | Introduction to Python (3) | Mihai | \n", - "| | 12-13 | Practical help | | \n", - "| Tuesday, 19/9 | 9-10 | Assignments review | |\n", - "| | 10-11 | More Python Goodness (1) | Mihai | \n", - "| | 11-12 | More Python Goodness (2) | Mihai | \n", - "| | 12-13 | IPython Notebook | Mark | \n", - "| Wednesday, 20/9 | 9-10 | Assignments review | | \n", - "| | 10-11 | Data manipulation (NumPy, Pandas) | Mark | \n", - "| | 11-12 | Data visualisation (1) | Guy | \n", - "| | 12-13 | Data visualisation (2) | Guy | \n", - "| Thursday, 21/9 | 9-10 | Assignments review | | \n", - "| | 10-11 | Object-oriented programming | Jonathan | \n", - "| | 11-12 | Biopython | Guy | \n", - "| | 12-13 | Putting everything together | Jeroen |\n", + "| Day | Time | Lesson | Teacher | \n", + "|-------------------|-------|------------------------------------ |----------|\n", + "| **Monday, 18/9** | 9 - 9:30 | Welcome, Setup | Mihai | \n", + "| | 9:30 - 10:30 | Introduction to Python (1) | Mihai |\n", + "| | 10:30 - 11:30 | Introduction to Python (2) | Jeroen | \n", + "| | 11:30 - 12:30 | Introduction to Python (3) | Mihai | \n", + "| | 12:30 - 13:00 | Practical help | | \n", + "| **Tuesday, 19/9** | 9-10 | Assignments review | |\n", + "| | 10-11 | More Python Goodness (1) | Mihai | \n", + "| | 11-12 | More Python Goodness (2) | Mihai | \n", + "| | 12-13 | IPython Notebook | Mark | " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "| Day | Time | Lesson | Teacher | \n", + "|---------------------|-------|------------------------------------ 
|----------|\n", + "| **Wednesday, 20/9** | 9-10 | Assignments review | | \n", + "| | 10-11 | Data manipulation (NumPy, Pandas) | Mark | \n", + "| | 11-12 | Data visualisation (1) | Guy | \n", + "| | 12-13 | Data visualisation (2) | Guy | \n", + "| **Thursday, 21/9** | 9-10 | Assignments review | | \n", + "| | 10-11 | Object-oriented programming | Jonathan | \n", + "| | 11-12 | Biopython | Guy | \n", + "| | 12-13 | Putting everything together | Jeroen |\n", "\n", "All slides are linked as one-page documents from the course website: [https://git.lumc.nl/courses/programming-course](https://git.lumc.nl/courses/programming-course)" ] @@ -108,12 +144,15 @@ "Software requirements\n", "===\n", "\n", - "* Anaconda\n", - " * Python 2.7\n", - " * NumPy, matplotlib, Biopython\n", + "* Anaconda:\n", + " * Python 2.7\n", + " * Comes with all that's required:\n", + " - Python interpreter, Jupyter Notebook.\n", + " - Libraries:\n", + " - NumPy, Pandas, matplotlib, Bokeh, Biopython, ...\n", + " * [Installation instructions](https://docs.anaconda.com/anaconda/install/). \n", "* Git\n", - "\n", - "Anaconda installation instructions [here](https://docs.anaconda.com/anaconda/install/)." + "\n" ] }, { @@ -124,15 +163,17 @@ } }, "source": [ - "Exercises\n", + "Assignments\n", "===\n", "\n", - "You only learn programming by doing it.\n", - "\n", - "* Have your laptop open during the lessons.\n", - "* Repeat the code from the slides, play around with it.\n", - "* Do the exercises.\n", - "* There will be a few assignments to submit." + "* We make use of GitHub Classroom.\n", + "* GitHub account required.\n", + "* Receive link with assignment repository.\n", + "* Own forked repository to work on.\n", + " - Clone it.\n", + " - Code.\n", + " - Push it.\n", + "* Direct file upload to repository is also possible." 
] }, { @@ -152,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": { "slideshow": { "slide_type": "skip" @@ -191,7 +232,7 @@ "<IPython.core.display.HTML object>" ] }, - "execution_count": 3, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -208,7 +249,7 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "slide" + "slide_type": "-" } }, "source": [ @@ -217,22 +258,23 @@ } ], "metadata": { + "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 2", "language": "python", - "name": "python3" + "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" + "pygments_lexer": "ipython2", + "version": "2.7.12" } }, "nbformat": 4, diff --git a/introduction/02_introduction_to_python_1.ipynb b/introduction/02_introduction_to_python_1.ipynb index 4a01e69444ff586a2851aa1f8ae5eb9dc2ef0851..9d8504ff43e2273d3b3a6ade9f7837267bd1da27 100644 --- a/introduction/02_introduction_to_python_1.ipynb +++ b/introduction/02_introduction_to_python_1.ipynb @@ -9,17 +9,7 @@ }, "source": [ "<span style=\"font-size: 200%\">Introduction to Python (1)</span>\n", - "===\n", - "\n", - "<br>\n", - "\n", - "[Martijn Vermaat](mailto:m.vermaat.hg@lumc.nl), [Department of Human Genetics, Leiden University Medical Center](http://humgen.nl)\n", - "\n", - "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl), [Department of Human Genetics, Leiden University Medical Center](http://humgen.nl)\n", - "\n", - "Based on: [Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", - "\n", - "License: [Creative Commons Attribution 3.0 License (CC-by)](http://creativecommons.org/licenses/by/3.0)" + "===\n" ] }, { @@ -37,6 +27,7 @@ "---\n", "\n", "* Created early 90's by 
Guido van Rossem at CWI.\n", + " - Name: Monty Python.\n", "* General purpose, high-level programming language.\n", "* Design is driven by code readability." ] @@ -56,7 +47,8 @@ "---\n", "\n", "* Interpreted, no separate compilation step needed.\n", - "* Imperative and object-oriented programming (and some functional programming).\n", + "* Imperative and object-oriented programming.\n", + " - And some functional programming.\n", "* Dynamic type system.\n", "* Automatic memory management.\n", "\n", @@ -404,6 +396,13 @@ "35 / 5" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Division is a bit weird: if you give it integer arguments, the result will also be an integer." + ] + }, { "cell_type": "code", "execution_count": 8, @@ -424,13 +423,6 @@ "36 / 5" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Division is a bit weird: if you give it integer arguments, the result will also be an integer." - ] - }, { "cell_type": "markdown", "metadata": { @@ -606,15 +598,12 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "subslide" + "slide_type": "slide" } }, "source": [ - "Python as a calculator\n", - "===\n", - "\n", "Variables\n", - "---\n", + "===\n", "\n", "* We can use names to reference values (variables).\n", "* No need to declare them first or define the type." 
@@ -622,16 +611,17 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ - "a = 1.3e20" + "a = 1.3e20\n", + "b = 2" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -640,7 +630,7 @@ "1.3e+20" ] }, - "execution_count": 16, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -651,23 +641,23 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "2.84e+20" + "3.2e+20" ] }, - "execution_count": 17, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "b = a + 1.2e19\n", - "b * 2" + "c = a + 1.5e19 * b\n", + "c * 2" ] }, { @@ -678,7 +668,7 @@ } }, "source": [ - "Python's type system (1/2)\n", + "Python's type system (1/4)\n", "===\n", "\n", "Every value has a type, view it using `type`:" @@ -746,8 +736,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, "source": [ + "Python's type system (2/4)\n", + "===\n", + "\n", "Another example of a builtin datatype is `str`, we'll see more later:" ] }, @@ -775,13 +772,10 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "subslide" + "slide_type": "-" } }, "source": [ - "Python's type system (2/2)\n", - "===\n", - "\n", "Some operations are defined on more than one type, possibly with different meanings." ] }, @@ -807,14 +801,61 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, "source": [ + "Python's type system (3/4)\n", + "===\n", + "\n", "Dynamic typing means that variables can be assigned values of different types during runtime." 
] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.3e+20" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "float" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -823,7 +864,7 @@ "str" ] }, - "execution_count": 23, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -835,8 +876,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, "source": [ + "Python's type system (4/4)\n", + "===\n", + "\n", "Python is strongly typed, meaning that operations on values with incompatible types are forbidden." ] }, @@ -860,6 +908,41 @@ "'beer' + 34" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "<div class=\"alert alert-success\">\n", + "<h1>Hands on!</h1>\n", + "\n", + "<ol>\n", + " <li>We’ve seen that b = 2 is legal.\n", + " <ul>\n", + " <li> What about 2 = b? </li>\n", + " <li>How about a = b = 1?</li>\n", + " </ul>\n", + " </li>\n", + " <li>In math notation you can multiply x and y like this: xy.\n", + " What happens if you try that in Python?</li>\n", + " <li>How many seconds are there in 42 minutes and 42 seconds?</li>\n", + " <li>How many miles are there in 16 kilometers? 
Hint: there are 1.61 kilometers in a mile.</li>\n", + " <li>Let's assume that you run a 42 kilometer race in 4 hours 42 minutes and 42 seconds.\n", + " <ul>\n", + " <li>What is your average pace (time per mile in minutes and seconds)?</li>\n", + " <li>What is your average speed in miles per hour?</li>\n", + " </ul>\n", + " </li>\n", + " <li>Use string operations to reference string 'tra la la la' in a variable named <i>song</i>.</li>\n", + " <li>If an article costs 249 Euros including the 19% Value Added Tax (VAT), what is the actual VAT amount in Euros for the corresponding article?</li>\n", + "</ol>\n", + "\n", + "</div>" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -898,7 +981,7 @@ "</style>" ], "text/plain": [ - "<IPython.core.display.HTML at 0x3463a50>" + "<IPython.core.display.HTML object>" ] }, "execution_count": 1, @@ -909,30 +992,54 @@ "source": [ "from IPython.display import HTML\n", "def css_styling():\n", - " styles = open('styles/custom.css', 'r').read()\n", + " styles = open('../styles/custom.css', 'r').read()\n", " return HTML('<style>' + styles + '</style>')\n", "css_styling()" ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Acknowledgements\n", + "========\n", + "\n", + "Martijn Vermaat\n", + "\n", + "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl)\n", + "\n", + "Based on\n", + "---------\n", + "[Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", + "\n", + "License\n", + "--------\n", + "[Creative Commons Attribution 3.0 License (CC-by)](http://creativecommons.org/licenses/by/3.0)" + ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 2", "language": "python", - "name": "python3" + "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" + "pygments_lexer": "ipython2", + "version": "2.7.12" } }, "nbformat": 4, diff --git a/introduction/02_introduction_to_python_2.ipynb b/introduction/02_introduction_to_python_2.ipynb index d7010ad5a4bc1c27708e86ce779188c6d3977840..436f3ad00f30cbe98daf89dcde4d9772c27d0cec 100644 --- a/introduction/02_introduction_to_python_2.ipynb +++ b/introduction/02_introduction_to_python_2.ipynb @@ -1,1883 +1,1928 @@ { - "metadata": { - "celltoolbar": "Slideshow", - "name": "", - "signature": "sha256:7b4c4307ccc127e749bcc6fae78706177f4e98973022d13424e5a50a8325f24e" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ + "cells": [ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "<span style=\"font-size: 200%\">Introduction to Python (2)</span>\n", - "===\n", - "\n", - "<br>\n", - "\n", - "[Martijn Vermaat](mailto:m.vermaat.hg@lumc.nl), [Department of Human Genetics, Leiden University Medical Center](http://humgen.nl)\n", - "\n", - "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl), [Department of Human Genetics, Leiden University Medical Center](http://humgen.nl)\n", - "\n", - "Based on: [Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", - "\n", - "License: [Creative Commons Attribution 3.0 License (CC-by)](http://creativecommons.org/licenses/by/3.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "<span style=\"font-size: 200%\">Introduction to Python (2)</span>\n", + "===" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Lists\n", + "---\n", + "\n", + "Mutable sequences of values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Lists\n", - "---\n", - "\n", - "Mutable sequences of values." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "l = [2, 5, 2, 3, 7]\n", - "type(l)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 1, - "text": [ - "list" - ] - } - ], - "prompt_number": 1 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lists can be heterogeneous, but we typically don't use that." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a = 'spezi'\n", - "[3, 'abc', 1.3e20, [a, a, 2]]" - ], - "language": "python", + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 2, - "text": [ - "[3, 'abc', 1.3e+20, ['spezi', 'spezi', 2]]" - ] - } - ], - "prompt_number": 2 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "l = [2, 5, 2, 3, 7]\n", + "type(l)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lists can be heterogeneous, but we typically don't use that." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[3, 'abc', 1.3e+20, ['spezi', 'spezi', 2]]" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Tuples\n", - "---\n", - "\n", - "Immutable sequences of values." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "t = 'white', 77, 1.5\n", - "type(t)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 3, - "text": [ - "tuple" - ] - } - ], - "prompt_number": 3 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "color, width, scale = t\n", - "width" - ], - "language": "python", + "execution_count": 2, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 4, - "text": [ - "77" - ] - } - ], - "prompt_number": 4 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "a = 'spezi'\n", + "[3, 'abc', 1.3e20, [a, a, 2]]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Tuples\n", + "---\n", + "\n", + "Immutable sequences of values." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tuple" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Strings (1/2)\n", - "---\n", - "\n", - "Immutable sequences of characters." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'a string can be written in single quotes'" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 5, - "text": [ - "'a string can be written in single quotes'" - ] - } - ], - "prompt_number": 5 - }, - { - "cell_type": "markdown", + "execution_count": 3, "metadata": {}, - "source": [ - "Strings can also be written with double quotes, or over multiple lines with triple-quotes." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "\"this makes it easier to use the ' character\"" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 6, - "text": [ - "\"this makes it easier to use the ' character\"" - ] - } - ], - "prompt_number": 6 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "\"\"\"This is a multiline string.\n", - "\n", - "You see? I continued after a blank line.\"\"\"" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 7, - "text": [ - "'This is a multiline string.\\n\\nYou see? I continued after a blank line.'" - ] - } - ], - "prompt_number": 7 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "t = 'white', 77, 1.5\n", + "type(t)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Strings (2/2)\n", - "---\n", - "\n", - "A common operation is formatting strings using argument substitutions." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'{} times {} equals {:.2f}'.format('pi', 2, 6.283185307179586)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 8, - "text": [ - "'pi times 2 equals 6.28'" - ] - } - ], - "prompt_number": 8 - }, - { - "cell_type": "markdown", + "execution_count": 4, "metadata": {}, - "source": [ - "Accessing arguments by position or name is more readable." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'{1} times {0} equals {2:.2f}'.format('pi', 2, 6.283185307179586)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 9, - "text": [ - "'2 times pi equals 6.28'" - ] - } - ], - "prompt_number": 9 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'{number} times {amount} equals {result:.2f}'.format(number='pi', amount=2,\n", - " result=6.283185307179586)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 10, - "text": [ - "'pi times 2 equals 6.28'" - ] - } - ], - "prompt_number": 10 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "color, width, scale = t\n", + "width" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Strings (1/2)\n", + "---\n", + "\n", + "Immutable sequences of characters." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'a string can be written in single quotes'" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Common operations (1/2)\n", - "---\n", - "\n", - "All sequence types support concatenation, membership/substring tests, indexing, and slicing." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "[1, 2, 3] + [4, 5, 6]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 11, - "text": [ - "[1, 2, 3, 4, 5, 6]" - ] - } - ], - "prompt_number": 11 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'bier' in 'we drinken bier vanaf half 5'" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 12, - "text": [ - "True" - ] - } - ], - "prompt_number": 12 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'abcdefghijkl'[5]" - ], - "language": "python", + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 13, - "text": [ - "'f'" - ] - } - ], - "prompt_number": 13 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "'a string can be written in single quotes'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Strings can also be written with double quotes, or over multiple lines with triple-quotes." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"this makes it easier to use the ' character\"" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Slicing\n", - "---\n", - "\n", - "Slice `s` from `i` to `j` with `s[i:j]`." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'abcdefghijkl'[4:8]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 14, - "text": [ - "'efgh'" - ] - } - ], - "prompt_number": 14 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'abcdefghijkl'[:3]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 15, - "text": [ - "'abc'" - ] - } - ], - "prompt_number": 15 - }, - { - "cell_type": "markdown", + "execution_count": 6, "metadata": {}, - "source": [ - "We can also define the step `k` with `s[i:j:k]`." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'abcdefghijkl'[7:3:-1]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 16, - "text": [ - "'hgfe'" - ] - } - ], - "prompt_number": 16 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "\"this makes it easier to use the ' character\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'This is a multiline string.\\n\\nYou see? I continued after a blank line.'" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Common operations (2/2)\n", - "---\n", - "\n", - "Contrary to strings and tuples, lists are mutable. We can also get their length, smallest/largest item, and number/position of certain items." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "len('attacgataggcatccgt')" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 17, - "text": [ - "18" - ] - } - ], - "prompt_number": 17 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "max([17, 86, 34, 51])" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 18, - "text": [ - "86" - ] - } - ], - "prompt_number": 18 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "('atg', 22, True, 'atg').count('atg')" - ], - "language": "python", + "execution_count": 7, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 19, - "text": [ - "2" - ] - } - ], - "prompt_number": 19 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"This is a multiline string.\n", + "\n", + "You see? I continued after a blank line.\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Strings (2/2)\n", + "---\n", + "\n", + "A common operation is formatting strings using argument substitutions." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'pi times 2 equals 6.28'" + ] }, - "source": [ - "Sequence types\n", - "===\n", - "\n", - "Additional operations with lists\n", - "---\n", - "\n", - "We can replace, add, remove, reverse and sort items in-place." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "l = [1, 2, 3, 4]\n", - "l[3] = 7\n", - "l.append(1)\n", - "l[1:3] = [3, 2]\n", - "l.sort()\n", - "l.reverse()" - ], - "language": "python", + "execution_count": 8, "metadata": {}, - "outputs": [], - "prompt_number": 20 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "l" - ], - "language": "python", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 21, - "text": [ - "[7, 3, 2, 1, 1]" - ] - } - ], - "prompt_number": 21 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "'{} times {} equals {:.2f}'.format('pi', 2, 6.283185307179586)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Accessing arguments by position or name is more readable." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2 times pi equals 6.28'" + ] }, - "source": [ - "Dictionaries\n", - "===\n", - "\n", - "Dictionaries map *hashable* values to arbitrary objects\n", - "---\n", - "\n", - "* All built-in immutable objects are hashable.\n", - "* No built-in mutable objects are hashable." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d = {'a': 27, 'b': 18, 'c': 12}\n", - "type(d)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 22, - "text": [ - "dict" - ] - } - ], - "prompt_number": 22 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d['e'] = 17\n", - "'e' in d" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 23, - "text": [ - "True" - ] - } - ], - "prompt_number": 23 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.update({'a': 18, 'f': 2})\n", - "d" - ], - "language": "python", + "execution_count": 9, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 24, - "text": [ - "{'a': 18, 'b': 18, 'c': 12, 'e': 17, 'f': 2}" - ] - } - ], - "prompt_number": 24 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "'{1} times {0} equals {2:.2f}'.format('pi', 2, 6.283185307179586)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'pi times 2 equals 6.28'" + ] }, - "source": [ - "Dictionaries\n", - "===\n", - "\n", - "Accessing dictionary content\n", - "---" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d['b']" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 25, - "text": [ - "18" - ] - } - ], - "prompt_number": 25 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.keys()" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 26, - "text": [ - "['a', 'c', 'b', 'e', 'f']" - ] - } - ], - 
"prompt_number": 26 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.values()" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 27, - "text": [ - "[18, 12, 18, 17, 2]" - ] - } - ], - "prompt_number": 27 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.items()" - ], - "language": "python", + "execution_count": 10, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 28, - "text": [ - "[('a', 18), ('c', 12), ('b', 18), ('e', 17), ('f', 2)]" - ] - } - ], - "prompt_number": 28 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "'{number} times {amount} equals {result:.2f}'.format(number='pi', amount=2,\n", + " result=6.283185307179586)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Common operations (1/2)\n", + "---\n", + "\n", + "All sequence types support concatenation, membership/substring tests, indexing, and slicing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4, 5, 6]" + ] }, - "source": [ - "Sets\n", - "===\n", - "\n", - "Mutable unordered collections of hashable values without duplication\n", - "---" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "x = {12, 28, 21, 17}\n", - "type(x)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 29, - "text": [ - "set" - ] - } - ], - "prompt_number": 29 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "x.add(12)\n", - "x" - ], - "language": "python", + "execution_count": 11, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 30, - "text": [ - "{12, 17, 21, 28}" - ] - } - ], - "prompt_number": 30 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "x.discard(21)\n", - "x" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 31, - "text": [ - "{12, 17, 28}" - ] - } - ], - "prompt_number": 31 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "[1, 2, 3] + [4, 5, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] }, - "source": [ - "Sets\n", - "===\n", - "\n", - "Operations with sets\n", - "---\n", - "\n", - "We can test for membership and apply many common set operations such as union and intersect." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "17 in {12, 28, 21, 17}" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 32, - "text": [ - "True" - ] - } - ], - "prompt_number": 32 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "{12, 28, 21, 17} | {12, 18, 11}" - ], - "language": "python", + "execution_count": 12, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 33, - "text": [ - "{11, 12, 17, 18, 21, 28}" - ] - } - ], - "prompt_number": 33 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "{12, 28, 21, 17} & {12, 18, 11}" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 34, - "text": [ - "{12}" - ] - } - ], - "prompt_number": 34 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "'bier' in 'we drinken bier vanaf half 5'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'f'" + ] }, - "source": [ - "Booleans\n", - "===\n", - "\n", - "Boolean values and operations\n", - "---\n", - "\n", - "The two boolean values are written `False` and `True`." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "True or False" - ], - "language": "python", + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 35, - "text": [ - "True" - ] - } - ], - "prompt_number": 35 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "True and False" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "'abcdefghijkl'[5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Slicing\n", + "---\n", + "\n", + "Slice `s` from `i` to `j` with `s[i:j]`." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'efgh'" + ] + }, + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 36, - "text": [ - "False" - ] - } - ], - "prompt_number": 36 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "not False" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "'abcdefghijkl'[4:8]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'abc'" + ] + }, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 37, - "text": [ - "True" - ] - } - ], - "prompt_number": 37 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "'abcdefghijkl'[:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also define the step `k` with `s[i:j:k]`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'hgfe'" + ] }, - "source": [ - "Booleans\n", - "===\n", - "\n", - "Comparisons\n", - "---\n", - "\n", - "Comparisons can be done on all objects and return a boolean value." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "22 * 3 > 66" - ], - "language": "python", + "execution_count": 16, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 38, - "text": [ - "False" - ] - } - ], - "prompt_number": 38 - }, - { - "cell_type": "markdown", + "output_type": "execute_result" + } + ], + "source": [ + "'abcdefghijkl'[7:3:-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Common operations (2/2)\n", + "---\n", + "\n", + "Contrary to strings and tuples, lists are mutable. We can also get their length, smallest/largest item, and number/position of certain items." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 17, "metadata": {}, - "source": [ - "We have two equivalence relations: value equality (`==`) and object identity (`is`)." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a, b = [1, 2, 3], [1, 2, 3]\n", - "a == b" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "len('attacgataggcatccgt')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "86" + ] + }, + "execution_count": 18, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 39, - "text": [ - "True" - ] - } - ], - "prompt_number": 39 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a is b" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "max([17, 86, 34, 51])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 19, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 40, - "text": [ - "False" - ] - } - ], - "prompt_number": 40 - }, + "output_type": "execute_result" + } + ], + "source": [ + "('atg', 22, True, 'atg').count('atg')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sequence types\n", + "===\n", + "\n", + "Additional operations with lists\n", + "---\n", + "\n", + "We can replace, add, remove, reverse and sort items in-place." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "l = [1, 2, 3, 4]\n", + "l[3] = 7\n", + "l.append(1)\n", + "l[1:3] = [3, 2]\n", + "l.sort()\n", + "l.reverse()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "data": { + "text/plain": [ + "[7, 3, 2, 1, 1]" + ] }, - "source": [ - "Booleans\n", - "===\n", - "\n", - "`if` statements\n", - "---\n", - "\n", - "(The `print` statement writes a string representation of the given value.)" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "if 26 <= 17:\n", - " print 'Fact: 26 is less than or equal to 17'\n", - "elif (26 + 8 > 14) == True:\n", - " print 'Did we need the ` == True` part here?'\n", - "else:\n", - " print 'Nothing seems true'" - ], - "language": "python", + "execution_count": 21, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "Did we need the ` == True` part here?\n" - ] - } - ], - "prompt_number": 41 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "l" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Dictionaries\n", + "===\n", + "\n", + "Dictionaries map *hashable* values to arbitrary objects\n", + "---\n", + "\n", + "* All built-in immutable objects are hashable.\n", + "* No built-in mutable objects are hashable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict" + ] }, - "source": [ - "Booleans\n", - "===\n", - "\n", - "`while` statements\n", - "---\n", - "\n", - "Our first looping control structure just repeats until the given expression evaluates to `False`." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "i = 0\n", - "while i < 5:\n", - " print i\n", - " i += 1" - ], - "language": "python", + "execution_count": 22, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "0\n", - "1\n", - "2\n", - "3\n", - "4\n" - ] - } - ], - "prompt_number": 42 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "d = {'a': 27, 'b': 18, 'c': 12}\n", + "type(d)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] }, - "source": [ - "Notes about syntax\n", - "===\n", - "\n", - "Indentation\n", - "---\n", - "\n", - "Python uses indentation to delimit blocks\n", - "\n", - "* Instead of `begin ... end` or `{ ... }` in other languages.\n", - "* Always increase indentation by *4 spaces*, never use tabs.\n", - "* In any case, be consistent." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "if False:\n", - " if False:\n", - " print 'Why am I here?'\n", - " else:\n", - " while True:\n", - " print 'When will it stop?'\n", - " print \"And we're back to the first indentation level\"" - ], - "language": "python", + "execution_count": 23, "metadata": {}, - "outputs": [], - "prompt_number": 43 - }, - { - "cell_type": "markdown", + "output_type": "execute_result" + } + ], + "source": [ + "d['e'] = 17\n", + "'e' in d" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': 18, 'b': 18, 'c': 12, 'e': 17, 'f': 2}" + ] + }, + "execution_count": 24, "metadata": {}, - "source": [ - "Some editors can be configured to behave just like that." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "d.update({'a': 18, 'f': 2})\n", + "d" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Dictionaries\n", + "===\n", + "\n", + "Accessing dictionary content\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] }, - "source": [ - "Notes about syntax\n", - "===\n", - "\n", - "Comments\n", - "---\n", - "\n", - "Comments are prepended by `#` and completely ignored." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Add 42 to this list.\n", - "l.append(42)" - ], - "language": "python", + "execution_count": 25, "metadata": {}, - "outputs": [], - "prompt_number": 44 - }, - { - "cell_type": "markdown", + "output_type": "execute_result" + } + ], + "source": [ + "d['b']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['a', 'c', 'b', 'e', 'f']" + ] + }, + "execution_count": 26, "metadata": {}, - "source": [ - "`pass` statements\n", - "---\n", - "\n", - "If you ever need a statement syntactically but don't want to do anything, use `pass`." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "while False:\n", - " # This is never executed anyway.\n", - " pass" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "d.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[18, 12, 18, 17, 2]" + ] + }, + "execution_count": 27, "metadata": {}, - "outputs": [], - "prompt_number": 45 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "d.values()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('a', 18), ('c', 12), ('b', 18), ('e', 17), ('f', 2)]" + ] }, - "source": [ - "Useful built-ins\n", - "===\n", - "\n", - "Getting help\n", - "---\n", - "\n", - "You can get help on almost any object with `help`." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "help(range)" - ], - "language": "python", + "execution_count": 28, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "Help on built-in function range in module __builtin__:\n", - "\n", - "range(...)\n", - " range([start,] stop[, step]) -> list of integers\n", - " \n", - " Return a list containing an arithmetic progression of integers.\n", - " range(i, j) returns [i, i+1, i+2, ..., j-1]; start (!) defaults to 0.\n", - " When step is given, it specifies the increment (or decrement).\n", - " For example, range(4) returns [0, 1, 2, 3]. The end point is omitted!\n", - " These are exactly the valid indices for a list of 4 elements.\n", - "\n" - ] - } - ], - "prompt_number": 46 - }, - { - "cell_type": "markdown", + "output_type": "execute_result" + } + ], + "source": [ + "d.items()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Sets\n", + "===\n", + "\n", + "Mutable unordered collections of hashable values without duplication\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "set" + ] + }, + "execution_count": 29, "metadata": {}, - "source": [ - "In IPython you can do it faster by typing:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "range?" 
- ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "x = {12, 28, 21, 17}\n", + "type(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{12, 17, 21, 28}" + ] + }, + "execution_count": 30, "metadata": {}, - "outputs": [], - "prompt_number": 47 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "x.add(12)\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{12, 17, 28}" + ] }, - "source": [ - "Useful built-ins\n", - "===\n", - "\n", - "We'll shortly use the following built-in functions." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "range(5, 16)" - ], - "language": "python", + "execution_count": 31, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 48, - "text": [ - "[5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]" - ] - } - ], - "prompt_number": 48 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "zip(['red', 'white', 'blue'], range(3))" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "x.discard(21)\n", + "x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Sets\n", + "===\n", + "\n", + "Operations with sets\n", + "---\n", + "\n", + "We can test for membership and apply many common set operations such as union and intersection."
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 32, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 49, - "text": [ - "[('red', 0), ('white', 1), ('blue', 2)]" - ] - } - ], - "prompt_number": 49 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "list('abcdefghijk')" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "17 in {12, 28, 21, 17}" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{11, 12, 17, 18, 21, 28}" + ] + }, + "execution_count": 33, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 50, - "text": [ - "['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k']" - ] - } - ], - "prompt_number": 50 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "{12, 28, 21, 17} | {12, 18, 11}" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{12}" + ] }, - "source": [ - "Iteration\n", - "===\n", - "\n", - "Iterating over a sequence\n", - "---" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "colors = ['red', 'white', 'blue', 'orange']\n", - "cities = ['leiden', 'utrecht', 'warmond', 'san francisco']" - ], - "language": "python", + "execution_count": 34, "metadata": {}, - "outputs": [], - "prompt_number": 51 - }, - { - "cell_type": "markdown", + "output_type": "execute_result" + } + ], + "source": [ + "{12, 28, 21, 17} & {12, 18, 11}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Booleans\n", + "===\n", + "\n", + "Boolean values and 
operations\n", + "---\n", + "\n", + "The two boolean values are written `False` and `True`." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 35, "metadata": {}, - "source": [ - "The `for` statement can iterate over sequence items." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for color in colors:\n", - " print color" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "True or False" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 36, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "red\n", - "white\n", - "blue\n", - "orange\n" - ] - } - ], - "prompt_number": 52 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for character in 'blue':\n", - " print character" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "True and False" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 37, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "b\n", - "l\n", - "u\n", - "e\n" - ] - } - ], - "prompt_number": 53 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "not False" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Booleans\n", + "===\n", + "\n", + "Comparisons\n", + "---\n", + "\n", + "Comparisons can be done on all objects and return a boolean value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] }, - "source": [ - "Iteration\n", - "===\n", - "\n", - "Python anti-patterns\n", - "---\n", - "\n", - "These are common for programmers coming from other languages." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "i = 0\n", - "while i < len(colors):\n", - " print colors[i]\n", - " i += 1" - ], - "language": "python", + "execution_count": 38, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "red\n", - "white\n", - "blue\n", - "orange\n" - ] - } - ], - "prompt_number": 54 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for i in range(len(colors)):\n", - " print colors[i]" - ], - "language": "python", + "output_type": "execute_result" + } + ], + "source": [ + "22 * 3 > 66" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have two equivalence relations: value equality (`==`) and object identity (`is`)." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 39, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "red\n", - "white\n", - "blue\n", - "orange\n" - ] - } - ], - "prompt_number": 55 - }, - { - "cell_type": "markdown", + "output_type": "execute_result" + } + ], + "source": [ + "a, b = [1, 2, 3], [1, 2, 3]\n", + "a == b" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 40, "metadata": {}, - "source": [ - "We call them *unpythonic*." 
+ "output_type": "execute_result" + } + ], + "source": [ + "a is b" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Booleans\n", + "===\n", + "\n", + "`if` statements\n", + "---\n", + "\n", + "(The `print` statement writes a string representation of the given value.)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Did we need the ` == True` part here?\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "Iteration\n", - "===\n", - "\n", - "Using values *and* indices\n", - "---" + } + ], + "source": [ + "if 26 <= 17:\n", + " print 'Fact: 26 is less than or equal to 17'\n", + "elif (26 + 8 > 14) == True:\n", + " print 'Did we need the ` == True` part here?'\n", + "else:\n", + " print 'Nothing seems true'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Booleans\n", + "===\n", + "\n", + "`while` statements\n", + "---\n", + "\n", + "Our first looping control structure just repeats until the given expression evaluates to `False`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n", + "1\n", + "2\n", + "3\n", + "4\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for i, color in enumerate(colors):\n", - " print i, '->', color" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "0 -> red\n", - "1 -> white\n", - "2 -> blue\n", - "3 -> orange\n" - ] - } - ], - "prompt_number": 56 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Taking two sequences together\n", - "---" + } + ], + "source": [ + "i = 0\n", + "while i < 5:\n", + " print i\n", + " i += 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Hands on!\n", + "===\n", + "\n", + "Try to guess the outcome of the following statements:\n", + "\n", + " 2 * 3 > 4\n", + " 2 * (3 > 4)\n", + " 2 * (4 > 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Notes about syntax\n", + "===\n", + "\n", + "Indentation\n", + "---\n", + "\n", + "Python uses indentation to delimit blocks\n", + "\n", + "* Instead of `begin ... end` or `{ ... }` in other languages.\n", + "* Always increase indentation by *4 spaces*, never use tabs.\n", + "* In any case, be consistent." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "if False:\n", + " if False:\n", + " print 'Why am I here?'\n", + " else:\n", + " while True:\n", + " print 'When will it stop?'\n", + " print \"And we're back to the first indentation level\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some editors can be configured to behave just like that." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Notes about syntax\n", + "===\n", + "\n", + "Comments\n", + "---\n", + "\n", + "Comments start with `#` and are completely ignored." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Add 42 to this list.\n", + "l.append(42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`pass` statements\n", + "---\n", + "\n", + "If you ever need a statement syntactically but don't want to do anything, use `pass`." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "while False:\n", + " # This is never executed anyway.\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Useful built-ins\n", + "===\n", + "\n", + "Getting help\n", + "---\n", + "\n", + "You can get help on almost any object with `help`." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on built-in function range in module __builtin__:\n", + "\n", + "range(...)\n", + " range([start,] stop[, step]) -> list of integers\n", + " \n", + " Return a list containing an arithmetic progression of integers.\n", + " range(i, j) returns [i, i+1, i+2, ..., j-1]; start (!) defaults to 0.\n", + " When step is given, it specifies the increment (or decrement).\n", + " For example, range(4) returns [0, 1, 2, 3].
The end point is omitted!\n", + " These are exactly the valid indices for a list of 4 elements.\n", + "\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for city, color in zip(cities, colors):\n", - " print city, '->', color" - ], - "language": "python", + } + ], + "source": [ + "help(range)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In IPython you can do it faster by typing:" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "range?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Useful built-ins\n", + "===\n", + "\n", + "We'll shortly use the following built-in functions." + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]" + ] + }, + "execution_count": 48, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "leiden -> red\n", - "utrecht -> white\n", - "warmond -> blue\n", - "san francisco -> orange\n" - ] - } - ], - "prompt_number": 57 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "range(5, 16)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('red', 0), ('white', 1), ('blue', 2)]" + ] }, - "source": [ - "Iteration\n", - "===\n", - "\n", - "Other iterables\n", - "---\n", - "\n", - "Iterating over a dictionary yields keys." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for key in {'a': 33, 'b': 17, 'c': 18}:\n", - " print key" - ], - "language": "python", + "execution_count": 49, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "a\n", - "c\n", - "b\n" - ] - } - ], - "prompt_number": 58 - }, - { - "cell_type": "markdown", + "output_type": "execute_result" + } + ], + "source": [ + "zip(['red', 'white', 'blue'], range(3))" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k']" + ] + }, + "execution_count": 50, "metadata": {}, - "source": [ - "Iterating over a file yields lines." + "output_type": "execute_result" + } + ], + "source": [ + "list('abcdefghijk')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Iteration\n", + "===\n", + "\n", + "Iterating over a sequence\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "colors = ['red', 'white', 'blue', 'orange']\n", + "cities = ['leiden', 'utrecht', 'warmond', 'san francisco']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `for` statement can iterate over sequence items." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "red\n", + "white\n", + "blue\n", + "orange\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for line in open('data/short_file.txt'):\n", - " print line" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "this short file has two lines\n", - "\n", - "it is used in the example code\n", - "\n" - ] - } - ], - "prompt_number": 59 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There are many more useful iterables in Python." + } + ], + "source": [ + "for color in colors:\n", + " print color" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b\n", + "l\n", + "u\n", + "e\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "$\\S$ Exercise: Iterate over a list\n", - "===\n", - "\n", - "First we are going to make a list and fill it with a simple sequence. Then we are going to use this list to print something.\n", - "\n", - "* Make a list containing the numbers 0, 1, ... 9.\n", - "* Print the last 10 lines of the song ''99 bottles of beer'' using this list." + } + ], + "source": [ + "for character in 'blue':\n", + " print character" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Iteration\n", + "===\n", + "\n", + "Python anti-patterns\n", + "---\n", + "\n", + "These are common for programmers coming from other languages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "red\n", + "white\n", + "blue\n", + "orange\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "$\\S$ Exercise: Analyse a repeat structure\n", - "===\n", - "\n", - "We are going to make a repeating DNA sequence and extract some subsequences from it.\n", - "\n", - "* Make a short tandem repeat that consists of three \"ACGT\" units and five \"TTATT\" units.\n", - "* Print all suffixes of the repeat structure.\n", - "\n", - "**Note:** A suffix is an ending. For example, the word \"spam\" has five suffixes: \"spam\", \"pam\", \"am\", \"m\" and \"\".\n", - "\n", - "* Print all substrings of length 3.\n", - "* Print all unique substrings of length 3.\n", - "\n", - "**Hint:** All elements in a set are unique." + } + ], + "source": [ + "i = 0\n", + "while i < len(colors):\n", + " print colors[i]\n", + " i += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "red\n", + "white\n", + "blue\n", + "orange\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "$\\S$ Exercise: Boolean comparison\n", - "===\n", - "\n", - "Try to guess the outcome of the following statements:\n", - "\n", - " 2 * 3 > 4\n", - " 2 * (3 > 4)\n", - " 2 * (4 > 3)" + } + ], + "source": [ + "for i in range(len(colors)):\n", + " print colors[i]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We call them *unpythonic*." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Iteration\n", + "===\n", + "\n", + "Using values *and* indices\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 -> red\n", + "1 -> white\n", + "2 -> blue\n", + "3 -> orange\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "$\\S$ Exercise: Combining lists\n", - "===\n", - "\n", - "Calculate all coordinates of the line x=y with x < 100.\n", - "\n", - "**Note:** This is the sequence (0, 0), (1, 1), ... (99, 99)" + } + ], + "source": [ + "for i, color in enumerate(colors):\n", + " print i, '->', color" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Taking two sequences together\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "leiden -> red\n", + "utrecht -> white\n", + "warmond -> blue\n", + "san francisco -> orange\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "$\\S$ Exercise: Dictionaries\n", - "===\n", - "We are going to store the output of a function ($f(x) = x^2$) together with its input in a dictionary.\n", + } + ], + "source": [ + "for city, color in zip(cities, colors):\n", + " print city, '->', color" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Iteration\n", + "===\n", + "\n", + "Other iterables\n", + "---\n", + "\n", + "Iterating over a dictionary yields keys." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a\n", + "c\n", + "b\n" + ] + } + ], + "source": [ + "for key in {'a': 33, 'b': 17, 'c': 18}:\n", + " print key" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Iterating over a file yields lines." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "this short file has two lines\n", "\n", - "* Make a dictionary containing all squares smaller than 100.\n", - "* Print the content of this dictionary in english, e.g., \"4 is the square of 2\"." + "it is used in the example code\n", + "\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from IPython.display import HTML\n", - "def css_styling():\n", - " styles = open('styles/custom.css', 'r').read()\n", - " return HTML('<style>' + styles + '</style>')\n", - "css_styling()" - ], - "language": "python", - "metadata": { - "slideshow": { - "slide_type": "skip" - } + } + ], + "source": [ + "for line in open('data/short_file.txt'):\n", + " print line" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are many more useful iterables in Python." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "<div class=\"alert alert-success\">\n", + "<h1>Hands on!</h1>\n", + "\n", + "<ol>\n", + " <li>Make a list with 10 integer elements. 
Sum all the items in the list.</li>\n", + " <li>Make a new list from the above one that does not include the 0th, 4th and 5th elements.</li>\n", + " <li>Sum only the elements from the first list which are between the 2nd and 6th elements.</li>\n", + " <li>Make a new list that includes only the elements that are greater than 10 from the first list.</li>\n", + " <li>Food.\n", + " <ul>\n", + " <li>Create a dictionary for food products called \"prices\" and put some values in it, e.g., \"apples\": 2, \"oranges\": 1.5, \"pears\": 3, ...</li>\n", + " <li>Create a corresponding dictionary called \"stocks\" and put the stock values in it, e.g., \"apples\": 0, \"oranges\": 1, \"pears\": 10, ...</li>\n", + " <li>Print stock and price information for each food item.</li>\n", + " <li>Determine and print how much money you would make if you sold all of your food products.</li>\n", + " </ul>\n", + " </li>\n", + "</ol>\n", + "\n", + "</div>" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Homework assignment\n", + "===\n", + "\n", + "https://classroom.github.com/a/QU2iPYKn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "$\\S$ Exercise: Iterate over a list\n", + "===\n", + "\n", + "First we are going to make a list and fill it with a simple sequence. Then we are going to use this list to print something.\n", + "\n", + "* Make a list containing the numbers 0, 1, ... 9.\n", + "* Print the last 10 lines of the song ''99 bottles of beer'' using this list."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "$\\S$ Exercise: Analyse a repeat structure\n", + "===\n", + "\n", + "We are going to make a repeating DNA sequence and extract some subsequences from it.\n", + "\n", + "* Make a short tandem repeat that consists of three \"ACGT\" units and five \"TTATT\" units.\n", + "* Print all suffixes of the repeat structure.\n", + "\n", + "**Note:** A suffix is an ending. For example, the word \"spam\" has five suffixes: \"spam\", \"pam\", \"am\", \"m\" and \"\".\n", + "\n", + "* Print all substrings of length 3.\n", + "* Print all unique substrings of length 3.\n", + "\n", + "**Hint:** All elements in a set are unique." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "$\\S$ Exercise: Combining lists\n", + "===\n", + "\n", + "Calculate all coordinates of the line x=y with x < 100.\n", + "\n", + "**Note:** This is the sequence (0, 0), (1, 1), ... (99, 99)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "$\\S$ Exercise: Dictionaries\n", + "===\n", + "We are going to store the output of a function ($f(x) = x^2$) together with its input in a dictionary.\n", + "\n", + "* Make a dictionary containing all squares smaller than 100.\n", + "* Print the content of this dictionary in English, e.g., \"4 is the square of 2\"." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "<style>/* Remove the vertical scrollbar added by nbconvert. */\n", + ".reveal {\n", + " overflow: hidden;\n", + "}\n", + "\n", + "/* Workaround some highlight.js bugs in language autodetection. */\n", + "code.objectivec *,\n", + "code.perl *,\n", + "code.cs *,\n", + "code.javascript *,\n", + "code.http * {\n", + " color: black !
important;\n", + " font-weight: normal ! important;\n", + "}\n", + "span.title {\n", + " color: black ! important;\n", + "}\n", + "span.tag {\n", + " color: black ! important;\n", + "}\n", + "span.attribute {\n", + " color: black ! important;\n", + "}\n", + "</style>" + ], + "text/plain": [ + "<IPython.core.display.HTML at 0x1873a50>" + ] }, - "outputs": [ - { - "html": [ - "<style>/* Remove the vertical scrollbar added by nbconvert. */\n", - ".reveal {\n", - " overflow: hidden;\n", - "}\n", - "\n", - "/* Workaround some highlight.js bugs in language autodetection. */\n", - "code.objectivec *,\n", - "code.perl *,\n", - "code.cs *,\n", - "code.javascript *,\n", - "code.http * {\n", - " color: black ! important;\n", - " font-weight: normal ! important;\n", - "}\n", - "span.title {\n", - " color: black ! important;\n", - "}\n", - "span.tag {\n", - " color: black ! important;\n", - "}\n", - "span.attribute {\n", - " color: black ! important;\n", - "}\n", - "</style>" - ], - "metadata": {}, - "output_type": "pyout", - "prompt_number": 1, - "text": [ - "<IPython.core.display.HTML at 0x1873a50>" - ] - } - ], - "prompt_number": 1 + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "from IPython.display import HTML\n", + "def css_styling():\n", + " styles = open('styles/custom.css', 'r').read()\n", + " return HTML('<style>' + styles + '</style>')\n", + "css_styling()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Acknowledgements\n", + "========\n", + "\n", + "Martijn Vermaat\n", + "\n", + "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl)\n", + "\n", + "Based on\n", + "---------\n", + "[Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", + "\n", + "License\n", + "--------\n", + "[Creative Commons Attribution 3.0 License (CC-by)](http://creativecommons.org/licenses/by/3.0)" + ] } - ] -} \ No newline at end of file + ], + 
"metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/introduction/02_introduction_to_python_3.ipynb b/introduction/02_introduction_to_python_3.ipynb index 792c0926b7c98083cfb53aa045d31ddf4fff937a..5c02472834e9838edbf659c0680c02f71a36e576 100644 --- a/introduction/02_introduction_to_python_3.ipynb +++ b/introduction/02_introduction_to_python_3.ipynb @@ -1,892 +1,871 @@ { - "metadata": { - "celltoolbar": "Slideshow", - "name": "", - "signature": "sha256:255f999074050e52200e04b7179ca89a373daf14311e382c4ab05b4444cf392e" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ + "cells": [ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "<span style=\"font-size: 200%\">Introduction to Python (3)</span>\n", - "===\n", - "\n", - "<br>\n", - "\n", - "[Martijn Vermaat](mailto:m.vermaat.hg@lumc.nl), [Department of Human Genetics, Leiden University Medical Center](http://humgen.nl)\n", - "\n", - "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl), [Department of Human Genetics, Leiden University Medical Center](http://humgen.nl)\n", - "\n", - "Based on: [Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", - "\n", - "License: [Creative Commons Attribution 3.0 License (CC-by)](http://creativecommons.org/licenses/by/3.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "Functions\n", - "===\n", - "\n", - "Defining a function\n", - "---\n", - "\n", - "A function definition includes its name, arguments and body." 
+ "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "<span style=\"font-size: 200%\">Introduction to Python (3)</span>\n", + "===" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Functions\n", + "===\n", + "\n", + "A _function_ is a named sequence of statements that performs some piece of work.\n", + "Later on that function can be called by using its name.\n", + "\n", + "Defining a function\n", + "---\n", + "\n", + "A function definition includes its _name_, _arguments_ and _body_." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def add_two(number):\n", + " return number + 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "3\n", + "4\n", + "5\n", + "6\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "def add_two(number):\n", - " return number + 2" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 1 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for i in range(5):\n", - " print add_two(i)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "2\n", - "3\n", - "4\n", - "5\n", - "6\n" - ] - } - ], - "prompt_number": 2 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + } + ], + "source": [ + "for i in range(5):\n", + " print add_two(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Functions\n", + "===\n", + "\n", + "Keyword arguments\n", + "---\n", + "\n", + "Besides regular arguments, functions can have keyword arguments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def add_some_other_number(number, other_number=12):\n", + " return number + other_number" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] }, - "source": [ - "Functions\n", - "===\n", - "\n", - "Keyword arguments\n", - "---\n", - "\n", - "Besides regular arguments, functions can have keyword arguments." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "def add_some_other_number(number, other_number=12):\n", - " return number + other_number" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 3 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "add_some_other_number(2, 6)" - ], - "language": "python", + "execution_count": 4, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 4, - "text": [ - "8" - ] - } - ], - "prompt_number": 4 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "add_some_other_number(3, other_number=4)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 5, - "text": [ - "7" - ] - } - ], - "prompt_number": 5 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "add_some_other_number(5)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 6, - "text": [ - "17" - ] - } - ], - "prompt_number": 6 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "add_some_other_number(2, 6)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "7" + ] }, - "source": 
[ - "Functions\n", - "===\n", - "\n", - "Docstrings\n", - "---\n", - "\n", - "Like many other definitions, functions can have docstrings.\n", - "\n", - "* Docstrings are regular string values which you start the definition body with.\n", - "* You can access an object's docstring using `help`." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "def factorial(n):\n", - " \"\"\"Compute factorial of n in the obious way.\"\"\"\n", - " if n == 0:\n", - " return 1\n", - " else:\n", - " return factorial(n - 1) * n" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 7 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "help(factorial)" - ], - "language": "python", + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "Help on function factorial in module __main__:\n", - "\n", - "factorial(n)\n", - " Compute factorial of n in the obious way.\n", - "\n" - ] - } - ], - "prompt_number": 8 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "add_some_other_number(3, other_number=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "17" + ] }, - "source": [ - "Functions\n", - "===\n", - "\n", - "Functions are values\n", - "---\n", - "\n", - "We can pass functions around just like other values, and call them." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "functions = [add_two, add_some_other_number]\n", - "for function in functions:\n", - " print function(7)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "9\n", - "19\n" - ] - } - ], - "prompt_number": 9 - }, - { - "cell_type": "markdown", + "execution_count": 6, "metadata": {}, - "source": [ - "Simple anonymous functions can be created with `lambda`." + "output_type": "execute_result" + } + ], + "source": [ + "add_some_other_number(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Functions\n", + "===\n", + "\n", + "Docstrings\n", + "---\n", + "\n", + "Like many other definitions, functions can have docstrings.\n", + "\n", + "* Docstrings are regular string values which you start the definition body with.\n", + "* You can access an object's docstring using `help`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def factorial(n):\n", + " \"\"\"Compute factorial of n in the obvious way.\"\"\"\n", + " if n == 0:\n", + " return 1\n", + " else:\n", + " return factorial(n - 1) * n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function factorial in module __main__:\n", + "\n", + "factorial(n)\n", + " Compute factorial of n in the obvious way.\n", + "\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "functions.append(lambda x: x * 7)\n", - "for function in functions:\n", - " print function(4)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "6\n", - "16\n", - "28\n" - ] - } - ], - "prompt_number": 10 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "Functions\n", - "===\n", - "\n", - "Higher-order functions\n", - "---\n", - "\n", - "A function that takes a function as argument is a higher-order function." + } + ], + "source": [ + "help(factorial)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Functions\n", + "===\n", + "\n", + "Functions are values\n", + "---\n", + "\n", + "We can pass functions around just like other values, and call them."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9\n", + "19\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "help(map)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "Help on built-in function map in module __builtin__:\n", - "\n", - "map(...)\n", - " map(function, sequence[, sequence, ...]) -> list\n", - " \n", - " Return a list of the results of applying the function to the items of\n", - " the argument sequence(s). If more than one sequence is given, the\n", - " function is called with an argument list consisting of the corresponding\n", - " item of each sequence, substituting None for missing values when not all\n", - " sequences have the same length. If the function is None, return a list of\n", - " the items of the sequence (or a list of tuples if more than one sequence).\n", - "\n" - ] - } - ], - "prompt_number": 11 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "map(add_two, [1, 2, 3, 4])" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 12, - "text": [ - "[3, 4, 5, 6]" - ] - } - ], - "prompt_number": 12 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "$\\S$ Exercise: k-mer counting (1/2)\n", - "===\n", - "\n", - "Remember the previous exercise of finding (unique) substrings of length 3.\n", - "\n", - "* Make a function from your implementation.\n", - "* Have `k` as an argument to the function.\n", - "* Test the function on several input strings.\n", - "\n", - "**Note:** Editing multi-line statements in the console can be frustrating. 
You can try the QT console (`ipython qtconsole`) or edit your function in an editor with `%edit`:\n", - "\n", - " def my_function(arg):\n", - " print arg * 4\n", - " %edit my_function" + } + ], + "source": [ + "functions = [add_two, add_some_other_number]\n", + "for function in functions:\n", + " print function(7)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Simple anonymous functions can be created with `lambda`." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n", + "16\n", + "28\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "$\\S$ Exercise: k-mer counting (2/2)\n", - "===\n", - "\n", - "Modify your function to use a dictionary with substring counts.\n", - "\n", - "* Use the substrings as dictionary keys.\n", - "* Use the counts as dictionary values.\n", - "* Have the function return the dictionary.\n", - "* Add a docstring to the function.\n", - "* Use the function to print k-mer counts for some strings." + } + ], + "source": [ + "functions.append(lambda x: x * 7)\n", + "for function in functions:\n", + " print function(4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Functions\n", + "===\n", + "\n", + "Higher-order functions\n", + "---\n", + "\n", + "A function that takes a function as argument is a higher-order function." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on built-in function map in module __builtin__:\n", + "\n", + "map(...)\n", + " map(function, sequence[, sequence, ...]) -> list\n", + " \n", + " Return a list of the results of applying the function to the items of\n", + " the argument sequence(s). 
If more than one sequence is given, the\n", + " function is called with an argument list consisting of the corresponding\n", + " item of each sequence, substituting None for missing values when not all\n", + " sequences have the same length. If the function is None, return a list of\n", + " the items of the sequence (or a list of tuples if more than one sequence).\n", + "\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + } + ], + "source": [ + "help(map)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[3, 4, 5, 6]" + ] }, - "source": [ - "Comprehensions\n", - "===\n", - "\n", - "List comprehensions\n", - "---\n", - "\n", - "Similar to mathematical set notation (e.g., $\\{ x ~|~ x \\in \\mathbf R \\land x > 0\\}$), we can create lists." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "[(x, x * x) for x in range(10) if x % 2]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 13, - "text": [ - "[(1, 1), (3, 9), (5, 25), (7, 49), (9, 81)]" - ] - } - ], - "prompt_number": 13 - }, - { - "cell_type": "markdown", + "execution_count": 12, "metadata": {}, - "source": [ - "We can do the same thing using `map` and `filter`, but list comprehensions are often more readable." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "map(lambda x: (x, x * x), filter(lambda x: x %2, range(10)))" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 14, - "text": [ - "[(1, 1), (3, 9), (5, 25), (7, 49), (9, 81)]" - ] - } - ], - "prompt_number": 14 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "map(add_two, [1, 2, 3, 4])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "<div class=\"alert alert-success\">\n", + "<h1>Hands on!</h1>\n", + "\n", + "<ol>\n", + " <li>Write a Python function that returns the maximum of two numbers.</li>\n", + " <li>Write a Python function that returns the maximum of three numbers. Try to reuse the first maximum of two numbers function.</li>\n", + " <li>Write a Python function that accepts a string as parameter. Next, it calculates and prints the number of upper case letters and lower case letters. Make use of the `isupper` and `islower` built-in methods.</li>\n", + "</ol>\n", + "\n", + "</div>" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Comprehensions\n", + "===\n", + "\n", + "List comprehensions\n", + "---\n", + "\n", + "Similar to mathematical set notation (e.g., $\\{ x ~|~ x \\in \\mathbf R \\land x > 0\\}$), we can create lists." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 1), (3, 9), (5, 25), (7, 49), (9, 81)]" + ] }, - "source": [ - "Comprehensions\n", - "===\n", - "\n", - "Set and dictionary comprehensions\n", - "---\n", - "\n", - "Similar notation can be used for (non-empty) sets." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "{c for c in 'LUMC-standard' if 'a' <= c <= 'z'}" - ], - "language": "python", + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 15, - "text": [ - "{'a', 'd', 'n', 'r', 's', 't'}" - ] - } - ], - "prompt_number": 15 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And dictionaries." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "colors = ['red', 'white', 'blue', 'orange']\n", - "{c: len(c) for c in colors}" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 16, - "text": [ - "{'blue': 4, 'orange': 6, 'red': 3, 'white': 5}" - ] - } - ], - "prompt_number": 16 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "[(x, x * x) for x in range(10) if x % 2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can do the same thing using `map` and `filter`, but list comprehensions are often more readable." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(1, 1), (3, 9), (5, 25), (7, 49), (9, 81)]" + ] }, - "source": [ - "Everything is an object\n", - "===\n", - "\n", - "* Objects have properties and methods. \n", - "* Explore them using `dir(o)`, or by typing `o.<tab>` in the IPython interpreter." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dir('abc')[-5:]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 17, - "text": [ - "['swapcase', 'title', 'translate', 'upper', 'zfill']" - ] - } - ], - "prompt_number": 17 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "help('abc'.upper)" - ], - "language": "python", + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "Help on built-in function upper:\n", - "\n", - "upper(...)\n", - " S.upper() -> string\n", - " \n", - " Return a copy of the string S converted to uppercase.\n", - "\n" - ] - } - ], - "prompt_number": 18 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "'abc'.upper()" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 19, - "text": [ - "'ABC'" - ] - } - ], - "prompt_number": 19 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + "output_type": "execute_result" + } + ], + "source": [ + "map(lambda x: (x, x * x), filter(lambda x: x %2, range(10)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Comprehensions\n", + "===\n", + "\n", + "Set and dictionary comprehensions\n", + "---\n", + "\n", + "Similar notation can be used for (non-empty) sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a', 'd', 'n', 'r', 's', 't'}" + ] }, - "source": [ - "Code in files\n", - "===\n", - "\n", - "Running code from a file\n", - "---" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "cat examples/fsquare.py" - ], - "language": "python", + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "d = {}\r\n", - "for i in range(10):\r\n", - " d[i] = i ** 2\r\n", - "\r\n", - "for i in d:\r\n", - " print \"{0} is the square of {1}.\".format(d[i], i)\r\n" - ] - } - ], - "prompt_number": 20 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%%sh\n", - "python examples/fsquare.py" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "0 is the square of 0.\n", - "1 is the square of 1.\n", - "4 is the square of 2.\n", - "9 is the square of 3.\n", - "16 is the square of 4.\n", - "25 is the square of 5.\n", - "36 is the square of 6.\n", - "49 is the square of 7.\n", - "64 is the square of 8.\n", - "81 is the square of 9.\n" - ] - } - ], - "prompt_number": 21 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } + "output_type": "execute_result" + } + ], + "source": [ + "{c for c in 'LUMC-standard' if 'a' <= c <= 'z'}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And dictionaries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'blue': 4, 'orange': 6, 'red': 3, 'white': 5}" + ] }, - "source": [ - "Code in files\n", - "===\n", - "\n", - "Working with files in IPython\n", - "---\n", - "\n", - "The `%run` magic runs the code from a file directly in IPython:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%run examples/fsquare.py" - ], - "language": "python", + "execution_count": 16, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "0 is the square of 0.\n", - "1 is the square of 1.\n", - "4 is the square of 2.\n", - "9 is the square of 3.\n", - "16 is the square of 4.\n", - "25 is the square of 5.\n", - "36 is the square of 6.\n", - "49 is the square of 7.\n", - "64 is the square of 8.\n", - "81 is the square of 9.\n" - ] - } - ], - "prompt_number": 22 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "-" - } + "output_type": "execute_result" + } + ], + "source": [ + "colors = ['red', 'white', 'blue', 'orange']\n", + "{c: len(c) for c in colors}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Everything is an object\n", + "===\n", + "\n", + "* Objects have properties and methods. \n", + "* Explore them using `dir(o)`, or by typing `o.<tab>` in the IPython interpreter." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['swapcase', 'title', 'translate', 'upper', 'zfill']" + ] }, - "source": [ - "You can edit and run a file with `%edit`." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%edit examples/fsquare.py" - ], - "language": "python", + "execution_count": 17, "metadata": {}, - "outputs": [], - "prompt_number": 23 - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "Code in files\n", - "===\n", - "\n", - "Saving your IPython session history to a file\n", - "---\n", - "\n", - "Give the `%save` magic a name and a range of input lines and it will save them to a `.py` file with that name:\n", - "\n", - " In [4]: %save my_session 1-3\n", - " The following commands were written to file `my_session.py`:\n", - " a = 4\n", - " a += 3\n", - " b = a" + "output_type": "execute_result" + } + ], + "source": [ + "dir('abc')[-5:]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on built-in function upper:\n", + "\n", + "upper(...)\n", + " S.upper() -> string\n", + " \n", + " Return a copy of the string S converted to uppercase.\n", + "\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } + } + ], + "source": [ + "help('abc'.upper)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'ABC'" + ] }, - "source": [ - "$\\S$ Exercise: Running code from a file\n", - "===\n", - "\n", - "* Save your k-mer counting code to a file `kmer_counting.py`.\n", - "* Include some code using it on an example string and printing the results.\n", - "* Run the code from the command line." 
+ "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'abc'.upper()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Code in files\n", + "===\n", + "\n", + "Running code from a file\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d = {}\r\n", + "for i in range(10):\r\n", + " d[i] = i ** 2\r\n", + "\r\n", + "for i in d:\r\n", + " print \"{0} is the square of {1}.\".format(d[i], i)\r\n" ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "Further reading\n", - "===\n", - "\n", - "* [The Python Tutorial](http://docs.python.org/2/tutorial/index.html)\n", - " <br>\n", - " From the official Python documentation.\n", - "\n", - "\n", - "* [Learn Python The Hard Way](http://learnpythonthehardway.org/book/)\n", - " <br>\n", - " Book on learning Python by exercises, online available for free.\n", - "\n", - "\n", - "* [The Hitchhiker's Guide to Python](http://docs.python-guide.org/en/latest/)\n", - " <br>\n", - " This opinionated guide exists to provide both novice and expert Python developers a best-practice handbook to the installation, configuration, and usage of Python on a daily basis.\n", - "\n", - "\n", - "* [A Primer on Scientific Programming with Python](http://codingcat.com/knjige/python/A%20Primer%20on%20Scientific%20Programming%20with%20Python.pdf)\n", - " <br>\n", - " Complete PDF version of the book. The aim of this book is to teach computer programming using examples from mathematics and the natural sciences.\n", - "\n", - "\n", - "* [Python Module of the Week](http://pymotw.com/)\n", - " <br>\n", - " Series of articles providing a tour of the Python standard library through short examples." 
+ } + ], + "source": [ + "cat examples/fsquare.py" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 is the square of 0.\n", + "1 is the square of 1.\n", + "4 is the square of 2.\n", + "9 is the square of 3.\n", + "16 is the square of 4.\n", + "25 is the square of 5.\n", + "36 is the square of 6.\n", + "49 is the square of 7.\n", + "64 is the square of 8.\n", + "81 is the square of 9.\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from IPython.display import HTML\n", - "def css_styling():\n", - " styles = open('styles/custom.css', 'r').read()\n", - " return HTML('<style>' + styles + '</style>')\n", - "css_styling()" - ], - "language": "python", - "metadata": { - "slideshow": { - "slide_type": "skip" - } + } + ], + "source": [ + "%%sh\n", + "python examples/fsquare.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Code in files\n", + "===\n", + "\n", + "Working with files in IPython\n", + "---\n", + "\n", + "The `%run` magic runs the code from a file directly in IPython:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 is the square of 0.\n", + "1 is the square of 1.\n", + "4 is the square of 2.\n", + "9 is the square of 3.\n", + "16 is the square of 4.\n", + "25 is the square of 5.\n", + "36 is the square of 6.\n", + "49 is the square of 7.\n", + "64 is the square of 8.\n", + "81 is the square of 9.\n" + ] + } + ], + "source": [ + "%run examples/fsquare.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "You can edit and run a file with `%edit`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%edit examples/fsquare.py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Code in files\n", + "===\n", + "\n", + "Saving your IPython session history to a file\n", + "---\n", + "\n", + "Give the `%save` magic a name and a range of input lines and it will save them to a `.py` file with that name:\n", + "\n", + " In [4]: %save my_session 1-3\n", + " The following commands were written to file `my_session.py`:\n", + " a = 4\n", + " a += 3\n", + " b = a" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Further reading\n", + "===\n", + "\n", + "* [The Python Tutorial](http://docs.python.org/2/tutorial/index.html)\n", + " <br>\n", + " From the official Python documentation.\n", + "\n", + "\n", + "* [Learn Python The Hard Way](http://learnpythonthehardway.org/book/)\n", + " <br>\n", + " Book on learning Python by exercises, online available for free.\n", + "\n", + "\n", + "* [The Hitchhiker's Guide to Python](http://docs.python-guide.org/en/latest/)\n", + " <br>\n", + " This opinionated guide exists to provide both novice and expert Python developers a best-practice handbook to the installation, configuration, and usage of Python on a daily basis.\n", + "\n", + "\n", + "* [A Primer on Scientific Programming with Python](http://codingcat.com/knjige/python/A%20Primer%20on%20Scientific%20Programming%20with%20Python.pdf)\n", + " <br>\n", + " Complete PDF version of the book. The aim of this book is to teach computer programming using examples from mathematics and the natural sciences.\n", + "\n", + "\n", + "* [Python Module of the Week](http://pymotw.com/)\n", + " <br>\n", + " Series of articles providing a tour of the Python standard library through short examples." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Homework assignment\n", + "===\n", + "\n", + "https://classroom.github.com/a/QU2iPYKn" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "<style>/* Remove the vertical scrollbar added by nbconvert. */\n", + ".reveal {\n", + " overflow: hidden;\n", + "}\n", + "\n", + "/* Workaround some highlight.js bugs in language autodetection. */\n", + "code.objectivec *,\n", + "code.perl *,\n", + "code.cs *,\n", + "code.javascript *,\n", + "code.http * {\n", + " color: black ! important;\n", + " font-weight: normal ! important;\n", + "}\n", + "span.title {\n", + " color: black ! important;\n", + "}\n", + "span.tag {\n", + " color: black ! important;\n", + "}\n", + "span.attribute {\n", + " color: black ! important;\n", + "}\n", + "</style>" + ], + "text/plain": [ + "<IPython.core.display.HTML at 0x2e0ea50>" + ] }, - "outputs": [ - { - "html": [ - "<style>/* Remove the vertical scrollbar added by nbconvert. */\n", - ".reveal {\n", - " overflow: hidden;\n", - "}\n", - "\n", - "/* Workaround some highlight.js bugs in language autodetection. */\n", - "code.objectivec *,\n", - "code.perl *,\n", - "code.cs *,\n", - "code.javascript *,\n", - "code.http * {\n", - " color: black ! important;\n", - " font-weight: normal ! important;\n", - "}\n", - "span.title {\n", - " color: black ! important;\n", - "}\n", - "span.tag {\n", - " color: black ! important;\n", - "}\n", - "span.attribute {\n", - " color: black ! 
important;\n", - "}\n", - "</style>" - ], - "metadata": {}, - "output_type": "pyout", - "prompt_number": 1, - "text": [ - "<IPython.core.display.HTML at 0x2e0ea50>" - ] - } - ], - "prompt_number": 1 + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "from IPython.display import HTML\n", + "def css_styling():\n", + " styles = open('styles/custom.css', 'r').read()\n", + " return HTML('<style>' + styles + '</style>')\n", + "css_styling()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Acknowledgements\n", + "========\n", + "\n", + "Martijn Vermaat\n", + "\n", + "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl)\n", + "\n", + "Based on\n", + "---------\n", + "[Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", + "\n", + "License\n", + "--------\n", + "[Creative Commons Attribution 3.0 License (CC-by)](http://creativecommons.org/licenses/by/3.0)" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/more_python/03_more_python_goodness_1.ipynb b/more_python/03_more_python_goodness_1.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fed127b7737b5afc7c750d115064263b649d908b --- /dev/null +++ b/more_python/03_more_python_goodness_1.ipynb @@ -0,0 +1,941 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# More Python Goodness (1)\n", + "***" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Table of contents\n", + "\n", + "1. 
[Working with scripts](#scripts)\n", + "2. [The standard library](#stdlib)\n", + "3. [String methods](#stringmethods)\n", + "4. [Comments and docstrings](#docstrings)\n", + "5. [Detour: PEP8 and other PEPs](#peps)\n", + "6. [Errors and exceptions](#exceptions)\n", + "7. Working with modules\n", + "8. Examples from the standard library\n", + "9. Reading and writing files\n", + "10. Assignment: Finding the most common 7-mer in a FASTA file\n", + "11. Further reading" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"scripts\"></a>\n", + "## Working with scripts\n", + "\n", + "Interpreters are great for *prototyping*, but not really suitable if you want to **share** or **release** code. To do so, we write our Python commands in scripts (and later, modules).\n", + "\n", + "A **script** is a simple text file containing Python instructions to execute." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Executing scripts\n", + "\n", + "There are two common ways to execute a script:\n", + "\n", + "1. As an argument of the Python interpreter command.\n", + "2. As a standalone executable (with the appropriate shebang line & file mode).\n", + "\n", + "IPython gives you a third option:\n", + "\n", + "<ol start=\"3\">\n", + " <li>As an argument of the `%run` magic.\n", + "</ol>" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Writing your script\n", + "\n", + "Let's start with a simple GC calculator. 
Open your text editor, and write the following Python statements (remember your indentations):\n", + "```python\n", + "def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq:\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " \n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + "print \"The sequence 'CAGG' has a %GC of {:.2f}\".format(\n", + " calc_gc_percent(\"CAGG\"))\n", + "```\n", + "Save the file as `seq_toolbox.py` (you can use any other name if you like) and go to your shell." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running the script\n", + "\n", + "Let's try the first method: using your script as an argument:\n", + "\n", + " $ python seq_toolbox.py\n", + "\n", + "Is the output as you expect?\n", + "\n", + "For the second method, we need to do two more things:\n", + "\n", + "1. Open the script in your editor and add the following line to the very top:\n", + "\n", + " #!/usr/bin/env python\n", + "\n", + "2. Save the file, go back to the shell, and allow the file to be executed:\n", + "\n", + " $ chmod +x seq_toolbox.py\n", + "\n", + "You can now execute the file directly:\n", + "\n", + " $ ./seq_toolbox.py\n", + "\n", + "Is the output the same as the previous method?\n", + "\n", + "Finally, try out the third method. Open an IPython interpreter session and do:\n", + "\n", + " %run seq_toolbox.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"stdlib\"></a>\n", + "## The standard library\n", + "\n", + "Our script is nice and dandy, but we don't want to edit the source file every time we calculate a sequence's GC.\n", + "\n", + "The **standard library** is a collection of Python modules (or functions, for now) that comes packaged with a default Python installation. They're not part of the language per se, more like a *batteries included* thing." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Our first standard library module: `sys`\n", + "\n", + "We'll start by using the simple `sys` module to make our script more flexible.\n", + "\n", + "Standard library modules (and other modules, as we'll see later) can be used via the `import` statement, for example:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like other objects so far, we can peek into the documentation of these modules using `help`, or the IPython `?` shortcut. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "sys?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The `sys.argv` list\n", + "\n", + "The `sys` module provides a way to capture command line arguments with its `argv` object. This is a list of arguments supplied when invoking the current Python session. Not really useful for an interpreter session, but very handy for scripts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['-c',\n", + " '-f',\n", + " '/home/martijn/.ipython/profile_default/security/kernel-9535cbe5-e69a-4c48-91f2-80a022c362b9.json',\n", + " \"--IPKernelApp.parent_appname='ipython-notebook'\",\n", + " '--profile-dir',\n", + " '/home/martijn/.ipython/profile_default',\n", + " '--parent=1']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sys.argv" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['-c',\n", + " '-f',\n", + " '/home/martijn/.ipython/profile_default/security/kernel-9535cbe5-e69a-4c48-91f2-80a022c362b9.json']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sys.argv[:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script with `sys.argv`\n", + "\n", + "To use `sys.argv` in our script, open a text editor and edit the script by adding an import statement, capturing the `sys.argv` value, and editing our last `print` line:\n", + "\n", + "```python\n", + " #!/usr/bin/env python\n", + " import sys\n", + "\n", + " def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq:\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + " input_seq = sys.argv[1]\n", + " print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))\n", + "```\n", + "To test it, you can run the following command in your shell:\n", + "\n", + " $ python seq_toolbox.py CAGG\n", + "\n", + "Try it with `./seq_toolbox.py` instead. What happens?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"stringmethods\"></a>\n", + "## String methods\n", + "\n", + "Try running the script with `'cagg'` as the input sequence. What happens?\n", + "\n", + "As we saw earlier, many objects, like those of type `list`, `dict`, or `str`, have useful methods defined on them. One way to squash this potential bug is by using Python's string method `upper`. Let's first check out some commonly used string functions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "my_str = 'Hello again, ipython!'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'HELLO AGAIN, IPYTHON!'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_str.upper()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'hello again, ipython!'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_str.lower()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Hello Again, Ipython!'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_str.title()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_str.startswith('H')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": 
[ + "my_str.startswith('h')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Hello again', ' ipython!']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_str.split(',')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Hello again, lumc!'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_str.replace('ipython', 'lumc')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_str.count('n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script with `upper()`\n", + "\n", + "Let's use `upper()` to fortify our function. It should now look something like this:\n", + "```python\n", + " def calc_gc_percent(seq):\n", + " at_count, gc_count = 0, 0\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "```\n", + "And run it (in whichever way you prefer). Do you get the expected output?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"docstrings\"></a>\n", + "## Comments and docstrings\n", + "\n", + "There's a golden rule in programming: write code for humans (this includes you in 6 months). Python provides two ways to accomplish this: comments and docstrings.\n", + "\n", + "### Comments\n", + "\n", + "Any lines prepended with `#` are **comments**, making them ignored by the interpreter. 
Comments can be freeform text; anything that helps in understanding the code\n", + "\n", + "### Docstrings\n", + "\n", + "**Docstrings** are Python's way of attaching proper documentation to objects. Officially, the first string literal that occurs in a module, function, class, or method definition is used as that object's docstring.\n", + "\n", + "In practice, *triple-quoted strings* are used, to handle newlines easier.\n", + "\n", + "Remember how we used the `help` function (or IPython's `?` shortcut) to get information about an object, function, or module? This actually prints that object's docstring." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script with comments and docstrings\n", + "\n", + "Open your script again in a text editor, and add the following comments and docstrings: \n", + "```python\n", + " #!/usr/bin/env python\n", + " import sys\n", + "\n", + " def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + "\n", + " return gc_count * 100.0 / (gc_count + at_count) \n", + "\n", + " input_seq = sys.argv[1]\n", + " print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"peps\"></a>\n", + "## Detour: PEP8 and other PEPs\n", + "\n", + "Since comments and docstrings are basically free-form text, whether it's useful or not depends heavily on the developer. 
To mitigate this, the Python community has come up with practical conventions. They are described in a document called **PEP8**.\n", + "\n", + "Complementary to PEP8, there is **PEP257**, which covers docstrings specifically. Following these conventions is not mandatory, but it is *very much* encouraged.\n", + "\n", + "Python Enhancement Proposals, or **PEP**s, are how Python grows. There are hundreds of them now, and all of them have to be approved by our BDFL.\n", + "\n", + "> [PEP8: Style Guide for Python Code](http://www.python.org/dev/peps/pep-0008/)\n", + "\n", + "> [PEP257: Docstring Conventions](http://www.python.org/dev/peps/pep-0257/)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"exceptions\"></a>\n", + "## Errors and exceptions\n", + "\n", + "Try running the script with `ACTG123` as the argument. What happens? Is this acceptable behavior?\n", + "\n", + "Sometimes we want to put safeguards in place to handle invalid inputs. In this case we only accept `ACTG`; all other characters are invalid.\n", + "\n", + "Python provides a way to break out of the normal execution flow, by raising what is called an **exception**. We can raise exceptions ourselves as well, by using the `raise` statement." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The `ValueError` built-in exception\n", + "\n", + "One of the most often used exceptions is the built-in exception `ValueError`.
It is used on occasions where inappropriate argument values are used, for example when trying to convert the string `A` to an integer:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "invalid literal for int() with base 10: 'A'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-14-0da6d315d7ad>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'A'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mValueError\u001b[0m: invalid literal for int() with base 10: 'A'" + ] + } + ], + "source": [ + "int('A')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ValueError` is the appropriate exception to raise when your function is called with argument values it cannot handle." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script by handling invalid inputs\n", + "\n", + "Open your script, and edit the `if` clause to add our exception:\n", + "```python\n", + "def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " else:\n", + " raise ValueError(\n", + " \"Unexpected character found: {}.
Only \"\n", + " \"ACTGs are allowed.\".format(char))\n", + " \n", + " return gc_count * 100.0 / (gc_count + at_count)\n", + "```\n", + "Try running the script again with `ACTG123` as the argument. What happens now?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handling corner cases\n", + "\n", + "Try running the script with `''` (two quote signs) as the argument. What happens? Why? Is this a valid input?\n", + "\n", + "We don't always want to let exceptions stop the program flow; sometimes we want to provide an alternative flow. The `try ... except` block allows you to do this.\n", + "\n", + "The syntax is:\n", + "\n", + " try:\n", + " # Statements that may raise exceptions.\n", + " # [...]\n", + " except {exception type}:\n", + " # What to do when the exception is raised.\n", + " # [...]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script by handling corner cases\n", + "\n", + "Let's change our script by adding a `try ... except` block:\n", + "\n", + "```python\n", + " def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " else:\n", + " raise ValueError(\n", + " \"Unexpected character found: {}.
Only \"\n", + " \"ACTGs are allowed.\".format(char))\n", + "\n", + " # Corner case handling: empty input sequence.\n", + " try:\n", + " return gc_count * 100.0 / (gc_count + at_count)\n", + " except ZeroDivisionError:\n", + " return 0.0\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Detour: Exception handling best practices\n", + "\n", + "#### Aim for a minimal `try` block\n", + "\n", + "We want to be able to pinpoint the statements that may raise the exceptions so we can tailor our handling.\n", + "\n", + "Example of code that violates this principle:\n", + "```python\n", + " try:\n", + " my_function()\n", + " my_other_function()\n", + " except ValueError:\n", + " my_fallback_function()\n", + "```\n", + "A better way would be:\n", + "```python\n", + " try:\n", + " my_function()\n", + " except ValueError:\n", + " my_fallback_function()\n", + " my_other_function()\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Be specific when handling exceptions\n", + "\n", + "The following code is syntactically valid, but *never* use it in your real scripts / programs:\n", + "```python\n", + " try:\n", + " my_function()\n", + " except:\n", + " my_fallback_function()\n", + "```\n", + "*Always* use the full exception name when handling exceptions, to make for much cleaner code:\n", + "```python\n", + " try:\n", + " my_function()\n", + " except ValueError:\n", + " my_fallback_function()\n", + " except TypeError:\n", + " my_other_fallback_function()\n", + " except IndexError:\n", + " my_final_function()\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Look Before You Leap (LBYL) vs Easier to Ask for Forgiveness than Permission (EAFP)\n", + " \n", + "We could have written our last exception block like so:\n", + "```python\n", + " if gc_count + at_count == 0:\n", + " return 0.0\n", + " return gc_count * 100.0 / (gc_count + at_count)\n", + "```\n", + "Both approaches are correct and
have their own plus and minuses in general. However in this case, I would argue that EAFP is better since it makes the code more readable." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script by handling more corner cases\n", + "\n", + "Now try running your script without any arguments at all. What happens?\n", + "\n", + "Armed with what you now know, how would you handle this situation?" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.core.display import HTML\n", + "def custom_style():\n", + " style = open('styles/notebook.css', 'r').read()\n", + " return HTML('<style>' + style + '</style>')\n", + "def custom_script():\n", + " script = open('styles/notebook.js', 'r').read()\n", + " return HTML('<script>' + script + '</script>')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>/*\n", + " https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers\n", + "*/\n", + "@font-face {\n", + " font-family: \"Computer Modern\";\n", + " src: url('http://mirrors.ctan.org/fonts/cm-unicode/fonts/otf/cmunss.otf');\n", + "}\n", + "div.cell{\n", + " width:800px;\n", + " margin-left:16% !important;\n", + " margin-right:auto;\n", + "}\n", + "h1 {\n", + " font-family: Helvetica, serif;\n", + "}\n", + "h4{\n", + " margin-top:12px;\n", + " margin-bottom: 3px;\n", + " }\n", + "div.text_cell_render{\n", + " font-family: Computer Modern, \"Helvetica Neue\", Arial, Helvetica, Geneva, sans-serif;\n", + " line-height: 145%;\n", + " font-size: 130%;\n", + " width:800px;\n", + " margin-left:auto;\n", + " margin-right:auto;\n", + "}\n", + ".CodeMirror{\n", + " font-family: \"Source Code Pro\", source-code-pro,Consolas, monospace;\n", + "}\n", + ".prompt{\n", + " display: None;\n", + "}\n", + ".text_cell_render .exercise {\n", + " font-weight: 300;\n", 
+ " /*font-size: 22pt;*/\n", + " color: #4057A1;\n", + " font-style: italic;\n", + " /*margin-bottom: .5em;\n", + " margin-top: 0.5em;\n", + " display: block;*/\n", + "}\n", + ".text_cell_render .example {\n", + " font-weight: 300;\n", + " color: #40A157;\n", + " font-style: italic;\n", + "}\n", + "\n", + ".warning{\n", + " color: rgb( 240, 20, 20 )\n", + "}\n", + "</style>" + ], + "text/plain": [ + "<IPython.core.display.HTML at 0x2ceccd0>" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_style()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<script>// https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers\n", + "MathJax.Hub.Config({\n", + " TeX: {\n", + " extensions: [\"AMSmath.js\"]\n", + " },\n", + " tex2jax: {\n", + " inlineMath: [ ['$','$'], [\"\\\\(\",\"\\\\)\"] ],\n", + " displayMath: [ ['$$','$$'], [\"\\\\[\",\"\\\\]\"] ]\n", + " },\n", + " displayAlign: 'center', // Change this to 'center' to center equations.\n", + " \"HTML-CSS\": {\n", + " styles: {'.MathJax_Display': {\"margin\": 4}}\n", + " }\n", + " });\n", + "</script>" + ], + "text/plain": [ + "<IPython.core.display.HTML at 0x2cecdd0>" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_script()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Acknowledgements\n", + "========\n", + "\n", + "[Wibowo Arindrarto](mailto:w.arindrarto@lumc.nl)\n", + "\n", + "Martijn Vermaat\n", + "\n", + "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl)\n", + "\n", + "Based on\n", + "---------\n", + "[Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", + "\n", + "License\n", + "--------\n", + "[Creative Commons Attribution 3.0 License 
(CC-by)](http://creativecommons.org/licenses/by/3.0)" + ] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/more_python/03_more_python_goodness_2.ipynb b/more_python/03_more_python_goodness_2.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0cf23e705b4d1c5d3e1b0372f22d6ddc19b88564 --- /dev/null +++ b/more_python/03_more_python_goodness_2.ipynb @@ -0,0 +1,1846 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# More Python Goodness (2)\n", + "***" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Table of contents\n", + "\n", + "1. Working with scripts\n", + "2. The standard library\n", + "3. String methods\n", + "4. Comments and docstrings\n", + "5. Detour: PEP8 and other PEPs\n", + "6. Errors and exceptions\n", + "7. [Working with modules](#modules)\n", + "8. [Examples from the standard library](#stdlib-examples)\n", + "9. [Reading and writing files](#io)\n", + "10. [Assignment: Finding the most common 7-mer in a FASTA file](#assignment)\n", + "11. [Further reading](#further)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"modules\"></a>\n", + "## Working with modules\n", + "\n", + "Sometimes it is useful to group functions and other objects in different files. Sometimes you need to use that fancy function you've written 2 years ago. This is where modules in Python come in handy.\n", + "\n", + "More officially, a **module** allows you to share code in the form of libraries. 
You've seen one example: the `sys` module in the standard library. There are many other modules in the standard library, as we'll see soon." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What modules look like\n", + "\n", + "Any Python script can in principle be imported as a module. We can import whenever we can write a valid Python statement, in a script or in an interpreter session.\n", + "\n", + "If a script is called `script.py`, then we use `import script`. This gives us access to the objects defined in `script.py` by prefixing them with `script` and a dot.\n", + "\n", + "Keep in mind that this is not the only way to import Python modules. Refer to the Python documentation to find out more ways to do imports." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using `seq_toolbox.py` as a module\n", + "\n", + "Open an interpreter and try importing your module:\n", + "\n", + "```python\n", + "import seq_toolbox\n", + "```\n", + "Does this work? Why?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script for importing\n", + "\n", + "During a module import, Python executes all the statements inside the module.\n", + "\n", + "To make our script work as a module (in the intended way), we need to add a check for whether the module is being imported or run as a script:\n", + "```python\n", + " #!/usr/bin/env python\n", + " import sys\n", + "\n", + " def calc_gc_percent(seq):\n", + " \"\"\"\n", + " Calculates the GC percentage of the given sequence.\n", + "\n", + " Arguments:\n", + " - seq - the input sequence (string).\n", + "\n", + " Returns:\n", + " - GC percentage (float).\n", + "\n", + " The returned value is always <= 100.0\n", + " \"\"\"\n", + " at_count, gc_count = 0, 0\n", + " # Change input to all caps to allow for non-capital\n", + " # input sequence.\n", + " for char in seq.upper():\n", + " if char in ('A', 'T'):\n", + " at_count += 1\n", + " elif char in ('G', 'C'):\n", + " gc_count += 1\n", + " else:\n", + " raise ValueError(\n", + " \"Unexpected character found: {}. Only \"\n", + " \"ACTGs are allowed.\".format(char))\n", + "\n", + " # Corner case handling: empty input sequence.\n", + " try:\n", + " return gc_count * 100.0 / (gc_count + at_count)\n", + " except ZeroDivisionError:\n", + " return 0.0\n", + "\n", + " if __name__ == '__main__':\n", + " input_seq = sys.argv[1]\n", + " print \"The sequence '{}' has a %GC of {:.2f}\".format(\n", + " input_seq, calc_gc_percent(input_seq))\n", + "```\n", + "Now try importing the module again. What happens? Can you still use the module as a script?"
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using modules\n", + "\n", + "When a module is imported, we can access the objects defined in it:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import seq_toolbox" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<function seq_toolbox.calc_gc_percent>" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seq_toolbox.calc_gc_percent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By the way, remember that we added a docstring to the `calc_gc_percent` function? After importing our module, we can read up on how to use the function in its docstring:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "seq_toolbox.calc_gc_percent?" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50.0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seq_toolbox.calc_gc_percent('ACTG')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also expose an object inside the module directly into our current namespace using the `from ...
import ...` statement:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from seq_toolbox import calc_gc_percent" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "25.0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "calc_gc_percent('AAAG')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sometimes, we want to alias the imported object to reduce the chance of it overwriting any already-defined objects with the same name. This is accomplished using the `from ... import ... as ...` statement:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from seq_toolbox import calc_gc_percent as gc_calc" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "25.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gc_calc('AAAG')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### (A simple guide on) How modules are discovered\n", + "\n", + "In our case, Python imports by checking whether the module exists in the current directory. This is not the only place Python looks, however.\n", + "\n", + "A complete list of paths where Python looks for modules is available via the `sys` module as `sys.path`. It is composed of (in order):\n", + "\n", + "1. The current directory.\n", + "2. The `PYTHONPATH` environment variable.\n", + "3. Installation-dependent defaults." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"stdlib-examples\"></a>\n", + "## Examples from the standard library\n", + "\n", + "> Official Python documentation: [The Python Standard Library](http://docs.python.org/2/library/index.html)\n", + "\n", + "Just to improve our knowledge, let's go through some of the most often used standard library modules." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The standard library: `os` module\n", + "\n", + "> The Python Standard Library: [15.1. os — Miscellaneous operating system interfaces](http://docs.python.org/2/library/os.html)\n", + "\n", + "The `os` module provides a portable way of using operating system-specific functionality. It is a large module, but one of its most frequently used parts is the set of file-related functions." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/martijn/projects/programming-course'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.getcwd() # Get current directory."
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/martijn/.virtualenvs/programming-course/bin:/home/martijn/projects/vcftools_0.1.11/bin:/home/martijn/projects/vcftools_0.1.11/cpp:/home/martijn/projects/muscle/muscle3.8.31/src:/home/martijn/projects/bedtools/bin:/home/martijn/projects/bamtools/bamtools/bin:/home/martijn/projects/gvnl/concordance/tabix:/home/martijn/projects/samtools-trunk:/home/martijn/projects/samtools-trunk/bcftools:/home/martijn/.venvburrito/bin:/home/martijn/coq-8.3-rc1/bin:/home/martijn/projects/kiek/trunk:/home/martijn/bin:/home/martijn/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.environ['PATH'] # Get the value of the environment variable PATH." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "my_filename = 'input.fastq'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('input', '.fastq')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.path.splitext(my_filename) # Split the extension and filename." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/martijn/projects/programming-course/input.fastq'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Join the current directory and `my_filename` to create a file path.\n", + "os.path.join(os.getcwd(), my_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.path.exists(my_filename) # Check whether `my_filename` exists or not." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.path.isdir('/home') # Checks whether '/home' is a directory." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.path.isfile('/home') # Checks whether '/home' is a file." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The standard library: `sys` module\n", + "\n", + "> The Python Standard Library: [27.1. sys — System-specific parameters and functions](http://docs.python.org/2/library/sys.html)\n", + "\n", + "This module has various runtime-related and interpreter-related functions. We've seen two of the most commonly used: `sys.argv` and `sys.path`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['',\n", + " '/home/martijn/.venvburrito/lib/python/distribute-0.6.49-py2.7.egg',\n", + " '/home/martijn/.venvburrito/lib/python/pip-1.4.1-py2.7.egg',\n", + " '/home/martijn/.venvburrito/lib/python2.7/site-packages',\n", + " '/home/martijn/.venvburrito/lib/python',\n", + " '/usr/local/samba/lib/python2.6/site-packages',\n", + " '/home/martijn/.virtualenvs/programming-course/lib/python2.7',\n", + " '/home/martijn/.virtualenvs/programming-course/lib/python2.7/plat-linux2',\n", + " '/home/martijn/.virtualenvs/programming-course/lib/python2.7/lib-tk',\n", + " '/home/martijn/.virtualenvs/programming-course/lib/python2.7/lib-old',\n", + " '/home/martijn/.virtualenvs/programming-course/lib/python2.7/lib-dynload',\n", + " '/usr/lib/python2.7',\n", + " '/usr/lib/python2.7/plat-linux2',\n", + " '/usr/lib/python2.7/lib-tk',\n", + " '/home/martijn/.virtualenvs/programming-course/local/lib/python2.7/site-packages',\n", + " '/home/martijn/.virtualenvs/programming-course/local/lib/python2.7/site-packages/gtk-2.0',\n", + " '/home/martijn/.virtualenvs/programming-course/lib/python2.7/site-packages',\n", + " '/home/martijn/.virtualenvs/programming-course/lib/python2.7/site-packages/gtk-2.0',\n", + " '/usr/local/samba/lib/python2.6/site-packages',\n", + " '/usr/local/samba/lib/python2.6/site-packages',\n", + " '/home/martijn/.virtualenvs/programming-course/local/lib/python2.7/site-packages/IPython/extensions']" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sys.path # List of places where Python looks for modules when importing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/martijn/.virtualenvs/programming-course/bin/python'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sys.executable # Path to the current interpreter's executable." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sys.version_info(major=2, minor=7, micro=3, releaselevel='final', serial=0)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sys.version_info # Information about our Python version." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sys.version_info.major # It also provides more granular access." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The standard library: `math` module\n", + "\n", + "> The Python Standard Library: [9.2. math — Mathematical functions](http://docs.python.org/2/library/math.html)\n", + "\n", + "Useful math-related functions can be found here. More comprehensive modules exist (such as `numpy`, the subject of tomorrow's lesson), but `math` is nevertheless still useful." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import math" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.302585092994046" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.log(10) # Natural log of 10."
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.0" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.log(100, 10) # Log base 10 of 100." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "81.0" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.pow(3, 4) # 3 raised to the 4th power." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.4142135623730951" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.sqrt(2) # Square root of 2." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.141592653589793" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.pi # The value of pi." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The standard library: `random` module\n", + "\n", + "> The Python Standard Library: [9.6. random — Generate pseudo-random numbers](http://docs.python.org/2/library/random.html)\n", + "\n", + "The `random` module contains useful functions for generating pseudo-random numbers." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.05941901356497081" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.random() # Random float x, such that 0.0 <= x < 1.0." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.randint(2, 17) # Random integer between 2 and 17, inclusive." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'grape'" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Random choice of any items in the given list.\n", + "random.choice(['apple', 'banana', 'grape', 'kiwi', 'orange'])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['orange', 'apple', 'banana']" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Random sampling of 3 items from the given list.\n", + "random.sample(['apple', 'banana', 'grape', 'kiwi', 'orange'], 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The standard library: `re` module\n", + "\n", + "> The Python Standard Library: [7.2. re — Regular expression operations](http://docs.python.org/2/library/re.html)\n", + "\n", + "Regular expression-related functions are in the `re` module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "my_seq = 'CAGTCAGT'" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "results1 = re.search(r'CA.+CA', my_seq)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'CAGTCA'" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results1.group(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "results2 = re.search(r'CCC..', my_seq)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "print results2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The standard library: `argparse` module\n", + "\n", + "> The Python Standard Library: [15.4. argparse — Parser for command-line options, arguments and sub-commands](http://docs.python.org/2/library/argparse.html)\n", + "\n", + "Using `sys.argv` is neat for small scripts, but as our script gets larger and more complex, we want to be able to handle complex arguments too. The `argparse` module has handy functionalities for creating command-line scripts." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Improving our script with `argparse`\n", + "\n", + "Open your script/module in a text editor and replace `import sys` with `import argparse`. 
Remove all lines / blocks referencing `sys.argv`\n", + "\n", + "Change the `if __name__ == '__main__'` block to be the following:\n", + "\n", + "```python\n", + "if __name__ == '__main__':\n", + " # Create our argument parser object.\n", + " parser = argparse.ArgumentParser()\n", + " # Add the expected argument.\n", + " parser.add_argument('input_seq', type=str,\n", + " help=\"Input sequence\")\n", + " # Do the actual parsing.\n", + " args = parser.parse_args()\n", + " # And show the output.\n", + " print \"The sequence '{}' has %GC of {:.2f}\".format(\n", + " args.input_seq,\n", + " calc_gc_percent(args.input_seq))\n", + "```\n", + "The code does look a little more verbose, but we get something better in return.\n", + "\n", + "Go back to the shell and execute your script without any arguments. What happens?\n", + "\n", + "Try executing the following command in the shell. What happens?\n", + "\n", + " $ python seq_toolbox.py --help\n", + " \n", + "We're just getting started on `argparse`. There are other useful bits that we'll see shortly after a small intro on file I/O." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"io\"></a>\n", + "## Reading and writing files\n", + "\n", + "Opening files for reading or writing is done using the `open` function. It is commonly used with two arguments, *name* and *mode*:\n", + "\n", + "* *name* is the name of the file to open.\n", + "* *mode* specifies how the file should be handled.\n", + "\n", + "These are some of the common file modes:\n", + "\n", + "* `r`: open file for reading (default).\n", + "* `w`: open file for writing.\n", + "* `a`: open file for appending content." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "open?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reading files\n", + "\n", + "Let's go through some ways of reading from a file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "fh = open('data/short_file.txt')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`fh` is a file handle object which we can use to retrieve the file contents. One simple way would be to read the whole file contents:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'this short file has two lines\\nit is used in the example code\\n'" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.read()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Executing `fh.read()` a second time gives an empty string. This is because we have \"walked\" through the file to its end." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "''" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.read()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can reset the handle to the beginning of the file again using the `seek()` function. 
Here, we use 0 as the argument since we want to move the handle to position 0 (beginning of the file):" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "fh.seek(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'this short file has two lines\\nit is used in the example code\\n'" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.read()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In practice, reading the whole file into memory is not always a good idea. It is practical for small files, but not if our file is big (e.g., bigger than our memory). In this case, the alternative is to use the `readline()` function." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "fh.seek(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'this short file has two lines\\n'" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.readline()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'it is used in the example code\\n'" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.readline()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "''" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.readline()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "More common in Python is to use the `for` loop with the file handle itself. 
Python will automatically iterate over each line." + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "fh.seek(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "this short file has two lines\n", + "\n", + "it is used in the example code\n", + "\n" + ] + } + ], + "source": [ + "for line in fh:\n", + " print line" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that iteration exhausts the handle since we are at the end of the file after the loop." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "''" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.readline()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also check the file handle position using the `tell()` function. If `tell()` returns a nonzero number, then we are not at the beginning of the file." + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "61" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fh.tell()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we're done with the file handle, we can call the `close()` method to free up any system resources still being used to keep the file open. After we closed the file, we can not use the file object anymore." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "fh.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "I/O operation on closed file", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-55-4e86183cf03e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mValueError\u001b[0m: I/O operation on closed file" + ] + } + ], + "source": [ + "fh.readline()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Writing files\n", + "\n", + "When writing files, we supply the `w` file mode explicitly:" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "fw = open('data/my_file.txt', 'w')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`fw` is a file handle similar to the `fh` that we've seen previously. It is used only for writing and not reading, however."
+ ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "ename": "IOError", + "evalue": "File not open for reading", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mIOError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-59-73497a15302b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mIOError\u001b[0m: File not open for reading" + ] + } + ], + "source": [ + "fw.read()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To write to the file, we use its `write()` method. Remember that Python *does not* add newline characters here (as opposed to when you use the `print` statement), so to move to a new line we have to add `\\n` ourselves." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "fw.write('This is my first line ')" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "fw.write('Still on my first line\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "fw.write('Now on my second line')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As with the `r` mode, we can close the handle when we're done with it. The file can then be reopened with the `r` mode and we can check its contents." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "fw.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This is my first line Still on my first line\n", + "\n", + "Now on my second line\n" + ] + } + ], + "source": [ + "fr = open('data/my_file.txt') # Remember to use the same file we wrote to.\n", + "for line in fr:\n", + " print line\n", + "fr.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And finally, to remove the file, we can use the `remove()` function from the `os` module." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "os.remove('data/my_file.txt')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Be cautious when using file handles\n", + "\n", + "When reading / writing files, we are interacting with external resources that may or may not behave as expected. For example, we don't always have permission to read / write a file, the file itself may not exist, or we have a completely wrong idea of what's in the file. In situations like these, you are encouraged to use the `try ... finally` block.\n", + " \n", + "The syntax is similar to `try ... except` that we've seen earlier (in fact they are part of the same block, as we'll see later). Unlike `try ... except`, the `finally` block in `try ... finally` is always executed regardless of any raised exceptions.\n", + "\n", + "Let's take a look at some examples. 
First, the not recommended one:" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "invalid literal for int() with base 10: 'this short file has two lines\\n'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-66-f293b9e3578f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'data/short_file.txt'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mprint\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mline\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mprint\u001b[0m \u001b[1;34m'We closed our filehandle'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: invalid literal for int() with base 10: 'this short file has two lines\\n'" + ] + } + ], + "source": [ + "f = open('data/short_file.txt')\n", + "for line in f:\n", + " print int(line)\n", + "f.close()\n", + "print 'We closed our filehandle'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apart from our erroneous conversion of a line of text to an integer, the exception raised because of that causes the `f.close()` statement to be not executed. 
At this point we have a stale open file handle.\n", + "\n", + "Stubbornly trying to do the same thing again, this time we use a `finally` clause:" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We closed our file handle\n" + ] + }, + { + "ename": "ValueError", + "evalue": "invalid literal for int() with base 10: 'this short file has two lines\\n'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m<ipython-input-67-71128226da53>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'data/short_file.txt'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[1;32mprint\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mline\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: invalid literal for int() with base 10: 'this short file has two lines\\n'" + ] + } + ], + "source": [ + "try:\n", + " f = open('data/short_file.txt')\n", + " for line in f:\n", + " print int(line)\n", + "finally:\n", + " f.close()\n", + " print 'We closed our file handle'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "As you can see, this way the file handle still got closed.\n", + "\n", + "Now, an even better way would be to also use the `except` block, to handle the exception we might get if we try it a third time." + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Seems there was a line we could not handle\n", + "We closed our file handle\n" + ] + } + ], + "source": [ + "try:\n", + " f = open('data/short_file.txt')\n", + " for line in f:\n", + " print int(line)\n", + "except ValueError:\n", + " print 'Seems there was a line we could not handle'\n", + "finally:\n", + " f.close()\n", + " print 'We closed our file handle'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Intermezzo: `sys.stdout`, `sys.stderr`, and `sys.stdin`\n", + " \n", + "We've seen that the `sys` module provides some useful runtime functions. Now that we know about file handles, we can use three `sys` objects that are essentially file handles: `sys.stdout`, `sys.stderr`, and `sys.stdin`.\n", + "\n", + "Together, they provide access to the standard output, standard error, and standard input streams. We can use them appropriately by writing to `sys.stdout` and `sys.stderr`, and reading from `sys.stdin`.\n", + "\n", + "Unlike regular file handles, you don't need to close them after using (in fact you should not). The assumption is that these handles are always open to write to or to read from."
+ ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I'm writing to stdout!\n" + ] + } + ], + "source": [ + "sys.stdout.write(\"I'm writing to stdout!\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Now to stderr.\n" + ] + } + ], + "source": [ + "sys.stderr.write(\"Now to stderr.\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Improving our script to allow input from a file\n", + "\n", + "Before we go on to the exercise, let's do a final improvement on our script/module.\n", + "\n", + "We want to add some extra functionality: the script should accept as its argument a path to a file containing sequences. It will then compute the GC percentage for each sequence in this file.\n", + "\n", + "There are at least two things we need to do:\n", + "\n", + "1. Change the argument parser so that it deals with a new execution mode.\n", + "2. 
Add some statements to read from a file.\n", + "\n", + "Open the script in your text editor, and change the `if __name__ == '__main__'` block to the following:\n", + "```python\n", + "if __name__ == '__main__':\n", + " # Create our argument parser object.\n", + " parser = argparse.ArgumentParser()\n", + " # Add argument for the input type.\n", + " parser.add_argument(\n", + " 'mode', type=str, choices=['file', 'text'],\n", + " help='Input type of the script')\n", + " # Add argument for the input value.\n", + " parser.add_argument(\n", + " 'value', type=str,\n", + " help='Input value of the script')\n", + " # Do the actual parsing.\n", + " args = parser.parse_args()\n", + "\n", + " message = \"The sequence '{}' has a %GC of {:.2f}\"\n", + "\n", + " if args.mode == 'file':\n", + " try:\n", + " f = open(args.value, 'r')\n", + " for line in f:\n", + " seq = line.strip()\n", + " gc = calc_gc_percent(seq)\n", + " print message.format(seq, gc)\n", + " finally:\n", + " f.close()\n", + " else:\n", + " seq = args.value\n", + " gc = calc_gc_percent(seq)\n", + " print message.format(seq, gc)\n", + "``` \n", + "Note the things we've done here:\n", + "\n", + "1. We've added a new argument to our parser to specify the input type.\n", + "2. Correspondingly, we've expanded our function call to handle both input types.\n", + "\n", + "Save the script, and try running it. What do you see? Is running\n", + "\n", + " $ python seq_toolbox.py --help\n", + "\n", + "helpful to resolve this?\n", + "\n", + "Try running the script with the following command. What do you see?\n", + "\n", + " $ python seq_toolbox.py file data/seq.txt\n", + "\n", + "Feel free to look into `data/seq.txt`."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"assignment\"></a>\n", + "## Assignment: Finding the most common 7-mer in a FASTA file\n", + "\n", + "### Your task\n", + "\n", + "Write a script to print out the most common 7-mer and its GC percentage from all the sequences in `data/records.fa`. You are free to reuse your existing toolbox.\n", + "\n", + "> The example FASTA file was adapted from: [Genome Biology DNA60 Bioinformatics Challenge](http://genomebiology.com/about/update/DNA60_STEPONE)\n", + "\n", + "### Hints\n", + "\n", + "1. FASTA files have two types of lines: header lines starting with a `>` character and sequence lines. We are only concerned with the sequence line.\n", + "2. Read the string functions documentation.\n", + "3. Read the documentation for built in functions.\n", + "\n", + "### Challenges\n", + "\n", + "1. Find out how to change your script so that it can read from `data/challenge.fa.gz` without unzipping the file first (hint: standard library).\n", + "\n", + "2. Can you change the parser so that there is an option flag to tell the program whether the input file is gzipped or not?\n", + "\n", + "3. Can you change your script so that it works for any N-mers instead of for just 7-mers?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a id=\"further\"></a>\n", + "## Further reading\n", + "\n", + "> Python standard library by examples: [Python Module of the Week](http://pymotw.com/2/contents.html)\n", + "\n", + "> [PEP8: Style Guide for Python Code](http://www.python.org/dev/peps/pep-0008/)\n", + "\n", + "> [PEP20: The Zen of Python](http://www.python.org/dev/peps/pep-0020/)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.core.display import HTML\n", + "def custom_style():\n", + " style = open('styles/notebook.css', 'r').read()\n", + " return HTML('<style>' + style + '</style>')\n", + "def custom_script():\n", + " script = open('styles/notebook.js', 'r').read()\n", + " return HTML('<script>' + script + '</script>')" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>/*\n", + " https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers\n", + "*/\n", + "@font-face {\n", + " font-family: \"Computer Modern\";\n", + " src: url('http://mirrors.ctan.org/fonts/cm-unicode/fonts/otf/cmunss.otf');\n", + "}\n", + "div.cell{\n", + " width:800px;\n", + " margin-left:16% !important;\n", + " margin-right:auto;\n", + "}\n", + "h1 {\n", + " font-family: Helvetica, serif;\n", + "}\n", + "h4{\n", + " margin-top:12px;\n", + " margin-bottom: 3px;\n", + " }\n", + "div.text_cell_render{\n", + " font-family: Computer Modern, \"Helvetica Neue\", Arial, Helvetica, Geneva, sans-serif;\n", + " line-height: 145%;\n", + " font-size: 130%;\n", + " width:800px;\n", + " margin-left:auto;\n", + " margin-right:auto;\n", + "}\n", + ".CodeMirror{\n", + " font-family: \"Source Code Pro\", source-code-pro,Consolas, monospace;\n", + "}\n", + ".prompt{\n", + " display: None;\n", + "}\n", + ".text_cell_render .exercise {\n", + " font-weight: 300;\n", + " /*font-size: 
22pt;*/\n", + " color: #4057A1;\n", + " font-style: italic;\n", + " /*margin-bottom: .5em;\n", + " margin-top: 0.5em;\n", + " display: block;*/\n", + "}\n", + ".text_cell_render .example {\n", + " font-weight: 300;\n", + " color: #40A157;\n", + " font-style: italic;\n", + "}\n", + "\n", + ".warning{\n", + " color: rgb( 240, 20, 20 )\n", + "}\n", + "</style>" + ], + "text/plain": [ + "<IPython.core.display.HTML at 0x1caac50>" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_style()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<script>// https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers\n", + "MathJax.Hub.Config({\n", + " TeX: {\n", + " extensions: [\"AMSmath.js\"]\n", + " },\n", + " tex2jax: {\n", + " inlineMath: [ ['$','$'], [\"\\\\(\",\"\\\\)\"] ],\n", + " displayMath: [ ['$$','$$'], [\"\\\\[\",\"\\\\]\"] ]\n", + " },\n", + " displayAlign: 'center', // Change this to 'center' to center equations.\n", + " \"HTML-CSS\": {\n", + " styles: {'.MathJax_Display': {\"margin\": 4}}\n", + " }\n", + " });\n", + "</script>" + ], + "text/plain": [ + "<IPython.core.display.HTML at 0x1d3d950>" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "custom_script()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Acknowledgements\n", + "========\n", + "\n", + "[Wibowo Arindrarto](mailto:w.arindrarto@lumc.nl)\n", + "\n", + "Martijn Vermaat\n", + "\n", + "[Jeroen Laros](mailto:j.f.j.laros@lumc.nl)\n", + "\n", + "Based on\n", + "---------\n", + "[Python Scientific Lecture Notes](http://scipy-lectures.github.io/)\n", + "\n", + "License\n", + "--------\n", + "[Creative Commons Attribution 3.0 License (CC-by)](http://creativecommons.org/licenses/by/3.0)" + ] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/visualization/data/populations.csv b/visualization/data/populations.csv new file mode 100644 index 0000000000000000000000000000000000000000..eb5056f64062a4f8614960ff3b8295bafe9a97f7 --- /dev/null +++ b/visualization/data/populations.csv @@ -0,0 +1,67 @@ +,Year,Belgium,Denmark,Netherlands,Sweden +0,1950,17.278599999999997,8.5627,20.2273,14.0332 +1,1951,17.3564,8.607399999999998,20.528799999999997,14.140799999999999 +2,1952,17.4608,8.6676,20.7642,14.248899999999999 +3,1953,17.555500000000002,8.7386,20.986,14.3429 +4,1954,17.6388,8.811399999999999,21.2307,14.4272 +5,1955,17.7367,8.8784,21.5018,14.5247 +6,1956,17.8477,8.9328,21.7792,14.6314 +7,1957,17.9787,8.9757,22.0527,14.734300000000001 +8,1958,18.1051,9.0302,22.373600000000003,14.8306 +9,1959,18.2072,9.0932,22.6954,14.907499999999999 +10,1960,18.2376,9.1621,22.973399999999998,14.9607 +11,1961,18.331699999999998,9.2338,23.2775,15.0413 +12,1962,18.436799999999998,9.2939,23.6113,15.1232 +13,1963,18.566200000000002,9.3672,23.931600000000003,15.2087 +14,1964,18.7339,9.4404,24.2543,15.3228 +15,1965,18.896,9.516200000000001,24.5893,15.4678 +16,1966,19.015700000000002,9.595,24.9128,15.6157 +17,1967,19.1129,9.6775,25.1965,15.7355 +18,1968,19.179699999999997,9.729700000000001,25.4594,15.8246 +19,1969,19.2252,9.781500000000001,25.7561,15.936 +20,1970,19.275399999999998,9.8576,26.076999999999998,16.085700000000003 +21,1971,19.346600000000002,9.901399999999999,26.3891,16.1967 +22,1972,19.4222,9.9512,26.6567,16.2449 +23,1973,19.4836,10.0152,26.8784,16.2738 +24,1974,19.5131,10.0905,27.0903,16.320700000000002 
+25,1975,19.6017,10.108799999999999,27.3328,16.3854 +26,1976,19.6363,10.1451,27.548000000000002,16.4448 +27,1977,19.660600000000002,10.1766,27.7125,16.502899999999997 +28,1978,19.674599999999998,10.2087,27.8837,16.5517 +29,1979,19.6968,10.2336,28.0765,16.587400000000002 +30,1980,19.7103,10.245999999999999,28.2994,16.6209 +31,1981,19.6976,10.243400000000001,28.4942,16.641 +32,1982,19.714599999999997,10.2355,28.625500000000002,16.6503 +33,1983,19.716,10.2286,28.7339,16.658 +34,1984,19.706,10.2232,28.848399999999998,16.6732 +35,1985,19.7165,10.2274,28.9833,16.7008 +36,1986,19.729599999999998,10.2408,29.144199999999998,16.7396 +37,1987,19.7404,10.254,29.3305,16.795699999999997 +38,1988,19.8034,10.2586,29.5202,16.873 +39,1989,19.8754,10.2646,29.698,16.9859 +40,1990,19.934800000000003,10.2798,29.903,17.1173 +41,1991,20.0092,10.3081,30.1391,17.2273 +42,1992,20.0903,10.3404,30.3681,17.3364 +43,1993,20.1674,10.378499999999999,30.5811,17.4373 +44,1994,20.230800000000002,10.4101,30.7657,17.5621 +45,1995,20.2735,10.4558,30.9179,17.654 +46,1996,20.313285999999998,10.523299999999999,31.061,17.6821 +47,1997,20.340452,10.5686,31.2216,17.6925 +48,1998,20.406016,10.602599999999999,31.4146,17.702399999999997 +49,1999,20.452844,10.63822,31.624,17.716102 +50,2000,20.502511,10.674688,31.851039,17.744588 +51,2001,20.573149,10.710163999999999,32.092372999999995,17.792254 +52,2002,20.665584000000003,10.74851,32.297866,17.849916 +53,2003,20.752265,10.774348,32.450617,17.916458 +54,2004,20.842273,10.802354000000001,32.563558,17.987062 +55,2005,20.957247000000002,10.831956,32.64002,18.059144 +56,2006,21.0959255,10.869133999999999,32.692484,18.161009 +57,2007,21.251411,10.91483,32.763406,18.296198 +58,2008,21.419958,10.978044,32.891203000000004,18.439287 +59,2009,21.592997,11.038882000000001,33.060784999999996,18.597040999999997 +60,2010,21.67981,11.087638,33.230799000000005,18.756252 +61,2011,22.001275999999997,11.133712,33.386160000000004,18.898439 
+62,2012,22.189700000000002,11.17417,33.509935999999996,19.038762 +63,2013,22.356872,11.217568,33.608877,19.200757 +64,2014,22.454566,11.279437999999999,33.608877,19.392219 +65,2015,,,33.879855,19.392219