Skip to content
Snippets Groups Projects
Commit 9776a6ef authored by Zuotian Tatum's avatar Zuotian Tatum
Browse files

Merge branch 'master' of git.lumc.nl:humgen/programming-course

parents b8daa6b7 55ce2860
No related branches found
No related tags found
No related merge requests found
Showing
with 5244 additions and 104 deletions
......@@ -30,8 +30,9 @@ The top-level directory contains slides for the following lessons.
1. [Welcome](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/welcome.ipynb)
2. [Introduction to Python](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/python.ipynb)
3. [Version control with Git](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/git.ipynb)
4. [Working with NumPy arrays](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/numpy.ipynb)
5. [Plotting with matplotlib](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/matplotlib.ipynb)
4. [More Python Goodness](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/more-python.ipynb)
5. [Working with NumPy arrays](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/numpy.ipynb)
6. [Plotting with matplotlib](http://nbviewer.ipython.org/urls/raw.github.com/LUMC/programming-course/master/matplotlib.ipynb)
Note: These links point to one-page renderings on the [IPython Notebook Viewer](http://nbviewer.ipython.org/); see below for how to get the real slideshows.
......
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Classes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A class as a simple container:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class Coordinate(object):\n",
" def __init__(self, x=0, y=0):\n",
" self.x = x\n",
" self.y = y"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"coordinate = Coordinate(0, 10)\n",
"print coordinate.x, coordinate.y"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0 10\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class PrintCoordinate(Coordinate):\n",
" def __str__(self):\n",
" return \"({0}, {1})\".format(self.x, self.y)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print_coordinate = PrintCoordinate(0, 10)\n",
"print print_coordinate"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(0, 10)\n"
]
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class OtherPrintCoordinate(Coordinate):\n",
" def __str__(self):\n",
" return \"x={0}, y={1}\".format(self.x, self.y)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"other_print_coordinate = OtherPrintCoordinate(0, 10)\n",
"print other_print_coordinate"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"x=0, y=10\n"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
#!/usr/bin/env python
import random
class Sequence(object):
    """Iterator that simulates a sequencing run, one cycle per step.

    Every step produces a "tile": a list holding one
    ``(nucleotide, quality)`` tuple per spot.  Iteration ends once
    ``readlength`` cycles have been produced.
    """

    # Alphabet the random nucleotides are drawn from.
    bases = ['A', 'C', 'G', 'T']

    def __init__(self, spots, readlength=100):
        """Set up a run with ``spots`` spots and ``readlength`` cycles."""
        self.spots = spots
        self.readlength = readlength
        self.cycle = 0

    def __len__(self):
        """Return the number of cycles in a full run."""
        return self.readlength

    def __iter__(self):
        """Return the object itself; it is its own iterator."""
        return self

    def next(self):
        """Advance one cycle and return its tile (Python 2 iterator hook).

        Raises StopIteration once the cycle counter exceeds
        ``readlength``.
        """
        self.cycle += 1
        if self.cycle > self.readlength:
            raise StopIteration
        # The random deduction from 40 is scaled by the cycle number, so
        # later cycles can lose more quality.  Tuple elements are
        # evaluated left to right: the nucleotide draw comes first.
        return [
            (self.bases[random.randrange(4)],
             40 - (self.cycle * random.randrange(40) / self.readlength))
            for _ in range(self.spots)
        ]
class Read(object):
    """A single read: its bases plus one quality score per base."""

    def __init__(self):
        """Start with an empty read and no quality scores."""
        self.read = ""
        self.qual = []

    def __str__(self):
        """Return the bases collected so far."""
        return self.read

    def add(self, base):
        """Append one base.

        Arguments:
            - base - a (nucleotide, quality) pair.
        """
        self.read += base[0]
        self.qual.append(base[1])

    def quality(self):
        """Return the mean quality score of the read.

        Raises ZeroDivisionError when no base has been added yet.
        """
        return sum(self.qual) / len(self.read)

    def trim(self, score):
        """Return the read with its low-quality tail removed.

        Scans from the last base towards the first and keeps everything
        up to and including the last base whose quality is at least
        ``score``.

        Arguments:
            - score - minimum quality a base needs to end the trimmed read.

        Returns:
            - The trimmed read (string); empty when no base qualifies.
        """
        # Stop at -1 so index 0 is inspected as well.
        for position in range(len(self.read) - 1, -1, -1):
            if self.qual[position] >= score:
                # +1: the qualifying base itself belongs to the result.
                return self.read[:position + 1]
        return ""
def withClass():
    """Simulate a two-spot run and report each read, using ``Read`` objects.

    For every read this prints the read with its mean quality, followed
    by the read trimmed at quality 39.
    """
    n_spots = 2
    sequencer = Sequence(n_spots)
    reads = []
    for _ in range(n_spots):
        reads.append(Read())
    for tile in sequencer:
        # A tile carries one base per spot, in the same order as `reads`.
        for read, base in zip(reads, tile):
            read.add(base)
    for read in reads:
        print("{0} {1}".format(read, read.quality()))
        print(read.trim(39))
def quality(quals):
    """Return the mean of the quality scores in ``quals``.

    Raises ZeroDivisionError when ``quals`` is empty.
    """
    total = sum(quals)
    count = len(quals)
    return total / count
def trim(read, quals, score):
    """Return ``read`` with its low-quality tail removed.

    Scans from the last base towards the first and keeps everything up
    to and including the last base whose quality is at least ``score``.

    Arguments:
        - read - the bases of the read (string).
        - quals - quality scores, one per base of ``read``.
        - score - minimum quality a base needs to end the trimmed read.

    Returns:
        - The trimmed read (string); empty when no base qualifies.
    """
    # Stop at -1 so index 0 is inspected as well.
    for position in range(len(read) - 1, -1, -1):
        if quals[position] >= score:
            # +1: the qualifying base itself belongs to the result.
            return read[:position + 1]
    return ""
def withoutClass():
    """Simulate a two-spot run and report each read, using plain lists.

    Bases and quality scores are kept in two parallel lists.  For every
    read this prints the read with its mean quality, followed by the
    read trimmed at quality 39.
    """
    n_spots = 2
    sequencer = Sequence(n_spots)
    reads = [""] * n_spots
    quals = [[] for _ in range(n_spots)]
    for tile in sequencer:
        for idx, (nucleotide, base_quality) in enumerate(tile):
            reads[idx] += nucleotide
            quals[idx].append(base_quality)
    for read, read_quals in zip(reads, quals):
        print("{0} {1}".format(read, quality(read_quals)))
        print(trim(read, read_quals, 39))
# When executed as a script, run the list-based variant of the demo.
if __name__ == "__main__":
    withoutClass()
File added
This diff is collapsed.
AGCTAG
TATCGTA
TGTAGAT
GAGATCGTAGATC
def calc_gc_percent(seq):
    """Return the GC percentage of ``seq`` as a float.

    Only upper-case 'A', 'C', 'G' and 'T' are counted; every other
    character is ignored.  Raises ZeroDivisionError when ``seq``
    contains none of those four letters.
    """
    strong = 0  # number of G or C bases
    weak = 0    # number of A or T bases
    for base in seq:
        if base == 'A' or base == 'T':
            weak += 1
        elif base == 'G' or base == 'C':
            strong += 1
    return strong * 100.0 / (strong + weak)
# Demonstrate the function on a fixed example; the result is shown with
# two decimals.
print "The sequence 'CAGG' has a %GC of {:.2f}".format(calc_gc_percent("CAGG"))
#!/usr/bin/env python
def calc_gc_percent(seq):
    """Return the GC percentage of ``seq`` as a float.

    Upper-case 'G' and 'C' count as GC, upper-case 'A' and 'T' as AT;
    anything else is skipped.  Raises ZeroDivisionError when none of
    the four letters occurs in ``seq``.
    """
    gc_total = counted = 0
    for base in seq:
        if base in ('G', 'C'):
            gc_total += 1
        elif base not in ('A', 'T'):
            # Neither GC nor AT: leave both tallies untouched.
            continue
        counted += 1
    return gc_total * 100.0 / counted
# Print the GC percentage of the hard-coded sequence 'CAGG', formatted
# to two decimals.
print "The sequence 'CAGG' has a %GC of {:.2f}".format(calc_gc_percent("CAGG"))
#!/usr/bin/env python
import sys
def calc_gc_percent(seq):
    """Return the GC percentage of ``seq`` as a float.

    The comparison is case-sensitive and characters other than
    'A', 'C', 'G', 'T' are ignored.  Raises ZeroDivisionError when
    ``seq`` has none of those letters.
    """
    # Materialise once so the sequence can be scanned twice.
    bases = list(seq)
    gc_count = sum(1 for base in bases if base in ('G', 'C'))
    at_count = sum(1 for base in bases if base in ('A', 'T'))
    return gc_count * 100.0 / (gc_count + at_count)
# The sequence to analyse is the first command-line argument.
input_seq = sys.argv[1]
print "The sequence '{}' has a %GC of {:.2f}".format(input_seq, calc_gc_percent(input_seq))
#!/usr/bin/env python
import sys
def calc_gc_percent(seq):
    """Return the GC percentage of ``seq`` as a float, ignoring case.

    Characters other than A, C, G and T do not contribute.  Raises
    ZeroDivisionError when ``seq`` contains none of those letters.
    """
    bases = seq.upper()
    gc_count = bases.count('G') + bases.count('C')
    at_count = bases.count('A') + bases.count('T')
    return gc_count * 100.0 / (gc_count + at_count)
# Read the sequence from the first command-line argument and report its
# GC percentage with two decimals.
input_seq = sys.argv[1]
print "The sequence '{}' has a %GC of {:.2f}".format(input_seq, calc_gc_percent(input_seq))
#!/usr/bin/env python
import sys
def calc_gc_percent(seq):
    """
    Computes which percentage of the sequence is G or C.

    Arguments:
        - seq - the input sequence (string); upper and lower case are
                treated alike.

    Returns:
        - GC percentage (float).

    Characters other than A, C, G and T are left out of the
    calculation; ZeroDivisionError is raised when none remain.
    """
    # Upper-casing first makes the comparison case-insensitive.
    relevant = [base for base in seq.upper()
                if base in ('A', 'C', 'G', 'T')]
    gc_count = len([base for base in relevant if base in ('G', 'C')])
    return gc_count * 100.0 / len(relevant)
# Script part: the sequence comes from the first command-line argument.
input_seq = sys.argv[1]
print "The sequence '{}' has a %GC of {:.2f}".format(input_seq, calc_gc_percent(input_seq))
#!/usr/bin/env python
import sys
def calc_gc_percent(seq):
    """
    Computes which percentage of the sequence is G or C.

    Arguments:
        - seq - the input sequence (string); case is ignored.

    Returns:
        - GC percentage (float).

    Raises:
        - ValueError for the first character that is not A, C, G or T.
        - ZeroDivisionError for an empty sequence.
    """
    tally = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
    # Upper-casing first lets lower-case input through.
    for base in seq.upper():
        if base not in tally:
            raise ValueError("Unexpected character found: {}. Only "
                             "ACTGs are allowed.".format(base))
        tally[base] += 1
    gc_count = tally['G'] + tally['C']
    return gc_count * 100.0 / (gc_count + tally['A'] + tally['T'])
# Analyse the sequence given as the first command-line argument.
input_seq = sys.argv[1]
print "The sequence '{}' has a %GC of {:.2f}".format(input_seq, calc_gc_percent(input_seq))
#!/usr/bin/env python
import sys
def calc_gc_percent(seq):
    """
    Computes which percentage of the sequence is G or C.

    Arguments:
        - seq - the input sequence (string); case is ignored.

    Returns:
        - GC percentage (float); 0.0 for an empty sequence.

    Raises:
        - ValueError for the first character that is not A, C, G or T.
    """
    gc_count = 0
    total = 0
    # Upper-casing first lets lower-case input through.
    for base in seq.upper():
        if base not in ('A', 'C', 'G', 'T'):
            raise ValueError("Unexpected character found: {}. Only "
                             "ACTGs are allowed.".format(base))
        total += 1
        if base in ('G', 'C'):
            gc_count += 1
    # Corner case: nothing was counted, so there is nothing to divide by.
    if total == 0:
        return 0.0
    return gc_count * 100.0 / total
# Take the sequence from the first command-line argument and print its
# GC percentage.
input_seq = sys.argv[1]
print "The sequence '{}' has a %GC of {:.2f}".format(input_seq, calc_gc_percent(input_seq))
#!/usr/bin/env python
import sys
def calc_gc_percent(seq):
    """
    Computes which percentage of the sequence is G or C.

    Arguments:
        - seq - the input sequence (string); case is ignored.

    Returns:
        - GC percentage (float); 0.0 for an empty sequence.

    Raises:
        - ValueError for the first character that is not A, C, G or T.
    """
    at_total, gc_total = 0, 0
    # Upper-casing first lets lower-case input through.
    for base in seq.upper():
        if base == 'G' or base == 'C':
            gc_total += 1
            continue
        if base == 'A' or base == 'T':
            at_total += 1
            continue
        raise ValueError("Unexpected character found: {}. Only "
                         "ACTGs are allowed.".format(base))
    length = at_total + gc_total
    # An empty sequence would otherwise divide by zero.
    return gc_total * 100.0 / length if length else 0.0
# Only run the command-line part when executed as a script, so that
# importing the module does not read sys.argv.
if __name__ == '__main__':
    input_seq = sys.argv[1]
    print "The sequence '{}' has a %GC of {:.2f}".format(input_seq, calc_gc_percent(input_seq))
This diff is collapsed.
......@@ -2631,36 +2631,6 @@
"**Hint:** All elements in a set are unique."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"$\\S$ Exercise: Analyse a repeat structure\n",
"===\n",
"\n",
"* Make a short tandem repeat that consists of three \"ACGT\" units and five \"TTATT\" units.\n",
"* Print all suffixes of the repeat structure."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"$\\S$ Exercise: Analyse a repeat structure\n",
"===\n",
"\n",
"* Print all substrings of length 3.\n",
"* Print all unique substrings of length 3."
]
},
{
"cell_type": "markdown",
"metadata": {
......@@ -2679,78 +2649,6 @@
" 2 * (4 > 3)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"2 * 3 > 4"
],
"language": "python",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"True"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"2 * (3 > 4)"
],
"language": "python",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"0"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"2 * (4 > 3)"
],
"language": "python",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"2"
]
}
],
"prompt_number": 11
},
{
"cell_type": "markdown",
"metadata": {
......
#!/usr/bin/env python
import sys
def calc_gc_percent(seq):
    """Calculates the GC percentage of the given sequence.

    Arguments:
        - seq - the input sequence (string).

    Returns:
        - GC percentage (float).

    Raises:
        - ValueError when a character other than A, C, G or T (in
          either case) is found.

    The returned value is always <= 100.0
    """
    at_count, gc_count = 0, 0
    # Change input to all caps to allow for non-capital input
    # sequence.
    for char in seq.upper():
        if char in ('A', 'T'):
            at_count += 1
        elif char in ('G', 'C'):
            gc_count += 1
        else:
            raise ValueError(
                "Unexpected character found: {}. Only ACTGs "
                "are allowed.".format(char))
    # Corner case handling: empty input sequence.
    try:
        return gc_count * 100.0 / (gc_count + at_count)
    except ZeroDivisionError:
        return 0.0
# Command-line entry point: the sequence is the first argument and its
# GC percentage is printed with two decimals.
if __name__ == '__main__':
    input_seq = sys.argv[1]
    print "The sequence '{}' has a %GC of {:.2f}".format(
        input_seq, calc_gc_percent(input_seq))
#!/usr/bin/env python
"""
My sequence toolbox module.
"""
import argparse
def calc_gc_percent(seq):
    """Computes which percentage of the sequence is G or C.

    Arguments:
        - seq - the input sequence (string); case is ignored.

    Returns:
        - GC percentage (float); 0.0 for an empty sequence.

    Raises:
        - ValueError for the first character that is not A, C, G or T.

    The returned value is always <= 100.0
    """
    # upper-case once so lower-case input is accepted
    bases = seq.upper()
    for base in bases:
        if base not in ('A', 'C', 'G', 'T'):
            raise ValueError("Invalid character found: {}".format(base))
    # corner case: an empty sequence has nothing to divide by
    if not bases:
        return 0.0
    # every remaining character is A, C, G or T, so the length is the
    # combined AT + GC count
    gc_count = bases.count('G') + bases.count('C')
    return (gc_count * 100.0) / len(bases)
def _show_results(seq, seq_gc):
"""Prints the sequence and its GC content.
Arguments:
- seq - sequence to print (string).
- seq_gc - GC percentage (float).
Returns:
- None
"""
print "The sequence '{}' has a %GC of {:.2f}".format(seq, seq_gc)
if __name__ == '__main__':
    # Command-line interface: `mode` selects how `value` is interpreted.
    parser = argparse.ArgumentParser()
    # 'file': value is a path to a file with one sequence per line;
    # 'text': value is the sequence itself.
    parser.add_argument('mode', type=str,
                        choices=['file', 'text'],
                        help="Input type of the script")
    parser.add_argument('value', type=str,
                        help="Input value of the script")
    args = parser.parse_args()

    if args.mode == 'file':
        # `with` closes the file on every exit path.  The previous
        # try/finally called f.close() even when open() itself failed,
        # which raised a NameError that hid the real error.
        with open(args.value, 'r') as f:
            for line in f:
                # Drop the trailing newline and surrounding whitespace.
                seq = line.strip()
                gc = calc_gc_percent(seq)
                _show_results(seq, gc)
    else:
        _show_results(args.value, calc_gc_percent(args.value))
/*
https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers
*/
/* Register the "Computer Modern" web font. */
@font-face {
    font-family: "Computer Modern";
    src: url("http://mirrors.ctan.org/fonts/cm-unicode/fonts/otf/cmunss.otf");
}

/* Notebook cells: fixed width, offset from the left edge. */
div.cell {
    width: 800px;
    margin-left: 16% !important;
    margin-right: auto;
}

h1 {
    font-family: Helvetica, serif;
}

h4 {
    margin-top: 12px;
    margin-bottom: 3px;
}

/* Rendered text cells: larger type, fixed width, centred. */
div.text_cell_render {
    font-family: "Computer Modern", "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif;
    line-height: 145%;
    font-size: 130%;
    width: 800px;
    margin-left: auto;
    margin-right: auto;
}

/* Code editor font. */
.CodeMirror {
    font-family: "Source Code Pro", source-code-pro, Consolas, monospace;
}

/* Hide elements with the "prompt" class. */
.prompt {
    display: none;
}

/* h5 inside text cells is styled as a large italic blue heading. */
.text_cell_render h5 {
    font-weight: 300;
    font-size: 22pt;
    color: #4057A1;
    font-style: italic;
    margin-bottom: 0.5em;
    margin-top: 0.5em;
    display: block;
}

.warning {
    color: rgb(240, 20, 20);
}
/*
https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers
*/
/* Register the "Computer Modern" web font. */
@font-face {
    font-family: "Computer Modern";
    src: url("http://mirrors.ctan.org/fonts/cm-unicode/fonts/otf/cmunss.otf");
}

/* Notebook cells: the fixed width and left offset are disabled here. */
div.cell {
    /* width: 800px;
       margin-left: 16% !important; */
    margin-right: auto;
}

h1 {
    font-family: Helvetica, serif;
}

h4 {
    margin-top: 12px;
    margin-bottom: 3px;
}

/* Rendered text cells: larger type, centred; fixed width disabled. */
div.text_cell_render {
    font-family: "Computer Modern", "Helvetica Neue", Arial, Helvetica, Geneva, sans-serif;
    line-height: 145%;
    font-size: 130%;
    /* width: 800px; */
    margin-left: auto;
    margin-right: auto;
}

/* Code editor font. */
.CodeMirror {
    font-family: "Source Code Pro", source-code-pro, Consolas, monospace;
}

/* Hide elements with the "prompt" class. */
.prompt {
    display: none;
}

/* h5 inside text cells is styled as a large italic blue heading. */
.text_cell_render h5 {
    font-weight: 300;
    font-size: 22pt;
    color: #4057A1;
    font-style: italic;
    margin-bottom: 0.5em;
    margin-top: 0.5em;
    display: block;
}

.warning {
    color: rgb(240, 20, 20);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment