From 945a659be5061c1aaf56ea1a1127ccdd42e830f6 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Mon, 8 Apr 2013 11:14:00 +0000 Subject: [PATCH] Proposal for student project: MoBiLe 2013 git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@688 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- doc/Proposal_MoBiLe_2013/Makefile | 50 ++++++++ .../mutalyzer-mobile-2013.bib | 17 +++ .../mutalyzer-mobile-2013.tex | 114 ++++++++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 doc/Proposal_MoBiLe_2013/Makefile create mode 100644 doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.bib create mode 100644 doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.tex diff --git a/doc/Proposal_MoBiLe_2013/Makefile b/doc/Proposal_MoBiLe_2013/Makefile new file mode 100644 index 00000000..a097b818 --- /dev/null +++ b/doc/Proposal_MoBiLe_2013/Makefile @@ -0,0 +1,50 @@ +# Generate PDF from LaTeX and BibTeX source. +# +# There are some hacks in here to work with LaTeX compilation, I'll try +# to document them with comments :) +# +# Martijn Vermaat, martijn@vermaat.name + +# Configuration +DOCUMENT = mutalyzer-mobile-2013 +PDFLATEX = /usr/bin/pdflatex +PDFLATEXFLAGS = -halt-on-error -interaction errorstopmode +BIBTEX = /usr/bin/bibtex + +# Just create the PDF! +all: pdf + +# Compile BibTeX source file (run this when citations or .bib file change) +bibtex: $(DOCUMENT).bbl + +# Create the files +pdf: $(DOCUMENT).pdf + +# For the PDF, we need a .tex LaTeX source. Actually, we also need a .bbl +# BibTeX database, but we don't want to regenerate that on every .tex change. +# We rerun pdflatex until no rerun is requested, so all references are okay. 
+%.pdf: %.tex %.bbl + $(PDFLATEX) $(PDFLATEXFLAGS) $< + while egrep -q -s 'Rerun (LaTeX|to get cross-references right)' $*.log ;\ + do \ + $(PDFLATEX) $(PDFLATEXFLAGS) $< ;\ + done + $(PDFLATEX) $(PDFLATEXFLAGS) $< # With ntheorem, another run is needed + +# For the .bbl BibTeX database we need the .bib BibTeX source and .tex LaTeX +# source. +# Afterwards, we remove the resulting PDF, because we require some additional +# compilation passes that are done by the %.pdf rule. +%.bbl: %.bib %.tex + $(PDFLATEX) $(PDFLATEXFLAGS) $* + $(BIBTEX) $* + $(PDFLATEX) $(PDFLATEXFLAGS) $* + while egrep -q -s 'Rerun (LaTeX|to get cross-references right)' $*.log ;\ + do \ + $(PDFLATEX) $(PDFLATEXFLAGS) $* ;\ + done + /bin/rm $*.pdf + +# Please make sure we don't kill any sources... +clean: + rm -f $(DOCUMENT).aux $(DOCUMENT).pdf $(DOCUMENT).log $(DOCUMENT).toc $(DOCUMENT).out $(DOCUMENT).bbl $(DOCUMENT).blg $(DOCUMENT).nav $(DOCUMENT).snm $(DOCUMENT).vrb diff --git a/doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.bib b/doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.bib new file mode 100644 index 00000000..475acd92 --- /dev/null +++ b/doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.bib @@ -0,0 +1,17 @@ +@unpublished{elzanowski-2010, + title = {{T}he {G}enetic {C}odes}, + author = {Andrzej (Anjay) Elzanowski and Jim Ostell}, + month = jul # "~07,", + year = {2010}, + note = {\url{http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c}} +} + +@article{slavoff-2013, +author = {Slavoff SA and Mitchell AJ and Schwaid AG and Cabili MN and Ma J and Levin JZ and Karger AD and Budnik BA and Rinn JL and Saghatelian A}, +title = {Peptidomic discovery of short open reading frame-encoded peptides in human cells}, +volume = {9}, +number = {1}, +pages = {59-64}, +year = {2013}, +journal = {Nature chemical biology} +} diff --git a/doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.tex b/doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.tex new file mode 100644 index 00000000..86818688 --- 
/dev/null +++ b/doc/Proposal_MoBiLe_2013/mutalyzer-mobile-2013.tex @@ -0,0 +1,114 @@ +\documentclass[a4paper,11pt]{article} +\usepackage{a4,fullpage} +\usepackage[latin1]{inputenc} +\usepackage[english]{babel} +\usepackage{amsmath,amsfonts,amssymb} +\usepackage{qpxmath} +\usepackage{tgpagella} +\renewcommand{\ttdefault}{txtt} +\usepackage[scaled=0.95]{helvet} +\usepackage[T1]{fontenc} +\usepackage[numbers]{natbib} +\bibliographystyle{plainnat} +\addto\captionsenglish{\renewcommand{\bibname}{References}} +%\shortcites{green-2010} +\usepackage{url} +%% Define a new 'leo' style for the package that will use a smaller +%% font. +\makeatletter +\def\url@leostyle{% + \@ifundefined{selectfont}{\def\UrlFont{\sf}}{\def\UrlFont{\small\ttfamily}}} +\makeatother +%% Now actually use the newly defined style. +\urlstyle{leo} +\usepackage{hyperref} +\hypersetup{ + final, + colorlinks=true, + citecolor=black, + filecolor=black, + linkcolor=red, + urlcolor=red, + anchorcolor=black, + pdfauthor={Martijn Vermaat}, + pdftitle={Research project proposal: Extending the Mutalyzer reference + sequence parser for the analysis of mysterious genes}, +} +\setlength\parskip{\medskipamount} +\setlength{\parindent}{0pt} +\pagestyle{plain} + + +\title{Research project proposal: Extending the Mutalyzer reference sequence + parser for the analysis of mysterious genes} +\date{April 8, 2013} +\author{Martijn Vermaat \and Jeroen F. J. Laros \and Peter + E. M. Taschner\\[1.5em] +\normalsize{Department of Human Genetics, Leiden University Medical Center}} + + +\begin{document} + + +\maketitle +\thispagestyle{empty} + + +\section*{Background} + +The application of molecular genetic techniques to elucidate the molecular +basis of hereditary disease in both research and diagnostic settings has led +to the identification of many sequence variations in human genes. 
+The Department of Human Genetics maintains several databases containing +sequence variations (see \href{http://www.lovd.nl}{lovd.nl}), which need to be +curated to ensure use of appropriate sequence variation nomenclature. +The current Mutalyzer 2 (\href{https://mutalyzer.nl}{mutalyzer.nl}) enables +extended checks of sequence variation nomenclature provided by the user, but +also provides mutation descriptions for all transcripts and proteins affected +by a genomic sequence change when a properly annotated reference sequence is +provided. The latter information already provides the basis for an extended +analysis of genotype-phenotype correlations. + + +\section*{Project description} + +The students will work on the reference sequence record parser of Mutalyzer to +capture and use additional annotation in +\href{http://www.ncbi.nlm.nih.gov/refseq/}{RefSeq}, +\href{http://www.lrg-sequence.org/}{LRG}, and +\href{http://www.ensembl.org/index.html}{Ensembl} records, which will help +extend Mutalyzer's functionality to ``exotic'' human genes +\begin{enumerate} + \itemsep0em + \item with alternative use of termination codons (UGA for selenocysteine and + UAG for pyrrolysine incorporation) \citep{elzanowski-2010}, + \item having mismatches between RefSeq genomic and transcript or protein + coding sequences, or + \item using alternative start codons \citep{slavoff-2013}. +\end{enumerate} +Each extension of the improved parser will be implemented by the Mutalyzer +development team on a publicly reachable server, so progress can be followed +during the project. +In addition, a solution has to be developed for bacterial translation, +including the automatic generation of mutated sequences, automatic submission +of these sequences and their reference counterparts for analysis and +comparison with web-based analysis tools and prediction of mutation effects. 
+ + +\section*{Implementation details} + +The Mutalyzer sequence variation nomenclature checker is implemented in Python +and uses \href{http://biopython.org/}{BioPython} and Python's XML libraries +for parsing reference sequence records. +To generalize specific formats such as GenBank and LRG, an abstract reference +sequence representation is used internally (``GenRecord''). +Students are expected to extend this representation with additional attributes +and write the code to implement them from data in the specific formats. +Some experience with programming in Python is required to take on this +project. + + +\bibliography{mutalyzer-mobile-2013} + + +\end{document} -- GitLab