Commit 0cb75314 authored by Laros's avatar Laros
Browse files

Added nbic_pi slides.

parent 18a90b24
../../submodules/presentation/Makefile
\ No newline at end of file
../../submodules/presentation/beamerthemelumc.sty
\ No newline at end of file
../../submodules/presentation-pics/pics/galaxy_mpileup.xcf
\ No newline at end of file
../../submodules/presentation/gen2phen_logo.eps
\ No newline at end of file
../../submodules/presentation-pics/pics/ifgen_home.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/ifgen_mpileup.xcf
\ No newline at end of file
../../shared/k-mer.dat
\ No newline at end of file
../../shared/k-mer.gnp
\ No newline at end of file
../../shared/k-mer2.dat
\ No newline at end of file
../../shared/k-mer2.gnp
\ No newline at end of file
../../submodules/presentation/lgtc_logo.eps
\ No newline at end of file
../../submodules/presentation/lumc_logo.eps
\ No newline at end of file
../../submodules/presentation/lumc_logo_small.eps
\ No newline at end of file
../../submodules/presentation-pics/pics/mutalyzer_shot1.eps
\ No newline at end of file
../../submodules/presentation-pics/pics/mutalyzer_shot2.eps
\ No newline at end of file
../../submodules/presentation-pics/pics/mutalyzer_shot3.eps
\ No newline at end of file
../../submodules/presentation/nbic_logo.eps
\ No newline at end of file
% Preamble: document class, presentation metadata and logo macros, followed by
% loading the `lumc' Beamer theme.
\documentclass[slidestop]{beamer}
\title{Progress report 2011}
% The \my... macros below are defined with \providecommand, so any definition
% that already exists at this point takes precedence over the values given here.
% NOTE(review): presumably these are read by the `lumc' theme loaded at the end
% of the preamble -- confirm against beamerthemelumc.sty.
\providecommand{\myConference}{NBIC PI meeting}
\providecommand{\myDate}{Thursday, 26 January 2012}
\author{Jeroen F. J. Laros}
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
% Logo macro: the LGTC logo at 1cm height, lowered by 0.1cm via \raisebox.
% NOTE(review): the line breaks inside the macro body insert space tokens
% around the logo; left as-is because the theme layout may rely on them.
\providecommand{\lastCenterLogo}{
\raisebox{-0.1cm}{
\includegraphics[height = 1cm]{lgtc_logo}
}
}
% Logo macro: the NBIC logo at 0.7cm height.  The commented-out lines are
% alternative logos (NWO, Gen2Phen) that are currently disabled.
\providecommand{\lastRightLogo}{
\includegraphics[height = 0.7cm]{nbic_logo}
%\includegraphics[height = 0.8cm]{nwo_logo_en}
%\hspace{1.5cm}\includegraphics[height = 0.7cm]{gen2phen_logo}
}
% Load the theme after the macros above have been defined.
\usetheme{lumc}
\begin{document}
\input{petrinet}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title page.
\bodytemplate
% First page of the presentation.
\section{Introduction}
\begin{frame}
\frametitle{About me}
\begin{tabular}{@{\fakeitem}ll}
May 2005 & Master Computer Science. \\
Dec 2009 & Ph.D. Mathematics and Natural Sciences. \\
May 2009 & Post-doctoral researcher LUMC. \\
Sep 2010 & Co\"ordinator Bioinformatics LGTC (LUMC).
\end{tabular}
\bigskip
\pause
Currently active in:
\begin{itemize}
\item Databases.
\item Formal descriptions.
\item Next Generation Sequencing.
\end{itemize}
\end{frame}
\section{Current activities}
\begin{frame}
\frametitle{Research and Development}
\pause
Genotype to Phenotype Databases (Gen2Phen):
\begin{itemize}
\item The \emph{Leiden Open Variation Database} (LOVD).
\item {\color{yellow} Mutalyzer}.
\end{itemize}
\medskip
\pause
Forensic Laboratory for DNA Research (FLDO):
\begin{itemize}
\item \emph{Short Tandem Repeat} (STR) profiling.
\item \emph{Metagenomics} on forensic traces.
\end{itemize}
\medskip
\pause
Leiden Genome Technology Center (LGTC):
\begin{itemize}
\item \emph{Next Generation Sequencing} (NGS) pipelines.
\item {\color{yellow} Parallel and implicit workflows} in a cluster
environment.
\end{itemize}
\medskip
\pause
National collaborations:
\begin{itemize}
\item The {\color{yellow} \emph{Genome of the Netherlands}} (GoNL).
\item The {\color{yellow} \emph{Diagnostic Variant Database}} (DVD).
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Research and Development (2)}
\pause
Workflow engines and modules:
\begin{itemize}
\item {\color{yellow} CLI-mate}.
\end{itemize}
\medskip
\pause
Metrics for NGS datasets:
\begin{itemize}
\item {\color{yellow} Distance between datasets} with expression
information.
\item {\color{yellow} Short sequence profiles} for phylogenetic research.
\end{itemize}
\medskip
\pause
Infrastructure:
\begin{itemize}
\item {\color{yellow} NGS \emph{Laboratory Information Management System}}
(LIMS).
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Research and Development (3)}
\pause
Pre-mRNA analysis:
\begin{itemize}
\item Novel splice sites.
\item Recursive splicing.
\item Splicing order.
\end{itemize}
\medskip
\pause
Emerging technologies:
\begin{itemize}
\item Ion Torrent.
\item Pacific Bio.
\end{itemize}
\medskip
\pause
Other:
\begin{itemize}
\item {\color{yellow} Variant description ontology}.
\item Copy number variation in exome sequencing datasets.
\item Low pass full genome sequencing.
\item Bacterial strain identification.
\item Antibiotic resistance gene identification.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Education}
\pause
Courses:
\begin{itemize}
\item Next Generation Sequencing data analysis.
\item Basic Linux Course.
\item NGS Introduction Course.
\end{itemize}
\medskip
\pause
Lectures:
\begin{itemize}
\item Version Control.
\item Good Practices for Programming.
\item Template Attribute Language.
\item Diagnostic Variant Database.
\item GAPSS3 exome sequencing pipeline.
\item Y-STRs and population sampling for forensic reference purposes.
\item Usage of $k$-mer profiles in NGS data.
\item \ldots
\end{itemize}
\end{frame}
\section{Research and Development}
\begin{fframe}
\frametitle{Mutalyzer 2.0}
Variant nomenclature checker applying \emph{Human Genome Variation Society}
(HGVS) guidelines.
\medskip
\pause
Integral part of \emph{Leiden Open Variation Database} (LOVD).
\begin{itemize}
\item Adding a new variant.
\begin{itemize}
\item Disambiguation of variant descriptions.
\item Basic effect prediction.
\end{itemize}
\pause
\item Finding alternative variant descriptions.
\begin{itemize}
\item Mapping variants from a transcript to a genome build and vice
versa.
\pause
\begin{itemize}
\item Bridge between NGS data and LSDBs.
\end{itemize}
\end{itemize}
\end{itemize}
\vfill
{\onslide<1->
\bt{https://www.mutalyzer.nl}
\bs{M. Vermaat}
}
\end{fframe}
\begin{frame}
\frametitle{Mutalyzer 2.0}
\begin{figure}
\includegraphics[height = \textwidth, angle = 270]{mutalyzer_shot1}
\caption{The Mutalyzer name checker.}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Mutalyzer 2.0}
\begin{figure}
\includegraphics[height = \textwidth, angle = 270]{mutalyzer_shot2}
\caption{Checking a variant.}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Mutalyzer 2.0}
\begin{figure}
\includegraphics[height = \textwidth, angle = 270]{mutalyzer_shot3}
\caption{Basic effect prediction.}
\end{figure}
\end{frame}
\begin{fframe}
\frametitle{Mutalyzer: Highlights}
\pause
More than $11$ million requests processed.
\medskip
\pause
Integral part of LOVD version 3:
\begin{itemize}
\item Checking the correctness of variants entered in LOVD.
\item Mapping variant descriptions on other transcripts.
\end{itemize}
\medskip
\pause
The syntax of the HGVS nomenclature is now formalised.
\vfill
\input{publication}
\end{fframe}
\begin{fframe}
\frametitle{Mutalyzer: future plans}
By adding tags to production rules, we can do more:
\begin{itemize}
\pause
\item Distinguish between current and obsolete rules (for backwards
compatibility).
\pause
\item Distinguish between the \emph{actual} grammar and the superset that
Mutalyzer accepts (to correct common mistakes).
\begin{itemize}
\item We generate the formal description in BNF from source.
\end{itemize}
\pause
\item Relabel rules to make them compatible with OWL.
\begin{itemize}
\item Clears the road for publishing these descriptions in the semantic
web.
\end{itemize}
\end{itemize}
\vfill
{\onslide<1->
\bt{https://www.mutalyzer.nl}
\bs{Z. Tatum}
}
\end{fframe}
\begin{fframe}
\frametitle{Mutalyzer: future plans}
Description generation (currently in alpha):
\begin{itemize}
\pause
\item Combine or split variant descriptions.
\begin{itemize}
\item E.g., \bt{1\_5delATGGCinsCCATG} $\Rightarrow$ \bt{1\_4inv;5C>G}.
\end{itemize}
\pause
\item Find the difference between two reference sequences.
\begin{itemize}
\item Lift over variant descriptions.
\end{itemize}
\pause
\item Formalise the semantics of variant descriptions.
\end{itemize}
\medskip
\pause
Direct support for full chromosomes.
\begin{itemize}
\item Close the gap between NGS and locus-specific databases.
\end{itemize}
\vfill
{\onslide<1->
\bt{https://www.mutalyzer.nl}
\bs{\phantom{I}}
}
\end{fframe}
\begin{fframe}
\frametitle{CLI-mate}
Interface generator for command line programs.
\begin{itemize}
\item Web form to define the CLI of a program.
\item Definition is saved in RDF.
\pause
\item From the definition multiple user-friendly interfaces can be
generated.
\begin{itemize}
\item Galaxy.
\item Moteur.
\end{itemize}
\pause
\item But it doesn't end there.
\begin{itemize}
\item User manual.
\item HTML web interface.
\item Web services.
\item \ldots
\end{itemize}
\end{itemize}
\vfill
{\onslide<1->
\bt{http://www.humgen.nl/cli-mate}
\bs{Z. Tatum}
}
\end{fframe}
\begin{frame}
\frametitle{CLI-mate}
\begin{figure}
\includegraphics[width = \textwidth, height = 0.85\textheight]{ifgen_home}
\caption{CLI-mate homepage.}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{CLI-mate}
\begin{figure}
\includegraphics[width = \textwidth, height = 0.85\textheight]
{ifgen_mpileup}
\caption{Defining a tool.}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{CLI-mate}
\begin{figure}
\includegraphics[width = \textwidth, height = 0.85\textheight]
{galaxy_mpileup}
\caption{Generated interface.}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Metrics for NGS data files}
Distance between wiggle files.
\begin{figure}
\includegraphics[height = \textwidth, angle = 270]{wiggle}
\caption{Wiggle file.}
\end{figure}
Pairwise comparison based on the \emph{multiset} distance measure.
\end{frame}
\begin{fframe}
\frametitle{Metrics for NGS data files}
Comparing $k$-mer profiles.
\begin{figure}
\vspace{-0.5cm}
\colorbox{white}{
\includegraphics[width = \textwidth]{k-mer}
}
\colorbox{white}{
\includegraphics[width = \textwidth]{k-mer2}
}
\vspace{-0.5cm}
\caption{Two $k$-mer profiles.}
\end{figure}
\vfill
\bs{J.W.F. van der Heijden}
\end{fframe}
\begin{fframe}
\frametitle{Forensics}
\pause
STR profiling:
\begin{itemize}
\item Look deeper into STRs by using sequencing.
\item Semi-global alignment of flanking sequences.
\item Regular expressions for known alleles.
\item Classification of new alleles.
\end{itemize}
\medskip
\pause
SNP profiling:
\begin{itemize}
\item Highly variable regions in a certain population.
\item Easier to work with than STRs.
\end{itemize}
\vfill
{\onslide<1->
\bs{J.W.F. van der Heijden, K.J. van der Gaag, P. de Knijff}
}
\end{fframe}
\begin{fframe}
\frametitle{Genome of the Netherlands}
mtDNA analysis:
\pause
\begin{itemize}
\item Heteroplasmy.
\item Parental leakage.
\item Quality control.
\item Haplotyping.
\end{itemize}
\medskip
\pause
Y-chromosome:
\pause
\begin{itemize}
\item Y-chromosomal STR markers.
\item Haplotyping.
\end{itemize}
\vfill
{\onslide<1->
\bs{M. Vermaat, M. Li, M. Stoneking, the GoNL consortium}
}
\end{fframe}
\section{Development}
\begin{fframe}
\frametitle{DVD}
The \emph{Diagnostic Variant Database}.
\begin{itemize}
\item Share variants found in exome sequencing experiments.
\item Find functionally relevant variants.
\end{itemize}
\medskip
\pause
Technical details:
\begin{itemize}
\item Store coverage information to determine reference calls.
\item Disambiguation of variant descriptions.
\item Pooling without loss of information.
\item Duplicate sample detection.
\begin{itemize}
\item Allows for re-annotation without polluting the database.
\end{itemize}
\item Encrypted connection with authentication.
\end{itemize}
\vfill
{\onslide<1->
\bs{V. Guryev, C. Gilissen, I. Nijman, D. van Enckevort, H. Mei, R. Wagner}
}
\end{fframe}
\begin{fframe}
\frametitle{DVD}
As a side product, VCFlib:
\begin{itemize}
\item API for reading and writing VCF files.
\item Added disambiguation.
\end{itemize}
\vfill
\bt{https://trac.nbic.nl/dvd/}
\bs{M. Vermaat}
\end{fframe}
\begin{frame}
\frametitle{SGE qmake GAPSS3}
Advantages when using a cluster:
\begin{itemize}
\item Automatic scheduling.
\item Maximum parallelisation.
\end{itemize}
\medskip
\pause
Conceptually interesting:
\begin{itemize}
\item The workflow is implicit.
\item Many workflows can be combined.
\end{itemize}
\medskip
\pause
Possibly we can combine this with CLI-mate to generate workflows.
\begin{itemize}
\item We already have a description of the parameters.
\item Formalise the description of the input and output files.
\end{itemize}
\end{frame}
%\begin{frame}
% \begin{figure}
% \petrinet{}{}{}{}{}{}{}{}
% \caption{A Petri net}
% \end{figure}
%\end{frame}
\begin{fframe}
\frametitle{NGS LIMS}
Technical details:
\begin{itemize}
\item Support for four different sequencing platforms.
\item An API to communicate with a scheduler.
\item Based on Django for rapid development.
\end{itemize}
\medskip
\pause
Bringing pipelines together:
\begin{itemize}
\item Scheduler communicates with:
\begin{itemize}
\item LIMS.
\item Sequencers.
\item Cluster / Storage.
\end{itemize}
\end{itemize}
\vfill
{\onslide<1->
\bs{F. Schaeffer, Z. Tatum}
}
\end{fframe}
\section{Education}
\begin{fframe}
\frametitle{Next Generation Sequencing data analysis}
5--7 September 2011
\begin{itemize}
\item PhD students, postdocs, senior researchers.
\pause
\item Discussion of different platforms and produced data.
\begin{itemize}
\item Illumina, Roche, ABI, Ion Torrent, etc.
\end{itemize}