Commit ce9066f2 authored by Laros's avatar Laros
Browse files

Added lecture.

parent 236eeddf
../../submodules/presentation-pics/pics/Ion_Proton_s.jpg
\ No newline at end of file
../../submodules/presentation/Makefile
\ No newline at end of file
../../submodules/presentation/beamerthemelumc.sty
\ No newline at end of file
../../submodules/presentation-pics/pics/hiseq_2000.jpg
\ No newline at end of file
../../shared/k-mer.dat
\ No newline at end of file
../../shared/k-mer.gnp
\ No newline at end of file
../../shared/k-mer2.dat
\ No newline at end of file
../../shared/k-mer2.gnp
\ No newline at end of file
../../submodules/presentation/logos
\ No newline at end of file
../../submodules/presentation-pics/pics/mutalyzer_input.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/mutalyzer_output.xcf
\ No newline at end of file
% Preamble of the "Research selection" beamer presentation.
\documentclass[slidestop]{beamer}
% Title and event metadata.  The \my* macros are only defined here (via
% \providecommand, so an earlier definition would win); presumably they are
% read by the lumc theme loaded below -- TODO confirm in beamerthemelumc.sty.
\title{Research selection}
\providecommand{\myConference}{TU/e}
\providecommand{\myDate}{Tuesday, May 26, 2015}
\author{Jeroen F.J. Laros}
% Affiliation lines.
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
% Centre logo: the LGTC logo at 1cm height, lowered by 0.1cm.
% NOTE(review): the name suggests it is used on the last page -- confirm.
\providecommand{\lastCenterLogo}{
\raisebox{-0.1cm}{
\includegraphics[height=1cm]{logos/lgtc_logo}
}
}
% Right logo: the body contains only whitespace, i.e. no logo is set.
\providecommand{\lastRightLogo}{
}
% Load the theme after the macros above have been provided.
\usetheme{lumc}
\begin{document}
% \algorithmexample{<step>}: captioned figure with a framed 300x60pt picture
% showing a reference sequence (top, y = 40) and an observed sequence
% (bottom, y = 10).  Both have unchanged flanks (x = 0..30 and x = 270..300)
% around a coloured area of change (x = 30..270), labelled 8 and 98.
%   <step> = 1 : additionally draw two crossing curves between the sequences;
%   <step> > 1 : redraw x = 50..260 on both sequences in the current colour
%                (marked "Inv." below);
%   <step> > 2 : also redraw the short segment x = 35..45 on both sequences.
% Any other value (e.g. 0) draws only the base picture.
% Line ends around the \fbox contents are closed with `%': otherwise each one
% becomes an interword space inside the box (three before the picture, one
% after it), which shifts the picture off-centre within its frame.
\newcommand{\algorithmexample}[1]{%
\begin{figure}[]%
\begin{center}%
\fbox{%
\setlength{\unitlength}{1pt}%
\linethickness{3pt}%
\begin{picture}(300, 60)(0, 0)
\put(0, 10){\line(1, 0){30}} % Observed sequence.
\put(30, 10){\color{red}\line(1, 0){240}\color{white}} % Change.
\put(270, 10){\line(1, 0){30}}
\put(0, 14){{\scriptsize observed}}
\put(0, 40){\line(1, 0){30}} % Reference sequence.
\put(30, 40){\color{green}\line(1, 0){240}\color{white}} % Change.
\put(270, 40){\line(1, 0){30}}
\put(0, 46){{\scriptsize reference}}
\put(30, 30){{\scriptsize $8$}}
\put(270, 30){{\scriptsize $98$}}
% Step 1 only: the two crossing curves.
\ifthenelse{\equal{#1}{1}}{
\drawcurve(50, 40)(55, 35)(155, 25)(255, 15)(260, 10)
\drawcurve(260, 40)(255, 35)(155, 25)(55, 15)(50, 10)
}{}
% Steps 2 and up: overlay the inverted region.
\ifthenelse{#1>1}{
\put(50, 10){\line(1, 0){210}} % Inv.
\put(50, 40){\line(1, 0){210}} % Inv.
}{}
% Steps 3 and up: overlay the short leading segment as well.
\ifthenelse{#1>2}{
\put(35, 10){\line(1, 0){10}}
\put(35, 40){\line(1, 0){10}}
}{}
\end{picture}%
}%
\end{center}%
\caption{How would a human do it?}%
\end{figure}%
}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title page.
\bodytemplate
% First page of the presentation.
\section{Introduction}
\subsection{About me}
\begin{pframe}
The last decade:
\begin{itemize}
\item Master Computer Science.
\item Ph.D. Mathematics and Natural Sciences.
\item Post-doctoral researcher LUMC.
\item Senior researcher.
\begin{itemize}
\item Co\"ordinator Bioinformatics LGTC.
\end{itemize}
\end{itemize}
\bigskip
\pause
Currently active in:
\begin{itemize}
\item Variant databases / formal descriptions.
\item Next Generation Sequencing.
\item Metagenomics.
\item Forensics.
\end{itemize}
\end{pframe}
\subsection{Activities}
\begin{pframe}
Collaborations:
\begin{itemize}
\item Within the LUMC: Human Genetics, Clinical Genetics, Forensic lab,
Medical Microbiology, Dermatology, Hematology, ICT.
\item Leiden University: Leiden Institute of Advanced Computer Science,
Faculty of Social Sciences.
\item Hogeschool Leiden.
\item SURFsara.
\item Commercial: GenomeScan, BaseClear, PhenoSystems.
\end{itemize}
\end{pframe}
\begin{pframe}
Other activities:
\begin{itemize}
\item Communication with external partners.
\item Innovation / research and development.
\item Implementation in diagnostics.
\item Design and policy HPC infrastructure.
\item Design and policy good research practice.
\item Algorithm design.
\item Audits.
\item Education.
\end{itemize}
\end{pframe}
\section{Next generation sequencing}
\subsection{Sequencers}
\begin{pframe}
\begin{minipage}[t]{0.47\textwidth}
\begin{figure}
\includegraphics[width=\textwidth]{hiseq_2000}
\caption{HiSeq 2500.}
\end{figure}
\end{minipage}
\hfill
\begin{minipage}[t]{0.47\textwidth}
\begin{figure}
\includegraphics[width=\textwidth]{Ion_Proton_s}
\caption{Ion Proton.}
\end{figure}
\end{minipage}
\end{pframe}
\subsection{Next generation sequencing data}
\begin{pframe}
\begin{lstlisting}[language=none, caption={A FastQ file.}]
@SGGPP:4:101
TTCGGGGGCTGGCAAATCCACTTCCGTGACACGCTACCATTCGCTGGTGGT
+
-'+4589,53330-0&07+03:54/2362-+.488587>@/25440++0(+
@SGGPP:4:102
CGGTAAACCACCCTGCTGACGGAACCCTAATGCGCCTGAAAGACAGCGTTC
+
34/--0'+.000(.55:;:99(0(+2(22(0316;185;;0;:<<>=AA59
@SGGPP:4:106
TCGTTAACGACTTTGTTCGCCACCGCAACCGCCTGTTTCGGGTCACAGGCA
+
09875;5?<;?@A4?B:BBB<AA>CCC>C>BB0.->=0488+3444:@5@<
@SGGPP:4:112
TTGATGAATATATTATTTCAGGGAATAATTATGACACCTTTAGAACGCATT
+
70<<@::5:<;==7;>>/79<:.:494.8(,,8:753/5@5??C>B???B7
\end{lstlisting}
\end{pframe}
\subsection{Data analysis}
\begin{pframe}
Very diverse.
\bigskip
Align to a reference genome (resequencing):
\begin{itemize}
\item Variant detection.
\item Phylogenetic reconstruction.
\end{itemize}
\bigskip
\pause
Or to multiple references:
\begin{itemize}
\item Antibiotic resistance testing.
\end{itemize}
\bigskip
\pause
\textit{De novo} assembly:
\begin{itemize}
\item First step to make a reference genome.
\item Finding large rearrangements.
\end{itemize}
\end{pframe}
\subsection{Metrics for NGS data files}
\begin{pframe}
Comparing $k$-mer profiles.
\begin{figure}
\vspace{-0.5cm}
\colorbox{white}{
\includegraphics[width = \textwidth]{k-mer}
}
\colorbox{white}{
\includegraphics[width = \textwidth]{k-mer2}
}
\vspace{-0.5cm}
\caption{Two $k$-mer profiles.}
\end{figure}
\end{pframe}
\subsection{Forensics}
\begin{pframe}
STR profiling:
\begin{itemize}
\item Look deeper into STRs by using sequencing.
\item Semi-global alignment of flanking sequences.
\item Regular expressions for known alleles.
\item Classification of new alleles.
\end{itemize}
\medskip
\pause
SNP profiling:
\begin{itemize}
\item Highly variable regions in a certain population.
\item Easier to work with than STRs.
\end{itemize}
\end{pframe}
\section{Variant databases and formal descriptions}
\subsection{Effect prediction}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[width=0.9\textwidth]{mutalyzer_input}
\includegraphics[width=0.9\textwidth]{mutalyzer_output}
\end{center}
\caption{Mutalyzer.}
\end{figure}
\end{pframe}
\subsection{A ``human'' way of finding a description}
\begin{pframe}
Observation:
\begin{itemize}
\item There is always a default way of describing a variant (\bt{delins}).
\item A \bt{delins} may be split in smaller parts.
\end{itemize}
\bigskip
\pause
Outline:
\begin{itemize}
\item Find the \emph{area of change}.
\item Describe this as a \bt{delins}.
\item Find the largest overlap in this area of change, splitting the area
in two.
\item Describe the two sub areas, and see whether this description is
smaller than the one we have.
\end{itemize}
\end{pframe}
\subsection{Outline of the algorithm}
% Four-overlay walk-through of the description extraction example.
% pframe and \bt are not defined in this file; presumably they come from the
% lumc theme -- TODO confirm.
\begin{pframe}
% Exactly one figure per overlay: overlays 1-4 call \algorithmexample with
% arguments 0-3 respectively.
\only<1>{\algorithmexample{0}}
\only<2>{\algorithmexample{1}}
\only<3>{\algorithmexample{2}}
\only<4>{\algorithmexample{3}}
% Single delins description of positions 8_98; it carries no overlay
% specification.  The operator keyword is coloured yellow, the rest white.
\bt{8\_98\color{yellow}delins\color{white}AGATGCGATAGATTAGCTATATAGGATCG\ldots}
% From overlay 3 on: the same change split into delins / inv / delins parts.
\onslide<3->{\bt{[8\_12\color{yellow}delins\color{white}AGATG;13\_96\color{yellow}inv\color{white};97\_98\color{yellow}delins\color{white}TG]}}
% From overlay 4 on: the first delins replaced by two substitutions.
\onslide<4->{\bt{[8G\color{yellow}>\color{white}A;12C\color{yellow}>\color{white}G;13\_96\color{yellow}inv\color{white};97\_98\color{yellow}delins\color{white}TG]}}
\end{pframe}
\subsection{Finding common sub strings}
\begin{pframe}
How would a computer do it?
\begin{table}[]
\begin{center}
\begin{tabular}{l|lllllll}
& \bt{A} & \bt{T} & \bt{G} & \bt{A} & \bt{G} & \bt{C} & \bt{G} \\
\hline
\bt{A} & 1 & 0 & 0 & 1 & 0 & 0 & 0 \\
\bt{T} & 0 & 2 & 0 & 0 & 0 & 0 & 0 \\
\bt{C} & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
\bt{A} & 1 & 0 & 0 & \color{yellow}1 & 0 & 0 & 0 \\
\bt{G} & 0 & 0 & 1 & 0 & \color{yellow}2 & 0 & 1 \\
\bt{C} & 0 & 0 & 0 & 0 & 0 & \color{yellow}3 & 0 \\
\bt{A} & 1 & 0 & 0 & 1 & 0 & 0 & 0 \\
\end{tabular}
\end{center}
\caption{LCS dynamic programming.}
\end{table}
\end{pframe}
\subsection{Accuracy vs. speed}
\begin{pframe}
\begin{tabular}{l@{\ \ $\Rightarrow$\ \ }l}
\bt{AGAGGACG} & \bt{AG AG GA CG} \\
\bt{GAGGACA} & \bt{GA AG GG GA AC CA}
\end{tabular}
\pause
\begin{table}
\begin{center}
\begin{tabular}{l|llll}
& \bt{A} & \bt{A} & \bt{G} & \bt{C} \\
& \bt{G} & \bt{G} & \bt{A} & \bt{G} \\
\hline
\bt{GA} & 0 & 0 & 1 & 0 \\
\bt{AG} & 1 & \onslide<3>{\color{yellow}}1 & 0 & 0 \\
\bt{GG} & 0 & 0 & 0 & 0 \\
\bt{GA} & 0 & 0 & \onslide<3>{\color{yellow}}2 & 0 \\
\bt{AC} & 0 & 0 & 0 & 0 \\
\bt{CA} & 0 & 0 & 0 & 0 \\
\end{tabular}
\end{center}
\caption{Rough method to find large strings.}
\end{table}
\end{pframe}
\begin{pframe}
\begin{minipage}[t]{0.45\textwidth}
\begin{table}[]
\begin{center}
\begin{tabular}{l|llll}
& \bt{A} & \bt{A} & \bt{G} & \bt{C} \\
& \bt{G} & \bt{G} & \bt{A} & \bt{G} \\
\hline
\bt{GA} & 0 & 0 & 1 & 0 \\
\bt{AG} & 1 & 1 & 0 & 0 \\
\bt{GG} & 0 & 0 & 0 & 0 \\
\bt{GA} & 0 & 0 & 2 & 0 \\
\bt{AC} & 0 & 0 & 0 & 0 \\
\bt{CA} & 0 & 0 & 0 & 0 \\
\end{tabular}
\end{center}
\caption{``Zoom out'' $k = 2$.}
\end{table}
\end{minipage}
\hfill
\begin{minipage}[t]{0.45\textwidth}
\begin{table}[]
\begin{center}
\begin{tabular}{l|ll}
& \bt{A} & \bt{G} \\
& \bt{G} & \bt{G} \\
& \bt{A} & \bt{A} \\
\hline
\bt{GAG} & 0 & 0 \\
\bt{AGG} & 0 & 0 \\
\bt{GGA} & 0 & 1 \\
\bt{GAC} & 0 & 0 \\
\bt{ACA} & 0 & 0 \\
\end{tabular}
\end{center}
\caption{``Zoom out'' $k = 3$.}
\end{table}
\end{minipage}
\pause
We find all common sub strings larger than $k$.
\pause
These strings are at least $\ell k$ and at most
$\ell k + (k - 1)$ long.
\end{pframe}
\section{Questions?}
\lastpagetemplate
\begin{pframe}
\begin{center}
Acknowledgements:
\bigskip
\bigskip
Martijn Vermaat
Jonathan Vis
Lusine Khachatryan
Yahya Anvar
Kristiaan van der Gaag
Peter de Knijff
Johan den Dunnen
\end{center}
\end{pframe}
\end{document}
Subproject commit 347608b231e657d697d55c45e8e00f37ad532b60
Subproject commit 3186a8e4f8dc8f198c7a96756dcd3b252bcfea1d
Subproject commit 5eaf7b127b82480d4aa147e9a583523d09b66269
Subproject commit 57047ad1fa7d5f013e2af50c306329e4abca9db7
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment