Commit 82bc1c6f authored by Laros's avatar Laros
Browse files

Rough outline of the Melanoma analysis presentation.

parent 182d0bbd
../../submodules/presentation/Makefile
\ No newline at end of file
../../submodules/presentation-pics/pics/assemblyline.jpg
\ No newline at end of file
../../submodules/presentation/beamerthemelumc.sty
\ No newline at end of file
../../submodules/presentation-pics/pics/hiseq_2000.jpg
\ No newline at end of file
../../submodules/presentation/logos
\ No newline at end of file
\documentclass[slidestop]{beamer}
\author{Jeroen F.J. Laros}
\title{Whole genome sequencing in Dutch melanoma families}
\providecommand{\mySubTitle}{Experiences and challenges}
\providecommand{\myConference}{MTG work discussion}
\providecommand{\myDate}{3-dec-2015}
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
\usetheme{lumc}
\begin{document}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title slide.
\makeTitleSlide{}
% First page of the presentation.
\section{Introduction}
\subsection{Samples}
\begin{pframe}
X families, about Y members per family.
\begin{itemize}
\item Full genome.
\item $z\times$ coverage.
\end{itemize}
\end{pframe}
\section{Data generation}
\subsection{Sequencing}
\begin{pframe}
\begin{minipage}[t]{0.47\textwidth}
\begin{figure}
\includegraphics[width=\textwidth]{hiseq_2000}
\caption{HiSeq 2500.}
\end{figure}
\end{minipage}
\hfill
\begin{minipage}[t]{0.47\textwidth}
Characteristics:
\begin{itemize}
\item High throughput.
\item Paired end.
\item High accuracy.
\item Read length $2 \times 150$bp.
\item Run time of 6 days.
\end{itemize}
\end{minipage}
\end{pframe}
\begin{pframe}
Sequencing was done at the Sanger Institute.
% 938528037 mapped reads
% 1672174 unmapped reads
\begin{itemize}
\item Two times $100$ nucleotides.
\item $1,\!000,\!000,\!000$ reads.
\item $100,\!000,\!000,\!000$ nucleotides.
\item $150$GB of data per sample (compressed).
\end{itemize}
\bigskip
A grand total of $4.5$TB was used, which completely filled up the storage at
Sanger.
\vfill
\permfoot{\url{http://www.sanger.ac.uk/}}
\end{pframe}
\subsection{Data transfer}
\begin{pframe}
We need to make sure the data is transferred in a \emph{secure} way.
Sending disks?
Public server, GPG encryption.
\end{pframe}
\subsection{Pipelines}
\begin{pframe}
\begin{figure}[]
\vspace{-0.5cm}
\begin{center}
\includegraphics[height=0.75\textheight]{assemblyline}
\end{center}
\caption{Scene from ``Modern Times''.}
\end{figure}
\end{pframe}
\subsection{Data analysis}
\begin{pframe}
Resequencing pipelines can roughly be divided into five steps.
\begin{enumerate}
\item Pre-alignment.
\begin{itemize}
\item Quality control.
\item Data cleaning.
\end{itemize}
\item Alignment.
\begin{itemize}
\item Post-alignment quality control.
\end{itemize}
\item Variant calling.
\item Filtering.
\begin{itemize}
\item Post-variant calling quality control.
\end{itemize}
\item Annotation.
\end{enumerate}
\end{pframe}
\begin{frame}
\frametitle{Principle of variant calling}
\begin{figure}[]
\begin{center}
\includegraphics[width=0.9\textwidth]{varcall}
\end{center}
\caption{Result of an alignment.}
\label{fig:varcall}
\end{figure}
\end{frame}
\subsection{Prioritisation}
\begin{pframe}
Prioritisation is mainly done by filtering variants that we expect to be
irrelevant.
\bigskip
\pause
This can be because the variant does not follow the \emph{inheritance
pattern} of the disease.
\begin{itemize}
\item The disease is recessive, but the variant is \emph{homozygous} in an
unaffected individual.
\end{itemize}
\bigskip
\pause
It can be because the \emph{predicted effect} of a variant does not fit
the phenotype.
\begin{itemize}
\item A variant found in an unrelated gene.
\item A variant that does not alter the protein.
\end{itemize}
\end{pframe}
\section{Annotation}
\subsection{Effect prediction}
\begin{pframe}
In most cases we are still left with a lot of variants.
\bigskip
Variant annotation.
\begin{itemize}
\item Frequency within a population.
\item Location of the variant.
\begin{itemize}
\item Gene panels.
\item Location within a gene.
\end{itemize}
\item Conservation.
\end{itemize}
\end{pframe}
\subsection{Variant Effect Predictor}
\begin{pframe}
A selection of VEP annotations:
\begin{itemize}
\item Affected genes and transcripts.
\item Location of the variant.
\begin{itemize}
\item Upstream of a transcript, in coding sequence, in non-coding RNA,
in regulatory region.
\end{itemize}
\item Consequence on the protein sequence.
\begin{itemize}
\item Stop gained, missense, stop lost, frameshift.
\end{itemize}
\item Minor allele frequencies from the 1000 Genomes Project.
\item SIFT and PolyPhen scores for changes to protein sequence.
\end{itemize}
\vfill
\permfoot{\url{http://www.ensembl.org/info/docs/tools/vep/index.html}}
\end{pframe}
\subsection{Databases}
\begin{pframe}
In most cases we are not interested in common variants.
\begin{itemize}
\item dbSNP.
\item 1000 Genomes.
\item Exome Variant Server (EVS).
\end{itemize}
\medskip
A cut-off of $1\%$ is usually fine.
\bigskip
\pause
Databases containing detailed information about variants:
\begin{itemize}
\item \emph{Locus specific} databases.
\begin{itemize}
\item LOVD.
\end{itemize}
\item Human Gene Mutation Database (HGMD).
\end{itemize}
\vfill
\bs{\url{http://www.lovd.nl/}}
\bs{\url{http://www.hgmd.cf.ac.uk/}}
\end{pframe}
\begin{pframe}
% Something must occur in all members of a family; it may not occur in only
% part of another family, but it may occur in that whole family.
\begin{table}[]
\begin{center}
\begin{tabular}{ccl}
Family one & Family two & Filter result \\
\hline
$5/5$ & $4/4$ & Passed \\
$0/5$ & $4/4$ & Passed \\
$3/5$ & $4/4$ & Filtered \\
$3/5$ & $3/4$ & Filtered \\
\end{tabular}
\end{center}
\caption{The advanced intersection.}
\end{table}
\end{pframe}
\begin{pframe}
\begin{table}[]
\begin{center}
\begin{tabular}{lr}
Filter & Variants left \\
\hline
None & $12,\!820,\!660$ \\
Unaffected member & $7,\!354,\!674$ \\
EU MAF below $1$\% & $5,\!504,\!165$ \\
GoNL MAF below $1$\% & $4,\!681,\!268$ \\
Intersection & $5,\!973,\!169$ \\
Advanced intersection & $1,\!549,\!550$
\end{tabular}
\end{center}
\caption{Single filters.}
\end{table}
\end{pframe}
\begin{pframe}
\begin{table}[]
\begin{center}
\begin{tabular}{lr}
Filter & Variants left \\
\hline
None & $12,\!820,\!660$ \\
EU MAF below $1$\% & $5,\!504,\!165$ \\
GoNL MAF below $1$\% & $4,\!327,\!913$ \\
Unaffected member & $2,\!471,\!569$ \\
Intersection & $479,\!494$ \\
Advanced intersection & $40,\!944$
\end{tabular}
\end{center}
\caption{Combining filters.}
\end{table}
($25,\!127$ in all families)
\end{pframe}
\subsection{Conservation}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[width=\textwidth]{phylop}
\end{center}
\caption{PhyloP scores based on $100$ vertebrate species.}
\end{figure}
\vfill
\permfoot{\url{http://compgen.bscb.cornell.edu/phast/}}
\permfoot{\phantom{.}}
\end{pframe}
% Make the acknowledgements slide.
\makeAcknowledgementsSlide{
\begin{tabular}{lll}
\bf Dermatology & \bf SASC & \bf Sanger \\
Mijke Visser & Peter van 't Hof & Thomas Keane \\
Nelleke Gruis & Sander van der Zeeuw & Kim Wong \\
Nienke van der Stoep & Leon Mei & Daniela Espinoza \\
Remco van Doorn & & David Adams \\
\end{tabular}
}
\end{document}
../../submodules/presentation-pics/pics/phylop.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/varcall.gif
\ No newline at end of file
Subproject commit 3186a8e4f8dc8f198c7a96756dcd3b252bcfea1d
Subproject commit 63b5792d877b40e3ffeca84ece106e63159439cb
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment