Added introduction lecture (extracted from extended introduction).

79db5c8f · Laros · 1495d0b1 · 79db5c8f · 79db5c8f · 79db5c8f
Commit 79db5c8f authored 10 years ago by Laros
--- a/introduction_extended/Makefile
+++ b/introduction_extended/Makefile
--- a/introduction_extended/assembly.png
+++ b/introduction_extended/assembly.png
--- a/introduction_extended/assemblyline.jpg
+++ b/introduction_extended/assemblyline.jpg
--- a/introduction_extended/base-pacbio.ppm
+++ b/introduction_extended/base-pacbio.ppm
--- a/introduction_extended/base-torrent.ppm
+++ b/introduction_extended/base-torrent.ppm
--- a/introduction_extended/beamerthemelumc.sty
+++ b/introduction_extended/beamerthemelumc.sty
--- a/introduction_extended/gapss3.dot
+++ b/introduction_extended/gapss3.dot
--- a/introduction_extended/gen2phen_logo.eps
+++ b/introduction_extended/gen2phen_logo.eps
--- a/introduction_extended/hgap.png
+++ b/introduction_extended/hgap.png
--- a/introduction_extended/hiseq_2000.jpg
+++ b/introduction_extended/hiseq_2000.jpg
--- a/introduction_extended/introduction_extended.tex
+++ b/introduction_extended/introduction_extended.tex
 \documentclass[slidestop]{beamer}

 \title{Introduction to NGS data analysis}
-\providecommand{\myConference}{Workshop NGS, Hogeschool Leiden}
+\providecommand{\myConference}{Introduction NGS Data Analysis}
 \providecommand{\myDate}{Thursday, May 22, 2014}
 \author{Jeroen F. J. Laros}
 \providecommand{\myGroup}{Leiden Genome Technology Center}
@@ -340,93 +340,7 @@
  \end{itemize}
 \end{pframe}

-\section{Resequencing}
-\subsection{Data analysis}
-\begin{pframe}
-  Resequencing pipelines can roughly be divided in five steps.
-  \pause
-  \begin{enumerate}
-    \item Pre-alignment.
-    \begin{itemize}
-      \item Quality control.
-      \item Data cleaning.
-    \end{itemize}
-    \pause
-    \item Alignment.
-    \begin{itemize}
-      \item Post-alignment quality control.
-    \end{itemize}
-    \pause
-    \item Variant calling.
-    \pause
-    \item Filtering.
-    \begin{itemize}
-      \item Post-variant calling quality control.
-    \end{itemize}
-    \pause
-    \item Annotation.
-  \end{enumerate}
-\end{pframe}
-
-\section{Pre-alignment}
-\subsection{Data cleaning}
-\begin{pframe}
-  Depending on the sequencing platform, parts of the reads need to be removed.
-  \begin{itemize}
-    \item Remove linker sequences (\emph{Cutadapt}, \emph{FASTX toolkit}).
-    \item Clip low quality bases at the end of the read (\emph{Sickle},
-      \emph{Trimmomatic}, \emph{FASTX toolkit}).
-    \item Length filtering (\emph{Fastools}).
-  \end{itemize}
-
-  \vfill
-  \permfoot{http://code.google.com/p/cutadapt/}
-
-  \permfoot{http://hannonlab.cshl.edu/fastx\_toolkit/}
-
-  \permfoot{https://github.com/najoshi/sickle}
-
-  \permfoot{http://www.usadellab.org/cms/index.php?page=trimmomatic}
-
-  \permfoot{https://pypi.python.org/pypi/fastools}
-\end{pframe}
-
-\subsection{Trimming}
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[height=0.85\textheight]{pretrimmed_qscores}
-    \end{center}
-    \caption{Quality score per position.}
-  \end{figure}
-\end{pframe}
-
-\subsection{Clipping}
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[height=0.85\textheight]{linker-clip}
-    \end{center}
-    \caption{Sequencing linkers.}
-  \end{figure}
-\end{pframe}
-
 \subsection{Quality control}
-\begin{pframe}
-  The \emph{FastQC toolkit} can be used for quality control (both before and
-  after the data cleaning step).
-  \begin{itemize}
-    \item GC content.
-    \item GC distribution.
-    \item Quality scores distribution.
-    \item \ldots
-  \end{itemize}
-
-  \vfill
-  \permfoot{http://www.bioinformatics.babraham.ac.uk/projects/fastqc/}
-\end{pframe}
-
-\subsection{Example QC output}
 \begin{pframe}
  \begin{figure}
    \includegraphics[width=\textwidth, height=0.35\textheight]
@@ -464,55 +378,6 @@
  \end{minipage}
 \end{pframe}

-\subsection{Choose an aligner}
-\begin{pframe}
-  Not all aligners can deal with indels.
-  \begin{itemize}
-    \item Only a couple of years ago, only SNPs were considered.
-    \begin{itemize}
-      \item \emph{Bowtie}.
-    \end{itemize}
-  \end{itemize}
-  \medskip
-  \pause
-
-  Few aligners can work with large deletions.
-  \begin{itemize}
-    \item Spliced RNA.
-    \begin{itemize}
-      \item \emph{GMAP} / \emph{GSNAP}.
-      \item \emph{Tophat}.
-    \end{itemize}
-    \item \emph{BWA-MEM}.
-  \end{itemize}
-
-  \vfill
-  \permfoot{http://bowtie-bio.sourceforge.net/index.shtml}
-
-  \permfoot{http://research-pub.gene.com/gmap/}
-
-  \permfoot{http://tophat.cbcb.umd.edu/}
-
-  \permfoot{http://bio-bwa.sourceforge.net/}
-\end{pframe}
-
-\begin{pframe}
-  The choice of aligner may be restricted by the sequencer.
-  \begin{itemize}
-    \item For the Ion Torrent: \emph{Tmap}.
-    \begin{itemize}
-      \item Combination of three different aligners.
-      \item Deals with errors in homopolymer stretches.
-    \end{itemize}
-    \item For the PacBio: \emph{BLASR}.
-  \end{itemize}
-
-  \vfill
-  \permfoot{https://github.com/iontorrent/TS/tree/master/Analysis/TMAP}
-
-  \permfoot{https://github.com/PacificBiosciences/blasr}
-\end{pframe}
-
 \section{Variant calling}
 \subsection{Consistent deviations from the reference}
 \begin{pframe}
@@ -524,181 +389,8 @@
  \end{figure}
 \end{pframe}

-\subsection{Some considerations}
-\begin{pframe}
-  Things a variant caller might take into account:
-  \begin{itemize}
-    \item Strand balance.
-    \item Base quality.
-    \item Mapping quality.
-    \begin{itemize}
-      \item Distribution within the reads.
-    \end{itemize}
-    \item Ploidy of the organism in question.
-  \end{itemize}
-  \medskip
-  \pause
-
-  Complicating factors:
-  \begin{itemize}
-    \item Pooled samples.
-    \pause
-    \item RNA.
-    \begin{itemize}
-      \item Allele specific expression.
-      \item RNA editing.
-    \end{itemize}
-    \pause
-    \item Strand-specific sample prep.
-  \end{itemize}
-\end{pframe}
-
-\subsection{Choice of variant caller}
-\begin{pframe}
-  Rules of thumb:
-  \begin{itemize}
-    \item Well known organism and experiment: Statistical model.
-    \item Use a simpler variant caller otherwise.
-  \end{itemize}
-  \bigskip
-  \pause
-
-  Popular variant callers:
-  \begin{itemize}
-    \item \emph{Samtools}.
-    \item \emph{GATK}.
-    \item \emph{VarScan}.
-  \end{itemize}
-
-  \vfill
-  \permfoot{http://samtools.sourceforge.net/}
-
-  \permfoot{https://www.broadinstitute.org/gatk/}
-
-  \permfoot{http://varscan.sourceforge.net/}
-\end{pframe}
-
-\section{Variant filtering}
-\subsection{Filtering on coverage}
-\begin{pframe}
-  We can set some thresholds:
-  \begin{itemize}
-    \item Minimum.
-    \item Maximum.
-  \end{itemize}
-  \bigskip
-  \pause
-
-  We filter for a maximum coverage because of copy number variation.
-  \bigskip
-  \pause
-
-  A good way to calculate the maximum:
-  \begin{itemize}
-    \item Calculate the mean coverage.
-    \begin{itemize}
-      \item Only of the covered (targeted) regions.
-    \end{itemize}
-    \item Multiply this number by a reasonable factor, e.g., $2.5$.
-  \end{itemize}
-\end{pframe}
-
-\section{Annotation}
-\subsection{What is already known about a variant}
-\begin{pframe}
-  A selection of SeattleSeq annotation:
-  \begin{itemize}
-    \item Is the variant known?
-    \item Does it hit a gene?
-    \pause
-    \begin{itemize}
-      \item Is it in an intron?
-      \begin{itemize}
-        \item Does it hit a splice site?
-      \end{itemize}
-      \pause
-      \item Is it in the coding region?
-      \begin{itemize}
-        \item Is there a gain/loss of a stop codon?
-        \item Does the variant result in a frameshift?
-        \item \ldots
-      \end{itemize}
-      \pause
-      \item Is it in the 5'/3' UTR of a gene?
-      \item \ldots
-    \end{itemize}
-    \pause
-    \item Is it in a regulatory region?
-    \item \ldots
-  \end{itemize}
-\end{pframe}
-
-\section{Full genome sequencing}
-\subsection{Copy number variation}
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[trim=0 5cm 0 0, clip, height=0.9\textheight, width=\textwidth]{cnv}
-    \end{center}
-    \caption{Coverage patterns over a whole chromosome.}
-  \end{figure}
-\end{pframe}
-
-\begin{pframe}
-  Per sample:
-  \begin{itemize}
-    \item The reference needs to be very good.
-    \item Sequencability biases.
-    \item Mapping biases.
-  \end{itemize}
-  \bigskip
-  \pause
-
-  Within a population:
-  \begin{itemize}
-    \item Mixture of distributions.
-    \item Not sensitive to aforementioned biases.
-    \item Needs a lot of controls.
-  \end{itemize}
-\end{pframe}
-
-\subsection{Structural variation}
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[height=0.9\textheight, width=\textwidth]{poorly_mapped}
-    \end{center}
-    \caption{Multiple issues while mapping.}
-  \end{figure}
-\end{pframe}
-
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[trim=0 19cm 0 0, clip, width=\textwidth]{discordant}
-    \end{center}
-    \caption{Discordant and split reads.}
-  \end{figure}
-
-  \vfill
-  \permfoot{http://breakdancer.sourceforge.net/}
-
-  \permfoot{http://sourceforge.net/projects/pindel/}
-\end{pframe}
-
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[height=0.9\textheight]{sv}
-    \end{center}
-    \caption{Different types of structural variation.}
-  \end{figure}
-\end{pframe}
-
-
 \section{De Novo assembly}
 \subsection{Assembly}
-
 \begin{pframe}
  \begin{figure}[]
    \begin{center}
@@ -708,35 +400,6 @@
  \end{figure}
 \end{pframe}

-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[width=\textwidth]{contig}
-    \end{center}
-    \caption{Overlaps are used to reconstruct a genome.}
-  \end{figure}
-\end{pframe}
-
-\subsection{Scaffolding}
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[height=0.9\textheight]{scaffold}
-    \end{center}
-    \caption{Paired end or mate pair reads can be used.}
-  \end{figure}
-\end{pframe}
-
-\subsection{Easier assembly with PacBio}
-\begin{pframe}
-  \begin{figure}[]
-    \begin{center}
-      \includegraphics[height=0.9\textheight]{hgap}
-    \end{center}
-    \caption{Correcting PacBio reads.}
-  \end{figure}
-\end{pframe}
-
 \section{Pipelines}
 \subsection{Pipelines}
 \begin{pframe}
@@ -797,42 +460,6 @@
  \end{lstlisting}
 \end{pframe}

-\section{Graphical interfaces}
-\subsection{Galaxy}
-\begin{pframe}
-  Galaxy, a graphical user interface:
-  \begin{itemize}
-    \item Wrapper for command line utilities.
-    \item User friendly.
-    \item Point and click.
-    \pause
-    \item Workflows.
-    \begin{itemize}
-      \item Save all the steps you did in your analysis.
-      \item Rerun the entire analysis on a new dataset.
-      \item Share your workflow with other people.
-      \item \ldots
-    \end{itemize}
-  \end{itemize}
-
-  \vfill
-  \permfoot{http://galaxy.psu.edu/}
-\end{pframe}
-
-\begin{pframe}
-  \begin{figure}
-    \includegraphics[trim=0 0 0 2cm, clip, width=\textwidth]{galaxy}
-    \caption{Galaxy main user interface}
-  \end{figure}
-\end{pframe}
-
-\begin{pframe}
-  \begin{figure}
-    \includegraphics[width=\textwidth, height=0.9\textheight]{galaxy_mpileup}
-    \caption{User friendly interface with Galaxy}
-  \end{figure}
-\end{pframe}
-
 \subsection{Workflow of a parallel pipeline}
 \begin{pframe}
  \begin{figure}

--- a/introduction_extended/ion-proton-chip.jpg
+++ b/introduction_extended/ion-proton-chip.jpg
--- a/introduction_extended/ion-proton.jpg
+++ b/introduction_extended/ion-proton.jpg
--- a/introduction_extended/ion-torrent-chip-close.gif
+++ b/introduction_extended/ion-torrent-chip-close.gif
--- a/introduction_extended/ion-torrent-chip.jpg
+++ b/introduction_extended/ion-torrent-chip.jpg
--- a/introduction_extended/ion-torrent-wells.jpg
+++ b/introduction_extended/ion-torrent-wells.jpg
--- a/introduction_extended/ion-torrent.jpg
+++ b/introduction_extended/ion-torrent.jpg
--- a/introduction_extended/k_align.png
+++ b/introduction_extended/k_align.png
--- a/introduction_extended/k_basecall.png
+++ b/introduction_extended/k_basecall.png
--- a/introduction_extended/k_bridge2.png
+++ b/introduction_extended/k_bridge2.png