Skip to content
Snippets Groups Projects
Commit 1495d0b1 authored by Laros's avatar Laros
Browse files

Merge branch 'leiden_2014' of git.lumc.nl:humgen/ngs-intro-course into leiden_2014

parents 7e21f0ee 51b66d40
No related branches found
No related tags found
No related merge requests found
Showing
with 431 additions and 0 deletions
../presentation/ngi_logo.eps
\ No newline at end of file
../presentation/nwo_logo_en.eps
\ No newline at end of file
../presentation/nwo_logo_nl.eps
\ No newline at end of file
\documentclass[slidestop]{beamer}
\title{Phylogenetic reconstruction}
\providecommand{\myConference}{NGS introduction}
\providecommand{\myDate}{Thursday, 22 May 2014}
\author{Michiel van Galen}
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
\providecommand{\lastCenterLogo}{
\raisebox{-0.1cm}{
%\includegraphics[height=1cm]{lgtc_logo}
%\includegraphics[height=0.7cm]{ngi_logo}
}
}
\providecommand{\lastRightLogo}{
%\includegraphics[height=0.7cm]{nbic_logo}
%\includegraphics[height=0.8cm]{nwo_logo_en}
%\hspace{1.5cm}\includegraphics[height=0.7cm]{gen2phen_logo}
}
\usetheme{lumc}
\begin{document}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title page.
\bodytemplate
% First page of the presentation.
\section{Material}
\subsection{Input and goal}
\begin{pframe}
\begin{itemize}
\item Sequence data available for different strains of bacteria
\item One FastQ file per strain
\end{itemize}
\bigskip
NGS throughput is much higher than that of conventional methods (Sanger
sequencing), increasing the chances of new insights.
\bigskip
However, there are few solutions available to accommodate this magnitude in
the field of phylogenetic reconstruction.
\end{pframe}
\section{Methods}
\subsection{Naive approach}
\begin{pframe}
\begin{figure}
\centering
\includegraphics[width=0.75\textwidth]{previous}
\end{figure}
\end{pframe}
\subsection{Naive approach}
\begin{pframe}
Early workflow adapted from Sanger suffered from some limitations:
\bigskip
\begin{itemize}
\item Difficult to reproduce
\item Poorly documented
\item Using unconventional methods
\item Not parallelized
\item Susceptible to errors
\item Customization or modification nearly impossible
\item Stops at the tree construction
\end{itemize}
\end{pframe}
\subsection{From bundle of scripts to pipeline}
\begin{pframe}
Re-factor the workflow into a complete pipeline
\bigskip
\begin{itemize}
\item Convert the workflow to an automated pipeline
\item Replace custom scripts with maintained existing tools and methods
\item Include cluster support
\item Improve usability and customization
\end{itemize}
\end{pframe}
\section{Pipeline}
\subsection{Breakdown of the pipeline}
\begin{pframe}
The workflow can be roughly broken down into two parts
\bigskip
\begin{itemize}
\item Per sample part - Analyze the samples separately
\item Merged part - Combine output for each sample
\end{itemize}
\end{pframe}
\subsection{Per sample part}
\begin{pframe}
These steps are the same for each sample and can be parallelized
\begin{itemize}
\item Add QC - Standard tools
\item Alignment to canonical reference - BWA
\item Variant calling and filtering - Samtools
\item Mask variants in repeated regions - BEDtools
\end{itemize}
\end{pframe}
\subsection{Merged part, combining the output}
\begin{pframe}
\begin{itemize}
\item Compare the variants between strains - Python
\begin{itemize}
\item Merge the variant files into one matrix - VCFtools
\end{itemize}
\bigskip
\item Use PHYLIP to infer an evolutionary tree
\begin{itemize}
\item Create distance matrix (dnadist)
\item Create a phylogenetic tree
\end{itemize}
\end{itemize}
\end{pframe}
\section{Current situation}
\subsection{Implementation}
\begin{pframe}
The pipeline is designed to run on the LUMC Shark cluster
\begin{itemize}
\item All tools are available and maintained
\item Pipeline is written in Make, compatible to run in parallel
\item Reduced the number of custom scripts to just one
\begin{itemize}
\item Not reinventing the wheel, outsource support for tools
\end{itemize}
\end{itemize}
\end{pframe}
\section{Future work}
\subsection{Possible expansions}
\begin{pframe}
\begin{itemize}
\item Improve usability even more
\begin{itemize}
\item User friendly interface
\item More automation
\end{itemize}
\bigskip
\item kMer analysis
\begin{itemize}
\item Proven to work on meta-genomic datasets
\end{itemize}
\end{itemize}
\end{pframe}
\subsection{kMer}
\begin{pframe}
\begin{itemize}
\item Calculate distance between samples based on occurrences of words of length k
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.40\textwidth]{clusterall_bw.ps}
\end{figure}
\end{pframe}
\section{Conclusion}
\begin{pframe}
Summarizing:
\bigskip
\begin{itemize}
\item Much room for pipeline development and automation
\item Apply existing tools where possible to reduce development time
\item Data is relatively small compared to human data making our
infrastructure well prepared
\end{itemize}
\end{pframe}
\section{Questions?}
\lastpagetemplate
\begin{pframe}
\begin{center}
Acknowledgements:
\bigskip
\bigskip
Wilco Knetsch
\bigskip
Jeroen Laros
\bigskip
Martijn Vermaat
\bigskip
Jeroen Frank
\bigskip
LGTC
\end{center}
\end{pframe}
\end{document}
\documentclass[slidestop]{beamer}
\title{Phylogenetic reconstruction}
\providecommand{\myConference}{NGS introduction}
\providecommand{\myDate}{Thursday, 22 May 2014}
\author{Michiel van Galen}
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
\providecommand{\lastCenterLogo}{
\raisebox{-0.1cm}{
%\includegraphics[height=1cm]{lgtc_logo}
%\includegraphics[height=0.7cm]{ngi_logo}
}
}
\providecommand{\lastRightLogo}{
%\includegraphics[height=0.7cm]{nbic_logo}
%\includegraphics[height=0.8cm]{nwo_logo_en}
%\hspace{1.5cm}\includegraphics[height=0.7cm]{gen2phen_logo}
}
\usetheme{lumc}
\begin{document}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title page.
\bodytemplate
% First page of the presentation.
\section{Material}
\subsection{Input and goal}
\begin{pframe}
\begin{itemize}
\item Sequence data available for different strains of bacteria
\item One FastQ file per strain
\end{itemize}
\bigskip
NGS throughput is much higher than that of conventional methods (Sanger
sequencing), increasing the chances of new insights.
\bigskip
However, there are few solutions available to accommodate this magnitude in
the field of phylogenetic reconstruction.
\end{pframe}
\section{Methods}
\subsection{Naive approach}
\begin{pframe}
\begin{figure}
\centering
\includegraphics[width=0.75\textwidth]{previous}
\end{figure}
\end{pframe}
\subsection{Naive approach}
\begin{pframe}
Early workflow adapted from Sanger suffered from some limitations:
\bigskip
\begin{itemize}
\item Difficult to reproduce
\item Poorly documented
\item Using unconventional methods
\item Not parallelized
\item Susceptible to errors
\item Customization or modification nearly impossible
\item Stops at the tree construction
\end{itemize}
\end{pframe}
\subsection{From bundle of scripts to pipeline}
\begin{pframe}
Refactor the workflow into a complete pipeline
\bigskip
\begin{itemize}
\item Convert the workflow to an automated pipeline
\item Replace custom scripts with maintained existing tools and methods
\item Include cluster support
\item Improve usability and customization
\end{itemize}
\end{pframe}
\section{Pipeline}
\subsection{Breakdown of the pipeline}
\begin{pframe}
The workflow can be roughly broken down into two parts
\bigskip
\begin{itemize}
\item Per sample part - Analyze the samples separately
\item Merged part - Combine output for each sample
\end{itemize}
\end{pframe}
\subsection{Per sample part}
\begin{pframe}
These steps are the same for each sample and can be parallelized
\begin{itemize}
\item Add QC - Standard tools
\item Alignment to canonical reference - BWA
\item Variant calling and filtering - Samtools
\item Mask variants in repeated regions - BEDtools
\end{itemize}
\end{pframe}
\subsection{Merged part, combining the output}
\begin{pframe}
\begin{itemize}
\item Compare the variants between strains - Python
\begin{itemize}
\item Merge the variant files into one matrix - VCFtools
\end{itemize}
\bigskip
\item Use PHYLIP to infer an evolutionary tree
\begin{itemize}
\item Create distance matrix (dnadist)
\item Create a phylogenetic tree
\end{itemize}
\end{itemize}
\end{pframe}
\section{Current situation}
\subsection{Implementation}
\begin{pframe}
The pipeline is designed to run on the LUMC Shark cluster
\begin{itemize}
\item All tools are available and maintained
\item Pipeline is written in Make, compatible to run in parallel
\item Reduced the number of custom scripts to just one
\begin{itemize}
\item Not reinventing the wheel, outsource support for tools
\end{itemize}
\end{itemize}
\end{pframe}
\section{Future work}
\subsection{Possible expansions}
\begin{pframe}
\begin{itemize}
\item Improve usability even more
\begin{itemize}
\item User friendly interface
\item More automation
\end{itemize}
\bigskip
\item kMer analysis
\begin{itemize}
\item Proven to work on metagenomic datasets
\end{itemize}
\end{itemize}
\end{pframe}
\subsection{kMer}
\begin{pframe}
\begin{itemize}
\item Calculate distance between samples based on occurrences of words of length k
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.40\textwidth]{clusterall_bw.ps}
\end{figure}
\end{pframe}
\section{Conclusion}
\begin{pframe}
Summarizing:
\bigskip
\begin{itemize}
\item Much room for pipeline development and automation
\item Apply existing tools where possible to reduce development time
\item Data is relatively small compared to human data making our
infrastructure well prepared
\end{itemize}
\end{pframe}
\section{Questions?}
\lastpagetemplate
\begin{pframe}
\begin{center}
Acknowledgements:
\bigskip
\bigskip
Wilco Knetsch
\bigskip
Jeroen Laros
\bigskip
Martijn Vermaat
\bigskip
Jeroen Frank
\bigskip
LGTC
\end{center}
\end{pframe}
\end{document}
phylogenetic_reconstruction/previous.png

168 KiB

../presentation/ul_logo.eps
\ No newline at end of file
../presentation/Makefile
\ No newline at end of file
../presentation-pics/pics/adapter_sequencing.png
\ No newline at end of file
../presentation/beamerthemelumc.sty
\ No newline at end of file
../presentation-pics/pics/garbage-in-garbage-out.jpg
\ No newline at end of file
../presentation/gen2phen_logo.eps
\ No newline at end of file
../presentation/lgtc_logo.eps
\ No newline at end of file
../presentation/lumc_logo.eps
\ No newline at end of file
../presentation/lumc_logo_small.eps
\ No newline at end of file
../presentation/nbic_logo.eps
\ No newline at end of file
../presentation/ngi_logo.eps
\ No newline at end of file
../presentation/nwo_logo_en.eps
\ No newline at end of file
../presentation/nwo_logo_nl.eps
\ No newline at end of file
../presentation-pics/pics/pretrimmed_qscores.png
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment