Commit b02eee9c authored by Laros's avatar Laros
Browse files

Added makefile lecture.

parent 88297415
../../submodules/presentation-pics/pics/DellBlade4.png
\ No newline at end of file
../../submodules/presentation/Makefile
\ No newline at end of file
../../submodules/presentation-pics/pics/4-philosophers.gif
\ No newline at end of file
../../submodules/presentation/beamerthemelumc.sty
\ No newline at end of file
../../submodules/presentation-pics/pics/gapss3.dot
\ No newline at end of file
../../submodules/presentation/gen2phen_logo.eps
\ No newline at end of file
../../submodules/presentation-pics/pics/hiseq_2000.jpg
\ No newline at end of file
../../submodules/presentation/lgtc_logo.eps
\ No newline at end of file
../../submodules/presentation/lumc_logo.eps
\ No newline at end of file
../../submodules/presentation/lumc_logo_small.eps
\ No newline at end of file
% Preamble of a beamer presentation that uses the "lumc" theme (loaded at the
% end of this block).
\documentclass[slidestop]{beamer}
% Title and author of the talk.
% NOTE(review): \bt is not defined in this file; presumably it is supplied by
% the lumc theme -- confirm.
\title{Automatic scheduling with \bt{make}.}
% Event and affiliation metadata. \providecommand only defines a macro when it
% does not exist yet, so earlier definitions of these names take precedence.
% NOTE(review): the \my* macros are presumably read by the lumc theme when it
% builds the title page -- confirm against beamerthemelumc.sty.
\providecommand{\myConference}{BioAssist Programmers Meeting}
\providecommand{\myDate}{Friday, 23 March 2012}
\author{Jeroen F. J. Laros}
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
% Logo macro: the LGTC logo, scaled and lowered by 0.1cm.
% NOTE(review): presumably placed on the closing slide by \lastpagetemplate --
% confirm. The line ends inside the body are not protected with %, so they
% contribute interword spaces around the graphic.
\providecommand{\lastCenterLogo}{
\raisebox{-0.1cm}{
\includegraphics[scale = 0.055]{lgtc_logo}
}
}
% Logo macro whose only content (the NBIC logo) is commented out; the body
% therefore consists of just the space produced by the first line end.
\providecommand{\lastRightLogo}{
%\includegraphics[scale = 0.1]{nbic_logo}
}
% Load the LUMC beamer theme (beamerthemelumc.sty).
\usetheme{lumc}
\begin{document}
\lstset{language = make, mathescape = false}
\input{petrinet}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title page.
\bodytemplate
% First page of the presentation.
\section{Introduction}
\begin{frame}
\frametitle{Some figures}
\begin{minipage}{0.49\textwidth}
\begin{figure}
\includegraphics[width = \textwidth]{hiseq_2000}
\caption{Illumina HiSeq 2000}
\end{figure}
\end{minipage}
\hfill
\pause
\begin{minipage}{0.49\textwidth}
At this moment:
\begin{itemize}
\item $\pm 100$ exomes per run.
\item $\pm 5$ full genomes per run.
\item Analysis can take up to $3$ days per exome.
\item Full genome sequencing is coming.
\end{itemize}
\vspace{3cm}
\end{minipage}
\end{frame}
\begin{frame}
\frametitle{Clusters}
\begin{figure}
\includegraphics[width=0.95\textwidth]{DellBlade4}
\caption{Dell M610 blade server}
\end{figure}
\end{frame}
\section{Using a cluster}
\begin{fframe}
\frametitle{Sun Grid Engine}
A cluster needs a job scheduler.
\begin{itemize}
\item Open source batch-queuing system developed by Sun Microsystems.
\item Now owned by Oracle, no longer free.
\item Fully compatible alternatives:
\begin{itemize}
\item Son of Grid Engine.
\item \only<2>{\color{yellow}}Open Grid Scheduler.\only<2>{\color{white}}
\item Univa Grid Engine (commercial).
\end{itemize}
\end{itemize}
\vfill
\permfoot{http://gridscheduler.sourceforge.net}
\end{fframe}
\begin{frame}
\frametitle{Pipelines}
Classical pipeline:
\begin{itemize}
\item Linear.
\item Shell script.
\begin{itemize}
\item Or with the \lstinline!exec()! function in your favourite language.
\end{itemize}
\end{itemize}
\bigskip
\pause
Drawbacks:
\begin{itemize}
\item Parallelisation is done manually.
\item Syncing must be done manually.
\item Extensive error handling.
\item No save points.
\item No dry runs.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{A different viewpoint}
What if\ldots
\begin{itemize}
\item All commands are atomic.
\begin{itemize}
\item We describe input and output.
\end{itemize}
\item We build a \emph{dependency graph}.
\item Trace a path in this graph to find a workflow.
\end{itemize}
\bigskip
\pause
This way we do not need to:
\begin{itemize}
\item Design a workflow.
\item Figure out which parts can be run in parallel.
\end{itemize}
\end{frame}
\section{Theoretical solution}
\begin{frame}
\frametitle{Petri nets}
\begin{figure}
\colorbox{white}{
\includegraphics[height=0.8\textheight, width=0.8\textwidth, keepaspectratio]{PetriNet}
}
\caption{A model for parallelism.}
\end{figure}
\end{frame}
\begin{fframe}
\frametitle{Petri nets}
\emph{``Mathematical modelling language for the description of distributed
systems.''}
\bigskip
Background:
\begin{itemize}
\item Described first in 1939 by Carl Adam Petri (age 13).
\item Originally intended to describe chemical processes.
\item Graphical notation for stepwise processes.
\item Choice, iteration, and concurrent execution.
\end{itemize}
\vfill
\bs{http://en.wikipedia.org/wiki/Petri\_net}
\end{fframe}
\begin{frame}
\begin{figure}
\examplenet{\bs{place}}{\bs{place}}{\bs{transition}}
\caption{A simple Petri net}
\end{figure}
A \emph{transition} has:
\begin{itemize}
\item A \emph{preset}: a set of \emph{input places}.
\item A \emph{postset}: a set of \emph{output places}.
\end{itemize}
\bigskip
The output of one transition can be the input of another.
\end{frame}
\begin{frame}
\begin{figure}
\petrinet{}{}{}{}{}{}{}{}
\caption{A more complicated Petri net}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Modelling parallel processes}
Key observation:
\begin{itemize}
\item A graph is uniquely defined by a list of nodes and their edges.
\end{itemize}
\bigskip
This means that you don't need to focus on the big picture.
\begin{itemize}
\item Each process can be modelled individually.
\item Processes are linked based on the in- and outputs.
\end{itemize}
\end{frame}
\section{Practical solution}
\begin{fframe}
\frametitle{make}
\emph{``Utility that automatically builds programs and libraries from source
code by reading files called makefiles which specify how to derive the target
program.''}
\bigskip
With a couple of minor alterations we can also use it for pipelines.
\begin{tabular}{@{\fakeitem}l@{\ \ $\Rightarrow$\ \ }l}
Source code & Raw data\\
Program & Result\\
Building & Analysing
\end{tabular}
\vfill
\bs{http://en.wikipedia.org/wiki/Make\_(software)}
\end{fframe}
\begin{frame}[fragile]
\frametitle{The anatomy of a Makefile}
\begin{lstlisting}[caption = {Makefile snippet}]
target: prerequisites
recipe
\end{lstlisting}
In the recipe, some special variables are available:
\begin{table}
\begin{tabular}{ll}
\lstinline!$@!\rmath{$} & Name of the target.\\
\lstinline!$<!\rmath{$} & The first prerequisite.\\
\lstinline!$^!\rmath{$} & All prerequisites.
\end{tabular}
\end{table}
\bigskip
Pattern rules:

The \lstinline!%! can be used for implicit targets, e.g.,
\lstinline!%.bam: %.sam!
\end{frame}
\begin{frame}[fragile]
\begin{figure}
\examplenet{\bs{fastq}}{\bs{bam}}{\underline{\bsi{bwa}}}
\caption{A simple workflow}
\end{figure}
\begin{lstlisting}[caption = {Makefile snippet}]
%.bam: %_1.fq %_2.fq
bwa sampe reference.fa $^ > $@
\end{lstlisting}
\end{frame}
\begin{frame}
\begin{figure}
\petrinet{\bs{fastq}}{\bs{fq.f}}{\bs{stats}}{\bs{bam}}{\bs{report}}{
\underline{\bsi{fastx}}}{\underline{\bsi{bwa}}}{\underline{\bsi{latex}}}
\caption{A parallel workflow}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{Advantages of make}
Control flow:
\begin{itemize}
\item Implicit.
\item The target can be removed if the recipe returns an error (\bt{.DELETE\_ON\_ERROR}).
\item The build process can resume from any point.
\end{itemize}
\bigskip
\pause
Plus:
\begin{itemize}
\item Portable.
\begin{itemize}
\item \bt{make}, \bt{qmake}, \bt{nmake}, \ldots
\end{itemize}
\item Modular.
\begin{itemize}
\item Multiple (overlapping) pipelines can be combined.
\end{itemize}
\item Test without executing (\bt{make -n}).
\end{itemize}
\end{frame}
\section{GAPSS3}
\begin{fframe}
\frametitle{Exome capture pipeline}
First version:
\begin{itemize}
\item Frame written in \bt{bash}.
\item Custom scripts for job handling.
\item Complicated to redeploy interrupted analysis.
\item Modularity is limited.
\end{itemize}
\bigskip
\pause
Current version:
\begin{itemize}
\item Reduced code from $\pm 2000$ to $\pm 400$ lines.
\item Optimised cluster usage.
\item Shorter analysis time.
\end{itemize}
\vfill
\permfoot{https://humgenprojects.lumc.nl/trac/GAPSS3}
\end{fframe}
\begin{frame}
\frametitle{The GAPSS3 workflow}
\begin{figure}
\includegraphics[width=\textwidth, height=0.9\textheight, keepaspectratio]{gapss3}
\caption{GAPSS3}
\end{figure}
\end{frame}
\begin{frame}
\frametitle{The GAPSS3 workflow}
\begin{figure}
\includegraphics[trim=320 0 100 70, clip, width=\textwidth]{gapss3}
\caption{Part of GAPSS3}
\end{figure}
\end{frame}
\section{Questions?}
\lastpagetemplate
\begin{fframe}
\begin{center}
Acknowledgements:
\bigskip
\bigskip
Michiel van Galen\\
Martijn Vermaat\\
Johan den Dunnen
\end{center}
\vfill
\bs{https://humgenprojects.lumc.nl/trac/shark/wiki/Makefile}
\end{fframe}
\end{document}
../../submodules/presentation/nbic_logo.eps
\ No newline at end of file
../../submodules/presentation/ngi_logo.eps
\ No newline at end of file
../../submodules/presentation/nwo_logo_en.eps
\ No newline at end of file
../../submodules/presentation/nwo_logo_nl.eps
\ No newline at end of file
../../shared/petrinet.tex
\ No newline at end of file
../../submodules/presentation/ul_logo.eps
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment