Commit dc795f49 authored by Laros's avatar Laros
Browse files

Added eGRP presentation.

parent 4f15b416
../../submodules/presentation/Makefile
\ No newline at end of file
../../submodules/presentation/beamerthemelumc.sty
\ No newline at end of file
../../submodules/presentation-pics/pics/drawer.jpg
\ No newline at end of file
\documentclass[slidestop]{beamer}
\title{Good Research Practice for computational data analysis\\
\small{(part one)}}
\providecommand{\myConference}{Work discussion}
\providecommand{\myDate}{Thursday, March 5, 2015}
\author{Jeroen F. J. Laros}
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
% Centre logo: the LGTC logo at a height of 1cm, lowered by 0.1cm through
% \raisebox. \providecommand only defines the macro when it does not exist
% yet, so an earlier definition takes precedence over this one.
% NOTE(review): presumably consumed by the lumc theme on the last page
% (\lastpagetemplate) -- confirm in beamerthemelumc.sty.
% The commented-out line below is an alternative (NGI) logo.
\providecommand{\lastCenterLogo}{
\raisebox{-0.1cm}{
\includegraphics[height=1cm]{logos/lgtc_logo}
%\includegraphics[height=0.7cm]{logos/ngi_logo}
}
}
% Right-hand logo: currently no logo is shown, every candidate (NBIC, NWO,
% Gen2Phen) is commented out. Uncomment one of the lines below to enable it.
% \providecommand only defines the macro when it does not exist yet.
% NOTE(review): presumably consumed by the lumc theme on the last page
% (\lastpagetemplate) -- confirm in beamerthemelumc.sty.
\providecommand{\lastRightLogo}{
%\includegraphics[height=0.7cm]{logos/nbic_logo}
%\includegraphics[height=0.8cm]{logos/nwo_logo_en}
%\hspace{1.5cm}\includegraphics[height=0.7cm]{logos/gen2phen_logo}
}
\usetheme{lumc}
\begin{document}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title page.
\bodytemplate
% First page of the presentation.
\section{Introduction}
\subsection{Good research practices}
\begin{pframe}
\begin{figure}[]
\begin{center}
\colorbox{white}{
\includegraphics[width=0.6\textwidth]{labjournal}
}
\end{center}
\caption{Lab journal.}
\end{figure}
In the lab: lab journals, standard operating procedures, etc.
\end{pframe}
\subsection{Current practice}
\begin{pframe}
Pipeline development and data analysis have become increasingly important.
\bigskip
Surprisingly, there are no standard operating procedures for data analysis.
\bigskip
\pause
Example.
\begin{itemize}
\item Person A creates a program.
\item Person B modifies the program.
\pause
\item Person B receives a PhD.
\item Person A mails the program to person C.
\item Person C cannot reproduce the results of person B.
\end{itemize}
\end{pframe}
\begin{pframe}
\begin{figure}[]
\begin{center}
\colorbox{white}{
\includegraphics[width=0.2\textwidth]{harddisk}
\hfill
\includegraphics[width=0.38\textwidth]{drawer}
}
\end{center}
\caption{Final resting place for data.}
\end{figure}
Even worse, the results disappear into a drawer.
\end{pframe}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[height=0.85\textheight]{project_tree}
\end{center}
\caption{``I have my own system.''}
\end{figure}
\end{pframe}
\subsection{Current practice in our department}
\begin{pframe}
Luckily, we have tackled these problems already.
\bigskip
Investments in infrastructure:
\begin{itemize}
\item Centralised storage.
\begin{itemize}
\item Backup.
\end{itemize}
\item Centralised computing.
\end{itemize}
\bigskip
\pause
Adoption of \emph{software engineering} solutions.
\bigskip
We want to share this knowledge with our colleagues in other departments.
\end{pframe}
\subsection{``Beleidsinitiatief'' (structural funding)}
\begin{pframe}
Proposal for a change in policy.
\begin{itemize}
\item The Department of Human Genetics has some nice solutions available.
\item We always keep the big picture in mind.
\item The LUMC as a whole can benefit.
\end{itemize}
\bigskip
Proposal was granted.
\begin{itemize}
\item One FTE structural.
\item One additional FTE for one year.
\end{itemize}
\vfill
\permfoot{M. Roos, P.A.C. 't Hoen}
\end{pframe}
\begin{pframe}
The proposal consists of two main parts.
\bigskip
Work package 1.
\begin{itemize}
\item Version control.
\item Interactive computational environment.
\end{itemize}
\bigskip
Work package 2.
\begin{itemize}
\item Data stewardship.
\end{itemize}
\bigskip
\pause
Work package 1 and 2.
\begin{itemize}
\item Education.
\end{itemize}
\end{pframe}
\section{Version control}
\subsection{Git}
\begin{pframe}
\emph{The management of changes to documents, computer programs, large web
sites, and other collections of information.} --- Wikipedia.
\bigskip
\pause
General features:
\begin{itemize}
\item Keeping track of your files in an orderly manner.
\begin{itemize}
\item Hiding old versions.
\item Recording who made changes and when.
\end{itemize}
\item Enables collaboration.
\end{itemize}
\vfill
\permfoot{http://www.git-scm.com/}
\permfoot{https://github.com/}
\end{pframe}
\subsection{Why version control?}
\begin{pframe}
For a single user:
\begin{itemize}
\item Revert files to a previous state.
\item Revert the entire project back to a previous state.
\item Review changes made over time.
\item Backup.
\end{itemize}
\bigskip
\pause
For multiple users:
\begin{itemize}
\item A reliable way to share files between people/computers.
\item Allow multiple people to work on the same project at the same time.
\item Conflict resolution.
\item See who made which changes at which time.
\end{itemize}
\end{pframe}
%\subsection{Why should I not use it?}
%\begin{pframe}
% A list of common excuses:
% \begin{itemize}
% \item It is too much work.
% \item I have my own system.
% \item I am the only one working on this project.
% \item This code will not be used by anyone else.
% \item The bugs can be tracked forever.
% \item \ldots
% \end{itemize}
% \bigskip
% \pause
%
% Eventually leading to:
% \begin{itemize}
% \item I am too busy rewriting the code I accidentally deleted.
% \end{itemize}
%\end{pframe}
\subsection{Collaboration}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[height=0.85\textheight]{gitlab_network}
\end{center}
\caption{Collaboration with many people.}
\end{figure}
\end{pframe}
\subsection{Tracking of changes}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[width=\textwidth]{gitlab_diff}
\end{center}
\caption{Compare versions.}
\end{figure}
Not limited to the previous version and the latest one.
\begin{itemize}
\item Different authors.
\item Any two versions.
\end{itemize}
\end{pframe}
\subsection{Documentation}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[height=0.85\textheight]{gitlab_doc}
\end{center}
\caption{Documentation and programs in one place.}
\end{figure}
\end{pframe}
\subsection{Projects}
\begin{pframe}
Most of us work on multiple projects with multiple people.
\bigskip
That is why it is convenient to:
\begin{itemize}
\item Have everything in one place.
\begin{itemize}
\item Data.
\item Code.
\item Documentation.
\end{itemize}
\item Have the same structure for all projects.
\end{itemize}
\bigskip
\pause
This also makes transferring projects easier.
\end{pframe}
\begin{pframe}
Ideally, every directory in the project has a \lstinline{README} file.
\begin{table}[]
\begin{center}
\begin{tabular}{l|l}
directory & description\\
\hline
\lstinline{data} & Raw immutable data.\\
\lstinline{doc} & Sample sheets, papers, etc.\\
\lstinline{src} & Programs specific for this project.\\
\lstinline{analysis} & Results.\\
\end{tabular}
\end{center}
\caption{Project layout.}
\end{table}
This structure is used by the LGTC, later adopted by SASC.
\vfill
\permfoot{https://git.lumc.nl/lgtc-bioinformatics/project-skeleton/}
\end{pframe}
\section{Exploratory data analysis}
\subsection{Interactive computational environments}
\begin{pframe}
Combine code execution, text, mathematics, plots and rich media into a single
document.
\bigskip
Ideal for exploration of data.
\begin{itemize}
\item Documentation and code are interwoven.
\item Results are displayed inline.
\item Web based.
\item Versions.
\end{itemize}
\bigskip
\pause
Integration with GitLab.
\vfill
\permfoot{http://ipython.org/notebook.html}
\end{pframe}
\subsection{IPython notebook}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[height=0.85\textheight]{ipynb}
\end{center}
\caption{IPython notebook.}
\end{figure}
\end{pframe}
\section{Education}
\subsection{Overview}
\begin{pframe}
We offer the following courses:
\begin{itemize}
\item Introduction to clusters.
\item Introduction to Git.
\item Python programming.
\end{itemize}
\bigskip
Apart from this, we:
\begin{itemize}
\item Teach people running our pipelines (SASC).
\item Answer questions about the infrastructure.
\item Keep documentation up to date.
\end{itemize}
\vfill
\permfoot{https://humgenprojects.lumc.nl/}
\end{pframe}
\subsection{Introduction to clusters}
\begin{pframe}
Half-day course.
\bigskip
An overview of the available infrastructure.
\bigskip
In particular:
\begin{itemize}
\item Connecting to a cluster.
\item Using the Sun Grid Engine.
\item Do's and don'ts.
\item Makefiles.
\end{itemize}
\vfill
\permfoot{M. Villerius}
\end{pframe}
\subsection{Git}
\begin{pframe}
Everyone in the Bioinformatics field:
\begin{itemize}
\item Software development.
\item Project management.
\item Collaboration.
\end{itemize}
\bigskip
\pause
Topics:
\begin{itemize}
\item Git Basics
\item Branching
\item Remotes
\item Project skeleton / git annex
\end{itemize}
\vfill
\permfoot{M. Vermaat, W. Arindrarto}
\permfoot{https://humgenprojects.lumc.nl/trac/humgenprojects/wiki/git}
\end{pframe}
\subsection{Programming in Python}
\begin{pframe}
Four-day course.
\begin{itemize}
\item Python basics.
\item Standard data structures.
\item Working with NumPy arrays.
\item Plotting with matplotlib.
\item Object-oriented programming.
\item The Biopython library.
\end{itemize}
\vfill
\permfoot{M. Vermaat, W. Arindrarto, Z. Tatum, W.Y. Leung}
\permfoot{https://humgenprojects.lumc.nl/trac/programming-course}
\end{pframe}
\section{Part two}
\subsection{Data stewardship}
\begin{pframe}
This topic will be covered by Marco Roos at a later time.
\end{pframe}
\section{Questions?}
\lastpagetemplate
\begin{pframe}
\begin{center}
Acknowledgements:
\bigskip
\bigskip
Marco Roos
Peter-Bram 't Hoen
Martijn Vermaat
Zuotian Tatum
Wibowo Arindrarto
Wai Yi Leung
Michel Villerius
Silv\`ere van der Maarel
\end{center}
\end{pframe}
\end{document}
../../submodules/presentation-pics/pics/gitlab_diff.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/gitlab_doc.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/gitlab_network.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/harddisk.jpg
\ No newline at end of file
../../submodules/presentation-pics/pics/ipynb.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/labjournal.png
\ No newline at end of file
../../submodules/presentation/logos
\ No newline at end of file
../../submodules/presentation-pics/pics/project_tree.xcf
\ No newline at end of file
Subproject commit 3c06d9af7428d39ad74524d7ec8992537849942c
Subproject commit 5eaf7b127b82480d4aa147e9a583523d09b66269
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment