Skip to content
Snippets Groups Projects
Commit 586dd7e4 authored by Laros's avatar Laros
Browse files

Cleaned branch.

parent d9827ff2
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 299 deletions
../presentation-pics/pics/Git-Logo.png
\ No newline at end of file
../presentation-pics/pics/Gitlab_Lumc.png
\ No newline at end of file
../presentation/Makefile
\ No newline at end of file
../presentation/beamerthemelumc.sty
\ No newline at end of file
../presentation/gen2phen_logo.eps
\ No newline at end of file
\documentclass[slidestop]{beamer}
\title{Good research practice for data analysis}
\providecommand{\myConference}{NGS introduction}
\providecommand{\myDate}{Thursday, 22 May 2014}
\author{Michiel van Galen}
\providecommand{\myGroup}{Leiden Genome Technology Center}
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
% Fallback definition of \lastCenterLogo: \providecommand only defines the
% macro when it does not exist yet, so an earlier definition takes precedence.
% The body is a \raisebox lowered by 0.1cm; both \includegraphics lines in it
% are commented out, so no logo is currently drawn.
% Presumably consumed by the lumc theme (e.g. \lastpagetemplate) -- confirm in
% beamerthemelumc.sty.
% NOTE(review): the line ends after the opening and closing braces are not
% protected with `%', so the expansion contains spaces; verify that is intended.
\providecommand{\lastCenterLogo}{
\raisebox{-0.1cm}{
%\includegraphics[height=1cm]{lgtc_logo}
%\includegraphics[height=0.7cm]{ngi_logo}
}
}
% Fallback definition of \lastRightLogo: \providecommand only defines the
% macro when it does not exist yet, so an earlier definition takes precedence.
% All three \includegraphics lines in the body are commented out, so no logo
% is currently drawn.
% Presumably consumed by the lumc theme (e.g. \lastpagetemplate) -- confirm in
% beamerthemelumc.sty.
% NOTE(review): the unprotected line end after the opening brace leaves a
% single space as the macro's expansion; verify that is intended.
\providecommand{\lastRightLogo}{
%\includegraphics[height=0.7cm]{nbic_logo}
%\includegraphics[height=0.8cm]{nwo_logo_en}
%\hspace{1.5cm}\includegraphics[height=0.7cm]{gen2phen_logo}
}
\usetheme{lumc}
\begin{document}
% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}
% Make the title page.
\bodytemplate
% First page of the presentation.
\section{Introduction}
\begin{pframe}
\begin{itemize}
\item Where and how to process my data?
\item What and where to store?
\item How to document this?
\item What about big data?
\end{itemize}
\end{pframe}
\section{Git}
\subsection{Version control}
\begin{pframe}
We needed to select a system to track a project, analysis and data
\bigskip
\begin{figure}
\centering
\includegraphics[width=0.3\textwidth]{Git-Logo}
\end{figure}
\begin{itemize}
\item Distributed revision control and management system
\item Widely used for code tracking
\item Also very capable of tracking analyses, documents, articles
\end{itemize}
\end{pframe}
\subsection{With or without Git}
\begin{pframe}
\begin{itemize}
\item Track - who did what
\item Store
\item Collaborate
\item Centralize
\item Backup
\item Version history
\item Secure your data
\item Share!
\end{itemize}
\bigskip
One day Git course
\end{pframe}
\subsection{Server}
\begin{pframe}
Local implementation of GitLab within the LUMC
\bigskip
Log in with LUMC credentials
\bigskip
\begin{figure}
\centering
\includegraphics[width=0.5\textwidth]{Gitlab_Lumc}
\end{figure}
\end{pframe}
\subsection{Ready to start}
\begin{pframe}
Project skeleton available to clone and hit the ground running, offering a platform to:
\begin{itemize}
\item Describe the project
\item Document the pipelines
\item Data description
\item Select participants
\item Maintain big data
\item Folder structure
\item Log activities
\end{itemize}
\end{pframe}
\subsection{IPython notebook}
\begin{pframe}
\begin{figure}
\centering
\includegraphics[width=1.0\textwidth]{ipython_notebook}
\end{figure}
\end{pframe}
\section{Scaling computation}
\begin{pframe}
\begin{figure}
\centering
\includegraphics[width=1.0\textwidth]{scaling_computation}
\end{figure}
\end{pframe}
\subsection{Desktop workstation}
\begin{pframe}
Good for writing, browsing and testing
\begin{itemize}
\item Lacking power
\item Interface to real computers
\item No proper storage
\end{itemize}
Pretty interface
\end{pframe}
\subsection{Shark cluster}
\begin{pframe}
\begin{itemize}
\item Part of the LUMC network
\item 700 CPU
\item 4TB RAM
\item Top node 256GB RAM, 24 CPU
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.45\textwidth]{shark_schematic}
\end{figure}
\end{pframe}
\subsection{Why the Shark cluster}
\begin{pframe}
\begin{itemize}
\item Useful for most research projects that need computing power
\item NGS, R, GWAS\dots
\item Local, secure and powerful
\item Relatively simple to use (Course available)
\item Plenty of resources and disk space
\begin{itemize}
\item Isilon storage nodes (37TB) for analysis
\item Long term storage (0.5PB) for archiving
\end{itemize}
\item Still being expanded
\end{itemize}
\end{pframe}
\subsection{Working on the Shark cluster}
\begin{pframe}
\begin{itemize}
\item Easy access with your LUMC account
\item Pipelines and knowledge available
\begin{itemize}
\item LGTC, SASC and other departments run many analyses
\item Very little downtime in recent years
\end{itemize}
\item Qlogin to work on a single node
\item Parallel analysis using makefiles and qsub
\end{itemize}
\end{pframe}
\subsection{Shark limitations}
\begin{pframe}
\begin{itemize}
\item `Only' 256GB top node
\item Some graphical limitations
\item Queues can be full
\item Access from outside is difficult
\begin{itemize}
\item Only BAM export
\item Research network
\end{itemize}
\end{itemize}
\end{pframe}
\subsection{Dutch Life Science Grid}
\begin{pframe}
\begin{itemize}
\item Across NL, 11 sites
\item $\sim$1500 CPUs, $\sim$5TB RAM
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.5\textwidth]{lifesciencegrid_NL}
\end{figure}
\end{pframe}
\subsection{Working on the grid}
\begin{pframe}
\begin{figure}
\centering
\includegraphics[width=1.0\textwidth]{rp3_grid}
\end{figure}
\end{pframe}
\subsection{Working on the grid}
\begin{pframe}
Getting the grid to work is not easy but worthwhile
\begin{itemize}
\item 2300 RNA-seq samples
\item 300 jobs in parallel across the country
\item 10 cores, 40GB RAM for 36 hours
\item One sample at a time
\end{itemize}
\bigskip
\emph{150,000 core hours in 4 days}
\end{pframe}
\section{Recap}
\begin{pframe}
\begin{itemize}
\item Data analysis, management and storage are moving to more centralized and powerful platforms
\item Due to the demand of larger studies and higher throughput
\end{itemize}
\bigskip
\begin{itemize}
\item Git offers a useful platform for project management
\item Remote resources are available reducing desktop machines to terminals
\item Storage and guidelines need consideration
\item Solutions are being worked on, LUMC `Best Research Practices'
\end{itemize}
\end{pframe}
\subsection{Courses and documents}
\begin{pframe}
\begin{itemize}
\item Linux
\item Shark
\item Python
\item Scripting
\item Git
\end{itemize}
\bigskip
Documentation and wikis available at:
\bigskip
\url{humgenprojects.lumc.nl}
\end{pframe}
\section{Questions?}
\lastpagetemplate
\begin{pframe}
\begin{center}
Acknowledgements:
\bigskip
\bigskip
Jeroen Laros
\bigskip
Martijn Vermaat
\bigskip
Jeroen Frank
\bigskip
LGTC
\end{center}
\end{pframe}
\end{document}
../presentation-pics/pics/ipython_notebook.png
\ No newline at end of file
../presentation/lgtc_logo.eps
\ No newline at end of file
../presentation-pics/pics/lifesciencegrid_NL.png
\ No newline at end of file
../presentation/lumc_logo.eps
\ No newline at end of file
../presentation/lumc_logo_small.eps
\ No newline at end of file
../presentation/nbic_logo.eps
\ No newline at end of file
../presentation/ngi_logo.eps
\ No newline at end of file
../presentation/nwo_logo_en.eps
\ No newline at end of file
../presentation/nwo_logo_nl.eps
\ No newline at end of file
../presentation-pics/pics/rp3_grid.png
\ No newline at end of file
../presentation-pics/pics/scaling_computation.png
\ No newline at end of file
../presentation-pics/pics/shark_schematic.png
\ No newline at end of file
../presentation/ul_logo.eps
\ No newline at end of file
../presentation/Makefile
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment