diff --git a/good_research_practice/good_research_practice.tex b/good_research_practice/good_research_practice.tex index 14e0bde7f6a66c6fa57c3f3d30c062258d431e13..267d8449ac297ab2f7dce23eba57a68eb742b88e 100644 --- a/good_research_practice/good_research_practice.tex +++ b/good_research_practice/good_research_practice.tex @@ -40,12 +40,12 @@ \end{itemize} \end{pframe} -\subsection{Git} +\section{Git} +\subsection{Version control} \begin{pframe} We needed to select a system to track a project, analysis and data \bigskip - Git \begin{figure} \centering \includegraphics[width=0.3\textwidth]{Git-Logo} @@ -65,7 +65,7 @@ \item Collaborate \item Centralize \item Backup - \item Version historu + \item Version history \item Secure your data \item Share! \end{itemize} @@ -89,7 +89,7 @@ \subsection{Ready to start} \begin{pframe} - Project skeleton available to clone and hit the ground running. + Project skeleton available to clone and hit the ground running, offering a platform to: \begin{itemize} \item Describe the project \item Document the pipelines @@ -109,7 +109,7 @@ \end{figure} \end{pframe} -\subsection{Scaling computation} +\section{Scaling computation} \begin{pframe} \begin{figure} \centering @@ -117,6 +117,144 @@ \end{figure} \end{pframe} +\subsection{Desktop workstation} +\begin{pframe} + Good for writing, browsing and testing + \begin{itemize} + \item Lacking power + \item Interface to real computers + \item No proper storage + \end{itemize} + Pretty interface +\end{pframe} + +\subsection{Shark cluster} +\begin{pframe} + \begin{itemize} + \item Part of the LUMC network + \item 700 CPUs + \item 4TB RAM + \item Top node 256GB RAM, 24 CPUs + \end{itemize} + \begin{figure} + \centering + \includegraphics[width=0.45\textwidth]{shark_schematic} + \end{figure} +\end{pframe} + +\subsection{Why the Shark cluster} +\begin{pframe} + \begin{itemize} + \item Useful for most research projects that need computing power + \item NGS, R, GWAS\dots 
+ \item Local, secure and powerful + \item Relatively simple to use (course available) + \item Plenty of resources and disk space + \begin{itemize} + \item Isilon storage nodes (37TB) for analysis + \item Long-term storage (0.5PB) for archiving + \end{itemize} + \item Still being expanded + \end{itemize} +\end{pframe} + +\subsection{Working on the Shark cluster} +\begin{pframe} + \begin{itemize} + \item Easy access with your LUMC account + \item Pipelines and knowledge available + \begin{itemize} + \item LGTC, SASC and other departments run many analyses + \item Very little downtime in recent years + \end{itemize} + \item Qlogin to work on a single node + \item Parallel analysis using makefiles and qsub + \end{itemize} +\end{pframe} + +\subsection{Shark limitations} +\begin{pframe} + \begin{itemize} + \item `Only' 256GB top node + \item Some graphical limitations + \item Queues can be full + \item Access from outside is difficult + \begin{itemize} + \item Only BAM export + \item Research network + \end{itemize} + \end{itemize} +\end{pframe} + +\subsection{Dutch Life Science Grid} +\begin{pframe} + \begin{itemize} + \item Across NL, 11 sites + \item $\sim$1500 CPUs, $\sim$5TB RAM + \end{itemize} + \begin{figure} + \centering + \includegraphics[width=0.5\textwidth]{lifesciencegrid_NL} + \end{figure} +\end{pframe} + +\subsection{Working on the grid} +\begin{pframe} + \begin{figure} + \centering + \includegraphics[width=1.0\textwidth]{rp3_grid} + \end{figure} +\end{pframe} + +\subsection{Working on the grid} +\begin{pframe} + Getting the grid to work is not easy but worthwhile + \begin{itemize} + \item 2300 RNA-seq samples + \item 300 jobs in parallel across the country + \item 10 cores, 40GB RAM for 36 hours + \item One sample at a time + \end{itemize} + \bigskip + + \emph{150,000 core hours in 4 days} +\end{pframe} + +\section{Recap} +\begin{pframe} + \begin{itemize} + \item Data analysis, management and storage are moving to more centralized and powerful platforms + \item 
Due to the demand of larger studies and higher throughput + \end{itemize} + \bigskip + + \begin{itemize} + \item Git offers a useful platform for project management + \item Remote resources are available, reducing desktop machines to terminals + \item Storage and guidelines need consideration + \item Solutions are being worked on: LUMC `Best Research Practices' + \end{itemize} +\end{pframe} + +\subsection{Courses and documents} +\begin{pframe} + \begin{itemize} + \item Linux + \item Shark + \item Python + \item Scripting + \item Git + \end{itemize} + \bigskip + + Documentation and wikis available at: + \bigskip + + humgenprojects.lumc.nl +\end{pframe} + + + \section{Questions?} \lastpagetemplate \begin{pframe} diff --git a/good_research_practice/lifesciencegrid_NL.png b/good_research_practice/lifesciencegrid_NL.png new file mode 120000 index 0000000000000000000000000000000000000000..1e072004b9ba9da9a54bb628b8921232571f32d5 --- /dev/null +++ b/good_research_practice/lifesciencegrid_NL.png @@ -0,0 +1 @@ +../presentation-pics/pics/lifesciencegrid_NL.png \ No newline at end of file diff --git a/good_research_practice/rp3_grid.png b/good_research_practice/rp3_grid.png new file mode 120000 index 0000000000000000000000000000000000000000..5482e0da892b6777c9b02bd3bf98b2210edda05e --- /dev/null +++ b/good_research_practice/rp3_grid.png @@ -0,0 +1 @@ +../presentation-pics/pics/rp3_grid.png \ No newline at end of file diff --git a/good_research_practice/shark_schematic.png b/good_research_practice/shark_schematic.png new file mode 120000 index 0000000000000000000000000000000000000000..147bf9bb8a60faef6e8009354a75c102f0ad7e1c --- /dev/null +++ b/good_research_practice/shark_schematic.png @@ -0,0 +1 @@ +../presentation-pics/pics/shark_schematic.png \ No newline at end of file