skeleton.tex 8.82 KB
Newer Older
1 2
\documentclass[slidestop]{beamer}

3 4
\input{../shared/shared.tex}

5
\author{Jeroen F. J. Laros}
6 7
\title{\courseTitle}
\providecommand{\mySubTitle}{Analysis projects skeleton}
8
\providecommand{\myConference}{\courseTitle}
9
\providecommand{\myGroup}{}
10 11
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
12

13 14 15 16 17 18 19 20
\usetheme{lumc}

\begin{document}

% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}

% Make the title page.
21
\makeTitleSlide{\includegraphics[width=3.5cm]{git_logo}}
22 23 24

% First page of the presentation.
\section{Introduction}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
25 26
\subsection{Shared projects}
\begin{pframe}
27 28 29 30 31 32 33 34 35 36 37
  Most of us work on multiple projects with multiple people.
  \bigskip

  That is why it is convenient to:
  \begin{itemize}
    \item Have everything in one place.
    \begin{itemize}
      \item Data.
      \item Code.
      \item Documentation.
    \end{itemize}
38
    \pause
39 40
    \item Have the same structure for all projects.
  \end{itemize}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
41
\end{pframe}
42

43
\section{Starting a project}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
44 45
\subsection{Project skeleton}
\begin{pframe}
46 47
  Usage:
  \begin{itemize}
48
    \item Make a clone of the skeleton project.
49
    \item Rename the project.
50
    \item Create a new project on the server.
51
    \item Change the remote \lstinline{origin} to your new project.
52
  \end{itemize}
53 54
  \bigskip
  \pause
55 56 57 58 59

  Configure your project.
  \begin{itemize}
    \item Choose to make your project public or not.
    \begin{itemize}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
60
      \item Private by default.
61 62 63 64
      \item Public really means public.
    \end{itemize}
    \item Add the people that work on this project.
  \end{itemize}
65 66

  \vfill
67
  \permfoot{\url{https://git.lumc.nl/lgtc-bioinformatics/project-skeleton}}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
68
\end{pframe}
69

70
\section{Project structure}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
71 72
\subsection{Global overview}
\begin{pframe}
73 74 75 76 77 78 79 80 81
  Project layout:
  \begin{itemize}
    \item analysis
    \item data
    \item doc
    \item src
  \end{itemize}
  \bigskip

82
  Ideally, every directory in the project has a \lstinline{README.md} file.
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
\end{pframe}

\subsection{Markdown files}
\begin{pframe}
  \begin{lstlisting}[language=none, caption=Markdown snippet.]
    # Installation

    To install [Git](http://www.git-scm.com/):

        apt-get install git

    Now you can do the following:

    - Make a new repository with `git init`.
    - Clone an existing repository with `git clone`.
  \end{lstlisting}
\end{pframe}
100

101 102 103 104 105 106 107 108
% \begin{pframe}
%   \begin{figure}[]
%     \begin{center}
%       \includegraphics[width=\textwidth]{markdown}
%     \end{center}
%     \caption{Rendered markdown page.}
%   \end{figure}
% \end{pframe}
109

110
\subsection{The top-level ``README.md'' file}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
111
\begin{pframe}
112 113 114 115 116 117
  This file contains general information about the project, for example:
  \begin{itemize}
    \item Who leads the project.
    \item Who participates in the project.
    \item The number of hours people have spent on this project.
  \end{itemize}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
118
\end{pframe}
119

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
120 121
\subsection{The ``doc'' directory}
\begin{pframe}
122 123
  Documentation on the project:
  \begin{itemize}
124
    \item Annotation of the data.
125 126 127 128 129 130
    \item Goal of the project.
    \item Related work and literature.
    \begin{itemize}
      \item You may want to note who provided the documentation.
    \end{itemize}
  \end{itemize}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
131
\end{pframe}
132

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
133 134
\subsection{The ``data'' directory}
\begin{pframe}
135 136 137
  Used to store all raw data.
  \bigskip

138
  The \lstinline{README.md} contains:
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
  \begin{itemize}
    \item Description of the delivered data.
    \begin{itemize}
      \item Sequencing centre.
      \item Platform.
      \item Molecular type.
      \item Owner.
      \item Gatherer.
    \end{itemize}
    \item Description of other data.
    \begin{itemize}
      \item Perhaps you already got BAM files.
      \begin{itemize}
        \item Who aligned it?
        \item Which aligner?
      \end{itemize}
    \end{itemize}
  \end{itemize}

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
158
\end{pframe}
159

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
160 161
\subsection{The ``analysis'' directory}
\begin{pframe}
162 163
  All analysis related files are stored here:
  \begin{itemize}
164
    \item Symlinks to the actual data.
165 166 167 168 169 170 171
    \item Run scripts.
    \item Make files.
    \item Result files.
  \end{itemize}
  \bigskip

  Try to separate self-contained parts of the analysis into their own
172
  subdirectories and document dependencies in a \lstinline{README.md} file.
173 174 175 176
  \begin{itemize}
    \item Normal data analysis.
    \item $k$-mer analysis.
  \end{itemize}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
177
\end{pframe}
178

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
179 180
\subsection{The ``src'' directory}
\begin{pframe}
181 182 183 184 185
  Any custom scripts and specific software versions for this project.
  \bigskip

  When these scripts are useful for other projects, move them to their own
  repository.
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
186
\end{pframe}
187 188

\section{Working with large files}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
189 190
\subsection{Git is not designed for massive files}
\begin{pframe}
191 192 193 194 195 196 197
  Some problems with large files:
  \begin{itemize}
    \item Limited storage on the server.
    \item Checking out a repository would take a long time.
  \end{itemize}
  \bigskip

198 199 200 201 202 203 204 205
  It also does not make much sense:
  \begin{itemize}
    \item These files are usually \emph{static}.
    \item And probably \emph{binary}.
  \end{itemize}
  \bigskip
  \pause

206
  We do want to have some way to track our input and output data. This can be
207
  done with \lstinline{git-annex}.
208 209

  \vfill
210
  \permfoot{\url{http://git-annex.branchable.com/}}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
211
\end{pframe}
212

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
213 214
\subsection{Git annex}
\begin{pframe}
215
  Manage files with git, without checking their contents in.
216 217 218 219 220 221 222 223 224
  \begin{itemize}
    \item Manage large files without storing them.
    \item Store file checksums.
    \item Prevent files from being deleted accidentally.
  \end{itemize}
  \bigskip
  \pause

  You first have to enable this for your repository.
225
  \bigskip
226 227 228 229

  \begin{lstlisting}[language=none, caption=Enable git-annex.]
    $ git annex init "<name>"
  \end{lstlisting}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
230
\end{pframe}
231

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
232 233
\subsection{Adding big files}
\begin{pframe}
234 235 236
  In our master repository, we annex a file.
  \bigskip

237 238 239 240 241
  \begin{lstlisting}[language=none, caption=Adding files.]
    $ git annex add <filename>
    $ git commit
  \end{lstlisting}
  \bigskip
242
  \pause
243 244

  In a clone, this file will be visible, but not really present.
245 246
  \bigskip

247 248 249 250 251
  \begin{lstlisting}[language=none, caption=Make a file available.]
    $ file <filename>
    <filename>: broken symbolic link to ...
    $ git annex get <filename>
  \end{lstlisting}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
252
\end{pframe}
253

254 255 256 257 258 259 260 261 262 263 264
\subsection{Modifying files}
\begin{pframe}
  Sometimes we need to change the content of a file.
  \bigskip

  \begin{lstlisting}[language=none, caption=Unlocking a file.]
    $ git annex edit <filename>
    unlock <filename> (copying...) ok
  \end{lstlisting}
  \bigskip

265
  You can use \lstinline{git annex add} when you are done.
266 267
\end{pframe}

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
268 269
\subsection{Removing files}
\begin{pframe}
270
  As long as there are enough copies available, you can remove files.
271 272
  \bigskip

273 274 275 276 277 278
  \begin{lstlisting}[language=none, caption=A failing drop command.]
    $ git annex drop <filename>
    drop bigfile (unsafe)
    git-annex: drop: 1 failed
  \end{lstlisting}
  \bigskip
279
  \pause
280 281

  It is actually quite well protected.
282 283
  \bigskip

284 285 286 287
  \begin{lstlisting}[language=none, caption=rm fails too.]
    $ rm -rf <repository>
    rm: cannot remove <repository>/.git/annex/objects/...
  \end{lstlisting}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
288
\end{pframe}
289

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
290 291
\subsection{Synchronise your results}
\begin{pframe}
292
  Let the other repositories know what you have done.
293 294 295
  \bigskip

  \begin{lstlisting}[language=none, caption=Synchronise with all repositories.]
296 297
    $ git annex sync
  \end{lstlisting}
298 299 300 301 302 303 304 305 306
  \bigskip
  \pause

  You can choose to sync with a selection of repositories.
  \bigskip

  \begin{lstlisting}[language=none, caption=Synchronise with a selection.]
    $ git annex sync origin
  \end{lstlisting}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
307
\end{pframe}
308

309 310 311 312 313 314 315 316 317 318 319 320 321 322
\subsection{Cleaning your repository}
\begin{pframe}
  You can clean your repository with one command.
  \bigskip

  \begin{lstlisting}[language=none, caption=Remove untracked files.]
    $ git clean -f -x
  \end{lstlisting}

  \begin{table}[]
    \begin{center}
      \begin{tabular}{ll}
        option & description\\
        \hline
323 324 325
        \texttt{-f} & Force (really remove).\\
        \texttt{-x} & Also remove \emph{ignored} files.\\
        \texttt{-n} & Do a \emph{dry run}.\\
326 327 328 329 330 331
      \end{tabular}
    \end{center}
    \caption{Common options.}
  \end{table}
\end{pframe}

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
332 333
\subsection{Working together on the same clone}
\begin{pframe}
334 335 336 337 338 339 340 341 342 343
  Sometimes you need to work with other people on the same repository clone.
  \begin{itemize}
    \item Where the large files are stored.
  \end{itemize}
  \bigskip

  Use the following command to give group access:
  \bigskip

  \begin{lstlisting}[language=none, caption=Make everything group writable.]
344 345 346
    $ find -type d -exec chmod 775 {} \;
    $ find -type f -exec chmod 664 {} \;
  \end{lstlisting}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
347
\end{pframe}
348

349 350 351 352 353
\makeAcknowledgementsSlide{
  \begin{tabular}{l}
    \acknowledgements
  \end{tabular}
  \bigskip
354

355
  \hfill\includegraphics[width=3.5cm]{git_logo}
356

357 358 359 360 361
  \begin{tabular}{l}
    \small\url{http://git-annex.branchable.com/}\\
    \small\url{https://git.lumc.nl/lgtc-bioinformatics/project-skeleton}
  \end{tabular}
}
362
\end{document}