Commit 7a45299c authored by Jeroen F.J. Laros

Updated the skeleton presentation, added handouts.

parent 8331c471
\documentclass[slidestop]{beamer} \documentclass[slidestop]{beamer}
\title{Introduction to Version Control} \title{Analysis projects skeleton}
\providecommand{\myConference}{Git course} \providecommand{\myConference}{Git course}
\providecommand{\myDate}{Monday, October 14, 2013} \providecommand{\myDate}{Monday, October 14, 2013}
\author{Jeroen F. J. Laros} \author{Jeroen F. J. Laros}
...@@ -32,12 +32,263 @@ ...@@ -32,12 +32,263 @@
% First page of the presentation. % First page of the presentation.
\section{Introduction} \section{Introduction}
\begin{frame} \begin{frame}
\frametitle{} \frametitle{Shared projects.}
Most of us work on multiple projects with multiple people.
\bigskip
That is why it is convenient to:
\begin{itemize}
\item Have everything in one place.
\begin{itemize}
\item Data.
\item Code.
\item Documentation.
\end{itemize}
\item Have the same structure for all projects.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Project skeleton.}
That is why we made a \emph{skeleton} project.
\bigskip
Usage:
\begin{itemize}
\item Make a \emph{fork} (copy) of the skeleton project.
\item Rename the project.
\item Do a checkout.
\item Start working with it.
\end{itemize}
\end{frame}
\section{Starting a project}
\begin{fframe}
\frametitle{Forking}
Make a new analysis project.
\begin{itemize}
\item Go to the ``Project skeleton'' project page on our GitLab server.
\item Click ``Fork'' to fork it to a new project.
\item Go to ``Settings'' to rename the new project.
\begin{itemize}
\item Change both the project name and the repository path.
\end{itemize}
\end{itemize}
\vfill
\permfoot{https://git.lumc.nl/lgtc-bioinformatics/project-skeleton}
\end{fframe}
\begin{frame}
\frametitle{Configuration}
Configure your project.
\begin{itemize}
\item Choose to make your project public or not.
\begin{itemize}
\item Public by default.
\item Public really means public.
\end{itemize}
\item Add the people that work on this project.
\end{itemize}
\end{frame}
\section{Project structure}
\begin{frame}
\frametitle{Global overview.}
Project layout:
\begin{itemize}
\item analysis
\item data
\item doc
\item src
\end{itemize}
\bigskip
Ideally, every directory in the project has a \bt{README} file.
\end{frame}
\begin{frame}
\frametitle{The top-level \bt{\,README\,\,} file.}
This file contains general information about the project, for example:
\begin{itemize}
\item Who leads the project.
\item Who participates in the project.
\item The number of hours people have spent on this project.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The \bt{\,doc\,\,} directory.}
Documentation on the project:
\begin{itemize}
\item Annotated sample lists.
\item Goal of the project.
\item Related work and literature.
\begin{itemize}
\item You may want to note who provided the documentation.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The \bt{\,data\,\,} directory.}
Used to store all raw data.
\bigskip
The \bt{README} contains:
\begin{itemize}
\item Description of the delivered data.
\begin{itemize}
\item Sequencing centre.
\item Platform.
\item Molecular type.
\item Owner.
\item Gatherer.
\end{itemize}
\item Description of other data.
\begin{itemize}
\item Perhaps you already got BAM files.
\begin{itemize}
\item Who aligned it?
\item Which aligner?
\end{itemize}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The \bt{\,analysis\,\,} directory.}
All analysis related files are stored here:
\begin{itemize}
\item Run scripts.
\item Make files.
\item Result files.
\end{itemize}
\bigskip
Try to separate self-contained parts of the analysis into their own
subdirectories and document dependencies in a \bt{README} file.
\begin{itemize}
\item Normal data analysis.
\item $k$-mer analysis.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{The \bt{\,src\,\,} directory.}
Any custom scripts and specific software versions for this project.
\bigskip
When these scripts are useful for other projects, move them to their own
repository.
\end{frame}
\section{Working with large files}
\begin{fframe}
\frametitle{Git is not designed for massive files.}
Some problems with large files:
\begin{itemize}
\item Limited storage on the server.
\item Checking out a repository would take a long time.
\end{itemize}
\bigskip
We do want to have some way to track our input and output data. This can be
done with \bt{git-annex}.
\vfill
\permfoot{http://git-annex.branchable.com/}
\end{fframe}
\begin{frame}[fragile]
\frametitle{Git annex.}
\begin{itemize}
\item Manage large files without storing them.
\item Store file checksums.
\item Prevent files from being deleted accidentally.
\end{itemize}
\bigskip
\pause
You first have to enable this for your repository.
\begin{lstlisting}[language=none, caption=Enable git-annex.]
$ git annex init "<name>"
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Adding big files.}
\begin{lstlisting}[language=none, caption=Adding files.]
$ git annex add <filename>
$ git commit
\end{lstlisting}
\bigskip
In a clone, this file will be visible, but not really present.
\begin{lstlisting}[language=none, caption=Make a file available.]
$ file <filename>
<filename>: broken symbolic link to ...
$ git annex get <filename>
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Removing files.}
As long as there are enough copies available, you can remove files.
\begin{lstlisting}[language=none, caption=A failing drop command.]
$ git annex drop <filename>
drop bigfile (unsafe)
git-annex: drop: 1 failed
\end{lstlisting}
\bigskip
It is actually quite well protected.
\begin{lstlisting}[language=none, caption=rm fails too.]
$ rm -rf <repository>
rm: cannot remove <repository>/.git/annex/objects/...
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Sync your results.}
Let the other repositories know what you have done.
\begin{lstlisting}[language=none, caption=Synchronise the repositories.]
$ git annex sync
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Working together on the same clone.}
If you need to work with other people on the same repository clone on the
Shark cluster, you can use the following commands to give group access:
\begin{lstlisting}[language=none, caption=Give group access.]
$ find -type d -exec chmod 775 {} \;
$ find -type f -exec chmod 664 {} \;
\end{lstlisting}
\end{frame} \end{frame}
\section{Questions?} \section{Questions?}
\lastpagetemplate \lastpagetemplate
\begin{frame} \begin{fframe}
\begin{center} \begin{center}
Acknowledgements: Acknowledgements:
\bigskip \bigskip
...@@ -48,6 +299,10 @@ ...@@ -48,6 +299,10 @@
Zuotian Tatum Zuotian Tatum
\end{center} \end{center}
\end{frame}
\vfill
\permfoot{http://git-annex.branchable.com/}
\permfoot{https://git.lumc.nl/lgtc-bioinformatics/project-skeleton}
\end{fframe}
\end{document} \end{document}
\documentclass{article}
\usepackage{fullpage}
\usepackage{listings}
\frenchspacing
\setlength{\parindent}{0pt}
\pagestyle{empty}
\begin{document}
\begin{center}
{\bf Git Introduction Course}
Project skeleton practical.
\end{center}
\bigskip
\subsubsection*{Git annex.}
First, we make an empty repository:
\begin{lstlisting}
$ mkdir annex_project
$ cd annex_project
$ git init
$ git annex init "Original repository."
\end{lstlisting}
\bigskip
Now add a ``big file'' and annex it.
\begin{lstlisting}
$ dd if=/dev/urandom of=bigfile.dat count=1024
$ git annex add bigfile.dat
$ git commit -m "Added big file."
\end{lstlisting}
\bigskip
Now, we clone the project and let this repository know where the clone is.
\begin{lstlisting}
$ git clone . ../annex_clone
$ git remote add annex_clone ../annex_clone
$ cd ../annex_clone
$ git annex init "Cloned repository."
\end{lstlisting}
\bigskip
With the ``\texttt{file}'' command you can now see that
``\texttt{bigfile.dat}'' is not present in this repository.
\begin{lstlisting}
$ git pull
$ git annex get bigfile.dat
\end{lstlisting}
\bigskip
You can now remove the big data file from the original repository.
\begin{lstlisting}
$ cd ../annex_project
$ git annex drop bigfile.dat
\end{lstlisting}
But you cannot remove it from the clone.
\subsubsection*{Project skeleton.}
Search for the ``Project skeleton'' project.
\begin{itemize}
\item \emph{Hint:} Click the ``Public area'' icon and use the search option.
\end{itemize}
\bigskip
Suppose you are going to do an RNASeq analysis. You have the following files:
\begin{itemize}
\item read\_1.fq
\item read\_2.fq
\item Makefile
\end{itemize}
\end{document}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment