From 77325486292444baf0c527c1e5bae7df77430680 Mon Sep 17 00:00:00 2001
From: "J.F.J. Laros" <j.f.j.laros@lumc.nl>
Date: Tue, 25 Sep 2012 21:14:22 +0000
Subject: [PATCH] Added a presentation that covers the description extractor.

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@608 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 .../Makefile                                  |   1 +
 .../beamerthemelumc.sty                       |   1 +
 .../gen2phen_logo.eps                         |   1 +
 .../lgtc_logo.eps                             |   1 +
 .../lstBNF.tex                                |   1 +
 .../lumc_logo.eps                             |   1 +
 .../lumc_logo_small.eps                       |   1 +
 .../nbic_logo.eps                             |   1 +
 .../ngi_logo.eps                              |   1 +
 .../nwo_logo_en.eps                           |   1 +
 .../presentation.tex                          | 676 ++++++++++++++++++
 .../ul_logo.eps                               |   1 +
 12 files changed, 687 insertions(+)
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/Makefile
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/beamerthemelumc.sty
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/gen2phen_logo.eps
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lgtc_logo.eps
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lstBNF.tex
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo.eps
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo_small.eps
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nbic_logo.eps
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ngi_logo.eps
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nwo_logo_en.eps
 create mode 100644 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/presentation.tex
 create mode 120000 doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ul_logo.eps

diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/Makefile b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/Makefile
new file mode 120000
index 00000000..90684a51
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/Makefile
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/Makefile
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/beamerthemelumc.sty b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/beamerthemelumc.sty
new file mode 120000
index 00000000..f080c7a5
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/beamerthemelumc.sty
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/beamerthemelumc.sty
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/gen2phen_logo.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/gen2phen_logo.eps
new file mode 120000
index 00000000..4d0481a6
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/gen2phen_logo.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/gen2phen_logo.eps
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lgtc_logo.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lgtc_logo.eps
new file mode 120000
index 00000000..8e90373b
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lgtc_logo.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/lgtc_logo.eps
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lstBNF.tex b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lstBNF.tex
new file mode 120000
index 00000000..8645ea1c
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lstBNF.tex
@@ -0,0 +1 @@
+../Presentation_24-02-11_HumGen_Mutalyzer2/lstBNF.tex
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo.eps
new file mode 120000
index 00000000..e9937f89
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/lumc_logo.eps
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo_small.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo_small.eps
new file mode 120000
index 00000000..290da08d
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/lumc_logo_small.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/lumc_logo_small.eps
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nbic_logo.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nbic_logo.eps
new file mode 120000
index 00000000..fcde8c20
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nbic_logo.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/nbic_logo.eps
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ngi_logo.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ngi_logo.eps
new file mode 120000
index 00000000..b6f678ac
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ngi_logo.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/ngi_logo.eps
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nwo_logo_en.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nwo_logo_en.eps
new file mode 120000
index 00000000..f54507d6
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/nwo_logo_en.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/nwo_logo_en.eps
\ No newline at end of file
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/presentation.tex b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/presentation.tex
new file mode 100644
index 00000000..3365ef1c
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/presentation.tex
@@ -0,0 +1,676 @@
+\documentclass[slidestop]{beamer}
+
+\title{Extracting HGVS descriptions}
+\providecommand{\myConference}{Work discussion}
+\providecommand{\myDate}{Wednesday, 26 September 2012}
+\author{Jeroen F. J. Laros}
+\providecommand{\myGroup}{Leiden Genome Technology Center}
+\providecommand{\myDepartment}{Department of Human Genetics}
+\providecommand{\myCenter}{Center for Human and Clinical Genetics}
+\providecommand{\lastCenterLogo}{
+  \raisebox{-0.1cm}{
+    \includegraphics[height = 1cm]{lgtc_logo}
+    %\includegraphics[height = 0.7cm]{ngi_logo}
+  }
+}
+\providecommand{\lastRightLogo}{
+  %\includegraphics[height = 0.7cm]{nbic_logo}
+  %\includegraphics[height = 0.8cm]{nwo_logo_en}
+  \hspace{1.5cm}\includegraphics[height = 0.7cm]{gen2phen_logo}
+}
+
+\usetheme{lumc}
+
+\usepackage{ifthen}
+
+\input{lstBNF}
+
+\begin{document}
+
+\newcommand{\algorithmexample}[1]{
+  \begin{figure}[]
+    \begin{center}
+      \fbox{
+        \setlength{\unitlength}{1pt}
+        \linethickness{3pt}
+        \begin{picture}(300, 60)(0, 0)
+          \put(0, 10){\line(1, 0){30}} % Observed sequence (lower line, y = 10): segment left of the area of change.
+          \put(30, 10){\color{red}\line(1, 0){240}\color{white}} % Area of change on the observed sequence (x = 30..270), drawn in red; colour is set back to white afterwards.
+          \put(270, 10){\line(1, 0){30}}
+          \put(0, 14){{\scriptsize observed}}
+
+          \put(0, 40){\line(1, 0){30}} % Reference sequence (upper line, y = 40): segment left of the area of change.
+          \put(30, 40){\color{green}\line(1, 0){240}\color{white}} % Area of change on the reference sequence (x = 30..270), drawn in green; colour is set back to white afterwards.
+          \put(270, 40){\line(1, 0){30}}
+          \put(0, 46){{\scriptsize reference}}
+          \put(30, 30){{\scriptsize $8$}}
+          \put(270, 30){{\scriptsize $98$}}
+
+          \ifthenelse{\equal{#1}{1}}{
+            \drawcurve(50, 40)(55, 35)(155, 25)(255, 15)(260, 10)
+            \drawcurve(260, 40)(255, 35)(155, 25)(55, 15)(50, 10)
+          }{}
+          \ifthenelse{#1>1}{
+            \put(50, 10){\line(1, 0){210}} % Inversion: segment x = 50..260 overdrawn on the observed line when the argument is greater than 1.
+            \put(50, 40){\line(1, 0){210}} % Inversion: the same segment overdrawn on the reference line.
+          }{}
+          \ifthenelse{#1>2}{
+            \put(35, 10){\line(1, 0){10}}
+            \put(35, 40){\line(1, 0){10}}
+          }{}
+        \end{picture}
+      }
+    \end{center}
+    \caption{How would a human do it?}
+  \end{figure}
+}
+
+% This disables the \pause command, handy in the editing phase.
+%\renewcommand{\pause}{}
+
+% Make the title page.
+\bodytemplate
+
+% First page of the presentation.
+\section{Introduction}
+
+\begin{frame}
+  \frametitle{Mutalyzer}
+  
+  A curational tool for \emph{Locus Specific Mutation Databases} (LSDBs).
+  \pause
+  \bigskip
+
+  Variant nomenclature checker applying \emph{Human Genome Variation Society}
+  (HGVS) guidelines.
+  \begin{itemize}
+    \item Is the syntax of the variant description valid?
+    \item Does the reference sequence exist?
+    \item Is the variant possible on this reference sequence?
+    \item Is this variant description the recommended one?
+  \end{itemize}
+  \bigskip
+  \pause
+
+  Basic effect prediction.
+  \begin{itemize}
+    \item Is the description of the transcript product as expected?
+    \item Is the predicted protein as expected?
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Mutalyzer}
+
+  Nowadays Mutalyzer is a vital part of LOVD version 3.
+  \bigskip
+  \pause
+
+  Make a reference sequence (configure new gene):
+  \begin{itemize}
+    \item Given a gene symbol, make a slice of a chromosome.
+    \item Receive information on the transcripts and genes in a genomic
+      reference sequence.
+  \end{itemize}
+  \medskip
+  \pause
+
+  Mapping variants:
+  \begin{itemize}
+    \item Find which transcript is affected.
+    \item Map variants to the genome and vice versa.
+    \item Lift a description over to another transcript.
+  \end{itemize}
+  \medskip
+  \pause
+
+  Curating submissions:
+  \begin{itemize}
+    \item Checking the syntax.
+    \item Checking the variant description.
+  \end{itemize}
+\end{frame}
+
+\section{HGVS nomenclature}
+\begin{frame}
+  \frametitle{HGVS descriptions}
+
+  A simple variant:
+
+  \bt{NM\_002001.2:c.25A>T}
+  \bigskip
+  \pause
+
+  \begin{table}[]
+    \begin{center}
+      \begin{tabular}{c|l}
+        Token             & meaning \\
+        \hline
+        \bt{NM\_002001.2} & Reference sequence and version. \\
+        \bt{c.}           & Coordinate system. \\
+        \bt{25}           & Position within a coordinate system. \\
+        \bt{A>T}          & Variant (substitution). \\
+      \end{tabular}
+    \end{center}
+    \caption{A simple variant description.}
+  \end{table}
+  \bigskip
+  \pause
+
+  Combine simple variants to complex ones:
+
+  \bt{NM\_002001.2:c.[25A>T;100del]}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{HGVS syntax}
+  \pause
+
+  Definition of a gene symbol.
+  \begin{lstlisting}[language = BNF, caption = {Abstract HGVS nomenclature}]
+    TransVar   -> `_v' Number
+    ProtIso    -> `_i' Number
+    GeneSymbol -> `(' Name (TransVar | ProtIso)? `)'
+  \end{lstlisting}
+  \bigskip
+  \pause
+
+  Gene name and optionally a transcript or isoform number.
+
+  \begin{lstlisting}[caption = {HGVS nomenclature in Python}]
+      TransVar = Suppress("_v") + Number("TransVar")
+      ProtIso = Suppress("_i") + Number("ProtIso")
+      GeneSymbol = Suppress('(') + \
+          Group(Name("GeneSymbol") + \
+          Optional(TransVar ^ ProtIso))("Gene") + \
+          Suppress(')')
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}
+  \frametitle{HGVS semantics}
+
+  There are a few guidelines for describing variants:
+  \begin{itemize}
+    \item Always use the most 5' variant description.
+    \item Use the shortest description.
+  \end{itemize}
+  \bigskip
+  \pause
+
+  There are no guidelines on \emph{how} to do this.
+  \bigskip
+
+  Example: we observe a change from \bt{CCCCCCC} to \bt{CACACAC}.
+  \begin{itemize}
+    \item \bt{2\_6\color{yellow}delins\color{white}ACACA}
+    \item \bt{[2C\color{yellow}>\color{white}A;4C\color{yellow}>\color{white}A;6C\color{yellow}>\color{white}A]}
+    \item \bt{[1\_2\color{yellow}ins\color{white}A;3\_6\color{yellow}delins\color{white}ACA]}
+    \item \ldots
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Variants are not ``inherited''}
+
+  Silent mutations for example.
+  \bigskip
+  \pause
+
+  A double frameshift:
+
+  \bt{NM\_002001.2:c.[10del;22\_23del]}
+
+  \bt{NP\_001992.1:p.Ala4\_Pro7delinsProTrpAsn}
+  \bigskip
+  \pause
+
+  A complex variant that leads to a simple protein change:
+
+  \bt{NM\_002001.2:c.[10\_12delinsAAA;102G>A]}
+
+  \bt{NP\_001992.1:p.Ala4Lys}
+  \bigskip
+  \pause
+
+  An insertion that affects two codons:
+
+  \bt{NM\_002001.2:c.10\_11insTTT}
+
+  \bt{NP\_001992.1:p.Ala4delinsValSer}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Problem description}
+
+  Verifying the validity of a variant description is not enough:
+  \begin{itemize}
+    \item Both \bt{5\_7delinsATA} and \bt{[6G>A;7C>A]} are valid.
+    \item We want one representation.
+  \end{itemize}
+  \bigskip
+  \pause
+
+  We need something that:
+  \begin{itemize}
+    \item Accepts any description to modify a reference sequence.
+    \item Compares the reference and the modified sequence to make a
+      description.
+  \end{itemize}
+  \bigskip
+  \pause
+
+  A description extractor.
+\end{frame}
+
+\section{Extracting descriptions}
+\begin{frame}
+  \frametitle{A ``human'' way of finding a description}
+
+  Observation:
+  \begin{itemize}
+    \item There is always a default way of describing a variant (\bt{delins}).
+    \item A \bt{delins} may be split in smaller parts.
+  \end{itemize}
+  \bigskip
+  \pause
+
+  Outline:
+  \begin{itemize}
+    \item Find the \emph{area of change}.
+    \item Describe this as a \bt{delins}.
+    \item Find the largest overlap in this area of change, splitting the area
+      in two.
+    \item Describe the two sub areas, and see whether this description is
+      smaller than the one we have.
+  \end{itemize}
+\end{frame}
+
+\begin{fframe}
+  \frametitle{Outline of the algorithm}
+
+  \only<1>{\algorithmexample{0}}
+  \only<2>{\algorithmexample{1}}
+  \only<3>{\algorithmexample{2}}
+  \only<4>{\algorithmexample{3}}
+
+  \bt{8\_98\color{yellow}delins\color{white}AGATGCGATAGATTAGCTATATAGGATCG\ldots}
+  \onslide<3->{\bt{[8\_12\color{yellow}delins\color{white}AGATG;13\_96\color{yellow}inv\color{white};97\_98\color{yellow}delins\color{white}TG]}}
+
+  \onslide<4->{\bt{[8G\color{yellow}>\color{white}A;12C\color{yellow}>\color{white}G;13\_96\color{yellow}inv\color{white};97\_98\color{yellow}delins\color{white}TG]}}
+
+  \vfill
+\end{fframe}
+
+\begin{fframe}
+  \frametitle{Finding common substrings}
+
+  How would a computer do it?
+  \begin{table}[]
+    \begin{center}
+      \begin{tabular}{l|lllllll}
+          & \bt{A} & \bt{T} & \bt{G} & \bt{A} & \bt{G} & \bt{C} & \bt{G} \\
+        \hline
+        \bt{A} & \onslide<2>{\color{red}}1 & 0 & 0 &
+          \onslide<3>{\color{gray}}1 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 \\
+        \bt{T} & 0 & \onslide<2>{\color{red}}2 & 0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 \\
+        \bt{C} & 0 & 0 & 0 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}1 &
+          \onslide<3>{\color{gray}}0 \\
+        \bt{A} & \onslide<3>{\color{gray}}1 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}1 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 \\
+        \bt{G} & \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}1 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}2 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}1 \\
+        \bt{C} & \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}3 &
+          \onslide<3>{\color{gray}}0 \\
+        \bt{A} & \onslide<3>{\color{gray}}1 & \onslide<3>{\color{gray}}0 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}1 &
+          \onslide<3>{\color{gray}}0 & \onslide<3>{\color{gray}}0 & 0 \\
+      \end{tabular}
+    \end{center}
+    \caption{LCS dynamic programming.}
+  \end{table}
+
+  \only<2>{Reusing partial solutions.}
+  \only<3>{Reusing parts of the matrix.}
+
+  \vfill
+\end{fframe}
+
+\section{Results}
+\begin{frame}
+  \frametitle{Protein descriptions}
+
+  Input:
+
+  \bt{NM\_002001.2:n.[109G>T;139G>T;159del]}
+  \bigskip
+  \pause
+
+  Old:
+
+  \bt{NM\_002001.2:n.[109G>T;139G>T;159del]}
+
+  \bt{NM\_002001.2:p.?}
+  \bigskip
+  \pause
+
+  New:
+
+  \bt{NM\_002001.2:n.[109G>T;139G>T;159del]}
+
+  \bt{NM\_002001.2:p.[Ala4Ser;Ala14Ser;Asp21Metfs*4]}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Protein descriptions (2)}
+
+  Input:
+
+  \bt{NM\_002001.2:n.[159del;162\_163del]}
+  \bigskip
+  \pause
+
+  Old:
+
+  \bt{NM\_002001.2:n.[159del;162\_163del]}
+
+  \bt{NM\_002001.2:p.?}
+  \bigskip
+  \pause
+
+  New:
+
+  \bt{NM\_002001.2:n.[159del;162\_163del]}
+
+  \bt{NM\_002001.2:p.Asp21\_Val22delinsSer}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Combining variants}
+
+  Input ($110$ and $111$ have the same nucleotide):
+
+  \bt{NM\_002001.2:n.[109del;111del]}
+  \bigskip
+  \pause
+
+  Old:
+
+  \bt{NM\_002001.2:n.[109del;111del]}
+
+  \bt{NM\_002001.2:p.?}
+  \bigskip
+  \pause
+
+  New:
+
+  \bt{NM\_002001.2:n.109\_110del}
+
+  \bt{NM\_002001.2:p.Ala4Hisfs*27}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Splitting variants}
+
+  Input:
+
+  \bt{NM\_002001.2:c.40\_50delinsTCCTTACTGTG}
+  \bigskip
+  \pause
+
+  Old:
+
+  \bt{NM\_002001.2:n.139\_149delinsTCCTTACTGTG}
+
+  \bt{NM\_002001.2:p.Ala14\_Phe17delinsSerLeuLeuCys}
+  \bigskip
+  \pause
+
+  New:
+
+  \bt{NM\_002001.2:n.[139G>T;149T>G]}
+
+  \bt{NM\_002001.2:p.[Ala14Ser;Phe17Cys]}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Comparing reference sequences}
+
+  DMD Dp71ab vs. DMD Dp71b:
+  \bigskip
+
+  Input:
+
+  \bt{NM\_004018.2} and \bt{NM\_004016.2}
+  \bigskip
+  \pause
+
+  Output:
+
+  \bt{1097\_1098insTCCCGTTACTCTGATCAACTTCTGGCCAGT\ldots}
+  \bigskip
+
+  Interpretation:
+
+  This is an exon not present in Dp71ab.
+\end{frame}
+
+\begin{frame}
+  \frametitle{Old vs. new transcripts}
+
+  DMD Dp71ab old vs. new:
+  \bigskip
+
+  Input: \bt{NM\_004018.2} and \bt{NM\_004018.1}
+  \bigskip
+
+  Output: \bt{[3308A>G;4288A>G]}
+  \bigskip
+  \bigskip
+  \bigskip
+  \pause
+
+  FCER1A old vs. new:
+  \bigskip
+
+  Input: \bt{NM\_002001.1} and \bt{NM\_002001.2}
+  \bigskip
+
+  Output: \bt{1\_7del}
+\end{frame}
+
+
+\begin{frame}
+  \frametitle{Old vs. new transcripts (2)}
+
+  FCER2 old vs. new:
+  \bigskip
+
+  Input:
+
+  \bt{NM\_002002.1} and \bt{NM\_002002.4}
+  \bigskip
+  \pause
+
+  Output:
+
+  \bt{[720C>T;903A>G;930T>C;1019C>A; \\
+    1401\_1402insACACCCCAACAGCACCCTCTCCAGATGAGAGT\ldots; \\
+    1478del;1529\_1530insTCCCACATTTGTCCCCTTCTTGGA\ldots]}
+  \smallskip
+  \pause
+
+  vice versa:
+
+  \bt{[720T>C;903G>A;930C>T;1019A>C;1402\_1464del; \\
+    1540dup;1592\_1620del]}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Limitations}
+
+  mtDNA reference vs. isolate K422 mitochondrion
+  \medskip
+
+  Input: \bt{NC\_012920.1} and \bt{JX266268.1}
+  \medskip
+  \pause
+
+  Output:
+  \bt{
+    [73A>G;194C>T;249del;263A>G;310delinsCTC;489T>C; \\
+    750A>G;1438A>G;1715C>T;2231\_2232dup;2706A>G; \\
+    3107del;3552T>A;4715A>G;4769A>G;6026G>A;7028C>T; \\
+    7196C>A;7999T>C;8508A>G;8584G>A;8701A>G;8860A>G; \\
+    9540T>C;9545A>G;10398\_10400delinsGCT;10873T>C; \\
+    11719G>A;11914G>A;11969G>A;12672A>G;12705C>T; \\
+    13263A>G;14318T>C;14766C>T;14783T>C;15043G>A; \\
+    15204T>C;15301G>A;15326A>G;15487A>T;15968T>C; \\
+    16129G>A;16223C>T;16298T>C;16327C>T;16519T>C]
+  }
+  \medskip
+  \pause
+
+  Runtime: $\pm20$ minutes, Memory: $4$G.
+\end{frame}
+
+\section{Optimisation}
+\begin{frame}
+  \frametitle{Accuracy vs. speed}
+
+  \begin{tabular}{l@{\ \ $\Rightarrow$\ \ }l}
+    \bt{AGAGGACG} & \bt{AG AG GA CG} \\
+    \bt{GAGGACA}  & \bt{GA AG GG GA AC CA}
+  \end{tabular}
+  \pause
+
+  \begin{table}
+    \begin{center}
+      \begin{tabular}{l|llll}
+           & \bt{A} & \bt{A} & \bt{G} & \bt{C} \\
+           & \bt{G} & \bt{G} & \bt{A} & \bt{G} \\
+        \hline
+        \bt{GA} & 0 & 0 & 1 & 0 \\
+        \bt{AG} & 1 & \onslide<3>{\color{red}}1 & 0 & 0 \\
+        \bt{GG} & 0 & 0 & 0 & 0 \\
+        \bt{GA} & 0 & 0 & \onslide<3>{\color{red}}2 & 0 \\
+        \bt{AC} & 0 & 0 & 0 & 0 \\
+        \bt{CA} & 0 & 0 & 0 & 0 \\
+      \end{tabular}
+    \end{center}
+    \caption{Rough method to find large strings.}
+  \end{table}
+
+  \onslide<3>{We make a ``knight move''.}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Accuracy vs. speed (2)}
+
+  \begin{minipage}[t]{0.45\textwidth}
+    \begin{table}[]
+      \begin{center}
+        \begin{tabular}{l|llll}
+             & \bt{A} & \bt{A} & \bt{G} & \bt{C} \\
+             & \bt{G} & \bt{G} & \bt{A} & \bt{G} \\
+          \hline
+          \bt{GA} & 0 & 0 & 1 & 0 \\
+          \bt{AG} & 1 & 1 & 0 & 0 \\
+          \bt{GG} & 0 & 0 & 0 & 0 \\
+          \bt{GA} & 0 & 0 & 2 & 0 \\
+          \bt{AC} & 0 & 0 & 0 & 0 \\
+          \bt{CA} & 0 & 0 & 0 & 0 \\
+        \end{tabular}
+      \end{center}
+      \caption{``Zoom out'' $k = 2$.}
+    \end{table}
+  \end{minipage}
+  \hfill
+  \begin{minipage}[t]{0.45\textwidth}
+    \begin{table}[]
+      \begin{center}
+        \begin{tabular}{l|ll}
+              & \bt{A} & \bt{G} \\
+              & \bt{G} & \bt{G} \\
+              & \bt{A} & \bt{A} \\
+          \hline
+          \bt{GAG} & 0 & 0 \\
+          \bt{AGG} & 0 & 0 \\
+          \bt{GGA} & 0 & 1 \\
+          \bt{GAC} & 0 & 0 \\
+          \bt{ACA} & 0 & 0 \\
+        \end{tabular}
+      \end{center}
+      \caption{``Zoom out'' $k = 3$.}
+    \end{table}
+  \end{minipage}
+  \pause
+
+  We find all common substrings longer than $k$.
+  \pause
+
+  The lengths of these strings are at least $\ell k$ and at most
+   $\ell k + (k - 1)$.
+\end{frame}
+
+\section{Conclusions}
+\begin{frame}
+  \frametitle{We are getting there}
+
+  Extracting descriptions is feasible.
+  \pause
+  \begin{itemize}
+    \item Guarantees the same description for the same variant, no matter how
+      it is described by the user.
+    \item Usable for comparing reference sequences.
+    \pause
+    \begin{itemize}
+      \item Real lift over.
+    \end{itemize}
+  \end{itemize}
+  \bigskip
+  \pause
+
+  Extracting descriptions is practical.
+  \begin{itemize}
+    \item By ``zooming out'', we can meet the memory requirements.
+    \begin{itemize}
+      \item $4$G to less than a megabyte.
+    \end{itemize}
+    \item By ``zooming out'', we can meet the processing requirements.
+    \begin{itemize}
+      \item mtDNA test: $20$ minutes to under one second.
+    \end{itemize}
+  \end{itemize}
+\end{frame}
+
+\section{Questions?}
+\lastpagetemplate
+\begin{frame}
+  \begin{center}
+    Acknowledgements:
+    \bigskip
+    \bigskip
+
+    Martijn Vermaat
+
+    Ivo Fokkema
+
+    Peter Taschner
+
+    Johan den Dunnen
+
+  \end{center}
+\end{frame}
+
+\end{document}
diff --git a/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ul_logo.eps b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ul_logo.eps
new file mode 120000
index 00000000..d49aa2da
--- /dev/null
+++ b/doc/Presentation_26-09-12_Lab-J_Descriprion_extract/ul_logo.eps
@@ -0,0 +1 @@
+/local/projects/presentation/trunk/ul_logo.eps
\ No newline at end of file
-- 
GitLab