Commit 72937681 authored by Laros's avatar Laros
Browse files

Updated data sharing lecture.

parent f9eb7abb
......@@ -28,6 +28,13 @@
Society for Clinical Genetic Laboratory Diagnostics (VKGL).
\bigskip
Sharing data:
\begin{itemize}
\item First within The Netherlands.
\item Final goal is to share with the world.
\end{itemize}
\bigskip
Two types of variants:
\begin{itemize}
\item Bulk data (frequencies, VCF files).
......@@ -35,7 +42,7 @@
\end{itemize}
\end{pframe}
\section{Bulk data}
%\section{Bulk data}
\subsection{Requirements}
\begin{pframe}
We aim for a solution that has the following properties:
......@@ -57,6 +64,7 @@
Timestamps are essential for \emph{reproducibility}.
\end{pframe}
\subsection{Callable regions}
\begin{pframe}
\begin{figure}[]
\vspace{-0.5cm}
......@@ -110,44 +118,28 @@
\end{minipage}
\end{pframe}
\subsection{Options}
\begin{pframe}
Choices for the database layout:
\begin{itemize}
\item \emph{Sample} oriented.
\item \emph{Variant} oriented.
\end{itemize}
\bigskip
Choices for the infrastructure:
\begin{itemize}
\item Centralised.
\item Decentralised.
\end{itemize}
\end{pframe}
\subsection{Technical issues}
\begin{pframe}
\begin{figure}[]
{\Large\texttt{
\begin{tabular}{llll}
\#CHROM & POS & REF & ALT\\
\onslide<1->{1 & 884551 & GAGA\color{red}AAGA & GAGA}\\
\onslide<2->{1 & 884552 & AGA\color{red}AAGA & AGA}\\
\onslide<3->{1 & 884553 & GA\color{red}AAGA & GA}\\
\onslide<4->{1 & 884554 & A\color{red}AAGA & A}
\end{tabular}
}}
\caption{Deletion of \bt{AAGA}.}
\end{figure}
\end{pframe}
\section{Sample oriented server}
%\subsection{Options}
%\begin{pframe}
% Choices for the database layout:
% \begin{itemize}
% \item \emph{Sample} oriented.
% \item \emph{Variant} oriented.
% \end{itemize}
% \bigskip
%
% Choices for the infrastructure:
% \begin{itemize}
% \item Centralised.
% \item Decentralised.
% \end{itemize}
%\end{pframe}
%\section{Sample oriented server}
\subsection{Database layout}
\begin{pframe}
\begin{minipage}[t]{0.47\textwidth}\begin{figure}[]
\begin{center}
%\includegraphics[width=\textwidth]{sample_db}
\includegraphics[width=\textwidth]{sample_db}
\end{center}
\caption{Basic structure.}
\end{figure}
......@@ -160,7 +152,7 @@
\end{itemize}
\bigskip
Similar layout for ``coverage'' data.
Similar layout for genotype quality data.
\end{minipage}
\end{pframe}
......@@ -199,22 +191,12 @@
\end{itemize}
\end{pframe}
\subsection{Additional notes}
\begin{pframe}
Database content:
\begin{itemize}
\item Duplicate detection.
\item Removal of samples.
\item Quality control at submission time.
\end{itemize}
\end{pframe}
\section{Variant oriented server}
%\section{Variant oriented server}
\subsection{Database layout}
\begin{pframe}
\begin{minipage}[t]{0.47\textwidth}\begin{figure}[]
\begin{center}
%\includegraphics[width=\textwidth]{variant_db}
\includegraphics[width=\textwidth]{variant_db}
\end{center}
\caption{Basic structure.}
\end{figure}
......@@ -227,7 +209,7 @@
\end{itemize}
\bigskip
Similar layout for ``coverage'' data.
Similar layout for genotype quality data.
\end{minipage}
\end{pframe}
......@@ -283,7 +265,48 @@
\end{itemize}
\end{pframe}
\section{Pooling}
\subsection{Additional notes}
\begin{pframe}
Database content:
\begin{itemize}
\item Duplicate detection.
\item Removal of samples.
\item Quality control at submission time.
\end{itemize}
\end{pframe}
\subsection{Technical issues}
\begin{pframe}
Very important to ``speak the same language''.
\bigskip
Make sure we use common names:
\begin{itemize}
\item Reference sequence (hg19, hg38).
\item Labels.
\begin{itemize}
\item Gene panels.
\item Disease/syndrome names.
\end{itemize}
\end{itemize}
\end{pframe}
\begin{pframe}
\begin{figure}[]
{\Large\texttt{
\begin{tabular}{llll}
\#CHROM & POS & REF & ALT\\
\onslide<1->{1 & 884551 & GAGA\color{red}AAGA & GAGA}\\
\onslide<2->{1 & 884552 & AGA\color{red}AAGA & AGA}\\
\onslide<3->{1 & 884553 & GA\color{red}AAGA & GA}\\
\onslide<4->{1 & 884554 & A\color{red}AAGA & A}
\end{tabular}
}}
\caption{Deletion of \bt{AAGA}.}
\end{figure}
\end{pframe}
%\section{Pooling}
\subsection{Concept}
\begin{pframe}
Instead of submitting variants or samples, only \emph{aggregated data} is
......@@ -309,7 +332,7 @@
\end{itemize}
\end{pframe}
\section{Decentralised servers}
%\section{Decentralised servers}
\subsection{Concept}
\begin{pframe}
Each institute gets its own server, a centralised \emph{query interface}
......@@ -329,32 +352,54 @@
\end{itemize}
\end{pframe}
\section{Conclusions}
\subsection{Summary}
%\section{Conclusions}
%\subsection{Summary}
%\begin{pframe}
% \begin{table}[]
% \begin{center}
% \begin{tabular}{lccc}
% type & requirements & development & maintenance\\
% \hline
% sample & yes & no & low\\
% sample + pooling & yes & yes & high\\
% variant & no & yes & low\\
% distributed & yes & yes & high\\
% \end{tabular}
% \end{center}
% \caption{Solutions and consequences.}
% \end{table}
%
% Decentralised solutions will require a substantial amount of additional
% effort:
% \begin{itemize}
% \item Development.
% \item Maintenance.
% \end{itemize}
%\end{pframe}
\section{Interpreted data}
\subsection{HGVS nomenclature}
\begin{pframe}
\begin{table}[]
\begin{center}
\begin{tabular}{lccc}
type & requirements & development & maintenance\\
\hline
sample & yes & no & low\\
sample + pooling & yes & yes & high\\
variant & no & yes & low\\
distributed & yes & yes & high\\
\end{tabular}
\end{center}
\caption{Solutions and consequences.}
\end{table}
Recommendations for the description of sequence variants.
\bigskip
Decentralised solutions will require a substantial amount of additional
effort:
\textit{HGVS-nomenclature is used to report and exchange information
regarding variants found in DNA, RNA and protein sequences and serves as an
international standard in DNA diagnostics. HGVS-nomenclature is authorised by
the Human Genome Variation Society (HGVS), the Human Variome Project (HVP)
and the HUman Genome Organization (HUGO).}
\bigskip
The famous \textbf{g.} and \textbf{c.} notation:
\begin{itemize}
\item Development.
\item Maintenance.
\item {\Large\texttt{NC\_000011.9:g.111959695G>T}}.
\item {\Large\texttt{NM\_003002.3:c.274G>T}}.
\end{itemize}
\vfill
\permfoot{\url{http://varnomen.hgvs.org/}}
\end{pframe}
\section{Interpreted data}
\begin{pframe}
\begin{figure}[]
\begin{center}
......@@ -417,11 +462,51 @@
\end{figure}
\end{pframe}
% Issues with sharing interpretations:
% - ...
\subsection{Mutalyzer}
\begin{pframe}
Mutalyzer: a curational tool for Locus Specific Mutation Databases (LSDBs).
\bigskip
Variant nomenclature checker applying Human Genome Variation Society (HGVS)
guidelines.
\begin{itemize}
\item Is the syntax of the variant description valid?
\item Does the reference sequence exist?
\item Is the variant possible on this reference sequence?
\item Is this variant description the recommended one?
\end{itemize}
\bigskip
Basic effect prediction.
\begin{itemize}
\item Is the description of the transcript product as expected?
\item Is the predicted protein as expected?
\end{itemize}
\vfill
\permfoot{\url{https://mutalyzer.nl}}
\end{pframe}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[width=\textwidth]{mutalyzer_disambiguation}
\end{center}
\caption{}
\label{fig:mutalyzer_disambiguation}
\end{figure}
\end{pframe}
\begin{pframe}
\begin{figure}[]
\begin{center}
\includegraphics[width=\textwidth]{mutalyzer_disambiguation_result}
\end{center}
\caption{}
\label{fig:mutalyzer_disambiguation_result}
\end{figure}
\end{pframe}
% Nomenclature issues:
% -
% Make the acknowledgements slide.
\makeAcknowledgementsSlide{
......
../../submodules/presentation-pics/pics/mutalyzer_disambiguation.xcf
\ No newline at end of file
../../submodules/presentation-pics/pics/mutalyzer_disambiguation_result.xcf
\ No newline at end of file
../../submodules/vkgl-datashare/presentations/2016-09-12/sample_db.dia
\ No newline at end of file
../../submodules/vkgl-datashare/presentations/2016-09-12/variant_db.dia
\ No newline at end of file
Subproject commit 522ecb2dcf3be1dcba9a86be0dbb1e83e26826a9
Subproject commit d2389a47ec4397a925918434f7e1466b4f91cf4e
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment