variant_calling.tex 20.9 KB
Newer Older
Laros's avatar
Laros committed
1
2
\documentclass[slidestop]{beamer}

Laros's avatar
Laros committed
3
\author{Jeroen F.J. Laros}
Laros's avatar
Laros committed
4
\title{Variant Calling}
Laros's avatar
Laros committed
5
\providecommand{\mySubTitle}{}
Laros's avatar
Laros committed
6
\providecommand{\myConference}{Hogeschool Leiden}
Laros's avatar
Laros committed
7
8
\providecommand{\myDate}{02-11-2016}
\providecommand{\myGroup}{}
Laros's avatar
Laros committed
9
10
\providecommand{\myDepartment}{Department of Human Genetics}
\providecommand{\myCenter}{Center for Human and Clinical Genetics}
Laros's avatar
Laros committed
11
\providecommand{\myConference}{Hogeschool Leiden}
Laros's avatar
Laros committed
12
13
14
15
16
17
18
19

\usetheme{lumc}

\begin{document}

% This disables the \pause command, handy in the editing phase.
%\renewcommand{\pause}{}

Laros's avatar
Laros committed
20
21
% Make the title slide.
\makeTitleSlide{\includegraphics[width=3.5cm, trim=2 2 2 2, clip]{k_align}}
Laros's avatar
Laros committed
22
23
24

% First page of the presentation.
\section{Introduction}
Laros's avatar
Laros committed
25
\makeTableOfContents
Laros's avatar
Laros committed
26

Laros's avatar
Laros committed
27
28
29
\subsection{Illumina platforms}
\begin{pframe}
  \begin{minipage}[t]{0.47\textwidth}
Laros's avatar
Laros committed
30
    \begin{figure}
Laros's avatar
Laros committed
31
      \includegraphics[width=\textwidth, trim=0 40 0 0, clip]{hiseq_2000}
Laros's avatar
Laros committed
32
      \caption{HiSeq 2500.}
Laros's avatar
Laros committed
33
34
35
    \end{figure}
  \end{minipage}
  \hfill
Laros's avatar
Laros committed
36
  \begin{minipage}[t]{0.47\textwidth}
Laros's avatar
Laros committed
37
38
    Characteristics:
    \begin{itemize}
Laros's avatar
Laros committed
39
      \item High throughput ($3$~genomes).
Laros's avatar
Laros committed
40
41
      \item Paired end.
      \item High accuracy.
Laros's avatar
Laros committed
42
      \item Read length $2 \times 125$\,bp.
Laros's avatar
Laros committed
43
      \item Relatively long run time ($6$~days).
Laros's avatar
Laros committed
44
45
46
      \item Relatively expensive.
    \end{itemize}
  \end{minipage}
Laros's avatar
Laros committed
47
\end{pframe}
Laros's avatar
Laros committed
48

Laros's avatar
Laros committed
49
50
\begin{pframe}
  \begin{minipage}[t]{0.47\textwidth}
Laros's avatar
Laros committed
51
    \begin{figure}
Laros's avatar
Laros committed
52
53
54
      \includegraphics[width=\textwidth]{miseq}
      \vspace{-0.5cm}
      \caption{MiSeq.}
Laros's avatar
Laros committed
55
56
57
    \end{figure}
  \end{minipage}
  \hfill
Laros's avatar
Laros committed
58
  \begin{minipage}[t]{0.47\textwidth}
Laros's avatar
Laros committed
59
60
    Characteristics:
    \begin{itemize}
Laros's avatar
Laros committed
61
62
      \item Moderate throughput ($3$~exomes).
      \item Paired end.
Laros's avatar
Laros committed
63
      \item High accuracy.
Laros's avatar
Laros committed
64
65
66
      \item Read length $2 \times 300$\,bp.
      \item Relatively short run time ($3$~days).
      \item Relatively expensive.
Laros's avatar
Laros committed
67
68
    \end{itemize}
  \end{minipage}
Laros's avatar
Laros committed
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
\end{pframe}

\subsection{Next generation sequencing data}
\begin{pframe}
  \begin{lstlisting}[language=none, caption={A FastQ file.}]
    @SGGPP:4:101
    TTCGGGGGCTGGCAAATCCACTTCCGTGACACGCTACCATTCGCTGGTG
    +
    -'+4589,53330-0&07+03:54/2362-+.488587>@/25440++0
    @SGGPP:4:102
    CGGTAAACCACCCTGCTGACGGAACCCTAATGCGCCTGAAAGACAGCGT
    +
    34/--0'+.000(.55:;:99(0(+2(22(0316;185;;0;:<<>=AA
    @SGGPP:4:106
    TCGTTAACGACTTTGTTCGCCACCGCAACCGCCTGTTTCGGGTCACAGG
    +
    09875;5?<;?@A4?B:BBB<AA>CCC>C>BB0.->=0488+3444:@5
    @SGGPP:4:112
    TTGATGAATATATTATTTCAGGGAATAATTATGACACCTTTAGAACGCA
    +
    70<<@::5:<;==7;>>/79<:.:494.8(,,8:753/5@5??C>B???
  \end{lstlisting}
\end{pframe}
Laros's avatar
Laros committed
92

Laros's avatar
Laros committed
93
94
\subsection{Data analysis}
\begin{pframe}
Laros's avatar
Laros committed
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
  Resequencing pipelines can roughly be divided into five steps.
  \pause
  \begin{enumerate}
    \item Pre-alignment.
    \begin{itemize}
      \item Quality control.
      \item Data cleaning.
    \end{itemize}
    \pause
    \item Alignment.
    \begin{itemize}
      \item Post-alignment quality control.
    \end{itemize}
    \pause
    \item Variant calling.
    \pause
Laros's avatar
Laros committed
111
    \item Annotation and filtering.
Laros's avatar
Laros committed
112
113
114
115
    \begin{itemize}
      \item Post-variant calling quality control.
    \end{itemize}
    \pause
Laros's avatar
Laros committed
116
    \item Effect prediction.
Laros's avatar
Laros committed
117
  \end{enumerate}
Laros's avatar
Laros committed
118
\end{pframe}
Laros's avatar
Laros committed
119

Laros's avatar
Laros committed
120
121
\subsection{Alignment and variant calling}
\begin{pframe}
Laros's avatar
Laros committed
122
123
124
125
126
127
  Alignment needs to be fault-tolerant.
  \bigskip
  \pause

  Not all aligners can deal with indels.
  \begin{itemize}
Laros's avatar
Laros committed
128
    \item Older aligners (\emph{Bowtie}) only consider substitutions.
Laros's avatar
Laros committed
129
  \end{itemize}
Laros's avatar
Laros committed
130
  \medskip
Laros's avatar
Laros committed
131

Laros's avatar
Laros committed
132
  Some aligners can work with large deletions.
Laros's avatar
Laros committed
133
134
135
  \begin{itemize}
    \item Spliced RNA.
    \begin{itemize}
Laros's avatar
Laros committed
136
137
      \item \emph{GMAP} / \emph{GSNAP}.
      \item \emph{Tophat}.
Laros's avatar
Laros committed
138
    \end{itemize}
Laros's avatar
Laros committed
139
    \item \emph{BWA-MEM}.
Laros's avatar
Laros committed
140
141
  \end{itemize}

Laros's avatar
Laros committed
142
143
144
145
146
147
  \vfill
  \permfoot{\url{http://bowtie-bio.sourceforge.net/index.shtml}}

  \permfoot{\url{http://research-pub.gene.com/gmap/}}

  \permfoot{\url{http://tophat.cbcb.umd.edu/}}
Laros's avatar
Laros committed
148

Laros's avatar
Laros committed
149
150
  \permfoot{\url{http://bio-bwa.sourceforge.net/}}
\end{pframe}
Laros's avatar
Laros committed
151

Laros's avatar
Laros committed
152
153
154
\section{Variant Calling}
\subsection{Principle of variant calling}
\begin{pframe}
Laros's avatar
Laros committed
155
156
157
158
159
160
  \begin{figure}[]
    \begin{center}
      \includegraphics[width=0.9\textwidth]{varcall}
    \end{center}
    \caption{Result of an alignment.}
  \end{figure}
Laros's avatar
Laros committed
161
\end{pframe}
Laros's avatar
Laros committed
162

Laros's avatar
Laros committed
163
\begin{pframe}
Laros's avatar
Laros committed
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
  In principle, we call a variant when we are confident we have seen one.
  \bigskip
  \pause

  But when are we confident?
  \begin{itemize}
    \item More than $x$ times?
    \item In more than $y$ percent of the reads covering the variant?
  \end{itemize}
  \bigskip
  \pause

  Variant callers can use:
  \begin{itemize}
    \item Fixed settings.
    \item Statistical models.
  \end{itemize}
Laros's avatar
Laros committed
181
\end{pframe}
Laros's avatar
Laros committed
182

Laros's avatar
Laros committed
183
184
\subsection{Some considerations}
\begin{pframe}
Laros's avatar
Laros committed
185
186
187
188
189
190
191
192
193
194
  Things a variant caller might take into account:
  \begin{itemize}
    \item Strand balance.
    \item Base quality.
    \item Mapping quality.
    \begin{itemize}
      \item Distribution within the reads.
    \end{itemize}
    \item Ploidy of the organism in question.
  \end{itemize}
Laros's avatar
Laros committed
195
  \medskip
Laros's avatar
Laros committed
196
197
198
199
200
201
202
203
204
205
206
207
208
209
  \pause

  Complicating factors:
  \begin{itemize}
    \item Pooled samples.
    \pause
    \item RNA.
    \begin{itemize}
      \item Allele-specific expression.
      \item RNA editing.
    \end{itemize}
    \pause
    \item Strand-specific sample preparation.
  \end{itemize}
Laros's avatar
Laros committed
210
\end{pframe}
Laros's avatar
Laros committed
211

Laros's avatar
Laros committed
212
213
\subsection{Choice of variant caller}
\begin{pframe}
Laros's avatar
Laros committed
214
215
216
217
218
219
220
221
  Rules of thumb:
  \begin{itemize}
    \item Well known organism and experiment: Statistical model.
    \item Use a simpler variant caller otherwise.
  \end{itemize}
  \bigskip
  \pause

Laros's avatar
Laros committed
222
  Popular variant callers:
Laros's avatar
Laros committed
223
  \begin{itemize}
Laros's avatar
Laros committed
224
225
226
    \item \emph{Samtools}.
    \item \emph{GATK} (no longer free).
    \item \emph{VarScan}.
Laros's avatar
Laros committed
227
228
  \end{itemize}

Laros's avatar
Laros committed
229
230
  \vfill
  \permfoot{\url{http://samtools.sourceforge.net/}}
Laros's avatar
Laros committed
231

Laros's avatar
Laros committed
232
233
234
235
  \permfoot{\url{https://www.broadinstitute.org/gatk/}}

  \permfoot{\url{http://varscan.sourceforge.net/}}
\end{pframe}
Laros's avatar
Laros committed
236
237

\section{Small indel detection}
Laros's avatar
Laros committed
238
239
240
\subsection{Indels}
\begin{pframe}
  Choose an aligner that allows for indels.
Laros's avatar
Laros committed
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
  \bigskip

  Deletions are easier:
  \begin{itemize}
    \item $100\%$ of the flanking regions are covered.
    \item In principle, a deletion of arbitrary length can be detected.
  \end{itemize}
  \bigskip

  For insertions:
  \begin{itemize}
    \item The read length is a limiting factor.
    \item If the insertion approaches the read length, the flanks cannot be
      properly aligned.
    \pause
    \item Workaround:
    \begin{itemize}
      \item Local assembly in combination with anchoring.
    \end{itemize}
  \end{itemize}
Laros's avatar
Laros committed
261
\end{pframe}
Laros's avatar
Laros committed
262

Laros's avatar
Laros committed
263
264
\subsection{False positive substitutions}
\begin{pframe}
Laros's avatar
Laros committed
265
266
267
268
269
270
271
272
273
274
  Can occur because of misalignment near (large) deletions or insertions.
  \bigskip

  The aligner prefers to introduce substitutions over deletions or insertions.
  \bigskip
  \pause

  What is often seen:
  \begin{itemize}
    \item A number of reads that indicate an indel.
Laros's avatar
Laros committed
275
    \item Some reads that do not span the indel properly indicate substitutions.
Laros's avatar
Laros committed
276
277
278
279
280
  \end{itemize}
  \pause

  No way around this, except for correction afterwards.
  \begin{itemize}
Laros's avatar
Laros committed
281
    \item Realignment.
Laros's avatar
Laros committed
282
283
    \item BAQ ``realignment''.
  \end{itemize}
Laros's avatar
Laros committed
284
\end{pframe}
Laros's avatar
Laros committed
285

Laros's avatar
Laros committed
286
\begin{pframe}
Laros's avatar
Laros committed
287
288
289
  \vspace{-0.5cm}
  \begin{figure}[]
    \begin{center}
Laros's avatar
Laros committed
290
      \includegraphics[trim=12cm 9cm 9cm 4cm, clip, height=0.7\textheight]
Laros's avatar
Laros committed
291
292
        {27bpDel}
    \end{center}
Laros's avatar
Laros committed
293
    \caption{False positive substitutions.}
Laros's avatar
Laros committed
294
  \end{figure}
Laros's avatar
Laros committed
295
\end{pframe}
Laros's avatar
Laros committed
296

Laros's avatar
Laros committed
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
\begin{pframe}
  \begin{figure}[]
    \begin{center}
      \fbox{
        \setlength{\unitlength}{0.8pt}
        \begin{picture}(300, 60)(0, 0)
          \put(0, 10){\line(1, 0){300}}  % Genomic sequence.
          \put(0, 14){{\scriptsize reference}}

          \put(80, 20){\line(1, 0){60}}  % Read with a deletion.
          \put(160, 20){\line(1, 0){60}}
          \put(80, 24){{\scriptsize read1}}

          \put(148, 27.5){xx}
          \put(160, 30){\line(1, 0){110}}
          \put(250, 34){{\scriptsize read2}}
        \end{picture}
      }

      \onslide<2->{
        \bigskip
        $\Downarrow$

        \bigskip
        \fbox{
          \setlength{\unitlength}{0.8pt}
          \begin{picture}(300, 60)(0, 0)
            \put(0, 10){\line(1, 0){300}}  % Genomic sequence.
            \put(0, 14){{\scriptsize reference}}

            \put(80, 20){\line(1, 0){60}}  % Read with a deletion.
            \put(160, 20){\line(1, 0){60}}
            \put(80, 24){{\scriptsize read1}}

            \put(130, 30){\line(1, 0){10}}
            \put(160, 30){\line(1, 0){110}}
            \put(250, 34){{\scriptsize read2}}
          \end{picture}
        }
      }
    \end{center}
    \caption{Realignment.}
  \end{figure}
\end{pframe}
Laros's avatar
Laros committed
341
342

\section{Tools}
Laros's avatar
Laros committed
343
344
\subsection{Variant caller input}
\begin{pframe}
Laros's avatar
Laros committed
345
346
347
348
349
350
351
352
  Output of aligner:
  \begin{itemize}
    \item SAM: Sequence Alignment/Map.
    \item BAM: Binary Alignment/Map (compressed SAM).
  \end{itemize}
  \bigskip
  \pause

Laros's avatar
Laros committed
353
  Almost all modern variant callers work (indirectly) with BAM format:
Laros's avatar
Laros committed
354
355
  \begin{itemize}
    \item Samtools.
Laros's avatar
Laros committed
356
    \item GATK.
Laros's avatar
Laros committed
357
358
359
360
361
    \item VarScan.
  \end{itemize}
  \bigskip

  All of these variant callers produce (indirectly) VCF as output.
Laros's avatar
Laros committed
362
\end{pframe}
Laros's avatar
Laros committed
363

Laros's avatar
Laros committed
364
365
\subsection{Pileup}
\begin{pframe}
Laros's avatar
Laros committed
366
367
368
369
370
371
372
373
374
  Variant calling is done on a \emph{pileup} file.
  \bigskip
  \pause

  Create a pileup file from a \emph{sorted} BAM file.
  \bigskip

  \begin{lstlisting}[language=none, caption=Make a pileup file.]
    samtools view -bt <reference> -o out.bam -i in.sam
Laros's avatar
Laros committed
375
376
    samtools sort out.bam out.sort
    samtools mpileup -uf <reference> out.sort.bam > \
Laros's avatar
Laros committed
377
378
379
      out.pileup
  \end{lstlisting}

Laros's avatar
Laros committed
380
  Some variant callers work on the BAM file directly.
Laros's avatar
Laros committed
381
\end{pframe}
Laros's avatar
Laros committed
382

Laros's avatar
Laros committed
383
384
\subsection{Converting a SAM file}
\begin{pframe}
Laros's avatar
Laros committed
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
  Use the following command to make a BAM file out of a SAM file.
  \bigskip

  \begin{lstlisting}[language=none, caption=SAM to BAM.]
    samtools view -bt <reference> -o out.bam -i in.sam
  \end{lstlisting}

  \begin{table}[]
    \begin{center}
      \begin{tabular}{l|l}
        Parameter  & Explanation.\\
        \hline
        \bt{-b}    & Output in BAM format.\\
        \bt{-t}    & Path to the reference index.
      \end{tabular}
    \end{center}
    \caption{\bt{samtools view} options.}
  \end{table}
Laros's avatar
Laros committed
403
\end{pframe}
Laros's avatar
Laros committed
404

Laros's avatar
Laros committed
405
406
\subsection{Indexing the reference sequence}
\begin{pframe}
Laros's avatar
Laros committed
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
  This needs to be done only once.
  \bigskip

  \begin{lstlisting}[language=none, caption=Index the reference sequence.]
    samtools faidx <reference>
  \end{lstlisting}
  \bigskip
  \pause

  Tab-delimited file:
  \begin{itemize}
    \item Reference name (chromosome).
    \item Length of the reference.
    \item Defines the order of the reference sequences in sorting.
  \end{itemize}
Laros's avatar
Laros committed
422
\end{pframe}
Laros's avatar
Laros committed
423

Laros's avatar
Laros committed
424
425
\subsection{Sorting a BAM file}
\begin{pframe}
Laros's avatar
Laros committed
426
427
428
429
430
431
432
433
434
435
436
437
  Sort alignments by leftmost coordinates.
  \bigskip

  \begin{lstlisting}[language=none, caption=Sort a BAM file.]
    samtools sort out.bam out.sort
  \end{lstlisting}

  Finding something in a sorted list can be done very efficiently.
  \bigskip
  \pause

  After these steps, the alignment output is ready for the pileup step.
Laros's avatar
Laros committed
438
\end{pframe}
Laros's avatar
Laros committed
439

Laros's avatar
Laros committed
440
441
\subsection{Creating a pileup file}
\begin{pframe}
Laros's avatar
Laros committed
442
443
444
445
  Use the following command to make a pileup file.
  \bigskip

  \begin{lstlisting}[language=none, caption=Make a pileup file.]
Laros's avatar
Laros committed
446
    samtools mpileup -uf <reference> out.sort.bam > \
Laros's avatar
Laros committed
447
448
449
450
451
452
453
454
455
456
457
458
459
460
      out.pileup
  \end{lstlisting}

  \begin{table}[]
    \begin{center}
      \begin{tabular}{l|l}
        Parameter  & Explanation.\\
        \hline
        \bt{-u} & Calculate genotype likelihoods.\\
        \bt{-f} & Path to the reference sequence.
      \end{tabular}
    \end{center}
    \caption{\bt{samtools mpileup} options.}
  \end{table}
Laros's avatar
Laros committed
461
\end{pframe}
Laros's avatar
Laros committed
462

Laros's avatar
Laros committed
463
464
\subsection{Pileup output}
\begin{pframe}
Laros's avatar
Laros committed
465
466
467
468
469
470
471
472
473
474
  \begin{minipage}[t]{0.45\textwidth}
    \begin{figure}[]
      \begin{center}
        \includegraphics[height=0.8\textheight]{k_align}
      \end{center}
      \caption{Pileup visualised.}
    \end{figure}
  \end{minipage}
  \hfill
  \begin{minipage}[t]{0.53\textwidth}
Laros's avatar
Laros committed
475
    \begin{lstlisting}[language=none, caption=mpileup file.]
Laros's avatar
Laros committed
476
477
478
479
480
481
482
483
484
485
      chr10 65490 a 3 .., EJI
      chr10 65491 a 3 .., DJH
      chr10 65492 t 3 C.c @JF
      chr10 65493 g 3 .., /JE
    \end{lstlisting}
    \pause

    Symbols:
    \begin{itemize}
      \item `\bt{.}' and `\bt{,}' for reference calls.
Laros's avatar
Laros committed
486
      \item Last column contains quality scores.
Laros's avatar
Laros committed
487
488
    \end{itemize}
  \end{minipage}
Laros's avatar
Laros committed
489
\end{pframe}
Laros's avatar
Laros committed
490

Laros's avatar
Laros committed
491
492
\subsection{Bcftools}
\begin{pframe}
Laros's avatar
Laros committed
493
494
495
  Create a VCF file with Bcftools (part of Samtools).
  \bigskip

Laros's avatar
Laros committed
496
  \begin{lstlisting}[language=none, caption=Call substitutions and indels with Samtools.]
Laros's avatar
Laros committed
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
    bcftools view -bvcg in.mpileup > out.bcf
    bcftools view out.bcf > out.vcf
  \end{lstlisting}

  \begin{table}[]
    \begin{center}
      \begin{tabular}{l|l}
        Parameter  & Explanation.\\
        \hline
        \bt{-b} & Output in the BCF format.\\
        \bt{-v} & Output variant sites only.\\
        \bt{-c} & Call variants using Bayesian inference.\\
        \bt{-g} & Call per-sample genotypes.\\
      \end{tabular}
    \end{center}
    \caption{\bt{bcftools view} options.}
  \end{table}
Laros's avatar
Laros committed
514
\end{pframe}
Laros's avatar
Laros committed
515

Laros's avatar
Laros committed
516
517
\subsection{Bcftools filtering}
\begin{pframe}
Laros's avatar
Laros committed
518
519
520
  Filter the results.
  \bigskip

Laros's avatar
Laros committed
521
  \begin{lstlisting}[language=none, caption=Filter variants with vcfutils.]
Laros's avatar
Laros committed
522
523
524
525
526
527
528
529
530
531
532
533
534
535
    vcfutils.pl varFilter -d 8 -D 50 out.vcf > flt.vcf
  \end{lstlisting}

  \begin{table}[]
    \begin{center}
      \begin{tabular}{l|l}
        Parameter  & Explanation.\\
        \hline
        \bt{-d} & Minimum depth.\\
        \bt{-D} & Maximum depth.\\
      \end{tabular}
    \end{center}
    \caption{\bt{vcfutils.pl varFilter} options.}
  \end{table}
Laros's avatar
Laros committed
536
\end{pframe}
Laros's avatar
Laros committed
537

Laros's avatar
Laros committed
538
539
540
\subsection{VarScan}
\begin{pframe}
  \begin{lstlisting}[language=none, caption=Calling substitutions.]
Laros's avatar
Laros committed
541
542
543
544
    java -jar VarScan.jar mpileup2snp in.mpileup \
      --output-vcf > out.vcf
  \end{lstlisting}

Laros's avatar
Laros committed
545
  \begin{lstlisting}[language=none, caption=Calling indels.]
Laros's avatar
Laros committed
546
547
548
    java -jar VarScan.jar mpileup2indel in.mpileup \
      --output-vcf > out.vcf
  \end{lstlisting}
Laros's avatar
Laros committed
549
\end{pframe}
Laros's avatar
Laros committed
550
551

\section{The VCF format}
Laros's avatar
Laros committed
552
553
\subsection{Required fields in the VCF format}
\begin{pframe}
Laros's avatar
Laros committed
554
555
  \begin{table}[]
    \begin{center}
Laros's avatar
Laros committed
556
557
558
559
560
561
562
563
564
565
566
567
568
      \begin{tabular}{l@{\ \ --\ \ }p{7cm}}
        \onslide<2->{%
          CHROM  & Name of the chromosome.\\
          POS    & Position on the chromosome.\\
        }%
        \onslide<3->{%
          ID     & List of unique identifiers.\\
          REF    & Reference base(s).\\
          ALT    & List of alternate non-reference alleles.\\
          QUAL   & Phred-scaled quality score for the assertion made in ALT.\\
          FILTER & PASS if this position has passed all filters.\\
          INFO   & Additional information.
        }%
Laros's avatar
Laros committed
569
570
571
572
      \end{tabular}
    \end{center}
    \caption{Required fields.}
  \end{table}
Laros's avatar
Laros committed
573
\end{pframe}
Laros's avatar
Laros committed
574

Laros's avatar
Laros committed
575
576
\subsection{Optional fields in the VCF format}
\begin{pframe}
Laros's avatar
Laros committed
577
578
579
580
581
  \begin{table}[]
    \begin{center}
      \begin{tabular}{l|p{7cm}}
        Field  & Explanation.\\
        \hline
Laros's avatar
Laros committed
582
        DP & Number of reads covering or bridging POS.\\
Laros's avatar
Laros committed
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
        AF & Allele frequency.\\
      \end{tabular}
    \end{center}
    \caption{Reserved fields.}
  \end{table}
  \pause

  \begin{table}[]
    \begin{center}
      \begin{tabular}{l|p{7cm}}
        Field  & Explanation.\\
        \hline
        INDEL  & Indicating the variant is an INDEL.\\
        DP4    & Number of 1) forward ref alleles; 2) reverse ref; 3) forward
          non-ref; 4) reverse non-ref alleles.\\
      \end{tabular}
    \end{center}
    \caption{Non-standard but important fields.}
  \end{table}
Laros's avatar
Laros committed
602
\end{pframe}
Laros's avatar
Laros committed
603

Laros's avatar
Laros committed
604
605
606
607
\section*{Pause}
\subsection{}
\begin{pframe}
\end{pframe}
Laros's avatar
Laros committed
608

Laros's avatar
Laros committed
609
610
611
\section{Variant filtering}
\subsection{Filtering on coverage}
\begin{pframe}
Laros's avatar
Laros committed
612
613
  We can set some thresholds:
  \begin{itemize}
Laros's avatar
Laros committed
614
615
    \item Genotype quality.
    \item Minimum coverage.
Laros's avatar
Laros committed
616
617
618
619
  \end{itemize}
  \bigskip
  \pause

Laros's avatar
Laros committed
620
621
622
623
  If we do not (or cannot) use a copy number variation caller:
  \begin{itemize}
    \item Maximum coverage.
  \end{itemize}
Laros's avatar
Laros committed
624
625
  \bigskip

Laros's avatar
Laros committed
626
  An accepted way to calculate the maximum:
Laros's avatar
Laros committed
627
  \begin{itemize}
Laros's avatar
Laros committed
628
    \item Calculate the mean coverage over the targeted regions.
Laros's avatar
Laros committed
629
630
    \item Multiply this number by a reasonable factor, e.g., $2.5$.
  \end{itemize}
Laros's avatar
Laros committed
631
\end{pframe}
Laros's avatar
Laros committed
632
633

\section{Annotation}
Laros's avatar
Laros committed
634
635
636
\subsection{What is already known about a variant}
\begin{pframe}
  A selection of VEP annotation:
Laros's avatar
Laros committed
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
  \begin{itemize}
    \item Is the variant known?
    \item Does it hit a gene?
    \pause
    \begin{itemize}
      \item Is it in an intron?
      \begin{itemize}
        \item Does it hit a splice site?
      \end{itemize}
      \pause
      \item Is it in the coding region?
      \begin{itemize}
        \item Is there a gain/loss of a stop codon?
        \item Does the variant result in a frameshift?
        \item \ldots
      \end{itemize}
      \pause
      \item Is it in the 5'/3' UTR of a gene?
      \item \ldots
    \end{itemize}
    \pause
    \item Is it in a regulatory region?
    \item \ldots
  \end{itemize}

Laros's avatar
Laros committed
662
663
664
  \vfill
  \permfoot{\url{http://www.ensembl.org/Tools/VEP}}
\end{pframe}
Laros's avatar
Laros committed
665

Laros's avatar
Laros committed
666
667
\subsection{Filtering on annotation}
\begin{pframe}
Laros's avatar
Laros committed
668
669
670
671
672
673
674
  Conservation score.
  \begin{itemize}
    \item This particular sequence is the same in multiple species.
  \end{itemize}
  \bigskip
  \pause

Laros's avatar
Laros committed
675
  \emph{dbSNP} frequency.
Laros's avatar
Laros committed
676
677
678
679
680
681
682
683
684
685
  \begin{itemize}
    \item This may be a frequent and therefore uninteresting variant.
  \end{itemize}
  \bigskip
  \pause

  But beware, these databases are neither complete nor error-free.
  \begin{itemize}
    \item dbSNP now also contains variants that have a functional effect.
  \end{itemize}
Laros's avatar
Laros committed
686
687
688
689
  \vfill
  
  \permfoot{\url{https://www.ncbi.nlm.nih.gov/projects/SNP/}}
\end{pframe}
Laros's avatar
Laros committed
690
691

\section{Effect prediction}
Laros's avatar
Laros committed
692
693
694
\subsection{Effect prediction tools}
\begin{pframe}
  We want to know the effect on proteins.
Laros's avatar
Laros committed
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
  \bigskip
  \pause

  VEP:
  \begin{itemize}
    \item Pre-calculated effects for single variants.
    \item Pre-calculation of combinations is not feasible.
  \end{itemize}
  \bigskip
  \pause

  Mutalyzer:
  \begin{itemize}
    \item Effect prediction using simulation.
    \item Any combination of variants can be analysed.
  \end{itemize}
Laros's avatar
Laros committed
711
  \vfill
Laros's avatar
Laros committed
712

Laros's avatar
Laros committed
713
714
  \permfoot{\url{https://mutalyzer.nl}}
\end{pframe}
Laros's avatar
Laros committed
715

Laros's avatar
Laros committed
716
717
718
\subsection{Phasing}
\begin{pframe}
  Proof that variants are on the same allele.
Laros's avatar
Laros committed
719
720
721
722
723
  \bigskip
  \pause

  The effect of two variants separately can differ from that of the
  combination.
Laros's avatar
Laros committed
724
\end{pframe}
Laros's avatar
Laros committed
725

Laros's avatar
Laros committed
726
727
\subsection{Unphased variants}
\begin{pframe}
Laros's avatar
Laros committed
728
729
730
731
732
733
734
735
736
  \bt{NM\_003002.2(SDHD\_v001):c.[272del;301\_302del]}

  \begin{figure}
    \includegraphics[width=\textwidth]{fs1}
    \caption{Predicted frameshift.}
  \end{figure}
  \vfill

  \bt{NM\_003002.2(SDHD\_v001):c.272del}
Laros's avatar
Laros committed
737
\end{pframe}
Laros's avatar
Laros committed
738

Laros's avatar
Laros committed
739
740
\subsection{Unphased variants}
\begin{pframe}
Laros's avatar
Laros committed
741
742
743
744
745
746
747
748
749
  \bt{NM\_003002.2(SDHD\_v001):c.[272del;301\_302del]}

  \begin{figure}
    \includegraphics[width=\textwidth]{fs2}
    \caption{Predicted frameshift.}
  \end{figure}
  \vfill

  \bt{NM\_003002.2(SDHD\_v001):c.301\_302del}
Laros's avatar
Laros committed
750
\end{pframe}
Laros's avatar
Laros committed
751

Laros's avatar
Laros committed
752
753
\subsection{Phased variants}
\begin{pframe}
Laros's avatar
Laros committed
754
755
756
757
758
759
760
761
762
  \bt{NM\_003002.2(SDHD\_v001):c.[272del;301\_302del]}

  \begin{figure}
    \includegraphics[width=\textwidth]{fsc}
    \caption{Predicted indel.}
  \end{figure}
  \vfill

  \bt{NM\_003002.2(SDHD\_v001):c.[272del;301\_302del]}
Laros's avatar
Laros committed
763
\end{pframe}
Laros's avatar
Laros committed
764

Laros's avatar
Laros committed
765
766
\subsection{Phasing}
\begin{pframe}
Laros's avatar
Laros committed
767
768
769
770
771
772
773
774
775
  \begin{figure}
    \fbox{
      \setlength{\unitlength}{1pt}
      \input{phasing}
    }
    \caption{Read backed phasing.}
  \end{figure}

  Direct inference of phased variants.
Laros's avatar
Laros committed
776
\end{pframe}
Laros's avatar
Laros committed
777
778

\section{Variant databases}
Laros's avatar
Laros committed
779
780
\subsection{Share your results}
\begin{pframe}
Laros's avatar
Laros committed
781
782
783
  There are a number of ways of storing and sharing your variants:
  \begin{itemize}
    \item dbSNP.
Laros's avatar
Laros committed
784
    \item \emph{LOVD}.
Laros's avatar
Laros committed
785
786
787
788
789
790
791
792
793
  \end{itemize}
  \bigskip
  \pause

  Sharing is important:
  \begin{itemize}
    \item If no one shares, you cannot filter.
    \item Can lead to co-authorship when other people use your data.
  \end{itemize}
Laros's avatar
Laros committed
794
  \vfill
Laros's avatar
Laros committed
795

Laros's avatar
Laros committed
796
797
  \permfoot{\url{http://www.lovd.nl/}}
\end{pframe}
Laros's avatar
Laros committed
798

Laros's avatar
Laros committed
799
800
\subsection{LSDBs}
\begin{pframe}
Laros's avatar
Laros committed
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
  Lots of valuable data in \emph{locus specific databases}.
  \bigskip
  \pause

  These databases:
  \begin{itemize}
    \item Gene oriented.
    \item Contain patient information.
    \item Heavily curated (high quality).
  \end{itemize}
  \bigskip
  \pause

  But:
  \begin{itemize}
    \item Usually no genomic coordinates.
    \begin{itemize}
      \item Experts use one gene as reference, not an entire genome.
    \end{itemize}
  \end{itemize}
Laros's avatar
Laros committed
821
\end{pframe}
Laros's avatar
Laros committed
822

Laros's avatar
Laros committed
823
\begin{pframe}
Laros's avatar
Laros committed
824
825
  \begin{figure}[]
    \begin{center}
Laros's avatar
Laros committed
826
      \includegraphics[height=0.7\textheight]{lovd_welcome}
Laros's avatar
Laros committed
827
828
829
    \end{center}
    \caption{LOVD welcome screen.}
  \end{figure}
Laros's avatar
Laros committed
830
\end{pframe}
Laros's avatar
Laros committed
831

Laros's avatar
Laros committed
832
833
\subsection{Variants in LOVD}
\begin{pframe}
Laros's avatar
Laros committed
834
835
  \begin{figure}[]
    \begin{center}
Laros's avatar
Laros committed
836
      \includegraphics[height=0.7\textheight]{lovd_variants}
Laros's avatar
Laros committed
837
838
839
    \end{center}
    \caption{Selection of variants.}
  \end{figure}
Laros's avatar
Laros committed
840
\end{pframe}
Laros's avatar
Laros committed
841

Laros's avatar
Laros committed
842
843
844
845
846
\section{Conclusions}
\subsection{Things to remember}
\begin{pframe}
  Choose the right aligner.
  \bigskip
Laros's avatar
Laros committed
847
848
849
850
851
852
853
854
855
856
857

  Some variants are impossible to find:
  \begin{itemize}
    \item Unsequenceable DNA (depends on the platform).
    \item Unmappable reads (depends on read length).
  \end{itemize}
  \bigskip
  \pause

  False positives:
  \begin{itemize}
Laros's avatar
Laros committed
858
    \item Substitutions near indels.
Laros's avatar
Laros committed
859
860
861
862
    \item Copy number variation.
  \end{itemize}
  \bigskip

Laros's avatar
Laros committed
863
864
  When in doubt, look at the alignment.
\end{pframe}
Laros's avatar
Laros committed
865

Laros's avatar
Laros committed
866
867
868
869
870
% Make the acknowledgements slide.
\makeAcknowledgementsSlide{
  \begin{tabular}{l}
    Michiel van Galen\\
    Martijn Vermaat\\
Laros's avatar
Laros committed
871
    Johan den Dunnen
Laros's avatar
Laros committed
872
873
  \end{tabular}
}
Laros's avatar
Laros committed
874
875

\end{document}