/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.extensions.gatk

import java.io.File

import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }

/**
 * Queue extension that runs GATK with analysis type `VariantAnnotator`.
 *
 * Most arguments are initialised from the Biopet configuration through `config(...)`
 * and can be overridden by assigning the corresponding `var` before the graph is built.
 * The job is scattered with [[LocusScatterFunction]] and the per-scatter outputs are
 * gathered with [[CatVariantsGatherer]].
 *
 * @param parent the [[Configurable]] this function is constructed with
 *               (presumably the owner used for config lookups; confirm against `Configurable`)
 */
class VariantAnnotator(val parent: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
  def analysis_type = "VariantAnnotator"

  scatterClass = classOf[LocusScatterFunction]
  setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }

  /** Input VCF file */
  @Input(fullName = "variant", shortName = "V", doc = "Input VCF file", required = true, exclusiveOf = "", validation = "")
  var variant: File = _

  /** SnpEff file from which to get annotations */
  @Input(fullName = "snpEffFile", shortName = "snpEffFile", doc = "SnpEff file from which to get annotations", required = false, exclusiveOf = "", validation = "")
  var snpEffFile: Option[File] = config("snpEffFile")

  /** dbSNP file */
  @Input(fullName = "dbsnp", shortName = "D", doc = "dbSNP file", required = false, exclusiveOf = "", validation = "")
  var dbsnp: Option[File] = dbsnpVcfFile

  /** Comparison VCF file */
  @Input(fullName = "comp", shortName = "comp", doc = "Comparison VCF file", required = false, exclusiveOf = "", validation = "")
  var comp: List[File] = config("comp", default = Nil)

  /**
   * External resource VCF file
   *
   * NOTE(review): defaults to an empty list so that this optional argument is read the same
   * way as `comp`; previously no default was given. Presumably a missing key without a
   * default is treated as an error by the config layer; confirm against `config`.
   */
  @Input(fullName = "resource", shortName = "resource", doc = "External resource VCF file", required = false, exclusiveOf = "", validation = "")
  var resource: List[File] = config("resource", default = Nil)

  /** File to which variants should be written */
  @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
  @Gather(classOf[CatVariantsGatherer])
  var out: File = _

  /** One or more specific annotations to apply to variant calls */
  @Argument(fullName = "annotation", shortName = "A", doc = "One or more specific annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "")
  var annotation: List[String] = config("annotation", default = Nil, freeVar = false)

  /** One or more specific annotations to exclude */
  @Argument(fullName = "excludeAnnotation", shortName = "XA", doc = "One or more specific annotations to exclude", required = false, exclusiveOf = "", validation = "")
  var excludeAnnotation: List[String] = config("excludeAnnotation", default = Nil)

  /** One or more classes/groups of annotations to apply to variant calls */
  @Argument(fullName = "group", shortName = "G", doc = "One or more classes/groups of annotations to apply to variant calls", required = false, exclusiveOf = "", validation = "")
  var group: List[String] = config("group", default = Nil)

  /** One or more specific expressions to apply to variant calls */
  @Argument(fullName = "expression", shortName = "E", doc = "One or more specific expressions to apply to variant calls", required = false, exclusiveOf = "", validation = "")
  var expression: List[String] = config("expression", default = Nil)

  /** Check for allele concordances when using an external resource VCF file */
  @Argument(fullName = "resourceAlleleConcordance", shortName = "rac", doc = "Check for allele concordances when using an external resource VCF file", required = false, exclusiveOf = "", validation = "")
  var resourceAlleleConcordance: Boolean = config("resourceAlleleConcordance", default = false)

  /** Use all possible annotations (not for the faint of heart) */
  @Argument(fullName = "useAllAnnotations", shortName = "all", doc = "Use all possible annotations (not for the faint of heart)", required = false, exclusiveOf = "", validation = "")
  var useAllAnnotations: Boolean = config("useAllAnnotations", default = false)

  /** Add dbSNP ID even if one is already present */
  @Argument(fullName = "alwaysAppendDbsnpId", shortName = "alwaysAppendDbsnpId", doc = "Add dbSNP ID even if one is already present", required = false, exclusiveOf = "", validation = "")
  var alwaysAppendDbsnpId: Boolean = config("alwaysAppendDbsnpId", default = false)

  /** GQ threshold for annotating MV ratio */
  @Argument(fullName = "MendelViolationGenotypeQualityThreshold", shortName = "mvq", doc = "GQ threshold for annotating MV ratio", required = false, exclusiveOf = "", validation = "")
  var MendelViolationGenotypeQualityThreshold: Option[Double] = config("MendelViolationGenotypeQualityThreshold")

  /** Format string for MendelViolationGenotypeQualityThreshold */
  @Argument(fullName = "MendelViolationGenotypeQualityThresholdFormat", shortName = "", doc = "Format string for MendelViolationGenotypeQualityThreshold", required = false, exclusiveOf = "", validation = "")
  var MendelViolationGenotypeQualityThresholdFormat: String = "%s"

  /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)

  /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)

  /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)

  /** Index file belonging to `out`; declared as an output but excluded from gathering. */
  @Output
  @Gather(enabled = false)
  private var outputIndex: File = _

  /**
   * Registers the index files of all VCF inputs as dependencies and derives the
   * output index file from `out`.
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()

    // Entries ending in ".list" are skipped, as are null entries; only the
    // remaining files get an index dependency.
    def vcfIndexes(files: List[File]): List[File] =
      files.collect { case f if f != null && !f.getName.endsWith(".list") => VcfUtils.getVcfIndexFile(f) }

    Option(variant).foreach(v => deps :+= VcfUtils.getVcfIndexFile(v))
    snpEffFile.foreach(f => deps :+= VcfUtils.getVcfIndexFile(f))
    dbsnp.foreach(f => deps :+= VcfUtils.getVcfIndexFile(f))
    deps ++= vcfIndexes(comp)
    deps ++= vcfIndexes(resource)

    // No index is declared when the output is unset or a special file.
    if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out))
      outputIndex = VcfUtils.getVcfIndexFile(out)
  }

  /**
   * Builds the command line by appending this walker's arguments to the base GATK command.
   *
   * For the `Option[File]` inputs a placeholder `new File(".")` is passed to
   * `TaggedFile.formatCommandLineParameter` only to build the prefix; the Option itself
   * is what is handed to `optional` as the parameter value.
   */
  override def cmdLine: String = super.cmdLine +
    required(TaggedFile.formatCommandLineParameter("-V", variant), variant) +
    optional(TaggedFile.formatCommandLineParameter("-snpEffFile", snpEffFile.getOrElse(new File("."))), snpEffFile) +
    optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(new File("."))), dbsnp) +
    repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter) +
    repeat("-resource", resource, formatPrefix = TaggedFile.formatCommandLineParameter) +
    optional("-o", out) +
    repeat("-A", annotation) +
    repeat("-XA", excludeAnnotation) +
    repeat("-G", group) +
    repeat("-E", expression) +
    conditional(resourceAlleleConcordance, "-rac") +
    conditional(useAllAnnotations, "-all") +
    conditional(alwaysAppendDbsnpId, "-alwaysAppendDbsnpId") +
    optional("-mvq", MendelViolationGenotypeQualityThreshold, format = MendelViolationGenotypeQualityThresholdFormat) +
    conditional(filter_reads_with_N_cigar, "-filterRNC") +
    conditional(filter_mismatching_base_and_quals, "-filterMBQ") +
    conditional(filter_bases_not_stored, "-filterNoBases")
}