PindelCaller.scala 9.58 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.extensions.pindel

import java.io.File

import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
23
import org.broadinstitute.gatk.utils.commandline._
24
25
26
27
28
29
30

/**
 * Extension for pindel
 *
 * Based on version 0.2.5b8
 *
 * Exactly one of `pindelFile` or `configFile` has to be set; `beforeGraph`
 * registers an error otherwise. All result files are placed in the
 * `outputPrefix` directory and share the fixed base name `sample`
 * (e.g. `sample_D`, `sample_SI`).
 *
 * @param root parent configurable this job takes its configuration from
 */
class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
  executable = config("exe", default = "pindel")

  override def defaultCoreMemory = 4.0
  override def defaultThreads = 4

  /** Pattern used to pull the version string out of the output of `versionCommand`. */
  def versionRegex = """Pindel version:? (.*)""".r
  /** Exit code 1 is accepted when running the version command. */
  override def versionExitcode = List(1)
  /** The bare executable is used as the version command. */
  def versionCommand = executable

  /**
   * Required parameters
   */
  @Input
  var reference: File = referenceFasta

  /** Filled in by `beforeGraph` from either `pindelFile` or `configFile`. */
  @Input(doc = "Input specification for Pindel to use")
  var input: File = _

  @Argument(doc = "The pindel configuration file", required = false)
  var pindelFile: Option[File] = None

  @Argument(doc = "Configuration file with: bam-location/insert size/name", required = false)
  var configFile: Option[File] = None

  @Argument(doc = "Work directory")
  var outputPrefix: File = _

  @Output(doc = "Output file of pindel, pointing to the DEL file")
  var outputFile: File = _

  // The remaining outputs are assigned in `beforeGraph`; LI, BP and
  // CloseEndMapped are only assigned when the matching report flag is enabled.
  @Output(doc = "", required = false)
  var outputINV: File = _
  @Output(doc = "", required = false)
  var outputTD: File = _
  @Output(doc = "", required = false)
  var outputLI: File = _
  @Output(doc = "", required = false)
  var outputBP: File = _
  @Output(doc = "", required = false)
  var outputSI: File = _
  @Output(doc = "", required = false)
  var outputRP: File = _
  @Output(doc = "", required = false)
  var outputCloseEndMapped: File = _

  var RP: Option[Int] = config("RP")
  var minDistanceToTheEnd: Option[Int] = config("min_distance_to_the_end")
  // var threads
  var maxRangeIndex: Option[Int] = config("max_range_index")
  var windowSize: Option[Int] = config("window_size")
  var sequencingErrorRate: Option[Float] = config("sequencing_error_rate")
  var sensitivity: Option[Float] = config("sensitivity")

  var maximumAllowedMismatchRate: Option[Float] = config("maximum_allowed_mismatch_rate")
  var nm: Option[Int] = config("nm")

  var reportInversions: Boolean = config("report_inversions", default = false)
  var reportDuplications: Boolean = config("report_duplications", default = false)
  var reportLongInsertions: Boolean = config("report_long_insertions", default = false)
  var reportBreakpoints: Boolean = config("report_breakpoints", default = false)
  var reportCloseMappedReads: Boolean = config("report_close_mapped_reads", default = false)
  var reportOnlyCloseMappedReads: Boolean = config("report_only_close_mapped_reads", default = false)
  var reportInterchromosomalEvents: Boolean = config("report_interchromosomal_events", default = false)

  var IndelCorrection: Boolean = config("IndelCorrection", default = false)
  var NormalSamples: Boolean = config("NormalSamples", default = false)

  var breakdancer: Option[File] = config("breakdancer")
  var include: Option[File] = config("include")
  var exclude: Option[File] = config("exclude")

  var additionalMismatch: Option[Int] = config("additional_mismatch")
  var minPerfectMatchAroundBP: Option[Int] = config("min_perfect_match_around_BP")
  var minInversionSize: Option[Int] = config("min_inversion_size")
  var minNumMatchedBases: Option[Int] = config("min_num_matched_bases")
  var balanceCutoff: Option[Int] = config("balance_cutoff")
  var anchorQuality: Option[Int] = config("anchor_quality")
  var minimumSupportForEvent: Option[Int] = config("minimum_support_for_event")
  var inputSVCallsForAssembly: Option[File] = config("input_SV_Calls_for_assembly")

  var genotyping: Boolean = config("genotyping", default = false)
  var outputOfBreakdancerEvents: Option[File] = config("output_of_breakdancer_events")
  var nameOfLogfile: Option[File] = config("name_of_logfile")

  var ploidy: Option[File] = config("ploidy")
  var detectDD: Boolean = config("detect_DD", default = false)

  var MAX_DD_BREAKPOINT_DISTANCE: Option[Int] = config("MAX_DD_BREAKPOINT_DISTANCE")
  var MAX_DISTANCE_CLUSTER_READS: Option[Int] = config("MAX_DISTANCE_CLUSTER_READS")
  var MIN_DD_CLUSTER_SIZE: Option[Int] = config("MIN_DD_CLUSTER_SIZE")
  var MIN_DD_BREAKPOINT_SUPPORT: Option[Int] = config("MIN_DD_BREAKPOINT_SUPPORT")
  var MIN_DD_MAP_DISTANCE: Option[Int] = config("MIN_DD_MAP_DISTANCE")
  var DD_REPORT_DUPLICATION_READS: Option[Int] = config("DD_REPORT_DUPLICATION_READS")

  /** Resolves `name` inside the `outputPrefix` directory. */
  private def inOutputDir(name: String): File = new File(outputPrefix, name)

  /**
   * Selects the input specification and derives every output file location.
   *
   * An error is registered through `Logging.addError` when neither or both of
   * `pindelFile` and `configFile` are set; in that case `input` is left untouched.
   */
  override def beforeGraph: Unit = {
    if (reference == null) reference = referenceFasta()

    // we should check whether the `pindel-config-file` is set or the `config-file` for the bam-list
    // at least one of them should be set.
    (pindelFile, configFile) match {
      case (None, None)       => Logging.addError("No pindel config is given")
      case (Some(a), Some(b)) => Logging.addError(s"Please specify either a pindel config or bam-config. Not both for Pindel: $a or $b")
      case (Some(a), None) =>
        Logging.logger.info(s"Using '${a}' as pindel config for Pindel")
        input = a.getAbsoluteFile
      case (None, Some(b)) =>
        Logging.logger.info(s"Using '${b}' as bam config for Pindel")
        input = b.getAbsoluteFile
    }

    /** setting the output files for the many outputfiles pindel has */
    outputINV = inOutputDir("sample_INV")
    outputTD = inOutputDir("sample_TD")
    if (reportLongInsertions) outputLI = inOutputDir("sample_LI")
    if (reportBreakpoints) outputBP = inOutputDir("sample_BP")
    outputSI = inOutputDir("sample_SI")
    outputRP = inOutputDir("sample_RP")
    if (reportCloseMappedReads) outputCloseEndMapped = inOutputDir("sample_CloseEndMapped")

    // set the output file, the DELetion call is always made
    // TODO: add more outputs for the LI, SI, INV etc...
    outputFile = inOutputDir("sample_D")
  }

  /**
   * Assembles the pindel command line from the configured fields.
   *
   * The value passed to `--output-prefix` is `<outputPrefix>/sample`, which is
   * the base name the output fields set in `beforeGraph` are derived from.
   */
  def cmdLine = required(executable) +
    required("--fasta ", reference) +
    optional("--pindel-config-file", pindelFile) +
    optional("--config-file", configFile) +
    required("--output-prefix ", inOutputDir("sample")) +
    optional("--RP", RP) +
    optional("--min_distance_to_the_end", minDistanceToTheEnd) +
    optional("--number_of_threads", threads) +
    optional("--max_range_index", maxRangeIndex) +
    // pindel spells this option `--window_size` (singular)
    optional("--window_size", windowSize) +
    optional("--sequencing_error_rate", sequencingErrorRate) +
    optional("--sensitivity", sensitivity) +
    optional("--maximum_allowed_mismatch_rate", maximumAllowedMismatchRate) +
    optional("--NM", nm) +
    conditional(reportInversions, "--report_inversions") +
    conditional(reportDuplications, "--report_duplications") +
    conditional(reportLongInsertions, "--report_long_insertions") +
    conditional(reportBreakpoints, "--report_breakpoints") +
    conditional(reportCloseMappedReads, "--report_close_mapped_reads") +
    conditional(reportOnlyCloseMappedReads, "--report_only_close_mapped_reads") +
    conditional(reportInterchromosomalEvents, "--report_interchromosomal_events") +
    conditional(IndelCorrection, "--IndelCorrection") +
    conditional(NormalSamples, "--NormalSamples") +
    optional("--breakdancer", breakdancer) +
    optional("--include", include) +
    optional("--exclude", exclude) +
    optional("--additional_mismatch", additionalMismatch) +
    optional("--min_perfect_match_around_BP", minPerfectMatchAroundBP) +
    optional("--min_inversion_size", minInversionSize) +
    optional("--min_num_matched_bases", minNumMatchedBases) +
    optional("--balance_cutoff", balanceCutoff) +
    optional("--anchor_quality", anchorQuality) +
    optional("--minimum_support_for_event", minimumSupportForEvent) +
    optional("--input_SV_Calls_for_assembly", inputSVCallsForAssembly) +
    conditional(genotyping, "-g") +
    optional("--output_of_breakdancer_events", outputOfBreakdancerEvents) +
    optional("--name_of_logfile", nameOfLogfile) +
    optional("--Ploidy", ploidy) +
    // long options need the leading dashes, otherwise the token is not a flag
    conditional(detectDD, "--detect_DD") +
    optional("--MAX_DD_BREAKPOINT_DISTANCE", MAX_DD_BREAKPOINT_DISTANCE) +
    optional("--MAX_DISTANCE_CLUSTER_READS", MAX_DISTANCE_CLUSTER_READS) +
    optional("--MIN_DD_CLUSTER_SIZE", MIN_DD_CLUSTER_SIZE) +
    optional("--MIN_DD_BREAKPOINT_SUPPORT", MIN_DD_BREAKPOINT_SUPPORT) +
    optional("--MIN_DD_MAP_DISTANCE", MIN_DD_MAP_DISTANCE) +
    optional("--DD_REPORT_DUPLICATION_READS", DD_REPORT_DUPLICATION_READS)
}

Wai Yi Leung's avatar
Wai Yi Leung committed
213
214
215
object PindelCaller {
  /**
   * Creates a [[PindelCaller]] that reads a bam config file and writes into `outputDir`.
   *
   * @param root       parent configurable handed to the new caller
   * @param configFile bam configuration file, stored as the caller's `configFile`
   * @param outputDir  directory stored as the caller's `outputPrefix`
   * @return the configured caller, with `beforeGraph` already executed
   */
  def apply(root: Configurable, configFile: File, outputDir: File): PindelCaller = {
    val pindel = new PindelCaller(root)
    pindel.outputPrefix = outputDir
    pindel.configFile = Some(configFile)
    // run beforeGraph now so `input` and the output files are set on the returned instance
    pindel.beforeGraph
    pindel
  }
}