BreakdancerConfig.scala 3.19 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
package nl.lumc.sasc.biopet.extensions.breakdancer
17
18
19
20
21

import java.io.File

import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
22
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

/**
 * Command line wrapper around the breakdancer `bam2cfg.pl` script (the default executable).
 *
 * The script is run on [[input]] and its standard output is redirected to [[output]].
 * Every tunable below is read from the Biopet config; an `Option` that ends up empty
 * is simply left off the command line.
 *
 * @param root configurable this function reads its settings from
 */
class BreakdancerConfig(val root: Configurable) extends BiopetCommandLineFunction {
  executable = config("exe", default = "bam2cfg.pl", freeVar = false)

  @Input(doc = "Bam File")
  var input: File = _

  @Output(doc = "Output File")
  var output: File = _

  /** Minimum of MQ to consider for taking read into histogram (`-q`) */
  var min_mq: Option[Int] = config("min_mq", default = 20)

  /** Passes the `-m` switch when true */
  var use_mq: Boolean = config("use_mq", default = false)

  /** Value for `-s`, read from config key "min_insertsize" */
  var min_insertsize: Option[Int] = config("min_insertsize", default = 450)

  /** Read from config key "solid"; when true the SOLiD switch is passed */
  var solid_data: Boolean = config("solid", default = false)

  /** Cutoff in unit of standard deviation [4] (`-c`) */
  var sd_cutoff: Option[Int] = config("sd_cutoff", default = 4)

  // we set this to a higher number to avoid biases in small numbers in sorted bams
  /** Number of observation required to estimate mean and s.d. insert size [10_000] (`-n`) */
  var min_observations: Option[Int] = config("min_observations", default = 10000)

  /** Cutoff on coefficients of variation [1] (`-v`); note the config key is "coef_cutoff" */
  var coefvar_cutoff: Option[Int] = config("coef_cutoff", default = 1)

  /** Number of bins in the histogram [50] (`-b`) */
  var histogram_bins: Option[Int] = config("histogram_bins", default = 50)

  /** Builds the shell command; stdout of the script is redirected into [[output]]. */
  def cmdLine = required(executable) +
    optional("-q", min_mq) +
    conditional(use_mq, "-m") +
    optional("-s", min_insertsize) +
    // bam2cfg.pl selects SOLiD with -C; -s is already taken by the minimal insert size above
    conditional(solid_data, "-C") +
    optional("-c", sd_cutoff) +
    optional("-n", min_observations) +
    optional("-v", coefvar_cutoff) +
    optional("-b", histogram_bins) +
    required(input) + " 1> " + required(output)
}

/** Factory methods that create a [[BreakdancerConfig]] with its input and output wired up. */
object BreakdancerConfig {
  private val BamExtension = ".bam"
  private val ConfigExtension = ".breakdancer.cfg"

  /**
   * Creates a job reading `input` and writing to the given `output` file.
   *
   * @param root   configurable handed to the new [[BreakdancerConfig]]
   * @param input  BAM file to process
   * @param output file that receives the generated output
   */
  def apply(root: Configurable, input: File, output: File): BreakdancerConfig = {
    val bdconf = new BreakdancerConfig(root)
    bdconf.input = input
    bdconf.output = output
    bdconf
  }

  /**
   * Creates a job whose output is placed in `outputDir`, named after the input file
   * with its ".bam" part replaced by ".breakdancer.cfg".
   *
   * @param root      configurable handed to the new [[BreakdancerConfig]]
   * @param input     BAM file to process
   * @param outputDir target directory; a trailing "/" is added when missing
   */
  def apply(root: Configurable, input: File, outputDir: String): BreakdancerConfig = {
    val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/"
    val outputFile = new File(dir + swapExtension(input.getName))
    apply(root, input, outputFile)
  }

  /**
   * Creates a job whose output is derived from the absolute path of `input`,
   * with its ".bam" part replaced by ".breakdancer.cfg".
   *
   * @param root  configurable handed to the new [[BreakdancerConfig]]
   * @param input BAM file to process
   */
  def apply(root: Configurable, input: File): BreakdancerConfig = {
    apply(root, input, new File(swapExtension(input.getAbsolutePath)))
  }

  /**
   * Cuts the name at the last occurrence of ".bam" and appends ".breakdancer.cfg".
   * A name without ".bam" is kept whole and only gets the new extension appended,
   * instead of failing on a negative substring index.
   */
  private def swapExtension(inputFile: String): String = {
    val cut = inputFile.lastIndexOf(BamExtension)
    val stem = if (cut >= 0) inputFile.substring(0, cut) else inputFile
    stem + ConfigExtension
  }
}