/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.extensions.tools

import java.io.File

import nl.lumc.sasc.biopet.core.summary.{Summarizable, SummaryQScript}
import nl.lumc.sasc.biopet.core.{Reference, ToolCommandFunction}
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.{ConfigUtils, tryToParseNumber}
import org.broadinstitute.gatk.utils.commandline.{Input, Output}

import scala.io.Source

/**
 * Queue extension for the Biopet VcfStats tool, which generates statistics from a vcf file.
 *
 * `outputDir` and `statsFile` are only assigned by [[setOutputDir]], and both are used when
 * the command line and the summary are built, so [[setOutputDir]] has to be called (and
 * [[input]] has to be set) before the job is used.
 *
 * Created by pjvan_thof on 1/10/15.
 *
 * @param root configurable handed in by the creator of this extension
 */
class VcfStats(val root: Configurable) extends ToolCommandFunction with Summarizable with Reference {

  /**
   * Tool object backing this extension. As a term, `VcfStats` resolves to the imported
   * `nl.lumc.sasc.biopet.tools.vcfstats.VcfStats`, not to this class.
   */
  def toolObject = VcfStats

  // NOTE(review): `mainFunction` is inherited; its exact effect is not visible in this file.
  mainFunction = false

  /** Vcf file to collect statistics from; passed to the tool as `-I`. */
  @Input(doc = "Input vcf", shortName = "I", required = true)
  var input: File = _

  /**
   * Index belonging to [[input]]; assigned in [[beforeGraph]] as `<input>.tbi`.
   * It is not part of the command line; it is only declared as an `@Input` of the job.
   */
  @Input
  protected var index: File = _

  /** Json file with the statistics; assigned by [[setOutputDir]] as `<dir>/stats.json`. */
  @Output
  protected var statsFile: File = _

  /** Default core memory for this job (unit is defined by the overridden member). */
  override def defaultCoreMemory: Double = 3.0

  /** Default number of threads for this job. */
  override def defaultThreads: Int = 3

  /** Directory handed to the tool as `-o`; assigned by [[setOutputDir]]. */
  protected var outputDir: File = _

  /** Each entry is passed to the tool as a separate `--infoTag` argument. */
  var infoTags: List[String] = Nil

  /** Each entry is passed to the tool as a separate `--genotypeTag` argument. */
  var genotypeTags: List[String] = Nil

  /** When true the `--allInfoTags` flag is added to the command line. */
  var allInfoTags = false

  /** When true the `--allGenotypeTags` flag is added to the command line. */
  var allGenotypeTags = false

  /** Reference passed as `-R`; overwritten in [[beforeGraph]] with `referenceFasta()`. */
  var reference: File = _

  /** Optional intervals file; passed as `--intervals` only when defined. */
  var intervals: Option[File] = None

  /**
   * Fills in the values that are derived from other settings: the reference fasta and
   * the location of the index next to [[input]].
   *
   * NOTE(review): this override does not call `super.beforeGraph()`; whether one of the
   * parent traits relies on being chained is not visible here - confirm.
   */
  override def beforeGraph(): Unit = {
    reference = referenceFasta()
    index = new File(input.getAbsolutePath + ".tbi")
  }

  /**
   * Set output dir and a output file
   *
   * Besides `outputDir` this also points `statsFile` to `stats.json` and the job output
   * file to `.vcfstats.out`, both inside `dir`.
   *
   * @param dir directory the tool should write to
   */
  def setOutputDir(dir: File): Unit = {
    outputDir = dir
    statsFile = new File(dir, "stats.json")
    jobOutputFile = new File(dir, ".vcfstats.out")
  }

  /** Creates command to execute extension */
  override def cmdLine: String = super.cmdLine +
    required("-I", input) +
    required("-o", outputDir) +
    repeat("--infoTag", infoTags) +
    repeat("--genotypeTag", genotypeTags) +
    conditional(allInfoTags, "--allInfoTags") +
    conditional(allGenotypeTags, "--allGenotypeTags") +
    required("-R", reference) +
    optional("--intervals", intervals)

  /** Returns general stats to the summary, read from `statsFile` */
  def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(statsFile)

  /** return only general files to summary */
  def summaryFiles: Map[String, File] = Map(
    "stats" -> statsFile
  )
}