VcfStats.scala 2.82 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
package nl.lumc.sasc.biopet.extensions.tools
16

Peter van 't Hof's avatar
Peter van 't Hof committed
17 18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19 20
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats
22
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
23 24
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, tryToParseNumber }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
25 26 27 28 29 30 31 32

import scala.io.Source

/**
 * Queue extension that wraps the VcfStats tool, which generates statistics
 * from a vcf file.
 *
 * Typical use: set [[input]], call [[setOutputDir]] to choose where the
 * results go, optionally fill in the tag / interval settings, and add the
 * job to a pipeline. The resulting `stats.json` is exposed through
 * [[summaryStats]] and [[summaryFiles]].
 *
 * Created by pjvan_thof on 1/10/15.
 *
 * @param root parent configurable this extension belongs to
 */
class VcfStats(val root: Configurable) extends ToolCommandFunction with Summarizable with Reference {
  /** The tool object (imported from `nl.lumc.sasc.biopet.tools.vcfstats`) this extension runs. */
  def toolObject = VcfStats

  // NOTE(review): flag defined by a parent trait; its exact effect is not visible in this file.
  mainFunction = false

  /** Vcf file to compute statistics on; passed to the tool as `-I`. */
  @Input(doc = "Input vcf", shortName = "I", required = true)
  var input: File = _

  /**
   * Index file belonging to [[input]]. Filled in by [[beforeGraph]] as the
   * input path with `.tbi` appended; not passed on the command line.
   */
  @Input
  protected var index: File = null

  /** The `stats.json` file inside the output dir; assigned by [[setOutputDir]]. */
  @Output
  protected var statsFile: File = null

  override def defaultCoreMemory = 3.0
  override def defaultThreads = 3

  /** Directory the tool writes to (`-o`); assigned by [[setOutputDir]]. */
  protected var outputDir: File = _

  /** Info tags to report on; each one is passed as a separate `--infoTag`. */
  var infoTags: List[String] = Nil

  /** Genotype tags to report on; each one is passed as a separate `--genotypeTag`. */
  var genotypeTags: List[String] = Nil

  /** When true, `--allInfoTags` is added to the command line. */
  var allInfoTags = false

  /** When true, `--allGenotypeTags` is added to the command line. */
  var allGenotypeTags = false

  /** Reference fasta passed as `-R`; filled in by [[beforeGraph]]. */
  var reference: File = _

  /** Optional intervals file passed as `--intervals` when defined. */
  var intervals: Option[File] = None

  /**
   * Resolves the values that depend on configuration and on [[input]]:
   * the reference fasta (via `referenceFasta()`) and the `.tbi` index path
   * next to the input file.
   */
  override def beforeGraph(): Unit = {
    reference = referenceFasta()
    // Requires `input` to be set already; the index path is derived from it.
    index = new File(input.getAbsolutePath + ".tbi")
  }

  /**
   * Set output dir and a output file.
   *
   * Also points `statsFile` at `stats.json` and the job output file at
   * `.vcfstats.out`, both inside `dir`.
   *
   * @param dir directory the tool should write its results to
   */
  def setOutputDir(dir: File): Unit = {
    outputDir = dir
    statsFile = new File(dir, "stats.json")
    jobOutputFile = new File(dir, ".vcfstats.out")
  }

  /** Creates command to execute extension */
  override def cmdLine = super.cmdLine +
    required("-I", input) +
    required("-o", outputDir) +
    repeat("--infoTag", infoTags) +
    repeat("--genotypeTag", genotypeTags) +
    conditional(allInfoTags, "--allInfoTags") +
    conditional(allGenotypeTags, "--allGenotypeTags") +
    required("-R", reference) +
    optional("--intervals", intervals)

  /** Returns general stats to the summary, read from `statsFile` as a config map. */
  def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(statsFile)

  /** return only general files to summary */
  def summaryFiles: Map[String, File] = Map(
    "stats" -> statsFile
  )
}