BiopetQScript.scala 5.46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
package nl.lumc.sasc.biopet.core

import java.io.File
19

20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.config.Configurable
22
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.Logging
24
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.function.QFunction
26
import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
28
29
import org.broadinstitute.gatk.utils.commandline.Argument

Peter van 't Hof's avatar
Peter van 't Hof committed
30
/**
 * Base for biopet pipeline.
 *
 * Implementations supply `init()` and `biopetScript()`. The final `script()` method calls
 * both and afterwards runs a fixed series of checks over the functions that were added.
 * The self-type requires this trait to be mixed into a GATK Queue `QScript`.
 */
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>

  /** Command line argument `config_file` (short name `config`): JSON / YAML config file(s). */
  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

  /** Command line argument `config_value` (short name `cv`): config values given as strings. */
  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'path:path:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = Nil

  /**
   * Output directory of pipeline.
   *
   * Initialised from the root-level config key `output_dir` when that key is present,
   * otherwise the current directory. `script()` overwrites it with an absolute path.
   */
  var outputDir: File = {
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")
  }

  /** When true, `script()` sets the scatter count of every scatter-gatherable function to 1. */
  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /**
   * Named output files, initially empty.
   * NOTE(review): not read inside this trait; presumably filled and used by implementations.
   */
  var outputFiles: Map[String, File] = Map()

  /** Alias so implementations can refer to `BiopetQScript.InputFile` as `InputFile`. */
  type InputFile = BiopetQScript.InputFile

  /** Input files; `script()` records an error for each one that is missing or unreadable. */
  var inputFiles: List[InputFile] = Nil

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var qSettings: QSettings

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var functions: Seq[QFunction]

  /** Init for pipeline */
  def init(): Unit

  /** Pipeline itself */
  def biopetScript(): Unit

  /** Returns the extension to make the report; `None` (the default) means no report job is added. */
  def reportClass: Option[ReportBuilderExtension] = None

  /**
   * Script from queue itself, final to force some checks for each pipeline and write report.
   *
   * In order: resolves `outputDir` from the config and makes it absolute, runs `init()` and
   * `biopetScript()`, optionally disables scattering, adds the report job, prepares every
   * [[BiopetCommandLineFunction]], writes the config report, validates `inputFiles`,
   * assigns default job output files, and finally calls `Logging.checkErrors()`.
   */
  final def script(): Unit = {
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile
    init()
    biopetScript()

    if (disableScatter) for (function <- functions) function match {
      case f: ScatterGatherableFunction => f.scatterCount = 1
      case _                            =>
    }

    // The report is only added when the run is not restricted to a subset of the samples.
    this match {
      case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
        logger.info("Write report is skipped because sample flag is used")
      case _ => reportClass.foreach(add(_))
    }

    for (function <- functions) function match {
      case f: BiopetCommandLineFunction =>
        f.preProcessExecutable()
        f.beforeGraph()
        f.internalBeforeGraph()
        // Result is discarded on purpose.
        // NOTE(review): presumably evaluated so command line problems surface before the run starts.
        f.commandLine
      case _ =>
    }

    // getParentFile is null for a path without a parent (e.g. the filesystem root);
    // wrapping it in Option avoids a NullPointerException in that case.
    val parentWritable = Option(outputDir.getParentFile).exists(_.canWrite)
    if (parentWritable || (outputDir.exists && outputDir.canWrite))
      globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName))
    else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, outputdir can not be created")

    inputFiles.foreach { i =>
      if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
      else if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
    }

    // Give every function without an explicit job output file a hidden ".<output>.<configName>.out"
    // file next to its first output.
    functions.filter(_.jobOutputFile == null).foreach(f => {
      try {
        f.jobOutputFile = new File(f.firstOutput.getAbsoluteFile.getParent, "." + f.firstOutput.getName + "." + configName + ".out")
      } catch {
        // Best effort: a null first output only produces a warning.
        case _: NullPointerException => logger.warn(s"Can't generate a jobOutputFile for $f")
      }
    })

    if (logger.isDebugEnabled) WriteDependencies.writeDependencies(functions, new File(outputDir, s".log/${qSettings.runName}.deps.json"))

    Logging.checkErrors()
  }

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def add(functions: QFunction*): Unit

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit

  /** Function to set isIntermediate and add in 1 line */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    add(function)
  }

  /**
   * Runs a sub pipeline and adds all of its functions to this pipeline.
   *
   * The sub pipeline receives this pipeline's `qSettings` first. How it is run depends on its type:
   *  - a [[SummaryQScript]] runs `init()`, `biopetScript()` and `addSummaryJobs()`, and is
   *    registered through `addSummaryQScript` when this pipeline is a [[SummaryQScript]] as well;
   *  - any other [[BiopetQScript]] runs `init()` and `biopetScript()` only, so the checks in
   *    its `script()` are not executed;
   *  - every other `QScript` runs its own `script()`.
   *
   * @param subPipeline pipeline whose functions are added to this one
   */
  def add(subPipeline: QScript): Unit = {
    subPipeline.qSettings = this.qSettings
    subPipeline match {
      case that: SummaryQScript =>
        that.init()
        that.biopetScript()
        that.addSummaryJobs()
        this match {
          case s: SummaryQScript => s.addSummaryQScript(that)
          case _                 =>
        }
      case that: BiopetQScript =>
        that.init()
        that.biopetScript()
      case _ => subPipeline.script()
    }
    addAll(subPipeline.functions)
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
149
150

/** Companion of the [[BiopetQScript]] trait, holding the types it shares with implementations. */
object BiopetQScript {

  /**
   * An input file of a pipeline.
   *
   * @param file the input file
   * @param md5  optional md5 value for the file, `None` by default.
   *             NOTE(review): presumably the expected checksum; nothing in this file verifies it.
   */
  case class InputFile(file: File, md5: Option[String] = None)
}