BiopetQScript.scala 6.29 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
17
package nl.lumc.sasc.biopet.core

import java.io.File
18

19
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
23
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
/** Base for biopet pipeline */
28
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
29

30
  /** Optional `config_file` / `config` argument holding JSON / YAML config files; empty by default. */
  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = List.empty

  /**
   * Optional `config_value` / `cv` argument holding single config values; empty by default.
   * Each entry is expected to look like 'key=value' or 'namespace:namespace:key=value'.
   */
  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'namespace:namespace:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = List.empty

Peter van 't Hof's avatar
Peter van 't Hof committed
36
  /**
   * Output directory of pipeline.
   *
   * Initialised from the "output_dir" config value (looked up with an empty path) when that
   * value is present, otherwise from the current directory ("."). `script()` assigns this
   * variable again before the pipeline itself runs.
   */
  var outputDir: File = {
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")
  }
  // Fail fast while the trait is initialised when the initial directory is not accessible.
  require(outputDir.canRead, s"No premision to read outputdir: $outputDir")
  // This check is about write access, so the message has to say "write"; it used to say "read".
  require(outputDir.canWrite, s"No premision to write outputdir: $outputDir")
bow's avatar
bow committed
43

44
  /** When true, `script()` sets the scatter count of every ScatterGatherableFunction to 1. */
  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /** Output files of this pipeline, stored by name; starts out empty. */
  var outputFiles: Map[String, File] = Map.empty

  /** Alias for [[BiopetQScript.InputFile]], usable inside pipelines without qualification. */
  type InputFile = BiopetQScript.InputFile

  /** Input files that `script()` checks for existence, readability and an absolute path. */
  var inputFiles: List[InputFile] = List.empty

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** Queue settings; gets implemented from org.broadinstitute.gatk.queue.QScript. */
  var qSettings: QSettings

  /** Functions of this script; gets implemented from org.broadinstitute.gatk.queue.QScript. */
  var functions: Seq[QFunction]

  /** Init for pipeline; `script()` calls this before `biopetScript()`. */
  def init(): Unit

  /** Pipeline itself; `script()` calls this right after `init()`. */
  def biopetScript(): Unit

  /**
   * Returns the extension to make the report.
   * The default is empty, in which case `script()` adds no report function.
   */
  def reportClass: Option[ReportBuilderExtension] = Option.empty

Peter van 't Hof's avatar
Peter van 't Hof committed
68
  /**
   * Script from queue itself, final to force some checks for each pipeline and write report.
   *
   * Steps, in this order: read and check the output directory, call `init()` and
   * `biopetScript()`, optionally disable scatters, add the report function unless it is
   * skipped, run the pre commands of every BiopetCommandLineFunction, write the config
   * report, check the input files, give functions without a job output file a default one,
   * optionally write the dependency file, and finally call `Logging.checkErrors()`.
   */
  final def script(): Unit = {
    // The configured directory is made absolute before anything is checked against it.
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile

    BiopetQScript.checkOutputDir(outputDir)

    init()
    biopetScript()
    logger.info("Biopet script done")

    if (disableScatter) {
      logger.info("Disable scatters")
      functions.foreach {
        case scatterable: ScatterGatherableFunction => scatterable.scatterCount = 1
        case _                                      =>
      }
    }

    // The report is skipped when the sample flag is used and excludes at least one sample.
    this match {
      case multi: MultiSampleQScript if multi.onlySamples.nonEmpty && multi.samples.exists(sample => !multi.onlySamples.contains(sample._1)) =>
        logger.info("Write report is skipped because sample flag is used")
      case _ => reportClass.foreach(add(_))
    }

    logger.info("Running pre commands")
    functions.foreach {
      case cmd: BiopetCommandLineFunction =>
        cmd.preProcessExecutable()
        cmd.beforeGraph()
        cmd.internalBeforeGraph()
        cmd.commandLine
      case _ =>
    }

    val outputDirUsable = outputDir.getParentFile.canWrite || (outputDir.exists && outputDir.canWrite)
    if (outputDirUsable)
      globalConfig.writeReport(qSettings.runName, new File(outputDir, s".log/${qSettings.runName}"))
    else
      Logging.addError(s"Parent of output dir: '${outputDir.getParent}' is not writeable, output directory cannot be created")

    logger.info("Checking input files")
    inputFiles.par.foreach { input =>
      val file = input.file
      if (!file.exists()) Logging.addError(s"Input file does not exist: $file")
      if (!file.canRead) Logging.addError(s"Input file can not be read: $file")
      if (!file.isAbsolute) Logging.addError(s"Input file should be an absolute path: $file")
    }

    // Functions without a job output file get a hidden ".out" file next to their first output.
    functions.filter(_.jobOutputFile == null).foreach { function =>
      try {
        val parentDir = function.firstOutput.getAbsoluteFile.getParent
        val fileName = s".${function.firstOutput.getName}.${function.getClass.getSimpleName}.out"
        function.jobOutputFile = new File(parentDir, fileName)
      } catch {
        case _: NullPointerException => logger.warn(s"Can't generate a jobOutputFile for $function")
      }
    }

    if (logger.isDebugEnabled) {
      val dependencyFile = new File(outputDir, s".log/${qSettings.runName}.deps.json")
      WriteDependencies.writeDependencies(functions, dependencyFile)
    }

    Logging.checkErrors()
    logger.info("Script complete without errors")
  }
bow's avatar
bow committed
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
130
  /** Varargs `add`; gets implemented from org.broadinstitute.gatk.queue.QScript. */
  def add(functions: QFunction*): Unit

  /** `addAll` for a collection of functions; gets implemented from org.broadinstitute.gatk.queue.QScript. */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit
133

134
  /**
   * Function to set isIntermediate and add in 1 line.
   *
   * @param function       function to add
   * @param isIntermediate value assigned to `function.isIntermediate` before adding; false by default
   */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    // Single-argument call: intended to reach the varargs `add`, not this overload again.
    add(function)
  }
139
140
141
142
143
144
145
146
147

  /**
   * Runs a sub pipeline and adds all of its functions to this pipeline.
   *
   * The sub pipeline receives the `qSettings` of this pipeline first. A SummaryQScript or
   * BiopetQScript is run through `init()` and `biopetScript()`; any other QScript is run
   * through its `script()` method. When both this pipeline and the sub pipeline are a
   * SummaryQScript, the sub pipeline is also passed to `addSummaryQScript`.
   *
   * @param subPipeline pipeline whose functions are added to this one
   */
  def add(subPipeline: QScript): Unit = {
    subPipeline.qSettings = this.qSettings
    subPipeline match {
      case summarized: SummaryQScript =>
        summarized.init()
        summarized.biopetScript()
        this match {
          case parent: SummaryQScript => parent.addSummaryQScript(summarized)
          case _                      =>
        }
      case biopet: BiopetQScript =>
        biopet.init()
        biopet.biopetScript()
      case _ => subPipeline.script()
    }
    addAll(subPipeline.functions)
  }
157
}
Peter van 't Hof's avatar
Peter van 't Hof committed
158
159

object BiopetQScript {
  /**
   * An input file of a pipeline.
   *
   * @param file the file itself
   * @param md5  optional md5 value for the file; None by default
   */
  case class InputFile(file: File, md5: Option[String] = None)

  /**
   * Sanity checks for an output directory; tries to create the directory as part of the check.
   *
   * The parent directory has to be readable and writable, then `mkdir()` is called, and
   * afterwards the output directory itself has to be readable and writable.
   *
   * @param outputDir directory to create and check
   * @throws IllegalArgumentException when one of the permission checks fails
   */
  def checkOutputDir(outputDir: File): Unit = {
    // getParentFile returns null for a path without a parent component (a relative name like
    // "out", or the file system root), which used to end in a NullPointerException here.
    // The parent is taken from the absolute path instead, and the parent checks are skipped
    // only when there is no parent at all. For an absolute path nothing changes.
    Option(outputDir.getAbsoluteFile.getParentFile).foreach { parent =>
      require(parent.canRead, s"No premision to read parent of outputdir: $parent")
      require(parent.canWrite, s"No premision to write parent of outputdir: $parent")
    }
    // The result of mkdir is not inspected; the two checks below fail when the directory is missing.
    outputDir.mkdir()
    require(outputDir.canRead, s"No premision to read outputdir: $outputDir")
    require(outputDir.canWrite, s"No premision to write outputdir: $outputDir")
  }
}