BiopetQScript.scala 7.55 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16 17
package nl.lumc.sasc.biopet.core

import java.io.File
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, WriteSummary }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
/**
 * Base for biopet pipeline.
 *
 * Must be mixed into a GATK Queue `QScript` (see the self-type). Implementations provide
 * [[init]] and [[biopetScript]]; the final [[script]] method wraps them with checks on the
 * output directory and the input files and triggers writing of the config report and the
 * dependency graph.
 */
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>

  /** Config file(s), exposed as the `config_file` / `config` argument. */
  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

  /** Single config values, exposed as the `config_value` / `cv` argument. */
  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'namespace:namespace:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = Nil

  /**
   * Output directory of pipeline.
   *
   * Starts as the root-level `output_dir` config value when that key is present and as `.`
   * otherwise; [[script]] re-reads it from the config and makes it absolute.
   */
  var outputDir: File = {
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")
  }

  /** When set, [[script]] forces the scatter count of every scatter-gatherable function to 1. */
  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /** Named output files of the pipeline; not read anywhere inside this trait. */
  var outputFiles: Map[String, File] = Map()

  /** Alias so pipelines can refer to [[BiopetQScript.InputFile]] without an import. */
  type InputFile = BiopetQScript.InputFile

  /** Input files of the pipeline; [[script]] checks each one for existence, readability and an absolute path. */
  var inputFiles: List[InputFile] = Nil

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var qSettings: QSettings

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var functions: Seq[QFunction]

  /** Init for pipeline, called by [[script]] before [[biopetScript]]. */
  def init(): Unit

  /** Pipeline itself, called by [[script]] right after [[init]]. */
  def biopetScript(): Unit

  /** Returns the extension to make the report; `None` (the default) means no report job is added. */
  def reportClass: Option[ReportBuilderExtension] = None

  /** Value of the `skip_write_dependencies` config key (default `false`); when true the dependency graph is not written. */
  val skipWriteDependencies: Boolean = config("skip_write_dependencies", default = false)

  /**
   * Script from queue itself, final to force some checks for each pipeline and write report.
   *
   * In order: resolves and checks the output directory, runs [[init]] and [[biopetScript]],
   * optionally disables scattering, adds the report job, prepares all functions, requests the
   * config report, checks the input files, assigns default stdout files, writes the dependency
   * graph and finally calls `Logging.checkErrors()` on the errors collected along the way.
   */
  final def script(): Unit = {
    // Resolve the output directory from the config and make it absolute before anything uses it.
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile

    BiopetQScript.checkOutputDir(outputDir)

    init()
    biopetScript()
    logger.info("Biopet script done")

    if (disableScatter) {
      logger.info("Disable scatters")
      functions.foreach {
        case f: ScatterGatherableFunction => f.scatterCount = 1
        case _                            =>
      }
    }

    // The report job is only added when the run is not restricted to a subset of the samples.
    this match {
      case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
        logger.info("Write report is skipped because sample flag is used")
      case _ => reportClass.foreach { report =>
        add(report)
      }
    }

    logger.info("Running pre commands")
    functions.foreach {
      case f: BiopetCommandLineFunction =>
        f.preProcessExecutable()
        f.beforeGraph()
        f.internalBeforeGraph()
        // Result is discarded. NOTE(review): presumably evaluated so that problems in building
        // the command line surface here already - confirm.
        f.commandLine
      case f: WriteSummary => f.init()
      case _               =>
    }

    val logDir = new File(outputDir, s".log${File.separator}${qSettings.runName.toLowerCase}")

    // getParentFile is null when the absolute output dir has no parent (filesystem root);
    // in that case only the directory itself decides whether it is writeable.
    val parentWritable = Option(outputDir.getParentFile).exists(_.canWrite)
    if (parentWritable || (outputDir.exists && outputDir.canWrite))
      globalConfig.writeReport(new File(logDir, "config"))
    else Logging.addError(s"Parent of output dir: '${outputDir.getParent}' is not writeable, output directory cannot be created")

    logger.info("Checking input files")
    inputFiles.par.foreach { input =>
      val file = input.file
      if (!file.exists()) Logging.addError(s"Input file does not exist: $file")
      if (!file.canRead) Logging.addError(s"Input file can not be read: $file")
      if (!file.isAbsolute) Logging.addError(s"Input file should be an absolute path: $file")
    }

    // Every function without an explicit stdout file gets a hidden
    // ".<first output>.<class name>.out" file next to its first output.
    functions.filter(_.jobOutputFile == null).foreach { f =>
      val cls = f.getClass
      val className = (if (cls.isAnonymousClass) cls.getSuperclass else cls).getSimpleName
      val nextToFirstOutput = for {
        outputs <- BiopetQScript.safeOutputs(f)
        // headOption: a function without any output must reach the fallback instead of throwing
        first <- outputs.headOption
      } yield new File(first.getAbsoluteFile.getParent, s".${f.firstOutput.getName}.$className.out")
      f.jobOutputFile = nextToFirstOutput.getOrElse(new File("./stdout")) // Fallback is here for test backup
    }

    if (!skipWriteDependencies) WriteDependencies.writeDependencies(
      functions,
      new File(logDir, "graph"))

    Logging.checkErrors()
    logger.info("Script complete without errors")
  }

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def add(functions: QFunction*): Unit

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit

  /**
   * Function to set isIntermediate and add in 1 line.
   *
   * @param function function to add to this script
   * @param isIntermediate value assigned to `function.isIntermediate` before the function is added
   */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    add(function)
  }

  /**
   * Adds all functions of a sub pipeline to this script.
   *
   * The sub pipeline gets the `qSettings` of this script. Biopet pipelines are run through
   * `init()` and `biopetScript()` only, so the checks of their own [[script]] are not repeated;
   * any other `QScript` is run through its `script()`. When both this script and the sub
   * pipeline are a `SummaryQScript` the sub pipeline is also registered with
   * `addSummaryQScript`.
   *
   * @param subPipeline pipeline whose functions should become part of this script
   */
  def add(subPipeline: QScript): Unit = {
    subPipeline.qSettings = this.qSettings
    subPipeline match {
      case that: SummaryQScript =>
        that.init()
        that.biopetScript()
        this match {
          case s: SummaryQScript => s.addSummaryQScript(that)
          case _                 =>
        }
      case that: BiopetQScript =>
        that.init()
        that.biopetScript()
      case _ => subPipeline.script()
    }
    addAll(subPipeline.functions)
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
165 166

/** Helpers shared by all biopet pipelines. */
object BiopetQScript {

  /**
   * Input file of a pipeline.
   *
   * @param file the input file itself
   * @param md5 optional md5 value belonging to the file (presumably the expected checksum - confirm)
   */
  case class InputFile(file: File, md5: Option[String] = None)

  /**
   * Sanity checks on the output directory; creates the directory itself when it is missing.
   *
   * The parent of the absolute path must be readable and writeable, after which the directory
   * is created with `mkdir()` (parents are not created) and must itself be readable and
   * writeable. A directory without a parent (filesystem root) skips the parent checks.
   *
   * @param outputDir output directory to check, may be a relative path
   * @throws IllegalArgumentException when one of the permission checks fails
   */
  def checkOutputDir(outputDir: File): Unit = {
    val absoluteDir = outputDir.getAbsoluteFile

    // Report the parent that was actually checked: `outputDir.getParentFile` is null for a
    // relative path of a single component, which made the message end in "null".
    Option(absoluteDir.getParentFile).foreach { parent =>
      require(parent.canRead, s"No premision to read parent of outputdir: $parent")
      require(parent.canWrite, s"No premision to write parent of outputdir: $parent")
    }

    // The result of mkdir() is ignored on purpose, the checks below cover a failed creation.
    outputDir.mkdir()
    require(absoluteDir.canRead, s"No premision to read outputdir: $outputDir")
    require(absoluteDir.canWrite, s"No premision to write outputdir: $outputDir")
  }

  /**
   * Evaluates `value`, turning a `NullPointerException` into `None`.
   *
   * Only `NullPointerException` is caught; any other exception still propagates. A `null`
   * result is not an exception and ends up as `Some(null)`.
   */
  private def nullSafe[T](value: => T): Option[T] =
    try Some(value)
    catch {
      case _: NullPointerException => None
    }

  /** Inputs of the function, or `None` when reading them throws a `NullPointerException`. */
  def safeInputs(function: QFunction): Option[Seq[File]] = nullSafe(function.inputs)

  /** Outputs of the function, or `None` when reading them throws a `NullPointerException`. */
  def safeOutputs(function: QFunction): Option[Seq[File]] = nullSafe(function.outputs)

  /** Done files of the function, or `None` when reading them throws a `NullPointerException`. */
  def safeDoneFiles(function: QFunction): Option[Seq[File]] = nullSafe(function.doneOutputs)

  /** Fail files of the function, or `None` when reading them throws a `NullPointerException`. */
  def safeFailFiles(function: QFunction): Option[Seq[File]] = nullSafe(function.failOutputs)

  /** `isDone` of the function, or `None` when evaluating it throws a `NullPointerException`. */
  def safeIsDone(function: QFunction): Option[Boolean] = nullSafe(function.isDone)

  /** `isFail` of the function, or `None` when evaluating it throws a `NullPointerException`. */
  def safeIsFail(function: QFunction): Option[Boolean] = nullSafe(function.isFail)

}