BiopetQScript.scala 8.34 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16 17
package nl.lumc.sasc.biopet.core

import java.io.File
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, WriteSummary }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
24
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
26

27 28
import scala.collection.mutable.ListBuffer

Peter van 't Hof's avatar
Peter van 't Hof committed
29
/**
 * Base trait for biopet pipelines.
 *
 * It has to be mixed into a [[QScript]] (see the self-type). A concrete pipeline implements
 * [[init]] and [[biopetScript]]; the final [[script]] method calls both and afterwards runs
 * the checks and bookkeeping that are shared by every pipeline.
 */
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>

  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'namespace:namespace:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = Nil

  /** Output directory of pipeline; taken from the root config value "output_dir" when present, else "." */
  var outputDir: File =
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")

  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /** Named output files of the pipeline */
  var outputFiles: Map[String, File] = Map()

  type InputFile = BiopetQScript.InputFile

  /** Input files that [[script]] checks for existence, readability and an absolute path */
  var inputFiles: List[InputFile] = Nil

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var qSettings: QSettings

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var functions: Seq[QFunction]

  /** Init for pipeline */
  def init(): Unit

  /** Pipeline itself */
  def biopetScript(): Unit

  /** Returns the extension to make the report */
  def reportClass: Option[ReportBuilderExtension] = None

  /** Config value "skip_write_dependencies" (default false); when true no dependency graph is written */
  val skipWriteDependencies: Boolean = config("skip_write_dependencies", default = false)

  /** Config value "write_html_report" (default true); when false no report job is added */
  val writeHtmlReport: Boolean = config("write_html_report", default = true)

  /** Script from queue itself, final to force some checks for each pipeline and write report */
  final def script(): Unit = {
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile

    BiopetQScript.checkOutputDir(outputDir)

    init()
    biopetScript()
    logger.info("Biopet script done")

    if (disableScatter) {
      logger.info("Disable scatters")
      functions.foreach {
        case f: ScatterGatherableFunction => f.scatterCount = 1
        case _                            =>
      }
    }

    logger.info("Running pre commands")
    val totalCount = functions.size
    functions.zipWithIndex.foreach {
      case (function, index) =>
        function match {
          case f: BiopetCommandLineFunction =>
            f.preProcessExecutable()
            f.beforeGraph()
            f.internalBeforeGraph()
            // Result is discarded on purpose.
            // NOTE(review): presumably evaluated here so problems in building the command show up early - confirm
            f.commandLine
          case f: WriteSummary => f.init()
          case _               =>
        }
        val processed = index + 1
        if (processed % 500 == 0) logger.info(s"Preprocessing done for $processed jobs out of $totalCount total")
    }
    logger.info(s"Preprocessing done for $totalCount functions")

    val logDir = new File(outputDir, ".log" + File.separator + qSettings.runName.toLowerCase)

    // getParentFile returns null for a filesystem root, wrap it to avoid a NullPointerException
    val parentWritable = Option(outputDir.getParentFile).exists(_.canWrite)
    if (parentWritable || (outputDir.exists && outputDir.canWrite))
      globalConfig.writeReport(new File(logDir, "config"))
    else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writable, output directory cannot be created")

    logger.info("Checking input files")
    inputFiles.par.foreach { i =>
      if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
      if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
      if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}")
    }

    logger.info("Set stdout file when not set")
    functions.filter(_.jobOutputFile == null).foreach { f =>
      val functionClass = f.getClass
      val className =
        if (functionClass.isAnonymousClass) functionClass.getSuperclass.getSimpleName
        else functionClass.getSimpleName
      f.jobOutputFile = BiopetQScript.safeOutputs(f) match {
        // Only use the first output when there is one; a function without outputs gets the fallback
        case Some(o) if o != null && o.nonEmpty =>
          new File(o.head.getAbsoluteFile.getParent, "." + f.firstOutput.getName + "." + className + ".out")
        case _ => new File("./stdout") // Line is here for test backup
      }
    }

    if (writeHtmlReport) {
      logger.info("Adding report")
      this match {
        // A report is skipped when the sample flag excludes at least one of the samples
        case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
          logger.info("Write report is skipped because sample flag is used")
        case _ => reportClass.foreach { report =>
          // The report depends on the stdout file of every summary writer
          functions.foreach {
            case w: WriteSummary => report.deps :+= w.jobOutputFile
            case _               =>
          }
          report.jobOutputFile = new File(report.outputDir, ".report.out")
          add(report)
        }
      }
    }

    if (!skipWriteDependencies) WriteDependencies.writeDependencies(
      functions,
      new File(logDir, "graph"))
    else logger.debug("Write dependencies is skipped")

    Logging.checkErrors()
    logger.info("Script complete without errors")
  }

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def add(functions: QFunction*): Unit

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit

  /** Function to set isIntermediate and add in 1 line */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    add(function)
  }

  /**
   * Runs a sub pipeline and adds all of its functions to this pipeline.
   *
   * The sub pipeline gets the qSettings of this pipeline. For a biopet pipeline only init() and
   * biopetScript() are called, not its final script(); when both pipelines are a [[SummaryQScript]]
   * the sub pipeline is also registered with addSummaryQScript. Any other QScript is run through
   * its own script().
   *
   * @param subPipeline pipeline of which the functions are added to this pipeline
   */
  def add(subPipeline: QScript): Unit = {
    subPipeline.qSettings = this.qSettings
    subPipeline match {
      case that: SummaryQScript =>
        that.init()
        that.biopetScript()
        this match {
          case s: SummaryQScript => s.addSummaryQScript(that)
          case _                 =>
        }
      case that: BiopetQScript =>
        that.init()
        that.biopetScript()
      case _ => subPipeline.script()
    }
    addAll(subPipeline.functions)
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
186 187

/** Helper methods and types that belong to the BiopetQScript trait */
object BiopetQScript {

  /**
   * Input file of a pipeline
   *
   * @param file location of the file
   * @param md5 optional md5 value for the file (NOTE(review): presumably the expected checksum - confirm)
   */
  case class InputFile(file: File, md5: Option[String] = None)

  /**
   * Checks the permissions on the output directory and its parent, and tries to create the directory.
   *
   * Only the last path element is created (mkdir, not mkdirs), so the parent has to exist.
   *
   * @param outputDir directory to check / create
   * @throws IllegalArgumentException when the parent or the directory itself is not readable or writable
   */
  def checkOutputDir(outputDir: File): Unit = {
    val absoluteDir = outputDir.getAbsoluteFile
    // Sanity checks. A filesystem root has no parent (getParentFile is null), nothing to check then.
    // The absolute parent is reported, a relative path like "out" has no parent of its own.
    Option(absoluteDir.getParentFile).foreach { parent =>
      require(parent.canRead, s"No premision to read parent of outputdir: $parent")
      require(parent.canWrite, s"No premision to write parent of outputdir: $parent")
    }
    outputDir.mkdir()
    require(absoluteDir.canRead, s"No premision to read outputdir: $outputDir")
    require(absoluteDir.canWrite, s"No premision to write outputdir: $outputDir")
  }

  /**
   * Evaluates the given value, a NullPointerException thrown while doing so results in None.
   * Other exceptions are not caught.
   * NOTE(review): presumably needed because a function can be queried before its fields are set - confirm
   */
  private def nullSafe[T](value: => T): Option[T] =
    try Some(value)
    catch {
      case _: NullPointerException => None
    }

  /** Inputs of the function, None when fetching them throws a NullPointerException */
  def safeInputs(function: QFunction): Option[Seq[File]] = nullSafe(function.inputs)

  /** Outputs of the function, None when fetching them throws a NullPointerException */
  def safeOutputs(function: QFunction): Option[Seq[File]] = nullSafe(function.outputs)

  /** Done files of the function, None when fetching them throws a NullPointerException */
  def safeDoneFiles(function: QFunction): Option[Seq[File]] = nullSafe(function.doneOutputs)

  /** Fail files of the function, None when fetching them throws a NullPointerException */
  def safeFailFiles(function: QFunction): Option[Seq[File]] = nullSafe(function.failOutputs)

  /** Value of isDone of the function, None when fetching it throws a NullPointerException */
  def safeIsDone(function: QFunction): Option[Boolean] = nullSafe(function.isDone)

  /** Value of isFail of the function, None when fetching it throws a NullPointerException */
  def safeIsFail(function: QFunction): Option[Boolean] = nullSafe(function.isFail)
}