BiopetQScript.scala 7.96 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16 17
package nl.lumc.sasc.biopet.core

import java.io.File
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, WriteSummary }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
26

27 28
import scala.collection.mutable.ListBuffer

Peter van 't Hof's avatar
Peter van 't Hof committed
29
/**
 * Base for biopet pipeline.
 *
 * Must be mixed into a [[QScript]] (see the self type). A pipeline implements [[init]] and
 * [[biopetScript]]; the final [[script]] method runs both and then performs the shared checks
 * and bookkeeping for every function that was added.
 */
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>

  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'namespace:namespace:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = Nil

  /**
   * Output directory of pipeline.
   *
   * Initialised from the config key "output_dir" (looked up with an empty path) when that key is
   * present, otherwise the current directory ".". [[script]] reassigns it to an absolute file.
   */
  var outputDir: File = {
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")
  }

  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /** Output files of the pipeline by name; nothing in this trait fills or reads this map. */
  var outputFiles: Map[String, File] = Map()

  type InputFile = BiopetQScript.InputFile

  /** Input files that [[script]] checks for existence, readability and an absolute path. */
  var inputFiles: List[InputFile] = Nil

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var qSettings: QSettings

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var functions: Seq[QFunction]

  /** Init for pipeline */
  def init(): Unit

  /** Pipeline itself */
  def biopetScript(): Unit

  /** Returns the extension to make the report */
  def reportClass: Option[ReportBuilderExtension] = None

  /** When true, [[script]] does not write the dependency graph; read from config key "skip_write_dependencies". */
  val skipWriteDependencies: Boolean = config("skip_write_dependencies", default = false)

  /**
   * Script from queue itself, final to force some checks for each pipeline and write report.
   *
   * In order: resolves and checks the output directory, runs [[init]] and [[biopetScript]],
   * optionally disables scattering, adds the report job, runs the pre commands of every function,
   * writes the config report, checks the input files, assigns a stdout file to functions without
   * one, writes the dependency graph and finally calls `Logging.checkErrors()`.
   */
  final def script(): Unit = {
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile

    BiopetQScript.checkOutputDir(outputDir)

    init()
    biopetScript()
    logger.info("Biopet script done")

    if (disableScatter) {
      logger.info("Disable scatters")
      functions.foreach {
        case f: ScatterGatherableFunction => f.scatterCount = 1
        case _                            =>
      }
    }

    // The report is only added when no sample selection excludes any of the samples.
    this match {
      case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
        logger.info("Write report is skipped because sample flag is used")
      case _ => reportClass.foreach { report =>
        add(report)
      }
    }

    logger.info("Running pre commands")
    val totalCount = functions.size
    functions.zipWithIndex.foreach {
      case (function, index) =>
        function match {
          case f: BiopetCommandLineFunction =>
            f.preProcessExecutable()
            f.beforeGraph()
            f.internalBeforeGraph()
            f.commandLine
          case f: WriteSummary => f.init()
          case _               =>
        }
        // Progress is reported after every 500 processed functions.
        val processed = index + 1
        if (processed % 500 == 0) logger.info(s"Preprocessing done for $processed jobs out of $totalCount total")
    }
    logger.info(s"Preprocessing done for $totalCount functions")

    val logDir = new File(outputDir, ".log" + File.separator + qSettings.runName.toLowerCase)

    // getParentFile returns null when there is no parent; treat that as "parent not writable"
    // instead of failing with a NullPointerException.
    val parentWritable = Option(outputDir.getParentFile).exists(_.canWrite)
    if (parentWritable || (outputDir.exists && outputDir.canWrite))
      globalConfig.writeReport(new File(logDir, "config"))
    else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writable, output directory cannot be created")

    logger.info("Checking input files")
    inputFiles.par.foreach { i =>
      if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
      if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
      if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}")
    }

    logger.info("Set stdout file when not set")
    functions.filter(_.jobOutputFile == null).foreach { f =>
      val className = if (f.getClass.isAnonymousClass) f.getClass.getSuperclass.getSimpleName else f.getClass.getSimpleName
      BiopetQScript.safeOutputs(f) match {
        // Only derive the stdout location from the outputs when there is at least one output;
        // calling head on an empty sequence would throw.
        case Some(o) if o.nonEmpty =>
          f.jobOutputFile = new File(o.head.getAbsoluteFile.getParent, "." + f.firstOutput.getName + "." + className + ".out")
        case _ => f.jobOutputFile = new File("./stdout") // Line is here for test backup
      }
    }

    if (!skipWriteDependencies) WriteDependencies.writeDependencies(
      functions,
      new File(logDir, "graph"))
    else logger.debug("Write dependencies is skipped")

    Logging.checkErrors()
    logger.info("Script complete without errors")
  }

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def add(functions: QFunction*): Unit

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit

  /** Function to set isIntermediate and add in 1 line */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    add(function)
  }

  /**
   * Adds all functions of a sub pipeline to this pipeline.
   *
   * The sub pipeline gets the `qSettings` of this pipeline. For a [[SummaryQScript]] or another
   * [[BiopetQScript]] only `init()` and `biopetScript()` are executed; any other [[QScript]] has
   * its `script()` executed. A [[SummaryQScript]] is additionally registered with
   * `addSummaryQScript` when this pipeline is a [[SummaryQScript]] itself.
   *
   * @param subPipeline pipeline whose functions are added to this pipeline
   */
  def add(subPipeline: QScript): Unit = {
    subPipeline.qSettings = this.qSettings
    subPipeline match {
      case that: SummaryQScript =>
        that.init()
        that.biopetScript()
        this match {
          case s: SummaryQScript => s.addSummaryQScript(that)
          case _                 =>
        }
      case that: BiopetQScript =>
        that.init()
        that.biopetScript()
      case _ => subPipeline.script()
    }
    addAll(subPipeline.functions)
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
176 177

/** Helpers shared by all biopet pipelines. */
object BiopetQScript {

  /**
   * An input file of a pipeline.
   *
   * @param file the input file
   * @param md5  optional md5 value for the file; it is not read anywhere in this file
   */
  case class InputFile(file: File, md5: Option[String] = None)

  /**
   * Sanity checks on the output directory; creates the directory when it does not exist yet.
   *
   * @param outputDir output directory to check
   * @throws IllegalArgumentException when the parent directory or the output directory itself
   *                                  is not readable or not writable
   */
  def checkOutputDir(outputDir: File): Unit = {
    val absoluteDir = outputDir.getAbsoluteFile

    // getParentFile returns null when the path has no parent; in that case there is no parent to
    // check. The messages report the absolute parent, which is the one that is actually checked.
    Option(absoluteDir.getParentFile).foreach { parent =>
      require(parent.canRead, s"No permission to read parent of outputdir: $parent")
      require(parent.canWrite, s"No permission to write parent of outputdir: $parent")
    }

    // The result of mkdir is ignored on purpose, the checks below fail when the directory is missing.
    outputDir.mkdir()
    require(absoluteDir.canRead, s"No permission to read outputdir: $outputDir")
    require(absoluteDir.canWrite, s"No permission to write outputdir: $outputDir")
  }

  /**
   * Evaluates `value` and wraps the result in `Some`; a [[NullPointerException]] thrown during
   * the evaluation results in `None`. Any other exception is passed on to the caller.
   */
  private def nullSafe[T](value: => T): Option[T] =
    try Some(value)
    catch {
      case _: NullPointerException => None
    }

  /** Inputs of the function, or `None` when reading them throws a [[NullPointerException]]. */
  def safeInputs(function: QFunction): Option[Seq[File]] = nullSafe(function.inputs)

  /** Outputs of the function, or `None` when reading them throws a [[NullPointerException]]. */
  def safeOutputs(function: QFunction): Option[Seq[File]] = nullSafe(function.outputs)

  /** `doneOutputs` of the function, or `None` when reading them throws a [[NullPointerException]]. */
  def safeDoneFiles(function: QFunction): Option[Seq[File]] = nullSafe(function.doneOutputs)

  /** `failOutputs` of the function, or `None` when reading them throws a [[NullPointerException]]. */
  def safeFailFiles(function: QFunction): Option[Seq[File]] = nullSafe(function.failOutputs)

  /** `isDone` of the function, or `None` when reading it throws a [[NullPointerException]]. */
  def safeIsDone(function: QFunction): Option[Boolean] = nullSafe(function.isDone)

  /** `isFail` of the function, or `None` when reading it throws a [[NullPointerException]]. */
  def safeIsFail(function: QFunction): Option[Boolean] = nullSafe(function.isFail)

}