BiopetQScript.scala 6.34 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16 17
package nl.lumc.sasc.biopet.core

import java.io.File
18

19
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
23
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
/**
 * Base for biopet pipelines.
 *
 * This trait must be mixed into a GATK Queue `QScript` (enforced by the self-type).
 * The final [[script]] method drives a run: it resolves the output directory, calls
 * [[init]] and [[biopetScript]], and afterwards performs a series of checks on the
 * collected functions and input files before the accumulated errors are evaluated.
 */
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>

  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'namespace:namespace:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = Nil

  /**
   * Output directory of the pipeline.
   *
   * Initialised from the "output_dir" config value (looked up with an empty path) when
   * that key is present, otherwise from the current directory ("."). [[script]]
   * reassigns it from the config and makes it absolute.
   */
  var outputDir: File = {
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")
  }

  /** When true, [[script]] sets `scatterCount` to 1 on every `ScatterGatherableFunction`. */
  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /** Output files by name; not read anywhere inside this trait. */
  var outputFiles: Map[String, File] = Map()

  type InputFile = BiopetQScript.InputFile

  /** Input files; [[script]] checks each one for existence, readability and an absolute path. */
  var inputFiles: List[InputFile] = Nil

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var qSettings: QSettings

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var functions: Seq[QFunction]

  /** Init for pipeline; called by [[script]] before [[biopetScript]]. */
  def init(): Unit

  /** Pipeline itself; called by [[script]] directly after [[init]]. */
  def biopetScript(): Unit

  /** Returns the extension to make the report; `None` (the default) means no report is added. */
  def reportClass: Option[ReportBuilderExtension] = None

  /** Script from queue itself, final to force some checks for each pipeline and write report */
  final def script(): Unit = {
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile

    BiopetQScript.checkOutputDir(outputDir)

    init()
    biopetScript()
    logger.info("Biopet script done")

    if (disableScatter) {
      logger.info("Disable scatters")
      functions.foreach {
        case f: ScatterGatherableFunction => f.scatterCount = 1
        case _                            =>
      }
    }

    // The report is skipped when a multi sample pipeline is restricted to a subset of its
    // samples (onlySamples is non-empty and does not contain every sample).
    this match {
      case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
        logger.info("Write report is skipped because sample flag is used")
      case _ => reportClass.foreach { report =>
        report.mainFunction = true
        add(report)
      }
    }

    logger.info("Running pre commands")
    functions.foreach {
      case f: BiopetCommandLineFunction =>
        f.preProcessExecutable()
        f.beforeGraph()
        f.internalBeforeGraph()
        // Result is discarded on purpose; NOTE(review): presumably evaluated here so that
        // problems while building the command line surface early - confirm.
        f.commandLine
      case _ =>
    }

    // getParentFile returns null for a path without a parent, so wrap it in an Option.
    val parentWritable = Option(outputDir.getParentFile).exists(_.canWrite)
    if (parentWritable || (outputDir.exists && outputDir.canWrite))
      globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName))
    else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, output directory cannot be created")

    logger.info("Checking input files")
    inputFiles.par.foreach { i =>
      if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
      if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
      if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}")
    }

    // Give every function without a jobOutputFile a hidden ".<firstOutput>.<class>.out" file
    // next to its first output. A NullPointerException here only produces a warning.
    functions.filter(_.jobOutputFile == null).foreach { f =>
      try {
        val className = if (f.getClass.isAnonymousClass) f.getClass.getSuperclass.getSimpleName else f.getClass.getSimpleName
        f.jobOutputFile = new File(f.firstOutput.getAbsoluteFile.getParent, "." + f.firstOutput.getName + "." + className + ".out")
      } catch {
        case _: NullPointerException => logger.warn(s"Can't generate a jobOutputFile for $f")
      }
    }

    WriteDependencies.writeDependencies(functions, new File(outputDir, ".log"), qSettings.runName)

    Logging.checkErrors()
    logger.info("Script complete without errors")
  }

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def add(functions: QFunction*): Unit

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit

  /** Function to set isIntermediate and add in 1 line */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    add(function)
  }

  /**
   * Adds all functions of a sub pipeline to this pipeline.
   *
   * The sub pipeline receives this pipeline's `qSettings`. For a [[SummaryQScript]] or
   * another [[BiopetQScript]] only `init()` and `biopetScript()` are called (not the
   * final `script()`); any other `QScript` has its `script()` called. When both this
   * pipeline and the sub pipeline are a [[SummaryQScript]], the sub pipeline is also
   * registered through `addSummaryQScript`.
   *
   * @param subPipeline pipeline whose functions are added to this one
   */
  def add(subPipeline: QScript): Unit = {
    subPipeline.qSettings = this.qSettings
    subPipeline match {
      case that: SummaryQScript =>
        that.init()
        that.biopetScript()
        this match {
          case s: SummaryQScript => s.addSummaryQScript(that)
          case _                 =>
        }
      case that: BiopetQScript =>
        that.init()
        that.biopetScript()
      case _ => subPipeline.script()
    }
    addAll(subPipeline.functions)
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
160 161

/** Companion holding the input file type and the output directory sanity checks. */
object BiopetQScript {
  /**
   * Input file of a pipeline.
   *
   * @param file the input file
   * @param md5 optional md5 value belonging to the file
   */
  case class InputFile(file: File, md5: Option[String] = None)

  /**
   * Creates the output directory when needed and verifies it is usable.
   *
   * Only a single directory level is created (`mkdir`), so the parent directory must
   * already exist. A path without a parent (a filesystem root) skips the parent checks.
   *
   * @param outputDir output directory to check; a relative path is resolved to an absolute one
   * @throws IllegalArgumentException when the parent or the directory itself is not
   *                                  readable and writable
   */
  def checkOutputDir(outputDir: File): Unit = {
    val absoluteDir = outputDir.getAbsoluteFile

    // Sanity checks. Report the parent that is actually tested: outputDir.getParentFile
    // is null for a single-component relative path, which used to print "null".
    Option(absoluteDir.getParentFile).foreach { parent =>
      require(parent.canRead, s"No permission to read parent of outputdir: $parent")
      require(parent.canWrite, s"No permission to write parent of outputdir: $parent")
    }

    // The result of mkdir is not inspected; a failed creation is caught by the checks below.
    absoluteDir.mkdir()
    require(absoluteDir.canRead, s"No permission to read outputdir: $outputDir")
    require(absoluteDir.canWrite, s"No permission to write outputdir: $outputDir")
  }
}