BiopetQScript.scala 7.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
17
package nl.lumc.sasc.biopet.core

import java.io.File
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, WriteSummary }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
/**
 * Base for biopet pipeline.
 *
 * This trait can only be mixed into a `QScript` (see the self-type). Implementations provide
 * [[init]] and [[biopetScript]]; the final [[script]] method calls both and then performs the
 * shared post-processing and sanity checks on the collected functions.
 */
trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>

  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'namespace:namespace:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = Nil

  /**
   * Output directory of pipeline.
   *
   * Initialised from the "output_dir" config value when the config contains it, otherwise the
   * current directory ("."). It is reassigned to an absolute path at the start of [[script]].
   */
  var outputDir: File = {
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")
  }

  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /** Named output files of this pipeline; starts empty. */
  var outputFiles: Map[String, File] = Map()

  /** Alias so implementations can refer to [[BiopetQScript.InputFile]] without an import. */
  type InputFile = BiopetQScript.InputFile

  /** Input files that [[script]] checks for existence, readability and an absolute path. */
  var inputFiles: List[InputFile] = Nil

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var qSettings: QSettings

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var functions: Seq[QFunction]

  /** Init for pipeline */
  def init(): Unit

  /** Pipeline itself */
  def biopetScript(): Unit

  /** Returns the extension to make the report */
  def reportClass: Option[ReportBuilderExtension] = None

  /** When true, [[script]] does not call `WriteDependencies.writeDependencies`. */
  val skipWriteDependencies: Boolean = config("skip_write_dependencies", default = false)

  /**
   * Script from queue itself, final to force some checks for each pipeline and write report.
   *
   * Steps, in order:
   *  1. resolve [[outputDir]] to an absolute path and validate it with `BiopetQScript.checkOutputDir`;
   *  2. run [[init]] and [[biopetScript]];
   *  3. when [[disableScatter]] is set, force `scatterCount = 1` on every scatter-gatherable function;
   *  4. add the report function from [[reportClass]], unless this is a `MultiSampleQScript`
   *     restricted to a subset of its samples;
   *  5. run the pre-graph hooks of every function;
   *  6. write the config report, check the input files, assign default job output files and
   *     optionally write the dependency graph;
   *  7. call `Logging.checkErrors()`.
   */
  final def script(): Unit = {
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile

    BiopetQScript.checkOutputDir(outputDir)

    init()
    biopetScript()
    logger.info("Biopet script done")

    if (disableScatter) {
      logger.info("Disable scatters")
      functions.foreach {
        case scatterable: ScatterGatherableFunction => scatterable.scatterCount = 1
        case _                                      =>
      }
    }

    this match {
      case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
        logger.info("Write report is skipped because sample flag is used")
      case _ => reportClass.foreach { report =>
        add(report)
      }
    }

    logger.info("Running pre commands")
    functions.zipWithIndex.foreach {
      case (function, index) =>
        function match {
          case f: BiopetCommandLineFunction =>
            f.preProcessExecutable()
            f.beforeGraph()
            f.internalBeforeGraph()
            // The result is discarded; presumably this forces the command line to be built so
            // problems surface here -- TODO confirm.
            f.commandLine
          case f: WriteSummary => f.init()
          case _               =>
        }
        val processed = index + 1
        if (processed % 500 == 0) logger.info(s"Preprocessing done for ${processed} jobs")
    }

    val logDir = new File(outputDir, s".log${File.separator}${qSettings.runName.toLowerCase}")

    // getParentFile returns null for a path without a parent (e.g. the filesystem root), so it
    // is wrapped in Option instead of being dereferenced directly.
    val parentWritable = Option(outputDir.getParentFile).exists(_.canWrite)
    if (parentWritable || (outputDir.exists && outputDir.canWrite))
      globalConfig.writeReport(new File(logDir, "config"))
    else Logging.addError(s"Parent of output dir: '${outputDir.getParent}' is not writeable, output directory cannot be created")

    logger.info("Checking input files")
    inputFiles.par.foreach { input =>
      if (!input.file.exists()) Logging.addError(s"Input file does not exist: ${input.file}")
      if (!input.file.canRead) Logging.addError(s"Input file can not be read: ${input.file}")
      if (!input.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${input.file}")
    }

    // Give every function without an explicit job output file a default one next to its first output.
    functions.filter(_.jobOutputFile == null).foreach { function =>
      val functionClass = function.getClass
      val className =
        if (functionClass.isAnonymousClass) functionClass.getSuperclass.getSimpleName
        else functionClass.getSimpleName
      function.jobOutputFile = BiopetQScript.safeOutputs(function) match {
        // An empty output list has no head; fall through to the fallback instead of throwing.
        case Some(outputs) if outputs.nonEmpty =>
          new File(outputs.head.getAbsoluteFile.getParent, s".${function.firstOutput.getName}.$className.out")
        case _ => new File("./stdout") // Line is here for test backup
      }
    }

    if (!skipWriteDependencies) WriteDependencies.writeDependencies(
      functions,
      new File(logDir, "graph"))

    Logging.checkErrors()
    logger.info("Script complete without errors")
  }

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def add(functions: QFunction*): Unit

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit

  /** Function to set isIntermediate and add in 1 line */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    add(function)
  }

  /**
   * Adds all functions of a sub pipeline to this pipeline.
   *
   * The sub pipeline receives the `qSettings` of this pipeline. For a `SummaryQScript` or another
   * `BiopetQScript` only `init()` and `biopetScript()` are called (not its `script()`); any other
   * `QScript` has its `script()` called. A `SummaryQScript` sub pipeline is additionally registered
   * through `addSummaryQScript` when this pipeline is a `SummaryQScript` as well.
   *
   * @param subPipeline pipeline whose functions are added to this one
   */
  def add(subPipeline: QScript): Unit = {
    subPipeline.qSettings = this.qSettings
    subPipeline match {
      case that: SummaryQScript =>
        that.init()
        that.biopetScript()
        this match {
          case s: SummaryQScript => s.addSummaryQScript(that)
          case _                 =>
        }
      case that: BiopetQScript =>
        that.init()
        that.biopetScript()
      case _ => subPipeline.script()
    }
    addAll(subPipeline.functions)
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171

/** Helpers shared by all [[BiopetQScript]] pipelines. */
object BiopetQScript {

  /**
   * An input file of a pipeline.
   *
   * @param file the input file
   * @param md5 optional md5 string belonging to the file (not used inside this object)
   */
  case class InputFile(file: File, md5: Option[String] = None)

  /**
   * Sanity checks on the output directory.
   *
   * Requires the parent of the (absolute) output directory to be readable and writable, calls
   * `mkdir()` on the output directory and then requires the directory itself to be readable
   * and writable.
   *
   * @param outputDir directory to check
   * @throws IllegalArgumentException when one of the permission checks fails
   */
  def checkOutputDir(outputDir: File): Unit = {
    val absoluteDir = outputDir.getAbsoluteFile
    // getParentFile is null for a directory without a parent (filesystem root); there is nothing
    // to check in that case. The messages report the absolute parent that was actually checked,
    // not the parent of a possibly relative path (which can be null).
    Option(absoluteDir.getParentFile).foreach { parent =>
      require(parent.canRead, s"No premision to read parent of outputdir: $parent")
      require(parent.canWrite, s"No premision to write parent of outputdir: $parent")
    }
    outputDir.mkdir()
    require(absoluteDir.canRead, s"No premision to read outputdir: $outputDir")
    require(absoluteDir.canWrite, s"No premision to write outputdir: $outputDir")
  }

  /**
   * Evaluates `value` and wraps the result in `Some`; a `NullPointerException` thrown during
   * evaluation yields `None`. Other exceptions propagate.
   */
  private def noneOnNullPointer[T](value: => T): Option[T] =
    try Some(value)
    catch {
      case _: NullPointerException => None
    }

  /** Returns `function.inputs`, or `None` when reading them throws a `NullPointerException`. */
  def safeInputs(function: QFunction): Option[Seq[File]] = noneOnNullPointer(function.inputs)

  /** Returns `function.outputs`, or `None` when reading them throws a `NullPointerException`. */
  def safeOutputs(function: QFunction): Option[Seq[File]] = noneOnNullPointer(function.outputs)

  /** Returns `function.doneOutputs`, or `None` when reading them throws a `NullPointerException`. */
  def safeDoneFiles(function: QFunction): Option[Seq[File]] = noneOnNullPointer(function.doneOutputs)

  /** Returns `function.failOutputs`, or `None` when reading them throws a `NullPointerException`. */
  def safeFailFiles(function: QFunction): Option[Seq[File]] = noneOnNullPointer(function.failOutputs)

  /** Returns `function.isDone`, or `None` when evaluating it throws a `NullPointerException`. */
  def safeIsDone(function: QFunction): Option[Boolean] = noneOnNullPointer(function.isDone)

  /** Returns `function.isFail`, or `None` when evaluating it throws a `NullPointerException`. */
  def safeIsFail(function: QFunction): Option[Boolean] = noneOnNullPointer(function.isFail)

}