BiopetQScript.scala 4.89 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
package nl.lumc.sasc.biopet.core

import java.io.File
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
import org.broadinstitute.gatk.queue.QSettings
import org.broadinstitute.gatk.queue.function.QFunction
25
import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
27
28
import org.broadinstitute.gatk.utils.commandline.Argument

Peter van 't Hof's avatar
Peter van 't Hof committed
29
/**
 * Base for biopet pipeline.
 *
 * Declares the command line arguments shared by pipelines (config files, config values and the
 * scatter switch) and a final `script()` that runs `init()` and `biopetScript()` and afterwards
 * performs a series of checks on the added functions, the output directory and the input files.
 */
trait BiopetQScript extends Configurable with GatkLogging {

  @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

  @Argument(doc = "Config values, value should be formatted like 'key=value' or 'path:path:key=value'", fullName = "config_value", shortName = "cv", required = false)
  val configValues: List[String] = Nil

  /** Output directory of pipeline; the "output_dir" config value when present, otherwise "." */
  var outputDir: File = {
    if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
    else new File(".")
  }

  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

  /** Output files by name; not filled by this trait itself (presumably by implementing pipelines) */
  var outputFiles: Map[String, File] = Map()

  type InputFile = BiopetQScript.InputFile

  /** Input files; `script()` reports an error for each one that does not exist or can not be read */
  var inputFiles: List[InputFile] = Nil

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var qSettings: QSettings

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  var functions: Seq[QFunction]

  /** Init for pipeline, called by `script()` before `biopetScript()` */
  def init(): Unit

  /** Pipeline itself, called by `script()` directly after `init()` */
  def biopetScript(): Unit

  /** Returns the extension to make the report, `None` means no report job is added */
  def reportClass: Option[ReportBuilderExtension] = None

  /**
   * Script from queue itself, final to force some checks for each pipeline and write report.
   *
   * Steps, in order: resolve the output directory, run `init()` and `biopetScript()`, optionally
   * disable scattering, add the report job, prepare every [[BiopetCommandLineFunction]], write the
   * config report, validate the input files, assign default job output files and finally let
   * `Logging.checkErrors()` handle the errors collected along the way.
   */
  final def script(): Unit = {
    outputDir = config("output_dir")
    outputDir = outputDir.getAbsoluteFile
    init()
    biopetScript()

    // With scatters disabled every scatter-gatherable function gets a scatter count of 1
    if (disableScatter) for (function <- functions) function match {
      case f: ScatterGatherableFunction => f.scatterCount = 1
      case _                            =>
    }

    // The report is skipped when the sample flag selects only a subset of the samples
    this match {
      case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
        logger.info("Write report is skipped because sample flag is used")
      case _ => reportClass.foreach(add(_))
    }

    for (function <- functions) function match {
      case f: BiopetCommandLineFunction =>
        f.preProcessExecutable()
        f.beforeGraph()
        f.internalBeforeGraph()
        // Result is discarded; presumably evaluated here so command line problems surface early
        f.commandLine
      case _ =>
    }

    // getParentFile returns null when the path has no parent (e.g. the filesystem root),
    // so it is wrapped in an Option instead of being dereferenced directly
    val parentWritable = Option(outputDir.getParentFile).exists(_.canWrite)
    if (parentWritable || (outputDir.exists && outputDir.canWrite))
      globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName))
    else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, outputdir can not be created")

    inputFiles.foreach { i =>
      if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
      else if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
    }

    // Functions without an explicit job output file get a hidden ".<name>.<configName>.out"
    // file next to their first output
    functions.filter(_.jobOutputFile == null).foreach(f => {
      try {
        f.jobOutputFile = new File(f.firstOutput.getAbsoluteFile.getParent, "." + f.firstOutput.getName + "." + configName + ".out")
      } catch {
        // Best effort: presumably firstOutput is null for functions without outputs
        case _: NullPointerException => logger.warn(s"Can't generate a jobOutputFile for $f")
      }
    })

    if (logger.isDebugEnabled) WriteDependencies.writeDependencies(functions, new File(outputDir, s".log/${qSettings.runName}.deps.json"))

    Logging.checkErrors()
  }

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def add(functions: QFunction*): Unit

  /** Get implemented from org.broadinstitute.gatk.queue.QScript */
  def addAll(functions: scala.Traversable[org.broadinstitute.gatk.queue.function.QFunction]): Unit

  /**
   * Function to set isIntermediate and add in 1 line
   *
   * @param function function to add to the pipeline
   * @param isIntermediate value assigned to `function.isIntermediate` before the function is added
   */
  def add(function: QFunction, isIntermediate: Boolean = false): Unit = {
    function.isIntermediate = isIntermediate
    add(function)
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130

/** Companion of the [[BiopetQScript]] trait, holds the types shared by all pipelines */
object BiopetQScript {

  /**
   * Input file of a pipeline.
   *
   * @param file the input file itself
   * @param md5 optional md5 value belonging to the file, `None` by default
   */
  final case class InputFile(file: File, md5: Option[String] = None)
}