SummaryQScript.scala 8.09 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.core.summary

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.sql.Date
19

20
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.extensions.{ CheckChecksum, Md5sum }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
23
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.LastCommitHash
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
import scala.collection.mutable
27
28
import scala.concurrent.Await
import scala.concurrent.duration.Duration
Peter van 't Hof's avatar
WIP    
Peter van 't Hof committed
29
import scala.concurrent.ExecutionContext.Implicits.global
30
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
31

32
/**
 * This trait is used for qscripts / pipelines that will produce a summary
 *
 * Created by pjvan_thof on 2/14/15.
 */
37
trait SummaryQScript extends BiopetQScript { qscript: QScript =>
38
39

  /** Key is (module name, sample id, library id); None means the sample or library is not applicable */
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  private[summary] var summarizables: Map[(String, Option[String], Option[String]), List[Summarizable]] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
41
42

  /** Qscripts whose summaries need to be merged into this summary */
43
44
  private[summary] var summaryQScripts: List[SummaryQScript] = Nil

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** Name of the pipeline in the summary */
Sander Bollen's avatar
Sander Bollen committed
46
  var summaryName = configNamespace
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Must return a map with used settings for this pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
50
  def summarySettings: Map[String, Any]

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  /** Files to put in the summary for this pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
  def summaryFiles: Map[String, File]

54
  /**
   * Location of the sqlite summary database of this run.
   *
   * The file is placed in the output directory of the root pipeline and is named after
   * the summary name of that root.
   *
   * @throws IllegalStateException when the root is not a [[SummaryQScript]]
   */
  def summaryDbFile: File = {
    val rootScript = root match {
      case summaryRoot: SummaryQScript => summaryRoot
      case _                           => throw new IllegalStateException("Root should be a SummaryQScript")
    }
    val dbDir = rootScript.outputDir
    new File(dbDir, s"${rootScript.summaryName}.summary.db")
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
62
63
  /**
   * Register a module in the summary of this pipeline.
   *
   * When this qscript is itself a [[SampleLibraryTag]], the sample and library of that tag
   * are used and the `sampleId` / `libraryId` arguments are ignored.
   *
   * @param summarizable summarizable to add to the summary of this pipeline
   * @param name Name of the module
   * @param sampleId Id of the sample
   * @param libraryId Id of the library, can only be used together with a sample
   * @param forceSingle If true the summarizable replaces what was already registered
   *                    under this key instead of being added to it
   */
  def addSummarizable(summarizable: Summarizable,
                      name: String,
                      sampleId: Option[String] = None,
                      libraryId: Option[String] = None,
                      forceSingle: Boolean = false): Unit = {
    val (sample, library) = this match {
      case tagged: SampleLibraryTag => (tagged.sampleId, tagged.libId)
      case _                        => (sampleId, libraryId)
    }
    // A library can not exist without a sample
    require(library.isEmpty || sample.isDefined)

    val key = (name, sample, library)
    // With forceSingle the existing entry is dropped first, so only the new summarizable remains
    val kept = if (forceSingle) summarizables - key else summarizables
    summarizables = kept + (key -> (summarizable :: kept.getOrElse(key, Nil)))
  }

82
  /** Register another qscript whose summary is merged into the output summary */
  def addSummaryQScript(summaryQScript: SummaryQScript): Unit =
    summaryQScripts = summaryQScripts ::: List(summaryQScript)

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
  private var addedJobs = false

89
  /**
   * Id of this run in the summary database.
   *
   * The id is read from the first line of the run id file when both that file and the summary
   * database already exist; otherwise a new run is created. Being a lazy val, the value is
   * resolved on first access.
   */
  final lazy val summaryRunId: Int = {
    if (runIdFile.exists() && summaryDbFile.exists()) {
      val reader = Source.fromFile(runIdFile)
      // Always release the file handle, also when the file is empty or does not hold a number
      try reader.getLines().next().toInt
      finally reader.close()
    } else createRun()
  }

  /**
   * File in which the id of the current run is stored: `.log/summary.runid` inside the
   * output directory of the root pipeline.
   *
   * @throws IllegalStateException when the root is not a [[SummaryQScript]]
   */
  private def runIdFile: File = root match {
    case s: SummaryQScript => new File(s.outputDir, ".log/summary.runid")
    case _                 => throw new IllegalStateException("Root should be a SummaryQScript")
  }

  /**
   * Creates a new run in the summary database and stores its id in the run id file.
   *
   * @return id of the newly created run
   * @throws IllegalStateException when the root is not a [[SummaryQScript]]
   */
  private def createRun(): Int = {
    val db = SummaryDb.openSqliteSummary(summaryDbFile)
    val (name, dir) = root match {
      case q: SummaryQScript => (q.summaryName, q.outputDir)
      case _                 => throw new IllegalStateException("Root should be a SummaryQScript")
    }
    // Blocks until the database returns the id of the new run
    val id = Await.result(db.createRun(name, dir.getAbsolutePath, nl.lumc.sasc.biopet.Version,
      LastCommitHash, new Date(System.currentTimeMillis())), Duration.Inf)

    // mkdirs instead of mkdir: the parent directories might not exist yet
    runIdFile.getParentFile.mkdirs()
    val writer = new PrintWriter(runIdFile)
    try writer.println(id)
    finally writer.close()
    id
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
122
  /**
   * Add jobs to qscript to execute summary, also add checksum jobs.
   *
   * Md5sum jobs are only added when md5sum is enabled on the [[WriteSummary]] job. They are
   * shared through `SummaryQScript.md5sumCache`, so a file gets at most one md5sum job.
   * Input files that come with an expected md5 additionally get a [[CheckChecksum]] job,
   * provided a md5 file is available for them.
   *
   * @throws IllegalStateException when the summary jobs were already added to this qscript
   */
  def addSummaryJobs(): Unit = {
    if (addedJobs) throw new IllegalStateException("Summary jobs for this QScript are already executed")
    val writeSummary = new WriteSummary(this)

    /** Adds a md5sum job for the file, or reuses the cached one, and lets the summary depend on its output */
    def addChecksum(file: File): Unit = {
      if (writeSummary.md5sum) {
        if (!SummaryQScript.md5sumCache.contains(file)) {
          val md5sum = new Md5sum(this) {
            override def configNamespace = "md5sum"

            // Shell fallback: when the md5sum command fails an "error_on_capture" line is echoed to the output
            override def cmdLine: String = super.cmdLine + " || " +
              required("echo") + required("error_on_capture  " + input.toString) + " > " + required(output)
          }
          md5sum.input = file
          md5sum.output = if (file.getAbsolutePath.startsWith(outputDir.getAbsolutePath))
            new File(file.getParentFile, file.getName + ".md5")
          else {
            // Need to not write a md5 file outside the outputDir
            new File(outputDir, ".md5" + file.getAbsolutePath + ".md5")
          }
          md5sum.jobOutputFile = new File(md5sum.output.getParentFile, s".${file.getName}.md5.md5sum.out")

          writeSummary.deps :+= md5sum.output
          SummaryQScript.md5sumCache += file -> md5sum.output
          add(md5sum)
        } else writeSummary.deps :+= SummaryQScript.md5sumCache(file)
      }
      //TODO: add more checksums types
    }

    for ((_, summarizableList) <- summarizables; summarizable <- summarizableList) {
      summarizable match {
        case f: BiopetCommandLineFunction => f.beforeGraph()
        case _                            =>
      }
    }

    //Automatic checksums
    for ((_, summarizableList) <- summarizables; summarizable <- summarizableList; (_, file) <- summarizable.summaryFiles) {
      addChecksum(file)
      summarizable match {
        case f: BiopetJavaCommandLineFunction => if (f.jarFile != null) addChecksum(f.jarFile)
        case _                                =>
      }
    }

    qscript match {
      case q: MultiSampleQScript =>
        // Global level
        for ((_, file) <- qscript.summaryFiles) addChecksum(file)

        for ((_, sample) <- q.samples) {
          // Sample level
          for ((_, file) <- sample.summaryFiles) addChecksum(file)
          for ((_, lib) <- sample.libraries) {
            // Library level
            for ((_, file) <- lib.summaryFiles) addChecksum(file)
          }
        }
      case q => for ((_, file) <- q.summaryFiles) addChecksum(file)
    }

    for (inputFile <- inputFiles) {
      inputFile.md5 match {
        case Some(checksum) =>
          if (!SummaryQScript.md5sumCache.contains(inputFile.file))
            addChecksum(inputFile.file)

          // addChecksum does nothing when md5sum is disabled, in that case the cache may not
          // hold a md5 file for this input and a blind lookup would throw NoSuchElementException
          SummaryQScript.md5sumCache.get(inputFile.file) match {
            case Some(checksumFile) =>
              val checkMd5 = new CheckChecksum
              checkMd5.inputFile = inputFile.file
              checkMd5.checksumFile = checksumFile
              checkMd5.checksum = checksum
              checkMd5.jobOutputFile = new File(checksumFile.getParentFile, checksumFile.getName + ".check.out")
              add(checkMd5)
            case None =>
              logger.warn(s"Checksum of input file '${inputFile.file}' is not verified because no md5sum job is available for it")
          }
        case _ =>
      }
    }

    // NOTE(review): the summary files of this qscript were already visited in the match above
    // (`qscript` is the self alias of this trait); this pass looks redundant - confirm before removing
    for ((_, file) <- this.summaryFiles)
      addChecksum(file)

    this match {
      case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
        logger.info("Write summary is skipped because sample flag is used")
      case _ => add(writeSummary)
    }

    addedJobs = true
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214

object SummaryQScript {

  /** Registry of md5sum jobs that were already added (file -> md5 output file), prevents duplicate jobs */
  protected[summary] val md5sumCache: mutable.Map[File, File] = mutable.Map.empty[File, File]
}