WriteSummary.scala 7.72 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.core.summary

17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.LastCommitHash
23
24
25
import nl.lumc.sasc.biopet.utils.summary.SummaryDb
import org.broadinstitute.gatk.queue.function.{InProcessFunction, QFunction}
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28

import scala.collection.mutable
import scala.io.Source
29
import slick.driver.H2Driver.api._
30
31

/**
 * This will collect and write the summary
 *
 * Created by pjvan_thof on 2/14/15.
 */
36
class WriteSummary(val parent: SummaryQScript) extends InProcessFunction with Configurable {
37
38
  this.analysisName = getClass.getSimpleName

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  /** To access qscript for this summary; this is the `parent` given to the constructor */
  val qscript = parent

  /** Input files of this job; `init()` adds the summary files, summary deps and first outputs it collects */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  /** Declared output of this job, initialised to the summary file of the qscript */
  @Output(doc = "Summary output", required = true)
  var out: File = qscript.summaryFile

  /** When true, `parseFile` adds an "md5" entry per file; read from config key "summary_md5" (default true) */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /** Calls `init()` first, then delegates to the parent implementation */
  override def freezeFieldValues(): Unit = {
    init()
    super.freezeFieldValues()
  }

  /**
   * Collects the inputs of this job and sets its job output file.
   *
   * Adds to `deps` the summary file of every child summary qscript, the summary deps of every
   * summarizable and, for summarizables that are functions of this qscript, their first output.
   * The job output file is placed next to the summary db file as a hidden ".<name>.out" file.
   */
  def init(): Unit = {
    // This initialize the database (only done when this qscript is the root)
    if (qscript == root) {
      qscript match {
        case multiSample: MultiSampleQScript => multiSample.initSummaryDb
        case _                               => qscript.summaryRunId
      }
    }

    qscript.summaryQScripts.foreach { child =>
      deps :+= child.summaryFile
    }

    for {
      (_, summarizables) <- qscript.summarizables
      summarizable <- summarizables
    } {
      deps :::= summarizable.summaryDeps
      summarizable match {
        case function: QFunction if qscript.functions.contains(function) =>
          try {
            deps :+= function.firstOutput
          } catch {
            // firstOutput can fail with a NullPointerException here; it is only logged
            case _: NullPointerException => logger.debug("Queue values are not initialized")
          }
        case _ =>
      }
    }

    val outputDir = qscript.summaryDbFile.getParentFile
    val baseName = qscript.summaryDbFile.getName.stripSuffix(".db")
    jobOutputFile = new File(outputDir, "." + baseName + ".out")
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
80
  /**
   * Function to create summary
   *
   * Opens the sqlite summary database of the qscript, calls `addToQscriptSummary` on every
   * summarizable and assembles the pipeline, jobs and combined summary maps. The database handle
   * is closed when the method finishes, also when one of these steps throws.
   */
  def run(): Unit = {
    val summaryDb = SummaryDb.openSqliteSummary(qscript.summaryDbFile)

    // try/finally so the database handle is released even when building the summary fails
    try {
      for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
        summarizable.addToQscriptSummary(qscript, name)
      }

      // Pipeline level part of the summary: settings, files and executables of this qscript
      val pipelineMap = {
        val files = parseFiles(qscript.summaryFiles)
        val settings = qscript.summarySettings
        val executables: Map[String, Any] = {

          /** Returns the config namespace with version details when the function has a version */
          def fetchVersion(f: QFunction): Option[(String, Any)] = {
            f match {
              case f: BiopetJavaCommandLineFunction with Version =>
                Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                  "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                  "java_version" -> f.getJavaVersion,
                  "jar_path" -> f.jarFile))
              case f: BiopetCommandLineFunction with Version =>
                Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                  "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                  "path" -> f.executable))
              case f: Configurable with Version =>
                Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None)))
              case _ => None
            }
          }

          // Functions of the qscript itself plus the jobs that are piped inside them
          (
            qscript.functions.flatMap(fetchVersion) ++
            qscript.functions
            .flatMap {
              case f: BiopetCommandLineFunction => f.pipesJobs
              case _                            => Nil
            }.flatMap(fetchVersion(_))
          ).toMap
        }

        val map = Map(qscript.summaryName -> Map(
          "settings" -> settings,
          "files" -> Map("pipeline" -> files),
          "executables" -> executables.toMap)
        )

        qscript match {
          case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
          case q: MultiSampleQScript =>
            ConfigUtils.mergeMaps(
              Map("samples" -> q.samples.map {
                case (sampleName, sample) =>
                  sampleName -> Map(
                    qscript.summaryName -> Map(
                      "settings" -> sample.summarySettings,
                      "tags" -> sample.sampleTags),
                    "libraries" -> sample.libraries.map {
                      case (libName, lib) =>
                        libName -> Map(
                          qscript.summaryName -> Map(
                            "settings" -> lib.summarySettings,
                            "tags" -> lib.libTags)
                        )
                    }
                  )
              }), map)
          case _ => map
        }
      }

      // One map per summarizable, merged into the pipeline map using the conflict
      // resolver of the summarizable the values came from
      val jobsMap = (for (
        ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
        summarizable <- summarizables
      ) yield {
        val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

        (prefixSampleLibrary(map, sampleId, libraryId),
          (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
      }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

      // Summary files of the child qscripts merged on top of the jobs map, plus a "meta" section.
      // The result is currently not written anywhere: the writer below is commented out.
      val combinedMap = (for (child <- qscript.summaryQScripts) yield {
        ConfigUtils.fileToConfigMap(child.summaryFile)
      }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
        Map("meta" -> Map(
          "last_commit_hash" -> LastCommitHash,
          "pipeline_version" -> nl.lumc.sasc.biopet.Version,
          "pipeline_name" -> qscript.summaryName,
          "output_dir" -> qscript.outputDir,
          "run_name" -> config("run_name", default = qSettings.runName).asString,
          "summary_creation" -> System.currentTimeMillis()
        ))

      //    val writer = new PrintWriter(out)
      //    writer.println(ConfigUtils.mapToJson(combinedMap).nospaces)
      //    writer.close()
    } finally {
      summaryDb.close()
    }
  }
177

178
179
  /**
   * Nest a summary map under its sample and library.
   *
   * With a sample id the map ends up under "samples" -> sampleId, and with a library id as well
   * under "samples" -> sampleId -> "libraries" -> libraryId. Without a sample id the map is
   * returned unchanged, whether or not a library id is given.
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    sampleId.fold(map) { sample =>
      val sampleContent = libraryId.fold(map) { library =>
        Map("libraries" -> Map(library -> map))
      }
      Map("samples" -> Map(sample -> sampleContent))
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
188
  /**
   * Convert summarizable to a summary map
   *
   * The stats are always stored under "stats" -> name; the parsed files are stored under
   * "files" -> name only when the summarizable has summary files.
   */
  def parseSummarizable(summarizable: Summarizable, name: String) = {
    val statsEntry = Map("stats" -> Map(name -> summarizable.summaryStats))
    val parsedFiles = parseFiles(summarizable.summaryFiles)
    val filesEntry =
      if (parsedFiles.isEmpty) Map[String, Any]()
      else Map("files" -> Map(name -> parsedFiles))

    statsEntry ++ filesEntry
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
197
  /** Parse files map to summary map: every file is replaced by its `parseFile` summary, keys are kept */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (key, file) => key -> parseFile(file) }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
202
  /**
   * Parse a single file to its summary map.
   *
   * The map always holds the absolute path under "path"; when `md5sum` is enabled the checksum
   * parsed from the file's cached md5sum file is added under "md5".
   */
  def parseFile(file: File): Map[String, Any] = {
    val entries = mutable.Map[String, Any]("path" -> file.getAbsolutePath)
    if (md5sum) {
      entries("md5") = WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file))
    }
    entries.toMap
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
209
}
210

Peter van 't Hof's avatar
Peter van 't Hof committed
211
object WriteSummary {

  /**
   * Retrieve checksum from file
   *
   * The checksum is everything on the first line of the file up to the first space.
   *
   * @param checksumFile file holding the checksum at the start of its first line
   * @return the text before the first space on the first line (empty when the line starts with a space)
   * @throws NoSuchElementException when the file has no lines
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      // Only the first line is needed, so the rest of the file is never read
      if (!lines.hasNext) throw new NoSuchElementException(s"Checksum file is empty: $checksumFile")
      // Same result as split(" ")(0), but a line holding only spaces yields "" instead of failing
      lines.next().takeWhile(_ != ' ')
    } finally {
      // Always release the file handle
      source.close()
    }
  }
}