WriteSummary.scala 7.43 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.core.summary

bow's avatar
bow committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.LastCommitHash
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
27
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.collection.mutable
import scala.io.Source
28
29

/**
 * This will collect the summary information registered on a [[SummaryQScript]]
 * and write it, as JSON, to the summary file of that qscript.
 *
 * Created by pjvan_thof on 2/14/15.
 *
 * @param parent owner of this job; construction fails with an
 *               IllegalArgumentException when it is not a [[SummaryQScript]]
 */
class WriteSummary(val parent: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(parent.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

  /** To access qscript for this summary */
  val qscript: SummaryQScript = parent.asInstanceOf[SummaryQScript]

  /** Input files of this job; filled in by [[init]] */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  /** Summary file to write, defaults to the summary file of the qscript */
  @Output(doc = "Summary output", required = true)
  var out: File = qscript.summaryFile

  /** When true a "md5" entry is added for each file in the summary (config key "summary_md5", default true) */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /** Calls [[init]] before delegating to the parent implementation */
  override def freezeFieldValues(): Unit = {
    init()
    super.freezeFieldValues()
  }

  /**
   * Fills `deps` and sets `jobOutputFile`.
   *
   * Added to `deps` are: the summary file of every nested summary qscript, the summary deps of
   * every summarizable and, for summarizables that are also functions of the qscript, their
   * first output.
   */
  def init(): Unit = {
    deps ++= qscript.summaryQScripts.map(_.summaryFile)

    for ((_, summarizables) <- qscript.summarizables; summarizable <- summarizables) {
      deps :::= summarizable.summaryDeps
      summarizable match {
        case f: QFunction if qscript.functions.contains(f) =>
          try {
            deps :+= f.firstOutput
          } catch {
            // Deliberate best effort: the first output is skipped when it cannot be resolved yet
            case _: NullPointerException => logger.debug("Queue values are not initialized")
          }
        case _ =>
      }
    }

    // init() is public and also invoked from freezeFieldValues(); dropping duplicates keeps
    // `deps` from growing when it runs more than once
    deps = deps.distinct

    jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))
  }

  /**
   * Function to create summary.
   *
   * Merges, in this order: the pipeline level map, the maps of all summarizables, the summary
   * files of the nested summary qscripts and a "meta" section. The result is written as a
   * single line of JSON to `out`.
   */
  def run(): Unit = {
    for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables)
      summarizable.addToQscriptSummary(qscript, name)

    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables: Map[String, Any] = {

        /** Version details of a function, None when the function does not mix in Version */
        def fetchVersion(f: QFunction): Option[(String, Any)] = {
          f match {
            case f: BiopetJavaCommandLineFunction with Version =>
              Some(f.configNamespace -> Map(
                "version" -> f.getVersion.getOrElse(None),
                "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "java_version" -> f.getJavaVersion,
                "jar_path" -> f.jarFile))
            case f: BiopetCommandLineFunction with Version =>
              Some(f.configNamespace -> Map(
                "version" -> f.getVersion.getOrElse(None),
                "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "path" -> f.executable))
            case f: Configurable with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None)))
            case _ => None
          }
        }

        // Functions of the qscript first, then the jobs piped inside command line functions
        (
          qscript.functions.flatMap(fetchVersion) ++
          qscript.functions
          .flatMap {
            case f: BiopetCommandLineFunction => f.pipesJobs
            case _                            => Nil
          }.flatMap(fetchVersion(_))
        ).toMap
      }

      val map = Map(qscript.summaryName -> Map(
        "settings" -> settings,
        "files" -> Map("pipeline" -> files),
        "executables" -> executables)
      )

      qscript match {
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        case q: MultiSampleQScript =>
          // Add settings and tags of each sample and library next to the pipeline level map
          ConfigUtils.mergeMaps(
            Map("samples" -> q.samples.map {
              case (sampleName, sample) =>
                sampleName -> Map(
                  qscript.summaryName -> Map(
                    "settings" -> sample.summarySettings,
                    "tags" -> sample.sampleTags),
                  "libraries" -> sample.libraries.map {
                    case (libName, lib) =>
                      libName -> Map(
                        qscript.summaryName -> Map(
                          "settings" -> lib.summarySettings,
                          "tags" -> lib.libTags)
                      )
                  }
                )
            }), map)
        case _ => map
      }
    }

    // Each summarizable yields its prefixed map plus the function that resolves its merge conflicts.
    // NOTE(review): if `summarizables` is a Map, this comprehension builds a Map keyed by the
    // prefixed summary, so equal summaries collapse into one entry - confirm this is intended.
    val jobsMap = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    val combinedMap = qscript.summaryQScripts
      .map(nested => ConfigUtils.fileToConfigMap(nested.summaryFile))
      .foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
      Map("meta" -> Map(
        "last_commit_hash" -> LastCommitHash,
        "pipeline_version" -> nl.lumc.sasc.biopet.Version,
        "pipeline_name" -> qscript.summaryName,
        "output_dir" -> qscript.outputDir,
        "run_name" -> config("run_name", default = qSettings.runName).asString,
        "summary_creation" -> System.currentTimeMillis()
      ))

    // Serialise before opening the file, so a failure here does not leave an empty summary behind
    val json = ConfigUtils.mapToJson(combinedMap).nospaces

    val writer = new PrintWriter(out)
    try writer.println(json)
    finally writer.close()
  }

  /**
   * Nest a summary map under its sample and library.
   *
   * @param map summary map to nest
   * @param sampleId when defined the result is `samples -> sampleId -> ...`
   * @param libraryId when defined, and sampleId is defined too, the map is placed under
   *                  `libraries -> libraryId` within the sample
   * @return the nested map, or `map` itself when sampleId is not defined
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    sampleId match {
      case Some(sample) =>
        val sampleContent: Map[String, Any] = libraryId match {
          case Some(library) => Map("libraries" -> Map(library -> map))
          case _             => map
        }
        Map("samples" -> Map(sample -> sampleContent))
      case _ => map
    }
  }

  /**
   * Convert summarizable to a summary map
   *
   * @param summarizable source of the stats and files
   * @param name key under which the stats and files are stored
   * @return map with a "stats" entry and, only when the summarizable has files, a "files" entry
   */
  def parseSummarizable(summarizable: Summarizable, name: String): Map[String, Any] = {
    val stats = summarizable.summaryStats
    val files = parseFiles(summarizable.summaryFiles)

    val filesSection = if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files))
    Map("stats" -> Map(name -> stats)) ++ filesSection
  }

  /** Parse files map to summary map, each file is converted with [[parseFile]] */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (key, file) => key -> parseFile(file) }
  }

  /**
   * Parse single file summary map
   *
   * @return map with the absolute "path" of the file and, when `md5sum` is true, its "md5"
   */
  def parseFile(file: File): Map[String, Any] = {
    val base: Map[String, Any] = Map("path" -> file.getAbsolutePath)
    if (md5sum) base + ("md5" -> WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file)))
    else base
  }
}
201

Peter van 't Hof's avatar
Peter van 't Hof committed
202
object WriteSummary {
  /**
   * Retrieve checksum from file.
   *
   * Only the first line is read; the checksum is the text before the first space on that line.
   * The file is always closed again, also when reading fails.
   *
   * @param checksumFile file to read the checksum from
   * @return first space separated field of the first line
   * @throws NoSuchElementException when the file does not contain any line
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext) throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      lines.next().split(" ")(0)
    } finally source.close()
  }
}