WriteSummary.scala 7.18 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.core.summary

bow's avatar
bow committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.LastCommitHash
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
27
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.collection.mutable
import scala.io.Source
28
29

/**
 * In-process Queue function that collects the summary data of a pipeline and
 * writes it as a single line of JSON to [[out]].
 *
 * Created by pjvan_thof on 2/14/15.
 *
 * @param root the pipeline this summary belongs to; it must be a
 *             [[SummaryQScript]], otherwise construction fails with an
 *             `IllegalArgumentException` (via `require`)
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

  /** To access qscript for this summary */
  val qscript: SummaryQScript = root.asInstanceOf[SummaryQScript]

  /** Files this job depends on; filled in [[freezeFieldValues]]. */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  /** Summary file to write; defaults to the summary file of the qscript. */
  @Output(doc = "Summary output", required = true)
  var out: File = qscript.summaryFile

  /** When true (config key `summary_md5`, default true) a "md5" entry is added for every file. */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Registers the dependencies of this job and the location of its job output
   * file before delegating to the parent implementation.
   */
  override def freezeFieldValues(): Unit = {
    // Summary files of the nested qscripts.
    for (q <- qscript.summaryQScripts) deps :+= q.summaryFile

    // First output of every summarizable that is itself a Queue function.
    for ((_, l) <- qscript.summarizables; s <- l) s match {
      case f: QFunction => deps :+= f.firstOutput
      case _            =>
    }

    // Hidden file next to the summary: ".<summary name>.<analysis name>.out".
    jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))

    super.freezeFieldValues()
  }

  /**
   * Function to create summary.
   *
   * Steps, in order:
   *  1. let every summarizable register itself on the qscript;
   *  2. build the pipeline level map (settings, files, executables and, where
   *     applicable, sample/library settings and tags);
   *  3. merge the stats and files of every summarizable into that map;
   *  4. merge the summary files of the nested qscripts and set the "meta" block;
   *  5. write the result as compact JSON to [[out]].
   */
  def run(): Unit = {
    for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
      summarizable.addToQscriptSummary(qscript, name)
    }

    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables: Map[String, Any] = {

        /**
         * Version information of a single function, keyed by its config namespace.
         * Returns None for functions that do not carry a version. A missing
         * version or md5 is stored as the `None` object itself.
         */
        def fetchVersion(f: QFunction): Option[(String, Any)] = {
          f match {
            // NOTE(review): presumably BiopetJavaCommandLineFunction is also a
            // BiopetCommandLineFunction, which is why it is matched first - verify.
            case f: BiopetJavaCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "java_version" -> f.getJavaVersion,
                "jar_path" -> f.jarFile))
            case f: BiopetCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "path" -> f.executable))
            case f: Configurable with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None)))
            case _ => None
          }
        }

        val functions = qscript.functions
        // Jobs that only exist as part of a piped command line function.
        val pipedJobs = functions.flatMap {
          case f: BiopetCommandLineFunction => f.pipesJobs
          case _                            => Nil
        }

        // Entries of piped jobs come last, so they win when a namespace occurs twice.
        (functions.flatMap(fetchVersion) ++ pipedJobs.flatMap(fetchVersion(_))).toMap
      }

      val map = Map(qscript.summaryName -> Map(
        "settings" -> settings,
        "files" -> Map("pipeline" -> files),
        "executables" -> executables)
      )

      qscript match {
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        case q: MultiSampleQScript =>
          // Add the settings and tags of every sample and library to the pipeline map.
          ConfigUtils.mergeMaps(
            Map("samples" -> q.samples.map {
              case (sampleName, sample) =>
                sampleName -> Map(
                  qscript.summaryName -> Map(
                    "settings" -> sample.summarySettings,
                    "tags" -> sample.sampleTags),
                  "libraries" -> sample.libraries.map {
                    case (libName, lib) =>
                      libName -> Map(
                        qscript.summaryName -> Map(
                          "settings" -> lib.summarySettings,
                          "tags" -> lib.libTags)
                      )
                  }
                )
            }), map)
        case _ => map
      }
    }

    // Every summarizable contributes its own map together with its own conflict
    // resolver; both are handed to ConfigUtils.mergeMaps while folding.
    val jobsMap = (for {
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables
      summarizable <- summarizables
    } yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    // The "meta" block is added with ++, so it replaces any top level "meta" key
    // that came out of the merged maps.
    val combinedMap = (for (child <- qscript.summaryQScripts) yield {
      ConfigUtils.fileToConfigMap(child.summaryFile)
    }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
      Map("meta" -> Map(
        "last_commit_hash" -> LastCommitHash,
        "pipeline_version" -> nl.lumc.sasc.biopet.Version,
        "pipeline_name" -> qscript.summaryName,
        "output_dir" -> qscript.outputDir,
        "run_name" -> config("run_name", default = qSettings.runName).asString,
        "summary_creation" -> System.currentTimeMillis()
      ))

    // Serialise before opening the file so a failing conversion does not leave a
    // truncated summary behind, and always release the file handle.
    val json = ConfigUtils.mapToJson(combinedMap).nospaces
    val writer = new PrintWriter(out)
    try writer.println(json)
    finally writer.close()
  }

  /**
   * Nests a map under "samples" -> sample (-> "libraries" -> library).
   *
   * @param map       map to nest
   * @param sampleId  when empty the map is returned as is, regardless of libraryId
   * @param libraryId when empty the map is placed directly under the sample
   * @return the (possibly) nested map
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    sampleId match {
      case Some(s) => Map("samples" -> Map(s -> (libraryId match {
        case Some(l) => Map("libraries" -> Map(l -> map))
        case _       => map
      })))
      case _ => map
    }
  }

  /**
   * Convert summarizable to a summary map.
   *
   * @param summarizable object to take the stats and files from
   * @param name         key under which the stats and files are stored
   * @return map with a "stats" entry and, only when the summarizable has
   *         files, a "files" entry
   */
  def parseSummarizable(summarizable: Summarizable, name: String): Map[String, Any] = {
    val stats = summarizable.summaryStats
    val files = parseFiles(summarizable.summaryFiles)

    Map("stats" -> Map(name -> stats)) ++
      (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
  }

  /** Parse files map to summary map: every file is replaced by the result of [[parseFile]]. */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    for ((key, file) <- files) yield key -> parseFile(file)
  }

  /**
   * Parse single file summary map.
   *
   * @return map with the absolute "path" of the file and, when [[md5sum]] is
   *         set, its "md5" as read from the checksum file registered in
   *         `SummaryQScript.md5sumCache`
   */
  def parseFile(file: File): Map[String, Any] = {
    val map: mutable.Map[String, Any] = mutable.Map()
    map += "path" -> file.getAbsolutePath
    if (md5sum) map += "md5" -> WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file))
    map.toMap
  }
}
190

Peter van 't Hof's avatar
Peter van 't Hof committed
191
object WriteSummary {
  /**
   * Retrieve checksum from file.
   *
   * Only the first line is read; the checksum is everything before the first
   * space on that line.
   *
   * @param checksumFile file to read the checksum from
   * @return first space separated field of the first line
   * @throws NoSuchElementException when the file does not contain any line
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext) throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      lines.next().split(" ")(0)
    } finally source.close() // the file handle was never released before
  }
}