WriteSummary.scala 7.21 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.core.summary

bow's avatar
bow committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core._
22
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.LastCommitHash
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
27
28
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.collection.mutable
import scala.io.Source
29
30

/**
 * This will collect and write the summary
 *
 * Created by pjvan_thof on 2/14/15.
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  /** To access qscript for this summary */
41
  val qscript = root.asInstanceOf[SummaryQScript]
42
43
44
45
46

  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  @Output(doc = "Summary output", required = true)
47
  var out: File = qscript.summaryFile
48
49
50
51
52

  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksum types

  /**
   * Registers the inputs of this job and its job output file before the
   * field values are frozen by the parent implementation.
   *
   * Added to `deps`, in this order: the summary file of every nested summary
   * qscript, then the first output of every summarizable that is a QFunction.
   */
  override def freezeFieldValues(): Unit = {
    val nestedSummaries = qscript.summaryQScripts.map(_.summaryFile)
    val functionOutputs = qscript.summarizables.toList.flatMap {
      // Summarizables that are not a QFunction have no output file to depend on
      case (_, items) => items.collect { case function: QFunction => function.firstOutput }
    }
    deps = deps ++ nestedSummaries ++ functionOutputs

    // Hidden file next to the summary, named after the summary file and this analysis
    val jobOutputName = ".%s.%s.out".format(out.getName, analysisName)
    jobOutputFile = new File(out.getParentFile, jobOutputName)

    super.freezeFieldValues()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
64
  /**
   * Function to create summary.
   *
   * Builds the pipeline level map (settings, files, executables), one map per
   * registered summarizable and the content of the summary files of the nested
   * summary qscripts, combines them with `ConfigUtils.mergeMaps`, adds a "meta"
   * section and writes the result as JSON without spaces to `out`.
   *
   * @throws java.io.IOException when the summary could not be written to `out`
   */
  def run(): Unit = {
    // Runs before the qscript is read below, so anything a summarizable adds to it here is included
    for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
      summarizable.addToQscriptSummary(qscript, name)
    }

    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables: Map[String, Any] = {

        /** Version details of a function keyed by its config namespace, None when it carries no Version */
        def fetchVersion(f: QFunction): Option[(String, Any)] = {
          f match {
            // NOTE(review): presumably a subtype of BiopetCommandLineFunction, so it must be matched first - confirm
            case f: BiopetJavaCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "java_version" -> f.getJavaVersion,
                "jar_path" -> f.jarFile))
            case f: BiopetCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "path" -> f.executable))
            case f: Configurable with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None)))
            case _ => None
          }
        }

        // Functions of the qscript itself plus the jobs piped inside its command line functions;
        // toMap keeps a single entry per config namespace
        (
          qscript.functions.flatMap(fetchVersion) ++
          qscript.functions
          .flatMap {
            case f: BiopetCommandLineFunction => f.pipesJobs
            case _                            => Nil
          }.flatMap(fetchVersion(_))
        ).toMap
      }

      val map = Map(qscript.summaryName -> Map(
        "settings" -> settings,
        "files" -> Map("pipeline" -> files),
        "executables" -> executables)
      )

      qscript match {
        // A qscript tagged with a sample/library is nested below that sample/library
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        // A multi sample qscript also reports settings and tags of each sample and library
        case q: MultiSampleQScript =>
          ConfigUtils.mergeMaps(
            Map("samples" -> q.samples.map {
              case (sampleName, sample) =>
                sampleName -> Map(
                  qscript.summaryName -> Map(
                    "settings" -> sample.summarySettings,
                    "tags" -> sample.sampleTags),
                  "libraries" -> sample.libraries.map {
                    case (libName, lib) =>
                      libName -> Map(
                        qscript.summaryName -> Map(
                          "settings" -> lib.summarySettings,
                          "tags" -> lib.libTags)
                      )
                  }
                )
            }), map)
        case _ => map
      }
    }

    // Every summarizable yields its own map together with its own conflict resolver for the merge
    val jobsMap = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    // Summary files of the nested summary qscripts are read back and merged in
    val combinedMap = (for (nested <- qscript.summaryQScripts) yield {
      ConfigUtils.fileToConfigMap(nested.summaryFile)
    }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
      Map("meta" -> Map(
        "last_commit_hash" -> LastCommitHash,
        "pipeline_version" -> nl.lumc.sasc.biopet.Version,
        "pipeline_name" -> qscript.summaryName,
        "output_dir" -> qscript.outputDir,
        "run_name" -> config("run_name", default = qSettings.runName).asString,
        // Milliseconds since the epoch
        "summary_creation" -> System.currentTimeMillis()
      ))

    // Render before opening the file: a failing conversion must not leave an open, empty summary behind
    val json = ConfigUtils.mapToJson(combinedMap).nospaces

    val writer = new PrintWriter(out)
    try {
      writer.println(json)
      // PrintWriter never throws on write errors, checkError() flushes and reports them
      if (writer.checkError()) throw new java.io.IOException(s"Could not write summary to $out")
    } finally writer.close()
  }
158

159
160
  /**
   * Nests a summary map below its sample and, when given, its library.
   *
   * Without a sample id the map is returned unchanged, a library id without a
   * sample id is ignored.
   *
   * @param map summary map to nest
   * @param sampleId sample the map belongs to, if any
   * @param libraryId library the map belongs to, if any
   * @return `map` wrapped in "samples" -> sample (-> "libraries" -> library)
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    (sampleId, libraryId) match {
      case (Some(sample), Some(library)) =>
        Map("samples" -> Map(sample -> Map("libraries" -> Map(library -> map))))
      case (Some(sample), _) =>
        Map("samples" -> Map(sample -> map))
      case _ =>
        map
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
169
  /**
   * Convert summarizable to a summary map.
   *
   * The stats are always stored below "stats" -> name; a "files" -> name
   * section is only added when the summarizable reports at least one file.
   */
  def parseSummarizable(summarizable: Summarizable, name: String) = {
    val statsSection = Map("stats" -> Map(name -> summarizable.summaryStats))
    val parsedFiles = parseFiles(summarizable.summaryFiles)
    val filesSection: Map[String, Any] =
      if (parsedFiles.nonEmpty) Map("files" -> Map(name -> parsedFiles)) else Map.empty

    statsSection ++ filesSection
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
178
  /** Parse files map to summary map: every file is replaced by its `parseFile` entry, the keys are kept */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (label, target) => label -> parseFile(target) }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
183
  /**
   * Parse a single file to its summary map.
   *
   * The map always holds the absolute "path"; when `md5sum` is enabled it also
   * holds the "md5" parsed from the file that `SummaryQScript.md5sumCache`
   * returns for this file.
   */
  def parseFile(file: File): Map[String, Any] = {
    val entry = mutable.Map[String, Any]("path" -> file.getAbsolutePath)
    if (md5sum) entry("md5") = WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file))
    entry.toMap
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
190
}
191

Peter van 't Hof's avatar
Peter van 't Hof committed
192
object WriteSummary {
  /**
   * Retrieve checksum from file.
   *
   * Only the first line of the file is read; the checksum is everything on
   * that line before the first space. The file is closed again in all cases.
   *
   * @param checksumFile file holding the checksum at the start of its first line
   * @return the checksum, empty when the first line is empty or starts with a space
   * @throws NoSuchElementException when the file does not contain any line
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext) throw new NoSuchElementException(s"Checksum file is empty: $checksumFile")
      // Same result as split(" ")(0), but also safe for a line that only holds spaces
      lines.next().takeWhile(_ != ' ')
    } finally source.close()
  }
}