/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.core.summary

import java.io.{ File, PrintWriter }
import java.io.IOException

import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.LastCommitHash
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.collection.mutable
import scala.io.Source

/**
 * This job collects the summary of the root qscript and writes it to the summary file.
 *
 * Created by pjvan_thof on 2/14/15.
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  /**
   * To access qscript for this summary
   *
   * Construction fails with an IllegalArgumentException when root is not a SummaryQScript,
   * with the same message a failed `require` would give.
   */
  val qscript: SummaryQScript = root match {
    case summaryRoot: SummaryQScript => summaryRoot
    case _                           => throw new IllegalArgumentException("requirement failed: root is not a SummaryQScript")
  }

  /** Files this job waits for; filled in by freezeFieldValues */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  /** Summary file that is written by run, taken from the qscript */
  @Output(doc = "Summary output", required = true)
  var out: File = qscript.summaryFile

  /** When true a "md5" entry is added for each file, read from config value "summary_md5" */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Registers the inputs of the summary as dependencies before the field values are frozen.
   *
   * Appended to `deps`, in this order: the summary file of each entry in
   * `qscript.summaryQScripts`, then the `firstOutput` of each registered summarizable that is
   * a QFunction. The job output file becomes a hidden `.<out name>.<analysis name>.out` file
   * in the directory of `out`.
   */
  override def freezeFieldValues(): Unit = {
    val childSummaryFiles = qscript.summaryQScripts.map(_.summaryFile)
    val functionOutputs = qscript.summarizables.toList.flatMap {
      case (_, summarizables) => summarizables.collect { case function: QFunction => function.firstOutput }
    }
    deps = deps ++ childSummaryFiles ++ functionOutputs

    jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))

    super.freezeFieldValues()
  }

  /**
   * Function to create summary
   *
   * Merges into one nested map: the pipeline level settings, files and executable versions,
   * the stats and files of every registered summarizable, and the content of the summary
   * files of `qscript.summaryQScripts`. A "meta" entry is put on top (replacing a merged
   * "meta" key, if any) and the result is written to `out` as one line of JSON.
   *
   * @throws IOException when the summary file could not be written completely
   */
  def run(): Unit = {
    // Let each summarizable add its own entries to the qscript summary before anything is collected
    for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
      summarizable.addToQscriptSummary(qscript, name)
    }

    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables: Map[String, Any] = {

        /** Version entry for a function, None when the function does not match any case below */
        def fetchVersion(f: QFunction): Option[(String, Any)] = {
          f match {
            // The java case is tried before the generic command line case
            // A missing version or md5 is stored as the value None
            case f: BiopetJavaCommandLineFunction with Version =>
              Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None),
                "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "java_version" -> f.getJavaVersion,
                "jar_path" -> f.jarFile))
            case f: BiopetCommandLineFunction with Version =>
              Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None),
                "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "path" -> f.executable))
            case f: Configurable with Version =>
              Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None)))
            case _ => None
          }
        }

        val directVersions = qscript.functions.flatMap(fetchVersion)
        // Jobs that only run as part of a pipe are reachable through pipesJobs
        val pipedVersions = qscript.functions.flatMap {
          case f: BiopetCommandLineFunction => f.pipesJobs
          case _                            => Nil
        }.flatMap(fetchVersion(_))

        // On a duplicate config name the later entry wins
        (directVersions ++ pipedVersions).toMap
      }

      val map = Map(qscript.summaryName -> Map(
        "settings" -> settings,
        "files" -> Map("pipeline" -> files),
        "executables" -> executables)
      )

      qscript match {
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        case q: MultiSampleQScript =>
          ConfigUtils.mergeMaps(
            Map("samples" -> q.samples.map {
              case (sampleName, sample) =>
                sampleName -> Map(
                  qscript.summaryName -> Map("settings" -> sample.summarySettings),
                  "libraries" -> sample.libraries.map {
                    case (libName, lib) =>
                      libName -> Map(
                        qscript.summaryName -> Map("settings" -> lib.summarySettings)
                      )
                  }
                )
            }), map)
        case _ => map
      }
    }

    // NOTE(review): this yields pairs, so when qscript.summarizables is a Map the result is a
    // Map as well and entries with an equal summary map collapse into one - confirm intended
    val jobsMap = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap) {
      case ((summaryMap, resolveConflict), merged) => ConfigUtils.mergeMaps(summaryMap, merged, resolveConflict)
    }

    val childSummaries = for (child <- qscript.summaryQScripts) yield {
      ConfigUtils.fileToConfigMap(child.summaryFile)
    }

    val combinedMap = childSummaries.foldRight(jobsMap)((childMap, merged) => ConfigUtils.mergeMaps(childMap, merged)) ++
      Map("meta" -> Map(
        "last_commit_hash" -> LastCommitHash,
        "pipeline_version" -> nl.lumc.sasc.biopet.Version,
        "pipeline_name" -> qscript.summaryName,
        "output_dir" -> qscript.outputDir,
        "run_name" -> config("run_name", default = qSettings.runName).asString,
        "summary_creation" -> System.currentTimeMillis()
      ))

    // Render before opening the file, a failing conversion then leaves no truncated file behind
    val json = ConfigUtils.mapToJson(combinedMap).nospaces

    val writer = new PrintWriter(out)
    try writer.println(json)
    finally writer.close()

    // PrintWriter does not throw on I/O errors, it only records them
    if (writer.checkError()) throw new IOException("Failed to write summary file: " + out)
  }

  /**
   * Nest a summary map under its sample and library
   *
   * With a sample and a library the map ends up at samples -> sample -> libraries -> library,
   * with only a sample at samples -> sample. Without a sample the map is returned unchanged,
   * also when a library is given.
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    (sampleId, libraryId) match {
      case (Some(sample), Some(library)) =>
        Map("samples" -> Map(sample -> Map("libraries" -> Map(library -> map))))
      case (Some(sample), _) =>
        Map("samples" -> Map(sample -> map))
      case _ => map
    }
  }

  /**
   * Convert summarizable to a summary map
   *
   * The stats are always stored under "stats" -> name. The "files" -> name entry is only
   * added when the summarizable has summary files.
   */
  def parseSummarizable(summarizable: Summarizable, name: String) = {
    val statsEntry = Map("stats" -> Map(name -> summarizable.summaryStats))
    val parsedFiles = parseFiles(summarizable.summaryFiles)
    val filesEntry = if (parsedFiles.nonEmpty) Map("files" -> Map(name -> parsedFiles)) else Map[String, Any]()

    statsEntry ++ filesEntry
  }

  /** Replace each file in the map by its summary entry, the keys stay the same */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (key, file) => key -> parseFile(file) }
  }

  /**
   * Parse single file to summary map
   *
   * The map always holds the absolute path under "path". When `md5sum` is set, the checksum
   * is read from the file that `SummaryQScript.md5sumCache` holds for this file and is
   * stored under "md5".
   */
  def parseFile(file: File): Map[String, Any] = {
    val pathEntry = Map[String, Any]("path" -> file.getAbsolutePath)
    if (md5sum) pathEntry + ("md5" -> WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file)))
    else pathEntry
  }
}
/** Functions for the summary that do not need a job instance */
object WriteSummary {
  /**
   * Retrieve checksum from file
   *
   * Only the first line is read; the checksum is the text before the first space on that
   * line. The file is closed again, also when reading fails.
   *
   * @param checksumFile file that starts with the checksum, presumably md5sum-like output
   * @return first space separated field of the first line
   * @throws NoSuchElementException when the file has no lines
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext) throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      lines.next().split(" ")(0)
    } finally source.close()
  }
}