WriteSummary.scala 7.22 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.core.summary

bow's avatar
bow committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.LastCommitHash
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
27
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.collection.mutable
import scala.io.Source
28
29

/**
 * Collects the summary data of a [[SummaryQScript]] and writes it to the summary file as JSON.
 *
 * Created by pjvan_thof on 2/14/15.
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  /** To access qscript for this summary */
40
  val qscript = root.asInstanceOf[SummaryQScript]
41
42
43
44
45

  /**
   * Files this job depends on. Starts empty; init() appends the summary file of every
   * nested summary qscript and the first output of every summarizable that is a QFunction.
   */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  @Output(doc = "Summary output", required = true)
46
  var out: File = qscript.summaryFile
47
48
49
50
51

  /**
   * When true, parseFile adds an "md5" entry next to the "path" of each file.
   * Read from the config key "summary_md5"; defaults to true.
   */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksum types

  /** Runs init() so `deps` and `jobOutputFile` are set before the field values are frozen. */
  override def freezeFieldValues(): Unit = {
    init()
    super.freezeFieldValues()
  }

  /**
   * Registers everything this job depends on and chooses the job output file.
   *
   * Appends to `deps`, in this order: the summary file of each nested summary qscript,
   * then the first output of each registered summarizable that is a QFunction.
   * `jobOutputFile` becomes a hidden file located next to `out`.
   */
  def init(): Unit = {
    // run() merges the summary files of nested qscripts, so they are inputs of this job
    qscript.summaryQScripts.foreach(nested => deps :+= nested.summaryFile)

    // Only summarizables that are QFunctions contribute an output file here
    qscript.summarizables.foreach {
      case (_, group) =>
        group.foreach {
          case function: QFunction => deps :+= function.firstOutput
          case _                   =>
        }
    }

    val logName = ".%s.%s.out".format(out.getName, analysisName)
    jobOutputFile = new File(out.getParentFile, logName)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
67
  /**
   * Builds the complete summary and writes it to `out` as JSON.
   *
   * Steps, in order:
   *  1. every registered summarizable adds itself to the qscript summary;
   *  2. the pipeline level map is built (settings, files and executables), including
   *     per sample / per library settings and tags for a MultiSampleQScript;
   *  3. the maps of all summarizables are merged in, each with its own conflict resolver;
   *  4. the summary files of nested summary qscripts are merged in;
   *  5. a "meta" section is added and the result is written to `out`.
   *
   * @throws java.io.IOException when the summary could not be written completely
   */
  def run(): Unit = {
    for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
      summarizable.addToQscriptSummary(qscript, name)
    }

    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables: Map[String, Any] = {

        /** Version (and, for command line functions, executable details) keyed by config namespace */
        def fetchVersion(f: QFunction): Option[(String, Any)] = {
          f match {
            case f: BiopetJavaCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "java_version" -> f.getJavaVersion,
                "jar_path" -> f.jarFile))
            case f: BiopetCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "path" -> f.executable))
            case f: Configurable with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None)))
            case _ => None
          }
        }

        // Functions added to the qscript, followed by the jobs piped inside command line functions
        (
          qscript.functions.flatMap(fetchVersion) ++
          qscript.functions
          .flatMap {
            case f: BiopetCommandLineFunction => f.pipesJobs
            case _                            => Nil
          }.flatMap(fetchVersion(_))
        ).toMap
      }

      val map = Map(qscript.summaryName -> Map(
        "settings" -> settings,
        "files" -> Map("pipeline" -> files),
        "executables" -> executables)
      )

      qscript match {
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        case q: MultiSampleQScript =>
          ConfigUtils.mergeMaps(
            Map("samples" -> q.samples.map {
              case (sampleName, sample) =>
                sampleName -> Map(
                  qscript.summaryName -> Map(
                    "settings" -> sample.summarySettings,
                    "tags" -> sample.sampleTags),
                  "libraries" -> sample.libraries.map {
                    case (libName, lib) =>
                      libName -> Map(
                        qscript.summaryName -> Map(
                          "settings" -> lib.summarySettings,
                          "tags" -> lib.libTags)
                      )
                  }
                )
            }), map)
        case _ => map
      }
    }

    // Each summarizable supplies its own resolver for keys that clash during the merge
    val jobsMap = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    val combinedMap = (for (nested <- qscript.summaryQScripts) yield {
      ConfigUtils.fileToConfigMap(nested.summaryFile)
    }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
      Map("meta" -> Map(
        "last_commit_hash" -> LastCommitHash,
        "pipeline_version" -> nl.lumc.sasc.biopet.Version,
        "pipeline_name" -> qscript.summaryName,
        "output_dir" -> qscript.outputDir,
        "run_name" -> config("run_name", default = qSettings.runName).asString,
        "summary_creation" -> System.currentTimeMillis()
      ))

    // Render first: `new PrintWriter(out)` truncates the file as soon as it is opened,
    // so a failure while rendering must not leave an empty summary behind.
    val json = ConfigUtils.mapToJson(combinedMap).nospaces

    val writer = new PrintWriter(out)
    try {
      writer.println(json)
      // PrintWriter never throws on I/O problems; checkError() flushes and reports them
      if (writer.checkError())
        throw new java.io.IOException("Failed to write summary to " + out)
    } finally writer.close()
  }
161

162
163
  /**
   * Nests a summary map under the sample / library it belongs to.
   *
   * @param map       summary map to nest
   * @param sampleId  when defined, the result is wrapped as "samples" -> sampleId -> ...
   * @param libraryId when defined together with sampleId, an extra "libraries" -> libraryId level is added
   * @return the nested map; `map` itself when no sample is given (a library without a sample is ignored)
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    (sampleId, libraryId) match {
      case (Some(sample), Some(library)) =>
        Map("samples" -> Map(sample -> Map("libraries" -> Map(library -> map))))
      case (Some(sample), _) =>
        Map("samples" -> Map(sample -> map))
      case _ =>
        map
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
172
  /**
   * Converts a summarizable to a summary map.
   *
   * The result always holds "stats" -> name -> stats of the summarizable; a
   * "files" -> name -> parsed files entry is added only when the summarizable reports files.
   */
  def parseSummarizable(summarizable: Summarizable, name: String) = {
    val statsEntry = Map("stats" -> Map(name -> summarizable.summaryStats))
    val parsedFiles = parseFiles(summarizable.summaryFiles)

    val filesEntry =
      if (parsedFiles.nonEmpty) Map("files" -> Map(name -> parsedFiles))
      else Map[String, Any]()

    statsEntry ++ filesEntry
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
181
  /** Maps every named file to its summary map (see parseFile), keeping the original keys. */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (label, file) => label -> parseFile(file) }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
186
  /**
   * Builds the summary map of a single file.
   *
   * Always contains "path" (absolute path of the file); when `md5sum` is enabled it also
   * contains "md5", parsed from the checksum file registered in SummaryQScript.md5sumCache.
   */
  def parseFile(file: File): Map[String, Any] = {
    val entries = mutable.Map[String, Any]("path" -> file.getAbsolutePath)
    if (md5sum) {
      val checksumFile = SummaryQScript.md5sumCache(file)
      entries += "md5" -> WriteSummary.parseChecksum(checksumFile)
    }
    entries.toMap
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
193
}
194

Peter van 't Hof's avatar
Peter van 't Hof committed
195
object WriteSummary {
  /**
   * Retrieves the checksum from a checksum file.
   *
   * Only the first line is read; the checksum is the text before the first space on that line.
   * The file is always closed again, also when reading fails.
   *
   * @param checksumFile file holding the checksum as the first token of its first line
   * @return the checksum string
   * @throws NoSuchElementException when the file contains no lines
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext)
        throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      lines.next().split(" ")(0)
    } finally source.close()
  }
}