WriteSummary.scala 7.36 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.core.summary

bow's avatar
bow committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20
import nl.lumc.sasc.biopet.core._
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.LastCommitHash
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
27
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.collection.mutable
import scala.io.Source
28
29

/**
 * This will collect and write the summary
 *
 * Created by pjvan_thof on 2/14/15.
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  /** The root of this summary, viewed as the SummaryQScript it is required to be. */
  val qscript: SummaryQScript = root.asInstanceOf[SummaryQScript]
41
42
43
44
45

  /**
   * Input files of this job. Starts empty; init() appends the summary file of every
   * nested summary qscript and the first output of every summarized QFunction.
   */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  @Output(doc = "Summary output", required = true)
46
  var out: File = qscript.summaryFile
47
48
49
50
51

  /**
   * When true, parseFile adds an "md5" entry next to the "path" of every file.
   * Read from the config key "summary_md5", defaulting to true.
   */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Runs init() first, so the dependencies and the job output file are set,
   * and then delegates to the parent implementation.
   */
  override def freezeFieldValues(): Unit = {
    init()
    super.freezeFieldValues()
  }

  /**
   * Collects the dependencies of this job and sets its job output file.
   *
   * Appends to deps the summary file of every nested summary qscript and the first
   * output of every summarizable that is a QFunction present in qscript.functions.
   * The job output file is a hidden file placed next to the summary file.
   */
  def init(): Unit = {
    qscript.summaryQScripts.foreach { nested =>
      deps :+= nested.summaryFile
    }

    qscript.summarizables.foreach {
      case (_, items) =>
        items.foreach {
          case function: QFunction if qscript.functions.contains(function) =>
            try {
              deps :+= function.firstOutput
            } catch {
              // Best effort: a function whose values are not available yet is skipped with a warning
              case _: NullPointerException => logger.warn("Queue values are not init")
            }
          case _ => // not a function of this qscript, so there is no output to depend on
        }
    }

    val jobOutputName = ".%s.%s.out".format(out.getName, analysisName)
    jobOutputFile = new File(out.getParentFile, jobOutputName)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
71
  /**
   * Function to create summary.
   *
   * Steps, in order:
   *  1. Every summarizable registers itself through addToQscriptSummary.
   *  2. The pipeline map is built from the qscript settings, its files and the version
   *     information of its functions (including the jobs piped inside command line functions).
   *     For a SampleLibraryTag it is nested under the sample / library; for a
   *     MultiSampleQScript the settings and tags of each sample and library are merged in.
   *  3. The stats and files of every summarizable are merged on top of the pipeline map,
   *     passing the summarizable's own resolveSummaryConflict to ConfigUtils.mergeMaps.
   *  4. The summary files of the nested summary qscripts are merged in, and the "meta"
   *     section is added (replacing any "meta" key coming from the merged maps).
   *  5. The result is written to `out` as a single line of JSON without spaces.
   */
  def run(): Unit = {
    for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
      summarizable.addToQscriptSummary(qscript, name)
    }

    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables: Map[String, Any] = {

        /** Version information of a single function, keyed by its config namespace; None when it has no Version. */
        def fetchVersion(f: QFunction): Option[(String, Any)] = {
          f match {
            case f: BiopetJavaCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "java_version" -> f.getJavaVersion,
                "jar_path" -> f.jarFile))
            case f: BiopetCommandLineFunction with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None),
                "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
                "path" -> f.executable))
            case f: Configurable with Version =>
              Some(f.configNamespace -> Map("version" -> f.getVersion.getOrElse(None)))
            case _ => None
          }
        }

        // Functions added to the qscript first, then the jobs piped inside command line functions;
        // toMap keeps the last entry when a config namespace occurs more than once.
        (
          qscript.functions.flatMap(fetchVersion) ++
          qscript.functions
          .flatMap {
            case f: BiopetCommandLineFunction => f.pipesJobs
            case _                            => Nil
          }.flatMap(fetchVersion(_))
        ).toMap
      }

      val map = Map(qscript.summaryName -> Map(
        "settings" -> settings,
        "files" -> Map("pipeline" -> files),
        "executables" -> executables.toMap)
      )

      qscript match {
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        case q: MultiSampleQScript =>
          ConfigUtils.mergeMaps(
            Map("samples" -> q.samples.map {
              case (sampleName, sample) =>
                sampleName -> Map(
                  qscript.summaryName -> Map(
                    "settings" -> sample.summarySettings,
                    "tags" -> sample.sampleTags),
                  "libraries" -> sample.libraries.map {
                    case (libName, lib) =>
                      libName -> Map(
                        qscript.summaryName -> Map(
                          "settings" -> lib.summarySettings,
                          "tags" -> lib.libTags)
                      )
                  }
                )
            }), map)
        case _ => map
      }
    }

    // NOTE(review): this comprehension iterates qscript.summarizables and yields pairs; if that
    // collection is a Map the result is presumably collected as a Map too, so summarizables that
    // produce an identical prefixed map would collapse into one entry - confirm this is intended.
    val jobsMap = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    val combinedMap = (for (qscript <- qscript.summaryQScripts) yield {
      ConfigUtils.fileToConfigMap(qscript.summaryFile)
    }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
      Map("meta" -> Map(
        "last_commit_hash" -> LastCommitHash,
        "pipeline_version" -> nl.lumc.sasc.biopet.Version,
        "pipeline_name" -> qscript.summaryName,
        "output_dir" -> qscript.outputDir,
        "run_name" -> config("run_name", default = qSettings.runName).asString,
        "summary_creation" -> System.currentTimeMillis()
      ))

    // Close the writer even when serialising or writing fails, so the file handle is never leaked
    val writer = new PrintWriter(out)
    try {
      writer.println(ConfigUtils.mapToJson(combinedMap).nospaces)
    } finally {
      writer.close()
    }
  }
165

166
167
  /**
   * Nests a summary map under its sample and library.
   *
   * With a sample and a library the result is
   * "samples" -> sample -> "libraries" -> library -> map; with only a sample it is
   * "samples" -> sample -> map; without a sample the map is returned unchanged
   * (the library is ignored in that case).
   *
   * @param map summary map to nest
   * @param sampleId optional sample name
   * @param libraryId optional library name, only used when sampleId is defined
   * @return the nested map
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    (sampleId, libraryId) match {
      case (Some(sample), Some(library)) =>
        val perLibrary: Map[String, Any] = Map("libraries" -> Map(library -> map))
        Map("samples" -> Map(sample -> perLibrary))
      case (Some(sample), _) =>
        Map("samples" -> Map(sample -> map))
      case _ =>
        map
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
176
  /**
   * Convert summarizable to a summary map.
   *
   * The result always has a "stats" entry holding the summary stats under `name`;
   * a "files" entry is only added when the summarizable has summary files.
   */
  def parseSummarizable(summarizable: Summarizable, name: String) = {
    val statsEntry = Map("stats" -> Map(name -> summarizable.summaryStats))
    val parsedFiles = parseFiles(summarizable.summaryFiles)

    val filesEntry =
      if (parsedFiles.nonEmpty) Map[String, Any]("files" -> Map(name -> parsedFiles))
      else Map[String, Any]()

    statsEntry ++ filesEntry
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
185
  /** Parse files map to summary map: the keys are kept, every file is replaced by the result of parseFile. */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (key, file) => key -> parseFile(file) }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
190
  /**
   * Parse single file to a summary map.
   *
   * The map always holds the absolute "path"; when md5sum is enabled it also holds the
   * "md5" read from the checksum file that SummaryQScript.md5sumCache has for this file.
   */
  def parseFile(file: File): Map[String, Any] = {
    val pathOnly: Map[String, Any] = Map("path" -> file.getAbsolutePath)
    if (md5sum) pathOnly + ("md5" -> WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file)))
    else pathOnly
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
197
}
198

Peter van 't Hof's avatar
Peter van 't Hof committed
199
object WriteSummary {
  /**
   * Retrieve the checksum from a checksum file.
   *
   * Only the first line is read; the checksum is the text before the first space on it.
   * The file is always closed again, also when reading fails.
   *
   * @param checksumFile file to read the checksum from
   * @return the first space-separated field of the first line
   * @throws NoSuchElementException when the file does not contain any line
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext) throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      lines.next().split(" ")(0)
    } finally {
      source.close()
    }
  }
}