WriteSummary.scala 6.54 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.core.summary

bow's avatar
bow committed
18
import java.io.{ File, PrintWriter }
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, BiopetJavaCommandLineFunction, SampleLibraryTag }
22
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
27
28
import nl.lumc.sasc.biopet.{ LastCommitHash, Version }
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.collection.mutable
import scala.io.Source
29
30

/**
 * In-process Queue function that collects the summary data registered on a
 * [[SummaryQScript]] and writes it to the qscript's summary file as a single
 * line of JSON.
 *
 * The written document is built by merging, in this order:
 *  - a pipeline-level map (settings, files and executables of the qscript),
 *  - one map per registered summarizable (stats and files),
 *  - the already written summary files of the nested summary qscripts,
 *  - a "meta" entry describing this run.
 *
 * Created by pjvan_thof on 2/14/15.
 *
 * @param root parent configurable; must be a [[SummaryQScript]], otherwise
 *             construction fails with an `IllegalArgumentException`
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

  /** To access qscript for this summary */
  val qscript = root.asInstanceOf[SummaryQScript]

  /** Files this job waits for; filled in by [[freezeFieldValues]] */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  /** Summary file to write; defaults to the summary file of the qscript */
  @Output(doc = "Summary output", required = true)
  var out: File = qscript.summaryFile

  /** When true an "md5" entry is added for each summarized file (config key "summary_md5") */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Registers the inputs of this job before the field values are frozen.
   *
   * Appends to `deps`, in this order, the summary file of every nested summary
   * qscript and the first output of every summarizable that is also a
   * `QFunction`. Also places the job output file as a hidden file next to `out`.
   */
  override def freezeFieldValues(): Unit = {
    val nestedSummaryFiles = qscript.summaryQScripts.map(_.summaryFile)
    val functionOutputs = qscript.summarizables.values.flatten.collect {
      case f: QFunction => f.firstOutput
    }
    // Single concatenation instead of repeated O(n) appends to the list
    deps = deps ++ nestedSummaryFiles ++ functionOutputs

    jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))

    super.freezeFieldValues()
  }

  /** Function to create summary */
  def run(): Unit = {
    // Runs before qscript.summaryFiles / qscript.summarySettings are read below;
    // presumably summarizables may add to those here - TODO confirm
    for (((name, _, _), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
      summarizable.addToQscriptSummary(qscript, name)
    }

    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables: Map[String, Any] = {
        (for (f <- qscript.functions if f.isInstanceOf[BiopetCommandLineFunction]) yield {
          // Most specific type first: java tools, then other command line tools
          f match {
            case javaTool: BiopetJavaCommandLineFunction with Version =>
              javaTool.configName -> Map("version" -> javaTool.getVersion.getOrElse(None),
                "java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(javaTool.executable, None),
                "java_version" -> javaTool.getJavaVersion,
                "jar_path" -> javaTool.jarFile)
            case tool: BiopetCommandLineFunction with Version =>
              tool.configName -> Map("version" -> tool.getVersion.getOrElse(None),
                "md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(tool.executable, None),
                "path" -> tool.executable)
            case versioned: Configurable with Version =>
              versioned.configName -> Map("version" -> versioned.getVersion.getOrElse(None))
            // NOTE(review): reached by any BiopetCommandLineFunction that does not
            // mix in Version - confirm that this really cannot happen
            case _ => throw new IllegalStateException("This should not be possible")
          }
        }).toMap
      }

      // Empty sections are left out of the summary entirely
      val settingsPart: Map[String, Any] =
        if (settings.isEmpty) Map() else Map("settings" -> settings)
      val filesPart: Map[String, Any] =
        if (files.isEmpty) Map() else Map("files" -> Map("pipeline" -> files))
      val executablesPart: Map[String, Any] =
        if (executables.isEmpty) Map() else Map("executables" -> executables)

      val map = Map(qscript.summaryName -> (settingsPart ++ filesPart ++ executablesPart))

      qscript match {
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        case _                     => map
      }
    }

    // NOTE(review): the generator is a Map and the yielded value is a pair, so the
    // intermediate collection may itself be built as a Map keyed on the prefixed
    // summary map - confirm before changing the shape of this comprehension
    val jobsMap = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    val withNestedSummaries = qscript.summaryQScripts
      .map(nested => ConfigUtils.fileToConfigMap(nested.summaryFile))
      .foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b))

    val meta = Map(
      "last_commit_hash" -> LastCommitHash,
      "pipeline_version" -> nl.lumc.sasc.biopet.Version,
      "pipeline_name" -> qscript.summaryName,
      "output_dir" -> qscript.outputDir,
      "run_name" -> config("run_name", default = qSettings.runName).asString,
      "summary_creation" -> System.currentTimeMillis()
    )

    val combinedMap = withNestedSummaries ++ Map("meta" -> meta)

    // Render first, so a failing conversion neither truncates nor leaks the output file
    val json = ConfigUtils.mapToJson(combinedMap).nospaces

    val writer = new PrintWriter(out)
    try writer.println(json)
    finally writer.close()
  }

  /**
   * Nest a summary map under its sample and library.
   *
   * @param map summary map to nest
   * @param sampleId when defined the result is `samples -> sampleId -> ...`
   * @param libraryId when defined, and a sample is given, the map is nested one
   *                  level deeper under `libraries -> libraryId`
   * @return the nested map, or `map` itself when no sample is given
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    sampleId match {
      case Some(sample) =>
        val sampleContent: Map[String, Any] = libraryId match {
          case Some(library) => Map("libraries" -> Map(library -> map))
          case _             => map
        }
        Map("samples" -> Map(sample -> sampleContent))
      case _ => map
    }
  }

  /**
   * Convert summarizable to a summary map
   *
   * @param summarizable source of the stats and files
   * @param name key under which the stats and files are stored
   * @return map with a "stats" entry and, when there are files, a "files" entry
   */
  def parseSummarizable(summarizable: Summarizable, name: String): Map[String, Any] = {
    val stats = summarizable.summaryStats
    val files = parseFiles(summarizable.summaryFiles)

    val filesPart: Map[String, Any] =
      if (files.isEmpty) Map() else Map("files" -> Map(name -> files))

    Map("stats" -> Map(name -> stats)) ++ filesPart
  }

  /** Parse files map to summary map, keeping the keys and describing each file with [[parseFile]] */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (key, file) => key -> parseFile(file) }
  }

  /**
   * Parse single file summary map
   *
   * @return map with the absolute "path" of the file and, when `md5sum` is
   *         enabled, the "md5" read from the checksum file cached for it
   */
  def parseFile(file: File): Map[String, Any] = {
    val pathOnly: Map[String, Any] = Map("path" -> file.getAbsolutePath)
    if (md5sum) pathOnly + ("md5" -> WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file)))
    else pathOnly
  }
}
object WriteSummary {
  /**
   * Retrieve checksum from file.
   *
   * Only the first line is read; the checksum is the text before the first
   * space on that line.
   *
   * @param checksumFile file whose first line starts with the checksum
   * @return the first space-separated field of the first line
   * @throws NoSuchElementException when the file has no lines
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    // Always release the file handle, also when reading fails
    try {
      val lines = source.getLines()
      if (!lines.hasNext) throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      lines.next().split(" ")(0)
    } finally source.close()
  }
}