WriteSummary.scala 2.97 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.core.summary

3
import java.io.{ FileInputStream, PrintWriter, File }
4
import java.security.MessageDigest
5
6

import nl.lumc.sasc.biopet.core.config.Configurable
7
import nl.lumc.sasc.biopet.utils.ConfigUtils
8
9
10
import org.broadinstitute.gatk.queue.function.{ QFunction, InProcessFunction }
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }

11
12
13
import scala.collection.mutable
import scala.io.Source

14
15
16
17
18
19
20
21
/**
 * In-process function that gathers the summary information registered on a
 * [[SummaryQScript]] and writes it, as JSON, to that qscript's summary file.
 *
 * The written map is built from two sources:
 *  - every summarizable registered in `qscript.summarizables`, stored as
 *    `summaryName -> name -> { "data", "files" }` and, when a sample (and library) id is
 *    present, nested under `"samples" -> sampleId` (and `"libraries" -> libraryId`);
 *  - the already written summary files of the qscripts in `qscript.summaryQScripts`.
 *
 * Created by pjvan_thof on 2/14/15.
 *
 * @param root configurable parent of this function; must be a [[SummaryQScript]],
 *             otherwise construction fails with an `IllegalArgumentException`
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

  /** The root viewed as the [[SummaryQScript]] it was required to be. */
  val qscript: SummaryQScript = root.asInstanceOf[SummaryQScript]

  /** Files that have to exist before the summary is written; filled in [[freezeFieldValues]]. */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  /** Summary file that is written by [[run]]. */
  @Output(doc = "Summary output", required = true)
  var out: File = qscript.summaryFile

  /** When true an "md5" entry is added for each summarised file (config key "summary_md5"). */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Completes `deps` and `jobOutputFile` before the fields are frozen.
   *
   * Adds the summary file of every nested summary qscript, and the first output of every
   * summarizable that is itself a `QFunction`, to `deps` (presumably so this job is
   * scheduled after them; confirm against Queue's handling of `@Input`).
   */
  override def freezeFieldValues(): Unit = {
    for (q <- qscript.summaryQScripts) deps :+= q.summaryFile
    for ((_, l) <- qscript.summarizables; s <- l) s match {
      case f: QFunction => deps :+= f.firstOutput
      case _            => // summarizables that are not functions have no output to wait for
    }

    // Hidden log file next to the summary: ".<summary file name>.<analysis name>.out"
    jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))

    super.freezeFieldValues()
  }

  /**
   * Builds the combined summary map and writes it as JSON (4-space indented) to `out`.
   */
  def run(): Unit = {
    // One (map, conflict resolver) pair per summarizable, folded into a single map.
    // NOTE(review): the exact merge semantics are those of ConfigUtils.mergeMaps, which is
    // not visible here; the resolver is the summarizable's own resolveSummaryConflict.
    val ownSummary = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val entry = Map(qscript.summaryName -> Map(name -> parseSummarizable(summarizable)))

      (sampleId match {
        case Some(sample) => Map("samples" -> Map(sample -> (libraryId match {
          case Some(library) => Map("libraries" -> Map(library -> entry))
          case _             => entry
        })))
        case _ => entry
      }, (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(Map[String, Any]())((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    // Merge in the summary files already written by the nested summary qscripts.
    val combinedMap = (for (nested <- qscript.summaryQScripts) yield {
      ConfigUtils.fileToConfigMap(nested.summaryFile)
    }).foldRight(ownSummary)((a, b) => ConfigUtils.mergeMaps(a, b))

    // Serialise before opening the file so a failure here cannot leave a truncated summary.
    val json = ConfigUtils.mapToJson(combinedMap).spaces4

    val writer = new PrintWriter(out)
    try {
      writer.println(json)
    } finally {
      // Always release the file handle, also when writing fails.
      writer.close()
    }
  }

  /**
   * Converts one summarizable into its summary representation.
   *
   * @param summarizable object to summarise
   * @return map with the summarizable's `summaryData` under "data" and its parsed
   *         `summaryFiles` under "files"
   */
  def parseSummarizable(summarizable: Summarizable): Map[String, Map[String, Any]] = {
    Map("data" -> summarizable.summaryData, "files" -> parseFiles(summarizable.summaryFiles))
  }

  /**
   * Describes each file by its absolute path and, when `md5sum` is enabled, its md5 checksum.
   *
   * @param files files to describe, keyed by the name used in the summary
   * @return for every key a map with "path" and, if `md5sum` is set, "md5"
   */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    for ((key, file) <- files) yield {
      // Local mutable builder only; an immutable copy is what leaves this method.
      val map: mutable.Map[String, Any] = mutable.Map()
      map += "path" -> file.getAbsolutePath
      // The checksum is read from the file that SummaryQScript.md5sumCache yields for `file`.
      if (md5sum) map += "md5" -> parseChecksum(SummaryQScript.md5sumCache(file))
      key -> map.toMap
    }
  }

  /**
   * Reads a checksum from a checksum file.
   *
   * @param checksumFile file whose first line starts with the checksum
   * @return the part of the first line before the first space
   * @throws NoSuchElementException when the file contains no lines
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext)
        throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      // Only the first line is needed, so the rest of the file is never read.
      lines.next().split(" ")(0)
    } finally {
      // The source holds an open file handle; release it even when reading fails.
      source.close()
    }
  }
}