WriteSummary.scala 4.27 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.core.summary

3
import java.io.{ FileInputStream, PrintWriter, File }
4
import java.security.MessageDigest
5

6
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunctionTrait, SampleLibraryTag }
7
import nl.lumc.sasc.biopet.core.config.Configurable
8
import nl.lumc.sasc.biopet.utils.ConfigUtils
9
10
11
import org.broadinstitute.gatk.queue.function.{ QFunction, InProcessFunction }
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }

12
13
14
import scala.collection.mutable
import scala.io.Source

15
16
17
18
19
20
21
22
/**
 * In-process Queue function that writes the summary of a [[SummaryQScript]] as JSON.
 *
 * The summary is assembled from the pipeline-level settings, files and executables of
 * the root qscript, from every registered summarizable, and from the already written
 * summary files of the nested summary qscripts. The result is written to
 * `qscript.summaryFile`.
 *
 * Created by pjvan_thof on 2/14/15.
 *
 * @param root the owning configurable; must be a [[SummaryQScript]], otherwise
 *             construction fails with an `IllegalArgumentException`
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

  /** The root viewed as a [[SummaryQScript]]; the cast is guarded by the `require` above. */
  val qscript: SummaryQScript = root.asInstanceOf[SummaryQScript]

  /** Input files this job depends on; extended in [[freezeFieldValues]]. */
  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  /** The summary file that [[run]] writes. */
  @Output(doc = "Summary output", required = true)
  var out: File = qscript.summaryFile

  /** When true, [[parseFile]] adds an "md5" entry for each file (config key `summary_md5`). */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Registers the dependencies of this job and sets its job output file before the
   * field values are frozen.
   *
   * The summary file of every nested summary qscript and the first output of every
   * summarizable that is a `QFunction` are appended to [[deps]].
   */
  override def freezeFieldValues(): Unit = {
    for (q <- qscript.summaryQScripts) deps :+= q.summaryFile
    for ((_, l) <- qscript.summarizables; s <- l) s match {
      case f: QFunction => deps :+= f.firstOutput
      case _            => // summarizables that are not Queue functions add no dependency
    }

    // Hidden file next to the summary: ".<summary file name>.<analysis name>.out"
    jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))

    super.freezeFieldValues()
  }

  /**
   * Builds the combined summary map and writes it as JSON (4-space indented) to [[out]].
   *
   * Throws a `java.io.IOException` when the summary could not be written.
   */
  def run(): Unit = {

    // Pipeline-level part: settings, files and executables of the root qscript.
    val pipelineMap = {
      val files = parseFiles(qscript.summaryFiles)
      val settings = qscript.summarySettings
      val executables = {
        for ((name, (file, version)) <- qscript.executables) yield {
          name -> Map("version" -> version, "md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(file.getCanonicalPath, "N/A"))
        }
      }

      // Empty sections are left out of the summary entirely.
      val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++
        (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map("pipeline" -> files))) ++
        (if (executables.isEmpty) Map[String, Any]() else Map("executables" -> executables.toMap))))

      qscript match {
        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
        case _                     => map
      }
    }

    // Per-summarizable part, merged into the pipeline map. Each entry carries its own
    // conflict resolver, which is handed to mergeMaps.
    // NOTE(review): if qscript.summarizables is a Map, yielding a pair makes this
    // intermediate collection a Map keyed by the prefixed map - confirm that is intended.
    val jobsMap = (for (
      ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
      summarizable <- summarizables
    ) yield {
      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

      (prefixSampleLibrary(map, sampleId, libraryId),
        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

    // Merge in the summary files already written by the nested summary qscripts.
    val combinedMap = (for (subQscript <- qscript.summaryQScripts) yield {
      ConfigUtils.fileToConfigMap(subQscript.summaryFile)
    }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b))

    // Render the JSON before opening the file, so a failure while rendering does not
    // leave a truncated summary behind.
    val json = ConfigUtils.mapToJson(combinedMap).spaces4

    val writer = new PrintWriter(out)
    try {
      writer.println(json)
      // PrintWriter swallows I/O errors; surface them so the job fails visibly.
      if (writer.checkError()) throw new java.io.IOException("Failed to write summary to " + out)
    } finally writer.close()
  }

  /**
   * Nests a map under "samples" -> sampleId and, when given, "libraries" -> libraryId.
   *
   * @param map       the map to nest
   * @param sampleId  optional sample id; when absent the map is returned unchanged,
   *                  regardless of libraryId
   * @param libraryId optional library id, only used when sampleId is defined
   * @return the nested map
   */
  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
    sampleId match {
      case Some(sample) =>
        val nested: Map[String, Any] = libraryId match {
          case Some(library) => Map("libraries" -> Map(library -> map))
          case _             => map
        }
        Map("samples" -> Map(sample -> nested))
      case _ => map
    }
  }

  /**
   * Converts a summarizable into its "stats" and "files" sections, each keyed by name.
   *
   * @param summarizable the summarizable to read the stats and files from
   * @param name         the name under which the stats and files are stored
   * @return a map with a "stats" and/or "files" entry; empty sections are omitted
   */
  def parseSummarizable(summarizable: Summarizable, name: String): Map[String, Any] = {
    val data = summarizable.summaryStats
    val files = parseFiles(summarizable.summaryFiles)

    (if (data.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> data))) ++
      (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
  }

  /**
   * Converts every file of the given map with [[parseFile]], keeping the keys.
   *
   * @param files files by key
   * @return the parsed file description by key
   */
  def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
    files.map { case (key, file) => key -> parseFile(file) }
  }

  /**
   * Describes a single file for the summary.
   *
   * @param file the file to describe
   * @return a map with the absolute "path" and, when [[md5sum]] is set, the "md5"
   *         read from the checksum file registered in `SummaryQScript.md5sumCache`
   */
  def parseFile(file: File): Map[String, Any] = {
    val map: mutable.Map[String, Any] = mutable.Map()
    map += "path" -> file.getAbsolutePath
    if (md5sum) map += "md5" -> parseChecksum(SummaryQScript.md5sumCache(file))
    map.toMap
  }

  /**
   * Reads the checksum from a checksum file: the text before the first space on the
   * first line.
   *
   * @param checksumFile the file to read
   * @return the checksum
   * @throws NoSuchElementException when the file has no lines
   */
  def parseChecksum(checksumFile: File): String = {
    val source = Source.fromFile(checksumFile)
    try {
      val lines = source.getLines()
      if (!lines.hasNext) throw new NoSuchElementException("Checksum file is empty: " + checksumFile)
      lines.next().split(" ")(0)
    } finally source.close() // always release the file handle
  }
}