Commit d1016462 authored by Peter van 't Hof
Browse files

Remove version from summary and rename data to stats

parent 73435d56
......@@ -207,6 +207,6 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
/**
 * Companion object holding the mutable String-to-String lookup tables
 * declared for BiopetCommandLineFunctionTrait.
 */
object BiopetCommandLineFunctionTrait {
  import scala.collection.mutable.Map
  private val versionCache: Map[String, String] = Map()
  // Declared exactly once, as private[core]: WriteSummary (package core.summary)
  // looks up executable md5 sums here by canonical path, so plain `private`
  // would be too narrow and a second declaration would not compile.
  private[core] val executableMd5Cache: Map[String, String] = Map()
  private val executableCache: Map[String, String] = Map()
}
......@@ -11,7 +11,7 @@ trait Summarizable extends Configurable {
def summaryFiles: Map[String, File]
def summaryData: Map[String, Any]
def summaryStats: Map[String, Any]
/**
* This function is used to merge
......@@ -20,5 +20,7 @@ trait Summarizable extends Configurable {
* @param key
* @return
*/
def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
  // Single definition with an explicit result type. There is no generic way
  // to combine two arbitrary values, so the default rejects the collision;
  // implementations that can produce the same key twice override this method.
  throw new IllegalStateException("Merge can not have same key by default")
}
}
......@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.core.summary
import java.io.{ FileInputStream, PrintWriter, File }
import java.security.MessageDigest
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunctionTrait, SampleLibraryTag }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.function.{ QFunction, InProcessFunction }
......@@ -43,45 +44,72 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
}
/**
 * Assembles the summary for `qscript` and writes it to `out` as indented JSON.
 *
 * Three layers are merged on top of each other:
 *  1. the pipeline's own settings, files and executables,
 *  2. the stats and files of every registered Summarizable,
 *  3. the summary files of the QScripts in `qscript.summaryQScripts`.
 */
def run(): Unit = {
  // Pipeline-level section; each of the three sub-sections is left out when empty.
  val pipelineMap = {
    val files = parseFiles(qscript.summaryFiles)
    val settings = qscript.summarySettings
    val executables = {
      for ((name, (file, version)) <- qscript.executables) yield {
        // "N/A" is reported when no md5 was cached for this executable's canonical path.
        name -> Map("version" -> version, "md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(file.getCanonicalPath, "N/A"))
      }
    }

    val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++
      (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map("pipeline" -> files))) ++
      (if (executables.isEmpty) Map[String, Any]() else Map("executables" -> executables.toMap))))

    // A pipeline tagged with a sample/library is nested under that sample/library.
    qscript match {
      case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
      case _                     => map
    }
  }

  // One map per summarizable, paired with that summarizable's own conflict
  // resolver, folded onto the pipeline section.
  val jobsMap = (for (
    ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
    summarizable <- summarizables
  ) yield {
    val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))

    (prefixSampleLibrary(map, sampleId, libraryId),
      (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
  }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))

  // Summary files of the nested QScripts are read back and merged in as well.
  val combinedMap = (for (qscript <- qscript.summaryQScripts) yield {
    ConfigUtils.fileToConfigMap(qscript.summaryFile)
  }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b))

  val writer = new PrintWriter(out)
  // Close the writer even when JSON rendering fails, so the handle is not leaked.
  try writer.println(ConfigUtils.mapToJson(combinedMap).spaces4)
  finally writer.close()
}
def parseSummarizable(summarizable: Summarizable) = {
val data = summarizable.summaryData
/**
 * Nests `map` under its sample and, when present, its library.
 *
 * Without a sample id the map is returned untouched, even if a library id is
 * given. Otherwise the result is `samples -> sampleId -> map`, with an extra
 * `libraries -> libraryId` level in between when a library id is given.
 */
def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
  sampleId.fold(map) { sample =>
    val perSample = libraryId.fold(map)(library => Map("libraries" -> Map(library -> map)))
    Map("samples" -> Map(sample -> perSample))
  }
}
/**
 * Turns one summarizable into its summary fragment.
 *
 * @param summarizable source of the stats and files
 * @param name key under which this summarizable's entries are stored
 * @return a map with `stats -> name -> ...` and `files -> name -> ...`;
 *         either section is omitted when it would be empty
 */
def parseSummarizable(summarizable: Summarizable, name: String): Map[String, Any] = {
  val data = summarizable.summaryStats
  val files = parseFiles(summarizable.summaryFiles)

  // Only the name-keyed "stats"/"files" form is produced; there is no separate
  // un-nested "data" expression evaluated and thrown away first.
  (if (data.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> data))) ++
    (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
}
/**
 * Converts every summary file into its summary entry, keeping the keys.
 *
 * The per-file work is delegated to `parseFile` and performed once per file;
 * no second, discarded pass over `files` is made.
 */
def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
  for ((key, file) <- files) yield key -> parseFile(file)
}
/**
 * Describes a single file for the summary.
 *
 * The result always holds the absolute `path`; an `md5` entry, read through
 * `parseChecksum` from the cached checksum file, is added only when `md5sum`
 * is set.
 */
def parseFile(file: File): Map[String, Any] = {
  val pathEntry: Map[String, Any] = Map("path" -> file.getAbsolutePath)
  if (md5sum) pathEntry + ("md5" -> parseChecksum(SummaryQScript.md5sumCache(file)))
  else pathEntry
}
def parseChecksum(checksumFile: File): String = {
......
......@@ -47,7 +47,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString
var opt_discard: Boolean = config("discard", default = false)
var opt_minimum_length: Option[Int] = config("minimum_length", 1)
var opt_minimum_length: Int = config("minimum_length", 1)
var opt_maximum_length: Option[Int] = config("maximum_length")
def cmdLine = required(executable) +
......@@ -63,7 +63,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
required("--output", fastq_output) +
" > " + required(stats_output)
def summaryData: Map[String, Any] = {
def summaryStats: Map[String, Any] = {
val trimR = """.*Trimmed reads: *(\d*) .*""".r
val tooShortR = """.*Too short reads: *(\d*) .*""".r
val tooLongR = """.*Too long reads: *(\d*) .*""".r
......@@ -82,8 +82,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
}
}
Map("version" -> getVersion,
"num_reads_affected" -> stats("trimmed"),
Map("num_reads_affected" -> stats("trimmed"),
"num_reads_discarded_too_short" -> stats("tooshort"),
"num_reads_discarded_too_long" -> stats("toolong"),
"adapters" -> adapter_stats.toMap
......
......@@ -45,7 +45,7 @@ class Seqstat(val root: Configurable) extends BiopetCommandLineFunction with Sum
def cmdLine = required(executable) + required(input) + " > " + required(output)
def summaryData: Map[String, Any] = {
def summaryStats: Map[String, Any] = {
val map = ConfigUtils.fileToConfigMap(output)
ConfigUtils.any2map(map.getOrElse("stats", Map()))
......
......@@ -81,7 +81,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ
" > " + required(output_stats)
}
def summaryData: Map[String, Any] = {
def summaryStats: Map[String, Any] = {
val pairKept = """FastQ paired records kept: (\d*) \((\d*) pairs\)""".r
val singleKept = """FastQ single records kept: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r
val pairDiscarded = """FastQ paired records discarded: (\d*) \((\d*) pairs\)""".r
......@@ -105,7 +105,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ
}
}
stats.toMap ++ Map("version" -> getVersion)
stats.toMap
}
override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
......
......@@ -63,7 +63,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
def summaryFiles: Map[String, File] = Map()
def summaryData: Map[String, Any] = {
def summaryStats: Map[String, Any] = {
val (header, content) = Picard.getMetrics(output)
(for (category <- 0 until content.size) yield {
......
......@@ -73,7 +73,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram)
def summaryData: Map[String, Any] = {
def summaryStats: Map[String, Any] = {
val (header, content) = Picard.getMetrics(output)
(for (i <- 0 to header.size if i < content.head.size)
yield (header(i).toLowerCase -> content.head(i))).toMap
......
......@@ -16,6 +16,7 @@
package nl.lumc.sasc.biopet.extensions.seqtk
import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.config.Configurable
......@@ -23,7 +24,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable
* Wrapper for the seqtk seq subcommand.
* Written based on seqtk version 1.0-r63-dirty.
*/
class SeqtkSeq(val root: Configurable) extends Seqtk {
class SeqtkSeq(val root: Configurable) extends Seqtk with Summarizable {
/** input file */
@Input(doc = "Input file (FASTQ or FASTA)")
......@@ -81,6 +82,10 @@ class SeqtkSeq(val root: Configurable) extends Seqtk {
/** shift quality by '(-Q) - 33' */
var V: Boolean = config("V", default = false)
// This wrapper contributes no statistics to the summary.
def summaryStats: Map[String, Any] = Map.empty
// This wrapper contributes no files to the summary.
def summaryFiles: Map[String, File] = Map.empty
def cmdLine = {
required(executable) +
" seq " +
......
......@@ -44,7 +44,7 @@ class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFuncti
def summaryFiles: Map[String, File] = Map()
def summaryData: Map[String, Any] = {
// The stats are whatever ConfigUtils.fileToConfigMap reads from `summaryFile`.
def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(summaryFile)
}
......
......@@ -64,7 +64,7 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi
def summaryFiles: Map[String, File] = Map()
def summaryData: Map[String, Any] = {
def summaryStats: Map[String, Any] = {
val regex = new Regex("""Filtered (\d*) reads from first read file.
|Filtered (\d*) reads from second read file.
|Synced read files contain (\d*) reads.""".stripMargin,
......@@ -82,8 +82,7 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi
}
} else (0, 0, 0)
Map("version" -> BiopetExecutable.getVersion,
"num_reads_discarded_R1" -> countFilteredR1,
Map("num_reads_discarded_R1" -> countFilteredR1,
"num_reads_discarded_R2" -> countFilteredR2,
"num_reads_kept" -> countRLeft
)
......
......@@ -18,6 +18,6 @@ class CollectAlignmentSummaryMetricsTest extends TestNGSuite with Matchers {
val job = new CollectAlignmentSummaryMetrics(null)
job.output = file
job.summaryData
job.summaryStats
}
}
......@@ -18,6 +18,6 @@ class CollectInsertSizeMetricsTest extends TestNGSuite with Matchers {
val job = new CollectInsertSizeMetrics(null)
job.output = file
job.summaryData
job.summaryStats
}
}
......@@ -159,7 +159,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
outputFiles ++ Map("fastq_file" -> this.fastqfile)
}
def summaryData: Map[String, Any] = Map("version" -> getVersion)
def summaryStats: Map[String, Any] = Map()
}
object Fastqc {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment