diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index 944dee43fa5fae667cad5e08a05ae4ce9d5f8fbe..0dcf13eeab40c0f502a181b267da9dea9d3f3bb9 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -207,6 +207,6 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab object BiopetCommandLineFunctionTrait { import scala.collection.mutable.Map private val versionCache: Map[String, String] = Map() - private val executableMd5Cache: Map[String, String] = Map() + private[core] val executableMd5Cache: Map[String, String] = Map() private val executableCache: Map[String, String] = Map() } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala index d71eaa1608feffffea1fe82537af02bda4e2257e..a1901ed793912f9e88056007479f9295145dae8f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala @@ -11,7 +11,7 @@ trait Summarizable extends Configurable { def summaryFiles: Map[String, File] - def summaryData: Map[String, Any] + def summaryStats: Map[String, Any] /** * This function is used to merge @@ -20,5 +20,7 @@ trait Summarizable extends Configurable { * @param key * @return */ - def resolveSummaryConflict(v1: Any, v2: Any, key: String) = v1 + def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { + throw new IllegalStateException("Merge can not have same key by default") + } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 207acd79a3cfb67691d8342b3f8ae942df7f331c..fac1055e3b03699898e5ef4b36b1011ca3530133 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.core.summary import java.io.{ FileInputStream, PrintWriter, File } import java.security.MessageDigest +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunctionTrait, SampleLibraryTag } import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.queue.function.{ QFunction, InProcessFunction } @@ -43,45 +44,72 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config } def run(): Unit = { - val map = (for ( + + val pipelineMap = { + val files = parseFiles(qscript.summaryFiles) + val settings = qscript.summarySettings + val executables = { + for ((name, (file, version)) <- qscript.executables) yield { + name -> Map("version" -> version, "md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(file.getCanonicalPath, "N/A")) + } + } + + val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++ + (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map("pipeline" -> files))) ++ + (if (executables.isEmpty) Map[String, Any]() else Map("executables" -> executables.toMap)))) + + qscript match { + case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId) + case _ => map + } + } + + val jobsMap = (for ( ((name, sampleId, libraryId), summarizables) <- qscript.summarizables; summarizable <- summarizables ) yield { - val map = Map(qscript.summaryName -> Map(name -> parseSummarizable(summarizable))) + val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name)) - (sampleId match { - case Some(sampleId) => Map("samples" -> Map(sampleId -> (libraryId match { - case Some(libraryId) => Map("libraries" -> Map(libraryId -> map)) - case _ => map - }))) - case _ => map - }, (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key)) - }).foldRight(Map[String, Any]())((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2)) + (prefixSampleLibrary(map, sampleId, libraryId), + (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key)) + }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2)) val combinedMap = (for (qscript <- qscript.summaryQScripts) yield { ConfigUtils.fileToConfigMap(qscript.summaryFile) - }).foldRight(map)((a, b) => ConfigUtils.mergeMaps(a, b)) + }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) val writer = new PrintWriter(out) writer.println(ConfigUtils.mapToJson(combinedMap).spaces4) writer.close() } - def parseSummarizable(summarizable: Summarizable) = { - val data = summarizable.summaryData + def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = { + sampleId match { + case Some(sampleId) => Map("samples" -> Map(sampleId -> (libraryId match { + case Some(libraryId) => Map("libraries" -> Map(libraryId -> map)) + case _ => map + }))) + case _ => map + } + } + + def parseSummarizable(summarizable: Summarizable, name: String) = { + val data = summarizable.summaryStats val files = parseFiles(summarizable.summaryFiles) - (if (data.isEmpty) Map[String, Any]() else Map("data" -> data)) ++ - (if (files.isEmpty) Map[String, Any]() else Map("files" -> files)) + (if (data.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> data))) ++ + (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files))) } def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = { - for ((key, file) <- files) yield { - val map: mutable.Map[String, Any] = mutable.Map() - map += "path" -> file.getAbsolutePath - if (md5sum) map += "md5" -> parseChecksum(SummaryQScript.md5sumCache(file)) - key -> map.toMap - } + for ((key, file) <- files) yield key -> parseFile(file) + } + + def parseFile(file: File): Map[String, Any] = { + val map: mutable.Map[String, Any] = mutable.Map() + map += "path" -> file.getAbsolutePath + if (md5sum) map += "md5" -> parseChecksum(SummaryQScript.md5sumCache(file)) + map.toMap } def parseChecksum(checksumFile: File): String = { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala index ccf56efc8035eabba1c49dd9e2745de26f7efe30..fc90139f8db4dba65da7e7c90df8988f68bb87e7 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala @@ -47,7 +47,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString var opt_discard: Boolean = config("discard", default = false) - var opt_minimum_length: Option[Int] = config("minimum_length", 1) + var opt_minimum_length: Int = config("minimum_length", 1) var opt_maximum_length: Option[Int] = config("maximum_length") def cmdLine = required(executable) + @@ -63,7 +63,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su required("--output", fastq_output) + " > " + required(stats_output) - def summaryData: Map[String, Any] = { + def summaryStats: Map[String, Any] = { val trimR = """.*Trimmed reads: *(\d*) .*""".r val tooShortR = """.*Too short reads: *(\d*) .*""".r val tooLongR = """.*Too long reads: *(\d*) .*""".r @@ -82,8 +82,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su } } - Map("version" -> getVersion, - "num_reads_affected" -> stats("trimmed"), + Map("num_reads_affected" -> stats("trimmed"), "num_reads_discarded_too_short" -> stats("tooshort"), "num_reads_discarded_too_long" -> stats("toolong"), "adapters" -> adapter_stats.toMap diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala index 556f8639f27b2d5d57c018ed9e31fb0195892a56..bb014d224c7a6c1ca9aeb7508538e288e88117cc 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala @@ -45,7 +45,7 @@ class Seqstat(val root: Configurable) extends BiopetCommandLineFunction with Sum def cmdLine = required(executable) + required(input) + " > " + required(output) - def summaryData: Map[String, Any] = { + def summaryStats: Map[String, Any] = { val map = ConfigUtils.fileToConfigMap(output) ConfigUtils.any2map(map.getOrElse("stats", Map())) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala index 8a546534d305e8a8a36a69b6c94660d63abb9533..fae8c5802c3979af507421df955a91400d21923d 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala @@ -81,7 +81,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ " > " + required(output_stats) } - def summaryData: Map[String, Any] = { + def summaryStats: Map[String, Any] = { val pairKept = """FastQ paired records kept: (\d*) \((\d*) pairs\)""".r val singleKept = """FastQ single records kept: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r val pairDiscarded = """FastQ paired records discarded: (\d*) \((\d*) pairs\)""".r @@ -105,7 +105,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ } } - stats.toMap ++ Map("version" -> getVersion) + stats.toMap } override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala index f691145215e6f6ccd54f4e72001165228672f6b9..53b109b8c4fc549924b8f494993f661ec09598ad 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala @@ -63,7 +63,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with def summaryFiles: Map[String, File] = Map() - def summaryData: Map[String, Any] = { + def summaryStats: Map[String, Any] = { val (header, content) = Picard.getMetrics(output) (for (category <- 0 until content.size) yield { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala index 9585491f971454ea2e0f4bb03b0c67b50aea8579..d5565316293e7af0460362064e39e713361a6b7c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala @@ -73,7 +73,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram) - def summaryData: Map[String, Any] = { + def summaryStats: Map[String, Any] = { val (header, content) = Picard.getMetrics(output) (for (i <- 0 to header.size if i < content.head.size) yield (header(i).toLowerCase -> content.head(i))).toMap diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala index 9838040cc74a5da4cff3073ac0b2123b6de9e954..a3dc12182a8a236e3543f327adbd5f51c843b5d9 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala @@ -16,6 +16,7 @@ package nl.lumc.sasc.biopet.extensions.seqtk import java.io.File +import nl.lumc.sasc.biopet.core.summary.Summarizable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import nl.lumc.sasc.biopet.core.config.Configurable @@ -23,7 +24,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable * Wrapper for the seqtk seq subcommand. * Written based on seqtk version 1.0-r63-dirty. */ -class SeqtkSeq(val root: Configurable) extends Seqtk { +class SeqtkSeq(val root: Configurable) extends Seqtk with Summarizable { /** input file */ @Input(doc = "Input file (FASTQ or FASTA)") @@ -81,6 +82,10 @@ class SeqtkSeq(val root: Configurable) extends Seqtk { /** shift quality by '(-Q) - 33' */ var V: Boolean = config("V", default = false) + def summaryStats: Map[String, Any] = Map() + + def summaryFiles: Map[String, File] = Map() + def cmdLine = { required(executable) + " seq " + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala index e866195ae4b9b132387809ed35ba2d78ac9098b1..48e1c36e1805dfa11b630618e002a7a1ba3f23e4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala @@ -44,7 +44,7 @@ class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFuncti def summaryFiles: Map[String, File] = Map() - def summaryData: Map[String, Any] = { + def summaryStats: Map[String, Any] = { ConfigUtils.fileToConfigMap(summaryFile) } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala index 32c0c5b1eb25ea43bcbfb211f3ea89bfabe60c0e..2b44e6300e844f9c0827901c66ff3cba464dcb4c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala @@ -64,7 +64,7 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi def summaryFiles: Map[String, File] = Map() - def summaryData: Map[String, Any] = { + def summaryStats: Map[String, Any] = { val regex = new Regex("""Filtered (\d*) reads from first read file. |Filtered (\d*) reads from second read file. |Synced read files contain (\d*) reads.""".stripMargin, @@ -82,8 +82,7 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi } } else (0, 0, 0) - Map("version" -> BiopetExecutable.getVersion, - "num_reads_discarded_R1" -> countFilteredR1, + Map("num_reads_discarded_R1" -> countFilteredR1, "num_reads_discarded_R2" -> countFilteredR2, "num_reads_kept" -> countRLeft ) diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala index 51139812f6169689a080aaf913c16b6ee2ae00fa..f46a9b91f6d82d90444700453ab9fdf1a32b723e 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala @@ -18,6 +18,6 @@ class CollectAlignmentSummaryMetricsTest extends TestNGSuite with Matchers { val job = new CollectAlignmentSummaryMetrics(null) job.output = file - job.summaryData + job.summaryStats } } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala index fc0c563e82a48aa3918ee5ad0913dd9a62e8d966..ffb5d528c1bf42044a5f61d282f6fe42aab11f1a 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala @@ -18,6 +18,6 @@ class CollectInsertSizeMetricsTest extends TestNGSuite with Matchers { val job = new CollectInsertSizeMetrics(null) job.output = file - job.summaryData + job.summaryStats } } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index cd9dd4ae019f2f61c88f7d95d9ab988d499efc8a..8946db96cfea9ff9637f5a88afaa9e733043ebb9 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -159,7 +159,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r outputFiles ++ Map("fastq_file" -> this.fastqfile) } - def summaryData: Map[String, Any] = Map("version" -> getVersion) + def summaryStats: Map[String, Any] = Map() } object Fastqc {