diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 137d06ce02465a33fc29d37d21b61c701ca9dbd2..506e42337f5c02052937319d91a6c98a614773d8 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -62,10 +62,21 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit def biopetScript() { add(SamtoolsFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".flagstat"))) - add(BiopetFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".biopetflagstat"))) + + val biopetFlagstat = BiopetFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".biopetflagstat"), + swapExt(outputDir, inputBam, ".bam", ".biopetflagstat.json")) + add(biopetFlagstat) + addSummarizable(biopetFlagstat, "biopet_flagstat") + add(CollectGcBiasMetrics(this, inputBam, outputDir)) - add(CollectInsertSizeMetrics(this, inputBam, outputDir)) - add(CollectAlignmentSummaryMetrics(this, inputBam, outputDir)) + + val collectInsertSizeMetrics = CollectInsertSizeMetrics(this, inputBam, outputDir) + add(collectInsertSizeMetrics) + addSummarizable(collectInsertSizeMetrics, "insert_size_metrics") + + val collectAlignmentSummaryMetrics = CollectAlignmentSummaryMetrics(this, inputBam, outputDir) + add(collectAlignmentSummaryMetrics) + addSummarizable(collectAlignmentSummaryMetrics, "alignment_metrics") val baitIntervalFile = if (baitBedFile != null) new File(outputDir, baitBedFile.getName.stripSuffix(".bed") + ".interval") else null if (baitIntervalFile != null) @@ -81,12 +92,14 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit val strictOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam") add(BedtoolsIntersect(this, inputBam, bedFile, strictOutputBam, minOverlap = config("strictintersectoverlap", default = 1.0)), true) add(SamtoolsFlagstat(this, strictOutputBam, swapExt(targetDir, strictOutputBam, ".bam", ".flagstat"))) - add(BiopetFlagstat(this, strictOutputBam, swapExt(targetDir, strictOutputBam, ".bam", ".biopetflagstat"))) + add(BiopetFlagstat(this, strictOutputBam, swapExt(targetDir, strictOutputBam, ".bam", ".biopetflagstat"), + swapExt(targetDir, strictOutputBam, ".bam", ".biopetflagstat.json"))) val looseOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam") add(BedtoolsIntersect(this, inputBam, bedFile, looseOutputBam, minOverlap = config("looseintersectoverlap", default = 0.01)), true) add(SamtoolsFlagstat(this, looseOutputBam, swapExt(targetDir, looseOutputBam, ".bam", ".biopet"))) - add(BiopetFlagstat(this, looseOutputBam, swapExt(targetDir, looseOutputBam, ".bam", ".biopetflagstat"))) + add(BiopetFlagstat(this, looseOutputBam, swapExt(targetDir, looseOutputBam, ".bam", ".biopetflagstat"), + swapExt(targetDir, looseOutputBam, ".bam", ".biopetflagstat.json"))) val coverageFile = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage") add(BedtoolsCoverage(this, inputBam, bedFile, coverageFile, true), true) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala index 6606d9e41acc4d5c2b785f905d844c2f91676b72..8fad2052937c8924bbe28a0751e76fb32e06b923 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala @@ -17,9 +17,10 @@ package nl.lumc.sasc.biopet.extensions.picard import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.core.summary.Summarizable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard { +class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with Summarizable { javaMainClass = "picard.analysis.CollectAlignmentSummaryMetrics" @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) @@ -59,6 +60,18 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard { optional("ASSUME_SORTED=", assumeSorted, spaceSeparated = false) + optional("STOP_AFTER=", stopAfter, spaceSeparated = false) + repeat("ADAPTER_SEQUENCE=", adapterSequence, spaceSeparated = false) + + def summaryFiles: Map[String, File] = Map() + + def summaryData: Map[String, Any] = { + val (header, content) = Picard.getMetrics(output) + + (for (category <- 0 to content.size) yield { + content(category)(0) -> (for (i <- 1 to header.size) yield { + header(i).toLowerCase -> content(category)(i) + }).toMap + }).toMap + } } object CollectAlignmentSummaryMetrics { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala index 9b1777677a9567d8e88d21f44ec7f913e45341b5..f83298822055aace7936b5f3b7802ba9ed12add5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala @@ -17,11 +17,12 @@ package nl.lumc.sasc.biopet.extensions.picard import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.core.summary.Summarizable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import scala.collection.immutable.Nil -class CollectInsertSizeMetrics(val root: Configurable) extends Picard { +class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summarizable { javaMainClass = "picard.analysis.CollectInsertSizeMetrics" @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) @@ -31,7 +32,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard { var output: File = _ @Output(doc = "Output histogram", required = true) - var outputHistogram: File = _ + def outputHistogram: File = new File(output + ".pdf") @Argument(doc = "Reference file", required = false) var reference: File = config("reference") @@ -55,7 +56,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard { var histogramWidth: Option[Int] = config("histogramWidth") override def beforeGraph { - if (outputHistogram == null) outputHistogram = new File(output + ".pdf") + //if (outputHistogram == null) outputHistogram = new File(output + ".pdf") //require(reference.exists) } @@ -69,6 +70,14 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard { optional("STOP_AFTER=", stopAfter, spaceSeparated = false) + optional("HISTOGRAM_WIDTH=", histogramWidth, spaceSeparated = false) + conditional(assumeSorted, "ASSUME_SORTED=TRUE") + + def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram) + + def summaryData: Map[String, Any] = { + val (header, content) = Picard.getMetrics(output) + (for (i <- 0 to header.size) + yield (header(i).toLowerCase -> content.head(i))).toMap + } } object CollectInsertSizeMetrics { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala index e417a85fd65c0d096961a910d20a8c890de28e91..12d9f05f6998fcac6462c3b65a2695a8471a73eb 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala @@ -15,9 +15,13 @@ */ package nl.lumc.sasc.biopet.extensions.picard +import java.io.File + import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import org.broadinstitute.gatk.utils.commandline.{ Argument } +import scala.io.Source + abstract class Picard extends BiopetJavaCommandLineFunction { override def subPath = "picard" :: super.subPath @@ -42,6 +46,7 @@ abstract class Picard extends BiopetJavaCommandLineFunction { @Argument(doc = "CREATE_MD5_FILE", required = false) var createMd5: Boolean = config("createmd5", default = false) + //FIXME: picard version // override def versionCommand = executable + " " + javaOpts + " " + javaExecutable + " -h" // override val versionRegex = """Version: (.*)""".r // override val versionExitcode = List(0, 1) @@ -59,3 +64,17 @@ abstract class Picard extends BiopetJavaCommandLineFunction { conditional(createIndex, "CREATE_INDEX=TRUE") + conditional(createMd5, "CREATE_MD5_FILE=TRUE") } + +object Picard { + def getMetrics(file: File) = { + val lines = Source.fromFile(file).getLines().toArray + + val start = lines.indexWhere(_.startsWith("## METRICS CLASS")) + 1 + val end = lines.indexOf("", start) + + val header = lines(start).split("\t") + val content = (for (i <- (start + 1) until end) yield lines(i).split("\t")).toList + + (header, content) + } +} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala index e99398ff43e563146229869fea220926a1c30eca..e866195ae4b9b132387809ed35ba2d78ac9098b1 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala @@ -16,15 +16,16 @@ package nl.lumc.sasc.biopet.tools import htsjdk.samtools.{ SAMRecord, SamReaderFactory } -import java.io.File +import java.io.{ PrintWriter, File } import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.ConfigUtils import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.JavaConversions._ -import scala.collection.mutable.Map -class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFunction { +class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFunction with Summarizable { javaMainClass = getClass.getName @Input(doc = "Input bam", shortName = "input", required = true) @@ -33,29 +34,44 @@ class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFuncti @Output(doc = "Output flagstat", shortName = "output", required = true) var output: File = _ + @Output(doc = "summary output file", shortName = "output", required = false) + var summaryFile: File = _ + override val defaultVmem = "8G" memoryLimit = Option(4.0) - override def commandLine = super.commandLine + required("-I", input) + " > " + required(output) + override def commandLine = super.commandLine + required("-I", input) + required("-s", summaryFile) + " > " + required(output) + + def summaryFiles: Map[String, File] = Map() + + def summaryData: Map[String, Any] = { + ConfigUtils.fileToConfigMap(summaryFile) + } } object BiopetFlagstat extends ToolCommand { - def apply(root: Configurable, input: File, output: File): BiopetFlagstat = { + import scala.collection.mutable.Map + + def apply(root: Configurable, input: File, output: File, summaryFile: File): BiopetFlagstat = { val flagstat = new BiopetFlagstat(root) flagstat.input = input flagstat.output = output + flagstat.summaryFile = summaryFile return flagstat } - case class Args(inputFile: File = null, region: Option[String] = None) extends AbstractArgs + case class Args(inputFile: File = null, summaryFile: Option[File] = None, region: Option[String] = None) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) => c.copy(inputFile = x) - } text ("out is a required file property") + } text ("input bam file") + opt[File]('s', "summaryFile") valueName ("<file>") action { (x, c) => + c.copy(summaryFile = Some(x)) + } text ("summary output file") opt[String]('r', "region") valueName ("<chr:start-stop>") action { (x, c) => c.copy(region = Some(x)) - } text ("out is a required file property") + } } /** @@ -127,6 +143,14 @@ object BiopetFlagstat extends ToolCommand { flagstatCollector.loadRecord(record) } + commandArgs.summaryFile.foreach { + case file => { + val writer = new PrintWriter(file) + writer.println(flagstatCollector.summary) + writer.close() + } + } + println(flagstatCollector.report) } @@ -207,6 +231,14 @@ object BiopetFlagstat extends ToolCommand { return buffer.toString } + def summary: String = { + val map = (for (t <- 0 until names.size) yield { + names(t) -> totalCounts(t) + }).toMap + + return ConfigUtils.mapToJson(map).spaces4 + } + def crossReport(fraction: Boolean = false): String = { val buffer = new StringBuilder