Commit 856f286b authored by Peter van 't Hof's avatar Peter van 't Hof

Added stats from bam metrics to summary

parent 553a7538
......@@ -51,7 +51,7 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
def summaryFiles = Map("input_bam" -> inputBam)
/** return settings */
def summarySettings = Map()
def summarySettings = Map("ampliconBedFile" -> ampliconBedFile, "roiBedFiles" -> roiBedFiles)
/** executed before script */
def init() {
......@@ -69,13 +69,17 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
multiMetrics.input = inputBam
multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
add(multiMetrics)
addSummarizable(multiMetrics, "multi_metrics")
add(CollectGcBiasMetrics(this, inputBam, outputDir))
val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
add(gcBiasMetrics)
addSummarizable(gcBiasMetrics, "gc_bias")
val wgsMetrics = new CollectWgsMetrics(this)
wgsMetrics.input = inputBam
wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
add(wgsMetrics)
addSummarizable(wgsMetrics, "wgs")
if (rnaMetrics) {
val rnaMetrics = new CollectRnaSeqMetrics(this)
......@@ -83,6 +87,7 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
add(rnaMetrics)
addSummarizable(rnaMetrics, "rna")
}
case class Intervals(bed: File, intervals: File)
......@@ -107,10 +112,12 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
val chsMetrics = CalculateHsMetrics(this, inputBam,
List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
add(chsMetrics)
addSummarizable(chsMetrics, "hs_metrics")
val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
add(pcrMetrics)
addSummarizable(chsMetrics, "targeted_pcr_metrics")
Intervals(ampliconBedFile, ampIntervals)
}
......@@ -118,8 +125,8 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
// Create stats and coverage plot for each bed/interval file
for (intervals <- roiIntervals ++ ampIntervals) {
//TODO: Add target jobs to summary
val targetDir = new File(outputDir, intervals.bed.getName.stripSuffix(".bed"))
val targetName = intervals.bed.getName.stripSuffix(".bed")
val targetDir = new File(outputDir, targetName)
val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam"),
......@@ -127,7 +134,9 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
biStrict.isIntermediate = true
add(biStrict)
add(SamtoolsFlagstat(this, biStrict.output, targetDir))
add(BiopetFlagstat(this, biStrict.output, targetDir))
val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
add(biopetFlagstatStrict)
addSummarizable(biopetFlagstatStrict, targetName + "_biopet_flagstat_strict")
val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam"),
......@@ -135,13 +144,17 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
biLoose.isIntermediate = true
add(biLoose)
add(SamtoolsFlagstat(this, biLoose.output, targetDir))
add(BiopetFlagstat(this, biLoose.output, targetDir))
val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
add(biopetFlagstatLoose)
addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
val coverageFile = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
//FIXME:should use piping
add(BedtoolsCoverage(this, inputBam, intervals.bed, coverageFile, depth = true), true)
add(CoverageStats(this, coverageFile, targetDir))
val covStats = CoverageStats(this, coverageFile, targetDir)
add(covStats)
addSummarizable(covStats, "cov_stats")
}
addSummaryJobs
......
......@@ -17,10 +17,11 @@ package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
/** Extension for picard CalculateHsMetrics */
class CalculateHsMetrics(val root: Configurable) extends Picard {
class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizable {
javaMainClass = new picard.analysis.directed.CalculateHsMetrics().getClass.getName
@Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
......@@ -57,6 +58,12 @@ class CalculateHsMetrics(val root: Configurable) extends Picard {
repeat("TARGET_INTERVALS=", targetIntervals, spaceSeparated = false) +
optional("PER_TARGET_COVERAGE=", perTargetCoverage, spaceSeparated = false) +
optional("BAIT_SET_NAME=", baitSetName, spaceSeparated = false)
/** Returns files for summary */
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
}
object CalculateHsMetrics {
......
......@@ -66,19 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output) match {
case None => Map()
case Some((header, content)) =>
(for (category <- 0 until content.size) yield {
content(category)(0).toString -> (
for (
i <- 1 until header.size if i < content(category).size
) yield {
header(i).toLowerCase -> content(category)(i)
}).toMap
}
).toMap
}
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
}
object CollectAlignmentSummaryMetrics {
......
......@@ -17,10 +17,11 @@ package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
/** Extension for picard CollectGcBiasMetrics */
class CollectGcBiasMetrics(val root: Configurable) extends Picard {
class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summarizable {
javaMainClass = new picard.analysis.CollectGcBiasMetrics().getClass.getName
@Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
......@@ -65,6 +66,12 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard {
optional("MINIMUM_GENOME_FRACTION=", minGenomeFraction, spaceSeparated = false) +
conditional(assumeSorted, "ASSUME_SORTED=TRUE") +
conditional(isBisulfiteSequinced.getOrElse(false), "IS_BISULFITE_SEQUENCED=TRUE")
/** Returns files for summary */
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
}
object CollectGcBiasMetrics {
......
......@@ -75,13 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
/** Returns files for summary */
def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram)
def summaryStats: Map[String, Any] = Picard.getMetrics(output) match {
case None => Map()
case Some((header, content)) =>
(for (i <- 0 to header.size if i < content.head.size)
yield header(i).toLowerCase -> content.head(i)).toMap
}
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
}
object CollectInsertSizeMetrics {
......@@ -90,6 +84,6 @@ object CollectInsertSizeMetrics {
val collectInsertSizeMetrics = new CollectInsertSizeMetrics(root)
collectInsertSizeMetrics.input = input
collectInsertSizeMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".insertsizemetrics")
return collectInsertSizeMetrics
collectInsertSizeMetrics
}
}
\ No newline at end of file
......@@ -4,12 +4,13 @@ import java.io.File
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import org.broadinstitute.gatk.utils.commandline.{ Output, Argument, Input }
/**
* Created by pjvan_thof on 4/16/15.
*/
class CollectMultipleMetrics(val root: Configurable) extends Picard {
class CollectMultipleMetrics(val root: Configurable) extends Picard with Summarizable {
import CollectMultipleMetrics._
javaMainClass = new picard.analysis.CollectMultipleMetrics().getClass.getName
......@@ -64,6 +65,38 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard {
conditional(assumeSorted, "ASSUME_SORTED=true") +
optional("STOP_AFTER=", stopAfter, spaceSeparated = false) +
repeat("PROGRAM=", program, spaceSeparated = false)
override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
program.foreach(p => {
val stats: Map[String, Any] = p match {
case _ if p == Programs.CollectAlignmentSummaryMetrics.toString =>
Picard.getMetrics(new File(outputName + ".alignment_summary_metrics")).getOrElse(Map())
case _ if p == Programs.CollectInsertSizeMetrics.toString =>
Map()
Picard.getMetrics(new File(outputName + ".insert_size_metrics")).getOrElse(Map())
case _ if p == Programs.QualityScoreDistribution.toString =>
Map()
Picard.getMetrics(new File(outputName + ".quality_distribution_metrics")).getOrElse(Map())
case _ if p == Programs.MeanQualityByCycle.toString =>
Map()
Picard.getMetrics(new File(outputName + ".quality_by_cycle_metrics")).getOrElse(Map())
case _ if p == Programs.CollectBaseDistributionByCycle.toString =>
Map()
Picard.getMetrics(new File(outputName + ".base_distribution_by_cycle_metrics")).getOrElse(Map())
case _ => Map()
}
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats
}
qscript.addSummarizable(sum, p)
})
}
def summaryFiles = Map()
def summaryStats = Map()
}
object CollectMultipleMetrics {
......
......@@ -84,12 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
"output_chart" -> chartOutput
).collect { case (key, Some(value)) => key -> value }
def summaryStats: Map[String, Any] = Picard.getMetrics(output) match {
case None => Map()
case Some((header, content)) =>
(for (i <- 0 to header.size if i < content.head.size)
yield header(i).toLowerCase -> content.head(i)).toMap
}
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
override def commandLine = super.commandLine +
required("INPUT=", input, spaceSeparated = false) +
......
......@@ -3,12 +3,13 @@ package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input }
/**
* Created by pjvan_thof on 4/16/15.
*/
class CollectTargetedPcrMetrics(val root: Configurable) extends Picard {
class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summarizable {
javaMainClass = new picard.analysis.directed.CollectTargetedPcrMetrics().getClass.getName
......@@ -41,6 +42,12 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard {
repeat("TARGET_INTERVALS=", targetIntervals, spaceSeparated = false) +
optional("PER_TARGET_COVERAGE=", perTargetCoverage, spaceSeparated = false) +
optional("CUSTOM_AMPLICON_SET_NAME=", customAmpliconSetName, spaceSeparated = false)
/** Returns files for summary */
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
}
object CollectTargetedPcrMetrics {
......
......@@ -3,12 +3,13 @@ package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input }
/**
* Created by pjvan_thof on 4/16/15.
*/
class CollectWgsMetrics(val root: Configurable) extends Picard {
class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable {
javaMainClass = new picard.analysis.CollectWgsMetrics().getClass.getName
......@@ -45,4 +46,10 @@ class CollectWgsMetrics(val root: Configurable) extends Picard {
optional("COVERAGE_CAP=", covCap, spaceSeparated = false) +
optional("STOP_AFTER=", stopAfter, spaceSeparated = false) +
conditional(includeBqHistogram, "INCLUDE_BQ_HISTOGRAM=true")
/** Returns files for summary */
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
}
......@@ -100,19 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable {
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(outputMetrics) match {
case None => Map()
case Some((header, content)) =>
(for (category <- 0 until content.size) yield {
content(category)(0).toString -> (
for (
i <- 1 until header.size if i < content(category).size
) yield {
header(i).toLowerCase -> content(category)(i)
}).toMap
}
).toMap
}
def summaryStats: Map[String, Any] = Picard.getMetrics(outputMetrics).getOrElse(Map())
}
object MarkDuplicates {
/** Returns default MarkDuplicates */
......
......@@ -81,20 +81,18 @@ object Picard {
* @param file input metrics file
* @return (header, content)
*/
def getMetrics(file: File): Option[(Array[String], List[Array[Any]])] =
if (file.exists) {
def getMetrics(file: File, tag: String = "METRICS CLASS"): Option[Map[String, Any]] =
if (!file.exists) None
else {
val lines = Source.fromFile(file).getLines().toArray
val start = lines.indexWhere(_.startsWith("## METRICS CLASS")) + 1
val start = lines.indexWhere(_.startsWith("## " + tag)) + 1
val end = lines.indexOf("", start)
val header = lines(start).split("\t")
val content = (for (i <- (start + 1) until end) yield lines(i).split("\t"))
.map(row => row.map(col => tryToParseNumber(col, true).getOrElse(col)))
Option((header, content.toList))
} else {
None
Some(Map("header" -> header, "content" -> content))
}
}
\ No newline at end of file
......@@ -16,11 +16,12 @@
package nl.lumc.sasc.biopet.scripts
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
class CoverageStats(val root: Configurable) extends PythonCommandLineFunction {
class CoverageStats(val root: Configurable) extends PythonCommandLineFunction with Summarizable {
setPythonScript("bedtools_cov_stats.py")
@Input(doc = "Input file")
......@@ -36,6 +37,10 @@ class CoverageStats(val root: Configurable) extends PythonCommandLineFunction {
def cmdLine = getPythonCommand +
required(input) + required("--plot", plot) + " > " + required(output)
def summaryFiles: Map[String, File] = Map("output" -> output, "plot" -> plot)
def summaryStats: Map[String, Any] = Map()
}
object CoverageStats {
......@@ -44,6 +49,6 @@ object CoverageStats {
coverageStats.input = input
coverageStats.output = new File(outputDir, input.getName + ".stats")
coverageStats.plot = new File(outputDir, input.getName + ".stats.png")
return coverageStats
coverageStats
}
}
......@@ -109,7 +109,6 @@ class VcfStats(val root: Configurable) extends BiopetJavaCommandLineFunction wit
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment