Commit 057f8873 authored by Sander Bollen's avatar Sander Bollen

Merge branch 'fix-BIOPET-374' into 'develop'

Fix biopet 374



See merge request !476
parents ebebec1a b71f8de4
......@@ -83,10 +83,10 @@
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample}/Libraries/${libId}/index.html">${libId}</a></td> #end
#{
val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List("bammetrics", "stats")
val total = summary.getValue((prefixPath ::: List("biopet_flagstat", "All")):_*).getOrElse(0L).asInstanceOf[Long]
val mapped = summary.getValue((prefixPath ::: List("biopet_flagstat", "Mapped")):_*).getOrElse(0L).asInstanceOf[Long]
val duplicates = summary.getValue((prefixPath ::: List("biopet_flagstat", "Duplicates")):_*).getOrElse(0L).asInstanceOf[Long]
val secondary = summary.getValue((prefixPath ::: List("biopet_flagstat", "NotPrimaryAlignment")):_*).getOrElse(0L).asInstanceOf[Long]
val total = summary.getValue((prefixPath ::: List("bamstats", "flagstats", "All")):_*).getOrElse(0L).asInstanceOf[Long]
val mapped = summary.getValue((prefixPath ::: List("bamstats", "flagstats", "Mapped")):_*).getOrElse(0L).asInstanceOf[Long]
val duplicates = summary.getValue((prefixPath ::: List("bamstats", "flagstats", "Duplicates")):_*).getOrElse(0L).asInstanceOf[Long]
val secondary = summary.getValue((prefixPath ::: List("bamstats", "flagstats", "NotPrimaryAlignment")):_*).getOrElse(0L).asInstanceOf[Long]
}#
<td>${total}</td>
<td>${mapped}</td>
......
......@@ -12,9 +12,9 @@
#for (field <- fields)
<tr><th>${field}</th><td>
#if (libId.isDefined)
${summary.getLibraryValue(sampleId.get, libId.get, metricsTag, "stats", "biopet_flagstat", field)}
${summary.getLibraryValue(sampleId.get, libId.get, metricsTag, "stats", "bamstats", "flagstats", field)}
#else
${summary.getSampleValue(sampleId.get, metricsTag, "stats", "biopet_flagstat", field)}
${summary.getSampleValue(sampleId.get, metricsTag, "stats", "bamstats", "flagstats", field)}
#end
</td></tr>
#end
......
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
#import(org.apache.commons.io.FileUtils)
<%@ var summary: Summary %>
<%@ var sampleId: Option[String] = None %>
<%@ var libId: Option[String] = None %>
<%@ var rootPath: String %>
<%@ var metricsTag: String = "bammetrics" %>
<%@ var sampleLevel: Boolean = false %>
<%@ var outputDir: File %>
<%@ var fields: List[String] = List("min", "max", "mean", "median", "modal")%>
<%@ var showPlot: Boolean = false %>
<%@ var showTable: Boolean = true %>
<%@ var showIntro: Boolean = true%>
#{
val samples = sampleId match {
case Some(sample) => {
List(sample.toString)
}
case _ => summary.samples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
<div class="col-md-1"></div>
<div class="col-md-6">
<p>
#if (sampleId.isDefined && libId.isDefined)
This plot shows the clipping distribution for library <b>${libId}</b> of sample <b>${sampleId}</b>.
#elseif(sampleId.isDefined)
This plot shows the clipping distribution for the libraries of sample <b>${sampleId}</b>.
#else
This plot shows the clipping distribution for each of the <b>${samples.size}</b> samples.
#end
</p>
</div>
</div>
#end
#if (showPlot)
#{ BammetricsReport.clippingPlot(outputDir, "clipping", summary, !sampleLevel, sampleId = sampleId, libId = libId) }#
<div class="panel-body">
<img src="clipping.png" class="img-responsive" />
</div>
<div class="panel-footer">
#if (showTable)
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#clippingTable">
<i class="glyphicon glyphicon-eye-close"></i> Hide table</button>
#else
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#clippingTable">
<i class="glyphicon glyphicon-eye-open"></i> Show table</button>
#end
<a href="clipping.tsv"><button type="button" class="btn btn-info"><i class="glyphicon glyphicon-cloud-download"></i>TSV file</button></a>
</div>
#end
<div class="panel-body collapse #if (showTable)in#end" id="clippingTable">
<!-- Table -->
<table class="table">
<thead><tr>
<th data-sorted="true" data-sorted-direction="ascending">Sample</th>
#if (!sampleLevel) <th>Library</th> #end
#for (field <- fields)
<th>${field.replaceAll("_", " ")}</th>
#end
</tr></thead>
<tbody>
#for (sample <- samples.toList.sorted)
#{
val libs = (libId, sampleLevel) match {
case (_, true) => List("")
case (Some(libId), _) => List(libId.toString)
case _ => summary.libraries(sample).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample}/index.html">${sample}</a></td>
#for (libId <- libs)
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample}/Libraries/${libId}/index.html">${libId}</a></td> #end
#{
val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List("bammetrics", "stats")
val fieldValues = for (field <- fields) yield {
summary.getValue((prefixPath ::: List("bamstats", "clipping", "general", field)):_*).getOrElse("N/A")
}
}#
#for (value <- fieldValues)
<td>${value}</td>
#end
</tr>
#end
#end
</tbody>
</table>
</div>
......@@ -51,12 +51,12 @@
<div class="panel-footer">
#if (showTable)
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#insertsizeTable">
<i class="glyphicon glyphicon-eye-close"></i> Hide table</button>
<i class="glyphicon glyphicon-eye-close"></i>Hide table</button>
#else
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#insertsizeTable">
<i class="glyphicon glyphicon-eye-open"></i> Show table</button>
<i class="glyphicon glyphicon-eye-open"></i>Show table</button>
#end
<button type="button" class="btn btn-info"><i class="glyphicon glyphicon-cloud-download"> <a href="insertsize.tsv">tsv file</a></i></button>
<a href="insertsize.tsv"><button type="button" class="btn btn-info"><i class="glyphicon glyphicon-cloud-download"></i>TSV file</button></a>
</div>
#end
......
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
#import(org.apache.commons.io.FileUtils)
<%@ var summary: Summary %>
<%@ var sampleId: Option[String] = None %>
<%@ var libId: Option[String] = None %>
<%@ var rootPath: String %>
<%@ var metricsTag: String = "bammetrics" %>
<%@ var sampleLevel: Boolean = false %>
<%@ var outputDir: File %>
<%@ var fields: List[String] = List("min", "max", "mean", "median", "modal")%>
<%@ var showPlot: Boolean = false %>
<%@ var showTable: Boolean = true %>
<%@ var showIntro: Boolean = true%>
#{
val samples = sampleId match {
case Some(sample) => {
List(sample.toString)
}
case _ => summary.samples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
<div class="col-md-1"></div>
<div class="col-md-6">
<p>
#if (sampleId.isDefined && libId.isDefined)
This plot shows the mapping quality distribution for all libraries combined in sample <b>${sampleId}</b>.
#elseif(sampleId.isDefined)
This plot shows the mapping quality distribution for the libraries of sample <b>${sampleId}</b>.
#else
This plot shows the mapping quality distribution for each of the <b>${samples.size}</b> samples.
#end
</p>
</div>
</div>
#end
#if (showPlot)
#{ BammetricsReport.mappingQualityPlot(outputDir, "mapping_quality", summary, !sampleLevel, sampleId = sampleId, libId = libId) }#
<div class="panel-body">
<img src="mapping_quality.png" class="img-responsive" />
</div>
<div class="panel-footer">
#if (showTable)
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#mapping_qualityTable">
<i class="glyphicon glyphicon-eye-close"></i> Hide table</button>
#else
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#mapping_qualityTable">
<i class="glyphicon glyphicon-eye-open"></i> Show table</button>
#end
<a href="mapping_quality.tsv"><button type="button" class="btn btn-info"><i class="glyphicon glyphicon-cloud-download"></i> TSV file</button></a>
</div>
#end
<div class="panel-body collapse #if (showTable)in#end" id="mapping_qualityTable">
<!-- Table -->
<table class="table">
<thead><tr>
<th data-sorted="true" data-sorted-direction="ascending">Sample</th>
#if (!sampleLevel) <th>Library</th> #end
#for (field <- fields)
<th>${field.replaceAll("_", " ")}</th>
#end
</tr></thead>
<tbody>
#for (sample <- samples.toList.sorted)
#{
val libs = (libId, sampleLevel) match {
case (_, true) => List("")
case (Some(libId), _) => List(libId.toString)
case _ => summary.libraries(sample).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample}/index.html">${sample}</a></td>
#for (libId <- libs)
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample}/Libraries/${libId}/index.html">${libId}</a></td> #end
#{
val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List("bammetrics", "stats")
val fieldValues = for (field <- fields) yield {
summary.getValue((prefixPath ::: List("bamstats", "mapping_quality", "general", field)):_*).getOrElse("N/A")
}
}#
#for (value <- fieldValues)
<td>${value}</td>
#end
</tr>
#end
#end
</tbody>
</table>
</div>
......@@ -22,7 +22,7 @@ import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, Sa
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort }
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import nl.lumc.sasc.biopet.extensions.tools.{ BamStats, BiopetFlagstat }
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
......@@ -82,9 +82,11 @@ class BamMetrics(val root: Configurable) extends QScript
def biopetScript() {
add(SamtoolsFlagstat(this, inputBam, outputDir))
val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
add(biopetFlagstat)
addSummarizable(biopetFlagstat, "biopet_flagstat")
val bamStats = new BamStats(this)
bamStats.bamFile = inputBam
bamStats.outputDir = new File(outputDir, "bamstats")
add(bamStats)
addSummarizable(bamStats, "bamstats")
val multiMetrics = new CollectMultipleMetrics(this)
multiMetrics.input = inputBam
......
......@@ -18,7 +18,7 @@ import java.io.{ File, FileOutputStream }
import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import nl.lumc.sasc.biopet.extensions.tools.BamStats
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils
......@@ -77,7 +77,7 @@ class BamMetricsTest extends TestNGSuite with Matchers {
bammetrics.functions.count(_.isInstanceOf[CollectMultipleMetrics]) shouldBe 1
bammetrics.functions.count(_.isInstanceOf[CalculateHsMetrics]) shouldBe (if (amplicon) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[CollectTargetedPcrMetrics]) shouldBe (if (amplicon) 1 else 0)
bammetrics.functions.count(_.isInstanceOf[BiopetFlagstat]) shouldBe 1
bammetrics.functions.count(_.isInstanceOf[BamStats]) shouldBe 1
}
// remove temporary run directory all tests in the class have been run
......
......@@ -249,6 +249,7 @@ object BiopetCommandLineFunction extends Logging {
try {
val buffer = new StringBuffer()
val tempFile = File.createTempFile("which.", ".sh")
tempFile.deleteOnExit()
val writer = new PrintWriter(tempFile)
pre_commands.foreach(cmd => writer.println(cmd + " > /dev/null 2> /dev/null"))
writer.println(s"which $executable")
......
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
* Created by pjvanthof on 18/11/2016.
*/
class BamStats(val root: Configurable) extends ToolCommandFunction with Reference with Summarizable {
def toolObject = nl.lumc.sasc.biopet.tools.bamstats.BamStats
@Input(required = true)
var reference: File = _
@Input(required = true)
var bamFile: File = _
var outputDir: File = _
var binSize: Option[Int] = config("bin_size")
var threadBinSize: Option[Int] = config("thread_bin_size")
override def defaultThreads = 3
override def defaultCoreMemory = 5.0
override def dictRequired = true
def getOutputFile(name: String, contig: Option[String] = None): File = {
contig match {
case Some(contig) => new File(outputDir, "contigs" + File.separator + contig + File.separator + name)
case _ => new File(outputDir, name)
}
}
def bamstatsSummary: File = new File(outputDir, "bamstats.summary.json")
def flagstatSummaryFile(contig: Option[String] = None): File = getOutputFile("flagstats.summary.json", contig)
def mappingQualityFile(contig: Option[String] = None): File = getOutputFile("mapping_quality.tsv", contig)
def clipingFile(contig: Option[String] = None): File = getOutputFile("clipping.tsv", contig)
override def beforeGraph() {
super.beforeGraph()
jobOutputFile = new File(outputDir, ".bamstats.out")
if (reference == null) reference = referenceFasta()
}
/** Creates command to execute extension */
override def cmdLine = super.cmdLine +
required("-b", bamFile) +
required("-o", outputDir) +
required("-R", reference) +
optional("--binSize", binSize) +
optional("--threadBinSize", threadBinSize)
def summaryFiles: Map[String, File] = Map()
def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(bamstatsSummary)
}
......@@ -14,19 +14,19 @@
*/
package nl.lumc.sasc.biopet.tools.bamstats
import java.io.File
import java.util.concurrent.TimeoutException
import java.io.{ File, PrintWriter }
import htsjdk.samtools.reference.FastaSequenceFile
import htsjdk.samtools.{ SAMSequenceDictionary, SamReaderFactory }
import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck
import nl.lumc.sasc.biopet.utils.{ FastaUtils, ToolCommand }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, FastaUtils, ToolCommand }
import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList }
import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.concurrent.{ Await, Future }
import scala.io.Source
import scala.language.postfixOps
/**
......@@ -104,6 +104,42 @@ object BamStats extends ToolCommand {
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.toList)
stats.writeStatsToFiles(outputDir)
val clippingHistogram = tsvToMap(new File(outputDir, "clipping.tsv"))
val mappingQualityHistogram = tsvToMap(new File(outputDir, "mapping_quality.tsv"))
val summary = Map(
"flagstats" -> ConfigUtils.fileToConfigMap(new File(outputDir, "flagstats.summary.json")),
"flagstats_per_contig" -> referenceDict.getSequences.map {
c =>
c.getSequenceName -> ConfigUtils.fileToConfigMap(
new File(outputDir, "contigs" + File.separator + c.getSequenceName + File.separator + "flagstats.summary.json"))
}.toMap,
"mapping_quality" -> Map("general" -> aggregateStats(mappingQualityHistogram), "histogram" -> mappingQualityHistogram),
"clipping" -> Map("general" -> aggregateStats(clippingHistogram), "histogram" -> clippingHistogram)
)
val summaryWriter = new PrintWriter(new File(outputDir, "bamstats.summary.json"))
summaryWriter.println(ConfigUtils.mapToJson(summary).spaces2)
summaryWriter.close()
}
def aggregateStats(table: Map[String, Array[Long]]): Map[String, Any] = {
val values = table("value")
val counts = table("count")
require(values.size == counts.size)
if (values.nonEmpty) {
val modal = values(counts.indexOf(counts.max))
val totalCounts = counts.sum
val mean: Double = values.zip(counts).map(x => x._1 * x._2).sum.toDouble / totalCounts
val median: Long = values(values.zip(counts).zipWithIndex.sortBy(_._1._1).foldLeft((0L, 0)) {
case (a, b) =>
val total = a._1 + b._1._2
if (total >= totalCounts / 2) (total, a._2)
else (total, b._2)
}._2)
Map("min" -> values.min, "max" -> values.max, "median" -> median, "mean" -> mean, "modal" -> modal)
} else Map()
}
/**
......@@ -166,7 +202,7 @@ object BamStats extends ToolCommand {
if (!samRecord.getReadUnmappedFlag) { // Mapped read
totalStats.mappingQualityHistogram.add(samRecord.getMappingQuality)
}
if (samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag)
if (samRecord.getReadPairedFlag && samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag)
totalStats.insertSizeHistogram.add(samRecord.getInferredInsertSize.abs)
val leftClipping = samRecord.getAlignmentStart - samRecord.getUnclippedStart
......@@ -209,4 +245,21 @@ object BamStats extends ToolCommand {
samReader.close()
stats
}
def tsvToMap(tsvFile: File): Map[String, Array[Long]] = {
val reader = Source.fromFile(tsvFile)
val it = reader.getLines()
val header = it.next().split("\t")
val arrays = header.zipWithIndex.map(x => x._2 -> (x._1 -> ArrayBuffer[Long]()))
for (line <- it) {
val values = line.split("\t")
require(values.size == header.size, s"Line does not have the number of field as header: $line")
for (array <- arrays) {
array._2._2.append(values(array._1).toLong)
}
}
reader.close()
arrays.map(x => x._2._1 -> x._2._2.toArray).toMap
}
}
......@@ -31,7 +31,7 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(),
_3_ClippingHistogram: Histogram[Int] = new Histogram[Int]()) {
flagstat.loadDefaultFunctions()
flagstat.loadQualityFunctions(1, 0)
flagstat.loadQualityFunctions()
flagstat.loadOrientationFunctions
/** This will add an other [[Stats]] inside `this` */
......
......@@ -211,6 +211,7 @@ object ConfigUtils extends Logging {
case Some(x) => anyToJson(x)
case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2)))
case l: List[_] => Json.array(l.map(anyToJson): _*)
case l: Array[_] => Json.array(l.map(anyToJson): _*)
case b: Boolean => Json.jBool(b)
case n: Int => Json.jNumberOrString(n)
case n: Double => Json.jNumberOrString(n)
......
......@@ -49,3 +49,24 @@ class LinePlot(val root: Configurable) extends Rscript {
title.map(Seq("--title", _)).getOrElse(Seq()) ++
(if (removeZero) Seq("--removeZero", "true") else Seq())
}
object LinePlot {
def apply(inputTsv: File,
outputFile: File,
root: Configurable = null,
xlabel: Option[String] = None,
ylabel: Option[String] = None,
width: Int = 1200,
removeZero: Boolean = false,
title: Option[String] = None): LinePlot = {
val plot = new LinePlot(root)
plot.input = inputTsv
plot.output = outputFile
plot.xlabel = xlabel
plot.ylabel = ylabel
plot.width = Some(width)
plot.removeZero = removeZero
plot.title = title
plot
}
}
\ No newline at end of file
......@@ -78,6 +78,11 @@ class Summary(file: File) {
}
}
/** Get value on nested path with prefix depending is sampleId and/or libId is None or not */
def getValueAsArray(sampleId: Option[String], libId: Option[String], path: String*): Option[Array[Any]] = {
this.getValue(sampleId, libId, path: _*).map(ConfigUtils.any2list(_).toArray)
}
/**
* Get values for all libraries on a given path
* @param path path to of value
......
......@@ -79,8 +79,12 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder {
additionalSections ++
List("Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp",
Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)
), "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp",
Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)
), "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp",
Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)
)) ++
(if (pairedFound) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp",
(if (pairedFound && insertsizeExecuted) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp",
Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)))
else Nil) ++
(if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment