Commit 4e8c4e4f authored by Peter van 't Hof's avatar Peter van 't Hof

Merge remote-tracking branch 'remotes/origin/develop' into epic-summary_reformat

parents ffaea590 2299e7a9
......@@ -22,6 +22,10 @@ node('local') {
junit '*/target/surefire-reports/*.xml'
}
stage('Check git on changes') {
sh 'if [ $(git diff | wc -l) -eq 0 ]; then true; else echo "[ERROR] Git is not clean anymore after build"; git diff; echo "[ERROR] This might be caused by reformatted code; if so, run maven locally"; false; fi'
}
stage('Check Documentation') {
sh 'mkdocs build --clean --strict'
}
......
......@@ -5,7 +5,7 @@
Biopet (Bio Pipeline Execution Toolkit) is the main pipeline development framework of the LUMC Sequencing Analysis Support Core team. It contains our main pipelines and some of the command line tools we develop in-house. It is meant to be used in the main [SHARK](https://humgenprojects.lumc.nl/trac/shark) computing cluster. While usage outside of SHARK is technically possible, some adjustments may need to be made in order to do so.
Full documantation is here: [Biopet documantation](http://biopet-docs.readthedocs.io/en/latest/)
Full documentation is here: [Biopet documentation](http://biopet-docs.readthedocs.io/en/latest/)
## Quick Start
......@@ -60,7 +60,7 @@ Biopet is based on the Queue framework developed by the Broad Institute as part
We welcome any kind of contribution, be it merge requests on the code base, documentation updates, or any other kind of fix! The main language we use is Scala, though the repository also contains a small bit of Python and R. Our main code repository is located at [https://github.com/biopet/biopet](https://github.com/biopet/biopet), along with our issue tracker.
For more information please go to our [Developer documantation](http://biopet-docs.readthedocs.io/en/develop/developer/getting-started/)
For more information please go to our [Developer documentation](http://biopet-docs.readthedocs.io/en/develop/developer/getting-started/)
## About
......
......@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.utils.summary.db.Schema.{ Library, Sample }
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
import nl.lumc.sasc.biopet.utils.{ IoUtils, Logging, ToolCommand }
import org.broadinstitute.gatk.utils.commandline.Input
import org.fusesource.scalate.{ TemplateEngine, TemplateSource }
import org.fusesource.scalate.TemplateEngine
import scala.collection.mutable
import scala.concurrent.{ Await, Future }
......@@ -263,9 +263,7 @@ object ReportBuilder {
/** Single template render engine; this will have a cache for all compiled templates */
protected val engine = new TemplateEngine()
/** Cache of temp file for templates from the classpath / jar */
private[report] var templateCache: Map[String, File] = Map()
engine.allowReload = false
/** This will give the total number of pages including all nested pages */
def countPages(page: ReportPage): Int = {
......@@ -281,15 +279,6 @@ object ReportBuilder {
def renderTemplate(location: String, args: Map[String, Any] = Map()): String = {
Logging.logger.info("Rendering: " + location)
val templateFile: File = templateCache.get(location) match {
case Some(template) => template
case _ =>
val tempFile = File.createTempFile("ssp-template", new File(location).getName)
tempFile.deleteOnExit()
IoUtils.copyStreamToFile(getClass.getResourceAsStream(location), tempFile)
templateCache += location -> tempFile
tempFile
}
engine.layout(TemplateSource.fromFile(templateFile), args)
engine.layout(location, args)
}
}
\ No newline at end of file
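For context, the new `renderTemplate` relies on Scalate resolving the template URI itself (via the templates precompiled by the maven-scalate-plugin added in the pom.xml change further down) instead of copying the resource to a temp file first. A minimal sketch of that pattern, assuming `/template.ssp` is available on the classpath as in the test below:

```scala
import org.fusesource.scalate.TemplateEngine

// Standalone sketch; Biopet's ReportBuilder keeps one shared engine instance.
val engine = new TemplateEngine()
engine.allowReload = false

// With templates precompiled onto the classpath, the engine resolves the
// URI directly; the temp-file cache removed above is no longer needed.
val rendered = engine.layout("/template.ssp", Map("arg" -> "test"))
// per ReportBuilderTest, this renders to "test"
```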
......@@ -96,11 +96,8 @@ class ReportBuilderTest extends TestNGSuite with Matchers {
@Test
def testRenderTemplate: Unit = {
ReportBuilder.templateCache = Map()
ReportBuilder.templateCache shouldBe empty
ReportBuilder.renderTemplate("/template.ssp", Map("arg" -> "test")) shouldBe "test"
ReportBuilder.templateCache.size shouldBe 1
ReportBuilder.renderTemplate("/template.ssp", Map("arg" -> "bla")) shouldBe "bla"
ReportBuilder.templateCache.size shouldBe 1
}
}
......@@ -39,7 +39,8 @@ object BamStats extends ToolCommand {
bamFile: File = null,
referenceFasta: Option[File] = None,
binSize: Int = 10000,
threadBinSize: Int = 10000000) extends AbstractArgs
threadBinSize: Int = 10000000,
tsvOutputs: Boolean = false) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('R', "reference") valueName "<file>" action { (x, c) =>
......@@ -57,6 +58,9 @@ object BamStats extends ToolCommand {
opt[Int]("threadBinSize") valueName "<int>" action { (x, c) =>
c.copy(threadBinSize = x)
} text "Size of region per thread"
opt[Unit]("tsvOutputs") action { (x, c) =>
c.copy(tsvOutputs = true)
} text "Also output tsv files, default there is only a json"
}
/** This is the main entry to [[BamStats]], this will do the argument parsing. */
......@@ -68,7 +72,7 @@ object BamStats extends ToolCommand {
val sequenceDict = validateReferenceInBam(cmdArgs.bamFile, cmdArgs.referenceFasta)
init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize)
init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize, cmdArgs.tsvOutputs)
logger.info("Done")
}
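For illustration, the new flag as exercised by BamStatsTest further down (paths hypothetical):

```scala
// Paths are hypothetical; mirrors BamStatsTest.testTsvOutputs.
BamStats.main(Array(
  "-b", "/data/sample.bam",   // input BAM
  "-o", "/data/bamstats",     // output directory
  "--tsvOutputs"              // also write tsv/histogram files besides bamstats.json
))
```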
......@@ -96,13 +100,26 @@ object BamStats extends ToolCommand {
* @param binSize stats binsize
* @param threadBinSize Thread binsize
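* @param tsvOutput Also write tsv files and histogram plots besides the json output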
*/
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = {
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int, tsvOutput: Boolean): Unit = {
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
contig.chr -> processContig(contig, bamFile, binSize, threadBinSize, outputDir)
}.toList
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.map(_._2))
if (tsvOutput) {
stats.flagstat.writeAsTsv(new File(outputDir, "flagstats.tsv"))
stats.insertSizeHistogram.writeFilesAndPlot(outputDir, "insertsize", "Insertsize", "Reads", "Insertsize distribution")
stats.mappingQualityHistogram.writeFilesAndPlot(outputDir, "mappingQuality", "Mapping Quality", "Reads", "Mapping Quality distribution")
stats.clippingHistogram.writeFilesAndPlot(outputDir, "clipping", "Clipped bases", "Reads", "Clipping distribution")
stats.leftClippingHistogram.writeFilesAndPlot(outputDir, "left_clipping", "Clipped bases", "Reads", "Left Clipping distribution")
stats.rightClippingHistogram.writeFilesAndPlot(outputDir, "right_clipping", "Clipped bases", "Reads", "Right Clipping distribution")
stats._3_ClippingHistogram.writeFilesAndPlot(outputDir, "3prime_clipping", "Clipped bases", "Reads", "3 Prime Clipping distribution")
stats._5_ClippingHistogram.writeFilesAndPlot(outputDir, "5prime_clipping", "Clipped bases", "Reads", "5 Prime Clipping distribution")
}
val statsWriter = new PrintWriter(new File(outputDir, "bamstats.json"))
val totalStats = stats.toSummaryMap
val statsMap = Map(
......
......@@ -14,8 +14,10 @@
*/
package nl.lumc.sasc.biopet.tools.bamstats
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.sortAnyAny
import java.io.{ File, IOException, PrintWriter }
import nl.lumc.sasc.biopet.utils.rscript.LinePlot
import nl.lumc.sasc.biopet.utils.{ Logging, sortAnyAny }
import scala.collection.mutable
......@@ -43,7 +45,7 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
}
/** Write histogram to a tsv/count file */
def writeToTsv(file: File): Unit = {
def writeHistogramToTsv(file: File): Unit = {
val writer = new PrintWriter(file)
writer.println("value\tcount")
counts.keys.toList.sorted.foreach(x => writer.println(s"$x\t${counts(x)}"))
......@@ -82,4 +84,28 @@ class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric
} else Map()
}
/** Write aggregate statistics to a tsv file */
def writeAggregateToTsv(file: File): Unit = {
val writer = new PrintWriter(file)
aggregateStats.foreach(x => writer.println(x._1 + "\t" + x._2))
writer.close()
}
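/** Write histogram and aggregate tsv files, then plot the histogram (requires R; failures are only logged) */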
def writeFilesAndPlot(outputDir: File, prefix: String, xlabel: String, ylabel: String, title: String): Unit = {
writeHistogramToTsv(new File(outputDir, prefix + ".histogram.tsv"))
writeAggregateToTsv(new File(outputDir, prefix + ".stats.tsv"))
val plot = new LinePlot(null)
plot.input = new File(outputDir, prefix + ".histogram.tsv")
plot.output = new File(outputDir, prefix + ".histogram.png")
plot.xlabel = Some(xlabel)
plot.ylabel = Some(ylabel)
plot.title = Some(title)
try {
plot.runLocal()
} catch {
// If plotting fails the tool should not fail; plotting depends on R being installed
case e: IOException => Logging.logger.warn(s"Error found while plotting ${plot.output}: ${e.getMessage}")
}
}
}
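A short usage sketch of the new methods, grounded in the CountsTest/HistogramTest changes below (file names hypothetical):

```scala
import java.io.File
import nl.lumc.sasc.biopet.tools.bamstats.Histogram

val h = new Histogram[Int](Map(1 -> 1L, 2 -> 2L, 3 -> 3L))
h.writeHistogramToTsv(new File("example.histogram.tsv")) // value<TAB>count table
h.writeAggregateToTsv(new File("example.stats.tsv"))     // aggregateStats as key<TAB>value
// writeFilesAndPlot does both and then shells out to R via LinePlot;
// a missing R installation only logs a warning, it does not fail the run.
```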
......@@ -50,13 +50,13 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(),
def writeStatsToFiles(outputDir: File): Unit = {
this.flagstat.writeReportToFile(new File(outputDir, "flagstats"))
this.flagstat.writeSummaryTofile(new File(outputDir, "flagstats.summary.json"))
this.mappingQualityHistogram.writeToTsv(new File(outputDir, "mapping_quality.tsv"))
this.insertSizeHistogram.writeToTsv(new File(outputDir, "insert_size.tsv"))
this.clippingHistogram.writeToTsv(new File(outputDir, "clipping.tsv"))
this.leftClippingHistogram.writeToTsv(new File(outputDir, "left_clipping.tsv"))
this.rightClippingHistogram.writeToTsv(new File(outputDir, "right_clipping.tsv"))
this._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv"))
this._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv"))
this.mappingQualityHistogram.writeHistogramToTsv(new File(outputDir, "mapping_quality.tsv"))
this.insertSizeHistogram.writeHistogramToTsv(new File(outputDir, "insert_size.tsv"))
this.clippingHistogram.writeHistogramToTsv(new File(outputDir, "clipping.tsv"))
this.leftClippingHistogram.writeHistogramToTsv(new File(outputDir, "left_clipping.tsv"))
this.rightClippingHistogram.writeHistogramToTsv(new File(outputDir, "right_clipping.tsv"))
this._5_ClippingHistogram.writeHistogramToTsv(new File(outputDir, "5_prime_clipping.tsv"))
this._3_ClippingHistogram.writeHistogramToTsv(new File(outputDir, "3_prime_clipping.tsv"))
}
def toSummaryMap = {
......
......@@ -32,6 +32,12 @@ class FlagstatCollector {
protected[FlagstatCollector] var totalCounts: Array[Long] = Array()
protected[FlagstatCollector] var crossCounts = Array.ofDim[Long](1, 1)
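/** Dump all flagstat names and their total counts as a tsv file */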
def writeAsTsv(file: File): Unit = {
val writer = new PrintWriter(file)
names.foreach(x => writer.println(x._2 + "\t" + totalCounts(x._1)))
writer.close()
}
def loadDefaultFunctions() {
addFunction("All", record => true)
addFunction("Mapped", record => !record.getReadUnmappedFlag)
......
......@@ -74,7 +74,7 @@ object VcfStats extends ToolCommand {
opt[File]('o', "outputDir") required () unbounded () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(outputDir = x)
} validate {
x => if (x == null) failure("Output directory required") else success
x => if (x == null) failure("Valid output directory required") else if (x.exists) success else failure(s"Output directory does not exist: $x")
} text "Path to directory for output (required)"
opt[File]('i', "intervals") unbounded () valueName "<file>" action { (x, c) =>
c.copy(intervals = Some(x))
......
......@@ -24,6 +24,7 @@ import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import nl.lumc.sasc.biopet.utils.sortAnyAny
import org.apache.commons.io.FileUtils
import scala.collection.mutable
......@@ -162,6 +163,16 @@ class VcfStatsTest extends TestNGSuite with Matchers {
valueFromTsv(i, "Sample_ID_3", "bam") should be(empty)
}
@Test
def testNoExistOutputDir: Unit = {
val tmp = Files.createTempDirectory("vcfStats")
FileUtils.deleteDirectory(new File(tmp.toAbsolutePath.toString))
val vcf = resourcePath("/chrQ.vcf.gz")
val ref = resourcePath("/fake_chrQ.fa")
an[IllegalArgumentException] should be thrownBy main(Array("-I", vcf, "-R", ref, "-o", tmp.toAbsolutePath.toString))
}
@Test
def testMain() = {
val tmp = Files.createTempDirectory("vcfStats")
......
......@@ -34,6 +34,43 @@ class BamStatsTest extends TestNGSuite with Matchers {
new File(outputDir, "bamstats.json") should exist
new File(outputDir, "bamstats.summary.json") should exist
new File(outputDir, "flagstats.tsv") shouldNot exist
new File(outputDir, "insertsize.stats.tsv") shouldNot exist
new File(outputDir, "insertsize.histogram.tsv") shouldNot exist
new File(outputDir, "mappingQuality.stats.tsv") shouldNot exist
new File(outputDir, "mappingQuality.histogram.tsv") shouldNot exist
new File(outputDir, "clipping.stats.tsv") shouldNot exist
new File(outputDir, "clipping.histogram.tsv") shouldNot exist
new File(outputDir, "flagstats") shouldNot exist
new File(outputDir, "flagstats.summary.json") shouldNot exist
new File(outputDir, "mapping_quality.tsv") shouldNot exist
new File(outputDir, "insert_size.tsv") shouldNot exist
new File(outputDir, "clipping.tsv") shouldNot exist
new File(outputDir, "left_clipping.tsv") shouldNot exist
new File(outputDir, "right_clipping.tsv") shouldNot exist
new File(outputDir, "5_prime_clipping.tsv") shouldNot exist
new File(outputDir, "3_prime_clipping.tsv") shouldNot exist
}
@Test
def testTsvOutputs: Unit = {
val outputDir = Files.createTempDir()
outputDir.deleteOnExit()
BamStats.main(Array("-b", BamStatsTest.pairedBam01.getAbsolutePath, "-o", outputDir.getAbsolutePath, "--tsvOutputs"))
new File(outputDir, "bamstats.json") should exist
new File(outputDir, "bamstats.summary.json") should exist
new File(outputDir, "flagstats.tsv") should exist
new File(outputDir, "insertsize.stats.tsv") should exist
new File(outputDir, "insertsize.histogram.tsv") should exist
new File(outputDir, "mappingQuality.stats.tsv") should exist
new File(outputDir, "mappingQuality.histogram.tsv") should exist
new File(outputDir, "clipping.stats.tsv") should exist
new File(outputDir, "clipping.histogram.tsv") should exist
new File(outputDir, "flagstats") shouldNot exist
new File(outputDir, "flagstats.summary.json") shouldNot exist
new File(outputDir, "mapping_quality.tsv") shouldNot exist
......
......@@ -86,7 +86,7 @@ class CountsTest extends TestNGSuite with Matchers {
val tsvFile = File.createTempFile("counts.", ".tsv")
tsvFile.deleteOnExit()
c1.writeToTsv(tsvFile)
c1.writeHistogramToTsv(tsvFile)
val reader = Source.fromFile(tsvFile)
reader.getLines().toList shouldBe List("value\tcount", "1\t1", "2\t2", "3\t3")
......
......@@ -64,7 +64,7 @@ class HistogramTest extends TestNGSuite with Matchers {
val tsvFile = File.createTempFile("counts.", ".tsv")
tsvFile.deleteOnExit()
c1.writeToTsv(tsvFile)
c1.writeHistogramToTsv(tsvFile)
val reader = Source.fromFile(tsvFile)
reader.getLines().toList shouldBe List("value\tcount", "1\t1", "2\t2", "3\t3")
......
......@@ -103,8 +103,10 @@ At this moment the following variant callers can be used
* <a href="https://samtools.github.io/bcftools/bcftools.html">bcftools</a>
* <a href="https://samtools.github.io/bcftools/bcftools.html">bcftools_singlesample</a>
* <a href="https://github.com/ekg/freebayes">freebayes</a>
* <a href="http://varscan.sourceforge.net/">varscan</a>
* [raw](../tools/MpileupToVcf)
## Config options
To view all possible config options please navigate to our Gitlab wiki page
......
......@@ -64,13 +64,6 @@ class XhmmMethod(val parent: Configurable) extends CnvMethod with Reference {
val firstMatrix = new XhmmMatrix(this)
firstMatrix.inputMatrix = merged.output
firstMatrix.outputMatrix = swapExt(xhmmDir, merged.output, ".depths.data", ".filtered_centered.data")
firstMatrix.minTargetSize = 10
firstMatrix.maxTargetSize = 10000
firstMatrix.minMeanTargetRD = 10
firstMatrix.maxMeanTargetRD = 500
firstMatrix.minMeanSampleRD = 25
firstMatrix.maxMeanSampleRD = 200
firstMatrix.maxSdSampleRD = 150
firstMatrix.outputExcludedSamples = Some(swapExt(xhmmDir, merged.output, ".depths.data", ".filtered.samples.txt"))
firstMatrix.outputExcludedTargets = Some(swapExt(xhmmDir, merged.output, ".depths.data", ".filtered.targets.txt"))
add(firstMatrix)
......
......@@ -128,6 +128,22 @@
</executions>
<!-- ... (see other usage or goals for details) ... -->
</plugin>
<plugin>
<groupId>org.scalatra.scalate</groupId>
<artifactId>maven-scalate-plugin_2.10</artifactId>
<version>1.7.0</version>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>precompile</goal>
</goals>
<configuration>
<contextClass>org.fusesource.scalate.DefaultRenderContext</contextClass>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
......@@ -281,4 +297,12 @@
</plugin>
</plugins>
</reporting>
<dependencies>
<dependency>
<groupId>org.scalatra.scalate</groupId>
<artifactId>scalate-core_2.10</artifactId>
<version>1.7.0</version>
</dependency>
</dependencies>
</project>
......@@ -69,23 +69,28 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri
// merge VCF by sample
for ((sample, bamFile) <- inputBams) {
var sampleVCFS: List[Option[File]] = List.empty
callers.foreach { caller =>
sampleVCFS ::= caller.outputVCF(sample)
if (callers.size > 1) {
var sampleVCFS: List[Option[File]] = List.empty
callers.foreach { caller =>
sampleVCFS ::= caller.outputVCF(sample)
}
val mergeSVcalls = new Pysvtools(this)
mergeSVcalls.input = sampleVCFS.flatten
mergeSVcalls.output = new File(outputDir, sample + ".merged.vcf")
add(mergeSVcalls)
outputMergedVCFbySample += (sample -> mergeSVcalls.output)
} else {
outputMergedVCFbySample += (sample -> callers.head.outputVCF(sample).get)
}
val mergeSVcalls = new Pysvtools(this)
mergeSVcalls.input = sampleVCFS.flatten
mergeSVcalls.output = new File(outputDir, sample + ".merged.vcf")
add(mergeSVcalls)
outputMergedVCFbySample += (sample -> mergeSVcalls.output)
}
// merge all files from all samples in project
val mergeSVcallsProject = new Pysvtools(this)
mergeSVcallsProject.input = outputMergedVCFbySample.values.toList
mergeSVcallsProject.output = outputMergedVCF
add(mergeSVcallsProject)
if (inputBams.size > 1) {
// merge all files from all samples in project
val mergeSVcallsProject = new Pysvtools(this)
mergeSVcallsProject.input = outputMergedVCFbySample.values.toList
mergeSVcallsProject.output = outputMergedVCF
add(mergeSVcallsProject)
}
// merging the VCF calls by project
// basically this will do all samples from this pipeline run
// group by "tags"
......@@ -101,7 +106,7 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri
def summarySettings = Map("sv_callers" -> configCallers.toList)
/** Files for the summary */
def summaryFiles: Map[String, File] = Map("final_mergedvcf" -> outputMergedVCF)
def summaryFiles: Map[String, File] = Map("final_mergedvcf" -> (if (inputBams.size > 1) outputMergedVCF else outputMergedVCFbySample.values.head))
}
object ShivaSvCalling extends PipelineCommand
\ No newline at end of file