Commit ae8146ba authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into feature-qiime

parents 62eb15af 6714a0ab
#!/bin/bash
DIR=`readlink -f \`dirname $0\``
cp -r $DIR/../*/*/src/* $DIR/src
......@@ -2,45 +2,42 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>BiopetRoot</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.6.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>BiopetAggregate</artifactId>
<packaging>pom</packaging>
<dependencies>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetProtectedPackage</artifactId>
<version>0.6.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>18.0</version>
</dependency>
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.6.0-SNAPSHOT</version>
<relativePath>../public</relativePath>
</parent>
</dependencies>
<modules>
<module>../public/biopet-core</module>
<module>../public/biopet-public-package</module>
<module>../public/bammetrics</module>
<module>../public/flexiprep</module>
<module>../public/gentrap</module>
<module>../public/mapping</module>
<module>../public/sage</module>
<module>../public/kopisu</module>
<module>../public/gears</module>
<module>../public/bam2wig</module>
<module>../public/carp</module>
<module>../public/toucan</module>
<module>../public/shiva</module>
<module>../public/basty</module>
<module>../public/biopet-utils</module>
<module>../public/biopet-tools</module>
<module>../public/biopet-tools-extensions</module>
<module>../public/biopet-extensions</module>
<module>../public/biopet-tools-package</module>
<module>../protected/biopet-gatk-extensions</module>
<module>../protected/biopet-gatk-pipelines</module>
<module>../protected/biopet-protected-package</module>
</modules>
</project>
\ No newline at end of file
#!/bin/bash
DIR=`readlink -f \`dirname $0\``
rm -r $DIR/src/main $DIR/src/test
......@@ -8,10 +8,11 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Output
import org.broadinstitute.gatk.utils.commandline.{ Gather, Output }
class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral {
@Gather(enabled = false)
@Output(required = false)
protected var vcfIndex: File = _
......
......@@ -8,9 +8,11 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Output
import org.broadinstitute.gatk.utils.commandline.{ Gather, Output }
class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral {
@Gather(enabled = false)
@Output
protected var bamIndex: File = _
......
......@@ -21,7 +21,7 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait {
def this() = this(null)
/** Make variantcalling submodule, this with the gatk modes in there */
override def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
override def makeVariantcalling(multisample: Boolean = false) = {
if (multisample) new ShivaVariantcalling(qscript) {
override def namePrefix = "multisample"
override def configName = "shivavariantcalling"
......@@ -56,36 +56,40 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait {
("use_indel_realigner" -> useIndelRealigner) +
("use_base_recalibration" -> useBaseRecalibration)
/** This will adds preprocess steps, gatk indel realignment and base recalibration is included here */
override def preProcess(input: File): Option[File] = {
if (!useIndelRealigner && !useBaseRecalibration) None
else {
val indelRealignFile = useIndelRealigner match {
case true => addIndelRealign(input, libDir, useBaseRecalibration || libraries.size > 1)
case false => input
}
useBaseRecalibration match {
case true => Some(addBaseRecalibrator(indelRealignFile, libDir, libraries.size > 1))
case false => Some(indelRealignFile)
}
override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
else bamFile
override def addJobs(): Unit = {
super.addJobs()
if (useIndelRealigner && useBaseRecalibration) {
val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
addBaseRecalibrator(file, libDir, libraries.size > 1)
} else if (useIndelRealigner) {
addIndelRealign(bamFile.get, libDir, libraries.size > 1)
} else if (useBaseRecalibration) {
addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
}
}
}
override def keepMergedFiles: Boolean = config("keep_merged_files", default = false)
override def summarySettings = super.summarySettings + ("use_indel_realigner" -> useIndelRealigner)
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
/** This methods will add double preprocess steps, with GATK indel realignment */
override protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
if (input.size <= 1) super.addDoublePreProcess(input)
else super.addDoublePreProcess(input, isIntermediate = useIndelRealigner).collect {
case file =>
useIndelRealigner match {
case true => addIndelRealign(file, sampleDir, isIntermediate = false)
case false => file
}
override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
} else bamFile
override def addJobs(): Unit = {
super.addJobs()
if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
addIndelRealign(bamFile.get, sampleDir, false)
}
}
}
......
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
#import(org.apache.commons.io.FileUtils)
<%@ var summary: Summary %>
<%@ var sampleId: Option[String] = None %>
<%@ var libId: Option[String] = None %>
<%@ var rootPath: String %>
<%@ var metricsTag: String = "bammetrics" %>
<%@ var sampleLevel: Boolean = false %>
<%@ var outputDir: File %>
<%@ var fields: List[String] = List("PF_ALIGNED_BASES", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "MEDIAN_5PRIME_TO_3PRIME_BIAS")%>
<%@ var showPlot: Boolean = false %>
<%@ var showTable: Boolean = true %>
<%@ var showIntro: Boolean = true%>
#{
val samples = sampleId match {
case Some(sample) => {
List(sample.toString)
}
case _ => summary.samples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
<div class="col-md-1"></div>
<div class="col-md-6">
<p>
This Show the relative coverage for all transcripts. De data here is generated by picard CollectRnaMetrics
</p>
</div>
</div>
#end
#if (showPlot)
#{ BammetricsReport.rnaHistogramPlot(outputDir, "rna", summary, !sampleLevel, sampleId = sampleId, libId = libId) }#
<div class="panel-body">
<img src="rna.png" class="img-responsive" />
</div>
<div class="panel-footer">
#if (showTable)
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#rnaTable">Hide table</button>
#else
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#rnaTable">Show table</button>
#end
<i class="glyphicon glyphicon-file"></i> <a href="rna.tsv">tsv file</a>
</div>
#end
<div class="panel-body collapse #if (showTable)in#end" id="rnaTable">
<!-- Table -->
<table class="table sortable-theme-bootstrap" data-sortable>
<thead><tr>
<th data-sorted="true" data-sorted-direction="ascending">Sample</th>
#if (!sampleLevel) <th>Library</th> #end
#for (field <- fields)
<th>${field.replaceAll("_", " ")}</th>
#end
</tr></thead>
<tbody>
#for (sample <- samples.toList.sorted)
#{
val libs = (libId, sampleLevel) match {
case (_, true) => List("")
case (Some(libId), _) => List(libId.toString)
case _ => summary.libraries(sample).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample}/index.html">${sample}</a></td>
#for (libId <- libs)
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample}/Libraries/${libId}/index.html">${libId}</a></td> #end
#{
val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List("bammetrics", "stats")
val fieldValues = for (field <- fields) yield {
summary.getValue((prefixPath ::: List("rna", "metrics", field.toUpperCase)):_*).getOrElse(prefixPath ::: metricsTag :: Nil)
}
}#
#for (value <- fieldValues)
<td>${value}</td>
#end
</tr>
#end
#end
</tbody>
</table>
</div>
......@@ -39,7 +39,6 @@ class BamMetrics(val root: Configurable) extends QScript
var inputBam: File = _
/** Settings for CollectRnaSeqMetrics */
var rnaMetricsSettings: Map[String, String] = Map()
var transcriptRefFlatFile: Option[File] = config("transcript_refflat")
/** return location of summary file */
......@@ -77,7 +76,7 @@ class BamMetrics(val root: Configurable) extends QScript
/** Script to add jobs */
def biopetScript() {
add(SamtoolsFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".flagstat")))
add(SamtoolsFlagstat(this, inputBam, outputDir))
val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
add(biopetFlagstat)
......@@ -107,8 +106,6 @@ class BamMetrics(val root: Configurable) extends QScript
rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
rnaMetrics.refFlat = transcriptRefFlatFile.get
rnaMetrics.ribosomalIntervals = rnaMetricsSettings.get("ribosomal_intervals").collect { case n => new File(n) }
rnaMetrics.strandSpecificity = rnaMetricsSettings.get("strand_specificity")
add(rnaMetrics)
addSummarizable(rnaMetrics, "rna")
}
......
......@@ -57,9 +57,13 @@ object BammetricsReport extends ReportBuilder {
sampleId: Option[String],
libId: Option[String],
metricsTag: String = "bammetrics") = {
val wgsExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "wgs").isDefined
val rnaExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "rna").isDefined
val targets = (
summary.getValue(sampleId, libId, "bammetrics", "settings", "amplicon_name"),
summary.getValue(sampleId, libId, "bammetrics", "settings", "roi_name")
summary.getValue(sampleId, libId, metricsTag, "settings", "amplicon_name"),
summary.getValue(sampleId, libId, metricsTag, "settings", "roi_name")
) match {
case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
case (_, Some(roi: List[_])) => roi.map(_.toString)
......@@ -74,9 +78,13 @@ object BammetricsReport extends ReportBuilder {
Map())),
List(
"Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
"Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)),
"Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", Map("showPlot" -> true))
),
"Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Map("showPlot" -> true)))
else Nil) ++
(if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
Map("showPlot" -> true)))
else Nil),
Map("metricsTag" -> metricsTag)
)
}
......@@ -321,4 +329,94 @@ object BammetricsReport extends ReportBuilder {
plot.title = Some("Whole genome coverage")
plot.runLocal()
}
/**
* Generate a line plot for rna coverage
* @param outputDir OutputDir for the tsv and png file
* @param prefix Prefix of the tsv and png file
* @param summary Summary class
* @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
* @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
*/
def rnaHistogramPlot(outputDir: File,
prefix: String,
summary: Summary,
libraryLevel: Boolean = false,
sampleId: Option[String] = None,
libId: Option[String] = None): Unit = {
val tsvFile = new File(outputDir, prefix + ".tsv")
val pngFile = new File(outputDir, prefix + ".png")
val tsvWriter = new PrintWriter(tsvFile)
if (libraryLevel) {
tsvWriter.println((for (
sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
) yield s"$sample-$lib")
.mkString("library\t", "\t", ""))
} else {
sampleId match {
case Some(sample) => tsvWriter.println("\t" + sample)
case _ => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", ""))
}
}
var map: Map[Int, Map[String, Double]] = Map()
def fill(sample: String, lib: Option[String]): Unit = {
val insertSize = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "normalized_position"),
summary, Some(sample), lib).value.getOrElse(List())
val counts = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage"),
summary, Some(sample), lib).value.getOrElse(List())
(insertSize, counts) match {
case (l: List[_], l2: List[_]) =>
l.zip(l2).foreach(i => {
val insertSize = i._1.toString.toInt
val count = i._2.toString.toDouble
val old = map.getOrElse(insertSize, Map())
if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count))
else map += insertSize -> (old + (sample -> count))
})
case _ => throw new IllegalStateException("Must be a list")
}
}
if (libraryLevel) {
for (
sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
) fill(sample, Some(lib))
} else if (sampleId.isDefined) fill(sampleId.get, None)
else summary.samples.foreach(fill(_, None))
for ((insertSize, counts) <- map) {
tsvWriter.print(insertSize)
if (libraryLevel) {
for (
sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample;
lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib
) {
tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0"))
}
} else {
for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) {
tsvWriter.print("\t" + counts.getOrElse(sample, "0"))
}
}
tsvWriter.println()
}
tsvWriter.close()
val plot = new LinePlot(null)
plot.input = tsvFile
plot.output = pngFile
plot.xlabel = Some("Reletive position")
plot.ylabel = Some("Coverage")
plot.width = Some(1200)
plot.removeZero = true
plot.title = Some("Rna coverage")
plot.runLocal()
}
}
......@@ -17,10 +17,10 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.Argument
/** This trait creates a structured way of use multisample pipelines */
trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
......@@ -31,7 +31,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
require(globalConfig.map.contains("samples"), "No Samples found in config")
/** Sample class with basic functions build in */
abstract class AbstractSample(val sampleId: String) extends Summarizable {
abstract class AbstractSample(val sampleId: String) extends Summarizable { sample =>
/** Overrules config of qscript with default sample */
val config = new ConfigFunctions(defaultSample = sampleId)
......@@ -39,7 +39,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
def summarySettings: Map[String, Any] = Map()
/** Library class with basic functions build in */
abstract class AbstractLibrary(val libId: String) extends Summarizable {
abstract class AbstractLibrary(val libId: String) extends Summarizable { lib =>
/** Overrules config of qscript with default sample and default library */
val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libId)
......@@ -64,11 +64,22 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
}
/** Creates a library file with given suffix */
def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libId + suffix)
def createFile(suffix: String): File = new File(libDir, s"$sampleId-$libId.$suffix")
/** Returns library directory */
def libDir = new File(sampleDir, "lib_" + libId)
lazy val libTags: Map[String, Any] =
config("tags", default = Map(), freeVar = false, submodule = libId, path = List("samples", sampleId, "libraries"))
def sampleId = sample.sampleId
lazy val libGroups: List[String] = libTags.get("groups") match {
case Some(g: List[_]) => g.map(_.toString)
case Some(g: String) => List(g)
case _ => Nil
}
/** Function that add library jobs */
protected def addJobs()
}
......@@ -79,6 +90,49 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
/** Stores all libraries */
val libraries: Map[String, Library] = libIds.map(id => id -> makeLibrary(id)).toMap
lazy val sampleTags: Map[String, Any] =
config("tags", default = Map(), freeVar = false, submodule = sampleId, path = List("samples"))
lazy val gender = {
val g: Option[String] = sampleTags.get("gender").map(_.toString)
g.map(_.toLowerCase) match {
case Some("male") => Gender.Male
case Some("female") => Gender.Female
case Some(s) =>
logger.warn(s"Could not convert '$g' to a gender")
Gender.Unknown
case _ => Gender.Unknown
}
}
lazy val father = {
val g: Option[String] = sampleTags.get("father").map(_.toString)
g.foreach { father =>
if (sampleId != father) Logging.addError(s"Father for $sampleId can not be itself")
if (samples.contains(father)) if (samples(father).gender == Gender.Male)
Logging.addError(s"Father of $sampleId is not a female")
else logger.warn(s"For sample '$sampleId' is father '$father' not found in config")
}
g
}
lazy val mother = {
val g: Option[String] = sampleTags.get("mother").map(_.toString)
g.foreach { mother =>
if (sampleId != mother) Logging.addError(s"mother for $sampleId can not be itself")
if (samples.contains(mother)) if (samples(mother).gender == Gender.Female)
Logging.addError(s"Mother of $sampleId is not a female")
else logger.warn(s"For sample '$sampleId' is mother '$mother' not found in config")
}
g
}
lazy val sampleGroups: List[String] = sampleTags.get("groups") match {
case Some(g: List[_]) => g.map(_.toString)
case Some(g: String) => List(g)
case _ => Nil
}
/**
* Factory method for Library class
* @param id SampleId
......@@ -117,7 +171,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
}
/** Creates a sample file with given suffix */
def createFile(suffix: String) = new File(sampleDir, sampleId + suffix)
def createFile(suffix: String) = new File(sampleDir, s"$sampleId.$suffix")
/** Returns sample directory */
def sampleDir = new File(outputDir, "samples" + File.separator + sampleId)
......@@ -180,3 +234,10 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
sample ::: lib ::: super.configFullPath
}
}
object MultiSampleQScript {