diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 1109f30da9fcb53cd622935b8451d6ab551d0205..4cbcfdb6881834575150db1befcc67731ebe5df8 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -119,5 +119,5 @@ trait BiopetQScript extends Configurable with GatkLogging { } object BiopetQScript { - protected case class InputFile(file: File, md5: Option[String] = None) + case class InputFile(file: File, md5: Option[String] = None) } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala index a2e43d404de302bdb21dbcf9dffeee96c0857de6..e377e8f1e128451d1fcf24a5dfc90e6d244df454 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala @@ -16,11 +16,13 @@ package nl.lumc.sasc.biopet.core.report import java.io._ + import nl.lumc.sasc.biopet.core.ToolCommandFunction import nl.lumc.sasc.biopet.utils.summary.Summary -import nl.lumc.sasc.biopet.utils.{ ToolCommand, Logging, IoUtils } +import nl.lumc.sasc.biopet.utils.{ IoUtils, Logging, ToolCommand } import org.broadinstitute.gatk.utils.commandline.Input import org.fusesource.scalate.{ TemplateEngine, TemplateSource } + import scala.collection.mutable /** @@ -95,6 +97,21 @@ trait ReportBuilder extends ToolCommand { private var _libId: Option[String] = None protected def libId = _libId + case class ExtFile(resourcePath: String, targetPath: String) + + def extFiles = List( + "css/bootstrap_dashboard.css", + "css/bootstrap.min.css", + "css/bootstrap-theme.min.css", + "css/sortable-theme-bootstrap.css", + "js/jquery.min.js", + "js/sortable.min.js", + "js/bootstrap.min.js", + "fonts/glyphicons-halflings-regular.woff", + "fonts/glyphicons-halflings-regular.ttf", + "fonts/glyphicons-halflings-regular.woff2" + ).map(x => ExtFile("/nl/lumc/sasc/biopet/core/report/ext/" + x, x)) + /** Main function to for building the report */ def main(args: Array[String]): Unit = { logger.info("Start") @@ -123,22 +140,9 @@ trait ReportBuilder extends ToolCommand { // Static files that will be copied to the output folder, then file is added to [resourceDir] it's need to be added here also val extOutputDir: File = new File(cmdArgs.outputDir, "ext") - val resourceDir: String = "/nl/lumc/sasc/biopet/core/report/ext/" - val extFiles = List( - "css/bootstrap_dashboard.css", - "css/bootstrap.min.css", - "css/bootstrap-theme.min.css", - "css/sortable-theme-bootstrap.css", - "js/jquery.min.js", - "js/sortable.min.js", - "js/bootstrap.min.js", - "fonts/glyphicons-halflings-regular.woff", - "fonts/glyphicons-halflings-regular.ttf", - "fonts/glyphicons-halflings-regular.woff2" - ) for (resource <- extFiles.par) { - IoUtils.copyStreamToFile(getClass.getResourceAsStream(resourceDir + resource), new File(extOutputDir, resource), createDirs = true) + IoUtils.copyStreamToFile(getClass.getResourceAsStream(resource.resourcePath), new File(extOutputDir, resource.targetPath), createDirs = true) } logger.info("Parsing summary") diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala index 5b448c458aa98079a099e910abd7998bd8bfd597..86df5c4151d114fcb2b654cbf7b0c28bb78b9bcf 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala @@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions.kraken import java.io.File -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -39,11 +39,6 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers var db: File = config("db") - var inputFastQ: Boolean = true - var compression: Boolean = false - var compressionGzip: Boolean = false - var compressionBzip: Boolean = false - var quick: Boolean = false var minHits: Option[Int] = config("min_hits") @@ -51,11 +46,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers var paired: Boolean = config("paired", default = false) executable = config("exe", default = "kraken") - def versionRegex = """Kraken version ([\d\w\-\.]+)\n.*""".r + + def versionRegex = """^Kraken version ([\d\w\-\.]+)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " --version" override def defaultCoreMemory = 8.0 + override def defaultThreads = 4 /** Sets readgroup when not set yet */ @@ -66,16 +65,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers /** Returns command to execute */ def cmdLine = required(executable) + - "--db" + required(db) + + required("--db", db) + optional("--threads", nCoresRequest) + - conditional(inputFastQ, "--fastq-input") + - conditional(!inputFastQ, "--fasta-input") + conditional(quick, "--quick") + optional("--min_hits", minHits) + optional("--unclassified-out ", unclassified_out.get) + optional("--classified-out ", classified_out.get) + - "--output" + required(output) + + required("--output", output) + conditional(preLoad, "--preload") + conditional(paired, "--paired") + + conditional(paired, "--check-names") + repeat(input) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala index e63beea4198f4f73c367756dda932db51d6584c2..0919728a08ccbd466f7dc1ae1d3f385e49294162 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala @@ -44,9 +44,9 @@ class KrakenReport(val root: Configurable) extends BiopetCommandLineFunction wit var output: File = _ def cmdLine: String = { - val cmd: String = "--db " + required(db) + + val cmd: String = required(executable) + "--db " + required(db) + conditional(show_zeros, "--show-zeros") + - input.getAbsolutePath + ">" + output.getAbsolutePath + required(input.getAbsolutePath) + " > " + required(output.getAbsolutePath) cmd } } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala index d862076f0b8861e032832584a07d77df7c2508ca..b12ff9584da1090aa90970dbbd137c0bb6b792e9 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala @@ -15,16 +15,16 @@ */ package nl.lumc.sasc.biopet.extensions.sambamba -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } /** General Sambamba extension */ abstract class Sambamba extends BiopetCommandLineFunction with Version { - override def defaultCoreMemory = 2.0 + override def defaultCoreMemory = 4.0 override def defaultThreads = 2 override def subPath = "sambamba" :: super.subPath - executable = config("exe", default = "sambamba", freeVar = false) + executable = config("exe", default = "sambamba", submodule = "sambamba", freeVar = false) def versionCommand = executable def versionRegex = """sambamba v(.*)""".r override def versionExitcode = List(0, 1) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala index 4a012d22950898d662c6285ddf47fbec06e6bce1..ca2470c6094192afef8da6de7b76b6ca1809c1e3 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala @@ -43,6 +43,6 @@ class SambambaView(val root: Configurable) extends Sambamba { optional("--format", format.get) + optional("--regions", regions) + optional("--compression-level", compression_level) + - required("--output" + output) + + required("--output-filename", output) + required(input) } diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala index b4109a45e70b0dd111463cddb3df7f2ebf74d14f..946922ec7075938e124a659ba1f5a3acf40a5dd1 100644 --- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala +++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala @@ -27,7 +27,8 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig, nl.lumc.sasc.biopet.pipelines.carp.Carp, nl.lumc.sasc.biopet.pipelines.toucan.Toucan, - nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling + nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling, + nl.lumc.sasc.biopet.pipelines.gears.Gears ) def pipelines: List[MainCommand] = List( @@ -59,5 +60,6 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.tools.SeqStat, nl.lumc.sasc.biopet.tools.VepNormalizer, nl.lumc.sasc.biopet.tools.AnnotateVcfWithBed, - nl.lumc.sasc.biopet.tools.VcfWithVcf) + nl.lumc.sasc.biopet.tools.VcfWithVcf, + nl.lumc.sasc.biopet.tools.KrakenReportToJson) } diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala new file mode 100644 index 0000000000000000000000000000000000000000..fbea0965ca7fa3c75d630fb3bfe9a0272584ba27 --- /dev/null +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala @@ -0,0 +1,55 @@ +package nl.lumc.sasc.biopet.extensions.tools + +/** + * Created by waiyileung on 05-10-15. + */ +import java.io.File + +import nl.lumc.sasc.biopet.core.ToolCommandFunction +import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } + +/** + * KrakenReportToJson function class for usage in Biopet pipelines + * + * @param root Configuration object for the pipeline + */ +class KrakenReportToJson(val root: Configurable) extends ToolCommandFunction with Summarizable { + def toolObject = nl.lumc.sasc.biopet.tools.KrakenReportToJson + + @Input(doc = "Input Kraken Full report", shortName = "inputReport", required = true) + var inputReport: File = _ + + @Argument(required = false) + var skipNames: Boolean = false + + @Output(doc = "Output JSON", shortName = "output", required = true) + var output: File = _ + + override def defaultCoreMemory = 2.0 + + override def cmdLine = + super.cmdLine + + required("-i", inputReport) + + required("-o", output) + + conditional(skipNames, "--skipnames") + + def summaryStats: Map[String, Any] = { + ConfigUtils.fileToConfigMap(output) + } + + def summaryFiles: Map[String, File] = Map() + +} + +object KrakenReportToJson { + def apply(root: Configurable, input: File, output: File): KrakenReportToJson = { + val report = new KrakenReportToJson(root) + report.inputReport = input + report.output = new File(output, input.getName.substring(0, input.getName.lastIndexOf(".")) + ".kraken.json") + report + } +} + diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala new file mode 100644 index 0000000000000000000000000000000000000000..ee59a22aaab5a1b21ad8565a74f41d432cde096b --- /dev/null +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala @@ -0,0 +1,186 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.tools + +/** + * Created by wyleung on 25-9-15. + */ + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.utils.ConfigUtils._ +import nl.lumc.sasc.biopet.utils.ToolCommand + +import scala.collection.mutable +import scala.collection.mutable.ListBuffer +import scala.io.Source + +object KrakenReportToJson extends ToolCommand { + + case class KrakenHit(taxonomyID: Long, + taxonomyName: String, + cladeCount: Long, + cladeSize: Long, // size of parent - including itself + taxonRank: String, + cladeLevel: Int, + parentTaxonomyID: Long, + children: ListBuffer[KrakenHit]) { + def toJSON(withChildren: Boolean = false): Map[String, Any] = { + val childJSON = if (withChildren) children.toList.map(entry => entry.toJSON(withChildren)) else List() + Map( + "name" -> taxonomyName, + "taxid" -> taxonomyID, + "taxonrank" -> taxonRank, + "cladelevel" -> cladeLevel, + "count" -> cladeCount, + "size" -> cladeSize, + "children" -> childJSON + ) + } + + } + + var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0) + val spacePattern = "^( +)".r + private var lines: Map[Long, KrakenHit] = Map.empty + + case class Args(krakenreport: File = null, outputJson: Option[File] = None, skipNames: Boolean = false) extends AbstractArgs + + class OptParser extends AbstractOptParser { + + head( + s""" + |$commandName - Convert Kraken-report (full) output to JSON + """.stripMargin) + + opt[File]('i', "krakenreport") required () unbounded () valueName "<krakenreport>" action { (x, c) => + c.copy(krakenreport = x) + } validate { + x => if (x.exists) success else failure("Krakenreport not found") + } text "Kraken report to generate stats from" + + opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) => + c.copy(outputJson = Some(x)) + } text "File to write output to, if not supplied output go to stdout" + + opt[Boolean]('n', "skipnames") unbounded () valueName "<skipnames>" action { (x, c) => + c.copy(skipNames = x) + } text "Don't report the scientific name of the taxon." + + } + + /** + * Parses the command line argument + * + * @param args Array of arguments + * @return + */ + def parseArgs(args: Array[String]): Args = new OptParser() + .parse(args, Args()) + .getOrElse(sys.exit(1)) + + /** + * Takes a line from the kraken report, converts into Map with taxonID and + * information on this hit as `KrakenHit`. `KrakenHit` is used later on for + * building the tree + * + * @param krakenRawHit Line from the KrakenReport output + * @param skipNames Specify to skip names in the report output to reduce size of JSON + * @return + */ + def parseLine(krakenRawHit: String, skipNames: Boolean): Map[Long, KrakenHit] = { + val values: Array[String] = krakenRawHit.stripLineEnd.split("\t") + + assert(values.length == 6) + + val scientificName: String = values(5) + val cladeLevel = spacePattern.findFirstIn(scientificName).getOrElse("").length / 2 + + if (cladeIDs.length <= cladeLevel + 1) { + cladeIDs ++= mutable.ArrayBuffer.fill(10)(0L) + } + + cladeIDs(cladeLevel + 1) = values(4).toLong + Map( + values(4).toLong -> new KrakenHit( + taxonomyID = values(4).toLong, + taxonomyName = if (skipNames) "" else scientificName.trim, + cladeCount = values(2).toLong, + cladeSize = values(1).toLong, + taxonRank = values(3), + cladeLevel = cladeLevel, + parentTaxonomyID = cladeIDs(cladeLevel), + children = ListBuffer() + )) + } + + /** + * Read the `KrakenReport` output and transform into `Map` by TaxonID and `KrakenHit` + * A JSON-string output is given. + * + * @param reportRaw The `KrakenReport` output + * @param skipNames Specify to skip names in the report output to reduce size of JSON + * @return + */ + def reportToJson(reportRaw: File, skipNames: Boolean): String = { + val reader = Source.fromFile(reportRaw) + + /* + * http://ccb.jhu.edu/software/kraken/MANUAL.html + * The header layout is: + * 1. Percentage of reads covered by the clade rooted at this taxon + * 2. Number of reads covered by the clade rooted at this taxon + * 3. Number of reads assigned directly to this taxon + * 4. A rank code, indicating (U)nclassified, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. All other ranks are simply '-'. + * 5. NCBI taxonomy ID + * 6. indented scientific name + * */ + + lines = reader.getLines() + .map(line => parseLine(line, skipNames)) + .filter(p => (p.head._2.cladeSize > 0) || List(0L, 1L).contains(p.head._2.taxonomyID)) + .foldLeft(Map.empty[Long, KrakenHit])((a, b) => { + a + b.head + }) + + lines.keys.foreach(k => { + // append itself to the children attribute of the parent + if (lines(k).parentTaxonomyID > 0L) { + // avoid the root and unclassified appending to the unclassified node + lines(lines(k).parentTaxonomyID).children += lines(k) + } + }) + + val result = Map("unclassified" -> lines(0).toJSON(withChildren = false), + "classified" -> lines(1).toJSON(withChildren = true)) + mapToJson(result).spaces2 + + } + + def main(args: Array[String]): Unit = { + val commandArgs: Args = parseArgs(args) + + val jsonString: String = reportToJson(commandArgs.krakenreport, skipNames = commandArgs.skipNames) + commandArgs.outputJson match { + case Some(file) => + val writer = new PrintWriter(file) + writer.println(jsonString) + writer.close() + case _ => println(jsonString) + } + + } +} diff --git a/public/gears/pom.xml b/public/gears/pom.xml index 8d09f66d1528a295e18ef5467f2ebd3fa99d8657..cc5e8bbe4f8664bf225fff5bae0e11ffb38b68a1 100644 --- a/public/gears/pom.xml +++ b/public/gears/pom.xml @@ -23,11 +23,14 @@ <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> <version>0.5.0-SNAPSHOT</version> + <relativePath>../</relativePath> </parent> <modelVersion>4.0.0</modelVersion> <inceptionYear>2015</inceptionYear> <artifactId>Gears</artifactId> + <name>Gears</name> + <packaging>jar</packaging> <dependencies> <dependency> @@ -37,7 +40,7 @@ </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> - <artifactId>Mapping</artifactId> + <artifactId>BiopetToolsExtensions</artifactId> <version>${project.version}</version> </dependency> <dependency> diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp new file mode 100644 index 0000000000000000000000000000000000000000..20ca432859aeee8e0cd2482e190abe8b9586e8b6 --- /dev/null +++ b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp @@ -0,0 +1,38 @@ +#import(nl.lumc.sasc.biopet.utils.summary.Summary) +#import(nl.lumc.sasc.biopet.core.report.ReportPage) +<%@ var summary: Summary %> +<%@ var rootPath: String %> +<%@ var sampleId: Option[String] = None %> +<%@ var libId: Option[String] = None %> + +<table class="table"> +<tbody> + <tr><th>Pipeline</th><td>Gears</td></tr> + <tr><th>Version</th><td>${summary.getValue("meta", "pipeline_version")}</td></tr> + <tr><th>Last commit hash</th><td>${summary.getValue("meta", "last_commit_hash")}</td></tr> + <tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr> + #if(sampleId.isDefined) <tr><th>Sample</th><td>${sampleId}</td></tr> #end + #if(libId.isDefined) <tr><th>Library</th><td>${libId}</td></tr> #end +</tbody> +</table> +<br/> +<div class="row"> +<div class="col-md-1"></div> +<div class="col-md-6"> + <p> + In this web document you can find your <em>Gears</em> pipeline report. + Different categories of data can be found in the left-side menu. + Statistics per sample and library can be accessed through the top-level menu. + Some statistics for target regions can be found in the regions tab. + Futhermore, you can view all versions of software tools used by selecting <em>Versions</em> from the top menu. + </p> + + <p> + <small>Brought to you by <a href="https://sasc.lumc.nl" target="_blank"><abbr + title="Sequence Analysis Support Core">SASC</abbr></a> and <a + href="https://www.lumc.nl/org/klinische-genetica/" target="_blank"><abbr title="Clinical Genetics LUMC">KG</abbr></a>, + LUMC. + </small> + </p> +</div> +</div> \ No newline at end of file diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp new file mode 100644 index 0000000000000000000000000000000000000000..0ea325108f4f3ce6a8388537f7bd72e09f93a21f --- /dev/null +++ b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp @@ -0,0 +1,69 @@ +#import(nl.lumc.sasc.biopet.utils.summary.Summary) +#import(nl.lumc.sasc.biopet.utils.ConfigUtils) +#import(java.io.File) +<%@ var summary: Summary %> +<%@ var sampleId: Option[String] = None %> +<%@ var libId: Option[String] = None %> +<%@ var rootPath: String %> +<%@ var showPlot: Boolean = true %> +<%@ var showIntro: Boolean = true %> +#{ + val samples = sampleId match { + case Some(sample) => List(sample.toString) + case _ => summary.samples.toList + } + val librariesCount = summary.samples.foldLeft(0)(_ + summary.libraries(_).size) +}# + +#if (showIntro) + + <div class="row"> + <div class="col-md-1"></div> + <div class="col-md-10"> + Here we show a sunburst visualisation of the analysis of the metagenome in sample: ${sampleId} + </div> + <div class="col-md-1"></div> + </div> +#end + + + +#if (showPlot) +<div class="row"> + <div class="col-md-12"> + <h3 id='currentlevel'>Root</h3> + <div> + <span id="selection_name"></span> - + <span id="selection_size"></span> - + <span id="selection_value"></span> + </div> + + <form> + <label><input type="radio" name="mode" value="size"> Size</label> + <label><input type="radio" name="mode" value="count" checked> Count</label> + </form> + <div id="sequence"></div> + + <div id="datatable"></div> + <div id="svgholder"></div> + + </div> + + + <script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js" charset="utf-8"></script> + <script src="${rootPath}ext/js/gears.js"></script> + + <script type="application/ecmascript"> + + #{ + val rawreport = Map("kraken" -> summary.getValue(sampleId, libId, "gears", "stats", "krakenreport")) + val jsonReport = ConfigUtils.mapToJson(rawreport) + }# + + var krakenresult = JSON.parse('<%= unescape(jsonReport) %>'); + loadGears(krakenresult.kraken.classified); + </script> + + +</div> +#end diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js new file mode 100644 index 0000000000000000000000000000000000000000..91b3262201485efba08df5770df5d16e8cc626d9 --- /dev/null +++ b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js @@ -0,0 +1,341 @@ + +// Breadcrumb dimensions: width, height, spacing, width of tip/tail. +var b = { + w: 130, h: 20, s: 3, t: 10 +}; + +// Given a node in a partition layout, return an array of all of its ancestor +// nodes, highest first, but excluding the root. +function getAncestors(node) { + var path = []; + var current = node; + while (current.parent) { + path.unshift(current); + current = current.parent; + } + return path; +} + + +function initializeBreadcrumbTrail() { + // Add the svg area. + var trail = d3.select("#sequence").append("svg:svg") + .attr("width", width) + .attr("height", 50) + .attr("id", "trail"); + // Add the label at the end, for the percentage. + trail.append("svg:text") + .attr("id", "endlabel") + .style("fill", "#000"); +} + +// Generate a string that describes the points of a breadcrumb polygon. +function breadcrumbPoints(d, i) { + var points = []; + points.push("0,0"); + points.push(b.w + ",0"); + points.push(b.w + b.t + "," + (b.h / 2)); + points.push(b.w + "," + b.h); + points.push("0," + b.h); + if (i > 0) { // Leftmost breadcrumb; don't include 6th vertex. + points.push(b.t + "," + (b.h / 2)); + } + return points.join(" "); +} + +// Update the breadcrumb trail to show the current sequence and percentage. +function updateBreadcrumbs(nodeArray, percentageString) { + // Data join; key function combines name and depth (= position in sequence). + var g = d3.select("#trail") + .selectAll("g") + .data(nodeArray, function(d) { return d.name + d.depth; }); + + // Add breadcrumb and label for entering nodes. + var entering = g.enter().append("svg:g"); + + entering.append("svg:polygon") + .attr("points", breadcrumbPoints) + .style("fill", function(d) { return color((d.children ? d : d.parent).name); }); + + entering.append("svg:text") + .attr("x", (b.w + b.t) / 2) + .attr("y", b.h / 2) + .attr("dy", "0.35em") + .attr("text-anchor", "middle") + .text(function(d) { return d.name; }); + + // Set position for entering and updating nodes. + g.attr("transform", function(d, i) { + return "translate(" + i * (b.w + b.s) + ", 0)"; + }); + + // Remove exiting nodes. + g.exit().remove(); + + // Now move and update the percentage at the end. + d3.select("#trail").select("#endlabel") + .attr("x", (nodeArray.length + 0.5) * (b.w + b.s)) + .attr("y", b.h / 2) + .attr("dy", "0.35em") + .attr("text-anchor", "middle") + .text(percentageString); + + // Make the breadcrumb trail visible, if it's hidden. + d3.select("#trail") + .style("visibility", ""); + +} +// Fade all but the current sequence, and show it in the breadcrumb trail. +function mouseover(d) { + + var percentage = (100 * d.value / totalSize).toPrecision(3); + var percentageString = percentage + "%"; + if (percentage < 0.1) { + percentageString = "< 0.1%"; + } + + d3.select("#percentage") + .text(percentageString); + + d3.select("#explanation") + .style("visibility", ""); + + var sequenceArray = getAncestors(d); + updateBreadcrumbs(sequenceArray, percentageString); + + // Fade all the segments. + d3.selectAll("path") + .style("opacity", 0.3); + + // Then highlight only those that are an ancestor of the current segment. + svg.selectAll("path") + .filter(function(node) { + return (sequenceArray.indexOf(node) >= 0); + }) + .style("opacity", 1); +} + +// Restore everything to full opacity when moving off the visualization. +function mouseleave(d) { + + // Hide the breadcrumb trail + d3.select("#trail") + .style("visibility", "hidden"); + + // Deactivate all segments during transition. + d3.selectAll("path").on("mouseover", null); + + // Transition each segment to full opacity and then reactivate it. + d3.selectAll("path") + .transition() + .duration(500) + .style("opacity", 1) + .each("end", function() { + d3.select(this).on("mouseover", mouseover); + }); + + d3.select("#explanation") + .style("visibility", "hidden"); +} + +function mousein(d) { + d3.select("#selection_name").text(d.name); + d3.select("#selection_value").text(d.value); + d3.select("#selection_size").text(d.size); + + mouseover(d); +} + +function mouseout(d) { + d3.select("#selection_name").text(""); + d3.select("#selection_value").text(""); + d3.select("#selection_size").text(""); + + //mouseleave(d); +} + +function loadTable(d, target) { + var content = ""; + console.log(d); + d3.select(target).html(""); + + var table = d3.select("#datatable").append("table").attr("class", "table"); + var row; + row = table.append("tr"); + row.append("th").text("Name"); + row.append("th").text("Clade count"); + row.append("th").text("Hits on this leaf"); + + for( var i in d.children ) { + var child = d.children[ i ]; + + row = table.append("tr"); + + row.append("td").attr("class", "col-md-6").append("a").text( child.name ) + .data(child) + .on("click", outerclick); + row.append("td").attr("class", "col-md-3").text( child.size ); + row.append("td").attr("class", "col-md-3").text( child.count ); + + } + +} + + +function outerclick(d) { + var path = svg.datum(node).selectAll("path"); + + node = d; + path.transition() + .duration(1000) + .attrTween("d", arcTweenZoom(d)); + + d3.select("#currentlevel").text(d.name); + + loadTable(d, "#datatable"); +} + + + + + + + + + +// Total size of all segments; we set this later, after loading the data. +var totalSize = 0; + + + + +var width = 960, + height = 700, + radius = Math.min(width, height) / 2; + +var x = d3.scale.linear() + .range([0, 2 * Math.PI]); + +var y = d3.scale.sqrt() + .range([0, radius]); + +var color = d3.scale.category20c(); + +var svg = d3.select("#svgholder").append("svg") + .attr("width", width) + .attr("height", height) + .append("g") + .attr("id", "container") + .attr("transform", "translate(" + width / 2 + "," + (height / 2 + 10) + ")"); + +// Bounding circle underneath the sunburst, to make it easier to detect +// when the mouse leaves the parent g. +svg.append("svg:circle") + .attr("r", radius) + .style("opacity", 0); + + +var partition = d3.layout.partition() + .sort(null) + .value(function(d) { return 1; }); + +var arc = d3.svg.arc() + .startAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x))); }) + .endAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x + d.dx))); }) + .innerRadius(function(d) { return Math.max(0, y(d.y)); }) + .outerRadius(function(d) { return Math.max(0, y(d.y + d.dy)); }); + +// Keep track of the node that is currently being displayed as the root. +var node; + +function loadGears( root ) { + console.log(root); + node = root; + var path = svg.datum(root).selectAll("path") + .data(partition.nodes) + .enter().append("path") + .attr("d", arc) + .attr("fill-rule", "evenodd") + .style("fill", function(d) { return color((d.children ? d : d.parent).name); }) + .style("opacity", 1) + .on("click", click) + .on("mouseover", mousein) + .on("mouseleave", mouseout) + .each(stash); + + d3.selectAll("input").on("change", function change() { + var value = this.value === "count" + ? function() { return 1; } + : function(d) { return d.size; }; + + path + .data(partition.value(value).nodes) + .transition() + .duration(1000) + .attrTween("d", arcTweenData); + }); + + function click(d) { + node = d; + path.transition() + .duration(1000) + .attrTween("d", arcTweenZoom(d)); + + d3.select("#currentlevel").text(d.name); + + loadTable(d, "#datatable"); + + } + // d3.select("#container").on("mouseleave", mouseleave); + initializeBreadcrumbTrail(); + + // Get total size of the tree = value of root node from partition. + totalSize = path.node().__data__.value; +} +d3.select(self.frameElement).style("height", height + "px"); + + + + + + + +// Setup for switching data: stash the old values for transition. +function stash(d) { + d.x0 = d.x; + d.dx0 = d.dx; +} + +// When switching data: interpolate the arcs in data space. +function arcTweenData(a, i) { + var oi = d3.interpolate({x: a.x0, dx: a.dx0}, a); + function tween(t) { + var b = oi(t); + a.x0 = b.x; + a.dx0 = b.dx; + return arc(b); + } + if (i == 0) { + // If we are on the first arc, adjust the x domain to match the root node + // at the current zoom level. (We only need to do this once.) + var xd = d3.interpolate(x.domain(), [node.x, node.x + node.dx]); + return function(t) { + x.domain(xd(t)); + return tween(t); + }; + } else { + return tween; + } +} + +// When zooming: interpolate the scales. +function arcTweenZoom(d) { + var xd = d3.interpolate(x.domain(), [d.x, d.x + d.dx]), + yd = d3.interpolate(y.domain(), [d.y, 1]), + yr = d3.interpolate(y.range(), [d.y ? 20 : 0, radius]); + return function(d, i) { + return i + ? function(t) { return arc(d); } + : function(t) { x.domain(xd(t)); y.domain(yd(t)).range(yr(t)); return arc(d); }; + }; +} diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala index c630b0b3e2d13e4c31466da08d984e6269e18d65..780f983d593544ddd5d5ffc017a65195cb0691e5 100644 --- a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala +++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala @@ -15,309 +15,143 @@ */ package nl.lumc.sasc.biopet.pipelines.gears -import htsjdk.samtools.SamReaderFactory -import nl.lumc.sasc.biopet.core.{ PipelineCommand, MultiSampleQScript } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.Ln +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile +import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag } import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport } -import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, SamToFastq } -import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import nl.lumc.sasc.biopet.extensions.tools.FastqSync +import nl.lumc.sasc.biopet.extensions.picard.SamToFastq +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView +import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript -import scala.collection.JavaConversions._ - /** - * This is a trait for the Gears pipeline - * The ShivaTrait is used as template for this pipeline + * Created by wyleung */ -class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qscript => +class Gears(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag { def this() = this(null) - /** Executed before running the script */ - def init(): Unit = { - } - - /** Method to add jobs */ - def biopetScript(): Unit = { - addSamplesJobs() - addSummaryJobs() - } - - /** Multisample meta-genome comparison */ - def addMultiSampleJobs(): Unit = { - // generate report from multiple samples, this is: - // - the TSV - // - the Spearman correlation plot + table - } - - /** Location of summary file */ - def summaryFile = new File(outputDir, "gears.summary.json") - - /** Settings of pipeline for summary */ - def summarySettings = Map() - - /** Files for the summary */ - def summaryFiles = Map() - - /** Method to make a sample */ - def makeSample(id: String) = new Sample(id) - - /** Class that will generate jobs for a sample */ - class Sample(sampleId: String) extends AbstractSample(sampleId) { - /** Sample specific files to add to summary */ - def summaryFiles: Map[String, File] = { - preProcessBam match { - case Some(pb) => Map("bamFile" -> pb) - case _ => Map() - } - } ++ Map("alignment" -> alnFile) - - /** Sample specific stats to add to summary */ - def summaryStats: Map[String, Any] = Map() - - /** Method to make a library */ - def makeLibrary(id: String) = new Library(id) - - /** Class to generate jobs for a library */ - class Library(libId: String) extends AbstractLibrary(libId) { - /** Library specific files to add to the summary */ - def summaryFiles: Map[String, File] = { - (bamFile, preProcessBam) match { - case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb) - case (Some(b), _) => Map("bamFile" -> b) - case _ => Map() - } - } - - /** Alignment results of this library ~ can only be accessed after addJobs is run! */ - def alnFile: File = bamFile match { - case Some(b) => b - case _ => throw new IllegalStateException("The bamfile is not generated yet") - } + @Input(doc = "R1 reads in FastQ format", shortName = "R1", required = false) + var fastqR1: Option[File] = None - /** Library specific stats to add to summary */ - def summaryStats: Map[String, Any] = Map() + @Input(doc = "R2 reads in FastQ format", shortName = "R2", required = false) + var fastqR2: Option[File] = None - /** Method to execute library preprocess */ - def preProcess(input: File): Option[File] = None + @Input(doc = "All unmapped reads will be extracted from this bam for analysis", shortName = "bam", required = false) + var bamFile: Option[File] = None - /** Method to make the mapping submodule */ - def makeMapping = { - val mapping = new Mapping(qscript) - mapping.sampleId = Some(sampleId) - mapping.libId = Some(libId) - mapping.outputDir = libDir - mapping.outputName = sampleId + "-" + libId - (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile)) - } + @Argument(required = false) + var outputName: String = _ - /** - * Determine where where to start the pipeline in cases where both R1 (fastq) and BAM is specified - */ - lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) = - (config.contains("R1"), config.contains("bam")) match { - case (true, _) => makeMapping // Default starting from fastq files - case (false, true) => // Starting from bam file - config("bam_to_fastq", default = false).asBoolean match { - case true => makeMapping // bam file will be converted to fastq - case false => - val file = new File(libDir, sampleId + "-" + libId + ".final.bam") - (None, Some(file), preProcess(file)) - } - case _ => (None, None, None) - } - - /** This will add jobs for this library */ - def addJobs(): Unit = { - (config.contains("R1"), config.contains("bam")) match { - case (true, _) => mapping.foreach(mapping => { - mapping.input_R1 = config("R1") - }) - case (false, true) => config("bam_to_fastq", default = false).asBoolean match { - case true => - val samToFastq = SamToFastq(qscript, config("bam"), - new File(libDir, sampleId + "-" + libId + ".R1.fastq"), - new File(libDir, sampleId + "-" + libId + ".R2.fastq")) - samToFastq.isIntermediate = true - qscript.add(samToFastq) - mapping.foreach(mapping => { - mapping.input_R1 = samToFastq.fastqR1 - mapping.input_R2 = Some(samToFastq.fastqR2) - }) - case false => - val inputSam = SamReaderFactory.makeDefault.open(config("bam")) - val readGroups = inputSam.getFileHeader.getReadGroups - - val readGroupOke = readGroups.forall(readGroup => { - if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") - readGroup.getSample == sampleId && readGroup.getLibrary == libId - }) - inputSam.close() - - if (!readGroupOke) { - if (config("correct_readgroups", default = false).asBoolean) { - logger.info("Correcting readgroups, file:" + config("bam")) - val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get) - aorrg.RGID = sampleId + "-" + libId - aorrg.RGLB = libId - aorrg.RGSM = sampleId - aorrg.isIntermediate = true - qscript.add(aorrg) - } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + - "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") - } else { - val oldBamFile: File = config("bam") - val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(qscript, oldBamFile, bamFile.get) - bamLn.deps :+= baiLn.output - add(bamLn) - } - } - case _ => logger.warn("Sample: " + sampleId + " Library: " + libId + ", no reads found") - } - mapping.foreach(mapping => { - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) // Add functions of mapping to current function pool - addSummaryQScript(mapping) - }) - } + /** Executed before running the script */ + def init(): Unit = { + require(fastqR1.isDefined || bamFile.isDefined, "Please specify fastq-file(s) or bam file") + require(fastqR1.isDefined != bamFile.isDefined, "Provide either a bam file or la R1 file") + + if (outputName == null) { + if (fastqR1.isDefined) outputName = fastqR1.map(_.getName + .stripSuffix(".gz") + .stripSuffix(".fastq") + .stripSuffix(".fq")) + .getOrElse("noName") + else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName") } - /** This will add jobs for the double preprocessing */ - protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { - if (input == Nil) None - else if (input.tail == Nil) { - val bamFile = new File(sampleDir, input.head.getName) - val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(qscript, input.head, bamFile) - bamLn.deps :+= baiLn.output - add(bamLn) - Some(bamFile) - } else { - val md = new MarkDuplicates(qscript) - md.input = input - md.output = new File(sampleDir, sampleId + ".dedup.bam") - md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics") - md.isIntermediate = isIntermediate - md.removeDuplicates = true - add(md) - addSummarizable(md, "mark_duplicates") - Some(md.output) - } + if (fastqR1.isDefined) { + fastqR1.foreach(inputFiles :+= InputFile(_)) + fastqR2.foreach(inputFiles :+= InputFile(_)) + } else { + inputFiles :+= InputFile(bamFile.get) } + } - lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => { - (lib._2.bamFile, lib._2.preProcessBam) match { - case (_, Some(file)) => Some(file) - case (Some(file), _) => Some(file) - case _ => None - } - }).toList) - def alnFile: File = sampleBamLinkJob.output + override def reportClass = { + val gears = new GearsReport(this) + gears.outputDir = new File(outputDir, "report") + gears.summaryFile = summaryFile + sampleId.foreach(gears.args += "sampleId" -> _) + libId.foreach(gears.args += "libId" -> _) + Some(gears) + } - /** Job for combining all library BAMs */ - private def sampleBamLinkJob: Ln = - makeCombineJob(libraries.values.map(_.alnFile).toList, createFile(".bam")) + override def defaults = Map( + "samtofastq" -> Map( + "validationstringency" -> "LENIENT" + ) + ) + /** Method to add jobs */ + def biopetScript(): Unit = { + val fastqFiles: List[File] = bamFile.map { bamfile => - /** Ln or MergeSamFile job, depending on how many inputs are supplied */ - private def makeCombineJob(inFiles: List[File], outFile: File, - mergeSortOrder: String = "coordinate"): Ln = { - require(inFiles.nonEmpty, "At least one input files for combine job") - val input: File = { + val samtoolsViewSelectUnmapped = new SamtoolsView(this) + samtoolsViewSelectUnmapped.input = bamfile + samtoolsViewSelectUnmapped.b = true + samtoolsViewSelectUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam") + samtoolsViewSelectUnmapped.f = List("12") + samtoolsViewSelectUnmapped.isIntermediate = true + add(samtoolsViewSelectUnmapped) - if (inFiles.size == 1) inFiles.head - else { - val mergedBam = createFile(".merged.bam") - val mergejob = new MergeSamFiles(qscript) - mergejob.input = inFiles - mergejob.output = mergedBam - mergejob.sortOrder = mergeSortOrder - add(mergejob) - mergejob.output - } - } + // start bam to fastq (only on unaligned reads) also extract the matesam + val samToFastq = new SamToFastq(this) + samToFastq.input = samtoolsViewSelectUnmapped.output + samToFastq.fastqR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz") + samToFastq.fastqR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz") + samToFastq.fastqUnpaired = new File(outputDir, s"$outputName.unmapped.singleton.fq.gz") + samToFastq.isIntermediate = true + add(samToFastq) - val linkJob = new Ln(qscript) - linkJob.input = input - linkJob.output = outFile - linkJob + List(samToFastq.fastqR1, samToFastq.fastqR2) + }.getOrElse(List(fastqR1, fastqR2).flatten) - } + // start kraken + val krakenAnalysis = new Kraken(this) + krakenAnalysis.input = fastqFiles + krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw") - /** This will add sample jobs */ - def addJobs(): Unit = { - addPerLibJobs() - // merge or symlink per-library alignments - add(sampleBamLinkJob) + krakenAnalysis.paired = fastqFiles.length == 2 - if (preProcessBam.isDefined) { - val bamMetrics = new BamMetrics(qscript) - bamMetrics.sampleId = Some(sampleId) - bamMetrics.inputBam = preProcessBam.get - bamMetrics.outputDir = sampleDir - bamMetrics.init() - bamMetrics.biopetScript() - addAll(bamMetrics.functions) - addSummaryQScript(bamMetrics) - } + krakenAnalysis.classified_out = Some(new File(outputDir, s"$outputName.krkn.classified.fastq")) + krakenAnalysis.unclassified_out = Some(new File(outputDir, s"$outputName.krkn.unclassified.fastq")) + add(krakenAnalysis) - // sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam> - val samFilterUnmapped = new SambambaView(qscript) - samFilterUnmapped.input = alnFile - samFilterUnmapped.filter = Some("unmapped or mate_is_unmapped") - samFilterUnmapped.output = createFile(".unmapped.bam") - samFilterUnmapped.isIntermediate = true - qscript.add(samFilterUnmapped) + outputFiles += ("kraken_output_raw" -> krakenAnalysis.output) + outputFiles += ("kraken_classified_out" -> krakenAnalysis.classified_out.getOrElse("")) + outputFiles += ("kraken_unclassified_out" -> krakenAnalysis.unclassified_out.getOrElse("")) - // start bam to fastq (only on unaligned reads) also extract the matesam - val samToFastq = SamToFastq(qscript, alnFile, - createFile(".unmap.R1.fastq"), - createFile(".unmap.R2.fastq") - ) - samToFastq.isIntermediate = true - qscript.add(samToFastq) + // create kraken summary file + val krakenReport = new KrakenReport(this) + krakenReport.input = krakenAnalysis.output + krakenReport.show_zeros = true + krakenReport.output = new File(outputDir, s"$outputName.krkn.full") + add(krakenReport) - // sync the fastq records - val fastqSync = new FastqSync(qscript) - fastqSync.refFastq = samToFastq.fastqR1 - fastqSync.inputFastq1 = samToFastq.fastqR1 - fastqSync.inputFastq2 = samToFastq.fastqR2 - fastqSync.outputFastq1 = createFile(".unmapsynced.R1.fastq.gz") - fastqSync.outputFastq2 = createFile(".unmapsynced.R2.fastq.gz") - fastqSync.outputStats = createFile(".syncstats.json") - qscript.add(fastqSync) + outputFiles += ("kraken_report_input" -> krakenReport.input) + outputFiles += ("kraken_report_output" -> krakenReport.output) - // start kraken - val krakenAnalysis = new Kraken(qscript) - krakenAnalysis.input = List(fastqSync.outputFastq1, fastqSync.outputFastq2) - krakenAnalysis.output = createFile(".krkn.raw") - krakenAnalysis.paired = true - krakenAnalysis.classified_out = Option(createFile(".krkn.classified.fastq")) - krakenAnalysis.unclassified_out = Option(createFile(".krkn.unclassified.fastq")) - qscript.add(krakenAnalysis) + val krakenReportJSON = new KrakenReportToJson(this) + krakenReportJSON.inputReport = krakenReport.output + krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json") + krakenReportJSON.skipNames = config("skipNames", default = false) + add(krakenReportJSON) + addSummarizable(krakenReportJSON, "krakenreport") - // create kraken summary file + outputFiles += ("kraken_report_json_input" -> krakenReportJSON.inputReport) + outputFiles += ("kraken_report_json_output" -> krakenReportJSON.output) - val krakenReport = new KrakenReport(qscript) - krakenReport.input = krakenAnalysis.output - krakenReport.show_zeros = true - krakenReport.output = createFile(".krkn.full") - qscript.add(krakenReport) - } + addSummaryJobs() } + + /** Location of summary file */ + def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".gears.summary.json") + + /** Pipeline settings shown in the summary file */ + def summarySettings: Map[String, Any] = Map.empty + + /** Statistics shown in the summary file */ + def summaryFiles: Map[String, File] = Map.empty ++ + (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++ + (if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++ + outputFiles } /** This object give a default main method to the pipelines */ diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala new file mode 100644 index 0000000000000000000000000000000000000000..f8b367dbbeff55d18e77e939f8cc8f57ad841c2d --- /dev/null +++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala @@ -0,0 +1,35 @@ +package nl.lumc.sasc.biopet.pipelines.gears + +import nl.lumc.sasc.biopet.core.report._ +import nl.lumc.sasc.biopet.utils.config.Configurable + +class GearsReport(val root: Configurable) extends ReportBuilderExtension { + val builder = GearsReport +} + +object GearsReport extends ReportBuilder { + + // TODO: Add dustbin analysis (aggregated) + // TODO: Add alignment stats per sample for the dustbin analysis + + override def extFiles = super.extFiles ++ List("js/gears.js") + .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) + + def indexPage = { + ReportPage( + List( + "Versions" -> ReportPage(List(), List(( + "Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp" + ))), Map()) + ), + List( + "Gears intro" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp"), + "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp") + ), + pageArgs + ) + } + + def reportName = "Gears :: Metagenomics Report" + +} diff --git a/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala b/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..842845bb5e88764376ab54dd2e41a83caa61f201 --- /dev/null +++ b/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala @@ -0,0 +1,137 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.pipelines.gears + +import java.io.File + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport } +import nl.lumc.sasc.biopet.extensions.picard.SamToFastq +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView +import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson +import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Config +import org.apache.commons.io.FileUtils +import org.broadinstitute.gatk.queue.QSettings +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations._ + +/** + * Test class for [[Gears]] + * + * Created by wyleung on 10/22/15. + */ + +class GearsPipelineTest(val testset: String) extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): Gears = { + new Gears { + override def configName = "gears" + + override def globalConfig = new Config(map) + + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @DataProvider(name = "gearsOptions") + def gearsOptions = { + val startFromBam = Array(true, false) + val paired = Array(true, false) + val hasOutputNames = Array(true, false) + val hasFileExtensions = Array(true, false) + + for ( + fromBam <- startFromBam; + pair <- paired; + hasOutputName <- hasOutputNames; + hasFileExtension <- hasFileExtensions + ) yield Array(testset, fromBam, pair, hasOutputName, hasFileExtension) + } + + @Test(dataProvider = "gearsOptions") + def testGears(testset: String, fromBam: Boolean, paired: Boolean, + hasOutputName: Boolean, hasFileExtension: Boolean) = { + val map = ConfigUtils.mergeMaps(Map( + "output_dir" -> GearsTest.outputDir + ), Map(GearsTest.executables.toSeq: _*)) + + val gears: Gears = initPipeline(map) + + if (fromBam) { + gears.bamFile = if (hasFileExtension) Some(GearsTest.bam) else Some(GearsTest.bam_noext) + } else { + gears.fastqR1 = if (hasFileExtension) Some(GearsTest.r1) else Some(GearsTest.r1_noext) + gears.fastqR2 = if (paired) if (hasFileExtension) Some(GearsTest.r2) else Some(GearsTest.r2_noext) else None + } + if (hasOutputName) + gears.outputName = "test" + + gears.script() + + if (hasOutputName) { + gears.outputName shouldBe "test" + } else { + // in the following cases the filename should have been determined by the filename + if (hasFileExtension) { + gears.outputName shouldBe (if (fromBam) "bamfile" else "R1") + } else { + // no real use-case for this one, have this is for sanity check + gears.outputName shouldBe (if (fromBam) "bamfile" else "R1") + } + } + + // SamToFastq should have started if it was started from bam + gears.functions.count(_.isInstanceOf[SamtoolsView]) shouldBe (if (fromBam) 1 else 0) + gears.functions.count(_.isInstanceOf[SamToFastq]) shouldBe (if (fromBam) 1 else 0) + + gears.functions.count(_.isInstanceOf[Kraken]) shouldBe 1 + gears.functions.count(_.isInstanceOf[KrakenReport]) shouldBe 1 + gears.functions.count(_.isInstanceOf[KrakenReportToJson]) shouldBe 1 + } + + // remove temporary run directory all tests in the class have been run + @AfterClass def removeTempOutputDir() = { + FileUtils.deleteDirectory(GearsTest.outputDir) + } +} + +object GearsTest { + val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + + val r1 = new File(outputDir, "input" + File.separator + "R1.fq") + Files.touch(r1) + val r2 = new File(outputDir, "input" + File.separator + "R2.fq") + Files.touch(r2) + val bam = new File(outputDir, "input" + File.separator + "bamfile.bam") + Files.touch(bam) + + val r1_noext = new File(outputDir, "input" + File.separator + "R1") + Files.touch(r1_noext) + val r2_noext = new File(outputDir, "input" + File.separator + "R2") + Files.touch(r2_noext) + val bam_noext = new File(outputDir, "input" + File.separator + "bamfile") + Files.touch(bam_noext) + + val executables = Map( + "kraken" -> Map("exe" -> "test", "db" -> "test"), + "krakenreport" -> Map("exe" -> "test", "db" -> "test"), + "sambamba" -> Map("exe" -> "test"), + "md5sum" -> Map("exe" -> "test") + ) +} diff --git a/public/mapping/pom.xml b/public/mapping/pom.xml index b5b45bb49c185ff3953a6a1623d154d88d8e7bf6..676b86c3ba70fada179524e93454fde310bf25cd 100644 --- a/public/mapping/pom.xml +++ b/public/mapping/pom.xml @@ -43,6 +43,11 @@ <artifactId>Flexiprep</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Gears</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>BamMetrics</artifactId> diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 2ef18c8f67e1ea784f8aa6d42d5daa688a4675a6..c56a48b2d5628493765f79a45d7d85f1f2d6c1ca 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -19,17 +19,17 @@ import java.io.File import java.util.Date import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse } import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam } +import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat, _ } import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep +import nl.lumc.sasc.biopet.pipelines.gears.Gears import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition -import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter -import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript import scala.math._ @@ -258,6 +258,16 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S add(Ln(this, bamFile, finalBamFile)) outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile) + if (config("unmapped_to_gears", default = false).asBoolean) { + val gears = new Gears(this) + gears.bamFile = Some(finalBamFile) + gears.outputDir = new File(outputDir, "gears") + gears.init() + gears.biopetScript() + addAll(gears.functions) + addSummaryQScript(gears) + } + if (config("generate_wig", default = false).asBoolean) addAll(Bam2Wig(this, finalBamFile).functions) diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala index b2f1b7a846da3483eeed879e493e651f25a83759..6c8b2b50023f8d1dbfa66237b38de1783307f376 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala @@ -16,7 +16,7 @@ package nl.lumc.sasc.biopet.pipelines.mapping import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection, ReportPage, ReportBuilder } +import nl.lumc.sasc.biopet.core.report._ import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport @@ -33,6 +33,11 @@ object MappingReport extends ReportBuilder { /** Name of report */ val reportName = "Mapping Report" + override def extFiles = super.extFiles ++ List("js/gears.js") + .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) + + def krakenExecuted = summary.getValue(sampleId, libId, "gears", "stats", "krakenreport").isDefined + /** Root page for single BamMetrcis report */ def indexPage = { val skipFlexiprep = summary.getValue(sampleId, libId, "mapping", "settings", "skip_flexiprep").getOrElse(false) == true @@ -48,7 +53,11 @@ object MappingReport extends ReportBuilder { "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"))) ::: List("Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)) ), Map()) - ), List( + ) ::: + (if (krakenExecuted) List("Gears - Metagenomics" -> ReportPage(List(), List( + "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp" + )), Map())) + else Nil), List( "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/mappingFront.ssp") ) ::: bamMetricsPage.map(_.sections).getOrElse(Nil), Map() diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala index b7b271ca82d6d474ce333988f509188afb440fd4..9fa0eaa5f0256d623ad9615058710741cdc6327f 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala @@ -41,6 +41,9 @@ object ShivaReport extends MultisampleReportBuilder { case _ => false } + override def extFiles = super.extFiles ++ List("js/gears.js") + .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) + /** Root page for the shiva report */ def indexPage = { val regions = regionsPage @@ -144,10 +147,15 @@ object ShivaReport extends MultisampleReportBuilder { /** Library page */ def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { val flexiprepExecuted = summary.getLibraryValue(sampleId, libId, "flexiprep").isDefined + val krakenExecuted = summary.getValue(Some(sampleId), Some(libId), "gears", "stats", "krakenreport").isDefined + ReportPage( "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)) :: (if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil - ), "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp") :: + ) ::: (if (krakenExecuted) List("Gears - Metagenomics" -> ReportPage(List(), List( + "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp" + )), Map())) + else Nil), "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp") :: (if (flexiprepExecuted) List( "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")