Commit 7696695b authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into fix-shiva_fails

Conflicts:
	public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala
parents 62a99234 8c42e046
......@@ -119,5 +119,5 @@ trait BiopetQScript extends Configurable with GatkLogging {
}
object BiopetQScript {
protected case class InputFile(file: File, md5: Option[String] = None)
case class InputFile(file: File, md5: Option[String] = None)
}
......@@ -16,11 +16,13 @@
package nl.lumc.sasc.biopet.core.report
import java.io._
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.summary.Summary
import nl.lumc.sasc.biopet.utils.{ ToolCommand, Logging, IoUtils }
import nl.lumc.sasc.biopet.utils.{ IoUtils, Logging, ToolCommand }
import org.broadinstitute.gatk.utils.commandline.Input
import org.fusesource.scalate.{ TemplateEngine, TemplateSource }
import scala.collection.mutable
/**
......@@ -95,6 +97,21 @@ trait ReportBuilder extends ToolCommand {
private var _libId: Option[String] = None
protected def libId = _libId
/**
 * A static file shipped with the report.
 *
 * @param resourcePath classpath location the file is read from
 * @param targetPath   path of the copy, relative to the report's `ext` output directory
 */
case class ExtFile(resourcePath: String, targetPath: String)

/** Static css/js/font files that are copied next to the generated report. */
def extFiles = {
  // every bundled file lives below this classpath folder and keeps its relative path in the output
  val resourceRoot = "/nl/lumc/sasc/biopet/core/report/ext/"
  val stylesheets = List(
    "css/bootstrap_dashboard.css",
    "css/bootstrap.min.css",
    "css/bootstrap-theme.min.css",
    "css/sortable-theme-bootstrap.css")
  val scripts = List(
    "js/jquery.min.js",
    "js/sortable.min.js",
    "js/bootstrap.min.js")
  val fonts = List(
    "fonts/glyphicons-halflings-regular.woff",
    "fonts/glyphicons-halflings-regular.ttf",
    "fonts/glyphicons-halflings-regular.woff2")
  for (relativePath <- stylesheets ::: scripts ::: fonts)
    yield ExtFile(resourceRoot + relativePath, relativePath)
}
/** Main function for building the report */
def main(args: Array[String]): Unit = {
logger.info("Start")
......@@ -123,22 +140,9 @@ trait ReportBuilder extends ToolCommand {
// Static files that will be copied to the output folder; when a file is added to [resourceDir] it needs to be added here as well
val extOutputDir: File = new File(cmdArgs.outputDir, "ext")
val resourceDir: String = "/nl/lumc/sasc/biopet/core/report/ext/"
val extFiles = List(
"css/bootstrap_dashboard.css",
"css/bootstrap.min.css",
"css/bootstrap-theme.min.css",
"css/sortable-theme-bootstrap.css",
"js/jquery.min.js",
"js/sortable.min.js",
"js/bootstrap.min.js",
"fonts/glyphicons-halflings-regular.woff",
"fonts/glyphicons-halflings-regular.ttf",
"fonts/glyphicons-halflings-regular.woff2"
)
for (resource <- extFiles.par) {
IoUtils.copyStreamToFile(getClass.getResourceAsStream(resourceDir + resource), new File(extOutputDir, resource), createDirs = true)
IoUtils.copyStreamToFile(getClass.getResourceAsStream(resource.resourcePath), new File(extOutputDir, resource.targetPath), createDirs = true)
}
logger.info("Parsing summary")
......
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions.kraken
import java.io.File
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -39,11 +39,6 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
var db: File = config("db")
var inputFastQ: Boolean = true
var compression: Boolean = false
var compressionGzip: Boolean = false
var compressionBzip: Boolean = false
var quick: Boolean = false
var minHits: Option[Int] = config("min_hits")
......@@ -51,11 +46,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
var paired: Boolean = config("paired", default = false)
executable = config("exe", default = "kraken")
def versionRegex = """Kraken version ([\d\w\-\.]+)\n.*""".r
def versionRegex = """^Kraken version ([\d\w\-\.]+)""".r
override def versionExitcode = List(0, 1)
def versionCommand = executable + " --version"
override def defaultCoreMemory = 8.0
override def defaultThreads = 4
/** Sets readgroup when not set yet */
......@@ -66,16 +65,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
/** Returns command to execute */
def cmdLine = required(executable) +
"--db" + required(db) +
required("--db", db) +
optional("--threads", nCoresRequest) +
conditional(inputFastQ, "--fastq-input") +
conditional(!inputFastQ, "--fasta-input") +
conditional(quick, "--quick") +
optional("--min_hits", minHits) +
optional("--unclassified-out ", unclassified_out.get) +
optional("--classified-out ", classified_out.get) +
"--output" + required(output) +
required("--output", output) +
conditional(preLoad, "--preload") +
conditional(paired, "--paired") +
conditional(paired, "--check-names") +
repeat(input)
}
......@@ -44,9 +44,9 @@ class KrakenReport(val root: Configurable) extends BiopetCommandLineFunction wit
var output: File = _
def cmdLine: String = {
val cmd: String = "--db " + required(db) +
val cmd: String = required(executable) + "--db " + required(db) +
conditional(show_zeros, "--show-zeros") +
input.getAbsolutePath + ">" + output.getAbsolutePath
required(input.getAbsolutePath) + " > " + required(output.getAbsolutePath)
cmd
}
}
......@@ -15,16 +15,16 @@
*/
package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
/** General Sambamba extension */
abstract class Sambamba extends BiopetCommandLineFunction with Version {
override def defaultCoreMemory = 2.0
override def defaultCoreMemory = 4.0
override def defaultThreads = 2
override def subPath = "sambamba" :: super.subPath
executable = config("exe", default = "sambamba", freeVar = false)
executable = config("exe", default = "sambamba", submodule = "sambamba", freeVar = false)
def versionCommand = executable
def versionRegex = """sambamba v(.*)""".r
override def versionExitcode = List(0, 1)
......
......@@ -43,6 +43,6 @@ class SambambaView(val root: Configurable) extends Sambamba {
optional("--format", format.get) +
optional("--regions", regions) +
optional("--compression-level", compression_level) +
required("--output" + output) +
required("--output-filename", output) +
required(input)
}
......@@ -27,7 +27,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig,
nl.lumc.sasc.biopet.pipelines.carp.Carp,
nl.lumc.sasc.biopet.pipelines.toucan.Toucan,
nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling
nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling,
nl.lumc.sasc.biopet.pipelines.gears.Gears
)
def pipelines: List[MainCommand] = List(
......@@ -59,5 +60,6 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.SeqStat,
nl.lumc.sasc.biopet.tools.VepNormalizer,
nl.lumc.sasc.biopet.tools.AnnotateVcfWithBed,
nl.lumc.sasc.biopet.tools.VcfWithVcf)
nl.lumc.sasc.biopet.tools.VcfWithVcf,
nl.lumc.sasc.biopet.tools.KrakenReportToJson)
}
package nl.lumc.sasc.biopet.extensions.tools
/**
* Created by waiyileung on 05-10-15.
*/
import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
 * KrakenReportToJson function class for usage in Biopet pipelines
 *
 * Runs the `nl.lumc.sasc.biopet.tools.KrakenReportToJson` tool on a Kraken report
 * and exposes the JSON file it writes as summary statistics.
 *
 * @param root Configuration object for the pipeline
 */
class KrakenReportToJson(val root: Configurable) extends ToolCommandFunction with Summarizable {
  def toolObject = nl.lumc.sasc.biopet.tools.KrakenReportToJson

  @Input(doc = "Input Kraken Full report", shortName = "inputReport", required = true)
  var inputReport: File = _

  /** When true the tool is asked to leave the scientific names out of the JSON */
  @Argument(required = false)
  var skipNames: Boolean = false

  @Output(doc = "Output JSON", shortName = "output", required = true)
  var output: File = _

  override def defaultCoreMemory = 2.0

  /** Returns command to execute */
  override def cmdLine = {
    // The tool declares `skipnames` as a Boolean option that takes a value, so a bare
    // `--skipnames` flag would be rejected by its argument parser; pass the value explicitly.
    val skipNamesArg = if (skipNames) required("--skipnames", "true") else ""
    super.cmdLine +
      required("-i", inputReport) +
      required("-o", output) +
      skipNamesArg
  }

  /** Summary statistics: the content of the JSON output file, loaded through `ConfigUtils.fileToConfigMap` */
  def summaryStats: Map[String, Any] = {
    ConfigUtils.fileToConfigMap(output)
  }

  /** This function does not register any files in the summary */
  def summaryFiles: Map[String, File] = Map()
}
object KrakenReportToJson {
  /**
   * Creates a KrakenReportToJson job for the given report.
   *
   * The JSON file is named after the input file: everything from the last `.` of the
   * input name is replaced by `.kraken.json`. When the input name has no `.` the
   * suffix is appended to the full name.
   *
   * @param root   Configuration object for the pipeline
   * @param input  Kraken report to convert
   * @param output Directory in which the JSON file is created
   * @return the configured job
   */
  def apply(root: Configurable, input: File, output: File): KrakenReportToJson = {
    val report = new KrakenReportToJson(root)
    report.inputReport = input

    val inputName = input.getName
    // lastIndexOf returns -1 for a name without extension; substring(0, -1) would throw
    val baseName = inputName.lastIndexOf(".") match {
      case -1       => inputName
      case dotIndex => inputName.substring(0, dotIndex)
    }
    report.output = new File(output, baseName + ".kraken.json")
    report
  }
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools
/**
* Created by wyleung on 25-9-15.
*/
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.ConfigUtils._
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source
/**
 * Tool that converts the (full) output of kraken-report into a JSON tree.
 *
 * Note that parsing relies on the mutable members [[cladeIDs]] and `lines`:
 * report lines have to be parsed in the order in which they appear in the report.
 */
object KrakenReportToJson extends ToolCommand {

  /**
   * A single line (taxon) of the Kraken report.
   *
   * @param taxonomyID       taxonomy ID, 5th column of the report
   * @param taxonomyName     trimmed scientific name, empty when names are skipped
   * @param cladeCount       3rd column of the report
   * @param cladeSize        2nd column of the report
   * @param taxonRank        rank code, 4th column of the report
   * @param cladeLevel       depth in the tree, derived from the indentation of the name
   * @param parentTaxonomyID taxonomy ID of the closest preceding line one level up
   * @param children         hits that have this hit as parent, filled in by [[reportToJson]]
   */
  case class KrakenHit(taxonomyID: Long,
                       taxonomyName: String,
                       cladeCount: Long,
                       cladeSize: Long, // size of parent - including itself
                       taxonRank: String,
                       cladeLevel: Int,
                       parentTaxonomyID: Long,
                       children: ListBuffer[KrakenHit]) {

    /**
     * Converts this hit to a map that can be serialised to JSON.
     *
     * @param withChildren when true the children are converted recursively,
     *                     otherwise "children" is an empty list
     */
    def toJSON(withChildren: Boolean = false): Map[String, Any] = {
      val childJSON = if (withChildren) children.toList.map(entry => entry.toJSON(withChildren)) else List()
      Map(
        "name" -> taxonomyName,
        "taxid" -> taxonomyID,
        "taxonrank" -> taxonRank,
        "cladelevel" -> cladeLevel,
        "count" -> cladeCount,
        "size" -> cladeSize,
        "children" -> childJSON
      )
    }
  }

  /**
   * Taxonomy ID of the most recently parsed hit per clade level: the ID of a hit at
   * level `n` is stored at index `n + 1`, so index `n` holds the parent of a level `n` hit.
   * Index 0 is never written and stays 0.
   */
  var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0)

  /** Matches the leading spaces that kraken-report uses to indent the scientific name */
  val spacePattern = "^( +)".r

  /** Hits of the report parsed last, by taxonomy ID */
  private var lines: Map[Long, KrakenHit] = Map.empty

  case class Args(krakenreport: File = null, outputJson: Option[File] = None, skipNames: Boolean = false) extends AbstractArgs

  class OptParser extends AbstractOptParser {

    head(
      s"""
         |$commandName - Convert Kraken-report (full) output to JSON
         |""".stripMargin)

    opt[File]('i', "krakenreport") required () unbounded () valueName "<krakenreport>" action { (x, c) =>
      c.copy(krakenreport = x)
    } validate {
      x => if (x.exists) success else failure("Krakenreport not found")
    } text "Kraken report to generate stats from"
    opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) =>
      c.copy(outputJson = Some(x))
    } text "File to write output to, if not supplied output go to stdout"
    opt[Boolean]('n', "skipnames") unbounded () valueName "<skipnames>" action { (x, c) =>
      c.copy(skipNames = x)
    } text "Don't report the scientific name of the taxon."

  }

  /**
   * Parses the command line argument
   *
   * @param args Array of arguments
   * @return
   */
  def parseArgs(args: Array[String]): Args = new OptParser()
    .parse(args, Args())
    .getOrElse(sys.exit(1))

  /**
   * Takes a line from the kraken report, converts into Map with taxonID and
   * information on this hit as `KrakenHit`. `KrakenHit` is used later on for
   * building the tree
   *
   * The parent of the hit is looked up in [[cladeIDs]], which is updated with the
   * ID of this hit; lines therefore have to be passed in report order.
   *
   * @param krakenRawHit Line from the KrakenReport output
   * @param skipNames Specify to skip names in the report output to reduce size of JSON
   * @return Map with a single entry: taxonomy ID -> hit
   */
  def parseLine(krakenRawHit: String, skipNames: Boolean): Map[Long, KrakenHit] = {
    val values: Array[String] = krakenRawHit.stripLineEnd.split("\t")

    assert(values.length == 6,
      s"Expected 6 tab-separated columns in Kraken report line but found ${values.length}: '$krakenRawHit'")

    val scientificName: String = values(5)
    // kraken-report indents the name with two spaces per level
    val cladeLevel = spacePattern.findFirstIn(scientificName).getOrElse("").length / 2
    val taxonomyID = values(4).toLong

    // keep growing until the slot for this level exists; a single step of 10 may not be enough
    while (cladeIDs.length <= cladeLevel + 1) {
      cladeIDs ++= mutable.ArrayBuffer.fill(10)(0L)
    }

    cladeIDs(cladeLevel + 1) = taxonomyID
    Map(
      taxonomyID -> KrakenHit(
        taxonomyID = taxonomyID,
        taxonomyName = if (skipNames) "" else scientificName.trim,
        cladeCount = values(2).toLong,
        cladeSize = values(1).toLong,
        taxonRank = values(3),
        cladeLevel = cladeLevel,
        parentTaxonomyID = cladeIDs(cladeLevel),
        children = ListBuffer()
      ))
  }

  /**
   * Read the `KrakenReport` output and transform into `Map` by TaxonID and `KrakenHit`
   * A JSON-string output is given.
   *
   * Only hits with a clade size above 0 are kept, plus the hits with taxonomy ID 0 and 1,
   * which end up under the "unclassified" and "classified" keys of the result.
   *
   * @param reportRaw The `KrakenReport` output
   * @param skipNames Specify to skip names in the report output to reduce size of JSON
   * @return JSON string, indented with 2 spaces
   */
  def reportToJson(reportRaw: File, skipNames: Boolean): String = {
    val reader = Source.fromFile(reportRaw)

    /*
     * http://ccb.jhu.edu/software/kraken/MANUAL.html
     * The header layout is:
     * 1. Percentage of reads covered by the clade rooted at this taxon
     * 2. Number of reads covered by the clade rooted at this taxon
     * 3. Number of reads assigned directly to this taxon
     * 4. A rank code, indicating (U)nclassified, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. All other ranks are simply '-'.
     * 5. NCBI taxonomy ID
     * 6. indented scientific name
     * */
    try {
      // foldLeft consumes the whole iterator, so the file can be closed right after it
      lines = reader.getLines()
        .map(line => parseLine(line, skipNames))
        .filter(p => (p.head._2.cladeSize > 0) || List(0L, 1L).contains(p.head._2.taxonomyID))
        .foldLeft(Map.empty[Long, KrakenHit])((a, b) => {
          a + b.head
        })
    } finally {
      reader.close()
    }

    lines.keys.foreach(k => {
      // append itself to the children attribute of the parent
      if (lines(k).parentTaxonomyID > 0L) {
        // avoid the root and unclassified appending to the unclassified node
        lines(lines(k).parentTaxonomyID).children += lines(k)
      }
    })

    val result = Map("unclassified" -> lines(0).toJSON(withChildren = false),
      "classified" -> lines(1).toJSON(withChildren = true))
    mapToJson(result).spaces2
  }

  /** Converts the report given on the command line and writes the JSON to the output file or stdout */
  def main(args: Array[String]): Unit = {
    val commandArgs: Args = parseArgs(args)

    val jsonString: String = reportToJson(commandArgs.krakenreport, skipNames = commandArgs.skipNames)
    commandArgs.outputJson match {
      case Some(file) =>
        val writer = new PrintWriter(file)
        // make sure the file handle is released, also when writing fails
        try writer.println(jsonString)
        finally writer.close()
      case _ => println(jsonString)
    }

  }
}
......@@ -23,11 +23,14 @@
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.5.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<inceptionYear>2015</inceptionYear>
<artifactId>Gears</artifactId>
<name>Gears</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
......@@ -37,7 +40,7 @@
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Mapping</artifactId>
<artifactId>BiopetToolsExtensions</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
......
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
<%@ var summary: Summary %>
<%@ var rootPath: String %>
<%@ var sampleId: Option[String] = None %>
<%@ var libId: Option[String] = None %>
<table class="table">
<tbody>
<tr><th>Pipeline</th><td>Gears</td></tr>
<tr><th>Version</th><td>${summary.getValue("meta", "pipeline_version")}</td></tr>
<tr><th>Last commit hash</th><td>${summary.getValue("meta", "last_commit_hash")}</td></tr>
<tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr>
#if(sampleId.isDefined) <tr><th>Sample</th><td>${sampleId}</td></tr> #end
#if(libId.isDefined) <tr><th>Library</th><td>${libId}</td></tr> #end
</tbody>
</table>
<br/>
<div class="row">
<div class="col-md-1"></div>
<div class="col-md-6">
<p>
In this web document you can find your <em>Gears</em> pipeline report.
Different categories of data can be found in the left-side menu.
Statistics per sample and library can be accessed through the top-level menu.
Some statistics for target regions can be found in the regions tab.
Furthermore, you can view all versions of software tools used by selecting <em>Versions</em> from the top menu.
</p>
<p>
<small>Brought to you by <a href="https://sasc.lumc.nl" target="_blank"><abbr
title="Sequence Analysis Support Core">SASC</abbr></a> and <a
href="https://www.lumc.nl/org/klinische-genetica/" target="_blank"><abbr title="Clinical Genetics LUMC">KG</abbr></a>,
LUMC.
</small>
</p>
</div>
</div>
\ No newline at end of file
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.utils.ConfigUtils)
#import(java.io.File)
<%@ var summary: Summary %>
<%@ var sampleId: Option[String] = None %>
<%@ var libId: Option[String] = None %>
<%@ var rootPath: String %>
<%@ var showPlot: Boolean = true %>
<%@ var showIntro: Boolean = true %>
#{
val samples = sampleId match {
case Some(sample) => List(sample.toString)
case _ => summary.samples.toList
}
val librariesCount = summary.samples.foldLeft(0)(_ + summary.libraries(_).size)
}#
#if (showIntro)
<div class="row">
<div class="col-md-1"></div>
<div class="col-md-10">
Here we show a sunburst visualisation of the analysis of the metagenome in sample: ${sampleId}
</div>
<div class="col-md-1"></div>
</div>
#end
#if (showPlot)
<div class="row">
<div class="col-md-12">
<h3 id='currentlevel'>Root</h3>
<div>
<span id="selection_name"></span> -
<span id="selection_size"></span> -
<span id="selection_value"></span>
</div>
<form>
<label><input type="radio" name="mode" value="size"> Size</label>
<label><input type="radio" name="mode" value="count" checked> Count</label>
</form>
<div id="sequence"></div>
<div id="datatable"></div>
<div id="svgholder"></div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js" charset="utf-8"></script>
<script src="${rootPath}ext/js/gears.js"></script>
<script type="application/ecmascript">
    #{
        val rawreport = Map("kraken" -> summary.getValue(sampleId, libId, "gears", "stats", "krakenreport"))
        val jsonReport = ConfigUtils.mapToJson(rawreport)
    }#
    // The JSON is embedded as an object literal instead of a quoted string passed to JSON.parse:
    // an apostrophe in a taxon name would end a single-quoted string early, and JavaScript
    // escape processing would alter the JSON's own backslash escapes before parsing.
    var krakenresult = <%= unescape(jsonReport) %>;
    loadGears(krakenresult.kraken.classified);
</script>
</div>
#end
// Breadcrumb dimensions: width, height, spacing, width of tip/tail.
var b = {
w: 130, h: 20, s: 3, t: 10