Commit 7696695b authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into fix-shiva_fails

Conflicts:
	public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala
parents 62a99234 8c42e046
...@@ -119,5 +119,5 @@ trait BiopetQScript extends Configurable with GatkLogging { ...@@ -119,5 +119,5 @@ trait BiopetQScript extends Configurable with GatkLogging {
} }
object BiopetQScript { object BiopetQScript {
protected case class InputFile(file: File, md5: Option[String] = None) case class InputFile(file: File, md5: Option[String] = None)
} }
...@@ -16,11 +16,13 @@ ...@@ -16,11 +16,13 @@
package nl.lumc.sasc.biopet.core.report package nl.lumc.sasc.biopet.core.report
import java.io._ import java.io._
import nl.lumc.sasc.biopet.core.ToolCommandFunction import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.summary.Summary import nl.lumc.sasc.biopet.utils.summary.Summary
import nl.lumc.sasc.biopet.utils.{ ToolCommand, Logging, IoUtils } import nl.lumc.sasc.biopet.utils.{ IoUtils, Logging, ToolCommand }
import org.broadinstitute.gatk.utils.commandline.Input import org.broadinstitute.gatk.utils.commandline.Input
import org.fusesource.scalate.{ TemplateEngine, TemplateSource } import org.fusesource.scalate.{ TemplateEngine, TemplateSource }
import scala.collection.mutable import scala.collection.mutable
/** /**
...@@ -95,6 +97,21 @@ trait ReportBuilder extends ToolCommand { ...@@ -95,6 +97,21 @@ trait ReportBuilder extends ToolCommand {
private var _libId: Option[String] = None private var _libId: Option[String] = None
protected def libId = _libId protected def libId = _libId
case class ExtFile(resourcePath: String, targetPath: String)
def extFiles = List(
"css/bootstrap_dashboard.css",
"css/bootstrap.min.css",
"css/bootstrap-theme.min.css",
"css/sortable-theme-bootstrap.css",
"js/jquery.min.js",
"js/sortable.min.js",
"js/bootstrap.min.js",
"fonts/glyphicons-halflings-regular.woff",
"fonts/glyphicons-halflings-regular.ttf",
"fonts/glyphicons-halflings-regular.woff2"
).map(x => ExtFile("/nl/lumc/sasc/biopet/core/report/ext/" + x, x))
/** Main function for building the report */ /** Main function for building the report */
def main(args: Array[String]): Unit = { def main(args: Array[String]): Unit = {
logger.info("Start") logger.info("Start")
...@@ -123,22 +140,9 @@ trait ReportBuilder extends ToolCommand { ...@@ -123,22 +140,9 @@ trait ReportBuilder extends ToolCommand {
// Static files that will be copied to the output folder; when a file is added to [resourceDir] it needs to be added here as well // Static files that will be copied to the output folder; when a file is added to [resourceDir] it needs to be added here as well
val extOutputDir: File = new File(cmdArgs.outputDir, "ext") val extOutputDir: File = new File(cmdArgs.outputDir, "ext")
val resourceDir: String = "/nl/lumc/sasc/biopet/core/report/ext/"
val extFiles = List(
"css/bootstrap_dashboard.css",
"css/bootstrap.min.css",
"css/bootstrap-theme.min.css",
"css/sortable-theme-bootstrap.css",
"js/jquery.min.js",
"js/sortable.min.js",
"js/bootstrap.min.js",
"fonts/glyphicons-halflings-regular.woff",
"fonts/glyphicons-halflings-regular.ttf",
"fonts/glyphicons-halflings-regular.woff2"
)
for (resource <- extFiles.par) { for (resource <- extFiles.par) {
IoUtils.copyStreamToFile(getClass.getResourceAsStream(resourceDir + resource), new File(extOutputDir, resource), createDirs = true) IoUtils.copyStreamToFile(getClass.getResourceAsStream(resource.resourcePath), new File(extOutputDir, resource.targetPath), createDirs = true)
} }
logger.info("Parsing summary") logger.info("Parsing summary")
......
...@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions.kraken ...@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions.kraken
import java.io.File import java.io.File
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
...@@ -39,11 +39,6 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers ...@@ -39,11 +39,6 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
var db: File = config("db") var db: File = config("db")
var inputFastQ: Boolean = true
var compression: Boolean = false
var compressionGzip: Boolean = false
var compressionBzip: Boolean = false
var quick: Boolean = false var quick: Boolean = false
var minHits: Option[Int] = config("min_hits") var minHits: Option[Int] = config("min_hits")
...@@ -51,11 +46,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers ...@@ -51,11 +46,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
var paired: Boolean = config("paired", default = false) var paired: Boolean = config("paired", default = false)
executable = config("exe", default = "kraken") executable = config("exe", default = "kraken")
def versionRegex = """Kraken version ([\d\w\-\.]+)\n.*""".r
def versionRegex = """^Kraken version ([\d\w\-\.]+)""".r
override def versionExitcode = List(0, 1) override def versionExitcode = List(0, 1)
def versionCommand = executable + " --version" def versionCommand = executable + " --version"
override def defaultCoreMemory = 8.0 override def defaultCoreMemory = 8.0
override def defaultThreads = 4 override def defaultThreads = 4
/** Sets readgroup when not set yet */ /** Sets readgroup when not set yet */
...@@ -66,16 +65,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers ...@@ -66,16 +65,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
/** Returns command to execute */ /** Returns command to execute */
def cmdLine = required(executable) + def cmdLine = required(executable) +
"--db" + required(db) + required("--db", db) +
optional("--threads", nCoresRequest) + optional("--threads", nCoresRequest) +
conditional(inputFastQ, "--fastq-input") +
conditional(!inputFastQ, "--fasta-input") +
conditional(quick, "--quick") + conditional(quick, "--quick") +
optional("--min_hits", minHits) + optional("--min_hits", minHits) +
optional("--unclassified-out ", unclassified_out.get) + optional("--unclassified-out ", unclassified_out.get) +
optional("--classified-out ", classified_out.get) + optional("--classified-out ", classified_out.get) +
"--output" + required(output) + required("--output", output) +
conditional(preLoad, "--preload") + conditional(preLoad, "--preload") +
conditional(paired, "--paired") + conditional(paired, "--paired") +
conditional(paired, "--check-names") +
repeat(input) repeat(input)
} }
...@@ -44,9 +44,9 @@ class KrakenReport(val root: Configurable) extends BiopetCommandLineFunction wit ...@@ -44,9 +44,9 @@ class KrakenReport(val root: Configurable) extends BiopetCommandLineFunction wit
var output: File = _ var output: File = _
def cmdLine: String = { def cmdLine: String = {
val cmd: String = "--db " + required(db) + val cmd: String = required(executable) + "--db " + required(db) +
conditional(show_zeros, "--show-zeros") + conditional(show_zeros, "--show-zeros") +
input.getAbsolutePath + ">" + output.getAbsolutePath required(input.getAbsolutePath) + " > " + required(output.getAbsolutePath)
cmd cmd
} }
} }
...@@ -15,16 +15,16 @@ ...@@ -15,16 +15,16 @@
*/ */
package nl.lumc.sasc.biopet.extensions.sambamba package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
/** General Sambamba extension */ /** General Sambamba extension */
abstract class Sambamba extends BiopetCommandLineFunction with Version { abstract class Sambamba extends BiopetCommandLineFunction with Version {
override def defaultCoreMemory = 2.0 override def defaultCoreMemory = 4.0
override def defaultThreads = 2 override def defaultThreads = 2
override def subPath = "sambamba" :: super.subPath override def subPath = "sambamba" :: super.subPath
executable = config("exe", default = "sambamba", freeVar = false) executable = config("exe", default = "sambamba", submodule = "sambamba", freeVar = false)
def versionCommand = executable def versionCommand = executable
def versionRegex = """sambamba v(.*)""".r def versionRegex = """sambamba v(.*)""".r
override def versionExitcode = List(0, 1) override def versionExitcode = List(0, 1)
......
...@@ -43,6 +43,6 @@ class SambambaView(val root: Configurable) extends Sambamba { ...@@ -43,6 +43,6 @@ class SambambaView(val root: Configurable) extends Sambamba {
optional("--format", format.get) + optional("--format", format.get) +
optional("--regions", regions) + optional("--regions", regions) +
optional("--compression-level", compression_level) + optional("--compression-level", compression_level) +
required("--output" + output) + required("--output-filename", output) +
required(input) required(input)
} }
...@@ -27,7 +27,8 @@ object BiopetExecutablePublic extends BiopetExecutable { ...@@ -27,7 +27,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig, nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig,
nl.lumc.sasc.biopet.pipelines.carp.Carp, nl.lumc.sasc.biopet.pipelines.carp.Carp,
nl.lumc.sasc.biopet.pipelines.toucan.Toucan, nl.lumc.sasc.biopet.pipelines.toucan.Toucan,
nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling,
nl.lumc.sasc.biopet.pipelines.gears.Gears
) )
def pipelines: List[MainCommand] = List( def pipelines: List[MainCommand] = List(
...@@ -59,5 +60,6 @@ object BiopetExecutablePublic extends BiopetExecutable { ...@@ -59,5 +60,6 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.SeqStat, nl.lumc.sasc.biopet.tools.SeqStat,
nl.lumc.sasc.biopet.tools.VepNormalizer, nl.lumc.sasc.biopet.tools.VepNormalizer,
nl.lumc.sasc.biopet.tools.AnnotateVcfWithBed, nl.lumc.sasc.biopet.tools.AnnotateVcfWithBed,
nl.lumc.sasc.biopet.tools.VcfWithVcf) nl.lumc.sasc.biopet.tools.VcfWithVcf,
nl.lumc.sasc.biopet.tools.KrakenReportToJson)
} }
package nl.lumc.sasc.biopet.extensions.tools
/**
* Created by waiyileung on 05-10-15.
*/
import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
 * KrakenReportToJson function class for usage in Biopet pipelines.
 *
 * Builds the command line for the `nl.lumc.sasc.biopet.tools.KrakenReportToJson`
 * tool and exposes the produced JSON as summary statistics.
 *
 * @param root Configuration object for the pipeline
 */
class KrakenReportToJson(val root: Configurable) extends ToolCommandFunction with Summarizable {
  /** Tool object whose command line this function wraps. */
  def toolObject = nl.lumc.sasc.biopet.tools.KrakenReportToJson

  @Input(doc = "Input Kraken Full report", shortName = "inputReport", required = true)
  var inputReport: File = _

  /** When true, the tool is asked to leave the scientific names out of the JSON. */
  @Argument(required = false)
  var skipNames: Boolean = false

  @Output(doc = "Output JSON", shortName = "output", required = true)
  var output: File = _

  override def defaultCoreMemory = 2.0

  /**
   * Returns the command to execute.
   *
   * The tool declares `skipnames` as a value-taking boolean option
   * (`opt[Boolean]`), so the flag is emitted together with an explicit `true`
   * instead of as a bare switch; nothing is emitted when `skipNames` is false.
   */
  override def cmdLine =
    super.cmdLine +
      required("-i", inputReport) +
      required("-o", output) +
      optional("--skipnames", if (skipNames) Some(true) else None)

  /** Summary statistics: the content of `output` as returned by `ConfigUtils.fileToConfigMap`. */
  def summaryStats: Map[String, Any] = {
    ConfigUtils.fileToConfigMap(output)
  }

  /** This function contributes no files to the summary. */
  def summaryFiles: Map[String, File] = Map()
}
object KrakenReportToJson {
  /**
   * Creates a [[KrakenReportToJson]] job for the given report.
   *
   * @param root   Configuration object for the pipeline
   * @param input  Kraken report to convert
   * @param output Parent location for the result; the JSON file is created
   *               inside it as `<input name without last extension>.kraken.json`
   * @return the configured job
   */
  def apply(root: Configurable, input: File, output: File): KrakenReportToJson = {
    val report = new KrakenReportToJson(root)
    report.inputReport = input

    // lastIndexOf returns -1 for names without a dot; substring(0, -1) would
    // throw, so fall back to the complete file name in that case.
    val inputName = input.getName
    val dotIndex = inputName.lastIndexOf(".")
    val baseName = if (dotIndex < 0) inputName else inputName.substring(0, dotIndex)

    report.output = new File(output, baseName + ".kraken.json")
    report
  }
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools
/**
* Created by wyleung on 25-9-15.
*/
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.ConfigUtils._
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source
/**
 * Command line tool that converts a (full) Kraken report into a JSON document
 * with an "unclassified" entry and a "classified" taxonomy tree.
 */
object KrakenReportToJson extends ToolCommand {

  /**
   * One row of the Kraken report.
   *
   * `cladeCount` is filled from the third report column and `cladeSize` from
   * the second one (see [[parseLine]]); `children` is populated afterwards by
   * [[reportToJson]] while the tree is being linked.
   */
  case class KrakenHit(taxonomyID: Long,
                       taxonomyName: String,
                       cladeCount: Long,
                       cladeSize: Long, // size of parent - including itself
                       taxonRank: String,
                       cladeLevel: Int,
                       parentTaxonomyID: Long,
                       children: ListBuffer[KrakenHit]) {

    /**
     * Converts this hit into a nested map ready for JSON serialisation.
     *
     * @param withChildren when true the children are converted recursively,
     *                     otherwise "children" is an empty list
     */
    def toJSON(withChildren: Boolean = false): Map[String, Any] = {
      val childJSON = if (withChildren) children.toList.map(entry => entry.toJSON(withChildren)) else List()
      Map(
        "name" -> taxonomyName,
        "taxid" -> taxonomyID,
        "taxonrank" -> taxonRank,
        "cladelevel" -> cladeLevel,
        "count" -> cladeCount,
        "size" -> cladeSize,
        "children" -> childJSON
      )
    }
  }

  /**
   * Taxonomy ID most recently seen per indentation level, shifted by one:
   * index `level + 1` holds the ID of the last hit at `level`, so index
   * `level` is the parent of a hit at `level`. Mutated by [[parseLine]].
   */
  var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0)

  /** Matches the leading spaces that encode the depth of a scientific name. */
  val spacePattern = "^( +)".r

  /** Hits of the report processed last, keyed by taxonomy ID. */
  private var lines: Map[Long, KrakenHit] = Map.empty

  case class Args(krakenreport: File = null, outputJson: Option[File] = None, skipNames: Boolean = false) extends AbstractArgs

  class OptParser extends AbstractOptParser {

    head(
      s"""
         |$commandName - Convert Kraken-report (full) output to JSON
         |""".stripMargin)

    opt[File]('i', "krakenreport") required () unbounded () valueName "<krakenreport>" action { (x, c) =>
      c.copy(krakenreport = x)
    } validate {
      x => if (x.exists) success else failure("Krakenreport not found")
    } text "Kraken report to generate stats from"

    opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) =>
      c.copy(outputJson = Some(x))
    } text "File to write output to, if not supplied output go to stdout"

    // NOTE(review): declared as opt[Boolean], so this presumably expects an
    // explicit value (e.g. `--skipnames true`) - confirm against scopt.
    opt[Boolean]('n', "skipnames") unbounded () valueName "<skipnames>" action { (x, c) =>
      c.copy(skipNames = x)
    } text "Don't report the scientific name of the taxon."
  }

  /**
   * Parses the command line argument
   *
   * Exits the JVM with status 1 when the arguments cannot be parsed.
   *
   * @param args Array of arguments
   * @return the parsed arguments
   */
  def parseArgs(args: Array[String]): Args = new OptParser()
    .parse(args, Args())
    .getOrElse(sys.exit(1))

  /**
   * Takes a line from the kraken report, converts into Map with taxonID and
   * information on this hit as `KrakenHit`. `KrakenHit` is used later on for
   * building the tree
   *
   * Side effect: records the taxonomy ID of this line in `cladeIDs` so that
   * following, deeper indented lines can look up their parent.
   *
   * @param krakenRawHit Line from the KrakenReport output
   * @param skipNames Specify to skip names in the report output to reduce size of JSON
   * @return single-entry map from taxonomy ID to the parsed hit
   */
  def parseLine(krakenRawHit: String, skipNames: Boolean): Map[Long, KrakenHit] = {
    val values: Array[String] = krakenRawHit.stripLineEnd.split("\t")

    assert(values.length == 6,
      s"Expected 6 tab-separated columns in Kraken report line but found ${values.length}: '$krakenRawHit'")

    val scientificName: String = values(5)
    // The name is indented with two spaces per level.
    val cladeLevel = spacePattern.findFirstIn(scientificName).getOrElse("").length / 2
    val taxonomyId = values(4).toLong

    // Keep growing until the slot for this level exists; a single fixed-size
    // extension is not enough when the level jumps far beyond the buffer.
    while (cladeIDs.length <= cladeLevel + 1) {
      cladeIDs ++= mutable.ArrayBuffer.fill(10)(0L)
    }

    cladeIDs(cladeLevel + 1) = taxonomyId

    Map(
      taxonomyId -> new KrakenHit(
        taxonomyID = taxonomyId,
        taxonomyName = if (skipNames) "" else scientificName.trim,
        cladeCount = values(2).toLong,
        cladeSize = values(1).toLong,
        taxonRank = values(3),
        cladeLevel = cladeLevel,
        parentTaxonomyID = cladeIDs(cladeLevel),
        children = ListBuffer()
      ))
  }

  /**
   * Read the `KrakenReport` output and transform into `Map` by TaxonID and `KrakenHit`
   * A JSON-string output is given.
   *
   * The report must contain the rows with taxonomy ID 0 and 1; they become
   * the "unclassified" and "classified" entries of the result.
   *
   * @param reportRaw The `KrakenReport` output
   * @param skipNames Specify to skip names in the report output to reduce size of JSON
   * @return the JSON document as a string
   */
  def reportToJson(reportRaw: File, skipNames: Boolean): String = {
    val reader = Source.fromFile(reportRaw)

    /*
     * http://ccb.jhu.edu/software/kraken/MANUAL.html
     * The header layout is:
     * 1. Percentage of reads covered by the clade rooted at this taxon
     * 2. Number of reads covered by the clade rooted at this taxon
     * 3. Number of reads assigned directly to this taxon
     * 4. A rank code, indicating (U)nclassified, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. All other ranks are simply '-'.
     * 5. NCBI taxonomy ID
     * 6. indented scientific name
     * */
    try {
      // foldLeft consumes the lazy line iterator completely, so the reader
      // can be closed safely once this expression has been evaluated.
      lines = reader.getLines()
        .map(line => parseLine(line, skipNames))
        .filter(p => (p.head._2.cladeSize > 0) || List(0L, 1L).contains(p.head._2.taxonomyID))
        .foldLeft(Map.empty[Long, KrakenHit])((a, b) => {
          a + b.head
        })
    } finally {
      reader.close()
    }

    lines.keys.foreach(k => {
      val hit = lines(k)
      // append itself to the children attribute of the parent
      if (hit.parentTaxonomyID > 0L) {
        // avoid the root and unclassified appending to the unclassified node
        lines(hit.parentTaxonomyID).children += hit
      }
    })

    val result = Map("unclassified" -> lines(0).toJSON(withChildren = false),
      "classified" -> lines(1).toJSON(withChildren = true))
    mapToJson(result).spaces2
  }

  /** Entry point: converts the report and writes the JSON to the output file, or to stdout when none is given. */
  def main(args: Array[String]): Unit = {
    val commandArgs: Args = parseArgs(args)

    val jsonString: String = reportToJson(commandArgs.krakenreport, skipNames = commandArgs.skipNames)
    commandArgs.outputJson match {
      case Some(file) =>
        val writer = new PrintWriter(file)
        // Close the writer even when writing fails.
        try {
          writer.println(jsonString)
        } finally {
          writer.close()
        }
      case _ => println(jsonString)
    }
  }
}
...@@ -23,11 +23,14 @@ ...@@ -23,11 +23,14 @@
<artifactId>Biopet</artifactId> <artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId> <groupId>nl.lumc.sasc</groupId>
<version>0.5.0-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<inceptionYear>2015</inceptionYear> <inceptionYear>2015</inceptionYear>
<artifactId>Gears</artifactId> <artifactId>Gears</artifactId>
<name>Gears</name>
<packaging>jar</packaging>
<dependencies> <dependencies>
<dependency> <dependency>
...@@ -37,7 +40,7 @@ ...@@ -37,7 +40,7 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>nl.lumc.sasc</groupId> <groupId>nl.lumc.sasc</groupId>
<artifactId>Mapping</artifactId> <artifactId>BiopetToolsExtensions</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency> <dependency>
......
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
<%@ var summary: Summary %>
<%@ var rootPath: String %>
<%@ var sampleId: Option[String] = None %>
<%@ var libId: Option[String] = None %>
<table class="table">
<tbody>
<tr><th>Pipeline</th><td>Gears</td></tr>
<tr><th>Version</th><td>${summary.getValue("meta", "pipeline_version")}</td></tr>
<tr><th>Last commit hash</th><td>${summary.getValue("meta", "last_commit_hash")}</td></tr>
<tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr>
#if(sampleId.isDefined) <tr><th>Sample</th><td>${sampleId}</td></tr> #end
#if(libId.isDefined) <tr><th>Library</th><td>${libId}</td></tr> #end
</tbody>
</table>
<br/>
<div class="row">
<div class="col-md-1"></div>
<div class="col-md-6">
<p>