diff --git a/.gitignore b/.gitignore index 77146859b6e12b32e90cc6f6d388ebc878d548d6..4f40f8c4d9b2bf4eb9d898bbc0691a20c5663dd0 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ git.properties target/ public/target/ protected/target/ +site/ diff --git a/README.md b/README.md index 06afe292ff4b01756841094444d7e020ee394014..f8f05d71c3c1e30f6a3a901deb4134251f757bb4 100755 --- a/README.md +++ b/README.md @@ -64,8 +64,8 @@ We welcome any kind of contribution, be it merge requests on the code base, docu To develop Biopet, Java 7, Maven 3.2.2, and GATK Queue 3.4 is required. Please consult the Java homepage and Maven homepage for the respective installation instruction. After you have both Java and Maven installed, you would then need to install GATK Queue. However, as the GATK Queue package is not yet available as an artifact in Maven Central, you will need to download, compile, and install GATK Queue first. ~~~ -$ git clone https://github.com/broadgsa/gatk -$ cd gatk +$ git clone https://github.com/broadgsa/gatk-protected +$ cd gatk-protected $ git checkout 3.4 # the current release is based on GATK 3.4 $ mvn -U clean install ~~~ diff --git a/biopet-aggregate/pom.xml b/biopet-aggregate/pom.xml index 4b89651970b5b9c05bf599a6b14d44b161793663..04f8d08e04d528a2fa59a547dcd365a1fcd68461 100644 --- a/biopet-aggregate/pom.xml +++ b/biopet-aggregate/pom.xml @@ -5,7 +5,7 @@ <parent> <artifactId>BiopetRoot</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </parent> <modelVersion>4.0.0</modelVersion> @@ -33,7 +33,7 @@ <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>BiopetProtectedPackage</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </dependency> <dependency> <groupId>com.google.guava</groupId> diff --git a/docs/cluster/oge.md b/docs/cluster/oge.md index c5218ad0970c0371b5cee7120a54ee15bb17d391..82397b58cbdaaea218d3055a86d4832fa2467b45 100644 --- 
a/docs/cluster/oge.md +++ b/docs/cluster/oge.md @@ -1,9 +1,15 @@ # Introduction - +Within the LUMC we have a compute cluster which runs on the Sun Grid Engine (SGE). This cluster currently consists of around 600 +cores and several terabytes of memory. The Sun Grid Engine (SGE) enables the cluster to schedule all the jobs coming from +different users in a fair way. So resources are shared equally between multiple users. # Sun Grid Engine +Oracle Grid Engine or Sun Grid Engine is a computer cluster software system also known as a batch-queuing system. These + systems help the computer cluster users to distribute and fairly schedule the jobs to the different computers in the cluster. +# Open Grid Engine -# Open Grid Engine \ No newline at end of file +The Open Grid Engine (OGE) is based on the Sun Grid Engine but is completely open source. It does support commercially batch-queuing + systems. \ No newline at end of file diff --git a/docs/developer/code-style.md b/docs/developer/code-style.md new file mode 100644 index 0000000000000000000000000000000000000000..8e20b7747d886b98b3ada34d13e28d355298f2cd --- /dev/null +++ b/docs/developer/code-style.md @@ -0,0 +1,36 @@ +# Developer - Code style + +## General rules +- Variable names should always be in *camelCase* and do **not** start with a capital letter + +```scala +// correct: +val outputFromProgram: String = "foobar" + +// incorrect: +val OutputFromProgram: String = "foobar" +``` + +- Class names should always be in *CamelCase* and **always** start with a capital letter + +```scala +// correct: +class ExtractReads {} + +// incorrect: +class extractReads {} + +``` + +- Avoid using `null`, the `Option` type in Scala can be used instead + +```scala +// correct: +val inputFile: Option[File] = None + +// incorrect: +val inputFile: File = null + +``` +- If a method/value is designed to be overridden make it a `def` and override it with a `def`, we encourage you to not use `val` + diff --git a/docs/developer/example-pipeable.md 
b/docs/developer/example-pipeable.md new file mode 100644 index 0000000000000000000000000000000000000000..9604c8fb6f165fd2f5c027bf4aa794b9013493cb --- /dev/null +++ b/docs/developer/example-pipeable.md @@ -0,0 +1,29 @@ +# Pipeable commands + +## Introduction + +Since the release of Biopet v0.5.0 we support piping of programs/tools to decrease disk usage and run time. Here we make use of + [fifo piping](http://www.gnu.org/software/libc/manual/html_node/FIFO-Special-Files.html#FIFO-Special-Files), which enables a + developer to very easily implement piping for most pipeable tools. + +## Example + +``` scala + val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) :: + Some(gsnapCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten) + pipe.threadsCorrection = -1 + zcatR1._1.foreach(x => pipe.threadsCorrection -= 1) + zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1)) + add(pipe) + ar._2 +``` + +* In the above example we define the variable ***pipe***. This is the place to define which jobs should be piped together. In +this case + we perform a zcat on the input files. After that GSNAP alignment and Picard reordersam is performed. The final output of this + job will be a SAM file. All intermediate files will be removed as soon as the job finished completely without any error codes. +* With the second command pipe.threadsCorrection = -1 we make sure the total number of assigned cores is not too high. This +ensures that the job can still be scheduled to the compute cluster. +* So we hope you can appreciate in the above example that we decrease the total number of assigned cores with 2. 
This is done +by the command ***zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)*** + \ No newline at end of file diff --git a/docs/developer/example-pipeline.md b/docs/developer/example-pipeline.md new file mode 100644 index 0000000000000000000000000000000000000000..2cfbce145f2cd609e87dd10e11b4681783472ff5 --- /dev/null +++ b/docs/developer/example-pipeline.md @@ -0,0 +1,158 @@ +# Developer - Example pipeline + +This document/tutorial will show you how to add a new pipeline to biopet. The minimum requirement is having: + + - A clean biopet checkout from git + - Texteditor or IntelliJ IDEA + +### Adding pipeline folder + +Via commandline: + +``` +cd biopet/public/ +mkdir -p mypipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/mypipeline +``` + +### Adding maven project + +Adding a `pom.xml` to `biopet/public/mypipeline` folder. The example below is the minimum required POM definition + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>Biopet</artifactId> + <groupId>nl.lumc.sasc</groupId> + <version>0.5.0-SNAPSHOT</version> + <relativePath>../</relativePath> + </parent> + <modelVersion>4.0.0</modelVersion> + + <inceptionYear>2015</inceptionYear> + <artifactId>MyPipeline</artifactId> + <name>MyPipeline</name> + <packaging>jar</packaging> + + <dependencies> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>BiopetCore</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>BiopetToolsExtensions</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> + <dependency> + 
<groupId>org.scalatest</groupId> + <artifactId>scalatest_2.10</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> + </dependencies> + +</project> +``` + +### Initial pipeline code + +In `biopet/public/mypipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/mypipeline` create a file named `HelloPipeline.scala` with the following contents: + +```scala +package nl.lumc.sasc.biopet/pipelines.mypipeline + +import nl.lumc.sasc.biopet.core.PipelineCommand +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import org.broadinstitute.gatk.queue.QScript + +class HelloPipeline(val root: Configurable) extends QScript with SummaryQScript { + def this() = this(null) + + /** Only required when using [[SummaryQScript]] */ + def summaryFile = new File(outputDir, "hello.summary.json") + + /** Only required when using [[SummaryQScript]] */ + def summaryFiles: Map[String, File] = Map() + + /** Only required when using [[SummaryQScript]] */ + def summarySettings = Map() + + // This method can be used to initialize some classes where needed + def init(): Unit = { + } + + // This method is the actual pipeline + def biopetScript: Unit = { + // Executing a tool like FastQC, calling the extension in `nl.lumc.sasc.biopet.extensions.Fastqc` + + val fastqc = new Fastqc(this) + fastqc.fastqfile = config("fastqc_input") + fastqc.output = new File(outputDir, "fastqc.txt") + add(fastqc) + + } +} + +object HelloPipeline extends PipelineCommand + +``` + +Looking at the pipeline, you can see that it inherits from `QScript`. `QScript` is the fundamental class which gives access to the Queue scheduling system. In addition `SummaryQScript` (trait) will add another layer of functions which provides functions to handle and create summary files from pipeline output. 
+`class HelloPipeline(val root: Configurable)`, our pipeline is called HelloPipeline and takes a `root` with configuration options passed down to Biopet via a JSON specified on the commandline (--config). + +``` + def biopetScript: Unit = { + } +``` + +One can start adding pipeline components in `biopetScript`, this is the programmatic equivalent of the `main` method in most popular programming languages. For example, adding a QC tool to the pipeline like `FastQC`. Look at the example shown above. +Setting up the pipeline is done within the pipeline itself, fine-tuning is always possible by overriding in the following way: + +``` + val fastqc = new Fastqc(this) + fastqc.fastqfile = config("fastqc_input") + fastqc.output = new File(outputDir, "fastqc.txt") + + // change kmers settings to 9, wrap with `Some()` because `fastqc.kmers` is a `Option` value. + fastqc.kmers = Some(9) + + add(fastqc) + +``` + + + + +### Config setup + +For our new pipeline, one should set up the (default) config options. + +Since our pipeline is called `HelloPipeline`, the root of the config options will be called `hellopipeline` (lowercase). + +```json +{ + "output_dir": "/home/user/mypipelineoutpt", + "hellopipeline": { + + } +} + +``` + + +### Test pipeline + +### Summary output + +### Reporting output (optional) \ No newline at end of file diff --git a/docs/developer/example-reporting.md b/docs/developer/example-reporting.md new file mode 100644 index 0000000000000000000000000000000000000000..bb3277d3765a2f7208f698a5597fdcc3e5a0bd76 --- /dev/null +++ b/docs/developer/example-reporting.md @@ -0,0 +1,12 @@ +# Developer - Example pipeline report + + +### Concept + +### Requirements + +### Getting started - First page + +### How to generate report independent from pipeline + +### Branding etc. 
diff --git a/docs/developer/example-tool.md b/docs/developer/example-tool.md new file mode 100644 index 0000000000000000000000000000000000000000..b37062d926f87276a728fd843dc6a0e35af6c0af --- /dev/null +++ b/docs/developer/example-tool.md @@ -0,0 +1,213 @@ +# Developer - Example tool + +In this tutorial we explain how to create a tool within the biopet-framework. We provide convenient helper methods which can be used in the tool. +We take a line counter as the use case. + +### Initial tool code +```scala +package nl.lumc.sasc.biopet.tools + +import java.io.{ PrintWriter, File } + +import nl.lumc.sasc.biopet.utils.ConfigUtils._ +import nl.lumc.sasc.biopet.utils.ToolCommand +import scala.collection.mutable + +import scala.io.Source + +/** + */ +object SimpleTool extends ToolCommand { + /* + * Main function executes the LineCounter.scala + */ + def main(args: Array[String]): Unit = { + println("This is the SimpleTool"); + } +} +``` + +This is the minimum setup for having a working tool. We will place some code for line counting in ``main``. Like in other +higher-level programming languages like Java, C++ and .Net, one needs to specify an entry point for the program to run. ``def main`` +is the entry point from the command line into your tool. + + +### Program arguments and environment variables + +A basic application/tool usually takes arguments to configure and set parameters to be used within the tool. +In biopet we provide an ``AbstractArgs`` case-class which stores the arguments read from command line. + + +```scala + case class Args(inputFile: File = Nil, outputFile: Option[File] = None) extends AbstractArgs +``` + +The arguments are stored in ``Args``, this is a `Case Class` which acts as a java `HashMap` storing the arguments in an +object-like fashion. 
+ +Consuming and placing values in `Args` works as follows: + +```scala + class OptParser extends AbstractOptParser { + head( + s""" + |$commandName - Count lines in a textfile + """.stripMargin) + + opt[File]('i', "input") required () unbounded () valueName "<inputFile>" action { (x, c) => + c.copy(inputFile = x) + } validate { + x => if (x.exists) success else failure("Inputfile not found") + } text "Count lines from this files" + + opt[File]('o', "output") unbounded () valueName "<outputFile>" action { (x, c) => + c.copy(outputFile = Some(x)) + } text "File to write output to, if not supplied output go to stdout" + + } +``` + +One has to implement class `OptParser` in order to fill `Args`. In `OptParser` one defines the command line args and how it should be processed. + In our example, we just copy the values passed on the command line. Further reading: [scala scopt](https://github.com/scopt/scopt) + +Let's compile the code into 1 file and test with real functional code: + + +```scala + +package nl.lumc.sasc.biopet.tools + +import java.io.{ PrintWriter, File } + +import nl.lumc.sasc.biopet.utils.ConfigUtils._ +import nl.lumc.sasc.biopet.utils.ToolCommand +import scala.collection.mutable + +import scala.io.Source + +/** + */ +object SimpleTool extends ToolCommand { + case class Args(inputFile: File = Nil, outputFile: Option[File] = None) extends AbstractArgs + + class OptParser extends AbstractOptParser { + + head( + s""" + |$commandName - Count lines in a textfile + """.stripMargin) + + opt[File]('i', "input") required () unbounded () valueName "<inputFile>" action { (x, c) => + c.copy(inputFile = x) + } validate { + x => if (x.exists) success else failure("Inputfile not found") + } text "Count lines from this files" + + opt[File]('o', "output") unbounded () valueName "<outputFile>" action { (x, c) => + c.copy(outputFile = Some(x)) + } text "File to write output to, if not supplied output go to stdout" + + } + + def countToJSON(inputRaw: File): String = { + val 
reader = Source.fromFile(inputRaw) + val nLines = reader.getLines.size + + mapToJson(Map( + "lines" -> nLines, + "input" -> inputRaw + )).spaces2 + } + + /* + * Main function executes the LineCounter.scala + */ + def main(args: Array[String]): Unit = { + val commandArgs: Args = parseArgs(args) + + // use the arguments + val jsonString: String = countToJSON(commandArgs.input) + commandArgs.outputJson match { + case Some(file) => + val writer = new PrintWriter(file) + writer.println(jsonString) + writer.close() + case _ => println(jsonString) + } + } +} +``` + +### Adding tool-extension for usage in pipeline + +In order to use this tool within biopet, one should write an `extension` for the tool. (as we also do for normal executables like `bwa-mem`) + +The wrapper would look like this, basically exposing the same command line arguments to biopet in an OOP format. +Note: we also add some functionalities for getting summary data and passing on to biopet. + +The concept of having (extension)-wrappers is to create a black-box service model. One should only know how to interact with the tool without necessarily knowing the internals. 
+ + +```scala +package nl.lumc.sasc.biopet.extensions.tools + +import java.io.File + +import nl.lumc.sasc.biopet.core.ToolCommandFunction +import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input } + +/** + * SimpleTool function class for usage in Biopet pipelines + * + * @param root Configuration object for the pipeline + */ +class SimpleTool(val root: Configurable) extends ToolCommandFunction with Summarizable { + def toolObject = nl.lumc.sasc.biopet.tools.SimpleTool + + @Input(doc = "Input file to count lines from", shortName = "input", required = true) + var input: File = _ + + @Output(doc = "Output JSON", shortName = "output", required = true) + var output: File = _ + + // setting the memory for this tool where it starts from. + override def defaultCoreMemory = 1.0 + + override def cmdLine = super.cmdLine + + required("-i", input) + + required("-o", output) + + def summaryStats: Map[String, Any] = { + ConfigUtils.fileToConfigMap(output) + } + + def summaryFiles: Map[String, File] = Map( + "simpletool" -> output + ) + +} + +object SimpleTool { + def apply(root: Configurable, input: File, output: File): SimpleTool = { + val report = new SimpleTool(root) + report.inputReport = input + report.output = new File(output, input.getName.substring(0, input.getName.lastIndexOf(".")) + ".simpletool.json") + report + } + + def apply(root: Configurable, input: File, outDir: String): SimpleTool = { + val report = new SimpleTool(root) + report.inputReport = input + report.output = new File(outDir, input.getName.substring(0, input.getName.lastIndexOf(".")) + ".simpletool.json") + report + } +} +``` + + +### Summary setup (for reporting results to JSON) + + diff --git a/docs/developer/getting-started.md b/docs/developer/getting-started.md new file mode 100644 index 
0000000000000000000000000000000000000000..97f2bcfb8f2c32d96afba1fcb97c15cbf19e833f --- /dev/null +++ b/docs/developer/getting-started.md @@ -0,0 +1,193 @@ +# Developer - Getting started + +### Requirements +- Maven 3.3 +- Installed Gatk to maven local repository (see below) +- Installed Biopet to maven local repository (see below) +- Some knowledge of the programming language [Scala](http://www.scala-lang.org/) (The pipelines are scripted using Scala) +- We encourage users to use an IDE for scripting the pipeline. One that works pretty well for us is: [IntelliJ IDEA](https://www.jetbrains.com/idea/) + +To start the development of a biopet pipeline you should have the following tools installed: + +* Gatk +* Biopet + +Make sure both tools are installed in your local maven repository. To do this one should use the commands below. + +```bash +# Replace 'mvn' with the location of you maven executable or put it in your PATH with the export command. +git clone https://github.com/broadgsa/gatk-protected +cd gatk-protected +git checkout 3.4 +# The GATK version is bound to a version of Biopet. Biopet 0.5.0 uses Gatk 3.4 +mvn clean install + +cd .. + +git clone https://github.com/biopet/biopet.git +cd biopet +git checkout 0.5.0 +mvn -DskipTests=true clean install +``` + +### Basic components + +### Qscript (pipeline) +A basic pipeline would look like this. 
[Extended example](example-pipeline.md) + +```scala +package org.example.group.pipelines + +import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.{ Gzip, Cat } +import org.broadinstitute.gatk.queue.QScript + +//TODO: Replace class name, must be the same as the name of the pipeline +class SimplePipeline(val root: Configurable) extends QScript with BiopetQScript { + // A constructor without arguments is needed if this pipeline is a root pipeline + // Root pipeline = the pipeline one wants to start on the commandline + def this() = this(null) + + @Input(required = true) + var inputFile: File = null + + /** This method can be used to initialize some classes where needed */ + def init(): Unit = { + } + + /** This method is the actual pipeline */ + def biopetScript: Unit = { + val cat = new Cat(this) + cat.input :+= inputFile + cat.output = new File(outputDir, "file.out") + add(cat) + + val gzip = new Gzip(this) + gzip.input :+= cat.output + gzip.output = new File(outputDir, "file.out.gz") + add(gzip) + } +} + + +object SimplePipeline extends PipelineCommand +``` + +### Extensions (wrappers) +Wrappers have to be written for each tool used inside the pipeline. 
A basic wrapper (example wraps the linux ```cat``` command) would look like this: +```scala +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Extension for GNU cat + */ +class Cat(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Input file", required = true) + var input: List[File] = Nil + + @Output(doc = "Unzipped file", required = true) + var output: File = _ + + executable = config("exe", default = "cat") + + /** return commandline to execute */ + def cmdLine = required(executable) + repeat(input) + " > " + required(output) +} +``` + +### Tools (Scala programs) +Within the Biopet framework it is also possible to write your own tools in Scala. +When a certain functionality or script is not incorporated within the framework one can write a tool that does the job. +Below you can see an example tool which is written for automatically building sample configs. 
+ +[Extended example](example-tool.md) + +```scala +package nl.lumc.sasc.biopet.tools + +import java.io.{ PrintWriter, File } + +import nl.lumc.sasc.biopet.utils.ConfigUtils._ +import nl.lumc.sasc.biopet.utils.ToolCommand +import scala.collection.mutable + +import scala.io.Source + +/** + * This tool can convert a tsv to a json file + */ +object SamplesTsvToJson extends ToolCommand { + case class Args(inputFiles: List[File] = Nil, outputFile: Option[File] = None) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('i', "inputFiles") required () unbounded () valueName "<file>" action { (x, c) => + c.copy(inputFiles = x :: c.inputFiles) + } text "Input must be a tsv file, first line is seen as header and must at least have a 'sample' column, 'library' column is optional, multiple files allowed" + opt[File]('o', "outputFile") unbounded () valueName "<file>" action { (x, c) => + c.copy(outputFile = Some(x)) + } + } + + /** Executes SamplesTsvToJson */ + def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + + val jsonString = stringFromInputs(commandArgs.inputFiles) + commandArgs.outputFile match { + case Some(file) => { + val writer = new PrintWriter(file) + writer.println(jsonString) + writer.close() + } + case _ => println(jsonString) + } + } + + def mapFromFile(inputFile: File): Map[String, Any] = { + val reader = Source.fromFile(inputFile) + val lines = reader.getLines().toList.filter(!_.isEmpty) + val header = lines.head.split("\t") + val sampleColumn = header.indexOf("sample") + val libraryColumn = header.indexOf("library") + if (sampleColumn == -1) throw new IllegalStateException("Sample column does not exist in: " + inputFile) + + val sampleLibCache: mutable.Set[(String, Option[String])] = mutable.Set() + + val librariesValues: List[Map[String, Any]] = for (tsvLine <- lines.tail) yield { + val values = tsvLine.split("\t") + 
require(header.length == values.length, "Number of columns is not the same as the header") + val sample = values(sampleColumn) + val library = if (libraryColumn != -1) Some(values(libraryColumn)) else None + + //FIXME: this is a workaround, should be removed after fixing #180 + if (sample.head.isDigit || library.forall(_.head.isDigit)) + throw new IllegalStateException("Sample or library may not start with a number") + + if (sampleLibCache.contains((sample, library))) + throw new IllegalStateException(s"Combination of $sample ${library.map("and " + _).getOrElse("")} is found multiple times") + else sampleLibCache.add((sample, library)) + val valuesMap = (for ( + t <- 0 until values.size if !values(t).isEmpty && t != sampleColumn && t != libraryColumn + ) yield header(t) -> values(t)).toMap + library match { + case Some(lib) => Map("samples" -> Map(sample -> Map("libraries" -> Map(lib -> valuesMap)))) + case _ => Map("samples" -> Map(sample -> valuesMap)) + } + } + librariesValues.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv)) + } + + def stringFromInputs(inputs: List[File]): String = { + val map = inputs.map(f => mapFromFile(f)).foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv)) + mapToJson(map).spaces2 + } +} +``` \ No newline at end of file diff --git a/docs/developer/scaladocs.md b/docs/developer/scaladocs.md new file mode 100644 index 0000000000000000000000000000000000000000..0ce38cb288967286943a15e35a98b533d88fc28a --- /dev/null +++ b/docs/developer/scaladocs.md @@ -0,0 +1,2 @@ +* [Scaladocs 0.5.0](https://humgenprojects.lumc.nl/sasc/scaladocs/v0.5.0#nl.lumc.sasc.biopet.package) +* [Scaladocs 0.4.0](https://humgenprojects.lumc.nl/sasc/scaladocs/v0.4.0#nl.lumc.sasc.biopet.package) diff --git a/docs/examples/gentrap_example.json b/docs/examples/gentrap_example.json index ddf0b6e5474bbb2d47e2cbf9e8a5ab400c5f4bff..1e69c4d6f92234cfa39f1b5872ba73f98aeff174 100644 --- a/docs/examples/gentrap_example.json +++ 
b/docs/examples/gentrap_example.json @@ -26,12 +26,12 @@ "expression_measures": ["fragments_per_gene", "bases_per_gene", "bases_per_exon"], "strand_protocol": "non_specific", "aligner": "gsnap", - "reference": "/share/isilon/system/local/Genomes-new-27-10-2011/H.Sapiens/hg19_nohap/gsnap/reference.fa", + "reference": "/path/to/Genome/H.Sapiens/hg19_nohap/gsnap/reference.fa", "annotation_gtf": "/path/to/data/annotation/ucsc_refseq.gtf", "annotation_bed": "/path/to/data/annotation/ucsc_refseq.bed", "annotation_refflat": "/path/to/data/annotation/ucsc_refseq.refFlat", "gsnap": { - "dir": "/share/isilon/system/local/Genomes-new-27-10-2011/H.Sapiens/hg19_nohap/gsnap", + "dir": "/path/to/genome/H.Sapiens/hg19_nohap/gsnap", "db": "hg19_nohap", "quiet_if_excessive": true, "npaths": 1 diff --git a/docs/about.md b/docs/general/about.md similarity index 83% rename from docs/about.md rename to docs/general/about.md index d0775b7fcb377a20583b0f29c38878915da3fcbe..57b320514e3281d67cedd75923f07e1485a6c3b1 100644 --- a/docs/about.md +++ b/docs/general/about.md @@ -15,13 +15,13 @@ need. 
## Contributors -As of the 0.4.0 release, the following people (sorted by last name) have contributed to Biopet: +As of the 0.5.0 release, the following people (sorted by last name) have contributed to Biopet: - Wibowo Arindrarto - Sander Bollen - Peter van 't Hof -- Wai Yi Leung - Leon Mei +- Wai Yi Leung - Sander van der Zeeuw @@ -29,4 +29,4 @@ As of the 0.4.0 release, the following people (sorted by last name) have contrib Check our website at: [SASC](https://sasc.lumc.nl/) -We are also reachable through email: [SASC mail](mailto:sasc@lumc.nl) +Or send us an email: [SASC mail](mailto:sasc@lumc.nl) \ No newline at end of file diff --git a/docs/general/config.md b/docs/general/config.md index bebee860cf2c45aa06a54945bc17828031f2a1bf..79154412b491c13401469c1d4a33f6b435f9c6be 100644 --- a/docs/general/config.md +++ b/docs/general/config.md @@ -7,13 +7,15 @@ The sample config should be in [__JSON__](http://www.json.org/) or [__YAML__](ht - First field should have the key __"samples"__ - Second field should contain the __"libraries"__ - Third field contains __"R1" or "R2"__ or __"bam"__ -- The fastq input files can be provided zipped and un zipped +- The fastq input files can be provided zipped and unzipped +- `output_dir` is a required setting that should be set either in a `config.json` or specified on the invocation command via -cv output_dir=<path/to/outputdir\>. #### Example sample config ###### yaml: ``` yaml +output_dir: /home/user/myoutputdir samples: Sample_ID1: libraries: @@ -26,6 +28,7 @@ samples: ``` json { + "output_dir": "/home/user/myoutputdir", "samples":{ "Sample_ID1":{ "libraries":{ @@ -57,14 +60,20 @@ Note that there is a tool called [SamplesTsvToJson](../tools/SamplesTsvToJson.md ### The settings config The settings config enables a user to alter the settings for almost all settings available in the tools used for a given pipeline. -This config file should be written in JSON format. 
It can contain setup settings like references for the tools used, -if the pipeline should use chunking or setting memory limits for certain programs almost everything can be adjusted trough this config file. -One could set global variables containing settings for all tools used in the pipeline or set tool specific options one layer deeper into the JSON file. -E.g. in the example below the settings for Picard tools are altered only for Picard and not global. +This config file should be written in either JSON or YAML format. It can contain setup settings like: -~~~ + * references, + * cut offs, + * program modes and memory limits (program specific), + * Whether chunking should be used + * set program executables (if for some reason the user does not want to use the systems default tools) + * One could set global variables containing settings for all tools used in the pipeline or set tool specific options one layer + deeper into the JSON file. E.g. in the example below the settings for Picard tools are altered only for Picard and not global. + + +``` json "picard": { "validationstringency": "LENIENT" } -~~~ +``` Global setting examples are: ~~~ @@ -77,12 +86,14 @@ Global setting examples are: ---- #### References -Pipelines and tools that use references should now use the reference module. This gives some more fine-grained control over references. -E.g. pipelines and tools that use a fasta references file should now set value `reference_fasta`. -Additionally, we can set `reference_name` for the name to be used (e.g. `hg19`). If unset, Biopet will default to `unknown`. -It is also possible to set the `species` flag. Again, we will default to `unknown` if unset. +Pipelines and tools that use references should now use the reference module. +This gives a more fine-grained control over references and enables a user to curate the references in a structural way. +E.g. pipelines and tools which use a FASTA references should now set value `"reference_fasta"`. 
+Additionally, we can set `"reference_name"` for the name to be used (e.g. `"hg19"`). If unset, Biopet will default to `unknown`. +It is also possible to set the `"species"` flag. Again, we will default to `unknown` if unset. + #### Example settings config -~~~ +``` json { "reference_fasta": "/references/hg19_nohap/ucsc.hg19_nohap.fasta", "reference_name": "hg19_nohap", @@ -104,9 +115,9 @@ It is also possible to set the `species` flag. Again, we will default to `unknow "chunking": true, "haplotypecaller": { "scattercount": 1000 } } -~~~ +``` ### JSON validation -To check if the JSON file created is correct we can use multiple options the simplest way is using [this](http://jsonformatter.curiousconcept.com/) -website. It is also possible to use Python or Scala for validating but this requires some more knowledge. \ No newline at end of file +To check if the created JSON file is correct there are several possibilities: the simplest way is using [this](http://jsonformatter.curiousconcept.com/) +website. It is also possible to use Python, Scala or any other programming language for validating JSON files but this requires some more knowledge. \ No newline at end of file diff --git a/docs/license.md b/docs/general/license.md similarity index 92% rename from docs/license.md rename to docs/general/license.md index 69a97f3463e5238a2bd5b84633c8b4f1c5299634..99a1259533c0f0366e243c2dd5ce22f85b87aa03 100644 --- a/docs/license.md +++ b/docs/general/license.md @@ -17,7 +17,7 @@ license, please contact us to obtain a separate license. Private release: ~~~bash -Due to the license issue with GATK, this part of Biopet can only be used inside the +Due to a license issue with GATK, this part of Biopet can only be used inside the 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions on how to use this protected part of biopet or contact us at sasc@lumc.nl ~~~ diff --git a/docs/general/requirements.md b/docs/general/requirements.md index 0105f7ccc29dcbd5def4b6b49a6bb1235031d858..0ac500d7b5c95be5b4047e0c58c7b3302e63f59f 100644 --- a/docs/general/requirements.md +++ b/docs/general/requirements.md @@ -6,6 +6,8 @@ For end-users: * [Java 7 JVM](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or [OpenJDK 7](http://openjdk.java.net/install/) * [Cran R 2.15.3](http://cran.r-project.org/) + * It is strongly advised to run Biopet pipelines on a compute cluster since the amount of resources needed cannot be achieved on + a local machine. Note that this does not mean it is not possible! For developers: diff --git a/docs/index.md b/docs/index.md index f55a39193eabc688fca65dc34bb6332c01be6a3b..7dc9d5dca6764e33e66da571b4de1c49b525c593 100644 --- a/docs/index.md +++ b/docs/index.md @@ -13,10 +13,10 @@ Biopet (Bio Pipeline Execution Toolkit) is the main pipeline development framewo Biopet is available as a JAR package in SHARK. The easiest way to start using it is to activate the `biopet` environment module, which sets useful aliases and environment variables: ~~~ -$ module load biopet/v0.4.0 +$ module load biopet/v0.5.0 ~~~ -With each Biopet release, an accompanying environment module is also released. The latest release is version 0.4.0, thus `biopet/v0.4.0` is the module you would want to load. +With each Biopet release, an accompanying environment module is also released. The latest release is version 0.5.0, thus `biopet/v0.5.0` is the module you would want to load.
After loading the module, you can access the biopet package by simply typing `biopet`: @@ -48,6 +48,24 @@ $ biopet pipeline <pipeline_name> -config <path/to/config.json> -qsub -jobParaEn It is usually a good idea to do the real run using `screen` or `nohup` to prevent the job from terminating when you log out of SHARK. In practice, using `biopet` as it is is also fine. What you need to keep in mind, is that each pipeline has their own expected config layout. You can check out more about the general structure of our config files [here](general/config.md). For the specific structure that each pipeline accepts, please consult the respective pipeline page. +### Convention in this documentation + +To unify the commands used in the examples, we agree on the following: + +Whenever an example command starts with `biopet` as in: + +``` +biopet tool ... +``` + +One can replace the `biopet` command with: + +``` +java -jar </path/to/biopet.jar> tool +``` + +The `biopet` shortcut is only available on the SHARK cluster with the `module` environment installed. + ### Running Biopet in your own computer At the moment, we do not provide links to download the Biopet package. If you are interested in trying out Biopet locally, please contact us as [sasc@lumc.nl](mailto:sasc@lumc.nl). @@ -64,10 +82,10 @@ We welcome any kind of contribution, be it merge requests on the code base, docu To develop Biopet, Java 7, Maven 3.2.2, and GATK Queue 3.4 is required. Please consult the Java homepage and Maven homepage for the respective installation instruction. After you have both Java and Maven installed, you would then need to install GATK Queue. However, as the GATK Queue package is not yet available as an artifact in Maven Central, you will need to download, compile, and install GATK Queue first. 
~~~ -$ git clone https://github.com/broadgsa/gatk -$ cd gatk +$ git clone https://github.com/broadgsa/gatk-protected +$ cd gatk-protected $ git checkout 3.4 # the current release is based on GATK 3.4 -$ mvn -U clean install +$ mvn clean install ~~~ This will install all the required dependencies to your local maven repository. After this is done, you can clone our repository and test if everything builds fine: @@ -75,7 +93,7 @@ This will install all the required dependencies to your local maven repository. ~~~ $ git clone https://github.com/biopet/biopet.git $ cd biopet -$ mvn -U clean install +$ mvn clean install ~~~ If everything builds fine, you're good to go! Otherwise, don't hesitate to contact us or file an issue at our issue tracker. @@ -83,8 +101,8 @@ If everything builds fine, you're good to go! Otherwise, don't hesitate to conta ## About -Go to the [about page](about.md) +Go to the [about page](general/about.md) ## License -See: [License](license.md) +See: [License](general/license.md) diff --git a/docs/pipelines/bam2wig.md b/docs/pipelines/bam2wig.md index e683f950fb106c3087b7c5e24aed9085f2f47d43..0a51eb278ad4d64365d472e45adb17007fdabb8d 100644 --- a/docs/pipelines/bam2wig.md +++ b/docs/pipelines/bam2wig.md @@ -33,12 +33,13 @@ Arguments for Bam2Wig: If you are on SHARK, you can also load the `biopet` module and execute `biopet pipeline` instead: ~~~bash -$ module load biopet/v0.3.0 +$ module load biopet/v0.5.0 $ biopet pipeline bam2wig ~~~ To run the pipeline: + ~~~bash biopet pipeline bam2wig -config </path/to/config.json> --bamfile </path/to/bam.bam> -qsub -jobParaEnv BWA -run ~~~ @@ -46,3 +47,8 @@ To run the pipeline: ## Output Files The pipeline generates three output track files: a bigWig file, a wiggle file, and a TDF file. + +## Getting Help + +If you have any questions on running Bam2Wig or suggestions on how to improve the overall flow, feel free to post an issue to our + issue tracker at [GitHub](https://github.com/biopet/biopet). 
Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) diff --git a/docs/pipelines/basty.md b/docs/pipelines/basty.md index 0cb4d29ae94b76c669f4bb71aa2454d82effe8c1..40db3b00ea5f1ae3c55db74e05c78d565ccce43a 100644 --- a/docs/pipelines/basty.md +++ b/docs/pipelines/basty.md @@ -52,7 +52,7 @@ Specific configuration options additional to Basty are: ```json { - output_dir: </path/to/out_directory>, + "output_dir": </path/to/out_directory>, "shiva": { "variantcallers": ["freeBayes"] }, @@ -67,14 +67,14 @@ Specific configuration options additional to Basty are: ##### For the help screen: ~~~ -java -jar </path/to/biopet.jar> pipeline basty -h +biopet pipeline basty -h ~~~ ##### Run the pipeline: Note that one should first create the appropriate [configs](../general/config.md). ~~~ -java -jar </path/to/biopet/jar> pipeline basty -run -config MySamples.json -config MySettings.json +biopet pipeline basty -run -config MySamples.json -config MySettings.json ~~~ ### Result files @@ -152,3 +152,8 @@ The output files this pipeline produces are: ## References + +## Getting Help + +If you have any questions on running Basty, suggestions on how to improve the overall flow, or requests for your favorite +SNP typing algorithm, feel free to post an issue to our issue tracker at [GitHub](https://github.com/biopet/biopet). Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) diff --git a/docs/pipelines/carp.md b/docs/pipelines/carp.md index 6f5ab622a89956180ffb185083de6b817189242d..bc4ace74e9efda8d8058d13bcf67e6e9ae4d08a6 100644 --- a/docs/pipelines/carp.md +++ b/docs/pipelines/carp.md @@ -11,7 +11,7 @@ Carp is a pipeline for analyzing ChIP-seq NGS data.
It uses the BWA MEM aligner The layout of the sample configuration for Carp is basically the same as with our other multi sample pipelines, for example: -~~~ +~~~ json { "samples": { "sample_X": { @@ -39,7 +39,8 @@ The layout of the sample configuration for Carp is basically the same as with ou } ~~~ -What's important there is that you can specify the control ChIP-seq experiment(s) for a given sample. These controls are usually ChIP-seq runs from input DNA and/or from treatment with nonspecific binding proteins such as IgG. In the example above, we are specifying `sample_Y` as the control for `sample_X`. +What's important here is that you can specify the control ChIP-seq experiment(s) for a given sample. These controls are usually +ChIP-seq runs from input DNA and/or from treatment with nonspecific binding proteins such as IgG. In the example above, we are specifying `sample_Y` as the control for `sample_X`. ### Pipeline Settings Configuration @@ -51,24 +52,163 @@ For the pipeline settings, there are some values that you need to specify while While optional settings are: 1. `aligner`: which aligner to use (`bwa` or `bowtie`) - +2. `macs2`: Here only the callpeak modus is implemented. But one can set all the options from [macs2 callpeak](https://github +.com/taoliu/MACS/#call-peaks) in this settings config. 
Note that the config value is: macs2_callpeak ## Running Carp As with other pipelines in the Biopet suite, Carp can be run by specifying the pipeline after the `pipeline` subcommand: -~~~ -java -jar </path/to/biopet.jar> pipeline carp -config </path/to/config.json> -qsub -jobParaEnv BWA -run +~~~ bash +biopet pipeline carp -config </path/to/config.json> -qsub -jobParaEnv BWA -run ~~~ If you already have the `biopet` environment module loaded, you can also simply call `biopet`: -~~~ +~~~ bash biopet pipeline carp -config </path/to/config.json> -qsub -jobParaEnv BWA -run ~~~ -It is also a good idea to specify retries (we recomend `-retry 3` up to `-retry 5`) so that cluster glitches do not interfere with your pipeline runs. +It is also a good idea to specify retries (we recommend `-retry 4` up to `-retry 8`) so that cluster glitches do not interfere +with your pipeline runs. + +## Example output + +```bash +. +├── Carp.summary.json +├── report +│  ├── alignmentSummary.png +│  ├── alignmentSummary.tsv +│  ├── ext +│  │  ├── css +│  │  │  ├── bootstrap_dashboard.css +│  │  │  ├── bootstrap.min.css +│  │  │  ├── bootstrap-theme.min.css +│  │  │  └── sortable-theme-bootstrap.css +│  │  ├── fonts +│  │  │  ├── glyphicons-halflings-regular.ttf +│  │  │  ├── glyphicons-halflings-regular.woff +│  │  │  └── glyphicons-halflings-regular.woff2 +│  │  └── js +│  │  ├── bootstrap.min.js +│  │  ├── jquery.min.js +│  │  └── sortable.min.js +│  ├── Files +│  │  └── index.html +│  ├── index.html +│  ├── insertsize.png +│  ├── insertsize.tsv +│  ├── QC_Bases_R1.png +│  ├── QC_Bases_R1.tsv +│  ├── QC_Bases_R2.png +│  ├── QC_Bases_R2.tsv +│  ├── QC_Reads_R1.png +│  ├── QC_Reads_R1.tsv +│  ├── QC_Reads_R2.png +│  ├── QC_Reads_R2.tsv +│  ├── Samples +│  │  ├── 10_Input_2 +│  │  │  ├── Alignment +│  │  │  │  ├── index.html +│  │  │  │  ├── insertsize.png +│  │  │  │  ├── insertsize.tsv +│  │  │  │  ├── wgs.png +│  │  │  │  └── wgs.tsv +│  │  │  ├── Files +│  │  │  │  └── index.html +│  │  
│  ├── index.html +│  │  │  └── Libraries +│  │  │  ├── 3307 +│  │  │  │  ├── Alignment +│  │  │  │  │  ├── index.html +│  │  │  │  │  ├── insertsize.png +│  │  │  │  │  ├── insertsize.tsv +│  │  │  │  │  ├── wgs.png +│  │  │  │  │  └── wgs.tsv +│  │  │  │  ├── index.html +│  │  │  │  └── QC +│  │  │  │  ├── fastqc_R1_duplication_levels.png +│  │  │  │  ├── fastqc_R1_kmer_profiles.png +│  │  │  │  ├── fastqc_R1_per_base_quality.png +│  │  │  │  ├── fastqc_R1_per_base_sequence_content.png +│  │  │  │  ├── fastqc_R1_per_sequence_gc_content.png +│  │  │  │  ├── fastqc_R1_per_sequence_quality.png +│  │  │  │  ├── fastqc_R1_qc_duplication_levels.png +│  │  │  │  ├── fastqc_R1_qc_kmer_profiles.png +│  │  │  │  ├── fastqc_R1_qc_per_base_quality.png +│  │  │  │  ├── fastqc_R1_qc_per_base_sequence_content.png +│  │  │  │  ├── fastqc_R1_qc_per_sequence_gc_content.png +│  │  │  │  ├── fastqc_R1_qc_per_sequence_quality.png +│  │  │  │  ├── fastqc_R1_qc_sequence_length_distribution.png +│  │  │  │  ├── fastqc_R1_sequence_length_distribution.png +│  │  │  │  └── index.html +│  │  │  └── index.html +│  │  ├── 11_GR_2A +│  │  │  ├── Alignment +│  │  │  │  ├── index.html +│  │  │  │  ├── insertsize.png +│  │  │  │  ├── insertsize.tsv +│  │  │  │  ├── wgs.png +│  │  │  │  └── wgs.tsv +│  │  │  ├── alignmentSummary.png +│  │  │  ├── alignmentSummary.tsv +│  │  │  ├── Files +│  │  │  │  └── index.html +│  │  │  ├── index.html +│  │  │  └── Libraries +│  │  │  ├── 3307 +│  │  │  │  ├── Alignment +│  │  │  │  │  ├── index.html +│  │  │  │  │  ├── insertsize.png +│  │  │  │  │  ├── insertsize.tsv +│  │  │  │  │  ├── wgs.png +│  │  │  │  │  └── wgs.tsv +│  │  │  │  ├── index.html +│  │  │  │  └── QC +│  │  │  │  ├── fastqc_R1_duplication_levels.png +│  │  │  │  ├── fastqc_R1_kmer_profiles.png +│  │  │  │  ├── fastqc_R1_per_base_quality.png +│  │  │  │  ├── fastqc_R1_per_base_sequence_content.png +│  │  │  │  ├── fastqc_R1_per_sequence_gc_content.png +│  │  │  │  ├── 
fastqc_R1_per_sequence_quality.png +│  │  │  │  ├── fastqc_R1_qc_duplication_levels.png +│  │  │  │  ├── fastqc_R1_qc_kmer_profiles.png +│  │  │  │  ├── fastqc_R1_qc_per_base_quality.png +│  │  │  │  ├── fastqc_R1_qc_per_base_sequence_content.png +│  │  │  │  ├── fastqc_R1_qc_per_sequence_gc_content.png +│  │  │  │  ├── fastqc_R1_qc_per_sequence_quality.png +│  │  │  │  ├── fastqc_R1_qc_sequence_length_distribution.png +│  │  │  │  ├── fastqc_R1_sequence_length_distribution.png +│  │  │  │  └── index.html +│  │  │  ├── 3385 +│  │  │  │  ├── Alignment +│  │  │  │  │  ├── index.html +│  │  │  │  │  ├── insertsize.png +│  │  │  │  │  ├── insertsize.tsv +│  │  │  │  │  ├── wgs.png +│  │  │  │  │  └── wgs.tsv +│  │  │  │  ├── index.html +│  │  │  │  └── QC +│  │  │  │  ├── fastqc_R1_duplication_levels.png +│  │  │  │  ├── fastqc_R1_kmer_profiles.png +│  │  │  │  ├── fastqc_R1_per_base_quality.png +│  │  │  │  ├── fastqc_R1_per_base_sequence_content.png +│  │  │  │  ├── fastqc_R1_per_sequence_gc_content.png +│  │  │  │  ├── fastqc_R1_per_sequence_quality.png +│  │  │  │  ├── fastqc_R1_qc_duplication_levels.png +│  │  │  │  ├── fastqc_R1_qc_kmer_profiles.png +│  │  │  │  ├── fastqc_R1_qc_per_base_quality.png +│  │  │  │  ├── fastqc_R1_qc_per_base_sequence_content.png +│  │  │  │  ├── fastqc_R1_qc_per_sequence_gc_content.png +│  │  │  │  ├── fastqc_R1_qc_per_sequence_quality.png +│  │  │  │  ├── fastqc_R1_qc_sequence_length_distribution.png +│  │  │  │  ├── fastqc_R1_sequence_length_distribution.png +│  │  │  │  └── index.html +│  │  │  └── index.html +``` ## Getting Help -If you have any questions on running Carp, suggestions on how to improve the overall flow, or requests for your favorite ChIP-seq related program to be added, feel free to post an issue to our issue tracker at [https://git.lumc.nl/biopet/biopet/issues](https://git.lumc.nl/biopet/biopet/issues). 
+If you have any questions on running Carp, suggestions on how to improve the overall flow, or requests for your favorite ChIP-seq related program to be added, feel free to post an issue to our issue tracker at [GitHub](https://github.com/biopet/biopet). +Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) diff --git a/docs/pipelines/flexiprep.md b/docs/pipelines/flexiprep.md index 83d2dd6db660129e2ddf4e6bdcd5690d08d56275..83b2889f3adec27bd506aeb60ef795b9ae80fb9f 100644 --- a/docs/pipelines/flexiprep.md +++ b/docs/pipelines/flexiprep.md @@ -1,18 +1,19 @@ # Flexiprep ## Introduction -Flexiprep is our quality control pipeline. This pipeline checks for possible barcode contamination, clips reads, trims reads and runs -the <a href="http://www.bioinformatics.babraham.ac.uk/projects/fastqc/" target="_blank">Fastqc</a> tool. -Adapter clipping is performed by <a href="https://github.com/marcelm/cutadapt" target="_blank">Cutadapt</a>. -For quality trimming we use <a href="https://github.com/najoshi/sickle" target="_blank">Sickle</a>. -Flexiprep works on `.fastq` files. +Flexiprep is a quality control pipeline. This pipeline checks for possible barcode contamination, clips reads, trims reads and + runs [FASTQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/). +Adapter clipping is performed by [Cutadapt](https://github.com/marcelm/cutadapt). +For quality trimming we use [Sickle](https://github.com/najoshi/sickle). +Flexiprep only works on `.fastq` files. ## Example To get the help menu: -~~~ -java -jar </path/to/biopet.jar> pipeline Flexiprep -h + +``` bash +biopet pipeline Flexiprep -h Arguments for Flexiprep: -R1,--input_r1 <input_r1> R1 fastq file (gzipped allowed) @@ -21,17 +22,18 @@ Arguments for Flexiprep: -library,--libid <libid> Library ID -config,--config_file <config_file> JSON config file(s) -DSC,--disablescatter Disable all scatters -~~~ +``` Note that the pipeline also works on unpaired reads where one should only provide R1. 
To start the pipeline (remove `-run` for a dry run): -~~~bash -java -jar Biopet-0.2.0.jar pipeline Flexiprep -run -outDir myDir \ + +``` bash +biopet pipeline Flexiprep -run -outDir myDir \ -R1 myFirstReadPair -R2 mySecondReadPair -sample mySampleName \ -library myLibname -config mySettings.json -~~~ +``` ## Configuration and flags @@ -64,7 +66,7 @@ The pipeline also outputs 2 Fastqc runs one before and one after quality control ### Example output -~~~ +~~~ bash . ├── mySample_01.qc.summary.json ├── mySample_01.qc.summary.json.out @@ -143,3 +145,9 @@ The pipeline also outputs 2 Fastqc runs one before and one after quality control └── report ~~~ + +## Getting Help + +If you have any questions on running Flexiprep, suggestions on how to improve the overall flow, or requests for your favorite +Quality Control (QC) related program to be added, feel free to post an issue to our issue tracker at [GitHub](https://github.com/biopet/biopet). +Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) diff --git a/docs/pipelines/gears.md b/docs/pipelines/gears.md new file mode 100644 index 0000000000000000000000000000000000000000..6d8d150a05625fb2a1564ea61974e0d91c6bc099 --- /dev/null +++ b/docs/pipelines/gears.md @@ -0,0 +1,83 @@ +# Gears + +## Introduction +Gears is a metagenomics pipeline. (``GE``nome ``A``nnotation of ``R``esidual ``S``equences). One can use this pipeline to identify contamination in sequencing runs on either raw FastQ files or BAM files. +In case of BAM file as input, it will extract the unaligned read(pair) sequences for analysis. + +Analysis result is reported in a sunburst graph, which is visible and navigatable in a webbrowser. + +Pipeline analysis components include: + + - Kraken, DerrickWood [GitHub](https://github.com/DerrickWood/kraken) + + +## Example + +To get the help menu: + +``` bash +biopet pipeline Gears -h + +... default config ... 
+ +Arguments for Gears: + -R1,--fastqr1 <fastqr1> R1 reads in FastQ format + -R2,--fastqr2 <fastqr2> R2 reads in FastQ format + -bam,--bamfile <bamfile> All unmapped reads will be extracted from this bam for analysis + --outputname <outputname> Undocumented option + -sample,--sampleid <sampleid> Sample ID + -library,--libid <libid> Library ID + -config,--config_file <config_file> JSON / YAML config file(s) + -cv,--config_value <config_value> Config values, value should be formatted like 'key=value' or + 'path:path:key=value' + -DSC,--disablescatter Disable all scatters + +``` + +Note that the pipeline also works on unpaired reads where one should only provide R1. + + +To start the pipeline (remove `-run` for a dry run): + +``` bash +biopet pipeline Gears -run \ +-R1 myFirstReadPair -R2 mySecondReadPair -sample mySampleName \ +-library myLibname -config mySettings.json +``` + + +## Configuration and flags +For technical reasons, single sample pipelines, such as this pipeline, do **not** take a sample config. +Input files are instead given on the command line as a flag. + +Command line flags for Gears are: + +| Flag (short)| Flag (long) | Type | Function | +| ------------ | ----------- | ---- | -------- | +| -R1 | --input_r1 | Path (optional) | Path to input fastq file | +| -R2 | --input_r2 | Path (optional) | Path to second read pair fastq file. | +| -bam | --bamfile | Path (optional) | Path to bam file. | +| -sample | --sampleid | String (**required**) | Name of sample | +| -library | --libid | String (**required**) | Name of library | + +If `-R2` is given, the pipeline will assume a paired-end setup. `-bam` is mutually exclusive with the `-R1` and `-R2` flags. Either specify `-bam` or `-R1` and/or `-R2`.
+ +### Config + + + +## Result files + +The results of `Gears` are stored in the following files: + +| File suffix | Application | Content | Description | +| ----------- | ----------- | ------- | ----------- | +| *.krkn.raw | kraken | tsv | Annotation per sequence | +| *.krkn.full | kraken-report | tsv | List of all possible annotations with counts filled in for this specific sample| +| *.krkn.json | krakenreport2json| json | JSON representation of the taxonomy report, for postprocessing | + +In a separate `report` folder, one can find the html report displaying the summary and providing a navigation view on the taxonomy graph and (its) result. + +## Getting Help +For questions about this pipeline and suggestions, we have a GitHub page where you can submit your ideas and thoughts: [GitHub](https://github.com/biopet/biopet). +Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) diff --git a/docs/pipelines/gentrap.md b/docs/pipelines/gentrap.md index b6d4cb92ddc3df24efebfa82d68ab2808e8e6014..fe26fbc96c8edee9efecba3a56787085d292ea6b 100644 --- a/docs/pipelines/gentrap.md +++ b/docs/pipelines/gentrap.md @@ -26,7 +26,7 @@ As with other biopet pipelines, Gentrap relies on a JSON configuration file to r Samples are single experimental units whose expression you want to measure. They usually consist of a single sequencing library, but in some cases (for example when the experiment demands each sample have a minimum library depth) a single sample may contain multiple sequencing libraries as well. All this is can be configured using the correct JSON nesting, with the following pattern: -~~~ +~~~ json { "samples": { "sample_A": { @@ -43,7 +43,7 @@ Samples are single experimental units whose expression you want to measure. They In the example above, there is one sample (named `sample_A`) which contains one sequencing library (named `lib_01`). The library itself is paired end, with both `R1` and `R2` pointing to the location of the files in the file system.
A more complicated example is the following: -~~~ +~~~ json { "samples": { "sample_X": { @@ -59,7 +59,7 @@ In the example above, there is one sample (named `sample_A`) which contains one "lib_one": { "R1": "/absolute/path/to/first/read/pair.fq", "R2": "/absolute/path/to/second/read/pair.fq" - } + }, "lib_two": { "R1": "/absolute/path/to/first/read/pair.fq", "R2": "/absolute/path/to/second/read/pair.fq" @@ -95,7 +95,7 @@ In addition to these, you must also remember to supply the alignment index requi Thus, an example settings configuration is as follows: -~~~ +~~~ json { "output_dir": "/path/to/output/dir", "expression_measures": ["fragments_per_gene", "bases_per_gene"], @@ -118,14 +118,14 @@ In most cases, it's practical to combine the samples and settings configuration As with other pipelines in the Biopet suite, Gentrap can be run by specifying the pipeline after the `pipeline` subcommand: -~~~ -$ java -jar </path/to/biopet.jar> pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run +~~~ bash +biopet pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run ~~~ You can also use the `biopet` environment module (recommended) when you are running the pipeline in SHARK: -~~~ -$ module load biopet/v0.3.1 +~~~ bash +$ module load biopet/v0.5.0 $ biopet pipeline gentrap -config </path/to/config.json> -qsub -jobParaEnv BWA -run ~~~ @@ -137,5 +137,5 @@ The number and types of output files depend on your run configuration. What you ## Getting Help -If you have any questions on running Gentrap, suggestions on how to improve the overall flow, or requests for your favorite RNA-seq related program to be added, feel free to post an issue to our issue tracker at [https://git.lumc.nl/biopet/biopet/issues](https://git.lumc.nl/biopet/biopet/issues). 
- +If you have any questions on running Gentrap, suggestions on how to improve the overall flow, or requests for your favorite RNA-seq related program to be added, + feel free to post an issue to our issue tracker at [GitHub](https://github.com/biopet/biopet). Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) \ No newline at end of file diff --git a/docs/pipelines/mapping.md b/docs/pipelines/mapping.md index 0e20aeecce28fe660b6df4ebd8b05a49cf50fc13..868bc7a4cc7491e2ce4464fa2242549bc3b0640e 100644 --- a/docs/pipelines/mapping.md +++ b/docs/pipelines/mapping.md @@ -91,7 +91,7 @@ Any supplied sample config will be ignored. For the help menu: ~~~ -java -jar </path/to/biopet.jar> pipeline mapping -h +biopet pipeline mapping -h Arguments for Mapping: -R1,--input_r1 <input_r1> R1 fastq file @@ -107,7 +107,7 @@ Arguments for Mapping: To run the pipeline: ~~~ -java -jar </path/to/biopet.jar> pipeline mapping -run --config mySettings.json \ +biopet pipeline mapping -run --config mySettings.json \ -R1 myReads1.fastq -R2 myReads2.fastq ~~~ Note that removing -R2 causes the pipeline to assume single end `.fastq` files. @@ -126,3 +126,9 @@ To perform a dry run simply remove `-run` from the commandline call. ├── metrics └── report ~~~ + +## Getting Help + +If you have any questions on running Mapping, suggestions on how to improve the overall flow, or requests for your favorite aligner to be added, feel free to post an issue to our issue tracker at + [GitHub](https://github.com/biopet/biopet). Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) + diff --git a/docs/pipelines/sage.md b/docs/pipelines/sage.md index 97e3ceab4fab0c6221c187135f647d1579fb426f..efb90851623d285b953f6b2c56c9c62999a0a6ed 100644 --- a/docs/pipelines/sage.md +++ b/docs/pipelines/sage.md @@ -35,7 +35,7 @@ Specific configuration values for the Sage pipeline are: As with other pipelines, you can run the Sage pipeline by invoking the `pipeline` subcommand. 
There is also a general help available which can be invoked using the `-h` flag: ~~~ -$ java -jar /path/to/biopet.jar pipeline sage -h +$ biopet pipeline sage -h Arguments for Sage: -s,--sample <sample> Only Sample @@ -49,14 +49,14 @@ Arguments for Sage: If you are on SHARK, you can also load the `biopet` module and execute `biopet pipeline` instead: ~~~ -$ module load biopet/v0.3.0 +$ module load biopet/v0.5.0 $ biopet pipeline sage ~~~ To run the pipeline: ~~~ - biopet pipeline sage -config /path/to/config.json -qsub -jobParaEnv BWA -run +$ biopet pipeline sage -config /path/to/config.json -qsub -jobParaEnv BWA -run ~~~ @@ -123,3 +123,8 @@ Below is an example of the output files that you will get after running Sage. He   ├── no_sense_genes.txt   └── tag.lib ~~~ + +## Getting Help + +If you have any questions on running SAGE or suggestions on how to improve the overall flow, feel free to post an issue to our issue tracker at [GitHub](https://github.com/biopet/biopet). +Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) diff --git a/docs/pipelines/shiva.md b/docs/pipelines/shiva.md index e0e98f4ea61c21b65b5ba2b0dc9ac9c69201e6b7..88fc86ce93ff309c2d388eb89832900f81b4be85 100644 --- a/docs/pipelines/shiva.md +++ b/docs/pipelines/shiva.md @@ -30,7 +30,7 @@ The full pipeline can start from fastq or from bam file. This pipeline will incl To view the help menu, execute: ~~~ -java -jar </path/to/biopet.jar> pipeline shiva -h +biopet pipeline shiva -h Arguments for Shiva: -sample,--onlysample <onlysample> Only Sample @@ -41,7 +41,7 @@ Arguments for Shiva: To run the pipeline: ~~~ -java -jar </path/to/biopet.jar> pipeline shiva -config MySamples.json -config MySettings.json -run +biopet pipeline shiva -config MySamples.json -config MySettings.json -run ~~~ A dry run can be performed by simply removing the `-run` flag from the command line call. 
@@ -67,7 +67,7 @@ Arguments for ShivaVariantcalling: To run the pipeline: ~~~ -java -jar </path/to/biopet.jar> pipeline shivavariantcalling -config MySettings.json -run +biopet pipeline shivavariantcalling -config MySettings.json -run ~~~ A dry run can be performed by simply removing the `-run` flag from the command line call. @@ -89,6 +89,7 @@ At this moment the following variant callers can be used * <a href="https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_genotyper_UnifiedGenotyper.php">unifiedgenotyper_allele</a> * Only genotype a given list of alleles with UnifiedGenotyper * <a href="https://samtools.github.io/bcftools/bcftools.html">bcftools</a> +* <a href="https://samtools.github.io/bcftools/bcftools.html">bcftools_singlesample</a> * <a href="https://github.com/ekg/freebayes">freebayes</a> * [raw](../tools/MpileupToVcf) @@ -108,6 +109,8 @@ To view all possible config options please navigate to our Gitlab wiki page | Namespace | Name | Type | Default | Function | | ----------- | ---- | ----- | ------- | -------- | +| shiva | species | String | unknown_species | Name of species, like H.sapiens | +| shiva | reference_name | String | unknown_reference_name | Name of reference, like hg19 | | shiva | reference_fasta | String | | reference to align to | | shiva | dbsnp | String | | vcf file of dbsnp records | | shiva | variantcallers | List[String] | | variantcaller to use, see list | @@ -122,7 +125,6 @@ To view all possible config options please navigate to our Gitlab wiki page | vcffilter | min_alternate_depth | Integer | 2 | Filter variants with at least x depth on the alternate allele | | vcffilter | min_samples_pass | Integer | 1 | Minimum amount of samples which pass custom filter (requires additional flags) | | vcffilter | filter_ref_calls | Boolean | true | Remove reference calls | -| vcfstats | reference | String | Path to reference to be used by `vcfstats` | Since Shiva uses the [Mapping](mapping.md) pipeline internally, 
mapping config values can be specified as well. For all the options, please see the corresponding documentation for the mapping pipeline. @@ -151,26 +153,31 @@ The config for these therefore is: **Config example** -```json -{ - "samples": { - "SampleID": { - "libraries": { - "lib_id_1": { "bam": "YourBam.bam" }, - "lib_id_2": { "R1": "file_R1.fq.gz", "R2": "file_R2.fq.gz" } - } - } - }, - "shiva": { - "reference": "<location of fasta of reference>", - "variantcallers": [ "haplotypecaller", "unifiedgenotyper" ], - "dbsnp": "</path/to/dbsnp.vcf>", - "vcffilter": { - "min_alternate_depth": 1 - } - }, - "output_dir": "<output directory>" -} +``` yaml +samples: + SampleID: + libraries: + lib_id_1: + bam: YourBam.bam + lib_id_2: + R1: file_R1.fq.gz + R2: file_R2.fq.gz +dbsnp: <dbsnp.vcf.gz> +vcffilter: + min_alternate_depth: 1 +output_dir: <output directory> +variantcallers: + - haplotypecaller + - unifiedgenotyper + - haplotypecaller_gvcf ``` ## References + +* Shiva follows the best practices of GATK: [GATK best practices](https://www.broadinstitute.org/gatk/guide/best-practices) + + +## Getting Help + +If you have any questions on running Shiva, suggestions on how to improve the overall flow, or requests for your favorite variant calling related program to be added, feel free to post an issue to our issue tracker at [GitHub](https://github.com/biopet/biopet). 
+Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) diff --git a/docs/pipelines/toucan.md b/docs/pipelines/toucan.md index 633f3cac24d8b0520dec97c1c5ef596ca6ae901b..a533bcaed4288429071dc6e32dc7fdb182df201a 100644 --- a/docs/pipelines/toucan.md +++ b/docs/pipelines/toucan.md @@ -11,8 +11,9 @@ Currently, it comprises just two steps: Example ----------- + ~~~~bash -java -jar Biopet-0.3.0.jar pipeline Toucan -h +biopet pipeline Toucan -h Arguments for Toucan: -Input,--inputvcf <inputvcf> Input VCF file -config,--config_file <config_file> JSON config file(s) @@ -36,7 +37,8 @@ Furthermore, the `fork` field will be overwritten by `threads` in case that one Therefore, it is recommended not to use `fork`, but to rather use `threads`. With that in mind, an example configuration using mode `standard` of the VepNormalizer would thus be: -~~~~ + +~~~ json { "varianteffectpredictor": { "vep_script": <path_to_exe>, @@ -49,16 +51,24 @@ With that in mind, an example configuration using mode `standard` of the VepNorm }, "output_dir": <path_to_output_directory> } -~~~~ +~~~ Running the pipeline --------------- The command to run the pipeline is: -~~~~ -java -jar pipeline Toucan -Input <input_vcf> -config <config_json> -run -~~~~ -If one wishes to run it on a cluster, the command becomes +~~~~ bash +biopet pipeline Toucan -Input <input_vcf> -config <config_json> -run ~~~~ -java -jar pipeline Toucan -Input <input_vcf> -config <config_json> -run -qsub -jobParaEnv <PE> + +If one wishes to run it on a cluster, the command becomes: + +~~~~ bash +biopet pipeline Toucan -Input <input_vcf> -config <config_json> -run -qsub -jobParaEnv <PE> ~~~~ + + +## Getting Help + +If you have any questions on running Toucan, suggestions on how to improve the overall flow, or requests for your favorite VCF annotator to be added, feel free to post an issue to our issue tracker at [GitHub](https://github.com/biopet/biopet). 
+Or contact us directly via: [SASC email](mailto:SASC@lumc.nl) \ No newline at end of file diff --git a/docs/release_notes_0.3.0.md b/docs/releasenotes/release_notes_0.3.0.md similarity index 100% rename from docs/release_notes_0.3.0.md rename to docs/releasenotes/release_notes_0.3.0.md diff --git a/docs/release_notes_0.3.1.md b/docs/releasenotes/release_notes_0.3.1.md similarity index 100% rename from docs/release_notes_0.3.1.md rename to docs/releasenotes/release_notes_0.3.1.md diff --git a/docs/release_notes_0.3.2.md b/docs/releasenotes/release_notes_0.3.2.md similarity index 100% rename from docs/release_notes_0.3.2.md rename to docs/releasenotes/release_notes_0.3.2.md diff --git a/docs/release_notes_0.4.0.md b/docs/releasenotes/release_notes_0.4.0.md similarity index 100% rename from docs/release_notes_0.4.0.md rename to docs/releasenotes/release_notes_0.4.0.md diff --git a/docs/releasenotes/release_notes_0.5.0.md b/docs/releasenotes/release_notes_0.5.0.md new file mode 100644 index 0000000000000000000000000000000000000000..5280574ea647d4de71f5cf652d4d54733c6c5052 --- /dev/null +++ b/docs/releasenotes/release_notes_0.5.0.md @@ -0,0 +1,37 @@ +# Release notes Biopet version 0.5.0 + +## General Code changes + +* Upgrade to Queue 3.4, with this also the htsjdk library to 1.132 +* Our `QC` and `Mapping` pipeline now use piping for the most used aligners and QC tools + * Reducing I/O over the network + * Reducing the disk usage (storage) and run time +* Added version command for Star +* Separation of the `biopet`-framework into: `Core`, `Extensions`, `Tools` and `Utils` +* Optimized unit testing +* Unit test coverage on `Tools` increased +* Workaround: Added R-script files of Picard to biopet to fix picard jobs (files are not packaged in maven dependency) +* Added external example for developers + +## Functionality + +* Retries of pipeline and tools are now enabled by default +* Improvements in the reporting framework, allowing custom reporting elements for specific
pipelines. +* Fixed reports when metrics of Flexiprep is skipped +* Added metagenomics pipeline: [Gears](../pipelines/gears.md) +* Added single sample variantcalling with bcftools +* Added ET + key support for GATK job invocation, disable phone-home feature when key is supplied +* Added more debug information in the `.log` directory when `-l debug` is enabled +* [Shiva](../pipelines/shiva.md): added support for `GenotypeConcordance` tool to check against a Golden Standard +* [Shiva](../pipelines/shiva.md): fixed a lot of small bugs when developing integration tests +* [Shiva](../pipelines/shiva.md): Workaround: Fixed a dependency on rerun, with this change there can be 2 bam files in the samples folder +* [Gentrap](../pipelines/gentrap.md): Improved error handling on missing annotation files + +## Infrastructure changes + +* Development environment within the LUMC now get tested with Jenkins + * Added integration tests Flexiprep + * Added integration tests Gears + * Added integration tests Mapping + * Added integration tests Shiva + * Added integration tests Toucan diff --git a/docs/tools/BastyGenerateFasta.md b/docs/tools/BastyGenerateFasta.md index 997ebaf346f336269fc278cf3f25af983424c179..eb475d75cedd14c1f8065ae08b1af8fc53decb24 100644 --- a/docs/tools/BastyGenerateFasta.md +++ b/docs/tools/BastyGenerateFasta.md @@ -8,7 +8,7 @@ It can be very useful to produce the right input needed for follow up tools, for To get the help menu: ~~~bash -java -jar Biopet-0.2.0-DEV-801b72ed.jar tool BastyGenerateFasta -h +biopet tool BastyGenerateFasta -h Usage: BastyGenerateFasta [options] diff --git a/docs/tools/BiopetFlagstat.md b/docs/tools/BiopetFlagstat.md index 26e14c240aafa4fc4fe745acf9b5feadc604576d..880db7aa80a55d4685fe63d47e781d4e66c4ebfb 100644 --- a/docs/tools/BiopetFlagstat.md +++ b/docs/tools/BiopetFlagstat.md @@ -8,7 +8,7 @@ It captures for example the # of mapped reads, # of duplicates, # of mates unmap ## Example To get the help menu: ~~~ -java -jar 
Biopet-0.2.0.jar tool BiopetFlagstat -h +biopet tool BiopetFlagstat -h Usage: BiopetFlagstat [options] -l <value> | --log_level <value> @@ -25,7 +25,7 @@ Usage: BiopetFlagstat [options] To run the tool: ~~~ -java -jar Biopet-0.2.0.jar tool BiopetFlagstat -I myBAM.bam +biopet tool BiopetFlagstat -I myBAM.bam ~~~ ### Output diff --git a/docs/tools/CheckAllelesVcfInBam.md b/docs/tools/CheckAllelesVcfInBam.md index b21791d9dad3ff056de03ff362cda839a39b354b..476f8a60c6df23b959a14af8323e11439940595f 100644 --- a/docs/tools/CheckAllelesVcfInBam.md +++ b/docs/tools/CheckAllelesVcfInBam.md @@ -6,7 +6,7 @@ This tool has been written to check the allele frequency in BAM files. ## Example To get the help menu: ~~~ -java -jar Biopet-0.2.0.jar tool CheckAllelesVcfInBam -h +biopet tool CheckAllelesVcfInBam -h Usage: CheckAllelesVcfInBam [options] -l <value> | --log_level <value> @@ -28,7 +28,7 @@ Usage: CheckAllelesVcfInBam [options] To run the tool: ~~~ -java -jar Biopet-0.2.0.jar tool CheckAllelesVcfInBam --inputFile myVCF.vcf \ +biopet tool CheckAllelesVcfInBam --inputFile myVCF.vcf \ --bam myBam1.bam --sample bam_sample1 --outputFile myAlleles.vcf ~~~ @@ -37,7 +37,7 @@ The only thing one needs to make sure off is matching the `--bam` and `--sample` For multiple bam files: ~~~ -java -jar Biopet-0.2.0.jar tool CheckAllelesVcfInBam --inputFile myVCF.vcf \ +biopet tool CheckAllelesVcfInBam --inputFile myVCF.vcf \ --bam myBam1.bam --sample bam_sample1 --bam myBam2.bam --sample bam_sample2 \ --bam myBam3.bam --sample bam_sample3 --outputFile myAlleles.vcf ~~~ diff --git a/docs/tools/ExtractAlignedFastq.md b/docs/tools/ExtractAlignedFastq.md index eb765142228ec9e0bded57e7b234f70c40130ca0..c1f069e6bab80ae5a1d3cfe760121e778289e3c4 100644 --- a/docs/tools/ExtractAlignedFastq.md +++ b/docs/tools/ExtractAlignedFastq.md @@ -9,7 +9,7 @@ The tool is also very usefull to create test data sets. 
## Example To get the help menu: ~~~ -java -jar Biopet-0.2.0.jar tool ExtractAlignedFastq -h +biopet tool ExtractAlignedFastq -h ExtractAlignedFastq - Select aligned FASTQ records Usage: ExtractAlignedFastq [options] @@ -42,7 +42,7 @@ This tool creates FASTQ file(s) containing reads mapped to the given alignment i To run the tool: ~~~ -java -jar Biopet-0.2.0.jar tool ExtractAlignedFastq \ +biopet tool ExtractAlignedFastq \ --input_file myBam.bam --in1 myFastq_R1.fastq --out1 myOutFastq_R1.fastq --interval myTarget.bed ~~~ * Note that this tool works for single end and paired end data. The above example can be easily extended for paired end data. diff --git a/docs/tools/FastqSplitter.md b/docs/tools/FastqSplitter.md index 742a376b19d2948beeab4c6340792ccf2dbfde53..6f89bb766ea66ee85825500dc492854e02faa949 100644 --- a/docs/tools/FastqSplitter.md +++ b/docs/tools/FastqSplitter.md @@ -9,7 +9,7 @@ needed for the number of chunks specified. Note that this will be automatically ## Example To get the help menu: ~~~ -java -jar Biopet-0.2.0.jar tool FastqSplitter -h +biopet tool FastqSplitter -h Usage: FastqSplitter [options] -l <value> | --log_level <value> @@ -25,7 +25,7 @@ Usage: FastqSplitter [options] ~~~ To run the tool: ~~~ -java -jar Biopet-0.2.0.jar tool FastqSplitter --inputFile myFastq.fastq \ +biopet tool FastqSplitter --inputFile myFastq.fastq \ --output mySplittedFastq_1.fastq --output mySplittedFastq_2.fastq \ --output mySplittedFastq_3.fastq ~~~ diff --git a/docs/tools/FindRepeatsPacBio.md b/docs/tools/FindRepeatsPacBio.md index e02daeeab6eb9a5d8d3eeadf125364dbef9e97c4..02351d091c22a6c30e59465d6ce99e3e6f14d3ea 100644 --- a/docs/tools/FindRepeatsPacBio.md +++ b/docs/tools/FindRepeatsPacBio.md @@ -9,7 +9,7 @@ those regions with the BAM file. 
On those extracted regions the tool will perfor ## Example To get the help menu: ~~~ -java -jar Biopet-0.2.0.jar tool FindRepeatsPacBio -h +biopet tool FindRepeatsPacBio -h Usage: FindRepeatsPacBio [options] -l <value> | --log_level <value> @@ -26,7 +26,7 @@ Usage: FindRepeatsPacBio [options] To run the tool: ~~~ -java -jar Biopet-0.2.0.jar tool FindRepeatsPacBio --inputBam myInputbam.bam \ +biopet tool FindRepeatsPacBio --inputBam myInputbam.bam \ --inputBed myRepeatRegions.bed > mySummary.txt ~~~ Since the default output of the program is printed in stdout we can use > to write the output to a text file. diff --git a/docs/tools/MergeAlleles.md b/docs/tools/MergeAlleles.md index f1d891ca085f55085399c1a424fe841ff11d77cf..07fc3a4985dcca96bf359c9e3bbe214b9a9da0c6 100644 --- a/docs/tools/MergeAlleles.md +++ b/docs/tools/MergeAlleles.md @@ -7,7 +7,7 @@ This tool is used to merge overlapping alleles. ## Example To get the help menu: ~~~ -java -jar Biopet-0.2.0.jar tool MergeAlleles -h +biopet tool MergeAlleles -h Usage: MergeAlleles [options] -l <value> | --log_level <value> diff --git a/docs/tools/MpileupToVcf.md b/docs/tools/MpileupToVcf.md index 2261e47f7a31545e681aef1a4e4d90555ba50322..d1b3200e4f14878f22cd48168ad0f22eb9f999ed 100644 --- a/docs/tools/MpileupToVcf.md +++ b/docs/tools/MpileupToVcf.md @@ -10,14 +10,14 @@ so usually one does not want to safe these files. 
## Example To start the tool: -~~~ -java -jar Biopet-0.2.0.jar tool mpileupToVcf +~~~ bash +biopet tool mpileupToVcf ~~~ To open the help: -~~~bash -java -jar Biopet-0.2.0.jar tool mpileupToVcf -h +~~~ bash +biopet tool mpileupToVcf -h Usage: MpileupToVcf [options] -l <value> | --log_level <value> diff --git a/docs/tools/SamplesTsvToJson.md b/docs/tools/SamplesTsvToJson.md index 84a33413e9d98bd110d02952aaa46a4891cf59e1..66bc0c7390096cf4ac6734e357360689aa2dcc6c 100644 --- a/docs/tools/SamplesTsvToJson.md +++ b/docs/tools/SamplesTsvToJson.md @@ -3,14 +3,14 @@ This tool enables a user to create a full sample sheet in JSON format suitable for all our Queue pipelines. The tool can be started as follows: -~~~ -java -jar <Biopet.jar> tool SamplesTsvToJson +~~~ bash +biopet tool SamplesTsvToJson ~~~ To open the help: -~~~ -java -jar Biopet-0.2.0.jar tool SamplesTsvToJson -h +~~~ bash +biopet tool SamplesTsvToJson -h Usage: SamplesTsvToJson [options] -l <value> | --log_level <value> @@ -29,7 +29,7 @@ JSON file is parsed with those properties inside it as well. The order of column #### Example -~~~ +~~~ json { "samples" : { "Sample_ID_1" : { diff --git a/docs/tools/VcfFilter.md b/docs/tools/VcfFilter.md index b9b88c9cca25f929889681317dcfcbe653dd581d..c9b3f064700c74181489097aaebfc6085dc8497e 100644 --- a/docs/tools/VcfFilter.md +++ b/docs/tools/VcfFilter.md @@ -7,8 +7,9 @@ There is a wide set of options which one can use to change the filter settings. 
## Example To open the help menu: -~~~ -java -jar Biopet-0.2.0.jar tool VcfFilter -h + +~~~ bash +biopet tool VcfFilter -h Usage: VcfFilter [options] -l <value> | --log_level <value> @@ -47,8 +48,8 @@ Usage: VcfFilter [options] ~~~ To run the tool: -~~~ -java -jar Biopet-0.2.0.jar tool VcfFilter --inputVcf myInput.vcf \ +~~~ bash +biopet tool VcfFilter --inputVcf myInput.vcf \ --outputVcf myOutput.vcf --filterRefCalls --minSampleDepth ~~~ diff --git a/docs/tools/VcfToTsv.md b/docs/tools/VcfToTsv.md index 4f1e294f976a97e564dbf7ec3b516271d1c153d3..59d42da464bc146cb4e76c2864fc44e507df0f03 100644 --- a/docs/tools/VcfToTsv.md +++ b/docs/tools/VcfToTsv.md @@ -8,8 +8,9 @@ There is also a possibility to only select some specific fields from you vcf and ## Example To open the help menu: -~~~ -java -jar Biopet-0.2.0.jar tool VcfToTsv -h + +~~~ bash +biopet tool VcfToTsv -h Usage: VcfToTsv [options] -l <value> | --log_level <value> @@ -36,8 +37,9 @@ Usage: VcfToTsv [options] ~~~ To run the tool: -~~~ -java -jar Biopet-0.2.0.jar tool VcfToTsv --inputFile myVCF.vcf \ + +~~~ bash +biopet tool VcfToTsv --inputFile myVCF.vcf \ --outputFile my_tabDelimited_VCF.tsv --all_info ~~~ diff --git a/docs/tools/VepNormalizer.md b/docs/tools/VepNormalizer.md index b4c138308bcc90565c9cc712d19ffc2dc45f1702..a7cc5912596a10bfddce6462349013681baa3f9c 100644 --- a/docs/tools/VepNormalizer.md +++ b/docs/tools/VepNormalizer.md @@ -21,8 +21,8 @@ The CSQ tag is by default removed from the output VCF file. If one wishes to ret Example --------- -~~~~bash -java -jar Biopet-0.3.0.jar tool VepNormalizer -h +~~~ bash +biopet tool VepNormalizer -h |VepNormalizer - Parse VEP-annotated VCF to standard VCF format Usage: VepNormalizer [options] @@ -33,12 +33,13 @@ Usage: VepNormalizer [options] -v | --version Print version -I <vcf> | --InputFile <vcf> - Input VCF file + Input VCF file. Required. -O <vcf> | --OutputFile <vcf> - Output VCF file + Output VCF file. Required. -m <mode> | --mode <mode> - Mode + Mode.
Can choose between <standard> (generates standard vcf) and <explode> (generates new record for each transcript). Required. --do-not-remove - Do not remove CSQ tag -~~~~ + Do not remove CSQ tag. Optional + +~~~ diff --git a/docs/tools/WipeReads.md b/docs/tools/WipeReads.md index 96377a03122808519293fb8204dd4144de974b1c..0263e1b7fa14c8f6884a8e6f601025137cad4e29 100644 --- a/docs/tools/WipeReads.md +++ b/docs/tools/WipeReads.md @@ -8,8 +8,9 @@ to known ribosomal RNA regions (using a supplied BED file containing intervals f ## Example To open the help menu: -~~~ -java -jar Biopet-0.2.0.jar tool WipeReads -h + +~~~ bash +biopet tool WipeReads -h WipeReads - Region-based reads removal from an indexed BAM file @@ -54,8 +55,9 @@ the given ones, they will also be removed. ~~~ To run the tool: -~~~ -java -jar Biopet-0.2.0.jar tool WipeReads --input_file myBam.bam \ + +~~~ bash +biopet tool WipeReads --input_file myBam.bam \ --interval_file myRibosomal_regions.bed --output_file myFilteredBam.bam ~~~ diff --git a/docs/tools/bedtointerval.md b/docs/tools/bedtointerval.md index 2c7093f5c4b28d0f5c269e8a1ed76219afa8e3bf..b93ddcfb467c6ff056cde65e516a3e7892d52645 100644 --- a/docs/tools/bedtointerval.md +++ b/docs/tools/bedtointerval.md @@ -7,7 +7,7 @@ Since the latest release of Picard tools (v 1.124) there is already a tool avail ## Example To get the help menu: ~~~ -java -jar Biopet-0.2.0.jar tool BedToInterval -h +biopet tool BedToInterval -h Usage: BedToInterval [options] -l <value> | --log_level <value> @@ -25,7 +25,7 @@ Usage: BedToInterval [options] To run the tool: ~~~ -java -jar Biopet-0.2.0 tool BedToInterval -I myBed.bed -o myIntervals.txt -b myBam.bam +biopet tool BedToInterval -I myBed.bed -o myIntervals.txt -b myBam.bam ~~~ ## Results diff --git a/docs/tools/bedtoolscoveragetocounts.md b/docs/tools/bedtoolscoveragetocounts.md index 441fcc71da38db096cc446ef388d597ebd992890..c733b818536d691dd5ea84d1b8c8081a28943688 100644 --- a/docs/tools/bedtoolscoveragetocounts.md +++ 
b/docs/tools/bedtoolscoveragetocounts.md @@ -7,7 +7,7 @@ This tool enables a user to generate a count file, out of a coverage file. ## Example To get the help menu: ~~~bash -java -jar Biopet-0.2.0.jar tool BedtoolsCoverageToCounts -h +biopet tool BedtoolsCoverageToCounts -h Usage: BedtoolsCoverageToCounts [options] -l <value> | --log_level <value> @@ -27,5 +27,5 @@ genes, ensemblIDs etc. etc. To run the tool: ~~~bash -java -jar Biopet-0.2.0.jar tool BedtoolsCoverageToCounts +biopet tool BedtoolsCoverageToCounts ~~~ \ No newline at end of file diff --git a/docs/tools/sagetools.md b/docs/tools/sagetools.md index 62451c70cc2d787c679b0ee23d3ee88296f8d7dd..d8024095e3d25ecf4288c9a4c042f541cd8b3b9b 100644 --- a/docs/tools/sagetools.md +++ b/docs/tools/sagetools.md @@ -4,8 +4,9 @@ Note that these tools are already implemented in the pipeline. ## SageCountFastq To open the help menu: -~~~ -java -jar Biopet-0.2.0.jar tool SageCreateLibrary -h + +~~~ bash +biopet tool SageCountFastq -h Usage: SageCountFastq [options] -l <value> | --log_level <value> @@ -21,8 +22,9 @@ Usage: SageCountFastq [options] ## SageCreateLibrary To open the help menu: -~~~ -java -jar Biopet-0.2.0.jar tool SageCreateLibrary -h + +~~~ bash +biopet tool SageCreateLibrary -h Usage: SageCreateLibrary [options] -l <value> | --log_level <value> @@ -48,8 +50,9 @@ Usage: SageCreateLibrary [options] ## SageCreateTagCounts To open the help menu: -~~~ -java -jar Biopet-0.2.0.jar tool SageCreateTagCounts -h + +~~~ bash +biopet tool SageCreateTagCounts -h Usage: SageCreateTagCounts [options] -l <value> | --log_level <value> diff --git a/external-example/pom.xml b/external-example/pom.xml index da0d2258fc90fe3cc9a49b76504be5388c683fa1..c799ac3cca14b7f1459294136f02f97a998f1b47 100644 --- a/external-example/pom.xml +++ b/external-example/pom.xml @@ -11,7 +11,7 @@ <artifactId>ExternalExample</artifactId> <!--TODO: replace version, for a new pipeline it's advised to start with '0.1.0-SNAPSHOT' --> -
<version>0.1.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> @@ -40,19 +40,19 @@ <artifactId>BiopetCore</artifactId> <!--TODO: replace version of pipeline to a fixed version --> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>BiopetExtensions</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>Shiva</artifactId> <!--TODO: replace version of pipeline to a fixed version --> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </dependency> </dependencies> diff --git a/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala b/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala index 6099047a6e5153c15df565d3e70179006ef4ceac..a9ff3afa2daf3e3a1d48845b176fcc8489987b4d 100644 --- a/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala +++ b/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala @@ -1,7 +1,6 @@ package org.example.group.pipelines import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.pipelines.shiva.Shiva import nl.lumc.sasc.biopet.utils.config.Configurable @@ -32,6 +31,8 @@ class BiopetPipeline(val root: Configurable) extends QScript with SummaryQScript // Executing a biopet pipeline inside val shiva = new Shiva(this) + add(shiva) + shiva.init() shiva.biopetScript() addAll(shiva.functions) diff --git a/external-example/src/main/scala/org/example/group/pipelines/HelloPipeline.scala b/external-example/src/main/scala/org/example/group/pipelines/HelloPipeline.scala new file mode 100644 index 
0000000000000000000000000000000000000000..82838b2b17155cd2c4d4c2890b5414b46f896af5 --- /dev/null +++ b/external-example/src/main/scala/org/example/group/pipelines/HelloPipeline.scala @@ -0,0 +1,39 @@ +package nl.lumc.sasc.biopet.pipelines.mypipeline + +import nl.lumc.sasc.biopet.core.PipelineCommand +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.extensions.Fastqc +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.QScript + +class HelloPipeline(val root: Configurable) extends QScript with SummaryQScript { + def this() = this(null) + + /** Only required when using [[SummaryQScript]] */ + def summaryFile = new File(outputDir, "hello.summary.json") + + /** Only required when using [[SummaryQScript]] */ + def summaryFiles: Map[String, File] = Map() + + /** Only required when using [[SummaryQScript]] */ + def summarySettings = Map() + + // This method can be used to initialize some classes where needed + def init(): Unit = { + } + + // This method is the actual pipeline + def biopetScript: Unit = { + + // Executing a tool like FastQC, calling the extension in `nl.lumc.sasc.biopet.extensions.Fastqc` + + val fastqc = new Fastqc(this) + fastqc.fastqfile = config("fastqc_input") + fastqc.output = new File(outputDir, "fastqc.txt") + add(fastqc) + + } +} + +//TODO: Replace object Name, must be the same as the class of the pipeline +object HelloPipeline extends PipelineCommand \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 871f41fbe83b44214aebc395eda59acd30bc5d5b..d78dc48086615f485b916ad485ef4cac4e0e8a03 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -4,19 +4,23 @@ pages: - General: - Config: 'general/config.md' - Requirements: 'general/requirements.md' + - About: 'general/about.md' + - License: 'general/license.md' - Pipelines: - - Basty: 'pipelines/basty.md' + - Basty (Snp typing): 'pipelines/basty.md' - Bam2Wig: 'pipelines/bam2wig.md' - - Carp: 'pipelines/carp.md' - - Gentrap: 
'pipelines/gentrap.md' - - Shiva: 'pipelines/shiva.md' - - Flexiprep: 'pipelines/flexiprep.md' - - Mapping: 'pipelines/mapping.md' - - Toucan: 'pipelines/toucan.md' + - Carp (chip-seq): 'pipelines/carp.md' + - Flexiprep (QC): 'pipelines/flexiprep.md' + - Gears (Metagenome): 'pipelines/gears.md' + - Gentrap (RNA-seq): 'pipelines/gentrap.md' + - Mapping (Alignment): 'pipelines/mapping.md' - Sage: 'pipelines/sage.md' + - Shiva (variantcalling): 'pipelines/shiva.md' + - Toucan (Annotation): 'pipelines/toucan.md' - Tools: - SamplesTsvToJson: 'tools/SamplesTsvToJson.md' - - BastyGenerateFasta: 'tools/bedtointerval.md' + - BedToInterval: 'tools/bedtointerval.md' + - BastyGenerateFasta: 'tools/BastyGenerateFasta.md' - BedToInterval: 'tools/bedtointerval.md' - BedtoolsCoverageToCounts: 'tools/bedtoolscoveragetocounts.md' - BiopetFlagstat: 'tools/BiopetFlagstat.md' @@ -24,19 +28,24 @@ pages: - ExtractAlignedFastq: 'tools/ExtractAlignedFastq.md' - FastqSplitter: 'tools/FastqSplitter.md' - FindRepeatsPacBio: 'tools/FindRepeatsPacBio.md' - - VcfFilter: 'tools/VcfFilter.md' - MpileupToVcf: 'tools/MpileupToVcf.md' - Sagetools: 'tools/sagetools.md' - - VepNormalizer: 'tools/VepNormalizer.md' - WipeReads: 'tools/WipeReads.md' - - BastyGenerateFasta: 'tools/BastyGenerateFasta.md' + - VcfFilter: 'tools/VcfFilter.md' + - VepNormalizer: 'tools/VepNormalizer.md' - Release notes: - - 0.4.0: 'release_notes_0.4.0.md' - - 0.3.2: 'release_notes_0.3.2.md' - - 0.3.1: 'release_notes_0.3.1.md' - - 0.3.0: 'release_notes_0.3.0.md' -- About: 'about.md' -- License: 'license.md' + - 0.5.0: 'releasenotes/release_notes_0.5.0.md' + - 0.4.0: 'releasenotes/release_notes_0.4.0.md' + - 0.3.2: 'releasenotes/release_notes_0.3.2.md' + - 0.3.1: 'releasenotes/release_notes_0.3.1.md' + - 0.3.0: 'releasenotes/release_notes_0.3.0.md' +- Developer: + - Getting Started: 'developer/getting-started.md' + - Code Style: 'developer/code-style.md' + - Example pipeline: 'developer/example-pipeline.md' + - Example tool: 
'developer/example-tool.md' + - Example pipeable: 'developer/example-pipeable.md' + - Scala docs: 'developer/scaladocs.md' #- ['developing/Setup.md', 'Developing', 'Setting up your local development environment'] #theme: readthedocs repo_url: https://github.com/biopet/biopet diff --git a/pom.xml b/pom.xml index 964da3f4aaf87824c6b9fade63d5bc607fe0e0aa..c8313ae54dbc2c9af3e0cc8fcaaa5ffa41a0fbf7 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>public</relativePath> </parent> diff --git a/protected/biopet-gatk-extensions/pom.xml b/protected/biopet-gatk-extensions/pom.xml index 7560fbe54364e42f059b114298134fa300726cd2..bf0ce809bb4e3ff1178e4a95686e380c1f45b601 100644 --- a/protected/biopet-gatk-extensions/pom.xml +++ b/protected/biopet-gatk-extensions/pom.xml @@ -15,7 +15,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>BiopetGatk</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> @@ -25,13 +25,13 @@ <dependencies> <dependency> <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetCore</artifactId> + <artifactId>BiopetExtensions</artifactId> <version>${project.version}</version> </dependency> <dependency> <groupId>org.broadinstitute.gatk</groupId> <artifactId>gatk-queue-extensions-distribution</artifactId> - <version>3.4</version> + <version>3.5</version> </dependency> </dependencies> </project> diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala index e347ebf4743eca9b2f80bc3f072c922cb012e88a..60614ecc938bef58534c810423254fc56eab5dbc 100644 --- 
a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala @@ -8,8 +8,13 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Output class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral { + + @Output(required = false) + protected var vcfIndex: File = _ + annotation ++= config("annotation", default = Seq(), freeVar = false).asStringList if (config.contains("dbsnp")) dbsnp = config("dbsnp") @@ -22,6 +27,11 @@ class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queu stand_call_conf = config("stand_call_conf", default = 30) stand_emit_conf = config("stand_emit_conf", default = 0) } + + override def freezeFieldValues(): Unit = { + super.freezeFieldValues() + if (out.getName.endsWith(".vcf.gz")) vcfIndex = new File(out.getAbsolutePath + ".tbi") + } } object GenotypeGVCFs { @@ -29,6 +39,7 @@ object GenotypeGVCFs { val gg = new GenotypeGVCFs(root) gg.variant = gvcfFiles gg.out = output + if (gg.out.getName.endsWith(".vcf.gz")) gg.vcfIndex = new File(gg.out.getAbsolutePath + ".tbi") gg } } \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala index 514879f9d2b96088a6e5b3e30eb969d44740934b..d7a07538c9cfcff82786c0d09d38001899783af5 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala @@ -5,11 
+5,18 @@ */ package nl.lumc.sasc.biopet.extensions.gatk.broad +import java.io.File + import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral { + @Gather(enabled = false) + @Output(required = false) + protected var vcfIndex: File = _ + override val defaultThreads = 1 min_mapping_quality_score = config("minMappingQualityScore", default = 20) @@ -40,6 +47,7 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu override def freezeFieldValues() { super.freezeFieldValues() + if (out.getName.endsWith(".vcf.gz")) vcfIndex = new File(out.getAbsolutePath + ".tbi") if (bamOutput != null && nct.getOrElse(1) > 1) { logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug") nCoresRequest = Some(1) @@ -47,10 +55,22 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu nct = Some(getThreads) memoryLimit = Option(memoryLimit.getOrElse(2.0) * nct.getOrElse(1)) } +} + +object HaplotypeCaller { + def apply(root: Configurable, inputFiles: List[File], outputFile: File): HaplotypeCaller = { + val hc = new HaplotypeCaller(root) + hc.input_file = inputFiles + hc.out = outputFile + if (hc.out.getName.endsWith(".vcf.gz")) hc.vcfIndex = new File(hc.out.getAbsolutePath + ".tbi") + hc + } - def useGvcf() { - emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF - variant_index_type = GATKVCFIndexType.LINEAR - variant_index_parameter = config("variant_index_parameter", default = 128000) + def gvcf(root: Configurable, inputFile: File, outputFile: File): HaplotypeCaller = { + val hc = apply(root, List(inputFile), outputFile) + hc.emitRefConfidence = 
org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF + hc.variant_index_type = GATKVCFIndexType.LINEAR + hc.variant_index_parameter = Some(hc.config("variant_index_parameter", default = 128000).asInt) + hc } -} +} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala index 44a3eac6607a6165920ab4d4564c9777119a1ca9..868b6ed0a62f87cf169571debd16a125ce8e9fc9 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala @@ -8,8 +8,14 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral { + + @Gather(enabled = false) + @Output + protected var bamIndex: File = _ + if (config.contains("scattercount")) scatterCount = config("scattercount") } @@ -19,6 +25,7 @@ object IndelRealigner { ir.input_file :+= input ir.targetIntervals = targetIntervals ir.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bam") + ir.bamIndex = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bai") ir } } \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala index 70d988f4b057572bc1c110ad597c232d6093a2cc..71e00e0512dee4a5d7f12744d729a8166ca18fe0 100644 --- 
a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala @@ -5,9 +5,17 @@ */ package nl.lumc.sasc.biopet.extensions.gatk.broad +import java.io.File + import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral { + + @Gather(enabled = false) + @Output(required = false) + protected var vcfIndex: File = _ + if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") sample_ploidy = config("ploidy") @@ -36,3 +44,14 @@ class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.q memoryLimit = Option(nct.getOrElse(1) * memoryLimit.getOrElse(2.0)) } } + +object UnifiedGenotyper { + def apply(root: Configurable, inputFiles: List[File], outputFile: File): UnifiedGenotyper = { + val ug = new UnifiedGenotyper(root) + ug.input_file = inputFiles + ug.out = outputFile + if (ug.out.getName.endsWith(".vcf.gz")) ug.vcfIndex = new File(ug.out.getAbsolutePath + ".tbi") + ug + } + +} \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/pom.xml b/protected/biopet-gatk-pipelines/pom.xml index 90fbbf942bfd68c1b25a21174c60341634c78c92..2cd542a4a3a2dfe4d58f09cf4a28609344e764c4 100644 --- a/protected/biopet-gatk-pipelines/pom.xml +++ b/protected/biopet-gatk-pipelines/pom.xml @@ -15,7 +15,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>BiopetGatk</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala 
b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala deleted file mode 100644 index e489c4afdf4a30b8ca0b2a965242eae7811ad24c..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import org.broadinstitute.gatk.queue.QScript - -import scala.util.Random - -class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Sample gvcf file") - var sampleGvcf: File = _ - - @Argument(doc = "SampleName", required = true) - var sampleName: String = _ - - @Input(doc = "Gvcf files", shortName = "I", required = false) - var gvcfFiles: List[File] = Nil - - var reference: File = config("reference") - - @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = config("dbsnp") - - def init() { - if (config.contains("gvcffiles")) for (file <- config("gvcffiles").asList) - gvcfFiles ::= file.toString - } - - def biopetScript() { - var todoGvcfs = gvcfFiles - var gvcfPool: List[File] = Nil - addGenotypingPipeline(gvcfPool) - - while (todoGvcfs.nonEmpty) { - val index = Random.nextInt(todoGvcfs.size) - gvcfPool ::= todoGvcfs(index) - addGenotypingPipeline(gvcfPool) - todoGvcfs = todoGvcfs.filter(b => b != todoGvcfs(index)) - } - } - - def addGenotypingPipeline(gvcfPool: List[File]) { - val gatkGenotyping = new GatkGenotyping(this) - gatkGenotyping.inputGvcfs = sampleGvcf 
:: gvcfPool - gatkGenotyping.samples :+= sampleName - gatkGenotyping.outputDir = new File(outputDir, "samples_" + gvcfPool.size) - gatkGenotyping.init() - gatkGenotyping.biopetScript() - addAll(gatkGenotyping.functions) - } -} - -object GatkBenchmarkGenotyping extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala deleted file mode 100644 index 2f54cbbc70bc7c2666ef9c043017603c0b1c4b9f..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ GenotypeGVCFs, SelectVariants } -import org.broadinstitute.gatk.queue.QScript - -class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Gvcf files", shortName = "I") - var inputGvcfs: List[File] = Nil - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference") - - @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = config("dbsnp") - - @Argument(doc = "OutputName", required = false) - var outputName: String = "genotype" - - @Output(doc = "OutputFile", shortName = "O", required = false) - var outputFile: File = _ - - @Argument(doc = "Samples", shortName = "sample", required = false) - var 
samples: List[String] = Nil - - def init() { - require(outputName != null, "Outputname is null") - if (outputFile == null) outputFile = new File(outputDir, outputName + ".vcf.gz") - } - - def biopetScript() { - addGenotypeGVCFs(inputGvcfs, outputFile) - if (samples.nonEmpty) { - if (samples.size > 1) addSelectVariants(outputFile, samples, new File(outputDir, "samples/"), "all") - for (sample <- samples) addSelectVariants(outputFile, List(sample), new File(outputDir, "samples/"), sample) - } - } - - def addGenotypeGVCFs(gvcfFiles: List[File], outputFile: File): File = { - val genotypeGVCFs = GenotypeGVCFs(this, gvcfFiles, outputFile) - add(genotypeGVCFs) - genotypeGVCFs.out - } - - def addSelectVariants(inputFile: File, samples: List[String], outputDir: File, name: String) { - val selectVariants = SelectVariants(this, inputFile, new File(outputDir, name + ".vcf.gz")) - selectVariants.excludeNonVariants = true - for (sample <- samples) selectVariants.sample_name :+= sample - add(selectVariants) - } -} - -object GatkGenotyping extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala deleted file mode 100644 index 3707ec2751cd21c82193f65c47281d294a764778..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ /dev/null @@ -1,235 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import htsjdk.samtools.SamReaderFactory -import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ CombineGVCFs, CombineVariants } -import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, SamToFastq } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics -import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import org.broadinstitute.gatk.queue.QScript - -import scala.collection.JavaConversions._ - -class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript with SummaryQScript { - qscript => - def this() = this(null) - - @Argument(doc = "Skip Genotyping step", shortName = "skipgenotyping", required = false) - var skipGenotyping: Boolean = config("skip_genotyping", default = false) - - /** Merge gvcfs */ - var mergeGvcfs: Boolean = config("merge_gvcfs", default = false) - - /** Joint variantcalling */ - var jointVariantcalling: Boolean = config("joint_variantcalling", default = false) - - /** Joint genotyping */ - var jointGenotyping: Boolean = config("joint_genotyping", default = false) - - var singleSampleCalling = config("single_sample_calling", default = true) - var reference: File = config("reference") - var useAllelesOption: Boolean = config("use_alleles_option", default = false) - val externalGvcfs = config("external_gvcfs_files", default = Nil).asFileList - - def summaryFile = new File(outputDir, "GatkPipeline.summary.json") - - //TODO: Add summary - def summaryFiles = Map() - - //TODO: Add summary - def summarySettings = Map() - - def makeSample(id: String) = new Sample(id) - 
class Sample(sampleId: String) extends AbstractSample(sampleId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - def makeLibrary(id: String) = new Library(id) - class Library(libId: String) extends AbstractLibrary(libId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - val mapping = new Mapping(qscript) - mapping.sampleId = Some(sampleId) - mapping.libId = Some(libId) - mapping.outputDir = libDir - - /** Library variantcalling */ - val gatkVariantcalling = new GatkVariantcalling(qscript) - gatkVariantcalling.doublePreProces = false - gatkVariantcalling.sampleID = sampleId - gatkVariantcalling.outputDir = new File(libDir, "variantcalling") - - protected def addJobs(): Unit = { - val bamFile: Option[File] = if (config.contains("R1")) { - mapping.input_R1 = config("R1") - mapping.input_R2 = config("R2") - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) // Add functions of mapping to curent function pool - Some(mapping.finalBamFile) - } else if (config.contains("bam")) { - var bamFile: File = config("bam") - if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile) - - if (config("bam_to_fastq", default = false).asBoolean) { - val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libId + ".R1.fastq", - libDir + sampleId + "-" + libId + ".R2.fastq") - samToFastq.isIntermediate = true - qscript.add(samToFastq) - mapping.input_R1 = samToFastq.fastqR1 - mapping.input_R2 = Some(samToFastq.fastqR2) - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) // Add functions of mapping to curent function pool - Some(mapping.finalBamFile) - } else { - var readGroupOke = true - val inputSam = SamReaderFactory.makeDefault.open(bamFile) - val header = inputSam.getFileHeader.getReadGroups - for (readGroup 
<- inputSam.getFileHeader.getReadGroups) { - if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") - if (readGroup.getSample != sampleId || readGroup.getLibrary != libId) readGroupOke = false - } - inputSam.close() - - if (!readGroupOke) { - if (config("correct_readgroups", default = false)) { - logger.info("Correcting readgroups, file:" + bamFile) - val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libId + ".bam")) - aorrg.RGID = sampleId + "-" + libId - aorrg.RGLB = libId - aorrg.RGSM = sampleId - aorrg.isIntermediate = true - qscript.add(aorrg) - bamFile = aorrg.output - } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + - "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") - } - addAll(BamMetrics(qscript, bamFile, libDir + "metrics/").functions) - - Some(bamFile) - } - } else { - logger.error("Sample: " + sampleId + ": No R1 found for run: " + libId) - None - } - - if (bamFile.isDefined) { - gatkVariantcalling.inputBams = List(bamFile.get) - gatkVariantcalling.variantcalling = config("library_variantcalling", default = false) - gatkVariantcalling.init() - gatkVariantcalling.biopetScript() - addAll(gatkVariantcalling.functions) - } - - addSummaryQScript(mapping) - } - } - - /** sample variantcalling */ - val gatkVariantcalling = new GatkVariantcalling(qscript) - gatkVariantcalling.sampleID = sampleId - gatkVariantcalling.outputDir = new File(sampleDir, "variantcalling") - - protected def addJobs(): Unit = { - addPerLibJobs() - gatkVariantcalling.inputBams = libraries.map(_._2.mapping.finalBamFile).toList - gatkVariantcalling.preProcesBams = false - if (!singleSampleCalling) { - gatkVariantcalling.useHaplotypecaller = false - 
gatkVariantcalling.useUnifiedGenotyper = false - } - gatkVariantcalling.init() - gatkVariantcalling.biopetScript() - addAll(gatkVariantcalling.functions) - - gatkVariantcalling.inputBams.foreach(x => addAll(Bam2Wig(qscript, x).functions)) - } - } - - def init() { - } - - val multisampleVariantcalling = new GatkVariantcalling(this) { - override def configName = "gatkvariantcalling" - override def configPath: List[String] = super.configPath ::: "multisample" :: Nil - } - - def biopetScript(): Unit = { - addSamplesJobs() - - addSummaryJobs() - } - - def addMultiSampleJobs(): Unit = { - val gvcfFiles: List[File] = if (mergeGvcfs && externalGvcfs.size + samples.size > 1) { - val newFile = new File(outputDir, "merged.gvcf.vcf.gz") - add(CombineGVCFs(this, externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile), newFile)) - List(newFile) - } else externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile) - - if (!skipGenotyping && gvcfFiles.nonEmpty) { - if (jointGenotyping) { - val gatkGenotyping = new GatkGenotyping(this) - gatkGenotyping.inputGvcfs = gvcfFiles - gatkGenotyping.outputDir = new File(outputDir, "genotyping") - gatkGenotyping.init() - gatkGenotyping.biopetScript() - addAll(gatkGenotyping.functions) - var vcfFile = gatkGenotyping.outputFile - } - } else logger.warn("No gVCFs to genotype") - - if (jointVariantcalling) { - val allBamfiles = samples.map(_._2.gatkVariantcalling.scriptOutput.bamFiles).toList.fold(Nil)(_ ++ _) - val allRawVcfFiles = samples.map(_._2.gatkVariantcalling.scriptOutput.rawVcfFile).filter(_ != null).toList - - val gatkVariantcalling = new GatkVariantcalling(this) { - override def configName = "gatkvariantcalling" - override def configPath: List[String] = super.configPath ::: "multisample" :: Nil - } - - if (gatkVariantcalling.useMpileup) { - val cvRaw = CombineVariants(this, allRawVcfFiles.toList, new File(outputDir, "variantcalling/multisample.raw.vcf.gz")) - add(cvRaw) - 
gatkVariantcalling.rawVcfInput = cvRaw.out - } - - multisampleVariantcalling.preProcesBams = false - multisampleVariantcalling.doublePreProces = false - multisampleVariantcalling.inputBams = allBamfiles.toList - multisampleVariantcalling.outputDir = new File(outputDir, "variantcalling") - multisampleVariantcalling.outputName = "multisample" - multisampleVariantcalling.init() - multisampleVariantcalling.biopetScript() - addAll(multisampleVariantcalling.functions) - - if (config("inputtype", default = "dna").asString != "rna" && config("recalibration", default = false).asBoolean) { - val recalibration = new GatkVariantRecalibration(this) - recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile - recalibration.bamFiles = allBamfiles - recalibration.outputDir = new File(outputDir, "recalibration") - recalibration.init() - recalibration.biopetScript() - } - } - } -} - -object GatkPipeline extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala deleted file mode 100644 index 772aa6887d7a7e4983059f84ea3ac7b455880c4a..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ ApplyRecalibration, VariantAnnotator, VariantRecalibrator } -import org.broadinstitute.gatk.queue.QScript - -class GatkVariantRecalibration(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "input vcf file", shortName = "I") - var inputVcf: File = _ - - @Input(doc = "input vcf file", shortName = "BAM", required = false) - var bamFiles: List[File] = Nil - - @Output(doc = "output vcf file", shortName = "out") - var outputVcf: File = _ - - def init() { - require(inputVcf != null, "Missing Output directory on gatk module") - } - - def biopetScript() { - var vcfFile: File = if (bamFiles.nonEmpty) addVariantAnnotator(inputVcf, bamFiles, outputDir) else inputVcf - vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir) - vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir) - } - - def addSnpVariantRecalibrator(inputVcf: File, dir: File): File = { - val snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false) - if (snpRecal.resource.nonEmpty) { - add(snpRecal) - - val snpApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - snpRecal.recal_file, snpRecal.tranches_file, indel = false) - add(snpApply) - - snpApply.out - } else { - logger.warn("Skipped snp Recalibration, resource is missing") - inputVcf - } - } - - def addIndelVariantRecalibrator(inputVcf: File, dir: File): File = { - val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, 
inputVcf, ".vcf", ".indel.tranches"), indel = true) - if (indelRecal.resource.nonEmpty) { - add(indelRecal) - - val indelApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - indelRecal.recal_file, indelRecal.tranches_file, indel = true) - add(indelApply) - - indelApply.out - } else { - logger.warn("Skipped indel Recalibration, resource is missing") - inputVcf - } - } - - def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: File): File = { - val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf")) - add(variantAnnotator) - variantAnnotator.out - } -} - -object GatkVariantRecalibration extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala deleted file mode 100644 index 750cbee21adea6c059d939aad518ca9aed0eb06e..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ /dev/null @@ -1,272 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. 
Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.extensions.Ln -import nl.lumc.sasc.biopet.extensions.gatk.broad._ -import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates -import nl.lumc.sasc.biopet.extensions.tools.{ MergeAlleles, MpileupToVcf, VcfFilter, VcfStats } -import nl.lumc.sasc.biopet.utils.ConfigUtils -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile - -import scala.collection.SortedMap -import scala.language.reflectiveCalls - -class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - val scriptOutput = new GatkVariantcalling.ScriptOutput - - @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM") - var inputBams: List[File] = Nil - - @Input(doc = "Raw vcf file", shortName = "raw") - var rawVcfInput: File = _ - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference") - - @Argument(doc = "OutputName", required = false) - var outputName: String = _ - - @Argument(doc = "Sample name", required = false) - var sampleID: String = _ - - var preProcesBams: Boolean = config("pre_proces_bams", default = true) - var variantcalling: Boolean = true - var doublePreProces: Boolean = config("double_pre_proces", default = true) - var useHaplotypecaller: Boolean = config("use_haplotypecaller", default = true) - var useUnifiedGenotyper: Boolean = config("use_unifiedgenotyper", default = false) - var useAllelesOption: Boolean = config("use_alleles_option", default = false) - var useMpileup: Boolean = config("use_mpileup", default = true) - var useIndelRealigner: 
Boolean = config("use_indel_realign", default = true) - var useBaseRecalibration: Boolean = config("use_base_recalibration", default = true) - - def init() { - if (outputName == null && sampleID != null) outputName = sampleID - else if (outputName == null) outputName = config("output_name", default = "noname") - - val baseRecalibrator = new BaseRecalibrator(this) - if (preProcesBams && useBaseRecalibration && baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration") - useBaseRecalibration = false - } - } - - private def doublePreProces(files: List[File]): List[File] = { - if (files.isEmpty) throw new IllegalStateException("Files can't be empty") - else if (!doublePreProces) files - else if (files.size == 1) { - val bamFile = new File(outputDir, files.head.getName) - if (bamFile != files.head) { - val oldIndex: File = new File(files.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = swapExt(outputDir, bamFile, ".bam", ".bai") - val baiLn = Ln(this, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(this, files.head, bamFile) - bamLn.deps :+= baiLn.output - add(bamLn) - } - List(bamFile) - } else { - val markDup = MarkDuplicates(this, files, new File(outputDir, outputName + ".dedup.bam")) - markDup.isIntermediate = useIndelRealigner - add(markDup) - if (useIndelRealigner) { - List(addIndelRealign(markDup.output, outputDir, isIntermediate = false)) - } else { - List(markDup.output) - } - } - } - - def biopetScript() { - scriptOutput.bamFiles = { - doublePreProces(if (preProcesBams) { - for (inputBam <- inputBams) yield { - var bamFile = inputBam - if (useIndelRealigner) - bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration) - if (useBaseRecalibration) - bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = inputBams.size > 1) - bamFile - } - } else { - inputBams - }) - } - - if (variantcalling) { - var mergBuffer: SortedMap[String, File] = SortedMap() - 
def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) } - - if (sampleID != null && (useHaplotypecaller || config("joint_genotyping", default = false).asBoolean)) { - val hcGvcf = new HaplotypeCaller(this) - hcGvcf.useGvcf() - hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = new File(outputDir, outputName + ".hc.discovery.gvcf.vcf.gz") - add(hcGvcf) - scriptOutput.gvcfFile = hcGvcf.out - } - - if (useHaplotypecaller) { - if (sampleID != null) { - val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), new File(outputDir, outputName + ".hc.discovery.vcf.gz")) - add(genotypeGVCFs) - scriptOutput.hcVcfFile = genotypeGVCFs.out - } else { - val hcGvcf = new HaplotypeCaller(this) - hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = new File(outputDir, outputName + ".hc.discovery.vcf.gz") - add(hcGvcf) - scriptOutput.hcVcfFile = hcGvcf.out - } - mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile) - } - - if (useUnifiedGenotyper) { - val ugVcf = new UnifiedGenotyper(this) - ugVcf.input_file = scriptOutput.bamFiles - ugVcf.out = new File(outputDir, outputName + ".ug.discovery.vcf.gz") - add(ugVcf) - scriptOutput.ugVcfFile = ugVcf.out - mergBuffer += ("2.UG-Discovery" -> scriptOutput.ugVcfFile) - } - - // Generate raw vcf - if (useMpileup) { - if (sampleID != null && scriptOutput.bamFiles.size == 1) { - val m2v = new MpileupToVcf(this) - m2v.inputBam = scriptOutput.bamFiles.head - m2v.sample = sampleID - m2v.output = new File(outputDir, outputName + ".raw.vcf") - add(m2v) - scriptOutput.rawVcfFile = m2v.output - - val vcfFilter = new VcfFilter(this) { - override def defaults = Map("min_sample_depth" -> 8, - "min_alternate_depth" -> 2, - "min_samples_pass" -> 1, - "filter_ref_calls" -> true - ) - } - vcfFilter.inputVcf = m2v.output - vcfFilter.outputVcf = swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz") - add(vcfFilter) - scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf - } else if 
(rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput - if (scriptOutput.rawFilterVcfFile != null) mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile) - } - - // Allele mode - if (useAllelesOption) { - val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz") - mergeAlleles.isIntermediate = true - add(mergeAlleles) - - if (useHaplotypecaller) { - val hcAlleles = new HaplotypeCaller(this) - hcAlleles.input_file = scriptOutput.bamFiles - hcAlleles.out = new File(outputDir, outputName + ".hc.allele.vcf.gz") - hcAlleles.alleles = mergeAlleles.output - hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(hcAlleles) - scriptOutput.hcAlleleVcf = hcAlleles.out - mergBuffer += ("3.HC-alleles" -> hcAlleles.out) - } - - if (useUnifiedGenotyper) { - val ugAlleles = new UnifiedGenotyper(this) - ugAlleles.input_file = scriptOutput.bamFiles - ugAlleles.out = new File(outputDir, outputName + ".ug.allele.vcf.gz") - ugAlleles.alleles = mergeAlleles.output - ugAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(ugAlleles) - scriptOutput.ugAlleleVcf = ugAlleles.out - mergBuffer += ("4.UG-alleles" -> ugAlleles.out) - } - } - - def removeNoneVariants(input: File): File = { - val output = input.getAbsolutePath.stripSuffix(".vcf.gz") + ".variants_only.vcf.gz" - val sv = SelectVariants(this, input, output) - sv.excludeFiltered = true - sv.excludeNonVariants = true - sv.isIntermediate = true - add(sv) - sv.out - } - - val cvFinal = CombineVariants(this, mergeList.toList, new File(outputDir, outputName + ".final.vcf.gz")) - cvFinal.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED - add(cvFinal) - - val vcfStats = new VcfStats(this) - vcfStats.input = cvFinal.out - vcfStats.setOutputDir(new File(outputDir, "vcfstats")) - 
add(vcfStats) - - scriptOutput.finalVcfFile = cvFinal.out - } - } - - def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean = true): File = { - val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir) - realignerTargetCreator.isIntermediate = true - add(realignerTargetCreator) - - val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir) - indelRealigner.isIntermediate = isIntermediate - add(indelRealigner) - - indelRealigner.o - } - - def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean = false): File = { - val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) - - if (baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) - return inputBam - } - add(baseRecalibrator) - - if (config("use_analyze_covariates", default = false).asBoolean) { - val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after")) - baseRecalibratorAfter.BQSR = baseRecalibrator.o - add(baseRecalibratorAfter) - - add(AnalyzeCovariates(this, baseRecalibrator.o, baseRecalibratorAfter.o, swapExt(dir, inputBam, ".bam", ".baserecal.pdf"))) - } - - val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam")) - printReads.BQSR = baseRecalibrator.o - printReads.isIntermediate = isIntermediate - add(printReads) - - printReads.o - } -} - -object GatkVariantcalling extends PipelineCommand { - class ScriptOutput { - var bamFiles: List[File] = _ - var gvcfFile: File = _ - var hcVcfFile: File = _ - var ugVcfFile: File = _ - var rawVcfFile: File = _ - var rawFilterVcfFile: File = _ - var hcAlleleVcf: File = _ - var ugAlleleVcf: File = _ - var finalVcfFile: File = _ - } -} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala 
b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala index cf5aa84c5bf75623b78ab3ad696a3d75300bd7fb..620619bfea1ccdb745f814d2b801c100437d4c76 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala @@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.extensions.gatk.broad._ -import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaTrait, ShivaVariantcallingTrait } +import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaVariantcallingTrait, ShivaTrait } import org.broadinstitute.gatk.queue.QScript /** @@ -43,21 +43,25 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { /** Class will generate library jobs */ class Library(libId: String) extends super.Library(libId) { - val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) - val useBaseRecalibration: Boolean = config("use_base_recalibration", default = true) - - /** Return true when baserecalibration is executed */ - protected def doneBaseRecalibrator: Boolean = { + lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) + lazy val useBaseRecalibration: Boolean = { + val c: Boolean = config("use_base_recalibration", default = true) val br = new BaseRecalibrator(qscript) - useBaseRecalibration && br.knownSites.nonEmpty + if (c && br.knownSites.isEmpty) + logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) + c && br.knownSites.nonEmpty } + override def summarySettings = super.summarySettings + + ("use_indel_realigner" -> useIndelRealigner) + + ("use_base_recalibration" -> useBaseRecalibration) + /** This will adds preprocess steps, gatk indel realignment and base recalibration is included here */ override 
def preProcess(input: File): Option[File] = { - if (!useIndelRealigner && !doneBaseRecalibrator) None + if (!useIndelRealigner && !useBaseRecalibration) None else { val indelRealignFile = useIndelRealigner match { - case true => addIndelRealign(input, libDir, doneBaseRecalibrator || libraries.size > 1) + case true => addIndelRealign(input, libDir, useBaseRecalibration || libraries.size > 1) case false => input } @@ -69,12 +73,16 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { } } + override def summarySettings = super.summarySettings + ("use_indel_realigner" -> useIndelRealigner) + + lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) + /** This methods will add double preprocess steps, with GATK indel realignment */ override protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { if (input.size <= 1) super.addDoublePreProcess(input) - else super.addDoublePreProcess(input, isIntermediate = true).collect { + else super.addDoublePreProcess(input, isIntermediate = useIndelRealigner).collect { case file => - config("use_indel_realigner", default = true).asBoolean match { + useIndelRealigner match { case true => addIndelRealign(file, sampleDir, isIntermediate = false) case false => file } @@ -99,10 +107,7 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = { val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) - if (baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) - return inputBam - } + if (baseRecalibrator.knownSites.isEmpty) return inputBam add(baseRecalibrator) if (config("use_analyze_covariates", default = false).asBoolean) { diff --git 
a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala index 1878d86fd150af451ab46fb69234ccbb66b05ec2..edbc633f3a03204f3d4e50ac14bc016134825252 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala @@ -6,9 +6,9 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad.GenotypeGVCFs +import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._ import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcallingTrait +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript /** @@ -22,99 +22,13 @@ class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVari /** Will generate all available variantcallers */ override def callersList = { - new HaplotypeCallerGvcf :: - new HaplotypeCallerAllele :: - new UnifiedGenotyperAllele :: - new UnifiedGenotyper :: - new HaplotypeCaller :: + new HaplotypeCallerGvcf(this) :: + new HaplotypeCallerAllele(this) :: + new UnifiedGenotyperAllele(this) :: + new UnifiedGenotyper(this) :: + new HaplotypeCaller(this) :: super.callersList } - - /** Default mode for the haplotypecaller */ - class HaplotypeCaller extends Variantcaller { - val name = "haplotypecaller" - protected val defaultPrio = 1 - - def outputFile = new File(outputDir, namePrefix + ".haplotypecaller.vcf.gz") - - def addJobs() { - val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript) - hc.input_file = inputBams - hc.out = outputFile - add(hc) - } - } - - /** Default mode for UnifiedGenotyper */ - class UnifiedGenotyper 
extends Variantcaller { - val name = "unifiedgenotyper" - protected val defaultPrio = 20 - - def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper.vcf.gz") - - def addJobs() { - val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript) - ug.input_file = inputBams - ug.out = outputFile - add(ug) - } - } - - /** Allele mode for Haplotypecaller */ - class HaplotypeCallerAllele extends Variantcaller { - val name = "haplotypecaller_allele" - protected val defaultPrio = 5 - - def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_allele.vcf.gz") - - def addJobs() { - val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript) - hc.input_file = inputBams - hc.out = outputFile - hc.alleles = config("input_alleles") - hc.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(hc) - } - } - - /** Allele mode for GenotyperAllele */ - class UnifiedGenotyperAllele extends Variantcaller { - val name = "unifiedgenotyper_allele" - protected val defaultPrio = 9 - - def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper_allele.vcf.gz") - - def addJobs() { - val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript) - ug.input_file = inputBams - ug.out = outputFile - ug.alleles = config("input_alleles") - ug.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(ug) - } - } - - /** Gvcf mode for haplotypecaller */ - class HaplotypeCallerGvcf extends Variantcaller { - val name = "haplotypecaller_gvcf" - protected val defaultPrio = 5 - - def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_gvcf.vcf.gz") - - def addJobs() { - val gvcfFiles = for (inputBam <- inputBams) yield { - val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript) - hc.input_file = List(inputBam) - hc.out = new File(outputDir, 
inputBam.getName.stripSuffix(".bam") + ".gvcf.vcf.gz") - hc.useGvcf() - add(hc) - hc.out - } - - val genotypeGVCFs = GenotypeGVCFs(qscript, gvcfFiles, outputFile) - add(genotypeGVCFs) - } - } } /** object to add default main method to pipeline */ diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala new file mode 100644 index 0000000000000000000000000000000000000000..f18b16812b48375bb7e1d57969ef9b1db7ba5691 --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala @@ -0,0 +1,17 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Default mode for the haplotypecaller */ +class HaplotypeCaller(val root: Configurable) extends Variantcaller { + val name = "haplotypecaller" + protected def defaultPrio = 1 + + def biopetScript() { + val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + add(hc) + } +} + diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala new file mode 100644 index 0000000000000000000000000000000000000000..a4bff551c7bff4a6376d16d8b0401b008a6aad8e --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala @@ -0,0 +1,18 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import 
nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Allele mode for Haplotypecaller */ +class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller { + val name = "haplotypecaller_allele" + protected def defaultPrio = 5 + + def biopetScript() { + val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + hc.alleles = config("input_alleles") + hc.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES + add(hc) + } +} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala new file mode 100644 index 0000000000000000000000000000000000000000..cf289cb789e4c5ad944b30eb0b71b163cc1115ff --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala @@ -0,0 +1,22 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Gvcf mode for haplotypecaller */ +class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller { + val name = "haplotypecaller_gvcf" + protected def defaultPrio = 5 + + def biopetScript() { + val gvcfFiles = for ((sample, inputBam) <- inputBams) yield { + val hc = broad.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) + add(hc) + hc.out + } + + val genotypeGVCFs = broad.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) + add(genotypeGVCFs) + } +} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala 
b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala new file mode 100644 index 0000000000000000000000000000000000000000..b71273b284c153ed628565d23c8ca0dffd7bd010 --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala @@ -0,0 +1,16 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Default mode for UnifiedGenotyper */ +class UnifiedGenotyper(val root: Configurable) extends Variantcaller { + val name = "unifiedgenotyper" + protected def defaultPrio = 20 + + def biopetScript() { + val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + add(ug) + } +} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala new file mode 100644 index 0000000000000000000000000000000000000000..61bb63ae3f897cc29bd37fc4c8af53f874faad28 --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala @@ -0,0 +1,18 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Allele mode for GenotyperAllele */ +class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller { + val name = "unifiedgenotyper_allele" + protected def defaultPrio = 9 + + def biopetScript() { + val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + 
ug.alleles = config("input_alleles") + ug.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES + add(ug) + } +} diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala index bd78727cad7e431d9af8dceaa8a4f6aef4c1d163..14207e5a829de7fd7cae4738a10fd9f6b28c5546 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala @@ -76,7 +76,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) - pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList + pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && diff --git a/protected/biopet-protected-package/pom.xml b/protected/biopet-protected-package/pom.xml index c5d31a083c6e328bae807e9c1001510689a25d67..e6228ebb6f54f15b653fb53872f7736a1833d43e 100644 --- a/protected/biopet-protected-package/pom.xml +++ b/protected/biopet-protected-package/pom.xml @@ -15,7 +15,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>BiopetGatk</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/protected/pom.xml b/protected/pom.xml index 15bbc1b4af960f9f238608ce50f6361df86b1e42..644d1c08504f15831f7c72ec2b1d1e4949c958fa 100644 --- a/protected/pom.xml +++ b/protected/pom.xml @@ -11,7 +11,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> 
<artifactId>BiopetRoot</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> <artifactId>BiopetGatk</artifactId> diff --git a/public/LICENSE b/public/LICENSE index b3d8f3ab4105c3de6e7d4776ddfabda6a458413f..6bd830857f966c0a6dbc0df954f646be9d01097f 120000 --- a/public/LICENSE +++ b/public/LICENSE @@ -1 +1 @@ -biopet-framework/src/main/resources/nl/lumc/sasc/biopet/License.txt \ No newline at end of file +biopet-core/src/main/resources/nl/lumc/sasc/biopet/License.txt \ No newline at end of file diff --git a/public/bam2wig/pom.xml b/public/bam2wig/pom.xml index 4ee5fd2681e87e9a999c4cfad22413dbd67a0bab..fab54a97890e332f3ac2e72833ac9d55d42e9d41 100644 --- a/public/bam2wig/pom.xml +++ b/public/bam2wig/pom.xml @@ -27,7 +27,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/bammetrics/pom.xml b/public/bammetrics/pom.xml index 00a9b094e48a83b83105ce348060d94743a4d75c..411086864943b1d61a84b9a132bbddce325cff95 100644 --- a/public/bammetrics/pom.xml +++ b/public/bammetrics/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp index f1bae9632f6bb046360496b0bf924515b791f794..af29b48fab64495b517427e08b416f161f7443cd 100644 --- a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp +++ b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp @@ -10,7 +10,7 @@ <%@ var rootPath: String %> 
<%@ var outputDir: File %> <%@ var metricsTag: String = "bammetrics" %> -<%@ var target: String %> +<%@ var target: Option[String] %> #{ val samples = sampleId match { case Some(sample) => List(sample.toString) @@ -46,7 +46,7 @@ #if (libs.head != libId) <tr> #end #if (!sampleLevel) <td><a href="${rootPath}Samples/${sample}/Libraries/${libId}/index.html">${libId}</a></td> #end #{ - val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List(metricsTag, "stats", target + "_cov_stats", "coverage", "_all") + val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List(metricsTag, "stats", target.get + "_cov_stats", "coverage", "_all") val total = summary.getValue((prefixPath ::: List("biopet_flagstat", "All")):_*).getOrElse(0L).asInstanceOf[Long] val mapped = summary.getValue((prefixPath ::: List("biopet_flagstat", "Mapped")):_*).getOrElse(0L).asInstanceOf[Long] val duplicates = summary.getValue((prefixPath ::: List("biopet_flagstat", "Duplicates")):_*).getOrElse(0L).asInstanceOf[Long] diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp index 8aea9602a65fd2282e38909b170722151ac13603..a89c4eb4cb5262adce57afe546f89db2e49b6849 100644 --- a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp +++ b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp @@ -8,13 +8,13 @@ <%@ var libId: Option[String] = None %> <%@ var outputDir: File %> <%@ var metricsTag: String = "bammetrics" %> -<%@ var target: String %> +<%@ var target: Option[String] %> #{ - val originalPlot = new File(summary.getValue(sampleId, libId, metricsTag, "files", target + "_cov_stats", "plot", "path") + val originalPlot = new File(summary.getValue(sampleId, libId, metricsTag, 
"files", target.get + "_cov_stats", "plot", "path") .getOrElse(throw new IllegalArgumentException("No plot found in summary")).toString) - val plot = new File(outputDir, target + "_cov_stats.png") + val plot = new File(outputDir, target.get + "_cov_stats.png") - val values = summary.getValue(sampleId, libId, metricsTag, "stats", target + "_cov_stats", "coverage", "_all") + val values = summary.getValue(sampleId, libId, metricsTag, "stats", target.get + "_cov_stats", "coverage", "_all") .getOrElse(throw new IllegalArgumentException("No plot found in summary")).asInstanceOf[Map[String, Any]] if (originalPlot.exists()) IoUtils.copyFile(originalPlot, plot) diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 981417a012d7d3e17ec87ad879e622d77273b01f..1e00d04cc24b86fa1f1df1cecbfe7bfc12c482aa 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -19,7 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag } +import nl.lumc.sasc.biopet.core.{ Reference, BiopetFifoPipe, PipelineCommand, SampleLibraryTag } import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect } import nl.lumc.sasc.biopet.extensions.picard._ import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat @@ -27,18 +27,17 @@ import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat import org.broadinstitute.gatk.queue.QScript -class BamMetrics(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag { +class BamMetrics(val 
root: Configurable) extends QScript + with SummaryQScript + with SampleLibraryTag + with Reference + with TargetRegions { + def this() = this(null) @Input(doc = "Bam File", shortName = "BAM", required = true) var inputBam: File = _ - /** Bed files for region of interests */ - var roiBedFiles: List[File] = config("regions_of_interest", Nil) - - /** Bed of amplicon that is used */ - var ampliconBedFile: Option[File] = config("amplicon_bed") - /** Settings for CollectRnaSeqMetrics */ var rnaMetricsSettings: Map[String, String] = Map() var transcriptRefFlatFile: Option[File] = config("transcript_refflat") @@ -51,7 +50,8 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit } /** returns files to store in summary */ - def summaryFiles = Map("input_bam" -> inputBam) ++ + def summaryFiles = Map("reference" -> referenceFasta(), + "input_bam" -> inputBam) ++ ampliconBedFile.map("amplicon" -> _).toMap ++ ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap @@ -77,7 +77,7 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit /** Script to add jobs */ def biopetScript() { - add(SamtoolsFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".flagstat"))) + add(SamtoolsFlagstat(this, inputBam, outputDir)) val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir) add(biopetFlagstat) @@ -140,7 +140,7 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam, ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir) add(pcrMetrics) - addSummarizable(chsMetrics, "targeted_pcr_metrics") + addSummarizable(pcrMetrics, "targeted_pcr_metrics") Intervals(bedFile, ampIntervals) } diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala index 
babc43f721b9a1bd4d03f6930a97e4cb454f4187..4da9ec9e5f878262e66473d559cc209580674992 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala @@ -23,7 +23,7 @@ import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue } import nl.lumc.sasc.biopet.utils.rscript.{ StackedBarPlot, LinePlot } class BammetricsReport(val root: Configurable) extends ReportBuilderExtension { - val builder = BammetricsReport + def builder = BammetricsReport } /** @@ -70,7 +70,7 @@ object BammetricsReport extends ReportBuilder { if (targets.isEmpty) List() else List("Targets" -> ReportPage( List(), - targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> t))), + targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))), Map())), List( "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala new file mode 100644 index 0000000000000000000000000000000000000000..01c6568c848479dc429b68c771b7f987cc4c9bd6 --- /dev/null +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala @@ -0,0 +1,16 @@ +package nl.lumc.sasc.biopet.pipelines.bammetrics + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Created by pjvan_thof on 11/20/15. 
+ */ +trait TargetRegions extends Configurable { + /** Bed files for region of interests */ + var roiBedFiles: List[File] = config("regions_of_interest", Nil) + + /** Bed of amplicon that is used */ + var ampliconBedFile: Option[File] = config("amplicon_bed") +} diff --git a/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala b/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala index dcd32fe1d71924353daea489b623745b729b72fd..744988cbadd378eb1aef85eef95b8961261f8d56 100644 --- a/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala +++ b/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala @@ -61,7 +61,7 @@ class BamMetricsTest extends TestNGSuite with Matchers { } @Test(dataProvider = "bammetricsOptions") - def testFlexiprep(rois: Int, amplicon: Boolean, rna: Boolean) = { + def testBamMetrics(rois: Int, amplicon: Boolean, rna: Boolean) = { val map = ConfigUtils.mergeMaps(Map("output_dir" -> BamMetricsTest.outputDir), Map(BamMetricsTest.executables.toSeq: _*)) ++ (if (amplicon) Map("amplicon_bed" -> "amplicon.bed") else Map()) ++ diff --git a/public/basty/pom.xml b/public/basty/pom.xml index 4dd6e5ef31827ea51eb1fc1c6976404570a9cd19..e7a4ca01c2aea65350049cbb3b104684cb44a4b4 100644 --- a/public/basty/pom.xml +++ b/public/basty/pom.xml @@ -32,7 +32,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala index d9085369f0316b6a0474fe02cd15ef07c683713a..675d4d7557be2edf84c41494edfaa30af9d13a78 100644 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala +++ 
b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala @@ -27,9 +27,10 @@ import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins } import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait } import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.QScript trait BastyTrait extends MultiSampleQScript { - qscript => + qscript: QScript => case class FastaOutput(variants: File, consensus: File, consensusVariants: File) @@ -180,7 +181,7 @@ trait BastyTrait extends MultiSampleQScript { snpsOnly: Boolean = false): FastaOutput = { val bastyGenerateFasta = new BastyGenerateFasta(this) bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName - bastyGenerateFasta.inputVcf = shiva.variantCalling.get.finalFile + bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile if (shiva.samples.contains(sampleName)) { bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get } diff --git a/public/biopet-core/pom.xml b/public/biopet-core/pom.xml index 1b38b9f501d68479a60e52d40418367dd44358b3..c2064007a268ffa2a822eb4565a95acb52448a40 100644 --- a/public/biopet-core/pom.xml +++ b/public/biopet-core/pom.xml @@ -1,11 +1,28 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. + + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. 
The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. + +--> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> <modelVersion>4.0.0</modelVersion> @@ -40,7 +57,7 @@ <dependency> <groupId>org.broadinstitute.gatk</groupId> <artifactId>gatk-queue</artifactId> - <version>3.4</version> + <version>3.5</version> <exclusions> <exclusion> <groupId>org.broadinstitute.gatk</groupId> @@ -51,7 +68,7 @@ <dependency> <groupId>org.broadinstitute.gatk</groupId> <artifactId>gatk-queue-extensions-public</artifactId> - <version>3.4</version> + <version>3.5</version> </dependency> <dependency> <groupId>org.scalatra.scalate</groupId> diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css index d705f1f35f05d1a3fe8007c3e044e0011b3e25cd..999f15db574b4d996acdd287f5599ea3be588cb4 100644 --- a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css +++ b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/bootstrap_dashboard.css @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ /* * Base structure */ diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css index ff68918cba21d3ad223530e6c195842ed3805951..980e3e00a3eeab261244b4f35ab8c99d0f3c666c 100644 --- a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css +++ b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/css/sortable-theme-bootstrap.css @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ /* line 2, ../sass/_sortable.sass */ table[data-sortable] { border-collapse: collapse; diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/d3.v3.5.5.min.js b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/d3.v3.5.5.min.js new file mode 100644 index 0000000000000000000000000000000000000000..34d5513ebfe117f8b47ee12c7b108610999039b5 --- /dev/null +++ b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/d3.v3.5.5.min.js @@ -0,0 +1,5 @@ +!function(){function n(n){return n&&(n.ownerDocument||n.document||n).documentElement}function t(n){return n&&(n.ownerDocument&&n.ownerDocument.defaultView||n.document&&n||n.defaultView)}function e(n,t){return t>n?-1:n>t?1:n>=t?0:0/0}function r(n){return null===n?0/0:+n}function u(n){return!isNaN(n)}function i(n){return{left:function(t,e,r,u){for(arguments.length<3&&(r=0),arguments.length<4&&(u=t.length);u>r;){var i=r+u>>>1;n(t[i],e)<0?r=i+1:u=i}return r},right:function(t,e,r,u){for(arguments.length<3&&(r=0),arguments.length<4&&(u=t.length);u>r;){var i=r+u>>>1;n(t[i],e)>0?u=i:r=i+1}return r}}}function o(n){return n.length}function a(n){for(var t=1;n*t%1;)t*=10;return t}function c(n,t){for(var e in t)Object.defineProperty(n.prototype,e,{value:t[e],enumerable:!1})}function l(){this._=Object.create(null)}function s(n){return(n+="")===pa||n[0]===va?va+n:n}function f(n){return(n+="")[0]===va?n.slice(1):n}function h(n){return s(n)in this._}function g(n){return(n=s(n))in this._&&delete this._[n]}function p(){var n=[];for(var t in this._)n.push(f(t));return n}function v(){var n=0;for(var t in this._)++n;return n}function d(){for(var n in this._)return!1;return!0}function m(){this._=Object.create(null)}function y(n){return n}function M(n,t,e){return function(){var r=e.apply(t,arguments);return r===t?n:r}}function x(n,t){if(t in n)return t;t=t.charAt(0).toUpperCase()+t.slice(1);for(var e=0,r=da.length;r>e;++e){var u=da[e]+t;if(u in n)return 
u}}function b(){}function _(){}function w(n){function t(){for(var t,r=e,u=-1,i=r.length;++u<i;)(t=r[u].on)&&t.apply(this,arguments);return n}var e=[],r=new l;return t.on=function(t,u){var i,o=r.get(t);return arguments.length<2?o&&o.on:(o&&(o.on=null,e=e.slice(0,i=e.indexOf(o)).concat(e.slice(i+1)),r.remove(t)),u&&e.push(r.set(t,{on:u})),n)},t}function S(){ta.event.preventDefault()}function k(){for(var n,t=ta.event;n=t.sourceEvent;)t=n;return t}function E(n){for(var t=new _,e=0,r=arguments.length;++e<r;)t[arguments[e]]=w(t);return t.of=function(e,r){return function(u){try{var i=u.sourceEvent=ta.event;u.target=n,ta.event=u,t[u.type].apply(e,r)}finally{ta.event=i}}},t}function A(n){return ya(n,_a),n}function N(n){return"function"==typeof n?n:function(){return Ma(n,this)}}function C(n){return"function"==typeof n?n:function(){return xa(n,this)}}function z(n,t){function e(){this.removeAttribute(n)}function r(){this.removeAttributeNS(n.space,n.local)}function u(){this.setAttribute(n,t)}function i(){this.setAttributeNS(n.space,n.local,t)}function o(){var e=t.apply(this,arguments);null==e?this.removeAttribute(n):this.setAttribute(n,e)}function a(){var e=t.apply(this,arguments);null==e?this.removeAttributeNS(n.space,n.local):this.setAttributeNS(n.space,n.local,e)}return n=ta.ns.qualify(n),null==t?n.local?r:e:"function"==typeof t?n.local?a:o:n.local?i:u}function q(n){return n.trim().replace(/\s+/g," ")}function L(n){return new RegExp("(?:^|\\s+)"+ta.requote(n)+"(?:\\s+|$)","g")}function T(n){return(n+"").trim().split(/^|\s+/)}function R(n,t){function e(){for(var e=-1;++e<u;)n[e](this,t)}function r(){for(var e=-1,r=t.apply(this,arguments);++e<u;)n[e](this,r)}n=T(n).map(D);var u=n.length;return"function"==typeof t?r:e}function D(n){var t=L(n);return function(e,r){if(u=e.classList)return r?u.add(n):u.remove(n);var u=e.getAttribute("class")||"";r?(t.lastIndex=0,t.test(u)||e.setAttribute("class",q(u+" "+n))):e.setAttribute("class",q(u.replace(t," ")))}}function P(n,t,e){function 
r(){this.style.removeProperty(n)}function u(){this.style.setProperty(n,t,e)}function i(){var r=t.apply(this,arguments);null==r?this.style.removeProperty(n):this.style.setProperty(n,r,e)}return null==t?r:"function"==typeof t?i:u}function U(n,t){function e(){delete this[n]}function r(){this[n]=t}function u(){var e=t.apply(this,arguments);null==e?delete this[n]:this[n]=e}return null==t?e:"function"==typeof t?u:r}function j(n){function t(){var t=this.ownerDocument,e=this.namespaceURI;return e?t.createElementNS(e,n):t.createElement(n)}function e(){return this.ownerDocument.createElementNS(n.space,n.local)}return"function"==typeof n?n:(n=ta.ns.qualify(n)).local?e:t}function F(){var n=this.parentNode;n&&n.removeChild(this)}function H(n){return{__data__:n}}function O(n){return function(){return ba(this,n)}}function I(n){return arguments.length||(n=e),function(t,e){return t&&e?n(t.__data__,e.__data__):!t-!e}}function Y(n,t){for(var e=0,r=n.length;r>e;e++)for(var u,i=n[e],o=0,a=i.length;a>o;o++)(u=i[o])&&t(u,o,e);return n}function Z(n){return ya(n,Sa),n}function V(n){var t,e;return function(r,u,i){var o,a=n[i].update,c=a.length;for(i!=e&&(e=i,t=0),u>=t&&(t=u+1);!(o=a[t])&&++t<c;);return o}}function X(n,t,e){function r(){var t=this[o];t&&(this.removeEventListener(n,t,t.$),delete this[o])}function u(){var u=c(t,ra(arguments));r.call(this),this.addEventListener(n,this[o]=u,u.$=e),u._=t}function i(){var t,e=new RegExp("^__on([^.]+)"+ta.requote(n)+"$");for(var r in this)if(t=r.match(e)){var u=this[r];this.removeEventListener(t[1],u,u.$),delete this[r]}}var o="__on"+n,a=n.indexOf("."),c=$;a>0&&(n=n.slice(0,a));var l=ka.get(n);return l&&(n=l,c=B),a?t?u:r:t?b:i}function $(n,t){return function(e){var r=ta.event;ta.event=e,t[0]=this.__data__;try{n.apply(this,t)}finally{ta.event=r}}}function B(n,t){var e=$(n,t);return function(n){var t=this,r=n.relatedTarget;r&&(r===t||8&r.compareDocumentPosition(t))||e.call(t,n)}}function W(e){var r=".dragsuppress-"+ 
++Aa,u="click"+r,i=ta.select(t(e)).on("touchmove"+r,S).on("dragstart"+r,S).on("selectstart"+r,S);if(null==Ea&&(Ea="onselectstart"in e?!1:x(e.style,"userSelect")),Ea){var o=n(e).style,a=o[Ea];o[Ea]="none"}return function(n){if(i.on(r,null),Ea&&(o[Ea]=a),n){var t=function(){i.on(u,null)};i.on(u,function(){S(),t()},!0),setTimeout(t,0)}}}function J(n,e){e.changedTouches&&(e=e.changedTouches[0]);var r=n.ownerSVGElement||n;if(r.createSVGPoint){var u=r.createSVGPoint();if(0>Na){var i=t(n);if(i.scrollX||i.scrollY){r=ta.select("body").append("svg").style({position:"absolute",top:0,left:0,margin:0,padding:0,border:"none"},"important");var o=r[0][0].getScreenCTM();Na=!(o.f||o.e),r.remove()}}return Na?(u.x=e.pageX,u.y=e.pageY):(u.x=e.clientX,u.y=e.clientY),u=u.matrixTransform(n.getScreenCTM().inverse()),[u.x,u.y]}var a=n.getBoundingClientRect();return[e.clientX-a.left-n.clientLeft,e.clientY-a.top-n.clientTop]}function G(){return ta.event.changedTouches[0].identifier}function K(n){return n>0?1:0>n?-1:0}function Q(n,t,e){return(t[0]-n[0])*(e[1]-n[1])-(t[1]-n[1])*(e[0]-n[0])}function nt(n){return n>1?0:-1>n?qa:Math.acos(n)}function tt(n){return n>1?Ra:-1>n?-Ra:Math.asin(n)}function et(n){return((n=Math.exp(n))-1/n)/2}function rt(n){return((n=Math.exp(n))+1/n)/2}function ut(n){return((n=Math.exp(2*n))-1)/(n+1)}function it(n){return(n=Math.sin(n/2))*n}function ot(){}function at(n,t,e){return this instanceof at?(this.h=+n,this.s=+t,void(this.l=+e)):arguments.length<2?n instanceof at?new at(n.h,n.s,n.l):bt(""+n,_t,at):new at(n,t,e)}function ct(n,t,e){function r(n){return n>360?n-=360:0>n&&(n+=360),60>n?i+(o-i)*n/60:180>n?o:240>n?i+(o-i)*(240-n)/60:i}function u(n){return Math.round(255*r(n))}var i,o;return n=isNaN(n)?0:(n%=360)<0?n+360:n,t=isNaN(t)?0:0>t?0:t>1?1:t,e=0>e?0:e>1?1:e,o=.5>=e?e*(1+t):e+t-e*t,i=2*e-o,new mt(u(n+120),u(n),u(n-120))}function lt(n,t,e){return this instanceof lt?(this.h=+n,this.c=+t,void(this.l=+e)):arguments.length<2?n instanceof lt?new lt(n.h,n.c,n.l):n 
instanceof ft?gt(n.l,n.a,n.b):gt((n=wt((n=ta.rgb(n)).r,n.g,n.b)).l,n.a,n.b):new lt(n,t,e)}function st(n,t,e){return isNaN(n)&&(n=0),isNaN(t)&&(t=0),new ft(e,Math.cos(n*=Da)*t,Math.sin(n)*t)}function ft(n,t,e){return this instanceof ft?(this.l=+n,this.a=+t,void(this.b=+e)):arguments.length<2?n instanceof ft?new ft(n.l,n.a,n.b):n instanceof lt?st(n.h,n.c,n.l):wt((n=mt(n)).r,n.g,n.b):new ft(n,t,e)}function ht(n,t,e){var r=(n+16)/116,u=r+t/500,i=r-e/200;return u=pt(u)*Xa,r=pt(r)*$a,i=pt(i)*Ba,new mt(dt(3.2404542*u-1.5371385*r-.4985314*i),dt(-.969266*u+1.8760108*r+.041556*i),dt(.0556434*u-.2040259*r+1.0572252*i))}function gt(n,t,e){return n>0?new lt(Math.atan2(e,t)*Pa,Math.sqrt(t*t+e*e),n):new lt(0/0,0/0,n)}function pt(n){return n>.206893034?n*n*n:(n-4/29)/7.787037}function vt(n){return n>.008856?Math.pow(n,1/3):7.787037*n+4/29}function dt(n){return Math.round(255*(.00304>=n?12.92*n:1.055*Math.pow(n,1/2.4)-.055))}function mt(n,t,e){return this instanceof mt?(this.r=~~n,this.g=~~t,void(this.b=~~e)):arguments.length<2?n instanceof mt?new mt(n.r,n.g,n.b):bt(""+n,mt,ct):new mt(n,t,e)}function yt(n){return new mt(n>>16,n>>8&255,255&n)}function Mt(n){return yt(n)+""}function xt(n){return 16>n?"0"+Math.max(0,n).toString(16):Math.min(255,n).toString(16)}function bt(n,t,e){var r,u,i,o=0,a=0,c=0;if(r=/([a-z]+)\((.*)\)/i.exec(n))switch(u=r[2].split(","),r[1]){case"hsl":return e(parseFloat(u[0]),parseFloat(u[1])/100,parseFloat(u[2])/100);case"rgb":return t(kt(u[0]),kt(u[1]),kt(u[2]))}return(i=Ga.get(n.toLowerCase()))?t(i.r,i.g,i.b):(null==n||"#"!==n.charAt(0)||isNaN(i=parseInt(n.slice(1),16))||(4===n.length?(o=(3840&i)>>4,o=o>>4|o,a=240&i,a=a>>4|a,c=15&i,c=c<<4|c):7===n.length&&(o=(16711680&i)>>16,a=(65280&i)>>8,c=255&i)),t(o,a,c))}function _t(n,t,e){var r,u,i=Math.min(n/=255,t/=255,e/=255),o=Math.max(n,t,e),a=o-i,c=(o+i)/2;return a?(u=.5>c?a/(o+i):a/(2-o-i),r=n==o?(t-e)/a+(e>t?6:0):t==o?(e-n)/a+2:(n-t)/a+4,r*=60):(r=0/0,u=c>0&&1>c?0:r),new at(r,u,c)}function 
wt(n,t,e){n=St(n),t=St(t),e=St(e);var r=vt((.4124564*n+.3575761*t+.1804375*e)/Xa),u=vt((.2126729*n+.7151522*t+.072175*e)/$a),i=vt((.0193339*n+.119192*t+.9503041*e)/Ba);return ft(116*u-16,500*(r-u),200*(u-i))}function St(n){return(n/=255)<=.04045?n/12.92:Math.pow((n+.055)/1.055,2.4)}function kt(n){var t=parseFloat(n);return"%"===n.charAt(n.length-1)?Math.round(2.55*t):t}function Et(n){return"function"==typeof n?n:function(){return n}}function At(n){return function(t,e,r){return 2===arguments.length&&"function"==typeof e&&(r=e,e=null),Nt(t,e,n,r)}}function Nt(n,t,e,r){function u(){var n,t=c.status;if(!t&&zt(c)||t>=200&&300>t||304===t){try{n=e.call(i,c)}catch(r){return void o.error.call(i,r)}o.load.call(i,n)}else o.error.call(i,c)}var i={},o=ta.dispatch("beforesend","progress","load","error"),a={},c=new XMLHttpRequest,l=null;return!this.XDomainRequest||"withCredentials"in c||!/^(http(s)?:)?\/\//.test(n)||(c=new XDomainRequest),"onload"in c?c.onload=c.onerror=u:c.onreadystatechange=function(){c.readyState>3&&u()},c.onprogress=function(n){var t=ta.event;ta.event=n;try{o.progress.call(i,c)}finally{ta.event=t}},i.header=function(n,t){return n=(n+"").toLowerCase(),arguments.length<2?a[n]:(null==t?delete a[n]:a[n]=t+"",i)},i.mimeType=function(n){return arguments.length?(t=null==n?null:n+"",i):t},i.responseType=function(n){return arguments.length?(l=n,i):l},i.response=function(n){return e=n,i},["get","post"].forEach(function(n){i[n]=function(){return i.send.apply(i,[n].concat(ra(arguments)))}}),i.send=function(e,r,u){if(2===arguments.length&&"function"==typeof r&&(u=r,r=null),c.open(e,n,!0),null==t||"accept"in a||(a.accept=t+",*/*"),c.setRequestHeader)for(var s in a)c.setRequestHeader(s,a[s]);return null!=t&&c.overrideMimeType&&c.overrideMimeType(t),null!=l&&(c.responseType=l),null!=u&&i.on("error",u).on("load",function(n){u(null,n)}),o.beforesend.call(i,c),c.send(null==r?null:r),i},i.abort=function(){return c.abort(),i},ta.rebind(i,o,"on"),null==r?i:i.get(Ct(r))}function 
Ct(n){return 1===n.length?function(t,e){n(null==t?e:null)}:n}function zt(n){var t=n.responseType;return t&&"text"!==t?n.response:n.responseText}function qt(){var n=Lt(),t=Tt()-n;t>24?(isFinite(t)&&(clearTimeout(tc),tc=setTimeout(qt,t)),nc=0):(nc=1,rc(qt))}function Lt(){var n=Date.now();for(ec=Ka;ec;)n>=ec.t&&(ec.f=ec.c(n-ec.t)),ec=ec.n;return n}function Tt(){for(var n,t=Ka,e=1/0;t;)t.f?t=n?n.n=t.n:Ka=t.n:(t.t<e&&(e=t.t),t=(n=t).n);return Qa=n,e}function Rt(n,t){return t-(n?Math.ceil(Math.log(n)/Math.LN10):1)}function Dt(n,t){var e=Math.pow(10,3*ga(8-t));return{scale:t>8?function(n){return n/e}:function(n){return n*e},symbol:n}}function Pt(n){var t=n.decimal,e=n.thousands,r=n.grouping,u=n.currency,i=r&&e?function(n,t){for(var u=n.length,i=[],o=0,a=r[0],c=0;u>0&&a>0&&(c+a+1>t&&(a=Math.max(1,t-c)),i.push(n.substring(u-=a,u+a)),!((c+=a+1)>t));)a=r[o=(o+1)%r.length];return i.reverse().join(e)}:y;return function(n){var e=ic.exec(n),r=e[1]||" ",o=e[2]||">",a=e[3]||"-",c=e[4]||"",l=e[5],s=+e[6],f=e[7],h=e[8],g=e[9],p=1,v="",d="",m=!1,y=!0;switch(h&&(h=+h.substring(1)),(l||"0"===r&&"="===o)&&(l=r="0",o="="),g){case"n":f=!0,g="g";break;case"%":p=100,d="%",g="f";break;case"p":p=100,d="%",g="r";break;case"b":case"o":case"x":case"X":"#"===c&&(v="0"+g.toLowerCase());case"c":y=!1;case"d":m=!0,h=0;break;case"s":p=-1,g="r"}"$"===c&&(v=u[0],d=u[1]),"r"!=g||h||(g="g"),null!=h&&("g"==g?h=Math.max(1,Math.min(21,h)):("e"==g||"f"==g)&&(h=Math.max(0,Math.min(20,h)))),g=oc.get(g)||Ut;var M=l&&f;return function(n){var e=d;if(m&&n%1)return"";var u=0>n||0===n&&0>1/n?(n=-n,"-"):"-"===a?"":a;if(0>p){var c=ta.formatPrefix(n,h);n=c.scale(n),e=c.symbol+d}else n*=p;n=g(n,h);var x,b,_=n.lastIndexOf(".");if(0>_){var w=y?n.lastIndexOf("e"):-1;0>w?(x=n,b=""):(x=n.substring(0,w),b=n.substring(w))}else x=n.substring(0,_),b=t+n.substring(_+1);!l&&f&&(x=i(x,1/0));var S=v.length+x.length+b.length+(M?0:u.length),k=s>S?new Array(S=s-S+1).join(r):"";return 
M&&(x=i(k+x,k.length?s-b.length:1/0)),u+=v,n=x+b,("<"===o?u+n+k:">"===o?k+u+n:"^"===o?k.substring(0,S>>=1)+u+n+k.substring(S):u+(M?n:k+n))+e}}}function Ut(n){return n+""}function jt(){this._=new Date(arguments.length>1?Date.UTC.apply(this,arguments):arguments[0])}function Ft(n,t,e){function r(t){var e=n(t),r=i(e,1);return r-t>t-e?e:r}function u(e){return t(e=n(new cc(e-1)),1),e}function i(n,e){return t(n=new cc(+n),e),n}function o(n,r,i){var o=u(n),a=[];if(i>1)for(;r>o;)e(o)%i||a.push(new Date(+o)),t(o,1);else for(;r>o;)a.push(new Date(+o)),t(o,1);return a}function a(n,t,e){try{cc=jt;var r=new jt;return r._=n,o(r,t,e)}finally{cc=Date}}n.floor=n,n.round=r,n.ceil=u,n.offset=i,n.range=o;var c=n.utc=Ht(n);return c.floor=c,c.round=Ht(r),c.ceil=Ht(u),c.offset=Ht(i),c.range=a,n}function Ht(n){return function(t,e){try{cc=jt;var r=new jt;return r._=t,n(r,e)._}finally{cc=Date}}}function Ot(n){function t(n){function t(t){for(var e,u,i,o=[],a=-1,c=0;++a<r;)37===n.charCodeAt(a)&&(o.push(n.slice(c,a)),null!=(u=sc[e=n.charAt(++a)])&&(e=n.charAt(++a)),(i=N[e])&&(e=i(t,null==u?"e"===e?" 
":"0":u)),o.push(e),c=a+1);return o.push(n.slice(c,a)),o.join("")}var r=n.length;return t.parse=function(t){var r={y:1900,m:0,d:1,H:0,M:0,S:0,L:0,Z:null},u=e(r,n,t,0);if(u!=t.length)return null;"p"in r&&(r.H=r.H%12+12*r.p);var i=null!=r.Z&&cc!==jt,o=new(i?jt:cc);return"j"in r?o.setFullYear(r.y,0,r.j):"w"in r&&("W"in r||"U"in r)?(o.setFullYear(r.y,0,1),o.setFullYear(r.y,0,"W"in r?(r.w+6)%7+7*r.W-(o.getDay()+5)%7:r.w+7*r.U-(o.getDay()+6)%7)):o.setFullYear(r.y,r.m,r.d),o.setHours(r.H+(r.Z/100|0),r.M+r.Z%100,r.S,r.L),i?o._:o},t.toString=function(){return n},t}function e(n,t,e,r){for(var u,i,o,a=0,c=t.length,l=e.length;c>a;){if(r>=l)return-1;if(u=t.charCodeAt(a++),37===u){if(o=t.charAt(a++),i=C[o in sc?t.charAt(a++):o],!i||(r=i(n,e,r))<0)return-1}else if(u!=e.charCodeAt(r++))return-1}return r}function r(n,t,e){_.lastIndex=0;var r=_.exec(t.slice(e));return r?(n.w=w.get(r[0].toLowerCase()),e+r[0].length):-1}function u(n,t,e){x.lastIndex=0;var r=x.exec(t.slice(e));return r?(n.w=b.get(r[0].toLowerCase()),e+r[0].length):-1}function i(n,t,e){E.lastIndex=0;var r=E.exec(t.slice(e));return r?(n.m=A.get(r[0].toLowerCase()),e+r[0].length):-1}function o(n,t,e){S.lastIndex=0;var r=S.exec(t.slice(e));return r?(n.m=k.get(r[0].toLowerCase()),e+r[0].length):-1}function a(n,t,r){return e(n,N.c.toString(),t,r)}function c(n,t,r){return e(n,N.x.toString(),t,r)}function l(n,t,r){return e(n,N.X.toString(),t,r)}function s(n,t,e){var r=M.get(t.slice(e,e+=2).toLowerCase());return null==r?-1:(n.p=r,e)}var f=n.dateTime,h=n.date,g=n.time,p=n.periods,v=n.days,d=n.shortDays,m=n.months,y=n.shortMonths;t.utc=function(n){function e(n){try{cc=jt;var t=new cc;return t._=n,r(t)}finally{cc=Date}}var r=t(n);return e.parse=function(n){try{cc=jt;var t=r.parse(n);return t&&t._}finally{cc=Date}},e.toString=r.toString,e},t.multi=t.utc.multi=ae;var M=ta.map(),x=Yt(v),b=Zt(v),_=Yt(d),w=Zt(d),S=Yt(m),k=Zt(m),E=Yt(y),A=Zt(y);p.forEach(function(n,t){M.set(n.toLowerCase(),t)});var N={a:function(n){return 
d[n.getDay()]},A:function(n){return v[n.getDay()]},b:function(n){return y[n.getMonth()]},B:function(n){return m[n.getMonth()]},c:t(f),d:function(n,t){return It(n.getDate(),t,2)},e:function(n,t){return It(n.getDate(),t,2)},H:function(n,t){return It(n.getHours(),t,2)},I:function(n,t){return It(n.getHours()%12||12,t,2)},j:function(n,t){return It(1+ac.dayOfYear(n),t,3)},L:function(n,t){return It(n.getMilliseconds(),t,3)},m:function(n,t){return It(n.getMonth()+1,t,2)},M:function(n,t){return It(n.getMinutes(),t,2)},p:function(n){return p[+(n.getHours()>=12)]},S:function(n,t){return It(n.getSeconds(),t,2)},U:function(n,t){return It(ac.sundayOfYear(n),t,2)},w:function(n){return n.getDay()},W:function(n,t){return It(ac.mondayOfYear(n),t,2)},x:t(h),X:t(g),y:function(n,t){return It(n.getFullYear()%100,t,2)},Y:function(n,t){return It(n.getFullYear()%1e4,t,4)},Z:ie,"%":function(){return"%"}},C={a:r,A:u,b:i,B:o,c:a,d:Qt,e:Qt,H:te,I:te,j:ne,L:ue,m:Kt,M:ee,p:s,S:re,U:Xt,w:Vt,W:$t,x:c,X:l,y:Wt,Y:Bt,Z:Jt,"%":oe};return t}function It(n,t,e){var r=0>n?"-":"",u=(r?-n:n)+"",i=u.length;return r+(e>i?new Array(e-i+1).join(t)+u:u)}function Yt(n){return new RegExp("^(?:"+n.map(ta.requote).join("|")+")","i")}function Zt(n){for(var t=new l,e=-1,r=n.length;++e<r;)t.set(n[e].toLowerCase(),e);return t}function Vt(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+1));return r?(n.w=+r[0],e+r[0].length):-1}function Xt(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e));return r?(n.U=+r[0],e+r[0].length):-1}function $t(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e));return r?(n.W=+r[0],e+r[0].length):-1}function Bt(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+4));return r?(n.y=+r[0],e+r[0].length):-1}function Wt(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.y=Gt(+r[0]),e+r[0].length):-1}function Jt(n,t,e){return/^[+-]\d{4}$/.test(t=t.slice(e,e+5))?(n.Z=-t,e+5):-1}function Gt(n){return n+(n>68?1900:2e3)}function Kt(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return 
r?(n.m=r[0]-1,e+r[0].length):-1}function Qt(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.d=+r[0],e+r[0].length):-1}function ne(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+3));return r?(n.j=+r[0],e+r[0].length):-1}function te(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.H=+r[0],e+r[0].length):-1}function ee(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.M=+r[0],e+r[0].length):-1}function re(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.S=+r[0],e+r[0].length):-1}function ue(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+3));return r?(n.L=+r[0],e+r[0].length):-1}function ie(n){var t=n.getTimezoneOffset(),e=t>0?"-":"+",r=ga(t)/60|0,u=ga(t)%60;return e+It(r,"0",2)+It(u,"0",2)}function oe(n,t,e){hc.lastIndex=0;var r=hc.exec(t.slice(e,e+1));return r?e+r[0].length:-1}function ae(n){for(var t=n.length,e=-1;++e<t;)n[e][0]=this(n[e][0]);return function(t){for(var e=0,r=n[e];!r[1](t);)r=n[++e];return r[0](t)}}function ce(){}function le(n,t,e){var r=e.s=n+t,u=r-n,i=r-u;e.t=n-i+(t-u)}function se(n,t){n&&dc.hasOwnProperty(n.type)&&dc[n.type](n,t)}function fe(n,t,e){var r,u=-1,i=n.length-e;for(t.lineStart();++u<i;)r=n[u],t.point(r[0],r[1],r[2]);t.lineEnd()}function he(n,t){var e=-1,r=n.length;for(t.polygonStart();++e<r;)fe(n[e],t,1);t.polygonEnd()}function ge(){function n(n,t){n*=Da,t=t*Da/2+qa/4;var e=n-r,o=e>=0?1:-1,a=o*e,c=Math.cos(t),l=Math.sin(t),s=i*l,f=u*c+s*Math.cos(a),h=s*o*Math.sin(a);yc.add(Math.atan2(h,f)),r=n,u=c,i=l}var t,e,r,u,i;Mc.point=function(o,a){Mc.point=n,r=(t=o)*Da,u=Math.cos(a=(e=a)*Da/2+qa/4),i=Math.sin(a)},Mc.lineEnd=function(){n(t,e)}}function pe(n){var t=n[0],e=n[1],r=Math.cos(e);return[r*Math.cos(t),r*Math.sin(t),Math.sin(e)]}function ve(n,t){return n[0]*t[0]+n[1]*t[1]+n[2]*t[2]}function de(n,t){return[n[1]*t[2]-n[2]*t[1],n[2]*t[0]-n[0]*t[2],n[0]*t[1]-n[1]*t[0]]}function me(n,t){n[0]+=t[0],n[1]+=t[1],n[2]+=t[2]}function ye(n,t){return[n[0]*t,n[1]*t,n[2]*t]}function Me(n){var 
t=Math.sqrt(n[0]*n[0]+n[1]*n[1]+n[2]*n[2]);n[0]/=t,n[1]/=t,n[2]/=t}function xe(n){return[Math.atan2(n[1],n[0]),tt(n[2])]}function be(n,t){return ga(n[0]-t[0])<Ca&&ga(n[1]-t[1])<Ca}function _e(n,t){n*=Da;var e=Math.cos(t*=Da);we(e*Math.cos(n),e*Math.sin(n),Math.sin(t))}function we(n,t,e){++xc,_c+=(n-_c)/xc,wc+=(t-wc)/xc,Sc+=(e-Sc)/xc}function Se(){function n(n,u){n*=Da;var i=Math.cos(u*=Da),o=i*Math.cos(n),a=i*Math.sin(n),c=Math.sin(u),l=Math.atan2(Math.sqrt((l=e*c-r*a)*l+(l=r*o-t*c)*l+(l=t*a-e*o)*l),t*o+e*a+r*c);bc+=l,kc+=l*(t+(t=o)),Ec+=l*(e+(e=a)),Ac+=l*(r+(r=c)),we(t,e,r)}var t,e,r;qc.point=function(u,i){u*=Da;var o=Math.cos(i*=Da);t=o*Math.cos(u),e=o*Math.sin(u),r=Math.sin(i),qc.point=n,we(t,e,r)}}function ke(){qc.point=_e}function Ee(){function n(n,t){n*=Da;var e=Math.cos(t*=Da),o=e*Math.cos(n),a=e*Math.sin(n),c=Math.sin(t),l=u*c-i*a,s=i*o-r*c,f=r*a-u*o,h=Math.sqrt(l*l+s*s+f*f),g=r*o+u*a+i*c,p=h&&-nt(g)/h,v=Math.atan2(h,g);Nc+=p*l,Cc+=p*s,zc+=p*f,bc+=v,kc+=v*(r+(r=o)),Ec+=v*(u+(u=a)),Ac+=v*(i+(i=c)),we(r,u,i)}var t,e,r,u,i;qc.point=function(o,a){t=o,e=a,qc.point=n,o*=Da;var c=Math.cos(a*=Da);r=c*Math.cos(o),u=c*Math.sin(o),i=Math.sin(a),we(r,u,i)},qc.lineEnd=function(){n(t,e),qc.lineEnd=ke,qc.point=_e}}function Ae(n,t){function e(e,r){return e=n(e,r),t(e[0],e[1])}return n.invert&&t.invert&&(e.invert=function(e,r){return e=t.invert(e,r),e&&n.invert(e[0],e[1])}),e}function Ne(){return!0}function Ce(n,t,e,r,u){var i=[],o=[];if(n.forEach(function(n){if(!((t=n.length-1)<=0)){var t,e=n[0],r=n[t];if(be(e,r)){u.lineStart();for(var a=0;t>a;++a)u.point((e=n[a])[0],e[1]);return void u.lineEnd()}var c=new qe(e,n,null,!0),l=new qe(e,null,c,!1);c.o=l,i.push(c),o.push(l),c=new qe(r,n,null,!1),l=new qe(r,null,c,!0),c.o=l,i.push(c),o.push(l)}}),o.sort(t),ze(i),ze(o),i.length){for(var a=0,c=e,l=o.length;l>a;++a)o[a].e=c=!c;for(var s,f,h=i[0];;){for(var g=h,p=!0;g.v;)if((g=g.n)===h)return;s=g.z,u.lineStart();do{if(g.v=g.o.v=!0,g.e){if(p)for(var 
a=0,l=s.length;l>a;++a)u.point((f=s[a])[0],f[1]);else r(g.x,g.n.x,1,u);g=g.n}else{if(p){s=g.p.z;for(var a=s.length-1;a>=0;--a)u.point((f=s[a])[0],f[1])}else r(g.x,g.p.x,-1,u);g=g.p}g=g.o,s=g.z,p=!p}while(!g.v);u.lineEnd()}}}function ze(n){if(t=n.length){for(var t,e,r=0,u=n[0];++r<t;)u.n=e=n[r],e.p=u,u=e;u.n=e=n[0],e.p=u}}function qe(n,t,e,r){this.x=n,this.z=t,this.o=e,this.e=r,this.v=!1,this.n=this.p=null}function Le(n,t,e,r){return function(u,i){function o(t,e){var r=u(t,e);n(t=r[0],e=r[1])&&i.point(t,e)}function a(n,t){var e=u(n,t);d.point(e[0],e[1])}function c(){y.point=a,d.lineStart()}function l(){y.point=o,d.lineEnd()}function s(n,t){v.push([n,t]);var e=u(n,t);x.point(e[0],e[1])}function f(){x.lineStart(),v=[]}function h(){s(v[0][0],v[0][1]),x.lineEnd();var n,t=x.clean(),e=M.buffer(),r=e.length;if(v.pop(),p.push(v),v=null,r)if(1&t){n=e[0];var u,r=n.length-1,o=-1;if(r>0){for(b||(i.polygonStart(),b=!0),i.lineStart();++o<r;)i.point((u=n[o])[0],u[1]);i.lineEnd()}}else r>1&&2&t&&e.push(e.pop().concat(e.shift())),g.push(e.filter(Te))}var g,p,v,d=t(i),m=u.invert(r[0],r[1]),y={point:o,lineStart:c,lineEnd:l,polygonStart:function(){y.point=s,y.lineStart=f,y.lineEnd=h,g=[],p=[]},polygonEnd:function(){y.point=o,y.lineStart=c,y.lineEnd=l,g=ta.merge(g);var n=Fe(m,p);g.length?(b||(i.polygonStart(),b=!0),Ce(g,De,n,e,i)):n&&(b||(i.polygonStart(),b=!0),i.lineStart(),e(null,null,1,i),i.lineEnd()),b&&(i.polygonEnd(),b=!1),g=p=null},sphere:function(){i.polygonStart(),i.lineStart(),e(null,null,1,i),i.lineEnd(),i.polygonEnd()}},M=Re(),x=t(M),b=!1;return y}}function Te(n){return n.length>1}function Re(){var n,t=[];return{lineStart:function(){t.push(n=[])},point:function(t,e){n.push([t,e])},lineEnd:b,buffer:function(){var e=t;return t=[],n=null,e},rejoin:function(){t.length>1&&t.push(t.pop().concat(t.shift()))}}}function De(n,t){return((n=n.x)[0]<0?n[1]-Ra-Ca:Ra-n[1])-((t=t.x)[0]<0?t[1]-Ra-Ca:Ra-t[1])}function Pe(n){var 
t,e=0/0,r=0/0,u=0/0;return{lineStart:function(){n.lineStart(),t=1},point:function(i,o){var a=i>0?qa:-qa,c=ga(i-e);ga(c-qa)<Ca?(n.point(e,r=(r+o)/2>0?Ra:-Ra),n.point(u,r),n.lineEnd(),n.lineStart(),n.point(a,r),n.point(i,r),t=0):u!==a&&c>=qa&&(ga(e-u)<Ca&&(e-=u*Ca),ga(i-a)<Ca&&(i-=a*Ca),r=Ue(e,r,i,o),n.point(u,r),n.lineEnd(),n.lineStart(),n.point(a,r),t=0),n.point(e=i,r=o),u=a},lineEnd:function(){n.lineEnd(),e=r=0/0},clean:function(){return 2-t}}}function Ue(n,t,e,r){var u,i,o=Math.sin(n-e);return ga(o)>Ca?Math.atan((Math.sin(t)*(i=Math.cos(r))*Math.sin(e)-Math.sin(r)*(u=Math.cos(t))*Math.sin(n))/(u*i*o)):(t+r)/2}function je(n,t,e,r){var u;if(null==n)u=e*Ra,r.point(-qa,u),r.point(0,u),r.point(qa,u),r.point(qa,0),r.point(qa,-u),r.point(0,-u),r.point(-qa,-u),r.point(-qa,0),r.point(-qa,u);else if(ga(n[0]-t[0])>Ca){var i=n[0]<t[0]?qa:-qa;u=e*i/2,r.point(-i,u),r.point(0,u),r.point(i,u)}else r.point(t[0],t[1])}function Fe(n,t){var e=n[0],r=n[1],u=[Math.sin(e),-Math.cos(e),0],i=0,o=0;yc.reset();for(var a=0,c=t.length;c>a;++a){var l=t[a],s=l.length;if(s)for(var f=l[0],h=f[0],g=f[1]/2+qa/4,p=Math.sin(g),v=Math.cos(g),d=1;;){d===s&&(d=0),n=l[d];var m=n[0],y=n[1]/2+qa/4,M=Math.sin(y),x=Math.cos(y),b=m-h,_=b>=0?1:-1,w=_*b,S=w>qa,k=p*M;if(yc.add(Math.atan2(k*_*Math.sin(w),v*x+k*Math.cos(w))),i+=S?b+_*La:b,S^h>=e^m>=e){var E=de(pe(f),pe(n));Me(E);var A=de(u,E);Me(A);var N=(S^b>=0?-1:1)*tt(A[2]);(r>N||r===N&&(E[0]||E[1]))&&(o+=S^b>=0?1:-1)}if(!d++)break;h=m,p=M,v=x,f=n}}return(-Ca>i||Ca>i&&0>yc)^1&o}function He(n){function t(n,t){return Math.cos(n)*Math.cos(t)>i}function e(n){var e,i,c,l,s;return{lineStart:function(){l=c=!1,s=1},point:function(f,h){var g,p=[f,h],v=t(f,h),d=o?v?0:u(f,h):v?u(f+(0>f?qa:-qa),h):0;if(!e&&(l=c=v)&&n.lineStart(),v!==c&&(g=r(e,p),(be(e,g)||be(p,g))&&(p[0]+=Ca,p[1]+=Ca,v=t(p[0],p[1]))),v!==c)s=0,v?(n.lineStart(),g=r(p,e),n.point(g[0],g[1])):(g=r(e,p),n.point(g[0],g[1]),n.lineEnd()),e=g;else if(a&&e&&o^v){var 
m;d&i||!(m=r(p,e,!0))||(s=0,o?(n.lineStart(),n.point(m[0][0],m[0][1]),n.point(m[1][0],m[1][1]),n.lineEnd()):(n.point(m[1][0],m[1][1]),n.lineEnd(),n.lineStart(),n.point(m[0][0],m[0][1])))}!v||e&&be(e,p)||n.point(p[0],p[1]),e=p,c=v,i=d},lineEnd:function(){c&&n.lineEnd(),e=null},clean:function(){return s|(l&&c)<<1}}}function r(n,t,e){var r=pe(n),u=pe(t),o=[1,0,0],a=de(r,u),c=ve(a,a),l=a[0],s=c-l*l;if(!s)return!e&&n;var f=i*c/s,h=-i*l/s,g=de(o,a),p=ye(o,f),v=ye(a,h);me(p,v);var d=g,m=ve(p,d),y=ve(d,d),M=m*m-y*(ve(p,p)-1);if(!(0>M)){var x=Math.sqrt(M),b=ye(d,(-m-x)/y);if(me(b,p),b=xe(b),!e)return b;var _,w=n[0],S=t[0],k=n[1],E=t[1];w>S&&(_=w,w=S,S=_);var A=S-w,N=ga(A-qa)<Ca,C=N||Ca>A;if(!N&&k>E&&(_=k,k=E,E=_),C?N?k+E>0^b[1]<(ga(b[0]-w)<Ca?k:E):k<=b[1]&&b[1]<=E:A>qa^(w<=b[0]&&b[0]<=S)){var z=ye(d,(-m+x)/y);return me(z,p),[b,xe(z)]}}}function u(t,e){var r=o?n:qa-n,u=0;return-r>t?u|=1:t>r&&(u|=2),-r>e?u|=4:e>r&&(u|=8),u}var i=Math.cos(n),o=i>0,a=ga(i)>Ca,c=gr(n,6*Da);return Le(t,e,c,o?[0,-n]:[-qa,n-qa])}function Oe(n,t,e,r){return function(u){var i,o=u.a,a=u.b,c=o.x,l=o.y,s=a.x,f=a.y,h=0,g=1,p=s-c,v=f-l;if(i=n-c,p||!(i>0)){if(i/=p,0>p){if(h>i)return;g>i&&(g=i)}else if(p>0){if(i>g)return;i>h&&(h=i)}if(i=e-c,p||!(0>i)){if(i/=p,0>p){if(i>g)return;i>h&&(h=i)}else if(p>0){if(h>i)return;g>i&&(g=i)}if(i=t-l,v||!(i>0)){if(i/=v,0>v){if(h>i)return;g>i&&(g=i)}else if(v>0){if(i>g)return;i>h&&(h=i)}if(i=r-l,v||!(0>i)){if(i/=v,0>v){if(i>g)return;i>h&&(h=i)}else if(v>0){if(h>i)return;g>i&&(g=i)}return h>0&&(u.a={x:c+h*p,y:l+h*v}),1>g&&(u.b={x:c+g*p,y:l+g*v}),u}}}}}}function Ie(n,t,e,r){function u(r,u){return ga(r[0]-n)<Ca?u>0?0:3:ga(r[0]-e)<Ca?u>0?2:1:ga(r[1]-t)<Ca?u>0?1:0:u>0?3:2}function i(n,t){return o(n.x,t.x)}function o(n,t){var e=u(n,1),r=u(t,1);return e!==r?e-r:0===e?t[1]-n[1]:1===e?n[0]-t[0]:2===e?n[1]-t[1]:t[0]-n[0]}return function(a){function c(n){for(var t=0,e=d.length,r=n[1],u=0;e>u;++u)for(var 
i,o=1,a=d[u],c=a.length,l=a[0];c>o;++o)i=a[o],l[1]<=r?i[1]>r&&Q(l,i,n)>0&&++t:i[1]<=r&&Q(l,i,n)<0&&--t,l=i;return 0!==t}function l(i,a,c,l){var s=0,f=0;if(null==i||(s=u(i,c))!==(f=u(a,c))||o(i,a)<0^c>0){do l.point(0===s||3===s?n:e,s>1?r:t);while((s=(s+c+4)%4)!==f)}else l.point(a[0],a[1])}function s(u,i){return u>=n&&e>=u&&i>=t&&r>=i}function f(n,t){s(n,t)&&a.point(n,t)}function h(){C.point=p,d&&d.push(m=[]),S=!0,w=!1,b=_=0/0}function g(){v&&(p(y,M),x&&w&&A.rejoin(),v.push(A.buffer())),C.point=f,w&&a.lineEnd()}function p(n,t){n=Math.max(-Tc,Math.min(Tc,n)),t=Math.max(-Tc,Math.min(Tc,t));var e=s(n,t);if(d&&m.push([n,t]),S)y=n,M=t,x=e,S=!1,e&&(a.lineStart(),a.point(n,t));else if(e&&w)a.point(n,t);else{var r={a:{x:b,y:_},b:{x:n,y:t}};N(r)?(w||(a.lineStart(),a.point(r.a.x,r.a.y)),a.point(r.b.x,r.b.y),e||a.lineEnd(),k=!1):e&&(a.lineStart(),a.point(n,t),k=!1)}b=n,_=t,w=e}var v,d,m,y,M,x,b,_,w,S,k,E=a,A=Re(),N=Oe(n,t,e,r),C={point:f,lineStart:h,lineEnd:g,polygonStart:function(){a=A,v=[],d=[],k=!0},polygonEnd:function(){a=E,v=ta.merge(v);var t=c([n,r]),e=k&&t,u=v.length;(e||u)&&(a.polygonStart(),e&&(a.lineStart(),l(null,null,1,a),a.lineEnd()),u&&Ce(v,i,t,l,a),a.polygonEnd()),v=d=m=null}};return C}}function Ye(n){var t=0,e=qa/3,r=ir(n),u=r(t,e);return u.parallels=function(n){return arguments.length?r(t=n[0]*qa/180,e=n[1]*qa/180):[t/qa*180,e/qa*180]},u}function Ze(n,t){function e(n,t){var e=Math.sqrt(i-2*u*Math.sin(t))/u;return[e*Math.sin(n*=u),o-e*Math.cos(n)]}var r=Math.sin(n),u=(r+Math.sin(t))/2,i=1+r*(2*u-r),o=Math.sqrt(i)/u;return e.invert=function(n,t){var e=o-t;return[Math.atan2(n,e)/u,tt((i-(n*n+e*e)*u*u)/(2*u))]},e}function Ve(){function n(n,t){Dc+=u*n-r*t,r=n,u=t}var t,e,r,u;Hc.point=function(i,o){Hc.point=n,t=r=i,e=u=o},Hc.lineEnd=function(){n(t,e)}}function Xe(n,t){Pc>n&&(Pc=n),n>jc&&(jc=n),Uc>t&&(Uc=t),t>Fc&&(Fc=t)}function $e(){function n(n,t){o.push("M",n,",",t,i)}function t(n,t){o.push("M",n,",",t),a.point=e}function e(n,t){o.push("L",n,",",t)}function 
r(){a.point=n}function u(){o.push("Z")}var i=Be(4.5),o=[],a={point:n,lineStart:function(){a.point=t},lineEnd:r,polygonStart:function(){a.lineEnd=u},polygonEnd:function(){a.lineEnd=r,a.point=n},pointRadius:function(n){return i=Be(n),a},result:function(){if(o.length){var n=o.join("");return o=[],n}}};return a}function Be(n){return"m0,"+n+"a"+n+","+n+" 0 1,1 0,"+-2*n+"a"+n+","+n+" 0 1,1 0,"+2*n+"z"}function We(n,t){_c+=n,wc+=t,++Sc}function Je(){function n(n,r){var u=n-t,i=r-e,o=Math.sqrt(u*u+i*i);kc+=o*(t+n)/2,Ec+=o*(e+r)/2,Ac+=o,We(t=n,e=r)}var t,e;Ic.point=function(r,u){Ic.point=n,We(t=r,e=u)}}function Ge(){Ic.point=We}function Ke(){function n(n,t){var e=n-r,i=t-u,o=Math.sqrt(e*e+i*i);kc+=o*(r+n)/2,Ec+=o*(u+t)/2,Ac+=o,o=u*n-r*t,Nc+=o*(r+n),Cc+=o*(u+t),zc+=3*o,We(r=n,u=t)}var t,e,r,u;Ic.point=function(i,o){Ic.point=n,We(t=r=i,e=u=o)},Ic.lineEnd=function(){n(t,e)}}function Qe(n){function t(t,e){n.moveTo(t+o,e),n.arc(t,e,o,0,La)}function e(t,e){n.moveTo(t,e),a.point=r}function r(t,e){n.lineTo(t,e)}function u(){a.point=t}function i(){n.closePath()}var o=4.5,a={point:t,lineStart:function(){a.point=e},lineEnd:u,polygonStart:function(){a.lineEnd=i},polygonEnd:function(){a.lineEnd=u,a.point=t},pointRadius:function(n){return o=n,a},result:b};return a}function nr(n){function t(n){return(a?r:e)(n)}function e(t){return rr(t,function(e,r){e=n(e,r),t.point(e[0],e[1])})}function r(t){function e(e,r){e=n(e,r),t.point(e[0],e[1])}function r(){M=0/0,S.point=i,t.lineStart()}function i(e,r){var i=pe([e,r]),o=n(e,r);u(M,x,y,b,_,w,M=o[0],x=o[1],y=e,b=i[0],_=i[1],w=i[2],a,t),t.point(M,x)}function o(){S.point=e,t.lineEnd()}function c(){r(),S.point=l,S.lineEnd=s}function l(n,t){i(f=n,h=t),g=M,p=x,v=b,d=_,m=w,S.point=i}function s(){u(M,x,y,b,_,w,g,p,f,v,d,m,a,t),S.lineEnd=o,o()}var f,h,g,p,v,d,m,y,M,x,b,_,w,S={point:e,lineStart:r,lineEnd:o,polygonStart:function(){t.polygonStart(),S.lineStart=c +},polygonEnd:function(){t.polygonEnd(),S.lineStart=r}};return S}function 
u(t,e,r,a,c,l,s,f,h,g,p,v,d,m){var y=s-t,M=f-e,x=y*y+M*M;if(x>4*i&&d--){var b=a+g,_=c+p,w=l+v,S=Math.sqrt(b*b+_*_+w*w),k=Math.asin(w/=S),E=ga(ga(w)-1)<Ca||ga(r-h)<Ca?(r+h)/2:Math.atan2(_,b),A=n(E,k),N=A[0],C=A[1],z=N-t,q=C-e,L=M*z-y*q;(L*L/x>i||ga((y*z+M*q)/x-.5)>.3||o>a*g+c*p+l*v)&&(u(t,e,r,a,c,l,N,C,E,b/=S,_/=S,w,d,m),m.point(N,C),u(N,C,E,b,_,w,s,f,h,g,p,v,d,m))}}var i=.5,o=Math.cos(30*Da),a=16;return t.precision=function(n){return arguments.length?(a=(i=n*n)>0&&16,t):Math.sqrt(i)},t}function tr(n){var t=nr(function(t,e){return n([t*Pa,e*Pa])});return function(n){return or(t(n))}}function er(n){this.stream=n}function rr(n,t){return{point:t,sphere:function(){n.sphere()},lineStart:function(){n.lineStart()},lineEnd:function(){n.lineEnd()},polygonStart:function(){n.polygonStart()},polygonEnd:function(){n.polygonEnd()}}}function ur(n){return ir(function(){return n})()}function ir(n){function t(n){return n=a(n[0]*Da,n[1]*Da),[n[0]*h+c,l-n[1]*h]}function e(n){return n=a.invert((n[0]-c)/h,(l-n[1])/h),n&&[n[0]*Pa,n[1]*Pa]}function r(){a=Ae(o=lr(m,M,x),i);var n=i(v,d);return c=g-n[0]*h,l=p+n[1]*h,u()}function u(){return s&&(s.valid=!1,s=null),t}var i,o,a,c,l,s,f=nr(function(n,t){return n=i(n,t),[n[0]*h+c,l-n[1]*h]}),h=150,g=480,p=250,v=0,d=0,m=0,M=0,x=0,b=Lc,_=y,w=null,S=null;return t.stream=function(n){return s&&(s.valid=!1),s=or(b(o,f(_(n)))),s.valid=!0,s},t.clipAngle=function(n){return arguments.length?(b=null==n?(w=n,Lc):He((w=+n)*Da),u()):w},t.clipExtent=function(n){return arguments.length?(S=n,_=n?Ie(n[0][0],n[0][1],n[1][0],n[1][1]):y,u()):S},t.scale=function(n){return arguments.length?(h=+n,r()):h},t.translate=function(n){return arguments.length?(g=+n[0],p=+n[1],r()):[g,p]},t.center=function(n){return arguments.length?(v=n[0]%360*Da,d=n[1]%360*Da,r()):[v*Pa,d*Pa]},t.rotate=function(n){return arguments.length?(m=n[0]%360*Da,M=n[1]%360*Da,x=n.length>2?n[2]%360*Da:0,r()):[m*Pa,M*Pa,x*Pa]},ta.rebind(t,f,"precision"),function(){return 
i=n.apply(this,arguments),t.invert=i.invert&&e,r()}}function or(n){return rr(n,function(t,e){n.point(t*Da,e*Da)})}function ar(n,t){return[n,t]}function cr(n,t){return[n>qa?n-La:-qa>n?n+La:n,t]}function lr(n,t,e){return n?t||e?Ae(fr(n),hr(t,e)):fr(n):t||e?hr(t,e):cr}function sr(n){return function(t,e){return t+=n,[t>qa?t-La:-qa>t?t+La:t,e]}}function fr(n){var t=sr(n);return t.invert=sr(-n),t}function hr(n,t){function e(n,t){var e=Math.cos(t),a=Math.cos(n)*e,c=Math.sin(n)*e,l=Math.sin(t),s=l*r+a*u;return[Math.atan2(c*i-s*o,a*r-l*u),tt(s*i+c*o)]}var r=Math.cos(n),u=Math.sin(n),i=Math.cos(t),o=Math.sin(t);return e.invert=function(n,t){var e=Math.cos(t),a=Math.cos(n)*e,c=Math.sin(n)*e,l=Math.sin(t),s=l*i-c*o;return[Math.atan2(c*i+l*o,a*r+s*u),tt(s*r-a*u)]},e}function gr(n,t){var e=Math.cos(n),r=Math.sin(n);return function(u,i,o,a){var c=o*t;null!=u?(u=pr(e,u),i=pr(e,i),(o>0?i>u:u>i)&&(u+=o*La)):(u=n+o*La,i=n-.5*c);for(var l,s=u;o>0?s>i:i>s;s-=c)a.point((l=xe([e,-r*Math.cos(s),-r*Math.sin(s)]))[0],l[1])}}function pr(n,t){var e=pe(t);e[0]-=n,Me(e);var r=nt(-e[1]);return((-e[2]<0?-r:r)+2*Math.PI-Ca)%(2*Math.PI)}function vr(n,t,e){var r=ta.range(n,t-Ca,e).concat(t);return function(n){return r.map(function(t){return[n,t]})}}function dr(n,t,e){var r=ta.range(n,t-Ca,e).concat(t);return function(n){return r.map(function(t){return[t,n]})}}function mr(n){return n.source}function yr(n){return n.target}function Mr(n,t,e,r){var u=Math.cos(t),i=Math.sin(t),o=Math.cos(r),a=Math.sin(r),c=u*Math.cos(n),l=u*Math.sin(n),s=o*Math.cos(e),f=o*Math.sin(e),h=2*Math.asin(Math.sqrt(it(r-t)+u*o*it(e-n))),g=1/Math.sin(h),p=h?function(n){var t=Math.sin(n*=h)*g,e=Math.sin(h-n)*g,r=e*c+t*s,u=e*l+t*f,o=e*i+t*a;return[Math.atan2(u,r)*Pa,Math.atan2(o,Math.sqrt(r*r+u*u))*Pa]}:function(){return[n*Pa,t*Pa]};return p.distance=h,p}function xr(){function n(n,u){var 
i=Math.sin(u*=Da),o=Math.cos(u),a=ga((n*=Da)-t),c=Math.cos(a);Yc+=Math.atan2(Math.sqrt((a=o*Math.sin(a))*a+(a=r*i-e*o*c)*a),e*i+r*o*c),t=n,e=i,r=o}var t,e,r;Zc.point=function(u,i){t=u*Da,e=Math.sin(i*=Da),r=Math.cos(i),Zc.point=n},Zc.lineEnd=function(){Zc.point=Zc.lineEnd=b}}function br(n,t){function e(t,e){var r=Math.cos(t),u=Math.cos(e),i=n(r*u);return[i*u*Math.sin(t),i*Math.sin(e)]}return e.invert=function(n,e){var r=Math.sqrt(n*n+e*e),u=t(r),i=Math.sin(u),o=Math.cos(u);return[Math.atan2(n*i,r*o),Math.asin(r&&e*i/r)]},e}function _r(n,t){function e(n,t){o>0?-Ra+Ca>t&&(t=-Ra+Ca):t>Ra-Ca&&(t=Ra-Ca);var e=o/Math.pow(u(t),i);return[e*Math.sin(i*n),o-e*Math.cos(i*n)]}var r=Math.cos(n),u=function(n){return Math.tan(qa/4+n/2)},i=n===t?Math.sin(n):Math.log(r/Math.cos(t))/Math.log(u(t)/u(n)),o=r*Math.pow(u(n),i)/i;return i?(e.invert=function(n,t){var e=o-t,r=K(i)*Math.sqrt(n*n+e*e);return[Math.atan2(n,e)/i,2*Math.atan(Math.pow(o/r,1/i))-Ra]},e):Sr}function wr(n,t){function e(n,t){var e=i-t;return[e*Math.sin(u*n),i-e*Math.cos(u*n)]}var r=Math.cos(n),u=n===t?Math.sin(n):(r-Math.cos(t))/(t-n),i=r/u+n;return ga(u)<Ca?ar:(e.invert=function(n,t){var e=i-t;return[Math.atan2(n,e)/u,i-K(u)*Math.sqrt(n*n+e*e)]},e)}function Sr(n,t){return[n,Math.log(Math.tan(qa/4+t/2))]}function kr(n){var t,e=ur(n),r=e.scale,u=e.translate,i=e.clipExtent;return e.scale=function(){var n=r.apply(e,arguments);return n===e?t?e.clipExtent(null):e:n},e.translate=function(){var n=u.apply(e,arguments);return n===e?t?e.clipExtent(null):e:n},e.clipExtent=function(n){var o=i.apply(e,arguments);if(o===e){if(t=null==n){var a=qa*r(),c=u();i([[c[0]-a,c[1]-a],[c[0]+a,c[1]+a]])}}else t&&(o=null);return o},e.clipExtent(null)}function Er(n,t){return[Math.log(Math.tan(qa/4+t/2)),-n]}function Ar(n){return n[0]}function Nr(n){return n[1]}function Cr(n){for(var t=n.length,e=[0,1],r=2,u=2;t>u;u++){for(;r>1&&Q(n[e[r-2]],n[e[r-1]],n[u])<=0;)--r;e[r++]=u}return e.slice(0,r)}function zr(n,t){return n[0]-t[0]||n[1]-t[1]}function 
qr(n,t,e){return(e[0]-t[0])*(n[1]-t[1])<(e[1]-t[1])*(n[0]-t[0])}function Lr(n,t,e,r){var u=n[0],i=e[0],o=t[0]-u,a=r[0]-i,c=n[1],l=e[1],s=t[1]-c,f=r[1]-l,h=(a*(c-l)-f*(u-i))/(f*o-a*s);return[u+h*o,c+h*s]}function Tr(n){var t=n[0],e=n[n.length-1];return!(t[0]-e[0]||t[1]-e[1])}function Rr(){tu(this),this.edge=this.site=this.circle=null}function Dr(n){var t=el.pop()||new Rr;return t.site=n,t}function Pr(n){Xr(n),Qc.remove(n),el.push(n),tu(n)}function Ur(n){var t=n.circle,e=t.x,r=t.cy,u={x:e,y:r},i=n.P,o=n.N,a=[n];Pr(n);for(var c=i;c.circle&&ga(e-c.circle.x)<Ca&&ga(r-c.circle.cy)<Ca;)i=c.P,a.unshift(c),Pr(c),c=i;a.unshift(c),Xr(c);for(var l=o;l.circle&&ga(e-l.circle.x)<Ca&&ga(r-l.circle.cy)<Ca;)o=l.N,a.push(l),Pr(l),l=o;a.push(l),Xr(l);var s,f=a.length;for(s=1;f>s;++s)l=a[s],c=a[s-1],Kr(l.edge,c.site,l.site,u);c=a[0],l=a[f-1],l.edge=Jr(c.site,l.site,null,u),Vr(c),Vr(l)}function jr(n){for(var t,e,r,u,i=n.x,o=n.y,a=Qc._;a;)if(r=Fr(a,o)-i,r>Ca)a=a.L;else{if(u=i-Hr(a,o),!(u>Ca)){r>-Ca?(t=a.P,e=a):u>-Ca?(t=a,e=a.N):t=e=a;break}if(!a.R){t=a;break}a=a.R}var c=Dr(n);if(Qc.insert(t,c),t||e){if(t===e)return Xr(t),e=Dr(t.site),Qc.insert(c,e),c.edge=e.edge=Jr(t.site,c.site),Vr(t),void Vr(e);if(!e)return void(c.edge=Jr(t.site,c.site));Xr(t),Xr(e);var l=t.site,s=l.x,f=l.y,h=n.x-s,g=n.y-f,p=e.site,v=p.x-s,d=p.y-f,m=2*(h*d-g*v),y=h*h+g*g,M=v*v+d*d,x={x:(d*y-g*M)/m+s,y:(h*M-v*y)/m+f};Kr(e.edge,l,p,x),c.edge=Jr(l,n,null,x),e.edge=Jr(n,p,null,x),Vr(t),Vr(e)}}function Fr(n,t){var e=n.site,r=e.x,u=e.y,i=u-t;if(!i)return r;var o=n.P;if(!o)return-1/0;e=o.site;var a=e.x,c=e.y,l=c-t;if(!l)return a;var s=a-r,f=1/i-1/l,h=s/l;return f?(-h+Math.sqrt(h*h-2*f*(s*s/(-2*l)-c+l/2+u-i/2)))/f+r:(r+a)/2}function Hr(n,t){var e=n.N;if(e)return Fr(e,t);var r=n.site;return r.y===t?r.x:1/0}function Or(n){this.site=n,this.edges=[]}function Ir(n){for(var 
t,e,r,u,i,o,a,c,l,s,f=n[0][0],h=n[1][0],g=n[0][1],p=n[1][1],v=Kc,d=v.length;d--;)if(i=v[d],i&&i.prepare())for(a=i.edges,c=a.length,o=0;c>o;)s=a[o].end(),r=s.x,u=s.y,l=a[++o%c].start(),t=l.x,e=l.y,(ga(r-t)>Ca||ga(u-e)>Ca)&&(a.splice(o,0,new Qr(Gr(i.site,s,ga(r-f)<Ca&&p-u>Ca?{x:f,y:ga(t-f)<Ca?e:p}:ga(u-p)<Ca&&h-r>Ca?{x:ga(e-p)<Ca?t:h,y:p}:ga(r-h)<Ca&&u-g>Ca?{x:h,y:ga(t-h)<Ca?e:g}:ga(u-g)<Ca&&r-f>Ca?{x:ga(e-g)<Ca?t:f,y:g}:null),i.site,null)),++c)}function Yr(n,t){return t.angle-n.angle}function Zr(){tu(this),this.x=this.y=this.arc=this.site=this.cy=null}function Vr(n){var t=n.P,e=n.N;if(t&&e){var r=t.site,u=n.site,i=e.site;if(r!==i){var o=u.x,a=u.y,c=r.x-o,l=r.y-a,s=i.x-o,f=i.y-a,h=2*(c*f-l*s);if(!(h>=-za)){var g=c*c+l*l,p=s*s+f*f,v=(f*g-l*p)/h,d=(c*p-s*g)/h,f=d+a,m=rl.pop()||new Zr;m.arc=n,m.site=u,m.x=v+o,m.y=f+Math.sqrt(v*v+d*d),m.cy=f,n.circle=m;for(var y=null,M=tl._;M;)if(m.y<M.y||m.y===M.y&&m.x<=M.x){if(!M.L){y=M.P;break}M=M.L}else{if(!M.R){y=M;break}M=M.R}tl.insert(y,m),y||(nl=m)}}}}function Xr(n){var t=n.circle;t&&(t.P||(nl=t.N),tl.remove(t),rl.push(t),tu(t),n.circle=null)}function $r(n){for(var t,e=Gc,r=Oe(n[0][0],n[0][1],n[1][0],n[1][1]),u=e.length;u--;)t=e[u],(!Br(t,n)||!r(t)||ga(t.a.x-t.b.x)<Ca&&ga(t.a.y-t.b.y)<Ca)&&(t.a=t.b=null,e.splice(u,1))}function Br(n,t){var e=n.b;if(e)return!0;var r,u,i=n.a,o=t[0][0],a=t[1][0],c=t[0][1],l=t[1][1],s=n.l,f=n.r,h=s.x,g=s.y,p=f.x,v=f.y,d=(h+p)/2,m=(g+v)/2;if(v===g){if(o>d||d>=a)return;if(h>p){if(i){if(i.y>=l)return}else i={x:d,y:c};e={x:d,y:l}}else{if(i){if(i.y<c)return}else i={x:d,y:l};e={x:d,y:c}}}else if(r=(h-p)/(v-g),u=m-r*d,-1>r||r>1)if(h>p){if(i){if(i.y>=l)return}else i={x:(c-u)/r,y:c};e={x:(l-u)/r,y:l}}else{if(i){if(i.y<c)return}else i={x:(l-u)/r,y:l};e={x:(c-u)/r,y:c}}else if(v>g){if(i){if(i.x>=a)return}else i={x:o,y:r*o+u};e={x:a,y:r*a+u}}else{if(i){if(i.x<o)return}else i={x:a,y:r*a+u};e={x:o,y:r*o+u}}return n.a=i,n.b=e,!0}function Wr(n,t){this.l=n,this.r=t,this.a=this.b=null}function Jr(n,t,e,r){var u=new 
Wr(n,t);return Gc.push(u),e&&Kr(u,n,t,e),r&&Kr(u,t,n,r),Kc[n.i].edges.push(new Qr(u,n,t)),Kc[t.i].edges.push(new Qr(u,t,n)),u}function Gr(n,t,e){var r=new Wr(n,null);return r.a=t,r.b=e,Gc.push(r),r}function Kr(n,t,e,r){n.a||n.b?n.l===e?n.b=r:n.a=r:(n.a=r,n.l=t,n.r=e)}function Qr(n,t,e){var r=n.a,u=n.b;this.edge=n,this.site=t,this.angle=e?Math.atan2(e.y-t.y,e.x-t.x):n.l===t?Math.atan2(u.x-r.x,r.y-u.y):Math.atan2(r.x-u.x,u.y-r.y)}function nu(){this._=null}function tu(n){n.U=n.C=n.L=n.R=n.P=n.N=null}function eu(n,t){var e=t,r=t.R,u=e.U;u?u.L===e?u.L=r:u.R=r:n._=r,r.U=u,e.U=r,e.R=r.L,e.R&&(e.R.U=e),r.L=e}function ru(n,t){var e=t,r=t.L,u=e.U;u?u.L===e?u.L=r:u.R=r:n._=r,r.U=u,e.U=r,e.L=r.R,e.L&&(e.L.U=e),r.R=e}function uu(n){for(;n.L;)n=n.L;return n}function iu(n,t){var e,r,u,i=n.sort(ou).pop();for(Gc=[],Kc=new Array(n.length),Qc=new nu,tl=new nu;;)if(u=nl,i&&(!u||i.y<u.y||i.y===u.y&&i.x<u.x))(i.x!==e||i.y!==r)&&(Kc[i.i]=new Or(i),jr(i),e=i.x,r=i.y),i=n.pop();else{if(!u)break;Ur(u.arc)}t&&($r(t),Ir(t));var o={cells:Kc,edges:Gc};return Qc=tl=Gc=Kc=null,o}function ou(n,t){return t.y-n.y||t.x-n.x}function au(n,t,e){return(n.x-e.x)*(t.y-n.y)-(n.x-t.x)*(e.y-n.y)}function cu(n){return n.x}function lu(n){return n.y}function su(){return{leaf:!0,nodes:[],point:null,x:null,y:null}}function fu(n,t,e,r,u,i){if(!n(t,e,r,u,i)){var o=.5*(e+u),a=.5*(r+i),c=t.nodes;c[0]&&fu(n,c[0],e,r,o,a),c[1]&&fu(n,c[1],o,r,u,a),c[2]&&fu(n,c[2],e,a,o,i),c[3]&&fu(n,c[3],o,a,u,i)}}function hu(n,t,e,r,u,i,o){var a,c=1/0;return function l(n,s,f,h,g){if(!(s>i||f>o||r>h||u>g)){if(p=n.point){var p,v=t-n.x,d=e-n.y,m=v*v+d*d;if(c>m){var y=Math.sqrt(c=m);r=t-y,u=e-y,i=t+y,o=e+y,a=p}}for(var M=n.nodes,x=.5*(s+h),b=.5*(f+g),_=t>=x,w=e>=b,S=w<<1|_,k=S+4;k>S;++S)if(n=M[3&S])switch(3&S){case 0:l(n,s,f,x,b);break;case 1:l(n,x,f,h,b);break;case 2:l(n,s,b,x,g);break;case 3:l(n,x,b,h,g)}}}(n,r,u,i,o),a}function gu(n,t){n=ta.rgb(n),t=ta.rgb(t);var e=n.r,r=n.g,u=n.b,i=t.r-e,o=t.g-r,a=t.b-u;return 
function(n){return"#"+xt(Math.round(e+i*n))+xt(Math.round(r+o*n))+xt(Math.round(u+a*n))}}function pu(n,t){var e,r={},u={};for(e in n)e in t?r[e]=mu(n[e],t[e]):u[e]=n[e];for(e in t)e in n||(u[e]=t[e]);return function(n){for(e in r)u[e]=r[e](n);return u}}function vu(n,t){return n=+n,t=+t,function(e){return n*(1-e)+t*e}}function du(n,t){var e,r,u,i=il.lastIndex=ol.lastIndex=0,o=-1,a=[],c=[];for(n+="",t+="";(e=il.exec(n))&&(r=ol.exec(t));)(u=r.index)>i&&(u=t.slice(i,u),a[o]?a[o]+=u:a[++o]=u),(e=e[0])===(r=r[0])?a[o]?a[o]+=r:a[++o]=r:(a[++o]=null,c.push({i:o,x:vu(e,r)})),i=ol.lastIndex;return i<t.length&&(u=t.slice(i),a[o]?a[o]+=u:a[++o]=u),a.length<2?c[0]?(t=c[0].x,function(n){return t(n)+""}):function(){return t}:(t=c.length,function(n){for(var e,r=0;t>r;++r)a[(e=c[r]).i]=e.x(n);return a.join("")})}function mu(n,t){for(var e,r=ta.interpolators.length;--r>=0&&!(e=ta.interpolators[r](n,t)););return e}function yu(n,t){var e,r=[],u=[],i=n.length,o=t.length,a=Math.min(n.length,t.length);for(e=0;a>e;++e)r.push(mu(n[e],t[e]));for(;i>e;++e)u[e]=n[e];for(;o>e;++e)u[e]=t[e];return function(n){for(e=0;a>e;++e)u[e]=r[e](n);return u}}function Mu(n){return function(t){return 0>=t?0:t>=1?1:n(t)}}function xu(n){return function(t){return 1-n(1-t)}}function bu(n){return function(t){return.5*(.5>t?n(2*t):2-n(2-2*t))}}function _u(n){return n*n}function wu(n){return n*n*n}function Su(n){if(0>=n)return 0;if(n>=1)return 1;var t=n*n,e=t*n;return 4*(.5>n?e:3*(n-t)+e-.75)}function ku(n){return function(t){return Math.pow(t,n)}}function Eu(n){return 1-Math.cos(n*Ra)}function Au(n){return Math.pow(2,10*(n-1))}function Nu(n){return 1-Math.sqrt(1-n*n)}function Cu(n,t){var e;return arguments.length<2&&(t=.45),arguments.length?e=t/La*Math.asin(1/n):(n=1,e=t/4),function(r){return 1+n*Math.pow(2,-10*r)*Math.sin((r-e)*La/t)}}function zu(n){return n||(n=1.70158),function(t){return t*t*((n+1)*t-n)}}function qu(n){return 
1/2.75>n?7.5625*n*n:2/2.75>n?7.5625*(n-=1.5/2.75)*n+.75:2.5/2.75>n?7.5625*(n-=2.25/2.75)*n+.9375:7.5625*(n-=2.625/2.75)*n+.984375}function Lu(n,t){n=ta.hcl(n),t=ta.hcl(t);var e=n.h,r=n.c,u=n.l,i=t.h-e,o=t.c-r,a=t.l-u;return isNaN(o)&&(o=0,r=isNaN(r)?t.c:r),isNaN(i)?(i=0,e=isNaN(e)?t.h:e):i>180?i-=360:-180>i&&(i+=360),function(n){return st(e+i*n,r+o*n,u+a*n)+""}}function Tu(n,t){n=ta.hsl(n),t=ta.hsl(t);var e=n.h,r=n.s,u=n.l,i=t.h-e,o=t.s-r,a=t.l-u;return isNaN(o)&&(o=0,r=isNaN(r)?t.s:r),isNaN(i)?(i=0,e=isNaN(e)?t.h:e):i>180?i-=360:-180>i&&(i+=360),function(n){return ct(e+i*n,r+o*n,u+a*n)+""}}function Ru(n,t){n=ta.lab(n),t=ta.lab(t);var e=n.l,r=n.a,u=n.b,i=t.l-e,o=t.a-r,a=t.b-u;return function(n){return ht(e+i*n,r+o*n,u+a*n)+""}}function Du(n,t){return t-=n,function(e){return Math.round(n+t*e)}}function Pu(n){var t=[n.a,n.b],e=[n.c,n.d],r=ju(t),u=Uu(t,e),i=ju(Fu(e,t,-u))||0;t[0]*e[1]<e[0]*t[1]&&(t[0]*=-1,t[1]*=-1,r*=-1,u*=-1),this.rotate=(r?Math.atan2(t[1],t[0]):Math.atan2(-e[0],e[1]))*Pa,this.translate=[n.e,n.f],this.scale=[r,i],this.skew=i?Math.atan2(u,i)*Pa:0}function Uu(n,t){return n[0]*t[0]+n[1]*t[1]}function ju(n){var t=Math.sqrt(Uu(n,n));return t&&(n[0]/=t,n[1]/=t),t}function Fu(n,t,e){return n[0]+=e*t[0],n[1]+=e*t[1],n}function Hu(n,t){var e,r=[],u=[],i=ta.transform(n),o=ta.transform(t),a=i.translate,c=o.translate,l=i.rotate,s=o.rotate,f=i.skew,h=o.skew,g=i.scale,p=o.scale;return 
a[0]!=c[0]||a[1]!=c[1]?(r.push("translate(",null,",",null,")"),u.push({i:1,x:vu(a[0],c[0])},{i:3,x:vu(a[1],c[1])})):r.push(c[0]||c[1]?"translate("+c+")":""),l!=s?(l-s>180?s+=360:s-l>180&&(l+=360),u.push({i:r.push(r.pop()+"rotate(",null,")")-2,x:vu(l,s)})):s&&r.push(r.pop()+"rotate("+s+")"),f!=h?u.push({i:r.push(r.pop()+"skewX(",null,")")-2,x:vu(f,h)}):h&&r.push(r.pop()+"skewX("+h+")"),g[0]!=p[0]||g[1]!=p[1]?(e=r.push(r.pop()+"scale(",null,",",null,")"),u.push({i:e-4,x:vu(g[0],p[0])},{i:e-2,x:vu(g[1],p[1])})):(1!=p[0]||1!=p[1])&&r.push(r.pop()+"scale("+p+")"),e=u.length,function(n){for(var t,i=-1;++i<e;)r[(t=u[i]).i]=t.x(n);return r.join("")}}function Ou(n,t){return t=(t-=n=+n)||1/t,function(e){return(e-n)/t}}function Iu(n,t){return t=(t-=n=+n)||1/t,function(e){return Math.max(0,Math.min(1,(e-n)/t))}}function Yu(n){for(var t=n.source,e=n.target,r=Vu(t,e),u=[t];t!==r;)t=t.parent,u.push(t);for(var i=u.length;e!==r;)u.splice(i,0,e),e=e.parent;return u}function Zu(n){for(var t=[],e=n.parent;null!=e;)t.push(n),n=e,e=e.parent;return t.push(n),t}function Vu(n,t){if(n===t)return n;for(var e=Zu(n),r=Zu(t),u=e.pop(),i=r.pop(),o=null;u===i;)o=u,u=e.pop(),i=r.pop();return o}function Xu(n){n.fixed|=2}function $u(n){n.fixed&=-7}function Bu(n){n.fixed|=4,n.px=n.x,n.py=n.y}function Wu(n){n.fixed&=-5}function Ju(n,t,e){var r=0,u=0;if(n.charge=0,!n.leaf)for(var i,o=n.nodes,a=o.length,c=-1;++c<a;)i=o[c],null!=i&&(Ju(i,t,e),n.charge+=i.charge,r+=i.charge*i.cx,u+=i.charge*i.cy);if(n.point){n.leaf||(n.point.x+=Math.random()-.5,n.point.y+=Math.random()-.5);var l=t*e[n.point.index];n.charge+=n.pointCharge=l,r+=l*n.point.x,u+=l*n.point.y}n.cx=r/n.charge,n.cy=u/n.charge}function Gu(n,t){return ta.rebind(n,t,"sort","children","value"),n.nodes=n,n.links=ri,n}function Ku(n,t){for(var e=[n];null!=(n=e.pop());)if(t(n),(u=n.children)&&(r=u.length))for(var r,u;--r>=0;)e.push(u[r])}function Qu(n,t){for(var e=[n],r=[];null!=(n=e.pop());)if(r.push(n),(i=n.children)&&(u=i.length))for(var 
u,i,o=-1;++o<u;)e.push(i[o]);for(;null!=(n=r.pop());)t(n)}function ni(n){return n.children}function ti(n){return n.value}function ei(n,t){return t.value-n.value}function ri(n){return ta.merge(n.map(function(n){return(n.children||[]).map(function(t){return{source:n,target:t}})}))}function ui(n){return n.x}function ii(n){return n.y}function oi(n,t,e){n.y0=t,n.y=e}function ai(n){return ta.range(n.length)}function ci(n){for(var t=-1,e=n[0].length,r=[];++t<e;)r[t]=0;return r}function li(n){for(var t,e=1,r=0,u=n[0][1],i=n.length;i>e;++e)(t=n[e][1])>u&&(r=e,u=t);return r}function si(n){return n.reduce(fi,0)}function fi(n,t){return n+t[1]}function hi(n,t){return gi(n,Math.ceil(Math.log(t.length)/Math.LN2+1))}function gi(n,t){for(var e=-1,r=+n[0],u=(n[1]-r)/t,i=[];++e<=t;)i[e]=u*e+r;return i}function pi(n){return[ta.min(n),ta.max(n)]}function vi(n,t){return n.value-t.value}function di(n,t){var e=n._pack_next;n._pack_next=t,t._pack_prev=n,t._pack_next=e,e._pack_prev=t}function mi(n,t){n._pack_next=t,t._pack_prev=n}function yi(n,t){var e=t.x-n.x,r=t.y-n.y,u=n.r+t.r;return.999*u*u>e*e+r*r}function Mi(n){function t(n){s=Math.min(n.x-n.r,s),f=Math.max(n.x+n.r,f),h=Math.min(n.y-n.r,h),g=Math.max(n.y+n.r,g)}if((e=n.children)&&(l=e.length)){var e,r,u,i,o,a,c,l,s=1/0,f=-1/0,h=1/0,g=-1/0;if(e.forEach(xi),r=e[0],r.x=-r.r,r.y=0,t(r),l>1&&(u=e[1],u.x=u.r,u.y=0,t(u),l>2))for(i=e[2],wi(r,u,i),t(i),di(r,i),r._pack_prev=i,di(i,u),u=r._pack_next,o=3;l>o;o++){wi(r,u,i=e[o]);var p=0,v=1,d=1;for(a=u._pack_next;a!==u;a=a._pack_next,v++)if(yi(a,i)){p=1;break}if(1==p)for(c=r._pack_prev;c!==a._pack_prev&&!yi(c,i);c=c._pack_prev,d++);p?(d>v||v==d&&u.r<r.r?mi(r,u=a):mi(r=c,u),o--):(di(r,i),u=i,t(i))}var m=(s+f)/2,y=(h+g)/2,M=0;for(o=0;l>o;o++)i=e[o],i.x-=m,i.y-=y,M=Math.max(M,i.r+Math.sqrt(i.x*i.x+i.y*i.y));n.r=M,e.forEach(bi)}}function xi(n){n._pack_next=n._pack_prev=n}function bi(n){delete n._pack_next,delete n._pack_prev}function _i(n,t,e,r){var 
u=n.children;if(n.x=t+=r*n.x,n.y=e+=r*n.y,n.r*=r,u)for(var i=-1,o=u.length;++i<o;)_i(u[i],t,e,r)}function wi(n,t,e){var r=n.r+e.r,u=t.x-n.x,i=t.y-n.y;if(r&&(u||i)){var o=t.r+e.r,a=u*u+i*i;o*=o,r*=r;var c=.5+(r-o)/(2*a),l=Math.sqrt(Math.max(0,2*o*(r+a)-(r-=a)*r-o*o))/(2*a);e.x=n.x+c*u+l*i,e.y=n.y+c*i-l*u}else e.x=n.x+r,e.y=n.y}function Si(n,t){return n.parent==t.parent?1:2}function ki(n){var t=n.children;return t.length?t[0]:n.t}function Ei(n){var t,e=n.children;return(t=e.length)?e[t-1]:n.t}function Ai(n,t,e){var r=e/(t.i-n.i);t.c-=r,t.s+=e,n.c+=r,t.z+=e,t.m+=e}function Ni(n){for(var t,e=0,r=0,u=n.children,i=u.length;--i>=0;)t=u[i],t.z+=e,t.m+=e,e+=t.s+(r+=t.c)}function Ci(n,t,e){return n.a.parent===t.parent?n.a:e}function zi(n){return 1+ta.max(n,function(n){return n.y})}function qi(n){return n.reduce(function(n,t){return n+t.x},0)/n.length}function Li(n){var t=n.children;return t&&t.length?Li(t[0]):n}function Ti(n){var t,e=n.children;return e&&(t=e.length)?Ti(e[t-1]):n}function Ri(n){return{x:n.x,y:n.y,dx:n.dx,dy:n.dy}}function Di(n,t){var e=n.x+t[3],r=n.y+t[0],u=n.dx-t[1]-t[3],i=n.dy-t[0]-t[2];return 0>u&&(e+=u/2,u=0),0>i&&(r+=i/2,i=0),{x:e,y:r,dx:u,dy:i}}function Pi(n){var t=n[0],e=n[n.length-1];return e>t?[t,e]:[e,t]}function Ui(n){return n.rangeExtent?n.rangeExtent():Pi(n.range())}function ji(n,t,e,r){var u=e(n[0],n[1]),i=r(t[0],t[1]);return function(n){return i(u(n))}}function Fi(n,t){var e,r=0,u=n.length-1,i=n[r],o=n[u];return i>o&&(e=r,r=u,u=e,e=i,i=o,o=e),n[r]=t.floor(i),n[u]=t.ceil(o),n}function Hi(n){return n?{floor:function(t){return Math.floor(t/n)*n},ceil:function(t){return Math.ceil(t/n)*n}}:ml}function Oi(n,t,e,r){var u=[],i=[],o=0,a=Math.min(n.length,t.length)-1;for(n[a]<n[0]&&(n=n.slice().reverse(),t=t.slice().reverse());++o<=a;)u.push(e(n[o-1],n[o])),i.push(r(t[o-1],t[o]));return function(t){var e=ta.bisect(n,t,1,a)-1;return i[e](u[e](t))}}function Ii(n,t,e,r){function u(){var u=Math.min(n.length,t.length)>2?Oi:ji,c=r?Iu:Ou;return 
o=u(n,t,c,e),a=u(t,n,c,mu),i}function i(n){return o(n)}var o,a;return i.invert=function(n){return a(n)},i.domain=function(t){return arguments.length?(n=t.map(Number),u()):n},i.range=function(n){return arguments.length?(t=n,u()):t},i.rangeRound=function(n){return i.range(n).interpolate(Du)},i.clamp=function(n){return arguments.length?(r=n,u()):r},i.interpolate=function(n){return arguments.length?(e=n,u()):e},i.ticks=function(t){return Xi(n,t)},i.tickFormat=function(t,e){return $i(n,t,e)},i.nice=function(t){return Zi(n,t),u()},i.copy=function(){return Ii(n,t,e,r)},u()}function Yi(n,t){return ta.rebind(n,t,"range","rangeRound","interpolate","clamp")}function Zi(n,t){return Fi(n,Hi(Vi(n,t)[2]))}function Vi(n,t){null==t&&(t=10);var e=Pi(n),r=e[1]-e[0],u=Math.pow(10,Math.floor(Math.log(r/t)/Math.LN10)),i=t/r*u;return.15>=i?u*=10:.35>=i?u*=5:.75>=i&&(u*=2),e[0]=Math.ceil(e[0]/u)*u,e[1]=Math.floor(e[1]/u)*u+.5*u,e[2]=u,e}function Xi(n,t){return ta.range.apply(ta,Vi(n,t))}function $i(n,t,e){var r=Vi(n,t);if(e){var u=ic.exec(e);if(u.shift(),"s"===u[8]){var i=ta.formatPrefix(Math.max(ga(r[0]),ga(r[1])));return u[7]||(u[7]="."+Bi(i.scale(r[2]))),u[8]="f",e=ta.format(u.join("")),function(n){return e(i.scale(n))+i.symbol}}u[7]||(u[7]="."+Wi(u[8],r)),e=u.join("")}else e=",."+Bi(r[2])+"f";return ta.format(e)}function Bi(n){return-Math.floor(Math.log(n)/Math.LN10+.01)}function Wi(n,t){var e=Bi(t[2]);return n in yl?Math.abs(e-Bi(Math.max(ga(t[0]),ga(t[1]))))+ +("e"!==n):e-2*("%"===n)}function Ji(n,t,e,r){function u(n){return(e?Math.log(0>n?0:n):-Math.log(n>0?0:-n))/Math.log(t)}function i(n){return e?Math.pow(t,n):-Math.pow(t,-n)}function o(t){return n(u(t))}return o.invert=function(t){return i(n.invert(t))},o.domain=function(t){return arguments.length?(e=t[0]>=0,n.domain((r=t.map(Number)).map(u)),o):r},o.base=function(e){return arguments.length?(t=+e,n.domain(r.map(u)),o):t},o.nice=function(){var t=Fi(r.map(u),e?Math:xl);return n.domain(t),r=t.map(i),o},o.ticks=function(){var 
n=Pi(r),o=[],a=n[0],c=n[1],l=Math.floor(u(a)),s=Math.ceil(u(c)),f=t%1?2:t;if(isFinite(s-l)){if(e){for(;s>l;l++)for(var h=1;f>h;h++)o.push(i(l)*h);o.push(i(l))}else for(o.push(i(l));l++<s;)for(var h=f-1;h>0;h--)o.push(i(l)*h);for(l=0;o[l]<a;l++);for(s=o.length;o[s-1]>c;s--);o=o.slice(l,s)}return o},o.tickFormat=function(n,t){if(!arguments.length)return Ml;arguments.length<2?t=Ml:"function"!=typeof t&&(t=ta.format(t));var r,a=Math.max(.1,n/o.ticks().length),c=e?(r=1e-12,Math.ceil):(r=-1e-12,Math.floor);return function(n){return n/i(c(u(n)+r))<=a?t(n):""}},o.copy=function(){return Ji(n.copy(),t,e,r)},Yi(o,n)}function Gi(n,t,e){function r(t){return n(u(t))}var u=Ki(t),i=Ki(1/t);return r.invert=function(t){return i(n.invert(t))},r.domain=function(t){return arguments.length?(n.domain((e=t.map(Number)).map(u)),r):e},r.ticks=function(n){return Xi(e,n)},r.tickFormat=function(n,t){return $i(e,n,t)},r.nice=function(n){return r.domain(Zi(e,n))},r.exponent=function(o){return arguments.length?(u=Ki(t=o),i=Ki(1/t),n.domain(e.map(u)),r):t},r.copy=function(){return Gi(n.copy(),t,e)},Yi(r,n)}function Ki(n){return function(t){return 0>t?-Math.pow(-t,n):Math.pow(t,n)}}function Qi(n,t){function e(e){return i[((u.get(e)||("range"===t.t?u.set(e,n.push(e)):0/0))-1)%i.length]}function r(t,e){return ta.range(n.length).map(function(n){return t+e*n})}var u,i,o;return e.domain=function(r){if(!arguments.length)return n;n=[],u=new l;for(var i,o=-1,a=r.length;++o<a;)u.has(i=r[o])||u.set(i,n.push(i));return e[t.t].apply(e,t.a)},e.range=function(n){return arguments.length?(i=n,o=0,t={t:"range",a:arguments},e):i},e.rangePoints=function(u,a){arguments.length<2&&(a=0);var c=u[0],l=u[1],s=n.length<2?(c=(c+l)/2,0):(l-c)/(n.length-1+a);return i=r(c+s*a/2,s),o=0,t={t:"rangePoints",a:arguments},e},e.rangeRoundPoints=function(u,a){arguments.length<2&&(a=0);var c=u[0],l=u[1],s=n.length<2?(c=l=Math.round((c+l)/2),0):(l-c)/(n.length-1+a)|0;return 
i=r(c+Math.round(s*a/2+(l-c-(n.length-1+a)*s)/2),s),o=0,t={t:"rangeRoundPoints",a:arguments},e},e.rangeBands=function(u,a,c){arguments.length<2&&(a=0),arguments.length<3&&(c=a);var l=u[1]<u[0],s=u[l-0],f=u[1-l],h=(f-s)/(n.length-a+2*c);return i=r(s+h*c,h),l&&i.reverse(),o=h*(1-a),t={t:"rangeBands",a:arguments},e},e.rangeRoundBands=function(u,a,c){arguments.length<2&&(a=0),arguments.length<3&&(c=a);var l=u[1]<u[0],s=u[l-0],f=u[1-l],h=Math.floor((f-s)/(n.length-a+2*c));return i=r(s+Math.round((f-s-(n.length-a)*h)/2),h),l&&i.reverse(),o=Math.round(h*(1-a)),t={t:"rangeRoundBands",a:arguments},e},e.rangeBand=function(){return o},e.rangeExtent=function(){return Pi(t.a[0])},e.copy=function(){return Qi(n,t)},e.domain(n)}function no(n,t){function i(){var e=0,r=t.length;for(a=[];++e<r;)a[e-1]=ta.quantile(n,e/r);return o}function o(n){return isNaN(n=+n)?void 0:t[ta.bisect(a,n)]}var a;return o.domain=function(t){return arguments.length?(n=t.map(r).filter(u).sort(e),i()):n},o.range=function(n){return arguments.length?(t=n,i()):t},o.quantiles=function(){return a},o.invertExtent=function(e){return e=t.indexOf(e),0>e?[0/0,0/0]:[e>0?a[e-1]:n[0],e<a.length?a[e]:n[n.length-1]]},o.copy=function(){return no(n,t)},i()}function to(n,t,e){function r(t){return e[Math.max(0,Math.min(o,Math.floor(i*(t-n))))]}function u(){return i=e.length/(t-n),o=e.length-1,r}var i,o;return r.domain=function(e){return arguments.length?(n=+e[0],t=+e[e.length-1],u()):[n,t]},r.range=function(n){return arguments.length?(e=n,u()):e},r.invertExtent=function(t){return t=e.indexOf(t),t=0>t?0/0:t/i+n,[t,t+1/i]},r.copy=function(){return to(n,t,e)},u()}function eo(n,t){function e(e){return e>=e?t[ta.bisect(n,e)]:void 0}return e.domain=function(t){return arguments.length?(n=t,e):n},e.range=function(n){return arguments.length?(t=n,e):t},e.invertExtent=function(e){return e=t.indexOf(e),[n[e-1],n[e]]},e.copy=function(){return eo(n,t)},e}function ro(n){function t(n){return+n}return 
t.invert=t,t.domain=t.range=function(e){return arguments.length?(n=e.map(t),t):n},t.ticks=function(t){return Xi(n,t)},t.tickFormat=function(t,e){return $i(n,t,e)},t.copy=function(){return ro(n)},t}function uo(){return 0}function io(n){return n.innerRadius}function oo(n){return n.outerRadius}function ao(n){return n.startAngle}function co(n){return n.endAngle}function lo(n){return n&&n.padAngle}function so(n,t,e,r){return(n-e)*t-(t-r)*n>0?0:1}function fo(n,t,e,r,u){var i=n[0]-t[0],o=n[1]-t[1],a=(u?r:-r)/Math.sqrt(i*i+o*o),c=a*o,l=-a*i,s=n[0]+c,f=n[1]+l,h=t[0]+c,g=t[1]+l,p=(s+h)/2,v=(f+g)/2,d=h-s,m=g-f,y=d*d+m*m,M=e-r,x=s*g-h*f,b=(0>m?-1:1)*Math.sqrt(M*M*y-x*x),_=(x*m-d*b)/y,w=(-x*d-m*b)/y,S=(x*m+d*b)/y,k=(-x*d+m*b)/y,E=_-p,A=w-v,N=S-p,C=k-v;return E*E+A*A>N*N+C*C&&(_=S,w=k),[[_-c,w-l],[_*e/M,w*e/M]]}function ho(n){function t(t){function o(){l.push("M",i(n(s),a))}for(var c,l=[],s=[],f=-1,h=t.length,g=Et(e),p=Et(r);++f<h;)u.call(this,c=t[f],f)?s.push([+g.call(this,c,f),+p.call(this,c,f)]):s.length&&(o(),s=[]);return s.length&&o(),l.length?l.join(""):null}var e=Ar,r=Nr,u=Ne,i=go,o=i.key,a=.7;return t.x=function(n){return arguments.length?(e=n,t):e},t.y=function(n){return arguments.length?(r=n,t):r},t.defined=function(n){return arguments.length?(u=n,t):u},t.interpolate=function(n){return arguments.length?(o="function"==typeof n?i=n:(i=El.get(n)||go).key,t):o},t.tension=function(n){return arguments.length?(a=n,t):a},t}function go(n){return n.join("L")}function po(n){return go(n)+"Z"}function vo(n){for(var t=0,e=n.length,r=n[0],u=[r[0],",",r[1]];++t<e;)u.push("H",(r[0]+(r=n[t])[0])/2,"V",r[1]);return e>1&&u.push("H",r[0]),u.join("")}function mo(n){for(var t=0,e=n.length,r=n[0],u=[r[0],",",r[1]];++t<e;)u.push("V",(r=n[t])[1],"H",r[0]);return u.join("")}function yo(n){for(var t=0,e=n.length,r=n[0],u=[r[0],",",r[1]];++t<e;)u.push("H",(r=n[t])[0],"V",r[1]);return u.join("")}function Mo(n,t){return n.length<4?go(n):n[1]+_o(n.slice(1,-1),wo(n,t))}function xo(n,t){return 
n.length<3?go(n):n[0]+_o((n.push(n[0]),n),wo([n[n.length-2]].concat(n,[n[1]]),t))}function bo(n,t){return n.length<3?go(n):n[0]+_o(n,wo(n,t))}function _o(n,t){if(t.length<1||n.length!=t.length&&n.length!=t.length+2)return go(n);var e=n.length!=t.length,r="",u=n[0],i=n[1],o=t[0],a=o,c=1;if(e&&(r+="Q"+(i[0]-2*o[0]/3)+","+(i[1]-2*o[1]/3)+","+i[0]+","+i[1],u=n[1],c=2),t.length>1){a=t[1],i=n[c],c++,r+="C"+(u[0]+o[0])+","+(u[1]+o[1])+","+(i[0]-a[0])+","+(i[1]-a[1])+","+i[0]+","+i[1];for(var l=2;l<t.length;l++,c++)i=n[c],a=t[l],r+="S"+(i[0]-a[0])+","+(i[1]-a[1])+","+i[0]+","+i[1]}if(e){var s=n[c];r+="Q"+(i[0]+2*a[0]/3)+","+(i[1]+2*a[1]/3)+","+s[0]+","+s[1]}return r}function wo(n,t){for(var e,r=[],u=(1-t)/2,i=n[0],o=n[1],a=1,c=n.length;++a<c;)e=i,i=o,o=n[a],r.push([u*(o[0]-e[0]),u*(o[1]-e[1])]);return r}function So(n){if(n.length<3)return go(n);var t=1,e=n.length,r=n[0],u=r[0],i=r[1],o=[u,u,u,(r=n[1])[0]],a=[i,i,i,r[1]],c=[u,",",i,"L",No(Cl,o),",",No(Cl,a)];for(n.push(n[e-1]);++t<=e;)r=n[t],o.shift(),o.push(r[0]),a.shift(),a.push(r[1]),Co(c,o,a);return n.pop(),c.push("L",r),c.join("")}function ko(n){if(n.length<4)return go(n);for(var t,e=[],r=-1,u=n.length,i=[0],o=[0];++r<3;)t=n[r],i.push(t[0]),o.push(t[1]);for(e.push(No(Cl,i)+","+No(Cl,o)),--r;++r<u;)t=n[r],i.shift(),i.push(t[0]),o.shift(),o.push(t[1]),Co(e,i,o);return e.join("")}function Eo(n){for(var t,e,r=-1,u=n.length,i=u+4,o=[],a=[];++r<4;)e=n[r%u],o.push(e[0]),a.push(e[1]);for(t=[No(Cl,o),",",No(Cl,a)],--r;++r<i;)e=n[r%u],o.shift(),o.push(e[0]),a.shift(),a.push(e[1]),Co(t,o,a);return t.join("")}function Ao(n,t){var e=n.length-1;if(e)for(var r,u,i=n[0][0],o=n[0][1],a=n[e][0]-i,c=n[e][1]-o,l=-1;++l<=e;)r=n[l],u=l/e,r[0]=t*r[0]+(1-t)*(i+u*a),r[1]=t*r[1]+(1-t)*(o+u*c);return So(n)}function No(n,t){return n[0]*t[0]+n[1]*t[1]+n[2]*t[2]+n[3]*t[3]}function Co(n,t,e){n.push("C",No(Al,t),",",No(Al,e),",",No(Nl,t),",",No(Nl,e),",",No(Cl,t),",",No(Cl,e))}function zo(n,t){return(t[1]-n[1])/(t[0]-n[0])}function qo(n){for(var 
t=0,e=n.length-1,r=[],u=n[0],i=n[1],o=r[0]=zo(u,i);++t<e;)r[t]=(o+(o=zo(u=i,i=n[t+1])))/2;return r[t]=o,r}function Lo(n){for(var t,e,r,u,i=[],o=qo(n),a=-1,c=n.length-1;++a<c;)t=zo(n[a],n[a+1]),ga(t)<Ca?o[a]=o[a+1]=0:(e=o[a]/t,r=o[a+1]/t,u=e*e+r*r,u>9&&(u=3*t/Math.sqrt(u),o[a]=u*e,o[a+1]=u*r));for(a=-1;++a<=c;)u=(n[Math.min(c,a+1)][0]-n[Math.max(0,a-1)][0])/(6*(1+o[a]*o[a])),i.push([u||0,o[a]*u||0]);return i}function To(n){return n.length<3?go(n):n[0]+_o(n,Lo(n))}function Ro(n){for(var t,e,r,u=-1,i=n.length;++u<i;)t=n[u],e=t[0],r=t[1]-Ra,t[0]=e*Math.cos(r),t[1]=e*Math.sin(r);return n}function Do(n){function t(t){function c(){v.push("M",a(n(m),f),s,l(n(d.reverse()),f),"Z")}for(var h,g,p,v=[],d=[],m=[],y=-1,M=t.length,x=Et(e),b=Et(u),_=e===r?function(){return g}:Et(r),w=u===i?function(){return p}:Et(i);++y<M;)o.call(this,h=t[y],y)?(d.push([g=+x.call(this,h,y),p=+b.call(this,h,y)]),m.push([+_.call(this,h,y),+w.call(this,h,y)])):d.length&&(c(),d=[],m=[]);return d.length&&c(),v.length?v.join(""):null}var e=Ar,r=Ar,u=0,i=Nr,o=Ne,a=go,c=a.key,l=a,s="L",f=.7;return t.x=function(n){return arguments.length?(e=r=n,t):r},t.x0=function(n){return arguments.length?(e=n,t):e},t.x1=function(n){return arguments.length?(r=n,t):r +},t.y=function(n){return arguments.length?(u=i=n,t):i},t.y0=function(n){return arguments.length?(u=n,t):u},t.y1=function(n){return arguments.length?(i=n,t):i},t.defined=function(n){return arguments.length?(o=n,t):o},t.interpolate=function(n){return arguments.length?(c="function"==typeof n?a=n:(a=El.get(n)||go).key,l=a.reverse||a,s=a.closed?"M":"L",t):c},t.tension=function(n){return arguments.length?(f=n,t):f},t}function Po(n){return n.radius}function Uo(n){return[n.x,n.y]}function jo(n){return function(){var t=n.apply(this,arguments),e=t[0],r=t[1]-Ra;return[e*Math.cos(r),e*Math.sin(r)]}}function Fo(){return 64}function Ho(){return"circle"}function Oo(n){var t=Math.sqrt(n/qa);return"M0,"+t+"A"+t+","+t+" 0 1,1 0,"+-t+"A"+t+","+t+" 0 1,1 0,"+t+"Z"}function 
Io(n){return function(){var t,e;(t=this[n])&&(e=t[t.active])&&(--t.count?delete t[t.active]:delete this[n],t.active+=.5,e.event&&e.event.interrupt.call(this,this.__data__,e.index))}}function Yo(n,t,e){return ya(n,Pl),n.namespace=t,n.id=e,n}function Zo(n,t,e,r){var u=n.id,i=n.namespace;return Y(n,"function"==typeof e?function(n,o,a){n[i][u].tween.set(t,r(e.call(n,n.__data__,o,a)))}:(e=r(e),function(n){n[i][u].tween.set(t,e)}))}function Vo(n){return null==n&&(n=""),function(){this.textContent=n}}function Xo(n){return null==n?"__transition__":"__transition_"+n+"__"}function $o(n,t,e,r,u){var i=n[e]||(n[e]={active:0,count:0}),o=i[r];if(!o){var a=u.time;o=i[r]={tween:new l,time:a,delay:u.delay,duration:u.duration,ease:u.ease,index:t},u=null,++i.count,ta.timer(function(u){function c(e){if(i.active>r)return s();var u=i[i.active];u&&(--i.count,delete i[i.active],u.event&&u.event.interrupt.call(n,n.__data__,u.index)),i.active=r,o.event&&o.event.start.call(n,n.__data__,t),o.tween.forEach(function(e,r){(r=r.call(n,n.__data__,t))&&v.push(r)}),h=o.ease,f=o.duration,ta.timer(function(){return p.c=l(e||1)?Ne:l,1},0,a)}function l(e){if(i.active!==r)return 1;for(var u=e/f,a=h(u),c=v.length;c>0;)v[--c].call(n,a);return u>=1?(o.event&&o.event.end.call(n,n.__data__,t),s()):void 0}function s(){return--i.count?delete i[r]:delete n[e],1}var f,h,g=o.delay,p=ec,v=[];return p.t=g+a,u>=g?c(u-g):void(p.c=c)},0,a)}}function Bo(n,t,e){n.attr("transform",function(n){var r=t(n);return"translate("+(isFinite(r)?r:e(n))+",0)"})}function Wo(n,t,e){n.attr("transform",function(n){var r=t(n);return"translate(0,"+(isFinite(r)?r:e(n))+")"})}function Jo(n){return n.toISOString()}function Go(n,t,e){function r(t){return n(t)}function u(n,e){var r=n[1]-n[0],u=r/e,i=ta.bisect(Vl,u);return i==Vl.length?[t.year,Vi(n.map(function(n){return n/31536e6}),e)[2]]:i?t[u/Vl[i-1]<Vl[i]/u?i-1:i]:[Bl,Vi(n,e)[2]]}return r.invert=function(t){return Ko(n.invert(t))},r.domain=function(t){return 
arguments.length?(n.domain(t),r):n.domain().map(Ko)},r.nice=function(n,t){function e(e){return!isNaN(e)&&!n.range(e,Ko(+e+1),t).length}var i=r.domain(),o=Pi(i),a=null==n?u(o,10):"number"==typeof n&&u(o,n);return a&&(n=a[0],t=a[1]),r.domain(Fi(i,t>1?{floor:function(t){for(;e(t=n.floor(t));)t=Ko(t-1);return t},ceil:function(t){for(;e(t=n.ceil(t));)t=Ko(+t+1);return t}}:n))},r.ticks=function(n,t){var e=Pi(r.domain()),i=null==n?u(e,10):"number"==typeof n?u(e,n):!n.range&&[{range:n},t];return i&&(n=i[0],t=i[1]),n.range(e[0],Ko(+e[1]+1),1>t?1:t)},r.tickFormat=function(){return e},r.copy=function(){return Go(n.copy(),t,e)},Yi(r,n)}function Ko(n){return new Date(n)}function Qo(n){return JSON.parse(n.responseText)}function na(n){var t=ua.createRange();return t.selectNode(ua.body),t.createContextualFragment(n.responseText)}var ta={version:"3.5.5"},ea=[].slice,ra=function(n){return ea.call(n)},ua=this.document;if(ua)try{ra(ua.documentElement.childNodes)[0].nodeType}catch(ia){ra=function(n){for(var t=n.length,e=new Array(t);t--;)e[t]=n[t];return e}}if(Date.now||(Date.now=function(){return+new Date}),ua)try{ua.createElement("DIV").style.setProperty("opacity",0,"")}catch(oa){var aa=this.Element.prototype,ca=aa.setAttribute,la=aa.setAttributeNS,sa=this.CSSStyleDeclaration.prototype,fa=sa.setProperty;aa.setAttribute=function(n,t){ca.call(this,n,t+"")},aa.setAttributeNS=function(n,t,e){la.call(this,n,t,e+"")},sa.setProperty=function(n,t,e){fa.call(this,n,t+"",e)}}ta.ascending=e,ta.descending=function(n,t){return n>t?-1:t>n?1:t>=n?0:0/0},ta.min=function(n,t){var e,r,u=-1,i=n.length;if(1===arguments.length){for(;++u<i;)if(null!=(r=n[u])&&r>=r){e=r;break}for(;++u<i;)null!=(r=n[u])&&e>r&&(e=r)}else{for(;++u<i;)if(null!=(r=t.call(n,n[u],u))&&r>=r){e=r;break}for(;++u<i;)null!=(r=t.call(n,n[u],u))&&e>r&&(e=r)}return e},ta.max=function(n,t){var 
e,r,u=-1,i=n.length;if(1===arguments.length){for(;++u<i;)if(null!=(r=n[u])&&r>=r){e=r;break}for(;++u<i;)null!=(r=n[u])&&r>e&&(e=r)}else{for(;++u<i;)if(null!=(r=t.call(n,n[u],u))&&r>=r){e=r;break}for(;++u<i;)null!=(r=t.call(n,n[u],u))&&r>e&&(e=r)}return e},ta.extent=function(n,t){var e,r,u,i=-1,o=n.length;if(1===arguments.length){for(;++i<o;)if(null!=(r=n[i])&&r>=r){e=u=r;break}for(;++i<o;)null!=(r=n[i])&&(e>r&&(e=r),r>u&&(u=r))}else{for(;++i<o;)if(null!=(r=t.call(n,n[i],i))&&r>=r){e=u=r;break}for(;++i<o;)null!=(r=t.call(n,n[i],i))&&(e>r&&(e=r),r>u&&(u=r))}return[e,u]},ta.sum=function(n,t){var e,r=0,i=n.length,o=-1;if(1===arguments.length)for(;++o<i;)u(e=+n[o])&&(r+=e);else for(;++o<i;)u(e=+t.call(n,n[o],o))&&(r+=e);return r},ta.mean=function(n,t){var e,i=0,o=n.length,a=-1,c=o;if(1===arguments.length)for(;++a<o;)u(e=r(n[a]))?i+=e:--c;else for(;++a<o;)u(e=r(t.call(n,n[a],a)))?i+=e:--c;return c?i/c:void 0},ta.quantile=function(n,t){var e=(n.length-1)*t+1,r=Math.floor(e),u=+n[r-1],i=e-r;return i?u+i*(n[r]-u):u},ta.median=function(n,t){var i,o=[],a=n.length,c=-1;if(1===arguments.length)for(;++c<a;)u(i=r(n[c]))&&o.push(i);else for(;++c<a;)u(i=r(t.call(n,n[c],c)))&&o.push(i);return o.length?ta.quantile(o.sort(e),.5):void 0},ta.variance=function(n,t){var e,i,o=n.length,a=0,c=0,l=-1,s=0;if(1===arguments.length)for(;++l<o;)u(e=r(n[l]))&&(i=e-a,a+=i/++s,c+=i*(e-a));else for(;++l<o;)u(e=r(t.call(n,n[l],l)))&&(i=e-a,a+=i/++s,c+=i*(e-a));return s>1?c/(s-1):void 0},ta.deviation=function(){var n=ta.variance.apply(this,arguments);return n?Math.sqrt(n):n};var ha=i(e);ta.bisectLeft=ha.left,ta.bisect=ta.bisectRight=ha.right,ta.bisector=function(n){return i(1===n.length?function(t,r){return e(n(t),r)}:n)},ta.shuffle=function(n,t,e){(i=arguments.length)<3&&(e=n.length,2>i&&(t=0));for(var r,u,i=e-t;i;)u=Math.random()*i--|0,r=n[i+t],n[i+t]=n[u+t],n[u+t]=r;return n},ta.permute=function(n,t){for(var e=t.length,r=new Array(e);e--;)r[e]=n[t[e]];return r},ta.pairs=function(n){for(var 
t,e=0,r=n.length-1,u=n[0],i=new Array(0>r?0:r);r>e;)i[e]=[t=u,u=n[++e]];return i},ta.zip=function(){if(!(r=arguments.length))return[];for(var n=-1,t=ta.min(arguments,o),e=new Array(t);++n<t;)for(var r,u=-1,i=e[n]=new Array(r);++u<r;)i[u]=arguments[u][n];return e},ta.transpose=function(n){return ta.zip.apply(ta,n)},ta.keys=function(n){var t=[];for(var e in n)t.push(e);return t},ta.values=function(n){var t=[];for(var e in n)t.push(n[e]);return t},ta.entries=function(n){var t=[];for(var e in n)t.push({key:e,value:n[e]});return t},ta.merge=function(n){for(var t,e,r,u=n.length,i=-1,o=0;++i<u;)o+=n[i].length;for(e=new Array(o);--u>=0;)for(r=n[u],t=r.length;--t>=0;)e[--o]=r[t];return e};var ga=Math.abs;ta.range=function(n,t,e){if(arguments.length<3&&(e=1,arguments.length<2&&(t=n,n=0)),(t-n)/e===1/0)throw new Error("infinite range");var r,u=[],i=a(ga(e)),o=-1;if(n*=i,t*=i,e*=i,0>e)for(;(r=n+e*++o)>t;)u.push(r/i);else for(;(r=n+e*++o)<t;)u.push(r/i);return u},ta.map=function(n,t){var e=new l;if(n instanceof l)n.forEach(function(n,t){e.set(n,t)});else if(Array.isArray(n)){var r,u=-1,i=n.length;if(1===arguments.length)for(;++u<i;)e.set(u,n[u]);else for(;++u<i;)e.set(t.call(n,r=n[u],u),r)}else for(var o in n)e.set(o,n[o]);return e};var pa="__proto__",va="\x00";c(l,{has:h,get:function(n){return this._[s(n)]},set:function(n,t){return this._[s(n)]=t},remove:g,keys:p,values:function(){var n=[];for(var t in this._)n.push(this._[t]);return n},entries:function(){var n=[];for(var t in this._)n.push({key:f(t),value:this._[t]});return n},size:v,empty:d,forEach:function(n){for(var t in this._)n.call(this,f(t),this._[t])}}),ta.nest=function(){function n(t,o,a){if(a>=i.length)return r?r.call(u,o):e?o.sort(e):o;for(var c,s,f,h,g=-1,p=o.length,v=i[a++],d=new l;++g<p;)(h=d.get(c=v(s=o[g])))?h.push(s):d.set(c,[s]);return t?(s=t(),f=function(e,r){s.set(e,n(t,r,a))}):(s={},f=function(e,r){s[e]=n(t,r,a)}),d.forEach(f),s}function t(n,e){if(e>=i.length)return n;var r=[],u=o[e++];return 
n.forEach(function(n,u){r.push({key:n,values:t(u,e)})}),u?r.sort(function(n,t){return u(n.key,t.key)}):r}var e,r,u={},i=[],o=[];return u.map=function(t,e){return n(e,t,0)},u.entries=function(e){return t(n(ta.map,e,0),0)},u.key=function(n){return i.push(n),u},u.sortKeys=function(n){return o[i.length-1]=n,u},u.sortValues=function(n){return e=n,u},u.rollup=function(n){return r=n,u},u},ta.set=function(n){var t=new m;if(n)for(var e=0,r=n.length;r>e;++e)t.add(n[e]);return t},c(m,{has:h,add:function(n){return this._[s(n+="")]=!0,n},remove:g,values:p,size:v,empty:d,forEach:function(n){for(var t in this._)n.call(this,f(t))}}),ta.behavior={},ta.rebind=function(n,t){for(var e,r=1,u=arguments.length;++r<u;)n[e=arguments[r]]=M(n,t,t[e]);return n};var da=["webkit","ms","moz","Moz","o","O"];ta.dispatch=function(){for(var n=new _,t=-1,e=arguments.length;++t<e;)n[arguments[t]]=w(n);return n},_.prototype.on=function(n,t){var e=n.indexOf("."),r="";if(e>=0&&(r=n.slice(e+1),n=n.slice(0,e)),n)return arguments.length<2?this[n].on(r):this[n].on(r,t);if(2===arguments.length){if(null==t)for(n in this)this.hasOwnProperty(n)&&this[n].on(r,null);return this}},ta.event=null,ta.requote=function(n){return n.replace(ma,"\\$&")};var ma=/[\\\^\$\*\+\?\|\[\]\(\)\.\{\}]/g,ya={}.__proto__?function(n,t){n.__proto__=t}:function(n,t){for(var e in t)n[e]=t[e]},Ma=function(n,t){return t.querySelector(n)},xa=function(n,t){return t.querySelectorAll(n)},ba=function(n,t){var e=n.matches||n[x(n,"matchesSelector")];return(ba=function(n,t){return e.call(n,t)})(n,t)};"function"==typeof Sizzle&&(Ma=function(n,t){return Sizzle(n,t)[0]||null},xa=Sizzle,ba=Sizzle.matchesSelector),ta.selection=function(){return ta.select(ua.documentElement)};var _a=ta.selection.prototype=[];_a.select=function(n){var t,e,r,u,i=[];n=N(n);for(var o=-1,a=this.length;++o<a;){i.push(t=[]),t.parentNode=(r=this[o]).parentNode;for(var c=-1,l=r.length;++c<l;)(u=r[c])?(t.push(e=n.call(u,u.__data__,c,o)),e&&"__data__"in 
u&&(e.__data__=u.__data__)):t.push(null)}return A(i)},_a.selectAll=function(n){var t,e,r=[];n=C(n);for(var u=-1,i=this.length;++u<i;)for(var o=this[u],a=-1,c=o.length;++a<c;)(e=o[a])&&(r.push(t=ra(n.call(e,e.__data__,a,u))),t.parentNode=e);return A(r)};var wa={svg:"http://www.w3.org/2000/svg",xhtml:"http://www.w3.org/1999/xhtml",xlink:"http://www.w3.org/1999/xlink",xml:"http://www.w3.org/XML/1998/namespace",xmlns:"http://www.w3.org/2000/xmlns/"};ta.ns={prefix:wa,qualify:function(n){var t=n.indexOf(":"),e=n;return t>=0&&(e=n.slice(0,t),n=n.slice(t+1)),wa.hasOwnProperty(e)?{space:wa[e],local:n}:n}},_a.attr=function(n,t){if(arguments.length<2){if("string"==typeof n){var e=this.node();return n=ta.ns.qualify(n),n.local?e.getAttributeNS(n.space,n.local):e.getAttribute(n)}for(t in n)this.each(z(t,n[t]));return this}return this.each(z(n,t))},_a.classed=function(n,t){if(arguments.length<2){if("string"==typeof n){var e=this.node(),r=(n=T(n)).length,u=-1;if(t=e.classList){for(;++u<r;)if(!t.contains(n[u]))return!1}else for(t=e.getAttribute("class");++u<r;)if(!L(n[u]).test(t))return!1;return!0}for(t in n)this.each(R(t,n[t]));return this}return this.each(R(n,t))},_a.style=function(n,e,r){var u=arguments.length;if(3>u){if("string"!=typeof n){2>u&&(e="");for(r in n)this.each(P(r,n[r],e));return this}if(2>u){var i=this.node();return t(i).getComputedStyle(i,null).getPropertyValue(n)}r=""}return this.each(P(n,e,r))},_a.property=function(n,t){if(arguments.length<2){if("string"==typeof n)return this.node()[n];for(t in n)this.each(U(t,n[t]));return this}return this.each(U(n,t))},_a.text=function(n){return arguments.length?this.each("function"==typeof n?function(){var t=n.apply(this,arguments);this.textContent=null==t?"":t}:null==n?function(){this.textContent=""}:function(){this.textContent=n}):this.node().textContent},_a.html=function(n){return arguments.length?this.each("function"==typeof n?function(){var 
t=n.apply(this,arguments);this.innerHTML=null==t?"":t}:null==n?function(){this.innerHTML=""}:function(){this.innerHTML=n}):this.node().innerHTML},_a.append=function(n){return n=j(n),this.select(function(){return this.appendChild(n.apply(this,arguments))})},_a.insert=function(n,t){return n=j(n),t=N(t),this.select(function(){return this.insertBefore(n.apply(this,arguments),t.apply(this,arguments)||null)})},_a.remove=function(){return this.each(F)},_a.data=function(n,t){function e(n,e){var r,u,i,o=n.length,f=e.length,h=Math.min(o,f),g=new Array(f),p=new Array(f),v=new Array(o);if(t){var d,m=new l,y=new Array(o);for(r=-1;++r<o;)m.has(d=t.call(u=n[r],u.__data__,r))?v[r]=u:m.set(d,u),y[r]=d;for(r=-1;++r<f;)(u=m.get(d=t.call(e,i=e[r],r)))?u!==!0&&(g[r]=u,u.__data__=i):p[r]=H(i),m.set(d,!0);for(r=-1;++r<o;)m.get(y[r])!==!0&&(v[r]=n[r])}else{for(r=-1;++r<h;)u=n[r],i=e[r],u?(u.__data__=i,g[r]=u):p[r]=H(i);for(;f>r;++r)p[r]=H(e[r]);for(;o>r;++r)v[r]=n[r]}p.update=g,p.parentNode=g.parentNode=v.parentNode=n.parentNode,a.push(p),c.push(g),s.push(v)}var r,u,i=-1,o=this.length;if(!arguments.length){for(n=new Array(o=(r=this[0]).length);++i<o;)(u=r[i])&&(n[i]=u.__data__);return n}var a=Z([]),c=A([]),s=A([]);if("function"==typeof n)for(;++i<o;)e(r=this[i],n.call(r,r.parentNode.__data__,i));else for(;++i<o;)e(r=this[i],n);return c.enter=function(){return a},c.exit=function(){return s},c},_a.datum=function(n){return arguments.length?this.property("__data__",n):this.property("__data__")},_a.filter=function(n){var t,e,r,u=[];"function"!=typeof n&&(n=O(n));for(var i=0,o=this.length;o>i;i++){u.push(t=[]),t.parentNode=(e=this[i]).parentNode;for(var a=0,c=e.length;c>a;a++)(r=e[a])&&n.call(r,r.__data__,a,i)&&t.push(r)}return A(u)},_a.order=function(){for(var n=-1,t=this.length;++n<t;)for(var e,r=this[n],u=r.length-1,i=r[u];--u>=0;)(e=r[u])&&(i&&i!==e.nextSibling&&i.parentNode.insertBefore(e,i),i=e);return this},_a.sort=function(n){n=I.apply(this,arguments);for(var 
t=-1,e=this.length;++t<e;)this[t].sort(n);return this.order()},_a.each=function(n){return Y(this,function(t,e,r){n.call(t,t.__data__,e,r)})},_a.call=function(n){var t=ra(arguments);return n.apply(t[0]=this,t),this},_a.empty=function(){return!this.node()},_a.node=function(){for(var n=0,t=this.length;t>n;n++)for(var e=this[n],r=0,u=e.length;u>r;r++){var i=e[r];if(i)return i}return null},_a.size=function(){var n=0;return Y(this,function(){++n}),n};var Sa=[];ta.selection.enter=Z,ta.selection.enter.prototype=Sa,Sa.append=_a.append,Sa.empty=_a.empty,Sa.node=_a.node,Sa.call=_a.call,Sa.size=_a.size,Sa.select=function(n){for(var t,e,r,u,i,o=[],a=-1,c=this.length;++a<c;){r=(u=this[a]).update,o.push(t=[]),t.parentNode=u.parentNode;for(var l=-1,s=u.length;++l<s;)(i=u[l])?(t.push(r[l]=e=n.call(u.parentNode,i.__data__,l,a)),e.__data__=i.__data__):t.push(null)}return A(o)},Sa.insert=function(n,t){return arguments.length<2&&(t=V(this)),_a.insert.call(this,n,t)},ta.select=function(t){var e;return"string"==typeof t?(e=[Ma(t,ua)],e.parentNode=ua.documentElement):(e=[t],e.parentNode=n(t)),A([e])},ta.selectAll=function(n){var t;return"string"==typeof n?(t=ra(xa(n,ua)),t.parentNode=ua.documentElement):(t=n,t.parentNode=null),A([t])},_a.on=function(n,t,e){var r=arguments.length;if(3>r){if("string"!=typeof n){2>r&&(t=!1);for(e in n)this.each(X(e,n[e],t));return this}if(2>r)return(r=this.node()["__on"+n])&&r._;e=!1}return this.each(X(n,t,e))};var ka=ta.map({mouseenter:"mouseover",mouseleave:"mouseout"});ua&&ka.forEach(function(n){"on"+n in ua&&ka.remove(n)});var Ea,Aa=0;ta.mouse=function(n){return J(n,k())};var Na=this.navigator&&/WebKit/.test(this.navigator.userAgent)?-1:0;ta.touch=function(n,t,e){if(arguments.length<3&&(e=t,t=k().changedTouches),t)for(var r,u=0,i=t.length;i>u;++u)if((r=t[u]).identifier===e)return J(n,r)},ta.behavior.drag=function(){function n(){this.on("mousedown.drag",i).on("touchstart.drag",o)}function e(n,t,e,i,o){return function(){function a(){var 
n,e,r=t(h,v);r&&(n=r[0]-M[0],e=r[1]-M[1],p|=n|e,M=r,g({type:"drag",x:r[0]+l[0],y:r[1]+l[1],dx:n,dy:e}))}function c(){t(h,v)&&(m.on(i+d,null).on(o+d,null),y(p&&ta.event.target===f),g({type:"dragend"}))}var l,s=this,f=ta.event.target,h=s.parentNode,g=r.of(s,arguments),p=0,v=n(),d=".drag"+(null==v?"":"-"+v),m=ta.select(e(f)).on(i+d,a).on(o+d,c),y=W(f),M=t(h,v);u?(l=u.apply(s,arguments),l=[l.x-M[0],l.y-M[1]]):l=[0,0],g({type:"dragstart"})}}var r=E(n,"drag","dragstart","dragend"),u=null,i=e(b,ta.mouse,t,"mousemove","mouseup"),o=e(G,ta.touch,y,"touchmove","touchend");return n.origin=function(t){return arguments.length?(u=t,n):u},ta.rebind(n,r,"on")},ta.touches=function(n,t){return arguments.length<2&&(t=k().touches),t?ra(t).map(function(t){var e=J(n,t);return e.identifier=t.identifier,e}):[]};var Ca=1e-6,za=Ca*Ca,qa=Math.PI,La=2*qa,Ta=La-Ca,Ra=qa/2,Da=qa/180,Pa=180/qa,Ua=Math.SQRT2,ja=2,Fa=4;ta.interpolateZoom=function(n,t){function e(n){var t=n*y;if(m){var e=rt(v),o=i/(ja*h)*(e*ut(Ua*t+v)-et(v));return[r+o*l,u+o*s,i*e/rt(Ua*t+v)]}return[r+n*l,u+n*s,i*Math.exp(Ua*t)]}var r=n[0],u=n[1],i=n[2],o=t[0],a=t[1],c=t[2],l=o-r,s=a-u,f=l*l+s*s,h=Math.sqrt(f),g=(c*c-i*i+Fa*f)/(2*i*ja*h),p=(c*c-i*i-Fa*f)/(2*c*ja*h),v=Math.log(Math.sqrt(g*g+1)-g),d=Math.log(Math.sqrt(p*p+1)-p),m=d-v,y=(m||Math.log(c/i))/Ua;return e.duration=1e3*y,e},ta.behavior.zoom=function(){function n(n){n.on(q,f).on(Oa+".zoom",g).on("dblclick.zoom",p).on(R,h)}function e(n){return[(n[0]-k.x)/k.k,(n[1]-k.y)/k.k]}function r(n){return[n[0]*k.k+k.x,n[1]*k.k+k.y]}function u(n){k.k=Math.max(N[0],Math.min(N[1],n))}function i(n,t){t=r(t),k.x+=n[0]-t[0],k.y+=n[1]-t[1]}function o(t,e,r,o){t.__chart__={x:k.x,y:k.y,k:k.k},u(Math.pow(2,o)),i(d=e,r),t=ta.select(t),C>0&&(t=t.transition().duration(C)),t.call(n.event)}function a(){b&&b.domain(x.range().map(function(n){return(n-k.x)/k.k}).map(x.invert)),w&&w.domain(_.range().map(function(n){return(n-k.y)/k.k}).map(_.invert))}function c(n){z++||n({type:"zoomstart"})}function 
l(n){a(),n({type:"zoom",scale:k.k,translate:[k.x,k.y]})}function s(n){--z||n({type:"zoomend"}),d=null}function f(){function n(){f=1,i(ta.mouse(u),g),l(a)}function r(){h.on(L,null).on(T,null),p(f&&ta.event.target===o),s(a)}var u=this,o=ta.event.target,a=D.of(u,arguments),f=0,h=ta.select(t(u)).on(L,n).on(T,r),g=e(ta.mouse(u)),p=W(u);Dl.call(u),c(a)}function h(){function n(){var n=ta.touches(p);return g=k.k,n.forEach(function(n){n.identifier in d&&(d[n.identifier]=e(n))}),n}function t(){var t=ta.event.target;ta.select(t).on(x,r).on(b,a),_.push(t);for(var e=ta.event.changedTouches,u=0,i=e.length;i>u;++u)d[e[u].identifier]=null;var c=n(),l=Date.now();if(1===c.length){if(500>l-M){var s=c[0];o(p,s,d[s.identifier],Math.floor(Math.log(k.k)/Math.LN2)+1),S()}M=l}else if(c.length>1){var s=c[0],f=c[1],h=s[0]-f[0],g=s[1]-f[1];m=h*h+g*g}}function r(){var n,t,e,r,o=ta.touches(p);Dl.call(p);for(var a=0,c=o.length;c>a;++a,r=null)if(e=o[a],r=d[e.identifier]){if(t)break;n=e,t=r}if(r){var s=(s=e[0]-n[0])*s+(s=e[1]-n[1])*s,f=m&&Math.sqrt(s/m);n=[(n[0]+e[0])/2,(n[1]+e[1])/2],t=[(t[0]+r[0])/2,(t[1]+r[1])/2],u(f*g)}M=null,i(n,t),l(v)}function a(){if(ta.event.touches.length){for(var t=ta.event.changedTouches,e=0,r=t.length;r>e;++e)delete d[t[e].identifier];for(var u in d)return void n()}ta.selectAll(_).on(y,null),w.on(q,f).on(R,h),E(),s(v)}var g,p=this,v=D.of(p,arguments),d={},m=0,y=".zoom-"+ta.event.changedTouches[0].identifier,x="touchmove"+y,b="touchend"+y,_=[],w=ta.select(p),E=W(p);t(),c(v),w.on(q,null).on(R,t)}function g(){var n=D.of(this,arguments);y?clearTimeout(y):(v=e(d=m||ta.mouse(this)),Dl.call(this),c(n)),y=setTimeout(function(){y=null,s(n)},50),S(),u(Math.pow(2,.002*Ha())*k.k),i(d,v),l(n)}function p(){var n=ta.mouse(this),t=Math.log(k.k)/Math.LN2;o(this,n,e(n),ta.event.shiftKey?Math.ceil(t)-1:Math.floor(t)+1)}var 
v,d,m,y,M,x,b,_,w,k={x:0,y:0,k:1},A=[960,500],N=Ia,C=250,z=0,q="mousedown.zoom",L="mousemove.zoom",T="mouseup.zoom",R="touchstart.zoom",D=E(n,"zoomstart","zoom","zoomend");return Oa||(Oa="onwheel"in ua?(Ha=function(){return-ta.event.deltaY*(ta.event.deltaMode?120:1)},"wheel"):"onmousewheel"in ua?(Ha=function(){return ta.event.wheelDelta},"mousewheel"):(Ha=function(){return-ta.event.detail},"MozMousePixelScroll")),n.event=function(n){n.each(function(){var n=D.of(this,arguments),t=k;Tl?ta.select(this).transition().each("start.zoom",function(){k=this.__chart__||{x:0,y:0,k:1},c(n)}).tween("zoom:zoom",function(){var e=A[0],r=A[1],u=d?d[0]:e/2,i=d?d[1]:r/2,o=ta.interpolateZoom([(u-k.x)/k.k,(i-k.y)/k.k,e/k.k],[(u-t.x)/t.k,(i-t.y)/t.k,e/t.k]);return function(t){var r=o(t),a=e/r[2];this.__chart__=k={x:u-r[0]*a,y:i-r[1]*a,k:a},l(n)}}).each("interrupt.zoom",function(){s(n)}).each("end.zoom",function(){s(n)}):(this.__chart__=k,c(n),l(n),s(n))})},n.translate=function(t){return arguments.length?(k={x:+t[0],y:+t[1],k:k.k},a(),n):[k.x,k.y]},n.scale=function(t){return arguments.length?(k={x:k.x,y:k.y,k:+t},a(),n):k.k},n.scaleExtent=function(t){return arguments.length?(N=null==t?Ia:[+t[0],+t[1]],n):N},n.center=function(t){return arguments.length?(m=t&&[+t[0],+t[1]],n):m},n.size=function(t){return arguments.length?(A=t&&[+t[0],+t[1]],n):A},n.duration=function(t){return arguments.length?(C=+t,n):C},n.x=function(t){return arguments.length?(b=t,x=t.copy(),k={x:0,y:0,k:1},n):b},n.y=function(t){return arguments.length?(w=t,_=t.copy(),k={x:0,y:0,k:1},n):w},ta.rebind(n,D,"on")};var Ha,Oa,Ia=[0,1/0];ta.color=ot,ot.prototype.toString=function(){return this.rgb()+""},ta.hsl=at;var Ya=at.prototype=new ot;Ya.brighter=function(n){return n=Math.pow(.7,arguments.length?n:1),new at(this.h,this.s,this.l/n)},Ya.darker=function(n){return n=Math.pow(.7,arguments.length?n:1),new at(this.h,this.s,n*this.l)},Ya.rgb=function(){return ct(this.h,this.s,this.l)},ta.hcl=lt;var Za=lt.prototype=new 
ot;Za.brighter=function(n){return new lt(this.h,this.c,Math.min(100,this.l+Va*(arguments.length?n:1)))},Za.darker=function(n){return new lt(this.h,this.c,Math.max(0,this.l-Va*(arguments.length?n:1)))},Za.rgb=function(){return st(this.h,this.c,this.l).rgb()},ta.lab=ft;var Va=18,Xa=.95047,$a=1,Ba=1.08883,Wa=ft.prototype=new ot;Wa.brighter=function(n){return new ft(Math.min(100,this.l+Va*(arguments.length?n:1)),this.a,this.b)},Wa.darker=function(n){return new ft(Math.max(0,this.l-Va*(arguments.length?n:1)),this.a,this.b)},Wa.rgb=function(){return ht(this.l,this.a,this.b)},ta.rgb=mt;var Ja=mt.prototype=new ot;Ja.brighter=function(n){n=Math.pow(.7,arguments.length?n:1);var t=this.r,e=this.g,r=this.b,u=30;return t||e||r?(t&&u>t&&(t=u),e&&u>e&&(e=u),r&&u>r&&(r=u),new mt(Math.min(255,t/n),Math.min(255,e/n),Math.min(255,r/n))):new mt(u,u,u)},Ja.darker=function(n){return n=Math.pow(.7,arguments.length?n:1),new mt(n*this.r,n*this.g,n*this.b)},Ja.hsl=function(){return _t(this.r,this.g,this.b)},Ja.toString=function(){return"#"+xt(this.r)+xt(this.g)+xt(this.b)};var 
Ga=ta.map({aliceblue:15792383,antiquewhite:16444375,aqua:65535,aquamarine:8388564,azure:15794175,beige:16119260,bisque:16770244,black:0,blanchedalmond:16772045,blue:255,blueviolet:9055202,brown:10824234,burlywood:14596231,cadetblue:6266528,chartreuse:8388352,chocolate:13789470,coral:16744272,cornflowerblue:6591981,cornsilk:16775388,crimson:14423100,cyan:65535,darkblue:139,darkcyan:35723,darkgoldenrod:12092939,darkgray:11119017,darkgreen:25600,darkgrey:11119017,darkkhaki:12433259,darkmagenta:9109643,darkolivegreen:5597999,darkorange:16747520,darkorchid:10040012,darkred:9109504,darksalmon:15308410,darkseagreen:9419919,darkslateblue:4734347,darkslategray:3100495,darkslategrey:3100495,darkturquoise:52945,darkviolet:9699539,deeppink:16716947,deepskyblue:49151,dimgray:6908265,dimgrey:6908265,dodgerblue:2003199,firebrick:11674146,floralwhite:16775920,forestgreen:2263842,fuchsia:16711935,gainsboro:14474460,ghostwhite:16316671,gold:16766720,goldenrod:14329120,gray:8421504,green:32768,greenyellow:11403055,grey:8421504,honeydew:15794160,hotpink:16738740,indianred:13458524,indigo:4915330,ivory:16777200,khaki:15787660,lavender:15132410,lavenderblush:16773365,lawngreen:8190976,lemonchiffon:16775885,lightblue:11393254,lightcoral:15761536,lightcyan:14745599,lightgoldenrodyellow:16448210,lightgray:13882323,lightgreen:9498256,lightgrey:13882323,lightpink:16758465,lightsalmon:16752762,lightseagreen:2142890,lightskyblue:8900346,lightslategray:7833753,lightslategrey:7833753,lightsteelblue:11584734,lightyellow:16777184,lime:65280,limegreen:3329330,linen:16445670,magenta:16711935,maroon:8388608,mediumaquamarine:6737322,mediumblue:205,mediumorchid:12211667,mediumpurple:9662683,mediumseagreen:3978097,mediumslateblue:8087790,mediumspringgreen:64154,mediumturquoise:4772300,mediumvioletred:13047173,midnightblue:1644912,mintcream:16121850,mistyrose:16770273,moccasin:16770229,navajowhite:16768685,navy:128,oldlace:16643558,olive:8421376,olivedrab:7048739,orange:16753920,orangered:16729344,orchid:
14315734,palegoldenrod:15657130,palegreen:10025880,paleturquoise:11529966,palevioletred:14381203,papayawhip:16773077,peachpuff:16767673,peru:13468991,pink:16761035,plum:14524637,powderblue:11591910,purple:8388736,rebeccapurple:6697881,red:16711680,rosybrown:12357519,royalblue:4286945,saddlebrown:9127187,salmon:16416882,sandybrown:16032864,seagreen:3050327,seashell:16774638,sienna:10506797,silver:12632256,skyblue:8900331,slateblue:6970061,slategray:7372944,slategrey:7372944,snow:16775930,springgreen:65407,steelblue:4620980,tan:13808780,teal:32896,thistle:14204888,tomato:16737095,turquoise:4251856,violet:15631086,wheat:16113331,white:16777215,whitesmoke:16119285,yellow:16776960,yellowgreen:10145074});Ga.forEach(function(n,t){Ga.set(n,yt(t))}),ta.functor=Et,ta.xhr=At(y),ta.dsv=function(n,t){function e(n,e,i){arguments.length<3&&(i=e,e=null);var o=Nt(n,t,null==e?r:u(e),i);return o.row=function(n){return arguments.length?o.response(null==(e=n)?r:u(n)):e},o}function r(n){return e.parse(n.responseText)}function u(n){return function(t){return e.parse(t.responseText,n)}}function i(t){return t.map(o).join(n)}function o(n){return a.test(n)?'"'+n.replace(/\"/g,'""')+'"':n}var a=new RegExp('["'+n+"\n]"),c=n.charCodeAt(0);return e.parse=function(n,t){var r;return e.parseRows(n,function(n,e){if(r)return r(n,e-1);var u=new Function("d","return {"+n.map(function(n,t){return JSON.stringify(n)+": d["+t+"]"}).join(",")+"}");r=t?function(n,e){return t(u(n),e)}:u})},e.parseRows=function(n,t){function e(){if(s>=l)return o;if(u)return u=!1,i;var t=s;if(34===n.charCodeAt(t)){for(var e=t;e++<l;)if(34===n.charCodeAt(e)){if(34!==n.charCodeAt(e+1))break;++e}s=e+2;var r=n.charCodeAt(e+1);return 13===r?(u=!0,10===n.charCodeAt(e+2)&&++s):10===r&&(u=!0),n.slice(t+1,e).replace(/""/g,'"')}for(;l>s;){var r=n.charCodeAt(s++),a=1;if(10===r)u=!0;else if(13===r)u=!0,10===n.charCodeAt(s)&&(++s,++a);else if(r!==c)continue;return n.slice(t,s-a)}return n.slice(t)}for(var 
r,u,i={},o={},a=[],l=n.length,s=0,f=0;(r=e())!==o;){for(var h=[];r!==i&&r!==o;)h.push(r),r=e();t&&null==(h=t(h,f++))||a.push(h)}return a},e.format=function(t){if(Array.isArray(t[0]))return e.formatRows(t);var r=new m,u=[];return t.forEach(function(n){for(var t in n)r.has(t)||u.push(r.add(t))}),[u.map(o).join(n)].concat(t.map(function(t){return u.map(function(n){return o(t[n])}).join(n)})).join("\n")},e.formatRows=function(n){return n.map(i).join("\n")},e},ta.csv=ta.dsv(",","text/csv"),ta.tsv=ta.dsv(" ","text/tab-separated-values");var Ka,Qa,nc,tc,ec,rc=this[x(this,"requestAnimationFrame")]||function(n){setTimeout(n,17)};ta.timer=function(n,t,e){var r=arguments.length;2>r&&(t=0),3>r&&(e=Date.now());var u=e+t,i={c:n,t:u,f:!1,n:null};Qa?Qa.n=i:Ka=i,Qa=i,nc||(tc=clearTimeout(tc),nc=1,rc(qt))},ta.timer.flush=function(){Lt(),Tt()},ta.round=function(n,t){return t?Math.round(n*(t=Math.pow(10,t)))/t:Math.round(n)};var uc=["y","z","a","f","p","n","\xb5","m","","k","M","G","T","P","E","Z","Y"].map(Dt);ta.formatPrefix=function(n,t){var e=0;return n&&(0>n&&(n*=-1),t&&(n=ta.round(n,Rt(n,t))),e=1+Math.floor(1e-12+Math.log(n)/Math.LN10),e=Math.max(-24,Math.min(24,3*Math.floor((e-1)/3)))),uc[8+e/3]};var ic=/(?:([^{])?([<>=^]))?([+\- ])?([$#])?(0)?(\d+)?(,)?(\.-?\d+)?([a-z%])?/i,oc=ta.map({b:function(n){return n.toString(2)},c:function(n){return String.fromCharCode(n)},o:function(n){return n.toString(8)},x:function(n){return n.toString(16)},X:function(n){return n.toString(16).toUpperCase()},g:function(n,t){return n.toPrecision(t)},e:function(n,t){return n.toExponential(t)},f:function(n,t){return n.toFixed(t)},r:function(n,t){return(n=ta.round(n,Rt(n,t))).toFixed(Math.max(0,Math.min(20,Rt(n*(1+1e-15),t))))}}),ac=ta.time={},cc=Date;jt.prototype={getDate:function(){return this._.getUTCDate()},getDay:function(){return this._.getUTCDay()},getFullYear:function(){return this._.getUTCFullYear()},getHours:function(){return this._.getUTCHours()},getMilliseconds:function(){return 
this._.getUTCMilliseconds()},getMinutes:function(){return this._.getUTCMinutes()},getMonth:function(){return this._.getUTCMonth()},getSeconds:function(){return this._.getUTCSeconds()},getTime:function(){return this._.getTime()},getTimezoneOffset:function(){return 0},valueOf:function(){return this._.valueOf()},setDate:function(){lc.setUTCDate.apply(this._,arguments)},setDay:function(){lc.setUTCDay.apply(this._,arguments)},setFullYear:function(){lc.setUTCFullYear.apply(this._,arguments)},setHours:function(){lc.setUTCHours.apply(this._,arguments)},setMilliseconds:function(){lc.setUTCMilliseconds.apply(this._,arguments)},setMinutes:function(){lc.setUTCMinutes.apply(this._,arguments)},setMonth:function(){lc.setUTCMonth.apply(this._,arguments)},setSeconds:function(){lc.setUTCSeconds.apply(this._,arguments)},setTime:function(){lc.setTime.apply(this._,arguments)}};var lc=Date.prototype;ac.year=Ft(function(n){return n=ac.day(n),n.setMonth(0,1),n},function(n,t){n.setFullYear(n.getFullYear()+t)},function(n){return n.getFullYear()}),ac.years=ac.year.range,ac.years.utc=ac.year.utc.range,ac.day=Ft(function(n){var t=new cc(2e3,0);return t.setFullYear(n.getFullYear(),n.getMonth(),n.getDate()),t},function(n,t){n.setDate(n.getDate()+t)},function(n){return n.getDate()-1}),ac.days=ac.day.range,ac.days.utc=ac.day.utc.range,ac.dayOfYear=function(n){var t=ac.year(n);return Math.floor((n-t-6e4*(n.getTimezoneOffset()-t.getTimezoneOffset()))/864e5)},["sunday","monday","tuesday","wednesday","thursday","friday","saturday"].forEach(function(n,t){t=7-t;var e=ac[n]=Ft(function(n){return(n=ac.day(n)).setDate(n.getDate()-(n.getDay()+t)%7),n},function(n,t){n.setDate(n.getDate()+7*Math.floor(t))},function(n){var e=ac.year(n).getDay();return Math.floor((ac.dayOfYear(n)+(e+t)%7)/7)-(e!==t)});ac[n+"s"]=e.range,ac[n+"s"].utc=e.utc.range,ac[n+"OfYear"]=function(n){var e=ac.year(n).getDay();return 
Math.floor((ac.dayOfYear(n)+(e+t)%7)/7)}}),ac.week=ac.sunday,ac.weeks=ac.sunday.range,ac.weeks.utc=ac.sunday.utc.range,ac.weekOfYear=ac.sundayOfYear;var sc={"-":"",_:" ",0:"0"},fc=/^\s*\d+/,hc=/^%/;ta.locale=function(n){return{numberFormat:Pt(n),timeFormat:Ot(n)}};var gc=ta.locale({decimal:".",thousands:",",grouping:[3],currency:["$",""],dateTime:"%a %b %e %X %Y",date:"%m/%d/%Y",time:"%H:%M:%S",periods:["AM","PM"],days:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],shortDays:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],months:["January","February","March","April","May","June","July","August","September","October","November","December"],shortMonths:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]});ta.format=gc.numberFormat,ta.geo={},ce.prototype={s:0,t:0,add:function(n){le(n,this.t,pc),le(pc.s,this.s,this),this.s?this.t+=pc.t:this.s=pc.t +},reset:function(){this.s=this.t=0},valueOf:function(){return this.s}};var pc=new ce;ta.geo.stream=function(n,t){n&&vc.hasOwnProperty(n.type)?vc[n.type](n,t):se(n,t)};var vc={Feature:function(n,t){se(n.geometry,t)},FeatureCollection:function(n,t){for(var e=n.features,r=-1,u=e.length;++r<u;)se(e[r].geometry,t)}},dc={Sphere:function(n,t){t.sphere()},Point:function(n,t){n=n.coordinates,t.point(n[0],n[1],n[2])},MultiPoint:function(n,t){for(var e=n.coordinates,r=-1,u=e.length;++r<u;)n=e[r],t.point(n[0],n[1],n[2])},LineString:function(n,t){fe(n.coordinates,t,0)},MultiLineString:function(n,t){for(var e=n.coordinates,r=-1,u=e.length;++r<u;)fe(e[r],t,0)},Polygon:function(n,t){he(n.coordinates,t)},MultiPolygon:function(n,t){for(var e=n.coordinates,r=-1,u=e.length;++r<u;)he(e[r],t)},GeometryCollection:function(n,t){for(var e=n.geometries,r=-1,u=e.length;++r<u;)se(e[r],t)}};ta.geo.area=function(n){return mc=0,ta.geo.stream(n,Mc),mc};var mc,yc=new ce,Mc={sphere:function(){mc+=4*qa},point:b,lineStart:b,lineEnd:b,polygonStart:function(){yc.reset(),Mc.lineStart=ge},polygonEnd:function(){var 
n=2*yc;mc+=0>n?4*qa+n:n,Mc.lineStart=Mc.lineEnd=Mc.point=b}};ta.geo.bounds=function(){function n(n,t){M.push(x=[s=n,h=n]),f>t&&(f=t),t>g&&(g=t)}function t(t,e){var r=pe([t*Da,e*Da]);if(m){var u=de(m,r),i=[u[1],-u[0],0],o=de(i,u);Me(o),o=xe(o);var c=t-p,l=c>0?1:-1,v=o[0]*Pa*l,d=ga(c)>180;if(d^(v>l*p&&l*t>v)){var y=o[1]*Pa;y>g&&(g=y)}else if(v=(v+360)%360-180,d^(v>l*p&&l*t>v)){var y=-o[1]*Pa;f>y&&(f=y)}else f>e&&(f=e),e>g&&(g=e);d?p>t?a(s,t)>a(s,h)&&(h=t):a(t,h)>a(s,h)&&(s=t):h>=s?(s>t&&(s=t),t>h&&(h=t)):t>p?a(s,t)>a(s,h)&&(h=t):a(t,h)>a(s,h)&&(s=t)}else n(t,e);m=r,p=t}function e(){b.point=t}function r(){x[0]=s,x[1]=h,b.point=n,m=null}function u(n,e){if(m){var r=n-p;y+=ga(r)>180?r+(r>0?360:-360):r}else v=n,d=e;Mc.point(n,e),t(n,e)}function i(){Mc.lineStart()}function o(){u(v,d),Mc.lineEnd(),ga(y)>Ca&&(s=-(h=180)),x[0]=s,x[1]=h,m=null}function a(n,t){return(t-=n)<0?t+360:t}function c(n,t){return n[0]-t[0]}function l(n,t){return t[0]<=t[1]?t[0]<=n&&n<=t[1]:n<t[0]||t[1]<n}var s,f,h,g,p,v,d,m,y,M,x,b={point:n,lineStart:e,lineEnd:r,polygonStart:function(){b.point=u,b.lineStart=i,b.lineEnd=o,y=0,Mc.polygonStart()},polygonEnd:function(){Mc.polygonEnd(),b.point=n,b.lineStart=e,b.lineEnd=r,0>yc?(s=-(h=180),f=-(g=90)):y>Ca?g=90:-Ca>y&&(f=-90),x[0]=s,x[1]=h}};return function(n){g=h=-(s=f=1/0),M=[],ta.geo.stream(n,b);var t=M.length;if(t){M.sort(c);for(var e,r=1,u=M[0],i=[u];t>r;++r)e=M[r],l(e[0],u)||l(e[1],u)?(a(u[0],e[1])>a(u[0],u[1])&&(u[1]=e[1]),a(e[0],u[1])>a(u[0],u[1])&&(u[0]=e[0])):i.push(u=e);for(var o,e,p=-1/0,t=i.length-1,r=0,u=i[t];t>=r;u=e,++r)e=i[r],(o=a(u[1],e[0]))>p&&(p=o,s=e[0],h=u[1])}return M=x=null,1/0===s||1/0===f?[[0/0,0/0],[0/0,0/0]]:[[s,f],[h,g]]}}(),ta.geo.centroid=function(n){xc=bc=_c=wc=Sc=kc=Ec=Ac=Nc=Cc=zc=0,ta.geo.stream(n,qc);var t=Nc,e=Cc,r=zc,u=t*t+e*e+r*r;return za>u&&(t=kc,e=Ec,r=Ac,Ca>bc&&(t=_c,e=wc,r=Sc),u=t*t+e*e+r*r,za>u)?[0/0,0/0]:[Math.atan2(e,t)*Pa,tt(r/Math.sqrt(u))*Pa]};var 
xc,bc,_c,wc,Sc,kc,Ec,Ac,Nc,Cc,zc,qc={sphere:b,point:_e,lineStart:Se,lineEnd:ke,polygonStart:function(){qc.lineStart=Ee},polygonEnd:function(){qc.lineStart=Se}},Lc=Le(Ne,Pe,je,[-qa,-qa/2]),Tc=1e9;ta.geo.clipExtent=function(){var n,t,e,r,u,i,o={stream:function(n){return u&&(u.valid=!1),u=i(n),u.valid=!0,u},extent:function(a){return arguments.length?(i=Ie(n=+a[0][0],t=+a[0][1],e=+a[1][0],r=+a[1][1]),u&&(u.valid=!1,u=null),o):[[n,t],[e,r]]}};return o.extent([[0,0],[960,500]])},(ta.geo.conicEqualArea=function(){return Ye(Ze)}).raw=Ze,ta.geo.albers=function(){return ta.geo.conicEqualArea().rotate([96,0]).center([-.6,38.7]).parallels([29.5,45.5]).scale(1070)},ta.geo.albersUsa=function(){function n(n){var i=n[0],o=n[1];return t=null,e(i,o),t||(r(i,o),t)||u(i,o),t}var t,e,r,u,i=ta.geo.albers(),o=ta.geo.conicEqualArea().rotate([154,0]).center([-2,58.5]).parallels([55,65]),a=ta.geo.conicEqualArea().rotate([157,0]).center([-3,19.9]).parallels([8,18]),c={point:function(n,e){t=[n,e]}};return n.invert=function(n){var t=i.scale(),e=i.translate(),r=(n[0]-e[0])/t,u=(n[1]-e[1])/t;return(u>=.12&&.234>u&&r>=-.425&&-.214>r?o:u>=.166&&.234>u&&r>=-.214&&-.115>r?a:i).invert(n)},n.stream=function(n){var t=i.stream(n),e=o.stream(n),r=a.stream(n);return{point:function(n,u){t.point(n,u),e.point(n,u),r.point(n,u)},sphere:function(){t.sphere(),e.sphere(),r.sphere()},lineStart:function(){t.lineStart(),e.lineStart(),r.lineStart()},lineEnd:function(){t.lineEnd(),e.lineEnd(),r.lineEnd()},polygonStart:function(){t.polygonStart(),e.polygonStart(),r.polygonStart()},polygonEnd:function(){t.polygonEnd(),e.polygonEnd(),r.polygonEnd()}}},n.precision=function(t){return arguments.length?(i.precision(t),o.precision(t),a.precision(t),n):i.precision()},n.scale=function(t){return arguments.length?(i.scale(t),o.scale(.35*t),a.scale(t),n.translate(i.translate())):i.scale()},n.translate=function(t){if(!arguments.length)return i.translate();var l=i.scale(),s=+t[0],f=+t[1];return 
e=i.translate(t).clipExtent([[s-.455*l,f-.238*l],[s+.455*l,f+.238*l]]).stream(c).point,r=o.translate([s-.307*l,f+.201*l]).clipExtent([[s-.425*l+Ca,f+.12*l+Ca],[s-.214*l-Ca,f+.234*l-Ca]]).stream(c).point,u=a.translate([s-.205*l,f+.212*l]).clipExtent([[s-.214*l+Ca,f+.166*l+Ca],[s-.115*l-Ca,f+.234*l-Ca]]).stream(c).point,n},n.scale(1070)};var Rc,Dc,Pc,Uc,jc,Fc,Hc={point:b,lineStart:b,lineEnd:b,polygonStart:function(){Dc=0,Hc.lineStart=Ve},polygonEnd:function(){Hc.lineStart=Hc.lineEnd=Hc.point=b,Rc+=ga(Dc/2)}},Oc={point:Xe,lineStart:b,lineEnd:b,polygonStart:b,polygonEnd:b},Ic={point:We,lineStart:Je,lineEnd:Ge,polygonStart:function(){Ic.lineStart=Ke},polygonEnd:function(){Ic.point=We,Ic.lineStart=Je,Ic.lineEnd=Ge}};ta.geo.path=function(){function n(n){return n&&("function"==typeof a&&i.pointRadius(+a.apply(this,arguments)),o&&o.valid||(o=u(i)),ta.geo.stream(n,o)),i.result()}function t(){return o=null,n}var e,r,u,i,o,a=4.5;return n.area=function(n){return Rc=0,ta.geo.stream(n,u(Hc)),Rc},n.centroid=function(n){return _c=wc=Sc=kc=Ec=Ac=Nc=Cc=zc=0,ta.geo.stream(n,u(Ic)),zc?[Nc/zc,Cc/zc]:Ac?[kc/Ac,Ec/Ac]:Sc?[_c/Sc,wc/Sc]:[0/0,0/0]},n.bounds=function(n){return jc=Fc=-(Pc=Uc=1/0),ta.geo.stream(n,u(Oc)),[[Pc,Uc],[jc,Fc]]},n.projection=function(n){return arguments.length?(u=(e=n)?n.stream||tr(n):y,t()):e},n.context=function(n){return arguments.length?(i=null==(r=n)?new $e:new Qe(n),"function"!=typeof a&&i.pointRadius(a),t()):r},n.pointRadius=function(t){return arguments.length?(a="function"==typeof t?t:(i.pointRadius(+t),+t),n):a},n.projection(ta.geo.albersUsa()).context(null)},ta.geo.transform=function(n){return{stream:function(t){var e=new er(t);for(var r in n)e[r]=n[r];return 
e}}},er.prototype={point:function(n,t){this.stream.point(n,t)},sphere:function(){this.stream.sphere()},lineStart:function(){this.stream.lineStart()},lineEnd:function(){this.stream.lineEnd()},polygonStart:function(){this.stream.polygonStart()},polygonEnd:function(){this.stream.polygonEnd()}},ta.geo.projection=ur,ta.geo.projectionMutator=ir,(ta.geo.equirectangular=function(){return ur(ar)}).raw=ar.invert=ar,ta.geo.rotation=function(n){function t(t){return t=n(t[0]*Da,t[1]*Da),t[0]*=Pa,t[1]*=Pa,t}return n=lr(n[0]%360*Da,n[1]*Da,n.length>2?n[2]*Da:0),t.invert=function(t){return t=n.invert(t[0]*Da,t[1]*Da),t[0]*=Pa,t[1]*=Pa,t},t},cr.invert=ar,ta.geo.circle=function(){function n(){var n="function"==typeof r?r.apply(this,arguments):r,t=lr(-n[0]*Da,-n[1]*Da,0).invert,u=[];return e(null,null,1,{point:function(n,e){u.push(n=t(n,e)),n[0]*=Pa,n[1]*=Pa}}),{type:"Polygon",coordinates:[u]}}var t,e,r=[0,0],u=6;return n.origin=function(t){return arguments.length?(r=t,n):r},n.angle=function(r){return arguments.length?(e=gr((t=+r)*Da,u*Da),n):t},n.precision=function(r){return arguments.length?(e=gr(t*Da,(u=+r)*Da),n):u},n.angle(90)},ta.geo.distance=function(n,t){var e,r=(t[0]-n[0])*Da,u=n[1]*Da,i=t[1]*Da,o=Math.sin(r),a=Math.cos(r),c=Math.sin(u),l=Math.cos(u),s=Math.sin(i),f=Math.cos(i);return Math.atan2(Math.sqrt((e=f*o)*e+(e=l*s-c*f*a)*e),c*s+l*f*a)},ta.geo.graticule=function(){function n(){return{type:"MultiLineString",coordinates:t()}}function t(){return ta.range(Math.ceil(i/d)*d,u,d).map(h).concat(ta.range(Math.ceil(l/m)*m,c,m).map(g)).concat(ta.range(Math.ceil(r/p)*p,e,p).filter(function(n){return ga(n%d)>Ca}).map(s)).concat(ta.range(Math.ceil(a/v)*v,o,v).filter(function(n){return ga(n%m)>Ca}).map(f))}var e,r,u,i,o,a,c,l,s,f,h,g,p=10,v=p,d=90,m=360,y=2.5;return n.lines=function(){return 
t().map(function(n){return{type:"LineString",coordinates:n}})},n.outline=function(){return{type:"Polygon",coordinates:[h(i).concat(g(c).slice(1),h(u).reverse().slice(1),g(l).reverse().slice(1))]}},n.extent=function(t){return arguments.length?n.majorExtent(t).minorExtent(t):n.minorExtent()},n.majorExtent=function(t){return arguments.length?(i=+t[0][0],u=+t[1][0],l=+t[0][1],c=+t[1][1],i>u&&(t=i,i=u,u=t),l>c&&(t=l,l=c,c=t),n.precision(y)):[[i,l],[u,c]]},n.minorExtent=function(t){return arguments.length?(r=+t[0][0],e=+t[1][0],a=+t[0][1],o=+t[1][1],r>e&&(t=r,r=e,e=t),a>o&&(t=a,a=o,o=t),n.precision(y)):[[r,a],[e,o]]},n.step=function(t){return arguments.length?n.majorStep(t).minorStep(t):n.minorStep()},n.majorStep=function(t){return arguments.length?(d=+t[0],m=+t[1],n):[d,m]},n.minorStep=function(t){return arguments.length?(p=+t[0],v=+t[1],n):[p,v]},n.precision=function(t){return arguments.length?(y=+t,s=vr(a,o,90),f=dr(r,e,y),h=vr(l,c,90),g=dr(i,u,y),n):y},n.majorExtent([[-180,-90+Ca],[180,90-Ca]]).minorExtent([[-180,-80-Ca],[180,80+Ca]])},ta.geo.greatArc=function(){function n(){return{type:"LineString",coordinates:[t||r.apply(this,arguments),e||u.apply(this,arguments)]}}var t,e,r=mr,u=yr;return n.distance=function(){return ta.geo.distance(t||r.apply(this,arguments),e||u.apply(this,arguments))},n.source=function(e){return arguments.length?(r=e,t="function"==typeof e?null:e,n):r},n.target=function(t){return arguments.length?(u=t,e="function"==typeof t?null:t,n):u},n.precision=function(){return arguments.length?n:0},n},ta.geo.interpolate=function(n,t){return Mr(n[0]*Da,n[1]*Da,t[0]*Da,t[1]*Da)},ta.geo.length=function(n){return Yc=0,ta.geo.stream(n,Zc),Yc};var Yc,Zc={sphere:b,point:b,lineStart:xr,lineEnd:b,polygonStart:b,polygonEnd:b},Vc=br(function(n){return Math.sqrt(2/(1+n))},function(n){return 2*Math.asin(n/2)});(ta.geo.azimuthalEqualArea=function(){return ur(Vc)}).raw=Vc;var Xc=br(function(n){var t=Math.acos(n);return 
t&&t/Math.sin(t)},y);(ta.geo.azimuthalEquidistant=function(){return ur(Xc)}).raw=Xc,(ta.geo.conicConformal=function(){return Ye(_r)}).raw=_r,(ta.geo.conicEquidistant=function(){return Ye(wr)}).raw=wr;var $c=br(function(n){return 1/n},Math.atan);(ta.geo.gnomonic=function(){return ur($c)}).raw=$c,Sr.invert=function(n,t){return[n,2*Math.atan(Math.exp(t))-Ra]},(ta.geo.mercator=function(){return kr(Sr)}).raw=Sr;var Bc=br(function(){return 1},Math.asin);(ta.geo.orthographic=function(){return ur(Bc)}).raw=Bc;var Wc=br(function(n){return 1/(1+n)},function(n){return 2*Math.atan(n)});(ta.geo.stereographic=function(){return ur(Wc)}).raw=Wc,Er.invert=function(n,t){return[-t,2*Math.atan(Math.exp(n))-Ra]},(ta.geo.transverseMercator=function(){var n=kr(Er),t=n.center,e=n.rotate;return n.center=function(n){return n?t([-n[1],n[0]]):(n=t(),[n[1],-n[0]])},n.rotate=function(n){return n?e([n[0],n[1],n.length>2?n[2]+90:90]):(n=e(),[n[0],n[1],n[2]-90])},e([0,0,90])}).raw=Er,ta.geom={},ta.geom.hull=function(n){function t(n){if(n.length<3)return[];var t,u=Et(e),i=Et(r),o=n.length,a=[],c=[];for(t=0;o>t;t++)a.push([+u.call(this,n[t],t),+i.call(this,n[t],t),t]);for(a.sort(zr),t=0;o>t;t++)c.push([a[t][0],-a[t][1]]);var l=Cr(a),s=Cr(c),f=s[0]===l[0],h=s[s.length-1]===l[l.length-1],g=[];for(t=l.length-1;t>=0;--t)g.push(n[a[l[t]][2]]);for(t=+f;t<s.length-h;++t)g.push(n[a[s[t]][2]]);return g}var e=Ar,r=Nr;return arguments.length?t(n):(t.x=function(n){return arguments.length?(e=n,t):e},t.y=function(n){return arguments.length?(r=n,t):r},t)},ta.geom.polygon=function(n){return ya(n,Jc),n};var Jc=ta.geom.polygon.prototype=[];Jc.area=function(){for(var n,t=-1,e=this.length,r=this[e-1],u=0;++t<e;)n=r,r=this[t],u+=n[1]*r[0]-n[0]*r[1];return.5*u},Jc.centroid=function(n){var t,e,r=-1,u=this.length,i=0,o=0,a=this[u-1];for(arguments.length||(n=-1/(6*this.area()));++r<u;)t=a,a=this[r],e=t[0]*a[1]-a[0]*t[1],i+=(t[0]+a[0])*e,o+=(t[1]+a[1])*e;return[i*n,o*n]},Jc.clip=function(n){for(var 
t,e,r,u,i,o,a=Tr(n),c=-1,l=this.length-Tr(this),s=this[l-1];++c<l;){for(t=n.slice(),n.length=0,u=this[c],i=t[(r=t.length-a)-1],e=-1;++e<r;)o=t[e],qr(o,s,u)?(qr(i,s,u)||n.push(Lr(i,o,s,u)),n.push(o)):qr(i,s,u)&&n.push(Lr(i,o,s,u)),i=o;a&&n.push(n[0]),s=u}return n};var Gc,Kc,Qc,nl,tl,el=[],rl=[];Or.prototype.prepare=function(){for(var n,t=this.edges,e=t.length;e--;)n=t[e].edge,n.b&&n.a||t.splice(e,1);return t.sort(Yr),t.length},Qr.prototype={start:function(){return this.edge.l===this.site?this.edge.a:this.edge.b},end:function(){return this.edge.l===this.site?this.edge.b:this.edge.a}},nu.prototype={insert:function(n,t){var e,r,u;if(n){if(t.P=n,t.N=n.N,n.N&&(n.N.P=t),n.N=t,n.R){for(n=n.R;n.L;)n=n.L;n.L=t}else n.R=t;e=n}else this._?(n=uu(this._),t.P=null,t.N=n,n.P=n.L=t,e=n):(t.P=t.N=null,this._=t,e=null);for(t.L=t.R=null,t.U=e,t.C=!0,n=t;e&&e.C;)r=e.U,e===r.L?(u=r.R,u&&u.C?(e.C=u.C=!1,r.C=!0,n=r):(n===e.R&&(eu(this,e),n=e,e=n.U),e.C=!1,r.C=!0,ru(this,r))):(u=r.L,u&&u.C?(e.C=u.C=!1,r.C=!0,n=r):(n===e.L&&(ru(this,e),n=e,e=n.U),e.C=!1,r.C=!0,eu(this,r))),e=n.U;this._.C=!1},remove:function(n){n.N&&(n.N.P=n.P),n.P&&(n.P.N=n.N),n.N=n.P=null;var t,e,r,u=n.U,i=n.L,o=n.R;if(e=i?o?uu(o):i:o,u?u.L===n?u.L=e:u.R=e:this._=e,i&&o?(r=e.C,e.C=n.C,e.L=i,i.U=e,e!==o?(u=e.U,e.U=n.U,n=e.R,u.L=n,e.R=o,o.U=e):(e.U=u,u=e,n=e.R)):(r=n.C,n=e),n&&(n.U=u),!r){if(n&&n.C)return void(n.C=!1);do{if(n===this._)break;if(n===u.L){if(t=u.R,t.C&&(t.C=!1,u.C=!0,eu(this,u),t=u.R),t.L&&t.L.C||t.R&&t.R.C){t.R&&t.R.C||(t.L.C=!1,t.C=!0,ru(this,t),t=u.R),t.C=u.C,u.C=t.R.C=!1,eu(this,u),n=this._;break}}else if(t=u.L,t.C&&(t.C=!1,u.C=!0,ru(this,u),t=u.L),t.L&&t.L.C||t.R&&t.R.C){t.L&&t.L.C||(t.R.C=!1,t.C=!0,eu(this,t),t=u.L),t.C=u.C,u.C=t.L.C=!1,ru(this,u),n=this._;break}t.C=!0,n=u,u=u.U}while(!n.C);n&&(n.C=!1)}}},ta.geom.voronoi=function(n){function t(n){var t=new Array(n.length),r=a[0][0],u=a[0][1],i=a[1][0],o=a[1][1];return iu(e(n),a).cells.forEach(function(e,a){var 
c=e.edges,l=e.site,s=t[a]=c.length?c.map(function(n){var t=n.start();return[t.x,t.y]}):l.x>=r&&l.x<=i&&l.y>=u&&l.y<=o?[[r,o],[i,o],[i,u],[r,u]]:[];s.point=n[a]}),t}function e(n){return n.map(function(n,t){return{x:Math.round(i(n,t)/Ca)*Ca,y:Math.round(o(n,t)/Ca)*Ca,i:t}})}var r=Ar,u=Nr,i=r,o=u,a=ul;return n?t(n):(t.links=function(n){return iu(e(n)).edges.filter(function(n){return n.l&&n.r}).map(function(t){return{source:n[t.l.i],target:n[t.r.i]}})},t.triangles=function(n){var t=[];return iu(e(n)).cells.forEach(function(e,r){for(var u,i,o=e.site,a=e.edges.sort(Yr),c=-1,l=a.length,s=a[l-1].edge,f=s.l===o?s.r:s.l;++c<l;)u=s,i=f,s=a[c].edge,f=s.l===o?s.r:s.l,r<i.i&&r<f.i&&au(o,i,f)<0&&t.push([n[r],n[i.i],n[f.i]])}),t},t.x=function(n){return arguments.length?(i=Et(r=n),t):r},t.y=function(n){return arguments.length?(o=Et(u=n),t):u},t.clipExtent=function(n){return arguments.length?(a=null==n?ul:n,t):a===ul?null:a},t.size=function(n){return arguments.length?t.clipExtent(n&&[[0,0],n]):a===ul?null:a&&a[1]},t)};var ul=[[-1e6,-1e6],[1e6,1e6]];ta.geom.delaunay=function(n){return ta.geom.voronoi().triangles(n)},ta.geom.quadtree=function(n,t,e,r,u){function i(n){function i(n,t,e,r,u,i,o,a){if(!isNaN(e)&&!isNaN(r))if(n.leaf){var c=n.x,s=n.y;if(null!=c)if(ga(c-e)+ga(s-r)<.01)l(n,t,e,r,u,i,o,a);else{var f=n.point;n.x=n.y=n.point=null,l(n,f,c,s,u,i,o,a),l(n,t,e,r,u,i,o,a)}else n.x=e,n.y=r,n.point=t}else l(n,t,e,r,u,i,o,a)}function l(n,t,e,r,u,o,a,c){var l=.5*(u+a),s=.5*(o+c),f=e>=l,h=r>=s,g=h<<1|f;n.leaf=!1,n=n.nodes[g]||(n.nodes[g]=su()),f?u=l:a=l,h?o=s:c=s,i(n,t,e,r,u,o,a,c)}var s,f,h,g,p,v,d,m,y,M=Et(a),x=Et(c);if(null!=t)v=t,d=e,m=r,y=u;else if(m=y=-(v=d=1/0),f=[],h=[],p=n.length,o)for(g=0;p>g;++g)s=n[g],s.x<v&&(v=s.x),s.y<d&&(d=s.y),s.x>m&&(m=s.x),s.y>y&&(y=s.y),f.push(s.x),h.push(s.y);else for(g=0;p>g;++g){var b=+M(s=n[g],g),_=+x(s,g);v>b&&(v=b),d>_&&(d=_),b>m&&(m=b),_>y&&(y=_),f.push(b),h.push(_)}var w=m-v,S=y-d;w>S?y=d+w:m=v+S;var 
k=su();if(k.add=function(n){i(k,n,+M(n,++g),+x(n,g),v,d,m,y)},k.visit=function(n){fu(n,k,v,d,m,y)},k.find=function(n){return hu(k,n[0],n[1],v,d,m,y)},g=-1,null==t){for(;++g<p;)i(k,n[g],f[g],h[g],v,d,m,y);--g}else n.forEach(k.add);return f=h=n=s=null,k}var o,a=Ar,c=Nr;return(o=arguments.length)?(a=cu,c=lu,3===o&&(u=e,r=t,e=t=0),i(n)):(i.x=function(n){return arguments.length?(a=n,i):a},i.y=function(n){return arguments.length?(c=n,i):c},i.extent=function(n){return arguments.length?(null==n?t=e=r=u=null:(t=+n[0][0],e=+n[0][1],r=+n[1][0],u=+n[1][1]),i):null==t?null:[[t,e],[r,u]]},i.size=function(n){return arguments.length?(null==n?t=e=r=u=null:(t=e=0,r=+n[0],u=+n[1]),i):null==t?null:[r-t,u-e]},i)},ta.interpolateRgb=gu,ta.interpolateObject=pu,ta.interpolateNumber=vu,ta.interpolateString=du;var il=/[-+]?(?:\d+\.?\d*|\.?\d+)(?:[eE][-+]?\d+)?/g,ol=new RegExp(il.source,"g");ta.interpolate=mu,ta.interpolators=[function(n,t){var e=typeof t;return("string"===e?Ga.has(t)||/^(#|rgb\(|hsl\()/.test(t)?gu:du:t instanceof ot?gu:Array.isArray(t)?yu:"object"===e&&isNaN(t)?pu:vu)(n,t)}],ta.interpolateArray=yu;var al=function(){return y},cl=ta.map({linear:al,poly:ku,quad:function(){return _u},cubic:function(){return wu},sin:function(){return Eu},exp:function(){return Au},circle:function(){return Nu},elastic:Cu,back:zu,bounce:function(){return qu}}),ll=ta.map({"in":y,out:xu,"in-out":bu,"out-in":function(n){return bu(xu(n))}});ta.ease=function(n){var t=n.indexOf("-"),e=t>=0?n.slice(0,t):n,r=t>=0?n.slice(t+1):"in";return e=cl.get(e)||al,r=ll.get(r)||y,Mu(r(e.apply(null,ea.call(arguments,1))))},ta.interpolateHcl=Lu,ta.interpolateHsl=Tu,ta.interpolateLab=Ru,ta.interpolateRound=Du,ta.transform=function(n){var t=ua.createElementNS(ta.ns.prefix.svg,"g");return(ta.transform=function(n){if(null!=n){t.setAttribute("transform",n);var e=t.transform.baseVal.consolidate()}return new 
Pu(e?e.matrix:sl)})(n)},Pu.prototype.toString=function(){return"translate("+this.translate+")rotate("+this.rotate+")skewX("+this.skew+")scale("+this.scale+")"};var sl={a:1,b:0,c:0,d:1,e:0,f:0};ta.interpolateTransform=Hu,ta.layout={},ta.layout.bundle=function(){return function(n){for(var t=[],e=-1,r=n.length;++e<r;)t.push(Yu(n[e]));return t}},ta.layout.chord=function(){function n(){var n,l,f,h,g,p={},v=[],d=ta.range(i),m=[];for(e=[],r=[],n=0,h=-1;++h<i;){for(l=0,g=-1;++g<i;)l+=u[h][g];v.push(l),m.push(ta.range(i)),n+=l}for(o&&d.sort(function(n,t){return o(v[n],v[t])}),a&&m.forEach(function(n,t){n.sort(function(n,e){return a(u[t][n],u[t][e])})}),n=(La-s*i)/n,l=0,h=-1;++h<i;){for(f=l,g=-1;++g<i;){var y=d[h],M=m[y][g],x=u[y][M],b=l,_=l+=x*n;p[y+"-"+M]={index:y,subindex:M,startAngle:b,endAngle:_,value:x}}r[y]={index:y,startAngle:f,endAngle:l,value:(l-f)/n},l+=s}for(h=-1;++h<i;)for(g=h-1;++g<i;){var w=p[h+"-"+g],S=p[g+"-"+h];(w.value||S.value)&&e.push(w.value<S.value?{source:S,target:w}:{source:w,target:S})}c&&t()}function t(){e.sort(function(n,t){return c((n.source.value+n.target.value)/2,(t.source.value+t.target.value)/2)})}var e,r,u,i,o,a,c,l={},s=0;return l.matrix=function(n){return arguments.length?(i=(u=n)&&u.length,e=r=null,l):u},l.padding=function(n){return arguments.length?(s=n,e=r=null,l):s},l.sortGroups=function(n){return arguments.length?(o=n,e=r=null,l):o},l.sortSubgroups=function(n){return arguments.length?(a=n,e=null,l):a},l.sortChords=function(n){return arguments.length?(c=n,e&&t(),l):c},l.chords=function(){return e||n(),e},l.groups=function(){return r||n(),r},l},ta.layout.force=function(){function n(n){return function(t,e,r,u){if(t.point!==n){var i=t.cx-n.x,o=t.cy-n.y,a=u-e,c=i*i+o*o;if(c>a*a/d){if(p>c){var l=t.charge/c;n.px-=i*l,n.py-=o*l}return!0}if(t.point&&c&&p>c){var l=t.pointCharge/c;n.px-=i*l,n.py-=o*l}}return!t.charge}}function t(n){n.px=ta.event.x,n.py=ta.event.y,a.resume()}var 
e,r,u,i,o,a={},c=ta.dispatch("start","tick","end"),l=[1,1],s=.9,f=fl,h=hl,g=-30,p=gl,v=.1,d=.64,m=[],M=[];return a.tick=function(){if((r*=.99)<.005)return c.end({type:"end",alpha:r=0}),!0;var t,e,a,f,h,p,d,y,x,b=m.length,_=M.length;for(e=0;_>e;++e)a=M[e],f=a.source,h=a.target,y=h.x-f.x,x=h.y-f.y,(p=y*y+x*x)&&(p=r*i[e]*((p=Math.sqrt(p))-u[e])/p,y*=p,x*=p,h.x-=y*(d=f.weight/(h.weight+f.weight)),h.y-=x*d,f.x+=y*(d=1-d),f.y+=x*d);if((d=r*v)&&(y=l[0]/2,x=l[1]/2,e=-1,d))for(;++e<b;)a=m[e],a.x+=(y-a.x)*d,a.y+=(x-a.y)*d;if(g)for(Ju(t=ta.geom.quadtree(m),r,o),e=-1;++e<b;)(a=m[e]).fixed||t.visit(n(a));for(e=-1;++e<b;)a=m[e],a.fixed?(a.x=a.px,a.y=a.py):(a.x-=(a.px-(a.px=a.x))*s,a.y-=(a.py-(a.py=a.y))*s);c.tick({type:"tick",alpha:r})},a.nodes=function(n){return arguments.length?(m=n,a):m},a.links=function(n){return arguments.length?(M=n,a):M},a.size=function(n){return arguments.length?(l=n,a):l},a.linkDistance=function(n){return arguments.length?(f="function"==typeof n?n:+n,a):f},a.distance=a.linkDistance,a.linkStrength=function(n){return arguments.length?(h="function"==typeof n?n:+n,a):h},a.friction=function(n){return arguments.length?(s=+n,a):s},a.charge=function(n){return arguments.length?(g="function"==typeof n?n:+n,a):g},a.chargeDistance=function(n){return arguments.length?(p=n*n,a):Math.sqrt(p)},a.gravity=function(n){return arguments.length?(v=+n,a):v},a.theta=function(n){return arguments.length?(d=n*n,a):Math.sqrt(d)},a.alpha=function(n){return arguments.length?(n=+n,r?r=n>0?n:0:n>0&&(c.start({type:"start",alpha:r=n}),ta.timer(a.tick)),a):r},a.start=function(){function n(n,r){if(!e){for(e=new Array(c),a=0;c>a;++a)e[a]=[];for(a=0;s>a;++a){var u=M[a];e[u.source.index].push(u.target),e[u.target.index].push(u.source)}}for(var i,o=e[t],a=-1,l=o.length;++a<l;)if(!isNaN(i=o[a][n]))return i;return Math.random()*r}var t,e,r,c=m.length,s=M.length,p=l[0],v=l[1];for(t=0;c>t;++t)(r=m[t]).index=t,r.weight=0;for(t=0;s>t;++t)r=M[t],"number"==typeof 
r.source&&(r.source=m[r.source]),"number"==typeof r.target&&(r.target=m[r.target]),++r.source.weight,++r.target.weight;for(t=0;c>t;++t)r=m[t],isNaN(r.x)&&(r.x=n("x",p)),isNaN(r.y)&&(r.y=n("y",v)),isNaN(r.px)&&(r.px=r.x),isNaN(r.py)&&(r.py=r.y);if(u=[],"function"==typeof f)for(t=0;s>t;++t)u[t]=+f.call(this,M[t],t);else for(t=0;s>t;++t)u[t]=f;if(i=[],"function"==typeof h)for(t=0;s>t;++t)i[t]=+h.call(this,M[t],t);else for(t=0;s>t;++t)i[t]=h;if(o=[],"function"==typeof g)for(t=0;c>t;++t)o[t]=+g.call(this,m[t],t);else for(t=0;c>t;++t)o[t]=g;return a.resume()},a.resume=function(){return a.alpha(.1)},a.stop=function(){return a.alpha(0)},a.drag=function(){return e||(e=ta.behavior.drag().origin(y).on("dragstart.force",Xu).on("drag.force",t).on("dragend.force",$u)),arguments.length?void this.on("mouseover.force",Bu).on("mouseout.force",Wu).call(e):e},ta.rebind(a,c,"on")};var fl=20,hl=1,gl=1/0;ta.layout.hierarchy=function(){function n(u){var i,o=[u],a=[];for(u.depth=0;null!=(i=o.pop());)if(a.push(i),(l=e.call(n,i,i.depth))&&(c=l.length)){for(var c,l,s;--c>=0;)o.push(s=l[c]),s.parent=i,s.depth=i.depth+1;r&&(i.value=0),i.children=l}else r&&(i.value=+r.call(n,i,i.depth)||0),delete i.children;return Qu(u,function(n){var e,u;t&&(e=n.children)&&e.sort(t),r&&(u=n.parent)&&(u.value+=n.value)}),a}var t=ei,e=ni,r=ti;return n.sort=function(e){return arguments.length?(t=e,n):t},n.children=function(t){return arguments.length?(e=t,n):e},n.value=function(t){return arguments.length?(r=t,n):r},n.revalue=function(t){return r&&(Ku(t,function(n){n.children&&(n.value=0)}),Qu(t,function(t){var e;t.children||(t.value=+r.call(n,t,t.depth)||0),(e=t.parent)&&(e.value+=t.value)})),t},n},ta.layout.partition=function(){function n(t,e,r,u){var i=t.children;if(t.x=e,t.y=t.depth*u,t.dx=r,t.dy=u,i&&(o=i.length)){var o,a,c,l=-1;for(r=t.value?r/t.value:0;++l<o;)n(a=i[l],e,c=a.value*r,u),e+=c}}function t(n){var e=n.children,r=0;if(e&&(u=e.length))for(var u,i=-1;++i<u;)r=Math.max(r,t(e[i]));return 1+r}function 
e(e,i){var o=r.call(this,e,i);return n(o[0],0,u[0],u[1]/t(o[0])),o}var r=ta.layout.hierarchy(),u=[1,1];return e.size=function(n){return arguments.length?(u=n,e):u},Gu(e,r)},ta.layout.pie=function(){function n(o){var a,c=o.length,l=o.map(function(e,r){return+t.call(n,e,r)}),s=+("function"==typeof r?r.apply(this,arguments):r),f=("function"==typeof u?u.apply(this,arguments):u)-s,h=Math.min(Math.abs(f)/c,+("function"==typeof i?i.apply(this,arguments):i)),g=h*(0>f?-1:1),p=(f-c*g)/ta.sum(l),v=ta.range(c),d=[];return null!=e&&v.sort(e===pl?function(n,t){return l[t]-l[n]}:function(n,t){return e(o[n],o[t])}),v.forEach(function(n){d[n]={data:o[n],value:a=l[n],startAngle:s,endAngle:s+=a*p+g,padAngle:h}}),d}var t=Number,e=pl,r=0,u=La,i=0;return n.value=function(e){return arguments.length?(t=e,n):t},n.sort=function(t){return arguments.length?(e=t,n):e},n.startAngle=function(t){return arguments.length?(r=t,n):r},n.endAngle=function(t){return arguments.length?(u=t,n):u},n.padAngle=function(t){return arguments.length?(i=t,n):i},n};var pl={};ta.layout.stack=function(){function n(a,c){if(!(h=a.length))return a;var l=a.map(function(e,r){return t.call(n,e,r)}),s=l.map(function(t){return t.map(function(t,e){return[i.call(n,t,e),o.call(n,t,e)]})}),f=e.call(n,s,c);l=ta.permute(l,f),s=ta.permute(s,f);var h,g,p,v,d=r.call(n,s,c),m=l[0].length;for(p=0;m>p;++p)for(u.call(n,l[0][p],v=d[p],s[0][p][1]),g=1;h>g;++g)u.call(n,l[g][p],v+=s[g-1][p][1],s[g][p][1]);return a}var t=y,e=ai,r=ci,u=oi,i=ui,o=ii;return n.values=function(e){return arguments.length?(t=e,n):t},n.order=function(t){return arguments.length?(e="function"==typeof t?t:vl.get(t)||ai,n):e},n.offset=function(t){return arguments.length?(r="function"==typeof t?t:dl.get(t)||ci,n):r},n.x=function(t){return arguments.length?(i=t,n):i},n.y=function(t){return arguments.length?(o=t,n):o},n.out=function(t){return arguments.length?(u=t,n):u},n};var vl=ta.map({"inside-out":function(n){var 
t,e,r=n.length,u=n.map(li),i=n.map(si),o=ta.range(r).sort(function(n,t){return u[n]-u[t]}),a=0,c=0,l=[],s=[];for(t=0;r>t;++t)e=o[t],c>a?(a+=i[e],l.push(e)):(c+=i[e],s.push(e));return s.reverse().concat(l)},reverse:function(n){return ta.range(n.length).reverse()},"default":ai}),dl=ta.map({silhouette:function(n){var t,e,r,u=n.length,i=n[0].length,o=[],a=0,c=[];for(e=0;i>e;++e){for(t=0,r=0;u>t;t++)r+=n[t][e][1];r>a&&(a=r),o.push(r)}for(e=0;i>e;++e)c[e]=(a-o[e])/2;return c},wiggle:function(n){var t,e,r,u,i,o,a,c,l,s=n.length,f=n[0],h=f.length,g=[];for(g[0]=c=l=0,e=1;h>e;++e){for(t=0,u=0;s>t;++t)u+=n[t][e][1];for(t=0,i=0,a=f[e][0]-f[e-1][0];s>t;++t){for(r=0,o=(n[t][e][1]-n[t][e-1][1])/(2*a);t>r;++r)o+=(n[r][e][1]-n[r][e-1][1])/a;i+=o*n[t][e][1]}g[e]=c-=u?i/u*a:0,l>c&&(l=c)}for(e=0;h>e;++e)g[e]-=l;return g},expand:function(n){var t,e,r,u=n.length,i=n[0].length,o=1/u,a=[];for(e=0;i>e;++e){for(t=0,r=0;u>t;t++)r+=n[t][e][1];if(r)for(t=0;u>t;t++)n[t][e][1]/=r;else for(t=0;u>t;t++)n[t][e][1]=o}for(e=0;i>e;++e)a[e]=0;return a},zero:ci});ta.layout.histogram=function(){function n(n,i){for(var o,a,c=[],l=n.map(e,this),s=r.call(this,l,i),f=u.call(this,s,l,i),i=-1,h=l.length,g=f.length-1,p=t?1:1/h;++i<g;)o=c[i]=[],o.dx=f[i+1]-(o.x=f[i]),o.y=0;if(g>0)for(i=-1;++i<h;)a=l[i],a>=s[0]&&a<=s[1]&&(o=c[ta.bisect(f,a,1,g)-1],o.y+=p,o.push(n[i]));return c}var t=!0,e=Number,r=pi,u=hi;return n.value=function(t){return arguments.length?(e=t,n):e},n.range=function(t){return arguments.length?(r=Et(t),n):r},n.bins=function(t){return arguments.length?(u="number"==typeof t?function(n){return gi(n,t)}:Et(t),n):u},n.frequency=function(e){return arguments.length?(t=!!e,n):t},n},ta.layout.pack=function(){function n(n,i){var o=e.call(this,n,i),a=o[0],c=u[0],l=u[1],s=null==t?Math.sqrt:"function"==typeof t?t:function(){return t};if(a.x=a.y=0,Qu(a,function(n){n.r=+s(n.value)}),Qu(a,Mi),r){var f=r*(t?1:Math.max(2*a.r/c,2*a.r/l))/2;Qu(a,function(n){n.r+=f}),Qu(a,Mi),Qu(a,function(n){n.r-=f})}return 
_i(a,c/2,l/2,t?1:1/Math.max(2*a.r/c,2*a.r/l)),o}var t,e=ta.layout.hierarchy().sort(vi),r=0,u=[1,1];return n.size=function(t){return arguments.length?(u=t,n):u},n.radius=function(e){return arguments.length?(t=null==e||"function"==typeof e?e:+e,n):t},n.padding=function(t){return arguments.length?(r=+t,n):r},Gu(n,e)},ta.layout.tree=function(){function n(n,u){var s=o.call(this,n,u),f=s[0],h=t(f);if(Qu(h,e),h.parent.m=-h.z,Ku(h,r),l)Ku(f,i);else{var g=f,p=f,v=f;Ku(f,function(n){n.x<g.x&&(g=n),n.x>p.x&&(p=n),n.depth>v.depth&&(v=n)});var d=a(g,p)/2-g.x,m=c[0]/(p.x+a(p,g)/2+d),y=c[1]/(v.depth||1);Ku(f,function(n){n.x=(n.x+d)*m,n.y=n.depth*y})}return s}function t(n){for(var t,e={A:null,children:[n]},r=[e];null!=(t=r.pop());)for(var u,i=t.children,o=0,a=i.length;a>o;++o)r.push((i[o]=u={_:i[o],parent:t,children:(u=i[o].children)&&u.slice()||[],A:null,a:null,z:0,m:0,c:0,s:0,t:null,i:o}).a=u);return e.children[0]}function e(n){var t=n.children,e=n.parent.children,r=n.i?e[n.i-1]:null;if(t.length){Ni(n);var i=(t[0].z+t[t.length-1].z)/2;r?(n.z=r.z+a(n._,r._),n.m=n.z-i):n.z=i}else r&&(n.z=r.z+a(n._,r._));n.parent.A=u(n,r,n.parent.A||e[0])}function r(n){n._.x=n.z+n.parent.m,n.m+=n.parent.m}function u(n,t,e){if(t){for(var r,u=n,i=n,o=t,c=u.parent.children[0],l=u.m,s=i.m,f=o.m,h=c.m;o=Ei(o),u=ki(u),o&&u;)c=ki(c),i=Ei(i),i.a=n,r=o.z+f-u.z-l+a(o._,u._),r>0&&(Ai(Ci(o,n,e),n,r),l+=r,s+=r),f+=o.m,l+=u.m,h+=c.m,s+=i.m;o&&!Ei(i)&&(i.t=o,i.m+=f-s),u&&!ki(c)&&(c.t=u,c.m+=l-h,e=n)}return e}function i(n){n.x*=c[0],n.y=n.depth*c[1]}var o=ta.layout.hierarchy().sort(null).value(null),a=Si,c=[1,1],l=null;return n.separation=function(t){return arguments.length?(a=t,n):a},n.size=function(t){return arguments.length?(l=null==(c=t)?i:null,n):l?null:c},n.nodeSize=function(t){return arguments.length?(l=null==(c=t)?null:i,n):l?c:null},Gu(n,o)},ta.layout.cluster=function(){function n(n,i){var o,a=t.call(this,n,i),c=a[0],l=0;Qu(c,function(n){var 
t=n.children;t&&t.length?(n.x=qi(t),n.y=zi(t)):(n.x=o?l+=e(n,o):0,n.y=0,o=n)});var s=Li(c),f=Ti(c),h=s.x-e(s,f)/2,g=f.x+e(f,s)/2;return Qu(c,u?function(n){n.x=(n.x-c.x)*r[0],n.y=(c.y-n.y)*r[1]}:function(n){n.x=(n.x-h)/(g-h)*r[0],n.y=(1-(c.y?n.y/c.y:1))*r[1]}),a}var t=ta.layout.hierarchy().sort(null).value(null),e=Si,r=[1,1],u=!1;return n.separation=function(t){return arguments.length?(e=t,n):e},n.size=function(t){return arguments.length?(u=null==(r=t),n):u?null:r},n.nodeSize=function(t){return arguments.length?(u=null!=(r=t),n):u?r:null},Gu(n,t)},ta.layout.treemap=function(){function n(n,t){for(var e,r,u=-1,i=n.length;++u<i;)r=(e=n[u]).value*(0>t?0:t),e.area=isNaN(r)||0>=r?0:r}function t(e){var i=e.children;if(i&&i.length){var o,a,c,l=f(e),s=[],h=i.slice(),p=1/0,v="slice"===g?l.dx:"dice"===g?l.dy:"slice-dice"===g?1&e.depth?l.dy:l.dx:Math.min(l.dx,l.dy);for(n(h,l.dx*l.dy/e.value),s.area=0;(c=h.length)>0;)s.push(o=h[c-1]),s.area+=o.area,"squarify"!==g||(a=r(s,v))<=p?(h.pop(),p=a):(s.area-=s.pop().area,u(s,v,l,!1),v=Math.min(l.dx,l.dy),s.length=s.area=0,p=1/0);s.length&&(u(s,v,l,!0),s.length=s.area=0),i.forEach(t)}}function e(t){var r=t.children;if(r&&r.length){var i,o=f(t),a=r.slice(),c=[];for(n(a,o.dx*o.dy/t.value),c.area=0;i=a.pop();)c.push(i),c.area+=i.area,null!=i.z&&(u(c,i.z?o.dx:o.dy,o,!a.length),c.length=c.area=0);r.forEach(e)}}function r(n,t){for(var e,r=n.area,u=0,i=1/0,o=-1,a=n.length;++o<a;)(e=n[o].area)&&(i>e&&(i=e),e>u&&(u=e));return r*=r,t*=t,r?Math.max(t*u*p/r,r/(t*i*p)):1/0}function u(n,t,e,r){var u,i=-1,o=n.length,a=e.x,l=e.y,s=t?c(n.area/t):0;if(t==e.dx){for((r||s>e.dy)&&(s=e.dy);++i<o;)u=n[i],u.x=a,u.y=l,u.dy=s,a+=u.dx=Math.min(e.x+e.dx-a,s?c(u.area/s):0);u.z=!0,u.dx+=e.x+e.dx-a,e.y+=s,e.dy-=s}else{for((r||s>e.dx)&&(s=e.dx);++i<o;)u=n[i],u.x=a,u.y=l,u.dx=s,l+=u.dy=Math.min(e.y+e.dy-l,s?c(u.area/s):0);u.z=!1,u.dy+=e.y+e.dy-l,e.x+=s,e.dx-=s}}function i(r){var u=o||a(r),i=u[0];return 
i.x=0,i.y=0,i.dx=l[0],i.dy=l[1],o&&a.revalue(i),n([i],i.dx*i.dy/i.value),(o?e:t)(i),h&&(o=u),u}var o,a=ta.layout.hierarchy(),c=Math.round,l=[1,1],s=null,f=Ri,h=!1,g="squarify",p=.5*(1+Math.sqrt(5)); +return i.size=function(n){return arguments.length?(l=n,i):l},i.padding=function(n){function t(t){var e=n.call(i,t,t.depth);return null==e?Ri(t):Di(t,"number"==typeof e?[e,e,e,e]:e)}function e(t){return Di(t,n)}if(!arguments.length)return s;var r;return f=null==(s=n)?Ri:"function"==(r=typeof n)?t:"number"===r?(n=[n,n,n,n],e):e,i},i.round=function(n){return arguments.length?(c=n?Math.round:Number,i):c!=Number},i.sticky=function(n){return arguments.length?(h=n,o=null,i):h},i.ratio=function(n){return arguments.length?(p=n,i):p},i.mode=function(n){return arguments.length?(g=n+"",i):g},Gu(i,a)},ta.random={normal:function(n,t){var e=arguments.length;return 2>e&&(t=1),1>e&&(n=0),function(){var e,r,u;do e=2*Math.random()-1,r=2*Math.random()-1,u=e*e+r*r;while(!u||u>1);return n+t*e*Math.sqrt(-2*Math.log(u)/u)}},logNormal:function(){var n=ta.random.normal.apply(ta,arguments);return function(){return Math.exp(n())}},bates:function(n){var t=ta.random.irwinHall(n);return function(){return t()/n}},irwinHall:function(n){return function(){for(var t=0,e=0;n>e;e++)t+=Math.random();return t}}},ta.scale={};var ml={floor:y,ceil:y};ta.scale.linear=function(){return Ii([0,1],[0,1],mu,!1)};var yl={s:1,g:1,p:1,r:1,e:1};ta.scale.log=function(){return Ji(ta.scale.linear().domain([0,1]),10,!0,[1,10])};var Ml=ta.format(".0e"),xl={floor:function(n){return-Math.ceil(-n)},ceil:function(n){return-Math.floor(-n)}};ta.scale.pow=function(){return Gi(ta.scale.linear(),1,[0,1])},ta.scale.sqrt=function(){return ta.scale.pow().exponent(.5)},ta.scale.ordinal=function(){return Qi([],{t:"range",a:[[]]})},ta.scale.category10=function(){return ta.scale.ordinal().range(bl)},ta.scale.category20=function(){return ta.scale.ordinal().range(_l)},ta.scale.category20b=function(){return 
ta.scale.ordinal().range(wl)},ta.scale.category20c=function(){return ta.scale.ordinal().range(Sl)};var bl=[2062260,16744206,2924588,14034728,9725885,9197131,14907330,8355711,12369186,1556175].map(Mt),_l=[2062260,11454440,16744206,16759672,2924588,10018698,14034728,16750742,9725885,12955861,9197131,12885140,14907330,16234194,8355711,13092807,12369186,14408589,1556175,10410725].map(Mt),wl=[3750777,5395619,7040719,10264286,6519097,9216594,11915115,13556636,9202993,12426809,15186514,15190932,8666169,11356490,14049643,15177372,8077683,10834324,13528509,14589654].map(Mt),Sl=[3244733,7057110,10406625,13032431,15095053,16616764,16625259,16634018,3253076,7652470,10607003,13101504,7695281,10394312,12369372,14342891,6513507,9868950,12434877,14277081].map(Mt);ta.scale.quantile=function(){return no([],[])},ta.scale.quantize=function(){return to(0,1,[0,1])},ta.scale.threshold=function(){return eo([.5],[0,1])},ta.scale.identity=function(){return ro([0,1])},ta.svg={},ta.svg.arc=function(){function n(){var n=Math.max(0,+e.apply(this,arguments)),l=Math.max(0,+r.apply(this,arguments)),s=o.apply(this,arguments)-Ra,f=a.apply(this,arguments)-Ra,h=Math.abs(f-s),g=s>f?0:1;if(n>l&&(p=l,l=n,n=p),h>=Ta)return t(l,g)+(n?t(n,1-g):"")+"Z";var p,v,d,m,y,M,x,b,_,w,S,k,E=0,A=0,N=[];if((m=(+c.apply(this,arguments)||0)/2)&&(d=i===kl?Math.sqrt(n*n+l*l):+i.apply(this,arguments),g||(A*=-1),l&&(A=tt(d/l*Math.sin(m))),n&&(E=tt(d/n*Math.sin(m)))),l){y=l*Math.cos(s+A),M=l*Math.sin(s+A),x=l*Math.cos(f-A),b=l*Math.sin(f-A);var C=Math.abs(f-s-2*A)<=qa?0:1;if(A&&so(y,M,x,b)===g^C){var z=(s+f)/2;y=l*Math.cos(z),M=l*Math.sin(z),x=b=null}}else y=M=0;if(n){_=n*Math.cos(f-E),w=n*Math.sin(f-E),S=n*Math.cos(s+E),k=n*Math.sin(s+E);var q=Math.abs(s-f+2*E)<=qa?0:1;if(E&&so(_,w,S,k)===1-g^q){var L=(s+f)/2;_=n*Math.cos(L),w=n*Math.sin(L),S=k=null}}else _=w=0;if((p=Math.min(Math.abs(l-n)/2,+u.apply(this,arguments)))>.001){v=l>n^g?0:1;var 
T=null==S?[_,w]:null==x?[y,M]:Lr([y,M],[S,k],[x,b],[_,w]),R=y-T[0],D=M-T[1],P=x-T[0],U=b-T[1],j=1/Math.sin(Math.acos((R*P+D*U)/(Math.sqrt(R*R+D*D)*Math.sqrt(P*P+U*U)))/2),F=Math.sqrt(T[0]*T[0]+T[1]*T[1]);if(null!=x){var H=Math.min(p,(l-F)/(j+1)),O=fo(null==S?[_,w]:[S,k],[y,M],l,H,g),I=fo([x,b],[_,w],l,H,g);p===H?N.push("M",O[0],"A",H,",",H," 0 0,",v," ",O[1],"A",l,",",l," 0 ",1-g^so(O[1][0],O[1][1],I[1][0],I[1][1]),",",g," ",I[1],"A",H,",",H," 0 0,",v," ",I[0]):N.push("M",O[0],"A",H,",",H," 0 1,",v," ",I[0])}else N.push("M",y,",",M);if(null!=S){var Y=Math.min(p,(n-F)/(j-1)),Z=fo([y,M],[S,k],n,-Y,g),V=fo([_,w],null==x?[y,M]:[x,b],n,-Y,g);p===Y?N.push("L",V[0],"A",Y,",",Y," 0 0,",v," ",V[1],"A",n,",",n," 0 ",g^so(V[1][0],V[1][1],Z[1][0],Z[1][1]),",",1-g," ",Z[1],"A",Y,",",Y," 0 0,",v," ",Z[0]):N.push("L",V[0],"A",Y,",",Y," 0 0,",v," ",Z[0])}else N.push("L",_,",",w)}else N.push("M",y,",",M),null!=x&&N.push("A",l,",",l," 0 ",C,",",g," ",x,",",b),N.push("L",_,",",w),null!=S&&N.push("A",n,",",n," 0 ",q,",",1-g," ",S,",",k);return N.push("Z"),N.join("")}function t(n,t){return"M0,"+n+"A"+n+","+n+" 0 1,"+t+" 0,"+-n+"A"+n+","+n+" 0 1,"+t+" 0,"+n}var e=io,r=oo,u=uo,i=kl,o=ao,a=co,c=lo;return n.innerRadius=function(t){return arguments.length?(e=Et(t),n):e},n.outerRadius=function(t){return arguments.length?(r=Et(t),n):r},n.cornerRadius=function(t){return arguments.length?(u=Et(t),n):u},n.padRadius=function(t){return arguments.length?(i=t==kl?kl:Et(t),n):i},n.startAngle=function(t){return arguments.length?(o=Et(t),n):o},n.endAngle=function(t){return arguments.length?(a=Et(t),n):a},n.padAngle=function(t){return arguments.length?(c=Et(t),n):c},n.centroid=function(){var n=(+e.apply(this,arguments)+ +r.apply(this,arguments))/2,t=(+o.apply(this,arguments)+ +a.apply(this,arguments))/2-Ra;return[Math.cos(t)*n,Math.sin(t)*n]},n};var kl="auto";ta.svg.line=function(){return ho(y)};var 
El=ta.map({linear:go,"linear-closed":po,step:vo,"step-before":mo,"step-after":yo,basis:So,"basis-open":ko,"basis-closed":Eo,bundle:Ao,cardinal:bo,"cardinal-open":Mo,"cardinal-closed":xo,monotone:To});El.forEach(function(n,t){t.key=n,t.closed=/-closed$/.test(n)});var Al=[0,2/3,1/3,0],Nl=[0,1/3,2/3,0],Cl=[0,1/6,2/3,1/6];ta.svg.line.radial=function(){var n=ho(Ro);return n.radius=n.x,delete n.x,n.angle=n.y,delete n.y,n},mo.reverse=yo,yo.reverse=mo,ta.svg.area=function(){return Do(y)},ta.svg.area.radial=function(){var n=Do(Ro);return n.radius=n.x,delete n.x,n.innerRadius=n.x0,delete n.x0,n.outerRadius=n.x1,delete n.x1,n.angle=n.y,delete n.y,n.startAngle=n.y0,delete n.y0,n.endAngle=n.y1,delete n.y1,n},ta.svg.chord=function(){function n(n,a){var c=t(this,i,n,a),l=t(this,o,n,a);return"M"+c.p0+r(c.r,c.p1,c.a1-c.a0)+(e(c,l)?u(c.r,c.p1,c.r,c.p0):u(c.r,c.p1,l.r,l.p0)+r(l.r,l.p1,l.a1-l.a0)+u(l.r,l.p1,c.r,c.p0))+"Z"}function t(n,t,e,r){var u=t.call(n,e,r),i=a.call(n,u,r),o=c.call(n,u,r)-Ra,s=l.call(n,u,r)-Ra;return{r:i,a0:o,a1:s,p0:[i*Math.cos(o),i*Math.sin(o)],p1:[i*Math.cos(s),i*Math.sin(s)]}}function e(n,t){return n.a0==t.a0&&n.a1==t.a1}function r(n,t,e){return"A"+n+","+n+" 0 "+ +(e>qa)+",1 "+t}function u(n,t,e,r){return"Q 0,0 "+r}var i=mr,o=yr,a=Po,c=ao,l=co;return n.radius=function(t){return arguments.length?(a=Et(t),n):a},n.source=function(t){return arguments.length?(i=Et(t),n):i},n.target=function(t){return arguments.length?(o=Et(t),n):o},n.startAngle=function(t){return arguments.length?(c=Et(t),n):c},n.endAngle=function(t){return arguments.length?(l=Et(t),n):l},n},ta.svg.diagonal=function(){function n(n,u){var i=t.call(this,n,u),o=e.call(this,n,u),a=(i.y+o.y)/2,c=[i,{x:i.x,y:a},{x:o.x,y:a},o];return c=c.map(r),"M"+c[0]+"C"+c[1]+" "+c[2]+" "+c[3]}var t=mr,e=yr,r=Uo;return n.source=function(e){return arguments.length?(t=Et(e),n):t},n.target=function(t){return arguments.length?(e=Et(t),n):e},n.projection=function(t){return 
arguments.length?(r=t,n):r},n},ta.svg.diagonal.radial=function(){var n=ta.svg.diagonal(),t=Uo,e=n.projection;return n.projection=function(n){return arguments.length?e(jo(t=n)):t},n},ta.svg.symbol=function(){function n(n,r){return(zl.get(t.call(this,n,r))||Oo)(e.call(this,n,r))}var t=Ho,e=Fo;return n.type=function(e){return arguments.length?(t=Et(e),n):t},n.size=function(t){return arguments.length?(e=Et(t),n):e},n};var zl=ta.map({circle:Oo,cross:function(n){var t=Math.sqrt(n/5)/2;return"M"+-3*t+","+-t+"H"+-t+"V"+-3*t+"H"+t+"V"+-t+"H"+3*t+"V"+t+"H"+t+"V"+3*t+"H"+-t+"V"+t+"H"+-3*t+"Z"},diamond:function(n){var t=Math.sqrt(n/(2*Ll)),e=t*Ll;return"M0,"+-t+"L"+e+",0 0,"+t+" "+-e+",0Z"},square:function(n){var t=Math.sqrt(n)/2;return"M"+-t+","+-t+"L"+t+","+-t+" "+t+","+t+" "+-t+","+t+"Z"},"triangle-down":function(n){var t=Math.sqrt(n/ql),e=t*ql/2;return"M0,"+e+"L"+t+","+-e+" "+-t+","+-e+"Z"},"triangle-up":function(n){var t=Math.sqrt(n/ql),e=t*ql/2;return"M0,"+-e+"L"+t+","+e+" "+-t+","+e+"Z"}});ta.svg.symbolTypes=zl.keys();var ql=Math.sqrt(3),Ll=Math.tan(30*Da);_a.transition=function(n){for(var t,e,r=Tl||++Ul,u=Xo(n),i=[],o=Rl||{time:Date.now(),ease:Su,delay:0,duration:250},a=-1,c=this.length;++a<c;){i.push(t=[]);for(var l=this[a],s=-1,f=l.length;++s<f;)(e=l[s])&&$o(e,s,u,r,o),t.push(e)}return Yo(i,u,r)},_a.interrupt=function(n){return this.each(null==n?Dl:Io(Xo(n)))};var Tl,Rl,Dl=Io(Xo()),Pl=[],Ul=0;Pl.call=_a.call,Pl.empty=_a.empty,Pl.node=_a.node,Pl.size=_a.size,ta.transition=function(n,t){return n&&n.transition?Tl?n.transition(t):n:ta.selection().transition(n)},ta.transition.prototype=Pl,Pl.select=function(n){var t,e,r,u=this.id,i=this.namespace,o=[];n=N(n);for(var a=-1,c=this.length;++a<c;){o.push(t=[]);for(var l=this[a],s=-1,f=l.length;++s<f;)(r=l[s])&&(e=n.call(r,r.__data__,s,a))?("__data__"in r&&(e.__data__=r.__data__),$o(e,s,i,u,r[i][u]),t.push(e)):t.push(null)}return Yo(o,i,u)},Pl.selectAll=function(n){var t,e,r,u,i,o=this.id,a=this.namespace,c=[];n=C(n);for(var 
l=-1,s=this.length;++l<s;)for(var f=this[l],h=-1,g=f.length;++h<g;)if(r=f[h]){i=r[a][o],e=n.call(r,r.__data__,h,l),c.push(t=[]);for(var p=-1,v=e.length;++p<v;)(u=e[p])&&$o(u,p,a,o,i),t.push(u)}return Yo(c,a,o)},Pl.filter=function(n){var t,e,r,u=[];"function"!=typeof n&&(n=O(n));for(var i=0,o=this.length;o>i;i++){u.push(t=[]);for(var e=this[i],a=0,c=e.length;c>a;a++)(r=e[a])&&n.call(r,r.__data__,a,i)&&t.push(r)}return Yo(u,this.namespace,this.id)},Pl.tween=function(n,t){var e=this.id,r=this.namespace;return arguments.length<2?this.node()[r][e].tween.get(n):Y(this,null==t?function(t){t[r][e].tween.remove(n)}:function(u){u[r][e].tween.set(n,t)})},Pl.attr=function(n,t){function e(){this.removeAttribute(a)}function r(){this.removeAttributeNS(a.space,a.local)}function u(n){return null==n?e:(n+="",function(){var t,e=this.getAttribute(a);return e!==n&&(t=o(e,n),function(n){this.setAttribute(a,t(n))})})}function i(n){return null==n?r:(n+="",function(){var t,e=this.getAttributeNS(a.space,a.local);return e!==n&&(t=o(e,n),function(n){this.setAttributeNS(a.space,a.local,t(n))})})}if(arguments.length<2){for(t in n)this.attr(t,n[t]);return this}var o="transform"==n?Hu:mu,a=ta.ns.qualify(n);return Zo(this,"attr."+n,t,a.local?i:u)},Pl.attrTween=function(n,t){function e(n,e){var r=t.call(this,n,e,this.getAttribute(u));return r&&function(n){this.setAttribute(u,r(n))}}function r(n,e){var r=t.call(this,n,e,this.getAttributeNS(u.space,u.local));return r&&function(n){this.setAttributeNS(u.space,u.local,r(n))}}var u=ta.ns.qualify(n);return this.tween("attr."+n,u.local?r:e)},Pl.style=function(n,e,r){function u(){this.style.removeProperty(n)}function i(e){return null==e?u:(e+="",function(){var u,i=t(this).getComputedStyle(this,null).getPropertyValue(n);return i!==e&&(u=mu(i,e),function(t){this.style.setProperty(n,u(t),r)})})}var o=arguments.length;if(3>o){if("string"!=typeof n){2>o&&(e="");for(r in n)this.style(r,n[r],e);return this}r=""}return 
Zo(this,"style."+n,e,i)},Pl.styleTween=function(n,e,r){function u(u,i){var o=e.call(this,u,i,t(this).getComputedStyle(this,null).getPropertyValue(n));return o&&function(t){this.style.setProperty(n,o(t),r)}}return arguments.length<3&&(r=""),this.tween("style."+n,u)},Pl.text=function(n){return Zo(this,"text",n,Vo)},Pl.remove=function(){var n=this.namespace;return this.each("end.transition",function(){var t;this[n].count<2&&(t=this.parentNode)&&t.removeChild(this)})},Pl.ease=function(n){var t=this.id,e=this.namespace;return arguments.length<1?this.node()[e][t].ease:("function"!=typeof n&&(n=ta.ease.apply(ta,arguments)),Y(this,function(r){r[e][t].ease=n}))},Pl.delay=function(n){var t=this.id,e=this.namespace;return arguments.length<1?this.node()[e][t].delay:Y(this,"function"==typeof n?function(r,u,i){r[e][t].delay=+n.call(r,r.__data__,u,i)}:(n=+n,function(r){r[e][t].delay=n}))},Pl.duration=function(n){var t=this.id,e=this.namespace;return arguments.length<1?this.node()[e][t].duration:Y(this,"function"==typeof n?function(r,u,i){r[e][t].duration=Math.max(1,n.call(r,r.__data__,u,i))}:(n=Math.max(1,n),function(r){r[e][t].duration=n}))},Pl.each=function(n,t){var e=this.id,r=this.namespace;if(arguments.length<2){var u=Rl,i=Tl;try{Tl=e,Y(this,function(t,u,i){Rl=t[r][e],n.call(t,t.__data__,u,i)})}finally{Rl=u,Tl=i}}else Y(this,function(u){var i=u[r][e];(i.event||(i.event=ta.dispatch("start","end","interrupt"))).on(n,t)});return this},Pl.transition=function(){for(var n,t,e,r,u=this.id,i=++Ul,o=this.namespace,a=[],c=0,l=this.length;l>c;c++){a.push(n=[]);for(var t=this[c],s=0,f=t.length;f>s;s++)(e=t[s])&&(r=e[o][u],$o(e,s,o,i,{time:r.time,ease:r.ease,delay:r.delay+r.duration,duration:r.duration})),n.push(e)}return Yo(a,o,i)},ta.svg.axis=function(){function n(n){n.each(function(){var 
n,l=ta.select(this),s=this.__chart__||e,f=this.__chart__=e.copy(),h=null==c?f.ticks?f.ticks.apply(f,a):f.domain():c,g=null==t?f.tickFormat?f.tickFormat.apply(f,a):y:t,p=l.selectAll(".tick").data(h,f),v=p.enter().insert("g",".domain").attr("class","tick").style("opacity",Ca),d=ta.transition(p.exit()).style("opacity",Ca).remove(),m=ta.transition(p.order()).style("opacity",1),M=Math.max(u,0)+o,x=Ui(f),b=l.selectAll(".domain").data([0]),_=(b.enter().append("path").attr("class","domain"),ta.transition(b));v.append("line"),v.append("text");var w,S,k,E,A=v.select("line"),N=m.select("line"),C=p.select("text").text(g),z=v.select("text"),q=m.select("text"),L="top"===r||"left"===r?-1:1;if("bottom"===r||"top"===r?(n=Bo,w="x",k="y",S="x2",E="y2",C.attr("dy",0>L?"0em":".71em").style("text-anchor","middle"),_.attr("d","M"+x[0]+","+L*i+"V0H"+x[1]+"V"+L*i)):(n=Wo,w="y",k="x",S="y2",E="x2",C.attr("dy",".32em").style("text-anchor",0>L?"end":"start"),_.attr("d","M"+L*i+","+x[0]+"H0V"+x[1]+"H"+L*i)),A.attr(E,L*u),z.attr(k,L*M),N.attr(S,0).attr(E,L*u),q.attr(w,0).attr(k,L*M),f.rangeBand){var T=f,R=T.rangeBand()/2;s=f=function(n){return T(n)+R}}else s.rangeBand?s=f:d.call(n,f,s);v.call(n,s,f),m.call(n,f,f)})}var t,e=ta.scale.linear(),r=jl,u=6,i=6,o=3,a=[10],c=null;return n.scale=function(t){return arguments.length?(e=t,n):e},n.orient=function(t){return arguments.length?(r=t in Fl?t+"":jl,n):r},n.ticks=function(){return arguments.length?(a=arguments,n):a},n.tickValues=function(t){return arguments.length?(c=t,n):c},n.tickFormat=function(e){return arguments.length?(t=e,n):t},n.tickSize=function(t){var e=arguments.length;return e?(u=+t,i=+arguments[e-1],n):u},n.innerTickSize=function(t){return arguments.length?(u=+t,n):u},n.outerTickSize=function(t){return arguments.length?(i=+t,n):i},n.tickPadding=function(t){return arguments.length?(o=+t,n):o},n.tickSubdivide=function(){return arguments.length&&n},n};var jl="bottom",Fl={top:1,right:1,bottom:1,left:1};ta.svg.brush=function(){function 
n(t){t.each(function(){var t=ta.select(this).style("pointer-events","all").style("-webkit-tap-highlight-color","rgba(0,0,0,0)").on("mousedown.brush",i).on("touchstart.brush",i),o=t.selectAll(".background").data([0]);o.enter().append("rect").attr("class","background").style("visibility","hidden").style("cursor","crosshair"),t.selectAll(".extent").data([0]).enter().append("rect").attr("class","extent").style("cursor","move");var a=t.selectAll(".resize").data(v,y);a.exit().remove(),a.enter().append("g").attr("class",function(n){return"resize "+n}).style("cursor",function(n){return Hl[n]}).append("rect").attr("x",function(n){return/[ew]$/.test(n)?-3:null}).attr("y",function(n){return/^[ns]/.test(n)?-3:null}).attr("width",6).attr("height",6).style("visibility","hidden"),a.style("display",n.empty()?"none":null);var c,f=ta.transition(t),h=ta.transition(o);l&&(c=Ui(l),h.attr("x",c[0]).attr("width",c[1]-c[0]),r(f)),s&&(c=Ui(s),h.attr("y",c[0]).attr("height",c[1]-c[0]),u(f)),e(f)})}function e(n){n.selectAll(".resize").attr("transform",function(n){return"translate("+f[+/e$/.test(n)]+","+h[+/^s/.test(n)]+")"})}function r(n){n.select(".extent").attr("x",f[0]),n.selectAll(".extent,.n>rect,.s>rect").attr("width",f[1]-f[0])}function u(n){n.select(".extent").attr("y",h[0]),n.selectAll(".extent,.e>rect,.w>rect").attr("height",h[1]-h[0])}function i(){function i(){32==ta.event.keyCode&&(C||(M=null,q[0]-=f[1],q[1]-=h[1],C=2),S())}function v(){32==ta.event.keyCode&&2==C&&(q[0]+=f[1],q[1]+=h[1],C=0,S())}function d(){var n=ta.mouse(b),t=!1;x&&(n[0]+=x[0],n[1]+=x[1]),C||(ta.event.altKey?(M||(M=[(f[0]+f[1])/2,(h[0]+h[1])/2]),q[0]=f[+(n[0]<M[0])],q[1]=h[+(n[1]<M[1])]):M=null),A&&m(n,l,0)&&(r(k),t=!0),N&&m(n,s,1)&&(u(k),t=!0),t&&(e(k),w({type:"brush",mode:C?"move":"resize"}))}function m(n,t,e){var r,u,i=Ui(t),c=i[0],l=i[1],s=q[e],v=e?h:f,d=v[1]-v[0];return 
C&&(c-=s,l-=d+s),r=(e?p:g)?Math.max(c,Math.min(l,n[e])):n[e],C?u=(r+=s)+d:(M&&(s=Math.max(c,Math.min(l,2*M[e]-r))),r>s?(u=r,r=s):u=s),v[0]!=r||v[1]!=u?(e?a=null:o=null,v[0]=r,v[1]=u,!0):void 0}function y(){d(),k.style("pointer-events","all").selectAll(".resize").style("display",n.empty()?"none":null),ta.select("body").style("cursor",null),L.on("mousemove.brush",null).on("mouseup.brush",null).on("touchmove.brush",null).on("touchend.brush",null).on("keydown.brush",null).on("keyup.brush",null),z(),w({type:"brushend"})}var M,x,b=this,_=ta.select(ta.event.target),w=c.of(b,arguments),k=ta.select(b),E=_.datum(),A=!/^(n|s)$/.test(E)&&l,N=!/^(e|w)$/.test(E)&&s,C=_.classed("extent"),z=W(b),q=ta.mouse(b),L=ta.select(t(b)).on("keydown.brush",i).on("keyup.brush",v);if(ta.event.changedTouches?L.on("touchmove.brush",d).on("touchend.brush",y):L.on("mousemove.brush",d).on("mouseup.brush",y),k.interrupt().selectAll("*").interrupt(),C)q[0]=f[0]-q[0],q[1]=h[0]-q[1];else if(E){var T=+/w$/.test(E),R=+/^n/.test(E);x=[f[1-T]-q[0],h[1-R]-q[1]],q[0]=f[T],q[1]=h[R]}else ta.event.altKey&&(M=q.slice());k.style("pointer-events","none").selectAll(".resize").style("display",null),ta.select("body").style("cursor",_.style("cursor")),w({type:"brushstart"}),d()}var o,a,c=E(n,"brushstart","brush","brushend"),l=null,s=null,f=[0,0],h=[0,0],g=!0,p=!0,v=Ol[0];return n.event=function(n){n.each(function(){var n=c.of(this,arguments),t={x:f,y:h,i:o,j:a},e=this.__chart__||t;this.__chart__=t,Tl?ta.select(this).transition().each("start.brush",function(){o=e.i,a=e.j,f=e.x,h=e.y,n({type:"brushstart"})}).tween("brush:brush",function(){var e=yu(f,t.x),r=yu(h,t.y);return o=a=null,function(u){f=t.x=e(u),h=t.y=r(u),n({type:"brush",mode:"resize"})}}).each("end.brush",function(){o=t.i,a=t.j,n({type:"brush",mode:"resize"}),n({type:"brushend"})}):(n({type:"brushstart"}),n({type:"brush",mode:"resize"}),n({type:"brushend"}))})},n.x=function(t){return arguments.length?(l=t,v=Ol[!l<<1|!s],n):l},n.y=function(t){return 
arguments.length?(s=t,v=Ol[!l<<1|!s],n):s},n.clamp=function(t){return arguments.length?(l&&s?(g=!!t[0],p=!!t[1]):l?g=!!t:s&&(p=!!t),n):l&&s?[g,p]:l?g:s?p:null},n.extent=function(t){var e,r,u,i,c;return arguments.length?(l&&(e=t[0],r=t[1],s&&(e=e[0],r=r[0]),o=[e,r],l.invert&&(e=l(e),r=l(r)),e>r&&(c=e,e=r,r=c),(e!=f[0]||r!=f[1])&&(f=[e,r])),s&&(u=t[0],i=t[1],l&&(u=u[1],i=i[1]),a=[u,i],s.invert&&(u=s(u),i=s(i)),u>i&&(c=u,u=i,i=c),(u!=h[0]||i!=h[1])&&(h=[u,i])),n):(l&&(o?(e=o[0],r=o[1]):(e=f[0],r=f[1],l.invert&&(e=l.invert(e),r=l.invert(r)),e>r&&(c=e,e=r,r=c))),s&&(a?(u=a[0],i=a[1]):(u=h[0],i=h[1],s.invert&&(u=s.invert(u),i=s.invert(i)),u>i&&(c=u,u=i,i=c))),l&&s?[[e,u],[r,i]]:l?[e,r]:s&&[u,i])},n.clear=function(){return n.empty()||(f=[0,0],h=[0,0],o=a=null),n},n.empty=function(){return!!l&&f[0]==f[1]||!!s&&h[0]==h[1]},ta.rebind(n,c,"on")};var Hl={n:"ns-resize",e:"ew-resize",s:"ns-resize",w:"ew-resize",nw:"nwse-resize",ne:"nesw-resize",se:"nwse-resize",sw:"nesw-resize"},Ol=[["n","e","s","w","nw","ne","se","sw"],["e","w"],["n","s"],[]],Il=ac.format=gc.timeFormat,Yl=Il.utc,Zl=Yl("%Y-%m-%dT%H:%M:%S.%LZ");Il.iso=Date.prototype.toISOString&&+new Date("2000-01-01T00:00:00.000Z")?Jo:Zl,Jo.parse=function(n){var t=new Date(n);return isNaN(t)?null:t},Jo.toString=Zl.toString,ac.second=Ft(function(n){return new cc(1e3*Math.floor(n/1e3))},function(n,t){n.setTime(n.getTime()+1e3*Math.floor(t))},function(n){return n.getSeconds()}),ac.seconds=ac.second.range,ac.seconds.utc=ac.second.utc.range,ac.minute=Ft(function(n){return new cc(6e4*Math.floor(n/6e4))},function(n,t){n.setTime(n.getTime()+6e4*Math.floor(t))},function(n){return n.getMinutes()}),ac.minutes=ac.minute.range,ac.minutes.utc=ac.minute.utc.range,ac.hour=Ft(function(n){var t=n.getTimezoneOffset()/60;return new cc(36e5*(Math.floor(n/36e5-t)+t))},function(n,t){n.setTime(n.getTime()+36e5*Math.floor(t))},function(n){return n.getHours()}),ac.hours=ac.hour.range,ac.hours.utc=ac.hour.utc.range,ac.month=Ft(function(n){return 
n=ac.day(n),n.setDate(1),n},function(n,t){n.setMonth(n.getMonth()+t)},function(n){return n.getMonth()}),ac.months=ac.month.range,ac.months.utc=ac.month.utc.range;var Vl=[1e3,5e3,15e3,3e4,6e4,3e5,9e5,18e5,36e5,108e5,216e5,432e5,864e5,1728e5,6048e5,2592e6,7776e6,31536e6],Xl=[[ac.second,1],[ac.second,5],[ac.second,15],[ac.second,30],[ac.minute,1],[ac.minute,5],[ac.minute,15],[ac.minute,30],[ac.hour,1],[ac.hour,3],[ac.hour,6],[ac.hour,12],[ac.day,1],[ac.day,2],[ac.week,1],[ac.month,1],[ac.month,3],[ac.year,1]],$l=Il.multi([[".%L",function(n){return n.getMilliseconds()}],[":%S",function(n){return n.getSeconds()}],["%I:%M",function(n){return n.getMinutes()}],["%I %p",function(n){return n.getHours()}],["%a %d",function(n){return n.getDay()&&1!=n.getDate()}],["%b %d",function(n){return 1!=n.getDate()}],["%B",function(n){return n.getMonth()}],["%Y",Ne]]),Bl={range:function(n,t,e){return ta.range(Math.ceil(n/e)*e,+t,e).map(Ko)},floor:y,ceil:y};Xl.year=ac.year,ac.scale=function(){return Go(ta.scale.linear(),Xl,$l)};var Wl=Xl.map(function(n){return[n[0].utc,n[1]]}),Jl=Yl.multi([[".%L",function(n){return n.getUTCMilliseconds()}],[":%S",function(n){return n.getUTCSeconds()}],["%I:%M",function(n){return n.getUTCMinutes()}],["%I %p",function(n){return n.getUTCHours()}],["%a %d",function(n){return n.getUTCDay()&&1!=n.getUTCDate()}],["%b %d",function(n){return 1!=n.getUTCDate()}],["%B",function(n){return n.getUTCMonth()}],["%Y",Ne]]);Wl.year=ac.year.utc,ac.scale.utc=function(){return Go(ta.scale.linear(),Wl,Jl)},ta.text=At(function(n){return n.responseText}),ta.json=function(n,t){return Nt(n,"application/json",Qo,t)},ta.html=function(n,t){return Nt(n,"text/html",na,t)},ta.xml=At(function(n){return n.responseXML}),"function"==typeof define&&define.amd?define(ta):"object"==typeof module&&module.exports&&(module.exports=ta),this.d3=ta}(); \ No newline at end of file diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js 
b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js index 242074c43328498a4e3a861331c370512204728b..29eede7475711bee241717deffcf679b5f9062d5 100644 --- a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js +++ b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/ext/js/sortable.min.js @@ -1,2 +1,17 @@ +/* + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ /*! 
sortable.js 0.5.0 */ (function(){var a,b,c,d,e,f;a="table[data-sortable]",c=/^-?[£$¤]?[\d,.]+%?$/,f=/^\s+|\s+$/g,e="ontouchstart"in document.documentElement,b=e?"touchstart":"click",d={init:function(){var b,c,e,f,g;for(c=document.querySelectorAll(a),g=[],e=0,f=c.length;f>e;e++)b=c[e],g.push(d.initTable(b));return g},initTable:function(a){var b,c,e,f,g;if(1===a.tHead.rows.length&&"true"!==a.getAttribute("data-sortable-initialized")){for(a.setAttribute("data-sortable-initialized","true"),e=a.querySelectorAll("th"),b=f=0,g=e.length;g>f;b=++f)c=e[b],"false"!==c.getAttribute("data-sortable")&&d.setupClickableTH(a,c,b);return a}},setupClickableTH:function(a,c,e){var f;return f=d.getColumnType(a,e),c.addEventListener(b,function(){var b,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u;for(j="true"===this.getAttribute("data-sorted"),k=this.getAttribute("data-sorted-direction"),b=j?"ascending"===k?"descending":"ascending":f.defaultSortDirection,m=this.parentNode.querySelectorAll("th"),n=0,q=m.length;q>n;n++)c=m[n],c.setAttribute("data-sorted","false"),c.removeAttribute("data-sorted-direction");for(this.setAttribute("data-sorted","true"),this.setAttribute("data-sorted-direction",b),l=a.tBodies[0],h=[],t=l.rows,o=0,r=t.length;r>o;o++)g=t[o],h.push([d.getNodeValue(g.cells[e]),g]);for(j?h.reverse():h.sort(f.compare),u=[],p=0,s=h.length;s>p;p++)i=h[p],u.push(l.appendChild(i[1]));return u})},getColumnType:function(a,b){var e,f,g,h,i;for(i=a.tBodies[0].rows,g=0,h=i.length;h>g;g++)if(e=i[g],f=d.getNodeValue(e.cells[b]),""!==f&&f.match(c))return d.types.numeric;return d.types.alpha},getNodeValue:function(a){return a?null!==a.getAttribute("data-value")?a.getAttribute("data-value"):"undefined"!=typeof a.innerText?a.innerText.replace(f,""):a.textContent.replace(f,""):""},types:{numeric:{defaultSortDirection:"descending",compare:function(a,b){var c,d;return 
c=parseFloat(a[0].replace(/[^0-9.-]/g,"")),d=parseFloat(b[0].replace(/[^0-9.-]/g,"")),isNaN(c)&&(c=0),isNaN(d)&&(d=0),d-c}},alpha:{defaultSortDirection:"ascending",compare:function(a,b){var c,d;return c=a[0].toLowerCase(),d=b[0].toLowerCase(),c===d?0:d>c?-1:1}}}},setTimeout(d.init,0),window.Sortable=d}).call(this); \ No newline at end of file diff --git a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp index b706ac45804afd61411115fbd47ca93bfbaa1fb9..7102c5ef76a3e3e5694b188164b72a031a0d47f3 100644 --- a/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp +++ b/public/biopet-core/src/main/resources/nl/lumc/sasc/biopet/core/report/main.ssp @@ -67,6 +67,7 @@ <!-- sortable tables //--> <script src="${rootPath}ext/js/sortable.min.js"></script> + <script src="${rootPath}ext/js/d3.v3.5.5.min.js"></script> <script language="JavaScript"> <!-- $(document).ready(function() { diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala index 7d38c60b2966c62e76e42c77922828ada9cbb897..8b4f1f801681f8ead4a58e2e02264acf8680c57d 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. 
The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.core import java.io.File diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala index 8b5e9fbaefe84175828c9a3f844c2120bb9ba259..f7be2d57f1c3cb6a724a9c89acb51592f8045432 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetPipe.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.core import java.io.File diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 1109f30da9fcb53cd622935b8451d6ab551d0205..c95299b0d4c47ef189a22fce2fcd1dfaff9183f8 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -17,19 +17,18 @@ package nl.lumc.sasc.biopet.core import java.io.File +import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension import nl.lumc.sasc.biopet.utils.Logging -import org.broadinstitute.gatk.queue.QSettings +import org.broadinstitute.gatk.queue.{ QScript, QSettings } import org.broadinstitute.gatk.queue.function.QFunction import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } import org.broadinstitute.gatk.utils.commandline.Argument -import scala.collection.mutable.ListBuffer - /** Base for biopet pipeline */ -trait BiopetQScript extends Configurable with GatkLogging { +trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript => @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false) val configfiles: List[File] = Nil @@ -78,6 +77,13 @@ trait BiopetQScript extends Configurable with GatkLogging { case f: ScatterGatherableFunction => f.scatterCount = 1 case _ => } + + this match { + case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) => + logger.info("Write report is skipped because sample flag is used") + case _ => reportClass.foreach(add(_)) + } + for (function <- functions) function match { case f: BiopetCommandLineFunction 
=> f.preProcessExecutable() @@ -93,14 +99,18 @@ trait BiopetQScript extends Configurable with GatkLogging { inputFiles.foreach { i => if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}") - else if (!i.file.canRead()) Logging.addError(s"Input file can not be read: ${i.file}") + else if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}") } - this match { - case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) => - logger.info("Write report is skipped because sample flag is used") - case _ => reportClass.foreach(add(_)) - } + functions.filter(_.jobOutputFile == null).foreach(f => { + try { + f.jobOutputFile = new File(f.firstOutput.getAbsoluteFile.getParent, "." + f.firstOutput.getName + "." + configName + ".out") + } catch { + case e: NullPointerException => logger.warn(s"Can't generate a jobOutputFile for $f") + } + }) + + if (logger.isDebugEnabled) WriteDependencies.writeDependencies(functions, new File(outputDir, s".log/${qSettings.runName}.deps.json")) Logging.checkErrors() } @@ -116,8 +126,26 @@ trait BiopetQScript extends Configurable with GatkLogging { function.isIntermediate = isIntermediate add(function) } + + def add(subPipeline: QScript): Unit = { + subPipeline.qSettings = this.qSettings + subPipeline match { + case that: SummaryQScript => + that.init() + that.biopetScript() + this match { + case s: SummaryQScript => s.addSummaryQScript(that) + case _ => + } + case that: BiopetQScript => + that.init() + that.biopetScript() + case _ => subPipeline.script + } + addAll(subPipeline.functions) + } } object BiopetQScript { - protected case class InputFile(file: File, md5: Option[String] = None) + case class InputFile(file: File, md5: Option[String] = None) } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala index 
1d9d6396235164d611f10291657cff7ffe72be60..8ef7eb746cd744021656c56ae28f3226c800a0a7 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala @@ -1,27 +1,39 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.core -import java.io.File - import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.function.CommandLineFunction /** - * Created by pjvanthof on 01/10/15. 
+ * This trait will control resources given to a CommandlineFunction */ trait CommandLineResources extends CommandLineFunction with Configurable { def defaultThreads = 1 final def threads = nCoresRequest match { case Some(i) => i - case _ => { + case _ => val t = getThreads nCoresRequest = Some(t) t - } } var vmem: Option[String] = config("vmem") - def defaultCoreMemory: Double = 1.0 + def defaultCoreMemory: Double = 2.0 def defaultVmemFactor: Double = 1.4 var vmemFactor: Double = config("vmem_factor", default = defaultVmemFactor) @@ -46,10 +58,12 @@ trait CommandLineResources extends CommandLineFunction with Configurable { * @return number of threads */ private def getThreads(default: Int): Int = { - val maxThreads: Int = config("maxthreads", default = 24) + val maxThreads: Option[Int] = config("maxthreads") val threads: Int = config("threads", default = default) - if (maxThreads > threads) threads - else maxThreads + maxThreads match { + case Some(max) => if (max > threads) threads else max + case _ => threads + } } def setResources(): Unit = { @@ -59,30 +73,30 @@ trait CommandLineResources extends CommandLineFunction with Configurable { case e: NullPointerException => null } - if (jobOutputFile == null && firstOutput != null) - jobOutputFile = new File(firstOutput.getAbsoluteFile.getParent, "." + firstOutput.getName + "." 
+ configName + ".out") - nCoresRequest = Option(threads) + /** The 1e retry does not yet upgrade the memory */ + val retryMultipler = if (retry > 1) retry - 1 else 0 + _coreMemory = config("core_memory", default = defaultCoreMemory).asDouble + - (0.5 * retry) + (0.5 * retryMultipler) if (config.contains("memory_limit")) memoryLimit = config("memory_limit") else memoryLimit = Some(_coreMemory * threads) if (config.contains("resident_limit")) residentLimit = config("resident_limit") - else residentLimit = Some((_coreMemory + (0.5 * retry)) * residentFactor) + else residentLimit = Some((_coreMemory + (0.5 * retryMultipler)) * residentFactor) - if (!config.contains("vmem")) vmem = Some((_coreMemory * (vmemFactor + (0.5 * retry))) + "G") + if (!config.contains("vmem")) vmem = Some((_coreMemory * (vmemFactor + (0.5 * retryMultipler))) + "G") jobName = configName + ":" + (if (firstOutput != null) firstOutput.getName else jobOutputFile) } override def setupRetry(): Unit = { super.setupRetry() if (vmem.isDefined) jobResourceRequests = jobResourceRequests.filterNot(_.contains("h_vmem=")) - logger.info("Auto raise memory on retry") + if (retry > 0) logger.info("Auto raise memory on retry") retry += 1 - this.freeze() + this.freezeFieldValues() } var threadsCorrection = 0 diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index f60aeeca83c20ad810ad2183a8938137200a0ee3..fa4f40645b49efee9ba5fb9c42a05c1c10006b0d 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -19,11 +19,11 @@ import java.io.File import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript } import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils } +import org.broadinstitute.gatk.queue.QScript import 
org.broadinstitute.gatk.utils.commandline.Argument /** This trait creates a structured way of use multisample pipelines */ -trait MultiSampleQScript extends SummaryQScript { - qscript => +trait MultiSampleQScript extends SummaryQScript { qscript: QScript => @Argument(doc = "Only Sample", shortName = "s", required = false, fullName = "sample") private[core] val onlySamples: List[String] = Nil @@ -35,6 +35,9 @@ trait MultiSampleQScript extends SummaryQScript { /** Overrules config of qscript with default sample */ val config = new ConfigFunctions(defaultSample = sampleId) + /** Sample specific settings */ + def summarySettings: Map[String, Any] = Map() + /** Library class with basic functions build in */ abstract class AbstractLibrary(val libId: String) extends Summarizable { /** Overrules config of qscript with default sample and default library */ @@ -45,6 +48,9 @@ trait MultiSampleQScript extends SummaryQScript { qscript.addSummarizable(summarizable, name, Some(sampleId), Some(libId)) } + /** Library specific settings */ + def summarySettings: Map[String, Any] = Map() + /** Adds the library jobs */ final def addAndTrackJobs(): Unit = { if (nameRegex.findFirstIn(libId) == None) diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala index dcede52573eb901026c7e62deb9d071d486df9fb..a33223019ce1eb7c3fce24ed3891f0ccd6183b5c 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala @@ -18,19 +18,24 @@ package nl.lumc.sasc.biopet.core import java.io.{ File, PrintWriter } import nl.lumc.sasc.biopet.utils.config.Config +import nl.lumc.sasc.biopet.utils.ConfigUtils.ImplicitConversions import nl.lumc.sasc.biopet.core.workaround.BiopetQCommandLine import nl.lumc.sasc.biopet.utils.{ MainCommand, Logging } import org.apache.log4j.{ 
PatternLayout, WriterAppender } import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } /** Wrapper around executable from Queue */ -trait PipelineCommand extends MainCommand with GatkLogging { +trait PipelineCommand extends MainCommand with GatkLogging with ImplicitConversions { /** * Gets location of compiled class of pipeline * @return path from classPath to class file */ - def pipeline = "/" + getClass.getName.stripSuffix("$").replaceAll("\\.", "/") + ".class" + def pipeline = "/" + getClass.getName.takeWhile(_ != '$').replaceAll("\\.", "/") + ".class" + + def pipelineName = getClass.getName.takeWhile(_ != '$').split("\\.").last.toLowerCase + + protected val globalConfig = Config.global /** Class can be used directly from java with -cp option */ def main(args: Array[String]): Unit = { @@ -38,7 +43,7 @@ trait PipelineCommand extends MainCommand with GatkLogging { for (t <- 0 until argsSize) { if (args(t) == "-config" || args(t) == "--config_file") { if (args.length <= (t + 1)) throw new IllegalStateException("-config needs a value: <file>") - Config.global.loadConfigFile(new File(args(t + 1))) + globalConfig.loadConfigFile(new File(args(t + 1))) } if (args(t) == "-cv" || args(t) == "--config_value") { @@ -49,7 +54,7 @@ trait PipelineCommand extends MainCommand with GatkLogging { val p = v(0).split(":") val key = p.last val path = p.dropRight(1).toList - Config.global.addValue(key, value, path) + globalConfig.addValue(key, value, path) } if (args(t) == "--logging_level" || args(t) == "-l") { @@ -71,8 +76,8 @@ trait PipelineCommand extends MainCommand with GatkLogging { val logFile = { val pipelineName = this.getClass.getSimpleName.toLowerCase.split("""\$""").head - val pipelineConfig = Config.global.map.getOrElse(pipelineName, Map()).asInstanceOf[Map[String, Any]] - val pipelineOutputDir = new File(Config.global.map.getOrElse("output_dir", pipelineConfig.getOrElse("output_dir", "./")).toString) + val pipelineConfig = 
globalConfig.map.getOrElse(pipelineName, Map()).asInstanceOf[Map[String, Any]] + val pipelineOutputDir = new File(globalConfig.map.getOrElse("output_dir", pipelineConfig.getOrElse("output_dir", "./")).toString) val logDir: File = new File(pipelineOutputDir, ".log") logDir.mkdirs() new File(logDir, "biopet." + BiopetQCommandLine.timestamp + ".log") @@ -87,6 +92,11 @@ trait PipelineCommand extends MainCommand with GatkLogging { if (!args.contains("--log_to_file") && !args.contains("-log")) { argv ++= List("--log_to_file", new File(logFile.getParentFile, "queue." + BiopetQCommandLine.timestamp + ".log").getAbsolutePath) } + if (!args.contains("-retry") && !args.contains("--retry_failed")) { + val retry: Int = globalConfig(pipelineName, Nil, "retry", default = 5) + logger.info("No retry flag found, ") + argv ++= List("-retry", retry.toString) + } BiopetQCommandLine.main(argv) } } \ No newline at end of file diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala index be479a8fa972fe896fb8d45cd74708944ba4ba64..152230c8a41c18e28b69e3c5fb9d10febe8eec68 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala @@ -118,22 +118,24 @@ object Reference { /** * Raise an exception when given fasta file has no fai file * @param fastaFile Fasta file - * @throws IllegalArgumentException */ def requireFai(fastaFile: File): Unit = { val fai = new File(fastaFile.getAbsolutePath + ".fai") - require(fai.exists(), "Reference is missing a fai file") - require(IndexedFastaSequenceFile.canCreateIndexedFastaReader(fastaFile), - "Index of reference cannot be loaded, reference: " + fastaFile) + if (fai.exists()) { + if (!IndexedFastaSequenceFile.canCreateIndexedFastaReader(fastaFile)) + Logging.addError(s"Index of reference cannot be loaded, reference: $fastaFile") + } else 
Logging.addError("Reference is missing a fai file") } /** * Raise an exception when given fasta file has no dict file * @param fastaFile Fasta file - * @throws IllegalArgumentException */ def requireDict(fastaFile: File): Unit = { - val dict = new File(fastaFile.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta") + ".dict") - require(dict.exists(), "Reference is missing a dict file") + val dict = new File(fastaFile.getAbsolutePath + .stripSuffix(".fna") + .stripSuffix(".fa") + .stripSuffix(".fasta") + ".dict") + if (!dict.exists()) Logging.addError("Reference is missing a dict file") } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala index a3317faf604a9ae80c02ad0c3d9751fbc65849b9..e387200afcfce1687c2087753ddcb1ec16985774 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala @@ -24,6 +24,9 @@ import org.broadinstitute.gatk.utils.commandline.Argument * @author Peter van 't Hof */ trait SampleLibraryTag extends Configurable { + + //FIXME: not possible to have required sample / lib + @Argument(doc = "Sample ID", shortName = "sample", required = false) var sampleId: Option[String] = root match { case tag: SampleLibraryTag => tag.sampleId diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala index c29332656ca730eb91c84514f4d1e64c939b02a7..6f67277a58b0710c4ba8bde9d387989fa6a2327e 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommandFunction.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. 
It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.core import nl.lumc.sasc.biopet.FullVersion @@ -13,13 +28,5 @@ trait ToolCommandFunction extends BiopetJavaCommandLineFunction with Version { override def getVersion = Some("Biopet " + FullVersion) - override def beforeGraph(): Unit = { - javaMainClass = toolObject.getClass.getName.takeWhile(_ != '$') - super.beforeGraph() - } - - override def freezeFieldValues(): Unit = { - javaMainClass = toolObject.getClass.getName.takeWhile(_ != '$') - super.freezeFieldValues() - } + javaMainClass = toolObject.getClass.getName.takeWhile(_ != '$') } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala index 91d813acb3a2c633d65a7c6d8f403c05d2ea2332..45c41b1f39cd2bc68811e77e533f472b9fb24056 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. 
+ * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.core import java.io.File diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala new file mode 100644 index 0000000000000000000000000000000000000000..232954b7d8be7bf99cd4b354fb72d570ed1ebddc --- /dev/null +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala @@ -0,0 +1,136 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ +package nl.lumc.sasc.biopet.core + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils } +import org.broadinstitute.gatk.queue.function.{ CommandLineFunction, QFunction } +import scala.collection.mutable +import scala.collection.mutable.ListBuffer + +/** + * This object will generate with [[WriteDependencies.writeDependencies]] a json file where information about job and file dependencies are stored + * + * @author Peter van 't Hof <p.j.van_t_hof@lumc.nl> + */ +object WriteDependencies extends Logging with Configurable { + val root: Configurable = null + private val functionNames: mutable.Map[QFunction, String] = mutable.Map() + + private def createFunctionNames(functions: Seq[QFunction]): Unit = { + val cache: mutable.Map[String, Int] = mutable.Map() + for (function <- functions) { + val baseName = function match { + case f: Configurable => f.configName + case f => f.getClass.getSimpleName + } + cache += baseName -> (cache.getOrElse(baseName, 0) + 1) + functionNames += function -> s"$baseName-${cache(baseName)}" + } + } + + /** + * This method will generate a json file where information about job and file dependencies are stored + * + * @param functions This should be all functions that are given to the graph of Queue + * @param outputFile Json file to write dependencies to + */ + def writeDependencies(functions: Seq[QFunction], outputFile: File): Unit = { + logger.info("Start calculating dependencies") + + val errorOnMissingInput: Boolean = config("error_on_missing_input", false) + + createFunctionNames(functions) + + case class QueueFile(file: File) { + private val inputJobs: ListBuffer[QFunction] = ListBuffer() + def addInputJob(function: QFunction) = inputJobs += function + def inputJobNames = inputJobs.toList.map(functionNames) + + private val outputJobs: ListBuffer[QFunction] = ListBuffer() + def addOutputJob(function: QFunction) = { + if 
(outputJobs.nonEmpty) logger.warn(s"File '$file' is found as output of multiple jobs") + outputJobs += function + } + def outputJobNames = outputJobs.toList.map(functionNames) + + def getMap = { + val fileExist = file.exists() + if (!fileExist && outputJobs.isEmpty) { + if (errorOnMissingInput) Logging.addError(s"Input file does not exist: $file") + else logger.warn(s"Input file does not exist: $file") + } + Map( + "path" -> file.getAbsolutePath, + "intermediate" -> isIntermediate, + "output_jobs" -> outputJobNames, + "input_jobs" -> inputJobNames, + "exist_at_start" -> fileExist, + "pipeline_input" -> outputJobs.isEmpty + ) + } + + def isIntermediate = outputJobs.exists(_.isIntermediate) + } + + val files: mutable.Map[File, QueueFile] = mutable.Map() + + def outputFiles(function: QFunction) = { + if (function.jobErrorFile == null) function.outputs :+ function.jobOutputFile + else function.outputs :+ function.jobOutputFile :+ function.jobErrorFile + } + + for (function <- functions) { + for (input <- function.inputs) { + val file = files.getOrElse(input, QueueFile(input)) + file.addInputJob(function) + files += input -> file + } + for (output <- outputFiles(function)) { + val file = files.getOrElse(output, QueueFile(output)) + file.addOutputJob(function) + files += output -> file + } + } + + val jobs = functionNames.par.map { + case (f, name) => + name -> Map("command" -> (f match { + case cmd: CommandLineFunction => cmd.commandLine + case _ => None + }), "intermediate" -> f.isIntermediate, + "depens_on_intermediate" -> f.inputs.exists(files(_).isIntermediate), + "depens_on_jobs" -> f.inputs.toList.flatMap(files(_).outputJobNames).distinct, + "ouput_used_by_jobs" -> outputFiles(f).toList.flatMap(files(_).inputJobNames).distinct, + "outputs" -> outputFiles(f).toList, + "inputs" -> f.inputs.toList, + "done_at_start" -> f.isDone, + "fail_at_start" -> f.isFail) + }.toIterator.toMap + + logger.info(s"Writing dependencies to: $outputFile") + val writer = new 
PrintWriter(outputFile) + writer.println(ConfigUtils.mapToJson(Map( + "jobs" -> jobs.toMap, + "files" -> files.values.par.map(_.getMap).toList + )).spaces2) + writer.close() + + logger.info("done calculating dependencies") + } +} diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala index 0ae2587f7928bb8d8cfe3e157f79fec7afff031a..1ea79a8c1421805cdc8b4abd23afcd265c6c2179 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.core.extensions import java.io.File @@ -21,11 +36,6 @@ class CheckChecksum extends InProcessFunction { @Argument(required = true) var checksum: String = _ - override def freezeFieldValues(): Unit = { - super.freezeFieldValues() - jobOutputFile = new File(checksumFile.getParentFile, checksumFile.getName + ".check.out") - } - /** Exits whenever the input md5sum is not the same as the output md5sum */ def run: Unit = { val outputChecksum = WriteSummary.parseChecksum(checksumFile).toLowerCase diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala index a2e43d404de302bdb21dbcf9dffeee96c0857de6..2ec92145abe956ec94a11342e49b57752ed7f5fc 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala @@ -16,11 +16,13 @@ package nl.lumc.sasc.biopet.core.report import java.io._ + import nl.lumc.sasc.biopet.core.ToolCommandFunction import nl.lumc.sasc.biopet.utils.summary.Summary -import nl.lumc.sasc.biopet.utils.{ ToolCommand, Logging, IoUtils } +import nl.lumc.sasc.biopet.utils.{ IoUtils, Logging, ToolCommand } import org.broadinstitute.gatk.utils.commandline.Input import org.fusesource.scalate.{ TemplateEngine, TemplateSource } + import scala.collection.mutable /** @@ -31,7 +33,7 @@ import scala.collection.mutable trait ReportBuilderExtension extends ToolCommandFunction { /** Report builder object */ - val builder: ReportBuilder + def builder: ReportBuilder def toolObject = builder @@ -95,6 +97,22 @@ trait ReportBuilder extends ToolCommand { private var _libId: Option[String] = None protected def libId = _libId + case class ExtFile(resourcePath: String, targetPath: String) + + def extFiles = List( + "css/bootstrap_dashboard.css", + "css/bootstrap.min.css", + 
"css/bootstrap-theme.min.css", + "css/sortable-theme-bootstrap.css", + "js/jquery.min.js", + "js/sortable.min.js", + "js/bootstrap.min.js", + "js/d3.v3.5.5.min.js", + "fonts/glyphicons-halflings-regular.woff", + "fonts/glyphicons-halflings-regular.ttf", + "fonts/glyphicons-halflings-regular.woff2" + ).map(x => ExtFile("/nl/lumc/sasc/biopet/core/report/ext/" + x, x)) + /** Main function to for building the report */ def main(args: Array[String]): Unit = { logger.info("Start") @@ -123,22 +141,9 @@ trait ReportBuilder extends ToolCommand { // Static files that will be copied to the output folder, then file is added to [resourceDir] it's need to be added here also val extOutputDir: File = new File(cmdArgs.outputDir, "ext") - val resourceDir: String = "/nl/lumc/sasc/biopet/core/report/ext/" - val extFiles = List( - "css/bootstrap_dashboard.css", - "css/bootstrap.min.css", - "css/bootstrap-theme.min.css", - "css/sortable-theme-bootstrap.css", - "js/jquery.min.js", - "js/sortable.min.js", - "js/bootstrap.min.js", - "fonts/glyphicons-halflings-regular.woff", - "fonts/glyphicons-halflings-regular.ttf", - "fonts/glyphicons-halflings-regular.woff2" - ) for (resource <- extFiles.par) { - IoUtils.copyStreamToFile(getClass.getResourceAsStream(resourceDir + resource), new File(extOutputDir, resource), createDirs = true) + IoUtils.copyStreamToFile(getClass.getResourceAsStream(resource.resourcePath), new File(extOutputDir, resource.targetPath), createDirs = true) } logger.info("Parsing summary") diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala index ab2f64546a7f79e8432ec1ac7ff0ab19427cccfc..0e947ba6c7d3bbf144c98feb1b7cb21601c6629f 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala @@ -19,6 +19,7 @@ 
import java.io.File import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.extensions.{ CheckChecksum, Md5sum } +import org.broadinstitute.gatk.queue.QScript import scala.collection.mutable @@ -27,7 +28,7 @@ import scala.collection.mutable * * Created by pjvan_thof on 2/14/15. */ -trait SummaryQScript extends BiopetQScript { qscript => +trait SummaryQScript extends BiopetQScript { qscript: QScript => /** Key is sample/library, None is sample or library is not applicable */ private[summary] var summarizables: Map[(String, Option[String], Option[String]), List[Summarizable]] = Map() @@ -91,45 +92,37 @@ trait SummaryQScript extends BiopetQScript { qscript => summaryQScripts :+= summaryQScript } + private var addedJobs = false + /** Add jobs to qscript to execute summary, also add checksum jobs */ def addSummaryJobs(): Unit = { + if (addedJobs) throw new IllegalStateException("Summary jobs for this QScript are already executed") val writeSummary = new WriteSummary(this) def addChecksum(file: File): Unit = { - if (writeSummary.md5sum && !SummaryQScript.md5sumCache.contains(file)) { - val md5sum = new Md5sum(this) { - override def configName = "md5sum" - override def cmdLine: String = super.cmdLine + " || " + - required("echo") + required("error_on_capture " + input.toString) + " > " + required(output) - } - md5sum.input = file - md5sum.output = new File(file.getParentFile, file.getName + ".md5") - - // Need to not write a md5 file outside the outputDir - if (!file.getAbsolutePath.startsWith(outputDir.getAbsolutePath)) - md5sum.output = new File(outputDir, ".md5" + file.getAbsolutePath + ".md5") - - writeSummary.deps :+= md5sum.output - SummaryQScript.md5sumCache += file -> md5sum.output - add(md5sum) + if (writeSummary.md5sum) { + if (!SummaryQScript.md5sumCache.contains(file)) { + val md5sum = new Md5sum(this) { + override def configName = "md5sum" + + override def cmdLine: String = super.cmdLine + " || " + + required("echo") + required("error_on_capture " + 
input.toString) + " > " + required(output) + } + md5sum.input = file + md5sum.output = new File(file.getParentFile, file.getName + ".md5") + + // Need to not write a md5 file outside the outputDir + if (!file.getAbsolutePath.startsWith(outputDir.getAbsolutePath)) + md5sum.output = new File(outputDir, ".md5" + file.getAbsolutePath + ".md5") + + writeSummary.deps :+= md5sum.output + SummaryQScript.md5sumCache += file -> md5sum.output + add(md5sum) + } else writeSummary.deps :+= SummaryQScript.md5sumCache(file) } //TODO: add more checksums types } - for (inputFile <- inputFiles) { - inputFile.md5 match { - case Some(checksum) => { - val checkMd5 = new CheckChecksum - checkMd5.inputFile = inputFile.file - require(SummaryQScript.md5sumCache.contains(inputFile.file), "Md5 job is not executed, checksum file can't be found") - checkMd5.checksumFile = SummaryQScript.md5sumCache(inputFile.file) - checkMd5.checksum = checksum - add(checkMd5) - } - case _ => - } - } - for ((_, summarizableList) <- summarizables; summarizable <- summarizableList) { summarizable match { case f: BiopetCommandLineFunction => f.beforeGraph() @@ -146,6 +139,22 @@ trait SummaryQScript extends BiopetQScript { qscript => } } + for (inputFile <- inputFiles) { + inputFile.md5 match { + case Some(checksum) => { + val checkMd5 = new CheckChecksum + checkMd5.inputFile = inputFile.file + require(SummaryQScript.md5sumCache.contains(inputFile.file), + s"Md5 job is not executed, checksum file can't be found for: ${inputFile.file}") + checkMd5.checksumFile = SummaryQScript.md5sumCache(inputFile.file) + checkMd5.checksum = checksum + checkMd5.jobOutputFile = new File(checkMd5.checksumFile.getParentFile, checkMd5.checksumFile.getName + ".check.out") + add(checkMd5) + } + case _ => + } + } + for ((_, file) <- this.summaryFiles) addChecksum(file) @@ -154,6 +163,8 @@ trait SummaryQScript extends BiopetQScript { qscript => logger.info("Write summary is skipped because sample flag is used") case _ => add(writeSummary) 
} + + addedJobs = true } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 7b8f34ecbb8108d53c342df2f4112df973f702c3..02c860fdb1719c8c4635d467bea752b74745f9b2 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -18,9 +18,9 @@ package nl.lumc.sasc.biopet.core.summary import java.io.{ File, PrintWriter } import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, BiopetJavaCommandLineFunction, SampleLibraryTag } +import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.utils.ConfigUtils -import nl.lumc.sasc.biopet.{ LastCommitHash, Version } +import nl.lumc.sasc.biopet.LastCommitHash import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -90,7 +90,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config } ( - qscript.functions.flatMap(fetchVersion(_)) ++ + qscript.functions.flatMap(fetchVersion) ++ qscript.functions .flatMap { case f: BiopetCommandLineFunction => f.pipesJobs @@ -99,13 +99,29 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config ).toMap } - val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++ - (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map("pipeline" -> files))) ++ - (if (executables.isEmpty) Map[String, Any]() else Map("executables" -> executables.toMap)))) + val map = Map(qscript.summaryName -> Map( + "settings" -> settings, + "files" -> Map("pipeline" -> files), + "executables" -> executables.toMap) + ) qscript match { case tag: SampleLibraryTag => 
prefixSampleLibrary(map, tag.sampleId, tag.libId) - case _ => map + case q: MultiSampleQScript => + ConfigUtils.mergeMaps( + Map("samples" -> q.samples.map { + case (sampleName, sample) => + sampleName -> Map( + qscript.summaryName -> Map("settings" -> sample.summarySettings), + "libraries" -> sample.libraries.map { + case (libName, lib) => + libName -> Map( + qscript.summaryName -> Map("settings" -> lib.summarySettings) + ) + } + ) + }), map) + case _ => map } } diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala index bac6932ddecd94dcbeb6ff6648edd5461ddced13..52e77b6f415488915fdd90769a8162cba7fbc588 100644 --- a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/BiopetPipeTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.core import org.scalatest.Matchers diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/CommandLineResourcesTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/CommandLineResourcesTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..6eee8849a5d6972abcca0d6efa2f924ba95d5e48 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/CommandLineResourcesTest.scala @@ -0,0 +1,90 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.core + +import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable } +import org.broadinstitute.gatk.queue.function.CommandLineFunction +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test +import scala.language.reflectiveCalls + +/** + * Created by pjvanthof on 17/11/15. 
+ */ +class CommandLineResourcesTest extends TestNGSuite with Matchers { + class CommandLineFunctionMock(c: Map[String, Any] = Map()) extends CommandLineFunction with Configurable { + override def freezeFieldValues() {} + def commandLine = "command" + val root = null + override def globalConfig = new Config(c) + } + + @Test + def testDefaults(): Unit = { + val cmd = new CommandLineFunctionMock with CommandLineResources + cmd.coreMemory shouldBe 2.0 + cmd.residentFactor shouldBe 1.2 + cmd.vmemFactor shouldBe 1.4 + cmd.retry shouldBe 0 + cmd.threads shouldBe 1 + + cmd.setResources() + + cmd.memoryLimit shouldBe Some(cmd.coreMemory * cmd.threads) + cmd.residentLimit shouldBe Some(cmd.coreMemory * cmd.residentFactor) + cmd.vmem shouldBe Some((cmd.coreMemory * cmd.vmemFactor) + "G") + + cmd.jobResourceRequests shouldBe empty + + cmd.freezeFieldValues() + + cmd.jobResourceRequests should contain("h_vmem=" + cmd.vmem.get) + + cmd.setupRetry() + cmd.retry shouldBe 1 + cmd.setupRetry() + cmd.retry shouldBe 2 + cmd.setupRetry() + cmd.retry shouldBe 3 + + } + + @Test + def testMaxThreads(): Unit = { + val cmd = new CommandLineFunctionMock(Map("maxthreads" -> 5, "threads" -> 10)) with CommandLineResources + + cmd.threads shouldBe 5 + } + + @Test + def testCombine(): Unit = { + val cmd1 = new CommandLineFunctionMock with CommandLineResources + val cmd2 = new CommandLineFunctionMock with CommandLineResources + val mainCmd = new CommandLineFunctionMock with CommandLineResources { + def combine(functions: List[CommandLineResources]) = combineResources(functions) + } + mainCmd.combine(List(cmd1, cmd2)) + + mainCmd.coreMemory shouldBe 2.0 + mainCmd.residentFactor shouldBe 1.2 + mainCmd.vmemFactor shouldBe 1.4 + mainCmd.memoryLimit shouldBe Some(4.0) + mainCmd.retry shouldBe 0 + mainCmd.threads shouldBe 2 + + } +} diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala 
b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..fb8c5a422359040ba6bf53ec78c78020853a470e --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala @@ -0,0 +1,44 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.core + +import nl.lumc.sasc.biopet.utils.config.Config +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.language.reflectiveCalls + +/** + * Created by pjvanthof on 17/11/15. 
+ */ +class PipelineCommandTest extends TestNGSuite with Matchers { + @Test + def testPipelineCommand: Unit = { + val pipeline = new PipelineCommand { + override val globalConfig = new Config(Map()) + def getConfig = globalConfig + } + + pipeline.pipelineName shouldBe this.getClass.getSimpleName.toLowerCase + pipeline.pipeline shouldBe s"/${this.getClass.getName.stripSuffix("$").replaceAll("\\.", "/")}.class" + + // Config should be empty if the main method is not yet touched + pipeline.getConfig.map shouldBe empty + + //TODO: Main method testing + } +} diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/SampleLibraryTagTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/SampleLibraryTagTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..8809acdf9643af4e182866b018f0f5e4ef818737 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/SampleLibraryTagTest.scala @@ -0,0 +1,56 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.core + +import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable } +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvanthof on 17/11/15.
+ */ +class SampleLibraryTagTest extends TestNGSuite with Matchers { + @Test + def testDefault: Unit = { + val o = new SampleLibraryTag { + override def root: Configurable = null + override def globalConfig = new Config(Map()) + } + o.sampleId shouldBe None + o.libId shouldBe None + } + + @Test + def testInherit: Unit = { + val o1 = new SampleLibraryTag { + override def root: Configurable = null + override def globalConfig = new Config(Map()) + } + o1.sampleId = Some("sampleName") + o1.libId = Some("libName") + o1.sampleId shouldBe Some("sampleName") + o1.libId shouldBe Some("libName") + + val o2 = new SampleLibraryTag { + override def root: Configurable = o1 + override def globalConfig = new Config(Map()) + } + o2.sampleId shouldBe o1.sampleId + o2.libId shouldBe o1.libId + + } +} diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ToolCommandTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ToolCommandTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..ea412e0bbfd991cd67cda97c9c8e0bebc624d5b5 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ToolCommandTest.scala @@ -0,0 +1,45 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ +package nl.lumc.sasc.biopet.core + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test +import nl.lumc.sasc.biopet.FullVersion + +/** + * Created by pjvanthof on 16/11/15. + */ +class ToolCommandTest extends TestNGSuite with Matchers { + @Test + def testToolCommand: Unit = { + val tool = new ToolCommandFunction { + def root = null + def toolObject = ToolCommandTest + } + + tool.versionCommand shouldBe empty + tool.versionRegex.toString() shouldBe empty + tool.getVersion shouldBe Some("Biopet " + FullVersion) + tool.beforeGraph + + tool.javaMainClass shouldBe ToolCommandTest.getClass.getName.takeWhile(_ != '$') + } +} + +object ToolCommandTest { + +} \ No newline at end of file diff --git a/public/biopet-extensions/pom.xml b/public/biopet-extensions/pom.xml index 05ec9ca8ba6a942f2fa5e5833e6b91198efa3abb..26cc102b5df164ac6fc4df51fe89eb33b87ffe27 100644 --- a/public/biopet-extensions/pom.xml +++ b/public/biopet-extensions/pom.xml @@ -1,11 +1,28 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. + + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. 
+ +--> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </parent> <modelVersion>4.0.0</modelVersion> diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala index 21dcc313ce1e4ff0f20b253d81651cf1369f3cab..2733ba975b7ea05ef8ade6d534ea48ec711c0c72 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala @@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions import java.io.File +import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -33,9 +34,6 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Refe @Input(doc = "Fastq file R2", shortName = "R2", required = false) var R2: Option[File] = None - @Input(doc = "The reference file for the bam files.", shortName = "R", required = true) - var reference: File = null - @Output(doc = "Output file SAM", shortName = "output", required = true) var output: File = null @@ -59,13 +57,20 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Refe var maqerr: Option[Int] = config("maqerr") var maxins: Option[Int] = config("maxins") var largeIndex: Boolean = config("large-index", default = false) + var bowtieIndex: String = config("bowtie_index") override def beforeGraph() { super.beforeGraph() - if (reference == null) reference = referenceFasta() - 
val basename = reference.getName.stripSuffix(".fasta").stripSuffix(".fa") - if (reference.getParentFile.list().toList.filter(_.startsWith(basename)).exists(_.endsWith(".ebwtl"))) - largeIndex = config("large-index", default = true) + val indexDir = new File(bowtieIndex).getParentFile + val basename = bowtieIndex.stripPrefix(indexDir.getPath + File.separator) + if (indexDir.exists()) { + if (indexDir.list().toList.filter(_.startsWith(basename)).exists(_.endsWith(".ebwtl"))) + largeIndex = config("large-index", default = true) + else { + if (!indexDir.list().toList.filter(_.startsWith(basename)).exists(_.endsWith(".ebwt"))) + Logging.addError(s"No index files found for bowtie in: $indexDir with basename: $basename") + } + } } /** return commandline to execute */ @@ -83,7 +88,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Refe optional("--maxbts", maxbts) + optional("--maqerr", maqerr) + optional("--maxins", maxins) + - required(reference.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta")) + + required(bowtieIndex) + (R2 match { case Some(r2) => required("-1", R1) + diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie2.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie2.scala new file mode 100644 index 0000000000000000000000000000000000000000..b97c6a08d44472f8d8d189a180db90b509a85a83 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie2.scala @@ -0,0 +1,234 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version } +import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Extension for bowtie 2 + * + * Based on version 2.2.6 + */ +class Bowtie2(val root: Configurable) extends BiopetCommandLineFunction with 
Reference with Version { + @Input(doc = "Fastq file R1", shortName = "R1") + var R1: File = null + + @Input(doc = "Fastq file R2", shortName = "R2", required = false) + var R2: Option[File] = None + + @Output(doc = "Output file SAM", shortName = "output", required = true) + var output: File = null + + executable = config("exe", default = "bowtie2", freeVar = false) + def versionRegex = """.*[Vv]ersion:? (.*)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " --version" + + override def defaultCoreMemory = 4.0 + override def defaultThreads = 4 + + /* Required */ + var bowtieIndex: String = config("bowtie_index") + + /* Input options */ + var q: Boolean = config("q", default = false) + var qseq: Boolean = config("qseq", default = false) + var f: Boolean = config("f", default = false) + var r: Boolean = config("r", default = false) + var c: Boolean = config("c", default = false) + var skip: Option[Int] = config("skip") + var upto: Option[Int] = config("upto") + var trim5: Option[Int] = config("trim5") + var trim3: Option[Int] = config("trim3") + var phred33: Boolean = config("phred33", default = false) + var phred64: Boolean = config("phred64", default = false) + var int_quals: Boolean = config("int_quals", default = false) + + /* Alignment options */ + var N: Option[Int] = config("N") + var L: Option[Int] = config("L") + var i: Option[String] = config("i") + var n_ceil: Option[String] = config("n_ceil") + var dpad: Option[Int] = config("dpad") + var gbar: Option[Int] = config("gbar") + var ignore_quals: Boolean = config("ignore_quals", default = false) + var nofw: Boolean = config("nofw", default = false) + var norc: Boolean = config("norc", default = false) + var no_1mm_upfront: Boolean = config("no_1mm_upfront", default = false) + var end_to_end: Boolean = config("end_to_end", default = false) + var local: Boolean = config("local", default = false) + + /* Scoring */ + var ma: Option[Int] = config("ma") + var mp: Option[Int] = 
config("mp") + var np: Option[Int] = config("np") + var rdg: Option[String] = config("rdg") + var rfg: Option[String] = config("rfg") + var score_min: Option[String] = config("score_min") + + /* Reporting */ + var k: Option[Int] = config("k") + var all: Option[Int] = config("all") + + /* Effort */ + var D: Option[Int] = config("D") + var R: Option[Int] = config("R") + + /* Paired-end */ + var minins: Option[Int] = config("minins") + var maxins: Option[Int] = config("maxins") + var fr: Boolean = config("fr", default = false) + var rf: Boolean = config("rf", default = false) + var ff: Boolean = config("ff", default = false) + var no_mixed: Boolean = config("no_mixed", default = false) + var no_discordant: Boolean = config("no_discordant", default = false) + var no_dovetail: Boolean = config("no_dovetail", default = false) + var no_contain: Boolean = config("no_contain", default = false) + var no_overlap: Boolean = config("no_overlap", default = false) + + /* Output */ + var time: Boolean = config("time", default = false) + + var un: Option[String] = config("un") + var al: Option[String] = config("al") + var un_conc: Option[String] = config("un_conc") + var al_conc: Option[String] = config("al_conc") + + var un_gz: Option[String] = config("un_gz") + var al_gz: Option[String] = config("al_gz") + var un_conc_gz: Option[String] = config("un_conc_gz") + var al_conc_gz: Option[String] = config("al_conc_gz") + + var un_bz2: Option[String] = config("un_bz2") + var al_bz2: Option[String] = config("al_bz2") + var un_conc_bz2: Option[String] = config("un_conc_bz2") + var al_conc_bz2: Option[String] = config("al_conc_bz2") + + var quiet: Boolean = config("quiet", default = false) + var met_file: Option[String] = config("met_file") + var met_stderr: Boolean = config("met_stderr", default = false) + var met: Option[Int] = config("met") + + var no_unal: Boolean = config("no_unal", default = false) + var no_head: Boolean = config("no_head", default = false) + var no_sq:
Boolean = config("no_sq", default = false) + + var rg_id: Option[String] = config("rg_id") + var rg: List[String] = config("rg", default = Nil) + + var omit_sec_seq: Boolean = config("omit_sec_seq", default = false) + + /* Performance */ + var reorder: Boolean = config("reorder", default = false) + var mm: Boolean = config("mm", default = false) + + /* Other */ + var qc_filter: Boolean = config("qc_filter", default = false) + var seed: Option[Int] = config("seed") + var non_deterministic: Boolean = config("non_deterministic", default = false) + + override def beforeGraph() { + super.beforeGraph() + val indexDir = new File(bowtieIndex).getParentFile + val basename = bowtieIndex.stripPrefix(indexDir.getPath + File.separator) + if (indexDir.exists()) { + if (!indexDir.list().toList.filter(_.startsWith(basename)).exists(_.endsWith(".bt2"))) + Logging.addError(s"No index files found for bowtie2 in: $indexDir with basename: $basename") + } + } + /** return commandline to execute */ + def cmdLine = required(executable) + + conditional(q, "-q") + + conditional(qseq, "--qseq") + + conditional(f, "-f") + + conditional(r, "-r") + + conditional(c, "-c") + + optional("--skip", skip) + + optional("--upto", upto) + + optional("--trim3", trim3) + + optional("--trim5", trim5) + + conditional(phred33, "--phred33") + + conditional(phred64, "--phred64") + + conditional(int_quals, "--int-quals") + + /* Alignment options */ + optional("-N", N) + + optional("-L", L) + + optional("-i", i) + + optional("--n-ceil", n_ceil) + + optional("--dpad", dpad) + + optional("--gbar", gbar) + + conditional(ignore_quals, "--ignore-quals") + + conditional(nofw, "--nofw") + + conditional(norc, "--norc") + + conditional(no_1mm_upfront, "--no-1mm-upfront") + + conditional(end_to_end, "--end-to-end") + + conditional(local, "--local") + + /* Scoring */ + optional("--ma", ma) + + optional("--mp", mp) + + optional("--np", np) + + optional("--rdg", rdg) + + optional("--rfg", rfg) + + optional("--score-min", 
score_min) + + /* Reporting */ + optional("-k", k) + + optional("--all", all) + + /* Effort */ + optional("-D", D) + + optional("-R", R) + + /* Paired End */ + optional("--minins", minins) + + optional("--maxins", maxins) + + conditional(fr, "--fr") + + conditional(rf, "--rf") + + conditional(ff, "--ff") + + conditional(no_mixed, "--no-mixed") + + conditional(no_discordant, "--no-discordant") + + conditional(no_dovetail, "--no-dovetail") + + conditional(no_contain, "--no-contain") + + conditional(no_overlap, "--no-overlap") + + /* Output */ + conditional(time, "--time") + + optional("--un", un) + + optional("--al", al) + + optional("--un-conc", un_conc) + + optional("--al-conc", al_conc) + + optional("--un-gz", un_gz) + + optional("--al-gz", al_gz) + + optional("--un-conc-gz", un_conc_gz) + + optional("--al-conc-gz", al_conc_gz) + + optional("--un-bz2", un_bz2) + + optional("--al-bz2", al_bz2) + + optional("--un-conc-bz2", un_conc_bz2) + + optional("--al-conc-bz2", al_conc_bz2) + + conditional(quiet, "--quiet") + + optional("--met-file", met_file) + + conditional(met_stderr, "--met-stderr") + + optional("--met", met) + + conditional(no_unal, "--no-unal") + + conditional(no_head, "--no-head") + + conditional(no_sq, "--no-sq") + + optional("--rg-id", rg_id) + + repeat("--rg", rg) + + conditional(omit_sec_seq, "--omit-sec-seq") + + /* Performance */ + optional("--threads", threads) + + conditional(reorder, "--reorder") + + conditional(mm, "--mm") + + /* Other */ + conditional(qc_filter, "--qc-filter") + + optional("--seed", seed) + + conditional(non_deterministic, "--non-deterministic") + + /* Required */ + required("-x", bowtieIndex) + + (R2 match { + case Some(r2) => + required("-1", R1) + + optional("-2", r2) + case _ => required("-U", R1) + }) + + (if (outputAsStsout) "" else required("-S", output)) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala 
b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala index e8f9c2caf43ff1163218a5814bfa0ba45f0b6ceb..6e06894f916a5206bcac748d924eb8f3a9f51c53 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala @@ -43,6 +43,9 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su def versionCommand = executable + " --version" def versionRegex = """(.*)""".r + /** Name of the key containing clipped adapters information in the summary stats. */ + def adaptersStatsName = "adapters" + var default_clip_mode: String = config("default_clip_mode", default = "3") var opt_adapter: Set[String] = config("adapter", default = Nil) var opt_anywhere: Set[String] = config("anywhere", default = Nil) @@ -89,7 +92,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su Map("num_reads_affected" -> stats("trimmed"), "num_reads_discarded_too_short" -> stats("tooshort"), "num_reads_discarded_too_long" -> stats("toolong"), - "adapters" -> adapter_stats.toMap + adaptersStatsName -> adapter_stats.toMap ) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala index 721ba8575bc8c7edff7854163fba57a0afe3e474..582866f8ae951902cac8ba8d8efc52d7341654ce 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala @@ -331,6 +331,14 @@ class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Refer def versionRegex = """.* version (.*)""".r def versionCommand = executable + " --version" + override def beforeGraph(): Unit = { + super.beforeGraph() + if ((!gunzip && !bunzip2) && input.forall(_.getName.endsWith(".gz"))) { + 
logger.debug("Fastq with .gz extension found, enabled --gunzip option") + gunzip = true + } + } + def cmdLine = { required(executable) + optional("--dir", dir) + diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala index 84ec59eb9f5bc83b3c6e6b9980c4f29121a2d12d..5f5edbfc4e38b0dccacd017650c818b8aa1c56fb 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala @@ -18,13 +18,13 @@ package nl.lumc.sasc.biopet.extensions import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference } +import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference } import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } /** * Extension for STAR */ -class Star(val root: Configurable) extends BiopetCommandLineFunction with Reference { +class Star(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { @Input(doc = "The reference file for the bam files.", required = false) var reference: File = null @@ -54,6 +54,9 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction with Refere executable = config("exe", "STAR") + def versionCommand = executable + " --version" + def versionRegex = """(.*)""".r + @Argument(doc = "Output Directory") var outputDir: File = _ diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala index daaaf73d96c7d190c61bbdbc45331115eb388aed..a94561d8bc0c2aec368148b20e159e08e2e56ef0 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala +++ 
b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala @@ -56,7 +56,7 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi var l: Boolean = config("l", default = false) var f: Boolean = config("f", default = false) - executable = config("exe", default = "tabix") + executable = config("exe", default = "tabix", freeVar = false) def versionCommand = executable def versionRegex = """Version: (.*)""".r diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala index e9878f9808abb1f6a875e9c35412db6c2ac38d23..43013a16920054acfe8d4008b05186572697cc9f 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala @@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions import java.io.File +import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -40,8 +41,8 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu def versionCommand = executable + " " + vepScript + " --help" //Boolean vars - var v: Boolean = config("v", default = true) - var q: Boolean = config("q", default = false) + var v: Boolean = config("v", default = true, freeVar = false) + var q: Boolean = config("q", default = false, freeVar = false) var offline: Boolean = config("offline", default = false) var no_progress: Boolean = config("no_progress", default = false) var everything: Boolean = config("everything", default = false) @@ -75,9 +76,9 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu var 
maf_esp: Boolean = config("maf_esp", default = false) var old_map: Boolean = config("old_maf", default = false) var pubmed: Boolean = config("pubmed", default = false) - var failed: Boolean = config("failed", default = false) - var vcf: Boolean = config("vcf", default = true) - var json: Boolean = config("json", default = false) + + var vcf: Boolean = config("vcf", default = true, freeVar = false) + var json: Boolean = config("json", default = false, freeVar = false) var gvf: Boolean = config("gvf", default = false) var check_ref: Boolean = config("check_ref", default = false) var coding_only: Boolean = config("coding_only", default = false) @@ -103,8 +104,8 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu var skip_db_check: Boolean = config("skip_db_check", default = false) // Textual args - var vep_config: Option[String] = config("config") - var species: Option[String] = config("species") + var vep_config: Option[String] = config("config", freeVar = false) + var species: Option[String] = config("species", freeVar = false) var assembly: Option[String] = config("assembly") var format: Option[String] = config("format") var dir: Option[String] = config("dir") @@ -140,13 +141,15 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu var port: Option[Int] = config("port") var db_version: Option[Int] = config("db_version") var buffer_size: Option[Int] = config("buffer_size") + // ought to be a flag, but is BUG in VEP; becomes numeric ("1" is true) + var failed: Option[Int] = config("failed") override def beforeGraph(): Unit = { super.beforeGraph() if (!cache && !database) { - throw new IllegalArgumentException("Must supply either cache or database for VariantEffectPredictor") + Logging.addError("Must supply either cache or database for VariantEffectPredictor") } else if (cache && dir.isEmpty) { - throw new IllegalArgumentException("Must supply dir to cache for VariantEffectPredictor") + 
Logging.addError("Must supply dir to cache for VariantEffectPredictor") } } @@ -189,7 +192,6 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu conditional(maf_1kg, "--maf_1kg") + conditional(maf_esp, "--maf_esp") + conditional(pubmed, "--pubmed") + - conditional(failed, "--failed") + conditional(vcf, "--vcf") + conditional(json, "--json") + conditional(gvf, "--gvf") + @@ -249,6 +251,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu optional("--freq_freq", freq_freq) + optional("--port", port) + optional("--db_version", db_version) + - optional("--buffer_size", buffer_size) + optional("--buffer_size", buffer_size) + + optional("--failed", failed) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala index 6e7bf4146464e9c6f69e7759569985fce7204e23..2291df063b26293132bbd95c9f8ccf56c8ddfd13 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala @@ -19,7 +19,7 @@ import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } abstract class Bcftools extends BiopetCommandLineFunction with Version { override def subPath = "bcftools" :: super.subPath - executable = config("exe", default = "bcftools") + executable = config("exe", default = "bcftools", submodule = "bcftools", freeVar = false) def versionCommand = executable def versionRegex = """Version: (.*)""".r override def versionExitcode = List(0, 1) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala index 
3a6fb6978cfb63deae159ad620ef472a28cdd7c2..7235b0f8bb31479e31e4b3a1dad8722d7d99efe3 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.extensions.bcftools import java.io.File diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/Breakdancer.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/Breakdancer.scala deleted file mode 100644 index 9f662e3d7830f542a7361c11c5803385afef44ac..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/Breakdancer.scala +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. 
- * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.breakdancer - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript, PipelineCommand } -import org.broadinstitute.gatk.queue.QScript - -/// Breakdancer is actually a mini pipeline executing binaries from the breakdancer package -class Breakdancer(val root: Configurable) extends QScript with BiopetQScript with Reference { - def this() = this(null) - - @Input(doc = "Input file (bam)") - var input: File = _ - - @Argument(doc = "Work directory") - var workDir: File = _ - - var deps: List[File] = Nil - - @Output(doc = "Breakdancer config") - lazy val configfile: File = { - new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.cfg") - } - @Output(doc = "Breakdancer raw output") - lazy val outputraw: File = { - new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.tsv") - } - @Output(doc = "Breakdancer VCF output") - lazy val outputvcf: File = { - new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.vcf") - } - - override def init(): Unit = { - } - - def biopetScript() { - // read config and set all parameters for the pipeline - logger.info("Starting Breakdancer configuration") - - val bdcfg = BreakdancerConfig(this, input, this.configfile) - bdcfg.deps = this.deps - outputFiles += ("cfg" -> bdcfg.output) - add(bdcfg) - - val breakdancer = BreakdancerCaller(this, bdcfg.output, this.outputraw) 
- add(breakdancer) - outputFiles += ("tsv" -> breakdancer.output) - - val bdvcf = BreakdancerVCF(this, breakdancer.output, this.outputvcf) - add(bdvcf) - outputFiles += ("vcf" -> bdvcf.output) - } -} - -object Breakdancer extends PipelineCommand { - def apply(root: Configurable, input: File, runDir: File): Breakdancer = { - val breakdancer = new Breakdancer(root) - breakdancer.input = input - breakdancer.workDir = runDir - breakdancer.init() - breakdancer.biopetScript() - breakdancer - } -} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/Delly.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/Delly.scala deleted file mode 100644 index b5fabe35e56555ba1723ab9270c04578b6bde23e..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/Delly.scala +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.extensions.delly - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.Ln -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.CatVariants - -class Delly(val root: Configurable) extends QScript with BiopetQScript with Reference { - def this() = this(null) - - @Input(doc = "Input file (bam)") - var input: File = _ - - var workDir: File = _ - - @Output(doc = "Delly result VCF") - var outputVcf: File = _ - - var outputName: String = _ - - // select the analysis types DEL,DUP,INV,TRA - var del: Boolean = config("DEL", default = true) - var dup: Boolean = config("DUP", default = true) - var inv: Boolean = config("INV", default = true) - var tra: Boolean = config("TRA", default = true) - - override def init(): Unit = { - if (outputName == null) outputName = input.getName.stripSuffix(".bam") - if (outputVcf == null) outputVcf = new File(workDir, outputName + ".delly.vcf") - } - - def biopetScript() { - // write the pipeline here - logger.info("Configuring Delly pipeline") - var outputFiles: Map[String, File] = Map() - var vcfFiles: Map[String, File] = Map() - - /// start delly and then copy the vcf into the root directory "<sample>.delly/" - if (del) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "DEL" - delly.outputvcf = new File(workDir, outputName + ".delly.del.vcf") - add(delly) - vcfFiles += ("DEL" -> delly.outputvcf) - } - if (dup) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "DUP" - delly.outputvcf = new File(workDir, outputName + ".delly.dup.vcf") - add(delly) - vcfFiles += ("DUP" -> delly.outputvcf) - } - if (inv) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "INV" - delly.outputvcf = new File(workDir, outputName + ".delly.inv.vcf") - 
add(delly) - vcfFiles += ("INV" -> delly.outputvcf) - } - if (tra) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "TRA" - delly.outputvcf = new File(workDir, outputName + ".delly.tra.vcf") - // vcfFiles += ("TRA" -> delly.outputvcf) - add(delly) - } - // we need to merge the vcf's - val finalVCF = if (vcfFiles.size > 1) { - // do merging - // CatVariants is a $org.broadinstitute.gatk.utils.commandline.CommandLineProgram$; - //TODO: convert to biopet extension - val variants = new CatVariants() - variants.variant = vcfFiles.values.toList - variants.outputFile = this.outputVcf - variants.reference = referenceFasta() - // add the job - //add(variants) - Some(outputVcf) - } else if (vcfFiles.size == 1) { - // TODO: pretify this - val ln = Ln(this, vcfFiles.head._2, this.outputVcf, relative = true) - //add(ln) - Some(ln.output) - } else None - - finalVCF.foreach(file => outputFiles += ("vcf" -> file)) - } -} - -object Delly extends PipelineCommand { - override val pipeline = "/nl/lumc/sasc/biopet/extensions/svcallers/Delly/Delly.class" - - def apply(root: Configurable, input: File, workDir: File): Delly = { - val dellyPipeline = new Delly(root) - dellyPipeline.input = input - dellyPipeline.workDir = workDir - dellyPipeline.init() - dellyPipeline.biopetScript() - dellyPipeline - } - -} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala new file mode 100644 index 0000000000000000000000000000000000000000..d5e3fffcfaeb7d842743ecc9e8896e7de860bbed --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala @@ -0,0 +1,55 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference { + + javaMainClass = classOf[org.broadinstitute.gatk.tools.CatVariants].getClass.getName + + @Input(required = true) + var inputFiles: List[File] = Nil + + @Output(required = true) + var outputFile: File = null + + @Input + var reference: File = null + + override def beforeGraph(): Unit = { + super.beforeGraph() + if (reference == null) reference = referenceFasta() + } + + override def cmdLine = super.cmdLine + + repeat("-V", inputFiles) + + required("-out", outputFile) + + required("-R", reference) +} + +object CatVariants { + def apply(root: Configurable, input: List[File], output: File): CatVariants = { + val cv = new CatVariants(root) + cv.inputFiles = input + cv.outputFile = output + cv + } +} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala index 8291e667b67df0b03a10114f6386670f18d572e4..cc230a6ad48e50f782f773a54d1537a72ad85e2d 100644 --- 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala @@ -49,6 +49,7 @@ class CombineVariants(val root: Configurable) extends Gatk { override def beforeGraph(): Unit = { super.beforeGraph() + if (outputFile.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputFile.getAbsolutePath + ".tbi") genotypeMergeOptions match { case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None => case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions") diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala index 4bffa97fe173c113697803257e7cd1f206e97027..33522732f42599390238d294c8e6dfb1297f3829 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala @@ -68,7 +68,7 @@ abstract class Gatk extends BiopetJavaCommandLineFunction with Reference with Ve required("-R", reference) + optional("-K", gatkKey) + optional("-et", et) + - repeat("-I", intervals) + + repeat("-L", intervals) + repeat("-XL", excludeIntervals) + repeat("-ped", pedigree) } \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala similarity index 98% rename from protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala index 
11b59f2fa52bf56e455475e2e719a746f6ff41ed..f4efc801d2ee4d60c6ffb949dd602334a8491836 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala @@ -10,8 +10,6 @@ import org.broadinstitute.gatk.engine.phonehome.GATKRunReport import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version { - memoryLimit = Option(3) - var executable: String = config("java", default = "java", submodule = "java", freeVar = false) override def subPath = "gatk" :: super.subPath diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala index 5b448c458aa98079a099e910abd7998bd8bfd597..86df5c4151d114fcb2b654cbf7b0c28bb78b9bcf 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala @@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions.kraken import java.io.File -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -39,11 +39,6 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers var db: File = config("db") - var inputFastQ: Boolean = true - var compression: Boolean = false - var compressionGzip: Boolean = false - var compressionBzip: Boolean = false - var quick: Boolean = false var minHits: Option[Int] = config("min_hits") @@ -51,11 +46,15 @@ class Kraken(val root: Configurable) extends 
BiopetCommandLineFunction with Vers var paired: Boolean = config("paired", default = false) executable = config("exe", default = "kraken") - def versionRegex = """Kraken version ([\d\w\-\.]+)\n.*""".r + + def versionRegex = """^Kraken version ([\d\w\-\.]+)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " --version" override def defaultCoreMemory = 8.0 + override def defaultThreads = 4 /** Sets readgroup when not set yet */ @@ -66,16 +65,15 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers /** Returns command to execute */ def cmdLine = required(executable) + - "--db" + required(db) + + required("--db", db) + optional("--threads", nCoresRequest) + - conditional(inputFastQ, "--fastq-input") + - conditional(!inputFastQ, "--fasta-input") + conditional(quick, "--quick") + optional("--min_hits", minHits) + optional("--unclassified-out ", unclassified_out.get) + optional("--classified-out ", classified_out.get) + - "--output" + required(output) + + required("--output", output) + conditional(preLoad, "--preload") + conditional(paired, "--paired") + + conditional(paired, "--check-names") + repeat(input) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala index e63beea4198f4f73c367756dda932db51d6584c2..0919728a08ccbd466f7dc1ae1d3f385e49294162 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/KrakenReport.scala @@ -44,9 +44,9 @@ class KrakenReport(val root: Configurable) extends BiopetCommandLineFunction wit var output: File = _ def cmdLine: String = { - val cmd: String = "--db " + required(db) + + val cmd: String = required(executable) + "--db " + required(db) + conditional(show_zeros, "--show-zeros") + - 
input.getAbsolutePath + ">" + output.getAbsolutePath + required(input.getAbsolutePath) + " > " + required(output.getAbsolutePath) cmd } } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala index 392169043c9fa8e10852b4ff61694892c7531b3c..0727b0fcb2d1a26471d38f48bb48fcb2dbdd1c0e 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BuildBamIndex.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.extensions.picard import java.io.File diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala index 0479bbb754d62a13e4e7d99bf6ac949b8cd69aec..087614a6a81367d6826e6d65ffa97d4610e3cd5d 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala @@ -35,7 +35,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza @Output(doc = "Output chart", required = false) var outputChart: File = _ - @Output(doc = "Output summary", required = false) + @Output(doc = "Output summary", required = true) var outputSummary: File = _ @Input(doc = "Reference file", required = false) @@ -67,7 +67,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza required("OUTPUT=", output, spaceSeparated = false) + optional("CHART_OUTPUT=", outputChart, spaceSeparated = false) + required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) + - optional("SUMMARY_OUTPUT=", outputSummary, spaceSeparated = false) + + required("SUMMARY_OUTPUT=", outputSummary, spaceSeparated = false) + optional("WINDOW_SIZE=", windowSize, spaceSeparated = false) + optional("MINIMUM_GENOME_FRACTION=", minGenomeFraction, spaceSeparated = false) + conditional(assumeSorted, "ASSUME_SORTED=TRUE") + @@ -86,6 +86,7 @@ object CollectGcBiasMetrics { val collectGcBiasMetrics = new CollectGcBiasMetrics(root) collectGcBiasMetrics.input :+= input collectGcBiasMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".gcbiasmetrics") + collectGcBiasMetrics.outputSummary = new File(outputDir, input.getName.stripSuffix(".bam") + ".gcbiasmetrics.summary") collectGcBiasMetrics } } diff --git 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala index d862076f0b8861e032832584a07d77df7c2508ca..b12ff9584da1090aa90970dbbd137c0bb6b792e9 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala @@ -15,16 +15,16 @@ */ package nl.lumc.sasc.biopet.extensions.sambamba -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version } /** General Sambamba extension */ abstract class Sambamba extends BiopetCommandLineFunction with Version { - override def defaultCoreMemory = 2.0 + override def defaultCoreMemory = 4.0 override def defaultThreads = 2 override def subPath = "sambamba" :: super.subPath - executable = config("exe", default = "sambamba", freeVar = false) + executable = config("exe", default = "sambamba", submodule = "sambamba", freeVar = false) def versionCommand = executable def versionRegex = """sambamba v(.*)""".r override def versionExitcode = List(0, 1) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala index 4a012d22950898d662c6285ddf47fbec06e6bce1..ca2470c6094192afef8da6de7b76b6ca1809c1e3 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaView.scala @@ -43,6 +43,6 @@ class SambambaView(val root: Configurable) extends Sambamba { optional("--format", format.get) + optional("--regions", regions) + optional("--compression-level", compression_level) + - required("--output" + 
output) + + required("--output-filename", output) + required(input) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala index b1f545fb321f79ab2b5605a6e454dee0586ff2eb..f0703990369edb6791ffb11b61bdbbf0927a270a 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala @@ -20,7 +20,7 @@ import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } /** General class for samtools extensions */ abstract class Samtools extends BiopetCommandLineFunction with Version { override def subPath = "samtools" :: super.subPath - executable = config("exe", default = "samtools") + executable = config("exe", default = "samtools", submodule = "samtools", freeVar = false) def versionCommand = executable def versionRegex = """Version: (.*)""".r override def versionExitcode = List(0, 1) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala index 1e145929642f9c0c29e1cd8f07b9eb0a8fcf3da6..fa5130fd1e2d20981582ab7d20b6112ad79e988b 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. 
+ * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.extensions.samtools import java.io.File diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala new file mode 100644 index 0000000000000000000000000000000000000000..08c04857a60297d0aa8c268764d7bff44a3866ac --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala @@ -0,0 +1,28 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.vt + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction + +/** + * General vt extension + * + * Created by pjvan_thof on 1/16/15. 
+ */ +abstract class Vt extends BiopetCommandLineFunction { + override def subPath = "vt" :: super.subPath + executable = config("exe", default = "vt") +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala new file mode 100644 index 0000000000000000000000000000000000000000..c5acb62ff24dbf189318122453a8f8db92f24c25 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala @@ -0,0 +1,32 @@ +package nl.lumc.sasc.biopet.extensions.vt + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Reference, Version } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Created by pjvanthof on 20/11/15. + */ +class VtDecompose(val root: Configurable) extends Vt with Version with Reference { + def versionRegex = """decompose (.*)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " decompose" + + @Input(required = true) + var inputVcf: File = _ + + @Output(required = true) + var outputVcf: File = _ + + var intervalsFile: Option[File] = config("intervals_file") + + val smartDecompose: Boolean = config("smart_decompose", default = false) + + def cmdLine = required(executable) + required("decompose") + + required("-o", outputVcf) + + optional("-I", intervalsFile) + + conditional(smartDecompose, "-s") + + required(inputVcf) +} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala new file mode 100644 index 0000000000000000000000000000000000000000..26812e2636a4bd794dc8c74e47c73fb137a3740d --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala 
@@ -0,0 +1,40 @@
+package nl.lumc.sasc.biopet.extensions.vt
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.{ Reference, Version }
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
+
+/**
+ * Command line wrapper for the `vt normalize` subcommand. Created by pjvanthof on 20/11/15.
+ */
+class VtNormalize(val root: Configurable) extends Vt with Version with Reference {
+  def versionRegex = """normalize (.*)""".r // captures whatever follows "normalize " in the version output
+  override def versionExitcode = List(0, 1) // presumably the exit codes accepted from versionCommand - confirm in Version
+  def versionCommand = executable + " normalize" // the bare subcommand, without any arguments
+
+  @Input(required = true)
+  var inputVcf: File = _ // passed as the final positional argument
+
+  @Output(required = true)
+  var outputVcf: File = _ // passed with -o
+
+  var windowSize: Option[Int] = config("windows_size") // passed with -w when set. NOTE(review): key is plural "windows_size" - confirm intended
+
+  var intervalsFile: Option[File] = config("intervals_file") // passed with -I only when set
+
+  var reference: File = _ // passed with -r; filled in by beforeGraph()
+
+  override def beforeGraph(): Unit = {
+    super.beforeGraph()
+    reference = referenceFasta() // overwrites any value assigned to `reference` earlier
+  }
+
+  def cmdLine = required(executable) + required("normalize") +
+    required("-o", outputVcf) +
+    optional("-w", windowSize) +
+    optional("-I", intervalsFile) +
+    required("-r", reference) +
+    required(inputVcf)
+}
\ No newline at end of file
diff --git a/public/biopet-public-package/pom.xml b/public/biopet-public-package/pom.xml
index aeed0caa9f6639b0a08f71a3fd0ac6ca646c506c..98945e3a22206579af807c6d2fbe4682ff75bd6c 100644
--- a/public/biopet-public-package/pom.xml
+++ b/public/biopet-public-package/pom.xml
@@ -25,7 +25,7 @@
     <parent>
         <groupId>nl.lumc.sasc</groupId>
         <artifactId>Biopet</artifactId>
-        <version>0.5.0-SNAPSHOT</version>
+        <version>0.6.0-SNAPSHOT</version>
         <relativePath>../</relativePath>
     </parent>
 
diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala
index b4109a45e70b0dd111463cddb3df7f2ebf74d14f..9a130fae1ee31cb208ccd0f42e7c49ae467ab229 100644
---
a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala +++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala @@ -15,6 +15,7 @@ */ package nl.lumc.sasc.biopet +import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand } object BiopetExecutablePublic extends BiopetExecutable { @@ -27,12 +28,13 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig, nl.lumc.sasc.biopet.pipelines.carp.Carp, nl.lumc.sasc.biopet.pipelines.toucan.Toucan, - nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling + nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling, + nl.lumc.sasc.biopet.pipelines.gears.Gears ) def pipelines: List[MainCommand] = List( nl.lumc.sasc.biopet.pipelines.shiva.Shiva, - nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling, + ShivaVariantcalling, nl.lumc.sasc.biopet.pipelines.basty.Basty ) ::: publicPipelines @@ -59,5 +61,6 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.tools.SeqStat, nl.lumc.sasc.biopet.tools.VepNormalizer, nl.lumc.sasc.biopet.tools.AnnotateVcfWithBed, - nl.lumc.sasc.biopet.tools.VcfWithVcf) + nl.lumc.sasc.biopet.tools.VcfWithVcf, + nl.lumc.sasc.biopet.tools.KrakenReportToJson) } diff --git a/public/biopet-tools-extensions/pom.xml b/public/biopet-tools-extensions/pom.xml index e8ef9d9a7b4f21a2dada57c3cb1cfcfe0299dd12..a49c2ebb27913ccaa8080842a48a53392e9a4893 100644 --- a/public/biopet-tools-extensions/pom.xml +++ b/public/biopet-tools-extensions/pom.xml @@ -1,11 +1,28 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. 
+ + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. + +--> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </parent> <modelVersion>4.0.0</modelVersion> diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala new file mode 100644 index 0000000000000000000000000000000000000000..799f137cdac4077eb84693fdc9c487a90e2b3ce8 --- /dev/null +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala @@ -0,0 +1,70 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. 
The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.tools
+
+/**
+ * Created by waiyileung on 05-10-15.
+ */
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.ToolCommandFunction
+import nl.lumc.sasc.biopet.core.summary.Summarizable
+import nl.lumc.sasc.biopet.utils.ConfigUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
+
+/**
+ * KrakenReportToJson function class for usage in Biopet pipelines
+ *
+ * @param root Configuration object for the pipeline
+ */
+class KrakenReportToJson(val root: Configurable) extends ToolCommandFunction with Summarizable {
+  def toolObject = nl.lumc.sasc.biopet.tools.KrakenReportToJson // the tool whose command line this class builds
+
+  @Input(doc = "Input Kraken Full report", shortName = "inputReport", required = true)
+  var inputReport: File = _ // passed with -i
+
+  @Argument(required = false)
+  var skipNames: Boolean = false // emits a bare --skipnames (no value) when true
+
+  @Output(doc = "Output JSON", shortName = "output", required = true)
+  var output: File = _ // passed with -o
+
+  override def defaultCoreMemory = 2.0
+
+  override def cmdLine =
+    super.cmdLine +
+      required("-i", inputReport) +
+      required("-o", output) +
+      conditional(skipNames, "--skipnames")
+
+  def summaryStats: Map[String, Any] = { // summary content is read back from the JSON output file
+    ConfigUtils.fileToConfigMap(output)
+  }
+
+  def summaryFiles: Map[String, File] = Map() // no files are registered in the summary
+
+}
+
+object KrakenReportToJson {
+  def apply(root: Configurable, input: File, output: File): KrakenReportToJson = { // `output` is used as the parent directory
+    val report = new KrakenReportToJson(root)
+    report.inputReport = input
+    report.output = new File(output, input.getName.replaceFirst("""\.[^.]*$""", "") + ".kraken.json") // drops the last extension; names without a dot are kept whole instead of throwing
+    report
+  }
+}
+
+diff --git
a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala index b6f90cc6261b3677340c51c5d1197a50d901181e..6e7f6c86d0c834b193525aa4261cd4a023217412 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala @@ -38,6 +38,9 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe @Output(doc = "Output tag library", shortName = "output", required = true) var output: File = _ + @Output + private var outputIndex: File = _ + var minDP: Option[Int] = config("min_dp") var minAP: Option[Int] = config("min_ap") var homoFraction: Option[Double] = config("homoFraction") @@ -50,6 +53,7 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe override def beforeGraph() { super.beforeGraph() if (reference == null) reference = referenceFasta().getAbsolutePath + if (output.getName.endsWith(".vcf.gz")) outputIndex = new File(output.getAbsolutePath + ".tbi") val samtoolsMpileup = new SamtoolsMpileup(this) } diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala index f318a6861404c103ad5da426a1f7bbf662daedf3..d449bde84417db16879550c0e187250152871c0c 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.extensions.tools import java.io.File diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala index be50e38932bda4a6dca83983e715f8a313be70e0..c0cc0e3ffa884044146698f8ab46c901e227f3e5 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SquishBed.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.extensions.tools import java.io.File diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala index 227f37a59fbc87ae1fbf6220f5803e99e3415b7e..fd43743c2e249d9f34ab012a1dc941f6736b295d 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala @@ -30,6 +30,9 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction { @Output(doc = "Output vcf", shortName = "o", required = false) var outputVcf: File = _ + @Output + var outputVcfIndex: File = _ + var minSampleDepth: Option[Int] = config("min_sample_depth") var minTotalDepth: Option[Int] = config("min_total_depth") var minAlternateDepth: Option[Int] = config("min_alternate_depth") @@ -39,6 +42,11 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction { override def defaultCoreMemory = 3.0 + override def beforeGraph(): Unit = { + super.beforeGraph() + if (outputVcf.getName.endsWith("vcf.gz")) outputVcfIndex = new File(outputVcf.getAbsolutePath + ".tbi") + } + override def cmdLine = super.cmdLine + required("-I", inputVcf) + required("-o", outputVcf) + diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala index 4db2b44745416068a95c0da190a541f9c25479ea..bf52dbea4111a1dfa1227addada89392336a1834 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala @@ -54,6 +54,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with 
Summariz var allInfoTags = false var allGenotypeTags = false var reference: File = _ + var intervals: Option[File] = None override def beforeGraph(): Unit = { reference = referenceFasta() @@ -76,7 +77,8 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz repeat("--genotypeTag", genotypeTags) + conditional(allInfoTags, "--allInfoTags") + conditional(allGenotypeTags, "--allGenotypeTags") + - required("-R", reference) + required("-R", reference) + + optional("--intervals", intervals) /** Returns general stats to the summary */ def summaryStats: Map[String, Any] = { diff --git a/public/biopet-tools-package/pom.xml b/public/biopet-tools-package/pom.xml index 8801d9280163613168001738d1a6c44c2fc22d2c..e42db61116c57ffce3057675e94de45ff90ef162 100644 --- a/public/biopet-tools-package/pom.xml +++ b/public/biopet-tools-package/pom.xml @@ -1,11 +1,28 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. + + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. 
+ +--> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </parent> <modelVersion>4.0.0</modelVersion> diff --git a/public/biopet-tools/pom.xml b/public/biopet-tools/pom.xml index 40bd255bd5730293f1973be9a3db1e49fe909d08..cbd59820cef99e2fed1d3bb6a5da137ce90f5e74 100644 --- a/public/biopet-tools/pom.xml +++ b/public/biopet-tools/pom.xml @@ -1,11 +1,28 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. + + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. 
+ +--> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> <modelVersion>4.0.0</modelVersion> diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala index d4e6996de89b9a62a3b35f9ea894907882b4484f..e56393b38eb62499c39469b452baefc818812e28 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala @@ -26,11 +26,11 @@ import scala.collection.JavaConverters._ object FastqSync extends ToolCommand { /** Regex for capturing read ID ~ taking into account its read pair mark (if present) */ - private val idRegex = "[_/][12]\\s??|\\s".r + private val idRegex = """[_/][12]$""".r /** Implicit class to allow for lazy retrieval of FastqRecord ID without any read pair mark */ private implicit class FastqPair(fq: FastqRecord) { - lazy val fragId = idRegex.split(fq.getReadHeader)(0) + lazy val fragId = idRegex.split(fq.getReadHeader.split(" ")(0))(0) } /** diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala new file mode 100644 index 0000000000000000000000000000000000000000..ee59a22aaab5a1b21ad8565a74f41d432cde096b --- /dev/null +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala @@ -0,0 +1,186 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. 
It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.tools
+
+/**
+ * Converts a full Kraken report into a nested JSON document. Created by wyleung on 25-9-15.
+ */
+
+import java.io.{ File, PrintWriter }
+
+import nl.lumc.sasc.biopet.utils.ConfigUtils._
+import nl.lumc.sasc.biopet.utils.ToolCommand
+
+import scala.collection.mutable
+import scala.collection.mutable.ListBuffer
+import scala.io.Source
+
+object KrakenReportToJson extends ToolCommand {
+
+  case class KrakenHit(taxonomyID: Long,
+                       taxonomyName: String,
+                       cladeCount: Long, // reads assigned directly to this taxon (report column 3)
+                       cladeSize: Long, // reads covered by the clade rooted here, including itself (report column 2)
+                       taxonRank: String,
+                       cladeLevel: Int,
+                       parentTaxonomyID: Long,
+                       children: ListBuffer[KrakenHit]) {
+    def toJSON(withChildren: Boolean = false): Map[String, Any] = { // children are serialised recursively only when withChildren is set
+      val childJSON = if (withChildren) children.toList.map(entry => entry.toJSON(withChildren)) else List()
+      Map(
+        "name" -> taxonomyName,
+        "taxid" -> taxonomyID,
+        "taxonrank" -> taxonRank,
+        "cladelevel" -> cladeLevel,
+        "count" -> cladeCount,
+        "size" -> cladeSize,
+        "children" -> childJSON
+      )
+    }
+
+  }
+
+  var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0) // cladeIDs(level + 1) = taxon ID last seen at that level
+  val spacePattern = "^( +)".r // leading spaces of the name column
+  private var lines: Map[Long, KrakenHit] = Map.empty // taxon ID -> hit, (re)assigned by reportToJson
+
+  case class Args(krakenreport: File = null, outputJson: Option[File] = None, skipNames: Boolean = false) extends AbstractArgs
+
+  class OptParser extends AbstractOptParser {
+
+    head(
+      s"""
+         |$commandName - Convert Kraken-report (full) output to JSON
+      """.stripMargin)
+
+    opt[File]('i', "krakenreport") required () unbounded () valueName "<krakenreport>" action { (x, c) =>
+      c.copy(krakenreport = x)
+    } validate {
+      x => if (x.exists) success else failure("Krakenreport not found")
+    } text "Kraken report to generate stats from"
+
+    opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) =>
+      c.copy(outputJson = Some(x))
+    } text "File to write output to, if not supplied output go to stdout"
+
+    opt[Unit]('n', "skipnames") unbounded () action { (_, c) => // plain flag: the pipeline wrapper emits a bare --skipnames
+      c.copy(skipNames = true)
+    } text "Don't report the scientific name of the taxon."
+
+  }
+
+  /**
+   * Parses the command line argument
+   *
+   * @param args Array of arguments
+   * @return Parsed arguments; the JVM exits with status 1 when parsing fails
+   */
+  def parseArgs(args: Array[String]): Args = new OptParser()
+    .parse(args, Args())
+    .getOrElse(sys.exit(1))
+
+  /**
+   * Takes a line from the kraken report, converts into Map with taxonID and
+   * information on this hit as `KrakenHit`. `KrakenHit` is used later on for
+   * building the tree
+   *
+   * @param krakenRawHit Line from the KrakenReport output
+   * @param skipNames Specify to skip names in the report output to reduce size of JSON
+   * @return Single-entry map from the taxonomy ID to its `KrakenHit`
+   */
+  def parseLine(krakenRawHit: String, skipNames: Boolean): Map[Long, KrakenHit] = {
+    val values: Array[String] = krakenRawHit.stripLineEnd.split("\t")
+
+    assert(values.length == 6)
+
+    val scientificName: String = values(5)
+    val cladeLevel = spacePattern.findFirstIn(scientificName).getOrElse("").length / 2 // two leading spaces per level
+
+    if (cladeIDs.length <= cladeLevel + 1) {
+      cladeIDs ++= mutable.ArrayBuffer.fill(10)(0L)
+    }
+
+    cladeIDs(cladeLevel + 1) = values(4).toLong // becomes the parent ID for hits one level deeper
+    Map(
+      values(4).toLong -> new KrakenHit(
+        taxonomyID = values(4).toLong,
+        taxonomyName = if (skipNames) "" else scientificName.trim,
+        cladeCount = values(2).toLong,
+        cladeSize = values(1).toLong,
+        taxonRank = values(3),
+        cladeLevel = cladeLevel,
+        parentTaxonomyID = cladeIDs(cladeLevel),
+        children = ListBuffer()
+      ))
+  }
+
+  /**
+   * Read the `KrakenReport` output and transform into `Map` by TaxonID and `KrakenHit`
+   * A JSON-string output is given.
+   *
+   * @param reportRaw The `KrakenReport` output
+   * @param skipNames Specify to skip names in the report output to reduce size of JSON
+   * @return The JSON document as a string, rendered with `spaces2`
+   */
+  def reportToJson(reportRaw: File, skipNames: Boolean): String = {
+    val reader = Source.fromFile(reportRaw)
+
+    /*
+     * http://ccb.jhu.edu/software/kraken/MANUAL.html
+     * The header layout is:
+     * 1. Percentage of reads covered by the clade rooted at this taxon
+     * 2. Number of reads covered by the clade rooted at this taxon
+     * 3. Number of reads assigned directly to this taxon
+     * 4. A rank code, indicating (U)nclassified, (D)omain, (K)ingdom, (P)hylum, (C)lass, (O)rder, (F)amily, (G)enus, or (S)pecies. All other ranks are simply '-'.
+     * 5. NCBI taxonomy ID
+     * 6. indented scientific name
+     * */
+
+    // getLines() is lazy: the fold consumes it completely, after which the reader is always closed
+    lines = try {
+      reader.getLines().map(line => parseLine(line, skipNames))
+        .filter(p => (p.head._2.cladeSize > 0) || List(0L, 1L).contains(p.head._2.taxonomyID))
+        .foldLeft(Map.empty[Long, KrakenHit])((acc, hit) => acc + hit.head)
+    } finally reader.close()
+
+    lines.keys.foreach(k => {
+      // append itself to the children attribute of the parent
+      if (lines(k).parentTaxonomyID > 0L) {
+        // avoid the root and unclassified appending to the unclassified node
+        lines(lines(k).parentTaxonomyID).children += lines(k)
+      }
+    })
+
+    val result = Map("unclassified" -> lines(0).toJSON(withChildren = false), // NOTE: throws NoSuchElementException if taxid 0 or 1 is absent
+      "classified" -> lines(1).toJSON(withChildren = true))
+    mapToJson(result).spaces2
+
+  }
+
+  def main(args: Array[String]): Unit = {
+    val commandArgs: Args = parseArgs(args)
+
+    val jsonString: String = reportToJson(commandArgs.krakenreport, skipNames = commandArgs.skipNames)
+    commandArgs.outputJson match {
+      case Some(file) =>
+        val writer = new PrintWriter(file)
+        try writer.println(jsonString)
+        finally writer.close() // close even when the write fails
+      case _ => println(jsonString)
+    }
+
+  }
+}
diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala
index ff31439d9f0622ef73e3386e207971caa715a607..ac5a8bfbde5ccaae0156a4249ba26a86b59ae3a7 100644
--- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala
@@ -71,7 +71,7 @@ object SamplesTsvToJson extends ToolCommand {
           val library = if (libraryColumn != -1) Some(values(libraryColumn)) else None
 
           //FIXME: this is a workaround, should be removed after fixing #180
-          if (sample.head.isDigit || library.forall(_.head.isDigit))
+          if (sample.head.isDigit || library.exists(_.head.isDigit))
             throw new IllegalStateException("Sample or library may not start with a number")
 
           if (sampleLibCache.contains((sample, library)))
diff --git
a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala index 74aad0081a547e9b0a25ab2e86c851dbafd4ba3b..0941da9e512949c19ebcd10991d29ad56d30ca85 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala index 70e9aefd46251ee3a01b6a133e9ade7f6cc28a09..a88dc561a34fe1b25b39f69e34b25ed30a6f393d 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala index d919fe400154683f5fef7a061ce76c60aab5f5e7..3c986e9c37f06ad4fd9aa77339d88e4864b171ae 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstatTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala index dd66b204fab38a4fd26fb011ffe6739faf304009..8117608ae438487b0c50f1ee443b798192b1ca09 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSplitterTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala index 565498eb6ed0732c1c796f4cf82d91d77a55815b..bfd649f1df2f4160b8846067399a074e8231c872 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala @@ -188,6 +188,32 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers { numKept shouldBe 1 } + @Test(dataProvider = "mockProvider") + def testSeqSolexa(refMock: FastqReader, aMock: FastqReader, bMock: FastqReader, + aOutMock: AsyncFastqWriter, bOutMock: AsyncFastqWriter) = { + + when(refMock.iterator) thenReturn recordsOver( + "SOLEXA12_24:6:117:1388:2001/2", + "SOLEXA12_24:6:96:470:1965/2", + "SOLEXA12_24:6:35:1209:2037/2") + when(aMock.iterator) thenReturn recordsOver( + "SOLEXA12_24:6:96:470:1965/1", + "SOLEXA12_24:6:35:1209:2037/1") + when(bMock.iterator) thenReturn recordsOver( + "SOLEXA12_24:6:117:1388:2001/2", + "SOLEXA12_24:6:96:470:1965/2") + val obs = inOrd(aOutMock, bOutMock) + + val (numDiscard1, numDiscard2, numKept) = syncFastq(refMock, aMock, bMock, aOutMock, bOutMock) + + obs.verify(aOutMock).write(new FastqRecord("SOLEXA12_24:6:96:470:1965/1", "A", "", "H")) + obs.verify(bOutMock).write(new FastqRecord("SOLEXA12_24:6:96:470:1965/2", "A", "", "H")) + + numDiscard1 shouldBe 1 + numDiscard2 shouldBe 1 + numKept shouldBe 1 + } + @Test(dataProvider = "mockProvider") def testSeqABShorterPairMarkSlash(refMock: FastqReader, aMock: FastqReader, bMock: FastqReader, aOutMock: AsyncFastqWriter, bOutMock: AsyncFastqWriter) = { diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala index 
907270b8c31f3eb152766f03a612101f754ba6c9..a124ea021f96fe52c8368998b3b4e4dc39136cb9 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBioTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala index e708bc654b3c81699dcab85bfc07a7e21cd3ea94..46e0ffa932eafe3ac20b8642cf52c3033dd5a733 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MpileupToVcfTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. 
+ * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala index 611557d836636aebc71f90e70707035033df6b97..fe97d63dd60f4516f64d8127b935353e653f72c2 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/PrefixFastqTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala index 5c7731c6dd2ec60b7c8cd01f84c6929026d7e007..47068c3e603e403c9844c516e17c6248869f858c 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCountFastqTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala index c86c1ba5a9787c0cdcce00293309500d6f6e4b86..6193c4a13d9ded66c3cc09d604f0057cbb9cb0dc 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateLibaryTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala index ecd1dae87d5c43a88e722cbb12f7eb18ece3a9b8..817dc33dc67f1acc2d238f45ca7f13e41dab37b2 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCountsTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala index e8a09a9d7cb9af569f2084c95a1b6c2e7f1e1aad..c534da38410600bc55f10a02ba7860909b55af85 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJsonTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala index 4f921affbc090031b83c0ee5fea3c64b9c9bdf4c..a53c4dd31abbaa326ff6c5f251c0ee3eeb3ac1c6 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/SummaryToTsvTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.tools import java.io.File diff --git a/public/biopet-utils/pom.xml b/public/biopet-utils/pom.xml index bb9f2b2611c31ceed8a3e0f6505d4dca2b1198a0..8b70e105c01ea1fd2eda061129fd4cdc8ae1ff3c 100644 --- a/public/biopet-utils/pom.xml +++ b/public/biopet-utils/pom.xml @@ -1,11 +1,28 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. + + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. 
/**
 * Helper methods for working with BAM files.
 *
 * Created by pjvan_thof on 11/19/15.
 */
object BamUtils {

  /**
   * This method will convert a list of bam files to a Map[<sampleName>, <bamFile>]
   *
   * Every bam file must carry exactly one distinct sample name in its read groups,
   * and each sample may only be once in the list.
   *
   * @throws IllegalArgumentException when a bam file has no sample name, has more than one sample name,
   *                                  or when the same sample is found in more than one bam file
   * @param bamFiles input bam files
   * @return Map with the sample name as key and the bam file of that sample as value
   */
  def sampleBamMap(bamFiles: List[File]): Map[String, File] = {
    val temp = bamFiles.map { file =>
      val inputSam = SamReaderFactory.makeDefault.open(file)
      // Only the header is read here; close the reader in all cases so no file handle is leaked per bam file
      val samples = try {
        inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct.toList
      } finally {
        inputSam.close()
      }
      samples match {
        case sample :: Nil => sample -> file
        case Nil =>
          throw new IllegalArgumentException("Bam does not contain sample ID or have no readgroups defined: " + file)
        case _ =>
          throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file)
      }
    }
    // A sample that maps to more than one file would silently lose files in toMap, so reject it explicitly
    val duplicates = temp.groupBy(_._1).filter(_._2.size > 1).keys
    if (duplicates.nonEmpty)
      throw new IllegalArgumentException("Samples has been found twice: " + duplicates.mkString(", "))
    temp.toMap
  }
}
a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Config.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Config.scala @@ -168,12 +168,12 @@ class Config(protected var _map: Map[String, Any], * @param freeVar Default true, if set false value must exist in module * @return Config value */ - protected[config] def apply(module: String, - path: List[String], - key: String, - default: Any = null, - freeVar: Boolean = true, - fixedValues: Map[String, Any] = Map()): ConfigValue = { + def apply(module: String, + path: List[String], + key: String, + default: Any = null, + freeVar: Boolean = true, + fixedValues: Map[String, Any] = Map()): ConfigValue = { val requestedIndex = ConfigValueIndex(module, path, key, freeVar) if (contains(requestedIndex, fixedValues)) { val fixedValue = fixedCache.get(requestedIndex) diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala index 68fe36e303e6a49e96a7628a741fc120392a5d19..bc0f207df80f003d7120d466a3eea7c0ffcc040c 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/config/Configurable.scala @@ -20,7 +20,7 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils.ImplicitConversions trait Configurable extends ImplicitConversions { /** Should be object of parant object */ - val root: Configurable + def root: Configurable def globalConfig: Config = if (root != null) root.globalConfig else Config.global /** suffix to the path */ diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala index 5b6b1931ab7ea604f8812d88c869893f17929d65..ccb3c089eb4dc07e6b9dfa148e411286ad977c9e 100644 --- 
a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.utils.intervals import htsjdk.samtools.util.Interval diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala index 56b2f303b0a7161879ea9ae01eaf3ba1cc0b86f7..485b0ae156938784def6937bff29e077cc2f5170 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. 
The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.utils.intervals import java.io.{ PrintWriter, File } diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala index 3dfac894eeb591a84a31b3fd5bcfa88c34619192..c71a30a154cf6e0ec4776348cfb94753c7f726c8 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/Rscript.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.utils.rscript import java.io.{ File, FileOutputStream } diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala index 371abc9a701e9ed9d224a60bd40df3bcfb8a06cf..dbce03915e2ed8512e9e737f9189147670b76a5e 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.utils.summary /** diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala index 0490b28db7dbf199a9b01c1b48c6783f11720f7d..51da3d9d7ed2a57507045999d712471c44e38433 100644 --- a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala +++ b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordListTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. 
It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.utils.intervals import java.io.{ PrintWriter, File } diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala index a4ae25293748214d8d0d3b27494b01c34f71a684..ec966d2e7ab11215f7b3cdb1a035651760413530 100644 --- a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala +++ b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordTest.scala @@ -1,3 +1,18 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ package nl.lumc.sasc.biopet.utils.intervals import htsjdk.samtools.util.Interval diff --git a/public/carp/pom.xml b/public/carp/pom.xml index 6436a278cc101d9914dfbfdc3b8ef74185e85f23..67f4da89022ba48aff4401fbcfd7ffe52714aa02 100644 --- a/public/carp/pom.xml +++ b/public/carp/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index de37b352ecf8a9091ee67d1b455e85087156f642..a31ca43355b72f09d219340725e23d6a0aca2eba 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -150,8 +150,6 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with macs2.name = Some(sampleId) macs2.outputdir = sampleDir + File.separator + "macs2" + File.separator + sampleId + File.separator add(macs2) - - addSummaryJobs() } } @@ -176,6 +174,8 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with logger.info("Starting CArP pipeline") addSamplesJobs() + + addSummaryJobs() } def addMultiSampleJobs(): Unit = { diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala index 59a07dca040ff07f81438b59697953563b97d972..a06cef16f169732aba3ebe31357f907d24ac2eca 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala @@ -26,7 +26,7 @@ import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport * Created by pjvanthof on 25/06/15. 
/**
 * Cutadapt wrapper specific for Flexiprep.
 *
 * This wrapper overrides the summary part so that instead of only displaying the clipped adapters, the sequence names
 * are also displayed. In Flexiprep the sequence will always have names since they are detected by FastQC from a list
 * of known adapters / contaminants.
 *
 * @param root Configurable object from which this wrapper is initialized.
 * @param fastqc Fastqc wrapper that contains adapter information.
 */
class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) {

  /** Adapter sequence -> adapter name, built from the adapters found by FastQC */
  protected def seqToName = fastqc.foundAdapters
    .map(adapter => adapter.seq -> adapter.name).toMap

  /**
   * Summary statistics of the parent wrapper, with the adapter entry re-keyed by adapter name.
   *
   * Each adapter name maps to a Map holding its "sequence" and "count".
   *
   * @throws IllegalStateException when a clipped adapter sequence is not among the adapters found by FastQC
   * @throws RuntimeException when the parent statistics have no Map under the adapters key
   */
  override def summaryStats: Map[String, Any] = {
    val initStats = super.summaryStats
    // seqToName is a def and rebuilds its map on every call, so resolve it once instead of once per adapter
    val adapterNames = seqToName
    // Map of adapter name to its sequence and how many times it is found
    val adapterCounts: Map[String, Any] = initStats.get(adaptersStatsName) match {
      // "adapters" key found in statistics
      case Some(m: Map[_, _]) => m.flatMap {
        case (seq: String, count) =>
          adapterNames.get(seq) match {
            // adapter sequence is found by FastQC
            case Some(n) => Some(n -> Map("sequence" -> seq, "count" -> count))
            // adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter
            // sequences come from FastQC
            case _ =>
              throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastq_input'.")
          }
        // entry whose key is not a String: nothing to summarize for it, so it is dropped
        case _ =>
          logger.debug(s"No adapters found for summarizing in '$fastq_input'.")
          None
      }
      // "adapters" key not found ~ something went wrong in our part
      case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastq_input'.")
    }
    initStats.updated(adaptersStatsName, adapterCounts)
  }
}
index 1f1f2510cc600544c95d8db117bad3c3e888be01..7c2b9ace137c347e9f78d99543eb56ebf28ef5b4 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -195,6 +195,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with override def beforeGraph(): Unit = { fqSync.beforeGraph() + commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil super.beforeGraph() } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala index 075077872a314c2a510478ff8a151a4ed7f5ba9c..c715f1338e944bd560b6c2103615e620398b88fc 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepReport.scala @@ -23,7 +23,7 @@ import nl.lumc.sasc.biopet.utils.rscript.StackedBarPlot import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue } class FlexiprepReport(val root: Configurable) extends ReportBuilderExtension { - val builder = FlexiprepReport + def builder = FlexiprepReport } /** diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala index 8509b78983f571302ed15265714a62bbb3ab24de..22a9a4a526a0a8d0c640b9262baef99063f07288 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala @@ -1,10 +1,25 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ package nl.lumc.sasc.biopet.pipelines.flexiprep import java.io.File import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, Summarizable } import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetCommandLineFunction } -import nl.lumc.sasc.biopet.extensions.{ Cat, Gzip, Sickle, Cutadapt } +import nl.lumc.sasc.biopet.extensions.{ Cat, Gzip, Sickle } import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } @@ -35,7 +50,15 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman val seqtk = new SeqtkSeq(root) var clip: Option[Cutadapt] = None var trim: Option[Sickle] = None - var outputCommand: BiopetCommandLineFunction = null + lazy val outputCommand: BiopetCommandLineFunction = if (compress) { + val gzip = Gzip(root) + gzip.output = output + gzip + } else { + val cat = Cat(root) + cat.output = output + cat + } def jobs = (Some(seqtk) :: clip :: trim :: Some(outputCommand) :: Nil).flatten @@ -78,7 +101,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman clip = if (!flexiprep.skipClip) { val foundAdapters = fastqc.foundAdapters.map(_.seq) if (foundAdapters.nonEmpty) { - val cutadapt = new Cutadapt(root) + val cutadapt = new Cutadapt(root, fastqc) cutadapt.fastq_input = seqtk.output cutadapt.fastq_output = new 
File(output.getParentFile, input.getName + ".cutadapt.fq") cutadapt.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats") @@ -108,16 +131,9 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman case _ => seqtk.output } - if (compress) outputCommand = { - val gzip = new Gzip(root) - gzip.output = output - outputFile :<: gzip - } - else outputCommand = { - val cat = new Cat(root) - cat.input = outputFile :: Nil - cat.output = output - cat + outputCommand match { + case gzip: Gzip => outputFile :<: gzip + case cat: Cat => cat.input = outputFile :: Nil } seqtk.beforeGraph() diff --git a/public/gears/pom.xml b/public/gears/pom.xml index 8d09f66d1528a295e18ef5467f2ebd3fa99d8657..c1bf7194cec6f3b71683b36ed2be1bd156b3eb63 100644 --- a/public/gears/pom.xml +++ b/public/gears/pom.xml @@ -22,12 +22,15 @@ <parent> <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> + <relativePath>../</relativePath> </parent> <modelVersion>4.0.0</modelVersion> <inceptionYear>2015</inceptionYear> <artifactId>Gears</artifactId> + <name>Gears</name> + <packaging>jar</packaging> <dependencies> <dependency> @@ -37,7 +40,7 @@ </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> - <artifactId>Mapping</artifactId> + <artifactId>BiopetToolsExtensions</artifactId> <version>${project.version}</version> </dependency> <dependency> diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp new file mode 100644 index 0000000000000000000000000000000000000000..20ca432859aeee8e0cd2482e190abe8b9586e8b6 --- /dev/null +++ b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp @@ -0,0 +1,38 @@ +#import(nl.lumc.sasc.biopet.utils.summary.Summary) 
+#import(nl.lumc.sasc.biopet.core.report.ReportPage) +<%@ var summary: Summary %> +<%@ var rootPath: String %> +<%@ var sampleId: Option[String] = None %> +<%@ var libId: Option[String] = None %> + +<table class="table"> +<tbody> + <tr><th>Pipeline</th><td>Gears</td></tr> + <tr><th>Version</th><td>${summary.getValue("meta", "pipeline_version")}</td></tr> + <tr><th>Last commit hash</th><td>${summary.getValue("meta", "last_commit_hash")}</td></tr> + <tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr> + #if(sampleId.isDefined) <tr><th>Sample</th><td>${sampleId}</td></tr> #end + #if(libId.isDefined) <tr><th>Library</th><td>${libId}</td></tr> #end +</tbody> +</table> +<br/> +<div class="row"> +<div class="col-md-1"></div> +<div class="col-md-6"> + <p> + In this web document you can find your <em>Gears</em> pipeline report. + Different categories of data can be found in the left-side menu. + Statistics per sample and library can be accessed through the top-level menu. + Some statistics for target regions can be found in the regions tab. + Furthermore, you can view all versions of software tools used by selecting <em>Versions</em> from the top menu. + </p> + + <p> + <small>Brought to you by <a href="https://sasc.lumc.nl" target="_blank"><abbr + title="Sequence Analysis Support Core">SASC</abbr></a> and <a + href="https://www.lumc.nl/org/klinische-genetica/" target="_blank"><abbr title="Clinical Genetics LUMC">KG</abbr></a>, + LUMC. 
+ </small> + </p> +</div> +</div> \ No newline at end of file diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp new file mode 100644 index 0000000000000000000000000000000000000000..5aff91fe34c36f2e2e18386a52ad5e3717fedbfd --- /dev/null +++ b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp @@ -0,0 +1,67 @@ +#import(nl.lumc.sasc.biopet.utils.summary.Summary) +#import(nl.lumc.sasc.biopet.utils.ConfigUtils) +#import(java.io.File) +<%@ var summary: Summary %> +<%@ var sampleId: Option[String] = None %> +<%@ var libId: Option[String] = None %> +<%@ var rootPath: String %> +<%@ var showPlot: Boolean = true %> +<%@ var showIntro: Boolean = true %> +#{ + val samples = sampleId match { + case Some(sample) => List(sample.toString) + case _ => summary.samples.toList + } + val librariesCount = summary.samples.foldLeft(0)(_ + summary.libraries(_).size) +}# + +#if (showIntro) + + <div class="row"> + <div class="col-md-1"></div> + <div class="col-md-10"> + Here we show a sunburst visualisation of the analysis of the metagenome in sample: ${sampleId} + </div> + <div class="col-md-1"></div> + </div> +#end + + + +#if (showPlot) +<div class="row"> + <div class="col-md-12"> + <h3 id='currentlevel'>Root</h3> + <div> + <span id="selection_name"></span> - + <span id="selection_size"></span> - + <span id="selection_value"></span> + </div> + + <form> + <label><input type="radio" name="mode" value="size"> Size</label> + <label><input type="radio" name="mode" value="count" checked> Count</label> + </form> + <div id="sequence"></div> + + <div id="datatable"></div> + <div id="svgholder"></div> + + </div> + + <script src="${rootPath}ext/js/gears.js"></script> + + <script type="application/ecmascript"> + + #{ + val rawreport = Map("kraken" -> summary.getValue(sampleId, libId, "gears", "stats", "krakenreport")) + val 
jsonReport = ConfigUtils.mapToJson(rawreport) + }# + + var krakenresult = ${unescape(jsonReport)}; + loadGears(krakenresult.kraken.classified); + </script> + + +</div> +#end diff --git a/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js new file mode 100644 index 0000000000000000000000000000000000000000..9d53decdf1637b10ad24eafb717c6fa2bcdd3d44 --- /dev/null +++ b/public/gears/src/main/resources/nl/lumc/sasc/biopet/pipelines/gears/report/ext/js/gears.js @@ -0,0 +1,356 @@ +/* + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ + +// Breadcrumb dimensions: width, height, spacing, width of tip/tail. +var b = { + w: 130, h: 20, s: 3, t: 10 +}; + +// Given a node in a partition layout, return an array of all of its ancestor +// nodes, highest first, but excluding the root. +function getAncestors(node) { + var path = []; + var current = node; + while (current.parent) { + path.unshift(current); + current = current.parent; + } + return path; +} + + +function initializeBreadcrumbTrail() { + // Add the svg area. 
+ var trail = d3.select("#sequence").append("svg:svg") + .attr("width", width) + .attr("height", 50) + .attr("id", "trail"); + // Add the label at the end, for the percentage. + trail.append("svg:text") + .attr("id", "endlabel") + .style("fill", "#000"); +} + +// Generate a string that describes the points of a breadcrumb polygon. +function breadcrumbPoints(d, i) { + var points = []; + points.push("0,0"); + points.push(b.w + ",0"); + points.push(b.w + b.t + "," + (b.h / 2)); + points.push(b.w + "," + b.h); + points.push("0," + b.h); + if (i > 0) { // Leftmost breadcrumb; don't include 6th vertex. + points.push(b.t + "," + (b.h / 2)); + } + return points.join(" "); +} + +// Update the breadcrumb trail to show the current sequence and percentage. +function updateBreadcrumbs(nodeArray, percentageString) { + // Data join; key function combines name and depth (= position in sequence). + var g = d3.select("#trail") + .selectAll("g") + .data(nodeArray, function(d) { return d.name + d.depth; }); + + // Add breadcrumb and label for entering nodes. + var entering = g.enter().append("svg:g"); + + entering.append("svg:polygon") + .attr("points", breadcrumbPoints) + .style("fill", function(d) { return color((d.children ? d : d.parent).name); }); + + entering.append("svg:text") + .attr("x", (b.w + b.t) / 2) + .attr("y", b.h / 2) + .attr("dy", "0.35em") + .attr("text-anchor", "middle") + .text(function(d) { return d.name; }); + + // Set position for entering and updating nodes. + g.attr("transform", function(d, i) { + return "translate(" + i * (b.w + b.s) + ", 0)"; + }); + + // Remove exiting nodes. + g.exit().remove(); + + // Now move and update the percentage at the end. + d3.select("#trail").select("#endlabel") + .attr("x", (nodeArray.length + 0.5) * (b.w + b.s)) + .attr("y", b.h / 2) + .attr("dy", "0.35em") + .attr("text-anchor", "middle") + .text(percentageString); + + // Make the breadcrumb trail visible, if it's hidden. 
+ d3.select("#trail") + .style("visibility", ""); + +} +// Fade all but the current sequence, and show it in the breadcrumb trail. +function mouseover(d) { + + var percentage = (100 * d.value / totalSize).toPrecision(3); + var percentageString = percentage + "%"; + if (percentage < 0.1) { + percentageString = "< 0.1%"; + } + + d3.select("#percentage") + .text(percentageString); + + d3.select("#explanation") + .style("visibility", ""); + + var sequenceArray = getAncestors(d); + updateBreadcrumbs(sequenceArray, percentageString); + + // Fade all the segments. + d3.selectAll("path") + .style("opacity", 0.3); + + // Then highlight only those that are an ancestor of the current segment. + svg.selectAll("path") + .filter(function(node) { + return (sequenceArray.indexOf(node) >= 0); + }) + .style("opacity", 1); +} + +// Restore everything to full opacity when moving off the visualization. +function mouseleave(d) { + + // Hide the breadcrumb trail + d3.select("#trail") + .style("visibility", "hidden"); + + // Deactivate all segments during transition. + d3.selectAll("path").on("mouseover", null); + + // Transition each segment to full opacity and then reactivate it. 
+ d3.selectAll("path") + .transition() + .duration(500) + .style("opacity", 1) + .each("end", function() { + d3.select(this).on("mouseover", mouseover); + }); + + d3.select("#explanation") + .style("visibility", "hidden"); +} + +function mousein(d) { + d3.select("#selection_name").text(d.name); + d3.select("#selection_value").text(d.value); + d3.select("#selection_size").text(d.size); + + mouseover(d); +} + +function mouseout(d) { + d3.select("#selection_name").text(""); + d3.select("#selection_value").text(""); + d3.select("#selection_size").text(""); + + //mouseleave(d); +} + +function loadTable(d, target) { + var content = ""; + console.log(d); + d3.select(target).html(""); + + var table = d3.select("#datatable").append("table").attr("class", "table"); + var row; + row = table.append("tr"); + row.append("th").text("Name"); + row.append("th").text("Clade count"); + row.append("th").text("Hits on this leaf"); + + for( var i in d.children ) { + var child = d.children[ i ]; + + row = table.append("tr"); + + row.append("td").attr("class", "col-md-6").append("a").text( child.name ) + .data(child) + .on("click", outerclick); + row.append("td").attr("class", "col-md-3").text( child.size ); + row.append("td").attr("class", "col-md-3").text( child.count ); + + } + +} + + +function outerclick(d) { + var path = svg.datum(node).selectAll("path"); + + node = d; + path.transition() + .duration(1000) + .attrTween("d", arcTweenZoom(d)); + + d3.select("#currentlevel").text(d.name); + + loadTable(d, "#datatable"); +} + + + + + + + + + +// Total size of all segments; we set this later, after loading the data. 
+var totalSize = 0; + + + + +var width = 960, + height = 700, + radius = Math.min(width, height) / 2; + +var x = d3.scale.linear() + .range([0, 2 * Math.PI]); + +var y = d3.scale.sqrt() + .range([0, radius]); + +var color = d3.scale.category20c(); + +var svg = d3.select("#svgholder").append("svg") + .attr("width", width) + .attr("height", height) + .append("g") + .attr("id", "container") + .attr("transform", "translate(" + width / 2 + "," + (height / 2 + 10) + ")"); + +// Bounding circle underneath the sunburst, to make it easier to detect +// when the mouse leaves the parent g. +svg.append("svg:circle") + .attr("r", radius) + .style("opacity", 0); + + +var partition = d3.layout.partition() + .sort(null) + .value(function(d) { return 1; }); + +var arc = d3.svg.arc() + .startAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x))); }) + .endAngle(function(d) { return Math.max(0, Math.min(2 * Math.PI, x(d.x + d.dx))); }) + .innerRadius(function(d) { return Math.max(0, y(d.y)); }) + .outerRadius(function(d) { return Math.max(0, y(d.y + d.dy)); }); + +// Keep track of the node that is currently being displayed as the root. +var node; + +function loadGears( root ) { + console.log(root); + node = root; + var path = svg.datum(root).selectAll("path") + .data(partition.nodes) + .enter().append("path") + .attr("d", arc) + .attr("fill-rule", "evenodd") + .style("fill", function(d) { return color((d.children ? d : d.parent).name); }) + .style("opacity", 1) + .on("click", click) + .on("mouseover", mousein) + .on("mouseleave", mouseout) + .each(stash); + + d3.selectAll("input").on("change", function change() { + var value = this.value === "count" + ? 
function() { return 1; } + : function(d) { return d.size; }; + + path + .data(partition.value(value).nodes) + .transition() + .duration(1000) + .attrTween("d", arcTweenData); + }); + + function click(d) { + node = d; + path.transition() + .duration(1000) + .attrTween("d", arcTweenZoom(d)); + + d3.select("#currentlevel").text(d.name); + + loadTable(d, "#datatable"); + + } + // d3.select("#container").on("mouseleave", mouseleave); + initializeBreadcrumbTrail(); + + // Get total size of the tree = value of root node from partition. + totalSize = path.node().__data__.value; +} +d3.select(self.frameElement).style("height", height + "px"); + + + + + + + +// Setup for switching data: stash the old values for transition. +function stash(d) { + d.x0 = d.x; + d.dx0 = d.dx; +} + +// When switching data: interpolate the arcs in data space. +function arcTweenData(a, i) { + var oi = d3.interpolate({x: a.x0, dx: a.dx0}, a); + function tween(t) { + var b = oi(t); + a.x0 = b.x; + a.dx0 = b.dx; + return arc(b); + } + if (i == 0) { + // If we are on the first arc, adjust the x domain to match the root node + // at the current zoom level. (We only need to do this once.) + var xd = d3.interpolate(x.domain(), [node.x, node.x + node.dx]); + return function(t) { + x.domain(xd(t)); + return tween(t); + }; + } else { + return tween; + } +} + +// When zooming: interpolate the scales. +function arcTweenZoom(d) { + var xd = d3.interpolate(x.domain(), [d.x, d.x + d.dx]), + yd = d3.interpolate(y.domain(), [d.y, 1]), + yr = d3.interpolate(y.range(), [d.y ? 20 : 0, radius]); + return function(d, i) { + return i + ? 
function(t) { return arc(d); } + : function(t) { x.domain(xd(t)); y.domain(yd(t)).range(yr(t)); return arc(d); }; + }; +} diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala index c630b0b3e2d13e4c31466da08d984e6269e18d65..780f983d593544ddd5d5ffc017a65195cb0691e5 100644 --- a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala +++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala @@ -15,309 +15,143 @@ */ package nl.lumc.sasc.biopet.pipelines.gears -import htsjdk.samtools.SamReaderFactory -import nl.lumc.sasc.biopet.core.{ PipelineCommand, MultiSampleQScript } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.Ln +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile +import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag } import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport } -import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, SamToFastq } -import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import nl.lumc.sasc.biopet.extensions.tools.FastqSync +import nl.lumc.sasc.biopet.extensions.picard.SamToFastq +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView +import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript -import scala.collection.JavaConversions._ - /** - * This is a trait for the Gears pipeline - * The ShivaTrait is used as template for this pipeline + * Created by wyleung */ -class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qscript => +class Gears(val root: 
Configurable) extends QScript with SummaryQScript with SampleLibraryTag { def this() = this(null) - /** Executed before running the script */ - def init(): Unit = { - } - - /** Method to add jobs */ - def biopetScript(): Unit = { - addSamplesJobs() - addSummaryJobs() - } - - /** Multisample meta-genome comparison */ - def addMultiSampleJobs(): Unit = { - // generate report from multiple samples, this is: - // - the TSV - // - the Spearman correlation plot + table - } - - /** Location of summary file */ - def summaryFile = new File(outputDir, "gears.summary.json") - - /** Settings of pipeline for summary */ - def summarySettings = Map() - - /** Files for the summary */ - def summaryFiles = Map() - - /** Method to make a sample */ - def makeSample(id: String) = new Sample(id) - - /** Class that will generate jobs for a sample */ - class Sample(sampleId: String) extends AbstractSample(sampleId) { - /** Sample specific files to add to summary */ - def summaryFiles: Map[String, File] = { - preProcessBam match { - case Some(pb) => Map("bamFile" -> pb) - case _ => Map() - } - } ++ Map("alignment" -> alnFile) - - /** Sample specific stats to add to summary */ - def summaryStats: Map[String, Any] = Map() - - /** Method to make a library */ - def makeLibrary(id: String) = new Library(id) - - /** Class to generate jobs for a library */ - class Library(libId: String) extends AbstractLibrary(libId) { - /** Library specific files to add to the summary */ - def summaryFiles: Map[String, File] = { - (bamFile, preProcessBam) match { - case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb) - case (Some(b), _) => Map("bamFile" -> b) - case _ => Map() - } - } - - /** Alignment results of this library ~ can only be accessed after addJobs is run! 
*/ - def alnFile: File = bamFile match { - case Some(b) => b - case _ => throw new IllegalStateException("The bamfile is not generated yet") - } + @Input(doc = "R1 reads in FastQ format", shortName = "R1", required = false) + var fastqR1: Option[File] = None - /** Library specific stats to add to summary */ - def summaryStats: Map[String, Any] = Map() + @Input(doc = "R2 reads in FastQ format", shortName = "R2", required = false) + var fastqR2: Option[File] = None - /** Method to execute library preprocess */ - def preProcess(input: File): Option[File] = None + @Input(doc = "All unmapped reads will be extracted from this bam for analysis", shortName = "bam", required = false) + var bamFile: Option[File] = None - /** Method to make the mapping submodule */ - def makeMapping = { - val mapping = new Mapping(qscript) - mapping.sampleId = Some(sampleId) - mapping.libId = Some(libId) - mapping.outputDir = libDir - mapping.outputName = sampleId + "-" + libId - (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile)) - } + @Argument(required = false) + var outputName: String = _ - /** - * Determine where where to start the pipeline in cases where both R1 (fastq) and BAM is specified - */ - lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) = - (config.contains("R1"), config.contains("bam")) match { - case (true, _) => makeMapping // Default starting from fastq files - case (false, true) => // Starting from bam file - config("bam_to_fastq", default = false).asBoolean match { - case true => makeMapping // bam file will be converted to fastq - case false => - val file = new File(libDir, sampleId + "-" + libId + ".final.bam") - (None, Some(file), preProcess(file)) - } - case _ => (None, None, None) - } - - /** This will add jobs for this library */ - def addJobs(): Unit = { - (config.contains("R1"), config.contains("bam")) match { - case (true, _) => mapping.foreach(mapping => { - mapping.input_R1 = config("R1") - }) - 
case (false, true) => config("bam_to_fastq", default = false).asBoolean match { - case true => - val samToFastq = SamToFastq(qscript, config("bam"), - new File(libDir, sampleId + "-" + libId + ".R1.fastq"), - new File(libDir, sampleId + "-" + libId + ".R2.fastq")) - samToFastq.isIntermediate = true - qscript.add(samToFastq) - mapping.foreach(mapping => { - mapping.input_R1 = samToFastq.fastqR1 - mapping.input_R2 = Some(samToFastq.fastqR2) - }) - case false => - val inputSam = SamReaderFactory.makeDefault.open(config("bam")) - val readGroups = inputSam.getFileHeader.getReadGroups - - val readGroupOke = readGroups.forall(readGroup => { - if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") - readGroup.getSample == sampleId && readGroup.getLibrary == libId - }) - inputSam.close() - - if (!readGroupOke) { - if (config("correct_readgroups", default = false).asBoolean) { - logger.info("Correcting readgroups, file:" + config("bam")) - val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get) - aorrg.RGID = sampleId + "-" + libId - aorrg.RGLB = libId - aorrg.RGSM = sampleId - aorrg.isIntermediate = true - qscript.add(aorrg) - } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + - "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") - } else { - val oldBamFile: File = config("bam") - val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(qscript, oldBamFile, bamFile.get) - bamLn.deps :+= baiLn.output - add(bamLn) - } - } - case _ => logger.warn("Sample: " + sampleId + " Library: " + libId + ", 
no reads found") - } - mapping.foreach(mapping => { - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) // Add functions of mapping to current function pool - addSummaryQScript(mapping) - }) - } + /** Executed before running the script */ + def init(): Unit = { + require(fastqR1.isDefined || bamFile.isDefined, "Please specify fastq-file(s) or bam file") + require(fastqR1.isDefined != bamFile.isDefined, "Provide either a bam file or la R1 file") + + if (outputName == null) { + if (fastqR1.isDefined) outputName = fastqR1.map(_.getName + .stripSuffix(".gz") + .stripSuffix(".fastq") + .stripSuffix(".fq")) + .getOrElse("noName") + else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName") } - /** This will add jobs for the double preprocessing */ - protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { - if (input == Nil) None - else if (input.tail == Nil) { - val bamFile = new File(sampleDir, input.head.getName) - val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(qscript, input.head, bamFile) - bamLn.deps :+= baiLn.output - add(bamLn) - Some(bamFile) - } else { - val md = new MarkDuplicates(qscript) - md.input = input - md.output = new File(sampleDir, sampleId + ".dedup.bam") - md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics") - md.isIntermediate = isIntermediate - md.removeDuplicates = true - add(md) - addSummarizable(md, "mark_duplicates") - Some(md.output) - } + if (fastqR1.isDefined) { + fastqR1.foreach(inputFiles :+= InputFile(_)) + fastqR2.foreach(inputFiles :+= InputFile(_)) + } else { + inputFiles :+= InputFile(bamFile.get) } + } - lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => { - (lib._2.bamFile, 
lib._2.preProcessBam) match { - case (_, Some(file)) => Some(file) - case (Some(file), _) => Some(file) - case _ => None - } - }).toList) - def alnFile: File = sampleBamLinkJob.output + override def reportClass = { + val gears = new GearsReport(this) + gears.outputDir = new File(outputDir, "report") + gears.summaryFile = summaryFile + sampleId.foreach(gears.args += "sampleId" -> _) + libId.foreach(gears.args += "libId" -> _) + Some(gears) + } - /** Job for combining all library BAMs */ - private def sampleBamLinkJob: Ln = - makeCombineJob(libraries.values.map(_.alnFile).toList, createFile(".bam")) + override def defaults = Map( + "samtofastq" -> Map( + "validationstringency" -> "LENIENT" + ) + ) + /** Method to add jobs */ + def biopetScript(): Unit = { + val fastqFiles: List[File] = bamFile.map { bamfile => - /** Ln or MergeSamFile job, depending on how many inputs are supplied */ - private def makeCombineJob(inFiles: List[File], outFile: File, - mergeSortOrder: String = "coordinate"): Ln = { - require(inFiles.nonEmpty, "At least one input files for combine job") - val input: File = { + val samtoolsViewSelectUnmapped = new SamtoolsView(this) + samtoolsViewSelectUnmapped.input = bamfile + samtoolsViewSelectUnmapped.b = true + samtoolsViewSelectUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam") + samtoolsViewSelectUnmapped.f = List("12") + samtoolsViewSelectUnmapped.isIntermediate = true + add(samtoolsViewSelectUnmapped) - if (inFiles.size == 1) inFiles.head - else { - val mergedBam = createFile(".merged.bam") - val mergejob = new MergeSamFiles(qscript) - mergejob.input = inFiles - mergejob.output = mergedBam - mergejob.sortOrder = mergeSortOrder - add(mergejob) - mergejob.output - } - } + // start bam to fastq (only on unaligned reads) also extract the matesam + val samToFastq = new SamToFastq(this) + samToFastq.input = samtoolsViewSelectUnmapped.output + samToFastq.fastqR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz") + samToFastq.fastqR2 
= new File(outputDir, s"$outputName.unmapped.R2.fq.gz") + samToFastq.fastqUnpaired = new File(outputDir, s"$outputName.unmapped.singleton.fq.gz") + samToFastq.isIntermediate = true + add(samToFastq) - val linkJob = new Ln(qscript) - linkJob.input = input - linkJob.output = outFile - linkJob + List(samToFastq.fastqR1, samToFastq.fastqR2) + }.getOrElse(List(fastqR1, fastqR2).flatten) - } + // start kraken + val krakenAnalysis = new Kraken(this) + krakenAnalysis.input = fastqFiles + krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw") - /** This will add sample jobs */ - def addJobs(): Unit = { - addPerLibJobs() - // merge or symlink per-library alignments - add(sampleBamLinkJob) + krakenAnalysis.paired = fastqFiles.length == 2 - if (preProcessBam.isDefined) { - val bamMetrics = new BamMetrics(qscript) - bamMetrics.sampleId = Some(sampleId) - bamMetrics.inputBam = preProcessBam.get - bamMetrics.outputDir = sampleDir - bamMetrics.init() - bamMetrics.biopetScript() - addAll(bamMetrics.functions) - addSummaryQScript(bamMetrics) - } + krakenAnalysis.classified_out = Some(new File(outputDir, s"$outputName.krkn.classified.fastq")) + krakenAnalysis.unclassified_out = Some(new File(outputDir, s"$outputName.krkn.unclassified.fastq")) + add(krakenAnalysis) - // sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam> - val samFilterUnmapped = new SambambaView(qscript) - samFilterUnmapped.input = alnFile - samFilterUnmapped.filter = Some("unmapped or mate_is_unmapped") - samFilterUnmapped.output = createFile(".unmapped.bam") - samFilterUnmapped.isIntermediate = true - qscript.add(samFilterUnmapped) + outputFiles += ("kraken_output_raw" -> krakenAnalysis.output) + outputFiles += ("kraken_classified_out" -> krakenAnalysis.classified_out.getOrElse("")) + outputFiles += ("kraken_unclassified_out" -> krakenAnalysis.unclassified_out.getOrElse("")) - // start bam to fastq (only on unaligned reads) also extract the matesam - val samToFastq = 
SamToFastq(qscript, alnFile, - createFile(".unmap.R1.fastq"), - createFile(".unmap.R2.fastq") - ) - samToFastq.isIntermediate = true - qscript.add(samToFastq) + // create kraken summary file + val krakenReport = new KrakenReport(this) + krakenReport.input = krakenAnalysis.output + krakenReport.show_zeros = true + krakenReport.output = new File(outputDir, s"$outputName.krkn.full") + add(krakenReport) - // sync the fastq records - val fastqSync = new FastqSync(qscript) - fastqSync.refFastq = samToFastq.fastqR1 - fastqSync.inputFastq1 = samToFastq.fastqR1 - fastqSync.inputFastq2 = samToFastq.fastqR2 - fastqSync.outputFastq1 = createFile(".unmapsynced.R1.fastq.gz") - fastqSync.outputFastq2 = createFile(".unmapsynced.R2.fastq.gz") - fastqSync.outputStats = createFile(".syncstats.json") - qscript.add(fastqSync) + outputFiles += ("kraken_report_input" -> krakenReport.input) + outputFiles += ("kraken_report_output" -> krakenReport.output) - // start kraken - val krakenAnalysis = new Kraken(qscript) - krakenAnalysis.input = List(fastqSync.outputFastq1, fastqSync.outputFastq2) - krakenAnalysis.output = createFile(".krkn.raw") - krakenAnalysis.paired = true - krakenAnalysis.classified_out = Option(createFile(".krkn.classified.fastq")) - krakenAnalysis.unclassified_out = Option(createFile(".krkn.unclassified.fastq")) - qscript.add(krakenAnalysis) + val krakenReportJSON = new KrakenReportToJson(this) + krakenReportJSON.inputReport = krakenReport.output + krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json") + krakenReportJSON.skipNames = config("skipNames", default = false) + add(krakenReportJSON) + addSummarizable(krakenReportJSON, "krakenreport") - // create kraken summary file + outputFiles += ("kraken_report_json_input" -> krakenReportJSON.inputReport) + outputFiles += ("kraken_report_json_output" -> krakenReportJSON.output) - val krakenReport = new KrakenReport(qscript) - krakenReport.input = krakenAnalysis.output - krakenReport.show_zeros = true - 
krakenReport.output = createFile(".krkn.full") - qscript.add(krakenReport) - } + addSummaryJobs() } + + /** Location of summary file */ + def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".gears.summary.json") + + /** Pipeline settings shown in the summary file */ + def summarySettings: Map[String, Any] = Map.empty + + /** Statistics shown in the summary file */ + def summaryFiles: Map[String, File] = Map.empty ++ + (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++ + (if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++ + outputFiles } /** This object give a default main method to the pipelines */ diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala new file mode 100644 index 0000000000000000000000000000000000000000..f0ad762064f8d4a16b2750c4a3d749ab06b8b581 --- /dev/null +++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsReport.scala @@ -0,0 +1,50 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ +package nl.lumc.sasc.biopet.pipelines.gears + +import nl.lumc.sasc.biopet.core.report._ +import nl.lumc.sasc.biopet.utils.config.Configurable + +class GearsReport(val root: Configurable) extends ReportBuilderExtension { + def builder = GearsReport +} + +object GearsReport extends ReportBuilder { + + // TODO: Add dustbin analysis (aggregated) + // TODO: Add alignment stats per sample for the dustbin analysis + + override def extFiles = super.extFiles ++ List("js/gears.js") + .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) + + def indexPage = { + ReportPage( + List( + "Versions" -> ReportPage(List(), List(( + "Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp" + ))), Map()) + ), + List( + "Gears intro" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsFront.ssp"), + "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp") + ), + pageArgs + ) + } + + def reportName = "Gears :: Metagenomics Report" + +} diff --git a/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala b/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..842845bb5e88764376ab54dd2e41a83caa61f201 --- /dev/null +++ b/public/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala @@ -0,0 +1,137 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. 
The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.pipelines.gears + +import java.io.File + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport } +import nl.lumc.sasc.biopet.extensions.picard.SamToFastq +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView +import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson +import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Config +import org.apache.commons.io.FileUtils +import org.broadinstitute.gatk.queue.QSettings +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations._ + +/** + * Test class for [[Gears]] + * + * Created by wyleung on 10/22/15. + */ + +class GearsPipelineTest(val testset: String) extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): Gears = { + new Gears { + override def configName = "gears" + + override def globalConfig = new Config(map) + + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @DataProvider(name = "gearsOptions") + def gearsOptions = { + val startFromBam = Array(true, false) + val paired = Array(true, false) + val hasOutputNames = Array(true, false) + val hasFileExtensions = Array(true, false) + + for ( + fromBam <- startFromBam; + pair <- paired; + hasOutputName <- hasOutputNames; + hasFileExtension <- hasFileExtensions + ) yield Array(testset, fromBam, pair, hasOutputName, hasFileExtension) + } + + @Test(dataProvider = "gearsOptions") + def testGears(testset: String, fromBam: Boolean, paired: Boolean, + hasOutputName: Boolean, hasFileExtension: Boolean) = { + val map = ConfigUtils.mergeMaps(Map( + "output_dir" -> GearsTest.outputDir + ), 
Map(GearsTest.executables.toSeq: _*)) + + val gears: Gears = initPipeline(map) + + if (fromBam) { + gears.bamFile = if (hasFileExtension) Some(GearsTest.bam) else Some(GearsTest.bam_noext) + } else { + gears.fastqR1 = if (hasFileExtension) Some(GearsTest.r1) else Some(GearsTest.r1_noext) + gears.fastqR2 = if (paired) if (hasFileExtension) Some(GearsTest.r2) else Some(GearsTest.r2_noext) else None + } + if (hasOutputName) + gears.outputName = "test" + + gears.script() + + if (hasOutputName) { + gears.outputName shouldBe "test" + } else { + // in the following cases the output name should have been derived from the input filename + if (hasFileExtension) { + gears.outputName shouldBe (if (fromBam) "bamfile" else "R1") + } else { + // no real use-case for this one; it is only here as a sanity check + gears.outputName shouldBe (if (fromBam) "bamfile" else "R1") + } + } + + // SamToFastq should have started if it was started from bam + gears.functions.count(_.isInstanceOf[SamtoolsView]) shouldBe (if (fromBam) 1 else 0) + gears.functions.count(_.isInstanceOf[SamToFastq]) shouldBe (if (fromBam) 1 else 0) + + gears.functions.count(_.isInstanceOf[Kraken]) shouldBe 1 + gears.functions.count(_.isInstanceOf[KrakenReport]) shouldBe 1 + gears.functions.count(_.isInstanceOf[KrakenReportToJson]) shouldBe 1 + } + + // remove temporary run directory after all tests in the class have been run + @AfterClass def removeTempOutputDir() = { + FileUtils.deleteDirectory(GearsTest.outputDir) + } +} + +object GearsTest { + val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + + val r1 = new File(outputDir, "input" + File.separator + "R1.fq") + Files.touch(r1) + val r2 = new File(outputDir, "input" + File.separator + "R2.fq") + Files.touch(r2) + val bam = new File(outputDir, "input" + File.separator + "bamfile.bam") + Files.touch(bam) + + val r1_noext = new File(outputDir, "input" + File.separator + "R1") + Files.touch(r1_noext) + val r2_noext = new File(outputDir, "input" + 
File.separator + "R2") + Files.touch(r2_noext) + val bam_noext = new File(outputDir, "input" + File.separator + "bamfile") + Files.touch(bam_noext) + + val executables = Map( + "kraken" -> Map("exe" -> "test", "db" -> "test"), + "krakenreport" -> Map("exe" -> "test", "db" -> "test"), + "sambamba" -> Map("exe" -> "test"), + "md5sum" -> Map("exe" -> "test") + ) +} diff --git a/public/gentrap/pom.xml b/public/gentrap/pom.xml index 44da56eaf57596f6999276e183f1e1808f9d26b6..42b558833ce1d09ce16933fc1ed1a3fd4e63e852 100644 --- a/public/gentrap/pom.xml +++ b/public/gentrap/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/lib_seqeval.tex b/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/lib_seqeval.tex index 2edf4b99ff9be01512eedfd4324d89f03c698fd5..8e28e56e9defea0295be0a38005b4bcc6b68e59c 100644 --- a/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/lib_seqeval.tex +++ b/public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/lib_seqeval.tex @@ -72,9 +72,9 @@ More information about clipping using cutadapt is available on the Read 1 & ((( lib.flexiprep.stats.clipping_R1.num_reads_affected|nice_int ))) ((* for adapter, stat in lib.flexiprep.stats.clipping_R1.adapters.iteritems() *)) ((* if loop.first *)) - & ((( adapter ))) & ((( stat[1]|nice_int )))\\ + & ((( adapter ))) & ((( stat|nice_int )))\\ ((* else *)) - & & ((( adapter ))) & ((( stat[1]|nice_int )))\\ + & & ((( adapter ))) & ((( stat|nice_int )))\\ ((* endif *)) ((* endfor *)) ((* else *)) @@ -85,9 +85,9 @@ More information about clipping using cutadapt is available on the Read 2 & ((( lib.flexiprep.stats.clipping_R2.num_reads_affected|nice_int ))) ((* 
for adapter, stat in lib.flexiprep.stats.clipping_R2.adapters.iteritems() *)) ((* if loop.first *)) - & ((( adapter ))) & ((( stat[1]|nice_int )))\\ + & ((( adapter ))) & ((( stat|nice_int )))\\ ((* else *)) - & & ((( adapter ))) & ((( stat[1]|nice_int )))\\ + & & ((( adapter ))) & ((( stat|nice_int )))\\ ((* endif *)) ((* endfor *)) ((* else *)) @@ -99,26 +99,6 @@ More information about clipping using cutadapt is available on the \end{center} \addtocounter{table}{-1} -((* if lib.is_paired_end *)) -After clipping, all read pairs are then checked ('synced') for their -completeness. Read pairs whose other half has been discarded during -clipping, will also be discarded. The summary of this step is available in -Table~\ref{tab:clipsync}. - -\begin{center} - \captionof{table}{Summary of Post-Clipping Sync Step} - \label{tab:clipsync} - \begin{tabular}{ l r } - \hline - Parameter & Count\\ \hline \hline - Discarded FASTQ records from read 1 & ((( lib.flexiprep.stats.fastq_sync.num_reads_discarded_R1|nice_int )))\\ - Discarded FASTQ records from read 2 & ((( lib.flexiprep.stats.fastq_sync.num_reads_discarded_R2|nice_int )))\\ - \hline - Total kept FASTQ records & ((( lib.flexiprep.stats.fastq_sync.num_reads_kept|nice_int )))\\ - \hline - \end{tabular} -\end{center} -((* endif *)) ((* endif *)) \vspace{2mm} @@ -141,12 +121,8 @@ Table~\ref{tab:clipsync}. 
\begin{tabular}{ l r } \hline Parameter & Count\\ \hline \hline - Discarded FASTQ records from read 1 & ((( lib.flexiprep.stats.trimming.num_reads_discarded_R1|nice_int )))\\ - Discarded FASTQ records from read 2 & ((( lib.flexiprep.stats.trimming.num_reads_discarded_R2|nice_int )))\\ - \hline - Discarded FASTQ records from both reads & ((( lib.flexiprep.stats.trimming.num_reads_discarded_both|nice_int )))\\ - \hline - Total kept FASTQ reads & ((( lib.flexiprep.stats.trimming.num_reads_kept|nice_int )))\\ + Discarded FASTQ records from read 1 & ((( lib.flexiprep.stats.trimming_R1.num_reads_discarded_total|nice_int )))\\ + Discarded FASTQ records from read 2 & ((( lib.flexiprep.stats.trimming_R2.num_reads_discarded_total|nice_int )))\\ \hline \end{tabular} \end{center} diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/CufflinksProducer.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/CufflinksProducer.scala index 904838f48fc375e0fec389d64225d986ec8235c3..2640c8f21224b7a504bb73c2a07c0e38e674fcbf 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/CufflinksProducer.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/CufflinksProducer.scala @@ -37,7 +37,7 @@ trait CufflinksProducer { /** Cufflink's terms for strand specificity */ lazy val strandedness: String = { - require(pipeline.config.contains("strand_protocol")) + //require(pipeline.config.contains("strand_protocol")) pipeline.strandProtocol match { case NonSpecific => "fr-unstranded" case Dutp => "fr-firststrand" diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala index 724470df1091228bbdaf928b49ae7024f2bb70bd..409e027dc5165e27db853f50c2f9f49b90ee3970 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala +++ 
b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala @@ -19,30 +19,30 @@ import java.io.File import nl.lumc.sasc.biopet.FullVersion import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.utils.config._ import nl.lumc.sasc.biopet.core.summary._ import nl.lumc.sasc.biopet.extensions.picard.{ MergeSamFiles, SortSam } import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView +import nl.lumc.sasc.biopet.extensions.tools.{ MergeTables, WipeReads } import nl.lumc.sasc.biopet.extensions.{ HtseqCount, Ln } import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.pipelines.gentrap.extensions.{ CustomVarScan, Pdflatex, RawBaseCounter } import nl.lumc.sasc.biopet.pipelines.gentrap.scripts.{ AggrBaseCount, PdfReportTemplateWriter, PlotHeatmap } import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import nl.lumc.sasc.biopet.extensions.tools.{ MergeTables, WipeReads } -import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.utils.config._ import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.function.QFunction import picard.analysis.directed.RnaSeqMetricsCollector.StrandSpecificity import scala.language.reflectiveCalls import scalaz.Scalaz._ -import scalaz._ /** * Gentrap pipeline * Generic transcriptome analysis pipeline * + * @author Peter van 't Hof <p.j.van_t_hof@lumc.nl> * @author Wibowo Arindrarto <w.arindrarto@lumc.nl> */ class Gentrap(val root: Configurable) extends QScript @@ -62,13 +62,27 @@ class Gentrap(val root: Configurable) extends QScript /** Expression measurement modes */ // see the enumeration below for valid modes - var expMeasures: Set[ExpMeasures.Value] = config("expression_measures") - .asStringList - .map { makeExpMeasure } - .toSet + var expMeasures: Set[ExpMeasures.Value] = { + if (config.contains("expression_measures")) + 
config("expression_measures") + .asStringList + .flatMap { makeExpMeasure } + .toSet + else { + Logging.addError("'expression_measures' is missing in the config") + Set() + } + } /** Strandedness modes */ - var strandProtocol: StrandProtocol.Value = makeStrandProtocol(config("strand_protocol").asString) + var strandProtocol: StrandProtocol.Value = { + if (config.contains("strand_protocol")) + makeStrandProtocol(config("strand_protocol").asString).getOrElse(StrandProtocol.NonSpecific) + else { + Logging.addError("'strand_protocol' is missing in the config") + StrandProtocol.NonSpecific + } + } /** GTF reference file */ var annotationGtf: Option[File] = config("annotation_gtf") @@ -329,24 +343,29 @@ class Gentrap(val root: Configurable) extends QScript def init(): Unit = { // TODO: validate that exons are flattened or not (depending on another option flag?) // validate required annotation files - if (expMeasures.contains(FragmentsPerGene)) - require(annotationGtf.isDefined, "GTF file must be defined for counting fragments per gene") + if (expMeasures.contains(FragmentsPerGene) && annotationGtf.isEmpty) + Logging.addError("GTF file must be defined for counting fragments per gene, config key: 'annotation_gtf'") + + if (expMeasures.contains(FragmentsPerExon) && annotationGtf.isEmpty) + Logging.addError("GTF file must be defined for counting fragments per exon, config key: 'annotation_gtf'") + // TODO: validate that GTF file contains exon features - if (expMeasures.contains(FragmentsPerExon)) - // TODO: validate that GTF file contains exon features - require(annotationGtf.isDefined, "GTF file must be defined for counting fragments per exon") + if (expMeasures.contains(BasesPerGene) && annotationBed.isEmpty) + Logging.addError("BED file must be defined for counting bases per gene, config key: 'annotation_bed'") - if (expMeasures.contains(BasesPerGene)) - require(annotationBed.isDefined, "BED file must be defined for counting bases per gene") + if 
(expMeasures.contains(BasesPerExon) && annotationBed.isEmpty) + Logging.addError("BED file must be defined for counting bases per exon, config key: 'annotation_bed'") - if (expMeasures.contains(BasesPerExon)) - require(annotationBed.isDefined, "BED file must be defined for counting bases per exon") + if ((expMeasures.contains(CufflinksBlind) || expMeasures.contains(CufflinksGuided) || expMeasures.contains(CufflinksStrict)) && annotationGtf.isEmpty) + Logging.addError("GTF file must be defined for Cufflinks-based modes, config key: 'annotation_gtf'") - if (expMeasures.contains(CufflinksBlind) || expMeasures.contains(CufflinksGuided) || expMeasures.contains(CufflinksStrict)) - require(annotationGtf.isDefined, "GTF file must be defined for Cufflinks-based modes") + if (removeRibosomalReads && ribosomalRefFlat.isEmpty) + Logging.addError("rRNA intervals must be supplied if removeRibosomalReads is set, config key: 'ribosome_refflat'") - if (removeRibosomalReads) - require(ribosomalRefFlat.isDefined, "rRNA intervals must be supplied if removeRibosomalReads is set") + annotationGtf.foreach(inputFiles :+= new InputFile(_)) + annotationBed.foreach(inputFiles :+= new InputFile(_)) + ribosomalRefFlat.foreach(inputFiles :+= new InputFile(_)) + if (annotationRefFlat.getName.nonEmpty) inputFiles :+= new InputFile(annotationRefFlat) } /** Pipeline run for each sample */ @@ -383,9 +402,6 @@ class Gentrap(val root: Configurable) extends QScript /** Shortcut to qscript object */ protected def pipeline: Gentrap = qscript - /** Sample output directory */ - override def sampleDir: File = new File(outputDir, "sample_" + sampleId) - /** Summary stats of the sample */ def summaryStats: Map[String, Any] = Map( "all_paired" -> allPaired, @@ -494,7 +510,7 @@ class Gentrap(val root: Configurable) extends QScript .option { require(idSortingJob.nonEmpty) val job = new HtseqCount(qscript) - job.inputAnnotation = annotationGtf.get + annotationGtf.foreach(job.inputAnnotation = _) 
job.inputAlignment = idSortingJob.get.output job.output = createFile(".fragments_per_gene") job.format = Option("bam") @@ -629,7 +645,7 @@ class Gentrap(val root: Configurable) extends QScript .option { val job = new RawBaseCounter(qscript) job.inputBoth = alnFile - job.annotationBed = annotationBed.get + annotationBed.foreach(job.annotationBed = _) job.output = createFile(".raw_base_count") job } @@ -640,7 +656,7 @@ class Gentrap(val root: Configurable) extends QScript val job = new RawBaseCounter(qscript) job.inputPlus = alnFilePlusStrand.get job.inputMinus = alnFileMinusStrand.get - job.annotationBed = annotationBed.get + annotationBed.foreach(job.annotationBed = _) job.output = createFile(".raw_base_count") job } @@ -697,10 +713,10 @@ class Gentrap(val root: Configurable) extends QScript /** Job for removing ribosomal reads */ private def wipeJob: Option[WipeReads] = removeRibosomalReads .option { - require(ribosomalRefFlat.isDefined) + //require(ribosomalRefFlat.isDefined) val job = new WipeReads(qscript) job.inputBam = alnFileDirty - job.intervalFile = ribosomalRefFlat.get + ribosomalRefFlat.foreach(job.intervalFile = _) job.outputBam = createFile(".cleaned.bam") job.discardedBam = createFile(".rrna.bam") job @@ -881,22 +897,26 @@ object Gentrap extends PipelineCommand { .mkString("") /** Conversion from raw user-supplied expression measure string to enum value */ - private def makeExpMeasure(rawName: String): ExpMeasures.Value = { + private def makeExpMeasure(rawName: String): Option[ExpMeasures.Value] = { try { - ExpMeasures.withName(camelize(rawName)) + Some(ExpMeasures.withName(camelize(rawName))) } catch { - case nse: NoSuchElementException => throw new IllegalArgumentException("Invalid expression measure: " + rawName) - case e: Exception => throw e + case nse: NoSuchElementException => + Logging.addError(s"Invalid expression measure: $rawName") + None + case e: Exception => throw e } } /** Conversion from raw user-supplied expression measure string to 
enum value */ - private def makeStrandProtocol(rawName: String): StrandProtocol.Value = { + private def makeStrandProtocol(rawName: String): Option[StrandProtocol.Value] = { try { - StrandProtocol.withName(camelize(rawName)) + Some(StrandProtocol.withName(camelize(rawName))) } catch { - case nse: NoSuchElementException => throw new IllegalArgumentException("Invalid strand protocol: " + rawName) - case e: Exception => throw e + case nse: NoSuchElementException => + Logging.addError(s"Invalid strand protocol: $rawName") + None + case e: Exception => throw e } } } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala index 6bf57ea655f67b5ecbc919d4d5f4bd44153dccd7..55f50882862db3bdc787fffdbeb6be35cf24d009 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala @@ -26,7 +26,7 @@ import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport * Created by pjvanthof on 25/06/15. 
*/ class GentrapReport(val root: Configurable) extends ReportBuilderExtension { - val builder = GentrapReport + def builder = GentrapReport } object GentrapReport extends MultisampleReportBuilder { diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/RawBaseCounter.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/RawBaseCounter.scala index 0dfbacd1aaebf7ef74b87a0125407ca29128c9bb..8be307a9d1323f0252d5b7f2e24f52c07148adcc 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/RawBaseCounter.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/RawBaseCounter.scala @@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.extensions import java.io.File import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -82,7 +83,7 @@ class RawBaseCounter(val root: Configurable) extends BiopetCommandLineFunction { } override def beforeGraph(): Unit = { - require(annotationBed != null, "Annotation BED must be supplied") + if (annotationBed == null) Logging.addError("Annotation BED must be supplied") require(output != null, "Output must be defined") require((mixedStrand && !distinctStrand) || (!mixedStrand && distinctStrand), "Invalid input BAM combinations for RawBaseCounter") diff --git a/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala b/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala index 34a43484056b82d9948a11c16f7c2276be7bd6e8..570a6e378ad07649cc0d0fea1f0548b424f0b04b 100644 --- a/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala +++ 
b/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala @@ -205,10 +205,10 @@ object GentrapTest { val executables = Map( "reference_fasta" -> (outputDir + File.separator + "ref.fa"), - "refFlat" -> "test", - "annotation_gtf" -> "test", - "annotation_bed" -> "test", - "annotation_refflat" -> "test", + "refFlat" -> (outputDir + File.separator + "ref.fa"), + "annotation_gtf" -> (outputDir + File.separator + "ref.fa"), + "annotation_bed" -> (outputDir + File.separator + "ref.fa"), + "annotation_refflat" -> (outputDir + File.separator + "ref.fa"), "varscan_jar" -> "test" ) ++ Seq( // fastqc executables diff --git a/public/kopisu/pom.xml b/public/kopisu/pom.xml index 21f4f0c60dab41cf8d7d7800e8e8ac7654ed6d81..720016ce0a8af0070bbdc9b0560c2d72ab5b4761 100644 --- a/public/kopisu/pom.xml +++ b/public/kopisu/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/mapping/pom.xml b/public/mapping/pom.xml index b5b45bb49c185ff3953a6a1623d154d88d8e7bf6..0ce243639822c6b338ad71ceafc3f08500818824 100644 --- a/public/mapping/pom.xml +++ b/public/mapping/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> @@ -43,6 +43,11 @@ <artifactId>Flexiprep</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Gears</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>BamMetrics</artifactId> diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 
2ef18c8f67e1ea784f8aa6d42d5daa688a4675a6..713ea2b57c5a7f603fc6e16af6bf65b228c8c68f 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -19,17 +19,17 @@ import java.io.File import java.util.Date import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse } import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam } -import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat, _ } +import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter +import nl.lumc.sasc.biopet.extensions._ import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep +import nl.lumc.sasc.biopet.pipelines.gears.Gears import nl.lumc.sasc.biopet.pipelines.mapping.scripts.TophatRecondition -import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter -import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript import scala.math._ @@ -219,6 +219,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S case "bwa-mem" => addBwaMem(R1, R2, outputBam) case "bwa-aln" => addBwaAln(R1, R2, outputBam) case "bowtie" => addBowtie(R1, R2, outputBam) + case "bowtie2" => addBowtie2(R1, R2, outputBam) case "gsnap" => addGsnap(R1, R2, outputBam) // TODO: make TopHat here accept multiple input files case "tophat" => addTophat(R1, R2, outputBam) @@ -258,6 +259,16 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S add(Ln(this, bamFile, finalBamFile)) outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile) + if 
(config("unmapped_to_gears", default = false).asBoolean) { + val gears = new Gears(this) + gears.bamFile = Some(finalBamFile) + gears.outputDir = new File(outputDir, "gears") + gears.init() + gears.biopetScript() + addAll(gears.functions) + addSummaryQScript(gears) + } + if (config("generate_wig", default = false).asBoolean) addAll(Bam2Wig(this, finalBamFile).functions) @@ -324,10 +335,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S } def addGsnap(R1: File, R2: Option[File], output: File): File = { - val zcatR1 = extractIfNeeded(R1, output.getParentFile) - val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None val gsnapCommand = new Gsnap(this) - gsnapCommand.input = if (paired) List(zcatR1._2, zcatR2.get._2) else List(zcatR1._2) + gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1) gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam") val reorderSam = new ReorderSam(this) @@ -335,11 +344,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam") val ar = addAddOrReplaceReadGroups(reorderSam.output, output) - val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) :: - Some(gsnapCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten) - pipe.threadsCorrection = -1 - zcatR1._1.foreach(x => pipe.threadsCorrection -= 1) - zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1)) + val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil) + pipe.threadsCorrection = -2 add(pipe) ar._2 } @@ -432,6 +438,25 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S ar._2 } + /** Add bowtie2 jobs **/ + def addBowtie2(R1: File, R2: Option[File], output: File): File = { + val bowtie2 = new Bowtie2(this) + bowtie2.rg_id = Some(readgroupId) + bowtie2.rg +:= ("LB:" + 
libId.get) + bowtie2.rg +:= ("PL:" + platform) + bowtie2.rg +:= ("PU:" + platformUnit) + bowtie2.rg +:= ("SM:" + sampleId.get) + bowtie2.R1 = R1 + bowtie2.R2 = R2 + val sortSam = new SortSam(this) + sortSam.output = output + val pipe = bowtie2 | sortSam + pipe.isIntermediate = chunking || !skipMarkduplicates + pipe.threadsCorrection = -1 + add(pipe) + output + } + /** Adds Star jobs */ def addStar(R1: File, R2: Option[File], output: File): File = { val zcatR1 = extractIfNeeded(R1, output.getParentFile) diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala index b2f1b7a846da3483eeed879e493e651f25a83759..3fc93747477ed985f4e5ccf02f2745513f101817 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala @@ -16,12 +16,12 @@ package nl.lumc.sasc.biopet.pipelines.mapping import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection, ReportPage, ReportBuilder } +import nl.lumc.sasc.biopet.core.report._ import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport class MappingReport(val root: Configurable) extends ReportBuilderExtension { - val builder = MappingReport + def builder = MappingReport } /** @@ -33,6 +33,11 @@ object MappingReport extends ReportBuilder { /** Name of report */ val reportName = "Mapping Report" + override def extFiles = super.extFiles ++ List("js/gears.js") + .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) + + def krakenExecuted = summary.getValue(sampleId, libId, "gears", "stats", "krakenreport").isDefined + /** Root page for single BamMetrcis report */ def indexPage = { val skipFlexiprep = 
summary.getValue(sampleId, libId, "mapping", "settings", "skip_flexiprep").getOrElse(false) == true @@ -48,7 +53,11 @@ object MappingReport extends ReportBuilder { "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"))) ::: List("Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)) ), Map()) - ), List( + ) ::: + (if (krakenExecuted) List("Gears - Metagenomics" -> ReportPage(List(), List( + "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp" + )), Map())) + else Nil), List( "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/mappingFront.ssp") ) ::: bamMetricsPage.map(_.sections).getOrElse(Nil), Map() diff --git a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala index 377c771cee17b62922381cd09d9c4ebd0aa15aec..b4723721e799c9f4f02d878f6032e5c7695226b7 100644 --- a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala +++ b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala @@ -98,6 +98,7 @@ class MappingBwaAlnTest extends AbstractTestMapping("bwa-aln") class MappingStarTest extends AbstractTestMapping("star") class MappingStar2PassTest extends AbstractTestMapping("star-2pass") class MappingBowtieTest extends AbstractTestMapping("bowtie") +class MappingBowtie2Test extends AbstractTestMapping("bowtie2") class MappingStampyTest extends AbstractTestMapping("stampy") class MappingGsnapTest extends AbstractTestMapping("gsnap") class MappingTophatTest extends AbstractTestMapping("tophat") @@ -142,6 +143,7 @@ object MappingTest { "bwa" -> Map("exe" -> "test"), "star" -> Map("exe" -> "test"), "bowtie" -> Map("exe" -> "test"), + "bowtie2" -> Map("exe" -> "test"), "stampy" -> Map("exe" -> "test", "genome" -> 
"test", "hash" -> "test"), "samtools" -> Map("exe" -> "test"), "md5sum" -> Map("exe" -> "test") diff --git a/public/pom.xml b/public/pom.xml index 0f2785691ad8dab0d8111a6204894b8801e4eeac..4a2eb7dbb7237603eb95855d39973114d8fe3c3e 100644 --- a/public/pom.xml +++ b/public/pom.xml @@ -22,7 +22,7 @@ <groupId>nl.lumc.sasc</groupId> <name>Biopet</name> <packaging>pom</packaging> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <modules> <!--<module>biopet-framework</module>--> diff --git a/public/sage/pom.xml b/public/sage/pom.xml index b88e699a28879de1fd3b92bf60a126006687f440..e3eab0bf3eac563b68aee8d71ff58acc7dba4875 100644 --- a/public/sage/pom.xml +++ b/public/sage/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/shiva/pom.xml b/public/shiva/pom.xml index 560818c30c444e703432b9160360c97337931025..2638bcfdc1fe39c5dce7c8e2c9a463ec8d8f2e48 100644 --- a/public/shiva/pom.xml +++ b/public/shiva/pom.xml @@ -22,7 +22,7 @@ <parent> <artifactId>Biopet</artifactId> <groupId>nl.lumc.sasc</groupId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> </parent> <modelVersion>4.0.0</modelVersion> diff --git a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp index 8bc095e84ac3d566d3b0f453ecb0eeccd93f553e..674910dabedc3fcb7d7759cd8952d93a68240f01 100644 --- a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp +++ b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp @@ -8,13 +8,20 @@ <%@ var outputDir: File %> <%@ var showPlot: Boolean = false %> <%@ var showTable: Boolean = true %> -<%@ var showIntro: Boolean = true%> +<%@ var showIntro: Boolean = true %> +<%@ var 
target: Option[String] = None %> +<%@ var caller: String = "final" %> + #{ val fields = List("Hom", "HomVar", "Het", "HomRef", "NoCall", "Variant", "Total") val samples = sampleId match { case Some(sample) => List(sample.toString) case _ => summary.samples.toList } + val vcfstatsKey = target match { + case Some(t) => s"multisample-vcfstats-$caller-$t" + case _ => s"multisample-vcfstats-$caller" + } }# #if (showIntro) @@ -70,7 +77,7 @@ #for (sample <- samples.toList.sorted) <tr><td><a href="${rootPath}Samples/${sample}/index.html">${sample}</a></td> #for (field <- fields) - <td>${summary.getSampleValue(sample, "shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", field)}</td> + <td>${summary.getSampleValue(sample, "shivavariantcalling", "stats", vcfstatsKey, "genotype", field)}</td> #end </tr> #end diff --git a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp index 5721d22515ced92c9102565df158c20be80a2807..8c850934608710e0a343def57e0392a5a799b164 100644 --- a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp +++ b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp @@ -11,10 +11,13 @@ <tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr> <tr> <th>Variantcallers</th> - <td>${summary.getValue("shivavariantcalling", "settings", "variantcallers").get.asInstanceOf[List[String]].mkString(", ")}</td> + <td>${summary.getValue("shivavariantcalling", "settings", "variantcallers").getOrElse(List("None")).asInstanceOf[List[String]].mkString(", ")}</td> </tr> <tr><th>Reference</th><td>${summary.getValue("shiva", "settings", "reference", "species")} - ${summary.getValue("shiva", "settings", "reference", "name")}</td></tr> <tr><th>Number of samples</th><td>${summary.samples.size}</td></tr> + <tr><th>Annotation</th><td>${summary.getValue("shiva", 
"settings", "annotation")}</td></tr> + <tr><th>Multisample variantcalling</th><td>${summary.getValue("shiva", "settings", "multisample_variantcalling")}</td></tr> + <tr><th>Sv calling</th><td>${summary.getValue("shiva", "settings", "sv_calling")}</td></tr> </tbody> </table> <br/> diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala index 7e32d72fea38d2e2a2e243604a42438703817946..5c88189833b1b6bd169c3da475c6c2370957abf9 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala @@ -30,12 +30,20 @@ import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport * Created by pjvan_thof on 3/30/15. */ class ShivaReport(val root: Configurable) extends ReportBuilderExtension { - val builder = ShivaReport + def builder = ShivaReport } /** Object for report generation for Shiva pipeline */ object ShivaReport extends MultisampleReportBuilder { + def variantcallingExecuted = summary.getValue("shiva", "settings", "multisample_variantcalling") match { + case Some(true) => true + case _ => false + } + + override def extFiles = super.extFiles ++ List("js/gears.js") + .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) + /** Root page for the shiva report */ def indexPage = { val regions = regionsPage @@ -51,21 +59,22 @@ object ShivaReport extends MultisampleReportBuilder { ), Map()) ), List( - "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp"), - "Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", - Map("showPlot" -> true, "showTable" -> false)), - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", + "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp")) ++ + (if 
(variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", + Map("showPlot" -> true, "showTable" -> false))) + else Nil) ++ + List("Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false) ), - "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)) - ), + "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), + "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), + "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", + Map("showPlot" -> true, "showTable" -> false)), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", + Map("showPlot" -> true, "showTable" -> false)) + ), pageArgs ) } @@ -98,11 +107,14 @@ object ShivaReport extends MultisampleReportBuilder { } if (regionPages.nonEmpty) Some("Regions" -> ReportPage( - List(), - regionPages.map(p => p._1 -> ReportSection( - "/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp", - Map("target" -> p._1.stripSuffix(" (Amplicon)")) + regionPages.map(p => 
p._1 -> ReportPage(Nil, + List( + "Variants" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", Map("showPlot" -> true)), + "Coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp") + ), + Map("target" -> Some(p._1.stripSuffix(" (Amplicon)"))) )).toList.sortBy(_._1), + List(), Map()) ) else None @@ -114,9 +126,10 @@ object ShivaReport extends MultisampleReportBuilder { "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)), "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", - Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam")), - "VCF files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", Map("sampleId" -> None)) - ), Map()) + Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam"))) ++ + (if (variantcallingExecuted) List("VCF files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", + Map("sampleId" -> None))) + else Nil), Map()) /** Single sample page */ def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { @@ -127,23 +140,30 @@ object ShivaReport extends MultisampleReportBuilder { ), List( "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()), - "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true)), - "Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp"), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), 
args) + "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true))) ++ + (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp")) else Nil) ++ + List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") + ), args) } /** Library page */ def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { - ReportPage(List( - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)), - "QC" -> FlexiprepReport.flexiprepPage - ), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) + val flexiprepExecuted = summary.getLibraryValue(sampleId, libId, "flexiprep").isDefined + val krakenExecuted = summary.getValue(Some(sampleId), Some(libId), "gears", "stats", "krakenreport").isDefined + + ReportPage( + "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)) :: + (if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil + ) ::: (if (krakenExecuted) List("Gears - Metagenomics" -> ReportPage(List(), List( + "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp" + )), Map())) + else Nil), "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp") :: + (if (flexiprepExecuted) List( + "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") 
+ ) + else Nil), args) } /** Name of the report */ @@ -161,7 +181,9 @@ object ShivaReport extends MultisampleReportBuilder { prefix: String, summary: Summary, libraryLevel: Boolean = false, - sampleId: Option[String] = None): Unit = { + sampleId: Option[String] = None, + caller: String = "final", + target: Option[String] = None): Unit = { val tsvFile = new File(outputDir, prefix + ".tsv") val pngFile = new File(outputDir, prefix + ".png") val tsvWriter = new PrintWriter(tsvFile) @@ -169,14 +191,14 @@ object ShivaReport extends MultisampleReportBuilder { tsvWriter.println("\tHomVar\tHet\tHomRef\tNoCall") def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = { - val homVar = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "HomVar"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong - val homRef = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "HomRef"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong - val noCall = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "NoCall"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong - val het = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "Het"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong + val path = target match { + case Some(t) => List("shivavariantcalling", "stats", s"multisample-vcfstats-$caller-$t", "genotype") + case _ => List("shivavariantcalling", "stats", s"multisample-vcfstats-$caller", "genotype") + } + val homVar = new SummaryValue(path :+ "HomVar", summary, Some(sample), lib).value.getOrElse(0).toString.toLong + val homRef = new SummaryValue(path :+ "HomRef", summary, Some(sample), lib).value.getOrElse(0).toString.toLong + val noCall = new SummaryValue(path :+ "NoCall", summary, Some(sample), 
lib).value.getOrElse(0).toString.toLong + val het = new SummaryValue(path :+ "Het", summary, Some(sample), lib).value.getOrElse(0).toString.toLong val sb = new StringBuffer() if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t") sb.append(homVar + "\t") diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala index 79a5219751a35691885b4f61365f4d71b0333105..a810b5257a94479e5e7125e6755722543457ec30 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala @@ -15,18 +15,13 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import htsjdk.samtools.SamReaderFactory import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } -import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer -import nl.lumc.sasc.biopet.extensions.clever.CleverCaller -import nl.lumc.sasc.biopet.extensions.delly.Delly -import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.pipelines.shiva.svcallers.{ Delly, Breakdancer, Clever, SvCaller } +import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript -import scala.collection.JavaConversions._ - /** * Common trait for ShivaVariantcalling * @@ -40,25 +35,11 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) protected var inputBamsArg: List[File] = Nil - protected var inputBams: Map[String, File] = Map() - - def addBamFile(file: File, sampleId: Option[String] = None): Unit = { - sampleId match { - case Some(sample) => inputBams += sample -> file - case _ if 
!file.exists() => throw new IllegalArgumentException("Bam file does not exits: " + file) - case _ => { - val inputSam = SamReaderFactory.makeDefault.open(file) - val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct - if (samples.size == 1) { - inputBams += samples.head -> file - } else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) - } - } - } + var inputBams: Map[String, File] = Map() /** Executed before script */ def init(): Unit = { - inputBamsArg.foreach(addBamFile(_)) + if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) } /** Variantcallers requested by the config */ @@ -76,68 +57,16 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript require(inputBams.nonEmpty, "No input bams found") require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", ")) - callers.foreach(_.addJobs()) + callers.foreach { caller => + caller.outputDir = new File(outputDir, caller.name) + add(caller) + } addSummaryJobs() } /** Will generate all available variantcallers */ - protected def callersList: List[SvCaller] = List(new Breakdancer, new Clever, new Delly) - - /** General trait for a variantcaller mode */ - trait SvCaller { - /** Name of mode, this should also be used in the config */ - val name: String - - /** Output dir for this mode */ - def outputDir = new File(qscript.outputDir, name) - - /** This should add the variantcaller jobs */ - def addJobs() - } - - /** default mode of freebayes */ - class Breakdancer extends SvCaller { - val name = "breakdancer" - - def addJobs() { - //TODO: move minipipeline of breakdancer to here - for ((sample, bamFile) <- inputBams) { - val breakdancerDir = new File(outputDir, sample) - val breakdancer = Breakdancer(qscript, bamFile, breakdancerDir) - addAll(breakdancer.functions) - } - } - } - - /** default mode of bcftools */ - class Clever extends SvCaller { - val name = "clever" 
- - def addJobs() { - //TODO: check double directories - for ((sample, bamFile) <- inputBams) { - val cleverDir = new File(outputDir, sample) - val clever = CleverCaller(qscript, bamFile, cleverDir) - add(clever) - } - } - } - - /** Makes a vcf file from a mpileup without statistics */ - class Delly extends SvCaller { - val name = "delly" - - def addJobs() { - //TODO: Move mini delly pipeline to here - for ((sample, bamFile) <- inputBams) { - val dellyDir = new File(outputDir, sample) - val delly = Delly(qscript, bamFile, dellyDir) - delly.outputName = sample - addAll(delly.functions) - } - } - } + protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this)) /** Location of summary file */ def summaryFile = new File(outputDir, "ShivaSvCalling.summary.json") diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index b628744d325184b46b176c1b9b9871208b72a170..ca2706f710c23ae1625d2908bdc914a2a687afb2 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -15,16 +15,15 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import java.io.File - import htsjdk.samtools.SamReaderFactory -import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference } import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics +import nl.lumc.sasc.biopet.pipelines.bammetrics.{ TargetRegions, BamMetrics } import nl.lumc.sasc.biopet.pipelines.mapping.Mapping import nl.lumc.sasc.biopet.pipelines.toucan.Toucan +import nl.lumc.sasc.biopet.utils.Logging +import org.broadinstitute.gatk.queue.QScript import 
scala.collection.JavaConversions._ @@ -33,8 +32,7 @@ import scala.collection.JavaConversions._ * * Created by pjvan_thof on 2/26/15. */ -trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { - qscript => +trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { qscript: QScript => /** Executed before running the script */ def init(): Unit = { @@ -85,15 +83,20 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { /** Method to make a library */ def makeLibrary(id: String) = new Library(id) + /** Sample specific settings */ + override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined) + /** Class to generate jobs for a library */ class Library(libId: String) extends AbstractLibrary(libId) { /** Library specific files to add to the summary */ def summaryFiles: Map[String, File] = { - (bamFile, preProcessBam) match { + ((bamFile, preProcessBam) match { case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb) - case (Some(b), _) => Map("bamFile" -> b) + case (Some(b), _) => Map("bamFile" -> b, "preProcessBam" -> b) case _ => Map() - } + }) ++ (inputR1.map("input_R1" -> _) :: + inputR2.map("input_R2" -> _) :: + inputBam.map("input_bam" -> _) :: Nil).flatten.toMap } /** Library specific stats to add to summary */ @@ -102,6 +105,9 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { /** Method to execute library preprocess */ def preProcess(input: File): Option[File] = None + /** Library specific settings */ + override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined) + /** Method to make the mapping submodule */ def makeMapping = { val mapping = new Mapping(qscript) @@ -112,8 +118,20 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile)) } + def fileMustBeAbsulute(file: 
Option[File]): Option[File] = { + if (file.forall(_.isAbsolute)) file + else { + Logging.addError(s"$file for $sampleId / $libId should be a absolute file path") + file.map(_.getAbsoluteFile) + } + } + + lazy val inputR1: Option[File] = fileMustBeAbsulute(config("R1")) + lazy val inputR2: Option[File] = fileMustBeAbsulute(config("R2")) + lazy val inputBam: Option[File] = fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None) + lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) = - (config.contains("R1"), config.contains("bam")) match { + (inputR1.isDefined, inputBam.isDefined) match { case (true, _) => makeMapping // Default starting from fastq files case (false, true) => // Starting from bam file config("bam_to_fastq", default = false).asBoolean match { @@ -132,18 +150,18 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { /** This will add jobs for this library */ def addJobs(): Unit = { - (config.contains("R1"), config.contains("bam")) match { + (inputR1.isDefined, inputBam.isDefined) match { case (true, _) => mapping.foreach(mapping => { - mapping.input_R1 = config("R1") - mapping.input_R2 = config("R2") + mapping.input_R1 = inputR1.get + mapping.input_R2 = inputR2 inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5")) mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5"))) }) case (false, true) => { - inputFiles :+= new InputFile(config("bam"), config("bam_md5")) + inputFiles :+= new InputFile(inputBam.get, config("bam_md5")) config("bam_to_fastq", default = false).asBoolean match { case true => - val samToFastq = SamToFastq(qscript, config("bam"), + val samToFastq = SamToFastq(qscript, inputBam.get, new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"), new File(libDir, sampleId + "-" + libId + ".R2.fq.gz")) samToFastq.isIntermediate = true @@ -153,7 +171,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { 
mapping.input_R2 = Some(samToFastq.fastqR2) }) case false => - val inputSam = SamReaderFactory.makeDefault.open(config("bam")) + val inputSam = SamReaderFactory.makeDefault.open(inputBam.get) val readGroups = inputSam.getFileHeader.getReadGroups val readGroupOke = readGroups.forall(readGroup => { @@ -165,25 +183,37 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { if (!readGroupOke) { if (config("correct_readgroups", default = false).asBoolean) { - logger.info("Correcting readgroups, file:" + config("bam")) - val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get) + logger.info("Correcting readgroups, file:" + inputBam.get) + val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get) aorrg.RGID = sampleId + "-" + libId aorrg.RGLB = libId aorrg.RGSM = sampleId + aorrg.RGPL = "unknown" + aorrg.RGPU = "na" aorrg.isIntermediate = true qscript.add(aorrg) } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") } else { - val oldBamFile: File = config("bam") + val oldBamFile: File = inputBam.get val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai") + val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai") val baiLn = Ln(qscript, oldIndex, newIndex) add(baiLn) val bamLn = Ln(qscript, oldBamFile, bamFile.get) bamLn.deps :+= baiLn.output add(bamLn) + + val bamMetrics = new BamMetrics(qscript) + bamMetrics.sampleId = Some(sampleId) + bamMetrics.libId = Some(libId) + bamMetrics.inputBam = bamFile.get + bamMetrics.outputDir = new File(libDir, "metrics") + bamMetrics.init() + bamMetrics.biopetScript() + addAll(bamMetrics.functions) + addSummaryQScript(bamMetrics) } } } @@ -201,8 +231,8 @@ 
trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { vc.sampleId = Some(sampleId) vc.libId = Some(libId) vc.outputDir = new File(libDir, "variantcalling") - if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil - else vc.inputBams = bamFile.get :: Nil + if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get) + else vc.inputBams = Map(sampleId -> bamFile.get) vc.init() vc.biopetScript() addAll(vc.functions) @@ -215,9 +245,9 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { if (input == Nil) None else if (input.tail == Nil) { - val bamFile = new File(sampleDir, input.head.getName) + val bamFile = new File(sampleDir, s"$sampleId.bam") val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai") + val newIndex: File = new File(sampleDir, s"$sampleId.bai") val baiLn = Ln(qscript, oldIndex, newIndex) add(baiLn) @@ -230,7 +260,8 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { md.input = input md.output = new File(sampleDir, sampleId + ".dedup.bam") md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics") - md.isIntermediate = isIntermediate + //FIXME: making this file intermediate make the pipeline restart unnessery jobs + //md.isIntermediate = isIntermediate add(md) addSummarizable(md, "mark_duplicates") Some(md.output) @@ -263,15 +294,10 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { addAll(bamMetrics.functions) addSummaryQScript(bamMetrics) - val oldIndex: File = new File(bam.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(bam + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - variantcalling.foreach(vc => { vc.sampleId = 
Some(sampleId) vc.outputDir = new File(sampleDir, "variantcalling") - vc.inputBams = bam :: Nil + vc.inputBams = Map(sampleId -> bam) vc.init() vc.biopetScript() addAll(vc.functions) @@ -281,7 +307,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { } } - lazy val variantCalling = if (config("multisample_variantcalling", default = true).asBoolean) { + lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) { Some(makeVariantcalling(multisample = true)) } else None @@ -289,18 +315,22 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { Some(new ShivaSvCalling(this)) } else None + lazy val annotation = if (multisampleVariantCalling.isDefined && + config("annotation", default = false).asBoolean) { + Some(new Toucan(this)) + } else None + /** This will add the mutisample variantcalling */ def addMultiSampleJobs(): Unit = { - variantCalling.foreach(vc => { + multisampleVariantCalling.foreach(vc => { vc.outputDir = new File(outputDir, "variantcalling") - vc.inputBams = samples.flatMap(_._2.preProcessBam).toList + vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } vc.init() vc.biopetScript() addAll(vc.functions) addSummaryQScript(vc) - if (config("annotation", default = false).asBoolean) { - val toucan = new Toucan(this) + annotation.foreach { toucan => toucan.outputDir = new File(outputDir, "annotation") toucan.inputVCF = vc.finalFile toucan.init() @@ -312,7 +342,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { svCalling.foreach(sv => { sv.outputDir = new File(outputDir, "sv_calling") - samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1)))) + sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } sv.init() sv.biopetScript() addAll(sv.functions) @@ -324,16 +354,14 @@ trait ShivaTrait extends 
MultiSampleQScript with SummaryQScript with Reference { def summaryFile = new File(outputDir, "Shiva.summary.json") /** Settings of pipeline for summary */ - def summarySettings = { - val roiBedFiles: List[File] = config("regions_of_interest", Nil) - val ampliconBedFile: Option[File] = config("amplicon_bed") - - Map( - "reference" -> referenceSummary, - "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), - "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) - ) - } + def summarySettings = Map( + "reference" -> referenceSummary, + "annotation" -> annotation.isDefined, + "multisample_variantcalling" -> multisampleVariantCalling.isDefined, + "sv_calling" -> svCalling.isDefined, + "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), + "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) + ) /** Files for the summary */ def summaryFiles = Map("referenceFasta" -> referenceFasta()) diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala index 6eb2832c17acea6bcdc0d6766d48d3100dc73472..31ba0a53f0fc034b406cca237656722ca9de9d6f 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala @@ -15,28 +15,37 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import java.io.File - import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag } -import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } -import nl.lumc.sasc.biopet.extensions.gatk.{ GenotypeConcordance, CombineVariants } -import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup -import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } -import 
nl.lumc.sasc.biopet.extensions.{ Ln, Bgzip, Tabix } -import nl.lumc.sasc.biopet.utils.Logging -import org.broadinstitute.gatk.utils.commandline.Input +import nl.lumc.sasc.biopet.extensions.Tabix +import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance } +import nl.lumc.sasc.biopet.extensions.tools.VcfStats +import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize } +import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers._ +import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } +import org.broadinstitute.gatk.queue.QScript /** * Common trait for ShivaVariantcalling * * Created by pjvan_thof on 2/26/15. */ -trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with Reference { - qscript => +trait ShivaVariantcallingTrait extends SummaryQScript + with SampleLibraryTag + with Reference + with TargetRegions { + qscript: QScript => @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) - var inputBams: List[File] = Nil + protected var inputBamsArg: List[File] = Nil + + var inputBams: Map[String, File] = Map() + + /** Executed before script */ + def init(): Unit = { + if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) + } var referenceVcf: Option[File] = config("reference_vcf") @@ -53,25 +62,22 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ"))) - /** Executed before script */ - def init(): Unit = { - } - /** Final merged output files of all variantcaller modes */ def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz") /** Variantcallers requested by the config */ protected val configCallers: Set[String] = config("variantcallers") + protected val callers: List[Variantcaller] = { + (for (name <- configCallers) yield { + if (!callersList.exists(_.name == name)) + 
Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) + callersList.find(_.name == name) + }).flatten.toList.sortBy(_.prio) + } + /** This will add jobs for this pipeline */ def biopetScript(): Unit = { - for (cal <- configCallers) { - if (!callersList.exists(_.name == cal)) - Logging.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) - } - - val callers = callersList.filter(x => configCallers.contains(x.name)).sortBy(_.prio) - require(inputBams.nonEmpty, "No input bams found") require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", ")) @@ -81,215 +87,89 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with cv.genotypeMergeOptions = Some("PRIORITIZE") cv.rodPriorityList = callers.map(_.name).mkString(",") for (caller <- callers) { - caller.addJobs() - cv.addInput(caller.outputFile, caller.name) - - val vcfStats = new VcfStats(qscript) - vcfStats.input = caller.outputFile - vcfStats.setOutputDir(new File(caller.outputDir, "vcfstats")) - add(vcfStats) - addSummarizable(vcfStats, namePrefix + "-vcfstats-" + caller.name) - - referenceVcf.foreach(referenceVcfFile => { - val gc = new GenotypeConcordance(this) - gc.evalFile = caller.outputFile - gc.compFile = referenceVcfFile - gc.outputFile = new File(caller.outputDir, s"$namePrefix-genotype_concordance.${caller.name}.txt") - referenceVcfRegions.foreach(gc.intervals ::= _) - add(gc) - addSummarizable(gc, s"$namePrefix-genotype_concordance-${caller.name}") - }) + caller.inputBams = inputBams + caller.namePrefix = namePrefix + caller.outputDir = new File(outputDir, caller.name) + add(caller) + addStats(caller.outputFile, caller.name) + val normalize: Boolean = config("execute_vt_normalize", default = false, submodule = caller.configName) + val decompose: Boolean = config("execute_vt_decompose", default = 
false, submodule = caller.configName) + + val vtNormalize = new VtNormalize(this) + vtNormalize.inputVcf = caller.outputFile + val vtDecompose = new VtDecompose(this) + + if (normalize && decompose) { + vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") + vtNormalize.isIntermediate = true + add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) + vtDecompose.inputVcf = vtNormalize.outputVcf + vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz") + add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) + cv.addInput(vtDecompose.outputVcf, caller.name) + } else if (normalize && !decompose) { + vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") + add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) + cv.addInput(vtNormalize.outputVcf, caller.name) + } else if (!normalize && decompose) { + vtDecompose.inputVcf = caller.outputFile + vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz") + add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) + cv.addInput(vtDecompose.outputVcf, caller.name) + } else cv.addInput(caller.outputFile, caller.name) } add(cv) + addStats(finalFile, "final") + + addSummaryJobs() + } + + protected def addStats(vcfFile: File, name: String): Unit = { val vcfStats = new VcfStats(qscript) - vcfStats.input = finalFile - vcfStats.setOutputDir(new File(outputDir, "vcfstats")) - vcfStats.infoTags :+= cv.setKey + vcfStats.input = vcfFile + vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats")) + if (name == "final") vcfStats.infoTags :+= "VariantCaller" add(vcfStats) - addSummarizable(vcfStats, namePrefix + "-vcfstats-final") + addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name") referenceVcf.foreach(referenceVcfFile => { val gc = new GenotypeConcordance(this) - gc.evalFile = finalFile + gc.evalFile = vcfFile gc.compFile = referenceVcfFile - 
gc.outputFile = new File(outputDir, s"$namePrefix-genotype_concordance.final.txt") + gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") referenceVcfRegions.foreach(gc.intervals ::= _) add(gc) - addSummarizable(gc, s"$namePrefix-genotype_concordance-final") + addSummarizable(gc, s"$namePrefix-genotype_concordance-$name") }) - addSummaryJobs() - } - - /** Will generate all available variantcallers */ - protected def callersList: List[Variantcaller] = List(new Freebayes, new RawVcf, new Bcftools, new BcftoolsSingleSample) - - /** General trait for a variantcaller mode */ - trait Variantcaller { - /** Name of mode, this should also be used in the config */ - val name: String - - /** Output dir for this mode */ - def outputDir = new File(qscript.outputDir, name) - - /** Prio in merging in the final file */ - protected val defaultPrio: Int - - /** Prio from the config */ - lazy val prio: Int = config("prio_" + name, default = defaultPrio) - - /** This should add the variantcaller jobs */ - def addJobs() - - /** Final output file of this mode */ - def outputFile: File - } - - /** default mode of freebayes */ - class Freebayes extends Variantcaller { - val name = "freebayes" - protected val defaultPrio = 7 - - /** Final output file of this mode */ - def outputFile = new File(outputDir, namePrefix + ".freebayes.vcf.gz") - - def addJobs() { - val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(qscript) - fb.bamfiles = inputBams - fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf") - fb.isIntermediate = true - add(fb) - - //TODO: need piping for this, see also issue #114 - val bz = new Bgzip(qscript) - bz.input = List(fb.outputVcf) - bz.output = outputFile - add(bz) - - val ti = new Tabix(qscript) - ti.input = bz.output - ti.p = Some("vcf") - add(ti) - } - } - - /** default mode of bcftools */ - class Bcftools extends Variantcaller { - val name = "bcftools" - protected val defaultPrio = 8 - - /** Final output file of 
this mode */ - def outputFile = new File(outputDir, namePrefix + ".bcftools.vcf.gz") - - def addJobs() { - val mp = new SamtoolsMpileup(qscript) - mp.input = inputBams - mp.u = true - mp.reference = referenceFasta() - - val bt = new BcftoolsCall(qscript) - bt.O = Some("z") - bt.v = true - bt.c = true - - add(mp | bt > outputFile) - add(Tabix(qscript, outputFile)) - } - } - - /** default mode of bcftools */ - class BcftoolsSingleSample extends Variantcaller { - val name = "bcftools_singlesample" - protected val defaultPrio = 8 - - /** Final output file of this mode */ - def outputFile = new File(outputDir, namePrefix + ".bcftools_singlesample.vcf.gz") - - def addJobs() { - val sampleVcfs = for (inputBam <- inputBams) yield { - val mp = new SamtoolsMpileup(qscript) - mp.input :+= inputBam - mp.u = true - mp.reference = referenceFasta() - - val bt = new BcftoolsCall(qscript) - bt.O = Some("z") - bt.v = true - bt.c = true - bt.output = new File(outputDir, inputBam.getName + ".vcf.gz") - - add(mp | bt) - add(Tabix(qscript, bt.output)) - bt.output - } - - if (sampleVcfs.size > 1) { - val bcfmerge = new BcftoolsMerge(qscript) - bcfmerge.input = sampleVcfs - bcfmerge.output = outputFile - bcfmerge.O = Some("z") - add(bcfmerge) - } else add(Ln.apply(qscript, sampleVcfs.head, outputFile)) - add(Tabix(qscript, outputFile)) + for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) { + val regionName = bedFile.getName.stripSuffix(".bed") + val vcfStats = new VcfStats(qscript) + vcfStats.input = vcfFile + vcfStats.intervals = Some(bedFile) + vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName")) + if (name == "final") vcfStats.infoTags :+= "VariantCaller" + add(vcfStats) + addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName") } } - /** Makes a vcf file from a mpileup without statistics */ - class RawVcf extends Variantcaller { - val name = "raw" - - // This caller is designed as fallback when other variantcallers fails to report - 
protected val defaultPrio = Int.MaxValue - - /** Final output file of this mode */ - def outputFile = new File(outputDir, namePrefix + ".raw.vcf.gz") - - def addJobs() { - val rawFiles = inputBams.map(bamFile => { - val mp = new SamtoolsMpileup(qscript) { - override def configName = "samtoolsmpileup" - override def defaults = Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)) - } - mp.input :+= bamFile - - val m2v = new MpileupToVcf(qscript) - m2v.inputBam = bamFile - m2v.output = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.vcf") - add(mp | m2v) - - val vcfFilter = new VcfFilter(qscript) { - override def configName = "vcffilter" - override def defaults = Map("min_sample_depth" -> 8, - "min_alternate_depth" -> 2, - "min_samples_pass" -> 1, - "filter_ref_calls" -> true - ) - } - vcfFilter.inputVcf = m2v.output - vcfFilter.outputVcf = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.filter.vcf.gz") - add(vcfFilter) - vcfFilter.outputVcf - }) - - val cv = new CombineVariants(qscript) - cv.inputFiles = rawFiles - cv.outputFile = outputFile - cv.setKey = "null" - cv.excludeNonVariants = true - add(cv) - } - } + /** Will generate all available variantcallers */ + protected def callersList: List[Variantcaller] = List(new Freebayes(this), new RawVcf(this), new Bcftools(this), new BcftoolsSingleSample(this)) /** Location of summary file */ def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json") /** Settings for the summary */ - def summarySettings = Map("variantcallers" -> configCallers.toList) + def summarySettings = Map( + "variantcallers" -> configCallers.toList, + "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), + "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) + ) /** Files for the summary */ def summaryFiles: Map[String, File] = { - val callers: Set[String] = config("variantcallers") - callersList.filter(x => callers.contains(x.name)).map(x => 
x.name -> x.outputFile).toMap + ("final" -> finalFile) + callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) } } \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala new file mode 100644 index 0000000000000000000000000000000000000000..d7ff2ac12e78f7e149a3beea14eaea766d385079 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala @@ -0,0 +1,23 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.extensions.breakdancer.{ BreakdancerVCF, BreakdancerCaller, BreakdancerConfig } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Script for sv caler Breakdancer */ +class Breakdancer(val root: Configurable) extends SvCaller { + def name = "breakdancer" + + def biopetScript() { + for ((sample, bamFile) <- inputBams) { + val breakdancerSampleDir = new File(outputDir, sample) + + // read config and set all parameters for the pipeline + logger.debug("Starting Breakdancer configuration") + + val bdcfg = BreakdancerConfig(this, bamFile, new File(breakdancerSampleDir, sample + ".breakdancer.cfg")) + val breakdancer = BreakdancerCaller(this, bdcfg.output, new File(breakdancerSampleDir, sample + ".breakdancer.tsv")) + val bdvcf = BreakdancerVCF(this, breakdancer.output, new File(breakdancerSampleDir, sample + ".breakdancer.vcf")) + add(bdcfg, breakdancer, bdvcf) + } + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala new file mode 100644 index 0000000000000000000000000000000000000000..ff98b32e5f9fe23f6f9e8b52334742b29973da5c --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala @@ -0,0 +1,18 @@ 
+package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.extensions.clever.CleverCaller +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Script for sv caler Clever */ +class Clever(val root: Configurable) extends SvCaller { + def name = "clever" + + def biopetScript() { + //TODO: check double directories + for ((sample, bamFile) <- inputBams) { + val cleverDir = new File(outputDir, sample) + val clever = CleverCaller(this, bamFile, cleverDir) + add(clever) + } + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala new file mode 100644 index 0000000000000000000000000000000000000000..8197f8cc7ea4ff6c72b37e913b8030af17e51151 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala @@ -0,0 +1,60 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.extensions.delly.DellyCaller +import nl.lumc.sasc.biopet.extensions.gatk.CatVariants +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Script for sv caler delly */ +class Delly(val root: Configurable) extends SvCaller { + def name = "delly" + + val del: Boolean = config("DEL", default = true) + val dup: Boolean = config("DUP", default = true) + val inv: Boolean = config("INV", default = true) + val tra: Boolean = config("TRA", default = true) + + def biopetScript() { + for ((sample, bamFile) <- inputBams) { + val dellyDir = new File(outputDir, sample) + + val catVariants = new CatVariants(this) + catVariants.outputFile = new File(dellyDir, sample + ".delly.vcf.gz") + + /// start delly and then copy the vcf into the root directory "<sample>.delly/" + if (del) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "DEL" + delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf") + add(delly) + catVariants.inputFiles :+= 
delly.outputvcf + } + if (dup) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "DUP" + delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf") + add(delly) + catVariants.inputFiles :+= delly.outputvcf + } + if (inv) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "INV" + delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf") + add(delly) + catVariants.inputFiles :+= delly.outputvcf + } + if (tra) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "TRA" + delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf") + catVariants.inputFiles :+= delly.outputvcf + add(delly) + } + + add(catVariants) + } + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala new file mode 100644 index 0000000000000000000000000000000000000000..1c63aa7b86954ea2d1fba84473b0cc67eb9a76d0 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala @@ -0,0 +1,19 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript } +import org.broadinstitute.gatk.queue.QScript + +/** + * Created by pjvanthof on 23/11/15. 
+ */ +trait SvCaller extends QScript with BiopetQScript with Reference { + + /** Name of mode, this should also be used in the config */ + def name: String + + var namePrefix: String = _ + + var inputBams: Map[String, File] = _ + + def init() = {} +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala new file mode 100644 index 0000000000000000000000000000000000000000..fc88e6c212eb8fc4f6dbfb5fbaced0b1a666b195 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala @@ -0,0 +1,27 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import nl.lumc.sasc.biopet.extensions.Tabix +import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsCall +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** default mode of bcftools */ +class Bcftools(val root: Configurable) extends Variantcaller { + val name = "bcftools" + protected def defaultPrio = 8 + + def biopetScript { + val mp = new SamtoolsMpileup(this) + mp.input = inputBams.values.toList + mp.u = true + mp.reference = referenceFasta() + + val bt = new BcftoolsCall(this) + bt.O = Some("z") + bt.v = true + bt.c = true + + add(mp | bt > outputFile) + add(Tabix(this, outputFile)) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala new file mode 100644 index 0000000000000000000000000000000000000000..80f0980e3d31cd658c30f93693fa844f0218ef6b --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala @@ -0,0 +1,42 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import java.io.File + 
+import nl.lumc.sasc.biopet.extensions.{ Ln, Tabix } +import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsMerge, BcftoolsCall } +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** default mode of bcftools */ +class BcftoolsSingleSample(val root: Configurable) extends Variantcaller { + val name = "bcftools_singlesample" + protected def defaultPrio = 8 + + def biopetScript { + val sampleVcfs = for ((sample, inputBam) <- inputBams.toList) yield { + val mp = new SamtoolsMpileup(this) + mp.input :+= inputBam + mp.u = true + mp.reference = referenceFasta() + + val bt = new BcftoolsCall(this) + bt.O = Some("z") + bt.v = true + bt.c = true + bt.output = new File(outputDir, sample + ".vcf.gz") + + add(mp | bt) + add(Tabix(this, bt.output)) + bt.output + } + + if (sampleVcfs.size > 1) { + val bcfmerge = new BcftoolsMerge(this) + bcfmerge.input = sampleVcfs + bcfmerge.output = outputFile + bcfmerge.O = Some("z") + add(bcfmerge) + } else add(Ln.apply(this, sampleVcfs.head, outputFile)) + add(Tabix(this, outputFile)) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala new file mode 100644 index 0000000000000000000000000000000000000000..3227c2dc2fc845fdeb18f3915ae4c692369fdcca --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala @@ -0,0 +1,28 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.{ Tabix, Bgzip } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** default mode of freebayes */ +class Freebayes(val root: Configurable) extends Variantcaller { + val name = "freebayes" + protected def defaultPrio = 7 + + def biopetScript { + val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(this) + 
fb.bamfiles = inputBams.values.toList + fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf") + fb.isIntermediate = true + add(fb) + + //TODO: need piping for this, see also issue #114 + val bz = new Bgzip(this) + bz.input = List(fb.outputVcf) + bz.output = outputFile + add(bz) + + add(Tabix.apply(this, bz.output)) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala new file mode 100644 index 0000000000000000000000000000000000000000..3999ba64b0c8ec36eca933127ca60d8e7da8d201 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala @@ -0,0 +1,52 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup +import nl.lumc.sasc.biopet.extensions.tools.{ VcfFilter, MpileupToVcf } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Makes a vcf file from a mpileup without statistics */ +class RawVcf(val root: Configurable) extends Variantcaller { + val name = "raw" + + // This caller is designed as fallback when other variantcallers fails to report + protected def defaultPrio = Int.MaxValue + + def biopetScript { + val rawFiles = inputBams.map { + case (sample, bamFile) => + val mp = new SamtoolsMpileup(this) { + override def configName = "samtoolsmpileup" + override def defaults = Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)) + } + mp.input :+= bamFile + + val m2v = new MpileupToVcf(this) + m2v.inputBam = bamFile + m2v.output = new File(outputDir, sample + ".raw.vcf") + add(mp | m2v) + + val vcfFilter = new VcfFilter(this) { + override def configName = "vcffilter" + override def defaults = Map("min_sample_depth" -> 8, + "min_alternate_depth" -> 2, + 
"min_samples_pass" -> 1, + "filter_ref_calls" -> true + ) + } + vcfFilter.inputVcf = m2v.output + vcfFilter.outputVcf = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.filter.vcf.gz") + add(vcfFilter) + vcfFilter.outputVcf + } + + val cv = new CombineVariants(this) + cv.inputFiles = rawFiles.toList + cv.outputFile = outputFile + cv.setKey = "null" + cv.excludeNonVariants = true + add(cv) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala new file mode 100644 index 0000000000000000000000000000000000000000..eb0a9ffccd11e6bb9d3a4d92af18ba381ce009eb --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala @@ -0,0 +1,29 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import nl.lumc.sasc.biopet.core.{ BiopetQScript, Reference } +import org.broadinstitute.gatk.queue.QScript + +/** + * Created by pjvan_thof on 11/19/15. 
+ */ +trait Variantcaller extends QScript with BiopetQScript with Reference { + + /** Name of mode, this should also be used in the config */ + def name: String + + var namePrefix: String = _ + + var inputBams: Map[String, File] = _ + + def init() = {} + + /** Prio in merging in the final file */ + protected def defaultPrio: Int + + /** Prio from the config */ + lazy val prio: Int = config("prio_" + name, default = defaultPrio) + + /** Final output file of this mode */ + def outputFile: File = new File(outputDir, namePrefix + s".$name.vcf.gz") +} + diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index 54f5a455364dcd09878667d3a52ca139222190b6..94edda97a3e893ab8bba6d4016f003c2b0242b57 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -21,7 +21,7 @@ import com.google.common.io.Files import nl.lumc.sasc.biopet.utils.config.Config import nl.lumc.sasc.biopet.extensions.Freebayes import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter } +import nl.lumc.sasc.biopet.extensions.tools.VcfFilter import nl.lumc.sasc.biopet.utils.ConfigUtils import org.apache.commons.io.FileUtils import org.broadinstitute.gatk.queue.QSettings @@ -72,7 +72,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) - pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList + pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && 
!bcftools && !bcftools_singlesample && !freebayes) diff --git a/public/src/src/test/resources/log4j.properties b/public/src/src/test/resources/log4j.properties index 52fb824b0a8088346ed39f9de816309d0569ecf6..501af67582a546db584c8538b28cb6f9e07f1692 100644 --- a/public/src/src/test/resources/log4j.properties +++ b/public/src/src/test/resources/log4j.properties @@ -1,7 +1,17 @@ # -# Due to the license issue with GATK, this part of Biopet can only be used inside the -# LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions -# on how to use this protected part of biopet or contact us at sasc@lumc.nl +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. # # Set root logger level to DEBUG and its only appender to A1. 
diff --git a/public/toucan/pom.xml b/public/toucan/pom.xml index ff6f74eba21281d4010925053466265e017f95ee..64edb91cb6e6da1bcdbc8724591239c5756d1af2 100644 --- a/public/toucan/pom.xml +++ b/public/toucan/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + <version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 9122cc760397e85d04436bb560dcea916441abc7..974df31da36c73f8bc4c432f86f8e5b0ab0b4ee3 100644 --- a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -51,7 +51,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum } override def defaults = Map( - "varianteffectpredictor" -> Map("everything" -> true) + "varianteffectpredictor" -> Map("everything" -> true, "failed" -> 1, "allow_non_variant" -> true) ) //defaults ++= Map("varianteffectpredictor" -> Map("everything" -> true)) @@ -100,11 +100,13 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum vcfWithVcf.input = outputFile vcfWithVcf.secondaryVcf = exacFile vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz") - vcfWithVcf.fields ::= ("MAF", "MAF_exac", None) + vcfWithVcf.fields ::= ("AF", "AF_exac", None) add(vcfWithVcf) outputFile = vcfWithVcf.output case _ => } + + addSummaryJobs() } /** diff --git a/public/yamsvp/pom.xml b/public/yamsvp/pom.xml index 48742ab6b693c9d2be72cbc2b3b61682ed3ffe14..6f06fcc7dbd1a28fcb1b4a091b3d254929289363 100644 --- a/public/yamsvp/pom.xml +++ b/public/yamsvp/pom.xml @@ -25,7 +25,7 @@ <parent> <groupId>nl.lumc.sasc</groupId> <artifactId>Biopet</artifactId> - <version>0.5.0-SNAPSHOT</version> + 
<version>0.6.0-SNAPSHOT</version> <relativePath>../</relativePath> </parent> diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala b/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala index a0ade5706206c60d9a1a470002c783d0b98a3590..59489d2caa89ac74af08b53c46b781f3e074760d 100644 --- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala +++ b/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala @@ -21,15 +21,15 @@ package nl.lumc.sasc.biopet.pipelines.yamsvp import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } +import nl.lumc.sasc.biopet.core.{MultiSampleQScript, PipelineCommand} import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer import nl.lumc.sasc.biopet.extensions.clever.CleverCaller import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount -import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge } +import nl.lumc.sasc.biopet.extensions.sambamba.{SambambaMarkdup, SambambaMerge} +import nl.lumc.sasc.biopet.pipelines.shiva.Delly +import nl.lumc.sasc.biopet.utils.config.Configurable //import nl.lumc.sasc.biopet.extensions.pindel.Pindel -import nl.lumc.sasc.biopet.extensions.delly.Delly import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.mapping.Mapping import org.broadinstitute.gatk.queue.QScript