Commit f602182e authored by Peter van 't Hof

Merge remote-tracking branch 'remotes/origin/develop' into fix-BIOPET-425

parents f6ab5da0 ebebec1a
......@@ -44,6 +44,7 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe
var minAP: Option[Int] = config("min_ap")
var homoFraction: Option[Double] = config("homoFraction")
var ploidy: Option[Int] = config("ploidy")
var refCalls: Boolean = config("ref_calls", default = false)
var sample: String = _
var reference: String = _
......@@ -72,6 +73,7 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe
optional("--minAP", minAP) +
optional("--homoFraction", homoFraction) +
optional("--ploidy", ploidy) +
conditional(refCalls, "--refCalls") +
required("--sample", sample) +
(if (inputAsStdin) "" else required("-I", inputMpileup))
}
......@@ -40,7 +40,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.MpileupToVcf,
nl.lumc.sasc.biopet.tools.PrefixFastq,
nl.lumc.sasc.biopet.tools.SageCountFastq,
nl.lumc.sasc.biopet.tools.SamplesTsvToJson,
nl.lumc.sasc.biopet.tools.SamplesTsvToConfig,
nl.lumc.sasc.biopet.tools.SeqStat,
nl.lumc.sasc.biopet.tools.SquishBed,
nl.lumc.sasc.biopet.tools.SummaryToTsv,
......
......@@ -27,7 +27,7 @@ import scala.math.{ floor, round }
object MpileupToVcf extends ToolCommand {
case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2,
homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005) extends AbstractArgs
homoFraction: Double = 0.8, ploidy: Int = 2, seqError: Double = 0.005, refCalls: Boolean = false) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "input") valueName "<file>" action { (x, c) =>
......@@ -54,6 +54,9 @@ object MpileupToVcf extends ToolCommand {
opt[Double]("seqError") action { (x, c) =>
c.copy(seqError = x)
}
opt[Unit]("refCalls") action { (x, c) =>
c.copy(refCalls = true)
}
}
/**
......@@ -66,6 +69,7 @@ object MpileupToVcf extends ToolCommand {
val writer = new PrintWriter(commandArgs.output)
writer.println("##fileformat=VCFv4.1")
writer.println("##ALT=<ID=REF,Description=\"Placeholder if location has no ALT alleles\">")
writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">")
writer.println("##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
......@@ -172,7 +176,7 @@ object MpileupToVcf extends ToolCommand {
case _ =>
}
if (alt.nonEmpty) {
if (alt.nonEmpty || commandArgs.refCalls) {
val ad = for (ad <- format("AD").toString.split(",")) yield ad.toInt
var left = reads - dels
val gt = ArrayBuffer[Int]()
......@@ -187,11 +191,11 @@ object MpileupToVcf extends ToolCommand {
}
left -= ad(max)
}
writer.println(Array(chr, pos, ".", ref.toUpperCase, alt.mkString(","), ".", ".", info.mkString(";"),
writer.println(Array(chr, pos, ".", ref.toUpperCase, if (alt.nonEmpty) alt.mkString(",") else "<REF>", ".", ".", info.mkString(";"),
"GT:" + format.keys.mkString(":"), gt.sortWith(_ < _).mkString("/") + ":" + format.values.mkString(":")
).mkString("\t"))
}
}
writer.close()
}
}
\ No newline at end of file
}
......@@ -14,18 +14,18 @@
*/
package nl.lumc.sasc.biopet.tools
import java.io.{ PrintWriter, File }
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.ConfigUtils._
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.collection.mutable
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, ToolCommand }
import scala.collection.mutable
import scala.io.Source
/**
* This tool can convert a TSV file to a JSON or YAML config file
*/
object SamplesTsvToJson extends ToolCommand {
object SamplesTsvToConfig extends ToolCommand {
case class Args(inputFiles: List[File] = Nil,
tagFiles: List[File] = Nil,
outputFile: Option[File] = None) extends AbstractArgs
......@@ -39,7 +39,10 @@ object SamplesTsvToJson extends ToolCommand {
}
opt[File]('o', "outputFile") unbounded () valueName "<file>" action { (x, c) =>
c.copy(outputFile = Some(x))
}
} text """
|When the extension is .yml or .yaml the output is in YAML format, otherwise it is in JSON.
|When no output file is given the output goes to stdout as YAML.
""".stripMargin
}
/** Executes SamplesTsvToConfig */
......@@ -47,14 +50,16 @@ object SamplesTsvToJson extends ToolCommand {
val argsParser = new OptParser
val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
val jsonString = stringFromInputs(cmdArgs.inputFiles, cmdArgs.tagFiles)
val configMap = stringFromInputs(cmdArgs.inputFiles, cmdArgs.tagFiles)
cmdArgs.outputFile match {
case Some(file) if file.getName.endsWith(".yml") || file.getName.endsWith(".yaml") =>
ConfigUtils.mapToYamlFile(configMap, file)
case Some(file) => {
val writer = new PrintWriter(file)
writer.println(jsonString)
writer.println(ConfigUtils.mapToJson(configMap).spaces2)
writer.close()
}
case _ => println(jsonString)
case _ => println(ConfigUtils.mapToYaml(configMap))
}
}
......@@ -94,11 +99,11 @@ object SamplesTsvToJson extends ToolCommand {
librariesValues.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv))
}
def stringFromInputs(inputs: List[File], tagsInputs: List[File]): String = {
def stringFromInputs(inputs: List[File], tagsInputs: List[File]): Map[String, Any] = {
val map = inputs.map(f => mapFromFile(f))
.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv))
val tags = tagsInputs.map(f => mapFromFile(f, tags = true))
.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv))
mapToJson(mergeMaps(map, tags)).spaces2
mergeMaps(map, tags)
}
}
......@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
import org.scalatest.testng.TestNGSuite
......@@ -25,8 +26,8 @@ import org.testng.annotations.Test
/**
* Created by ahbbollen on 28-8-15.
*/
class SamplesTsvToJsonTest extends TestNGSuite with MockitoSugar with Matchers {
import SamplesTsvToJson._
class SamplesTsvToConfigTest extends TestNGSuite with MockitoSugar with Matchers {
import SamplesTsvToConfig._
private def resourcePath(p: String): String = {
Paths.get(getClass.getResource(p).toURI).toString
}
......@@ -73,7 +74,7 @@ class SamplesTsvToJsonTest extends TestNGSuite with MockitoSugar with Matchers {
val tsv = new File(resourcePath("/sample.tsv"))
val json = stringFromInputs(List(tsv), Nil)
json should equal(
ConfigUtils.mapToJson(json).spaces2 should equal(
"""|{
| "samples" : {
| "Sample_ID_1" : {
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import nl.lumc.sasc.biopet.tools.SamplesTsvToJson._
import nl.lumc.sasc.biopet.tools.SamplesTsvToConfig._
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
import org.scalatest.testng.TestNGSuite
......
......@@ -76,7 +76,7 @@
<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
<version>1.15</version>
<version>1.17</version>
</dependency>
<dependency>
<groupId>io.argonaut</groupId>
......
......@@ -14,7 +14,7 @@
*/
package nl.lumc.sasc.biopet.utils
import java.io.File
import java.io.{File, PrintWriter}
import java.util
import argonaut.Argonaut._
......@@ -150,7 +150,18 @@ object ConfigUtils extends Logging {
def yamlToMap(file: File): Map[String, Any] = {
val yaml = new Yaml()
val a = yaml.load(scala.io.Source.fromFile(file).reader())
ConfigUtils.any2map(a)
if (a == null) throw new IllegalStateException(s"File '$file' is an empty file")
else ConfigUtils.any2map(a)
}
lazy val yaml = new Yaml()
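/** Converts a map to a YAML string; the map is serialized to JSON first so snakeyaml only has to parse plain structures (JSON is valid YAML flow style). */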
def mapToYaml(map: Map[String, Any]) = yaml.dump(yaml.load(ConfigUtils.mapToJson(map).nospaces))
def mapToYamlFile(map: Map[String, Any], outputFile: File) = {
val writer = new PrintWriter(outputFile)
writer.println(mapToYaml(map))
writer.close()
}
/** Convert json to native scala map/values */
......
......@@ -20,8 +20,8 @@ samples:
Sample_ID1:
libraries:
MySeries_1:
R1: R1.fastq.gz
R2: R2.fastq.gz
R1: /path/to/R1.fastq.gz
R2: /path/to/R2.fastq.gz
```
###### JSON:
......
......@@ -35,15 +35,15 @@ The actual path will vary from version to version, which is controlled by which
Almost all of the pipelines have a common usage pattern with a similar set of flags, for example:
~~~
$ biopet pipeline <pipeline_name> -config <path/to/config.json> -qsub -jobParaEnv BWA -retry 2
$ biopet pipeline <pipeline_name> -config <path/to/config.json> -qsub -jobParaEnv BWA -jobQueue all.q -retry 2
~~~
The command above will do a *dry* run of a pipeline using a config file as if the command would be submitted to the SHARK cluster (the `-qsub` flag) to the `BWA` parallel environment (the `-jobParaEnv BWA` flag). We also set the maximum retry of failing jobs to two times (via the `-retry 2` flag). Doing a dry run is a good idea to ensure that your real run proceeds smoothly. It may not catch all the errors, but if the dry run fails you can be sure that the real run will never succeed.
The command above will do a *dry* run of a pipeline using a config file as if the command would be submitted to the SHARK cluster (the `-qsub` flag) to the `BWA` parallel environment (the `-jobParaEnv BWA` flag). The `-jobQueue all.q` flag ensures that the proper queue is used. We also set the maximum retry of failing jobs to two times (via the `-retry 2` flag). Doing a dry run first is a good idea to ensure that your real run proceeds smoothly. It may not catch all the errors, but if the dry run fails you can be sure that the real run will never succeed.
If the dry run proceeds without problems, you can then do the real run by using the `-run` flag:
~~~
$ biopet pipeline <pipeline_name> -config <path/to/config.json> -qsub -jobParaEnv BWA -retry 2 -run
$ biopet pipeline <pipeline_name> -config <path/to/config.json> -qsub -jobParaEnv BWA -jobQueue all.q -retry 2 -run
~~~
It is usually a good idea to do the real run using `screen` or `nohup` to prevent the job from terminating when you log out of SHARK. In practice, running `biopet` directly is also fine. What you need to keep in mind is that each pipeline has its own expected config layout. You can check out more about the general structure of our config files [here](general/config.md). For the specific structure that each pipeline accepts, please consult the respective pipeline page.
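
For example, a long real run can be kept alive after you log out with `nohup` (the pipeline name, config path, and log file below are placeholders):

~~~
$ nohup biopet pipeline <pipeline_name> -config <path/to/config.json> -qsub -jobParaEnv BWA -jobQueue all.q -retry 2 -run > pipeline.log 2>&1 &
~~~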
......
......@@ -7,7 +7,8 @@ In case of BAM file as input, it will extract the unaligned read(pair) sequences
Analysis results are reported in a Krona graph, which can be viewed and navigated in a web browser.
Pipeline analysis components include:
- [Centrifuge](https://github.com/infphilo/centrifuge)
- [Kraken, DerrickWood](https://github.com/DerrickWood/kraken)
- [Qiime closed reference](http://qiime.org)
- [Qiime open reference](http://qiime.org)
......@@ -22,7 +23,8 @@ This pipeline is used to analyse a group of samples. This pipeline only accepts
| Key | Type | default | Function |
| --- | ---- | ------- | -------- |
| gears_use_kraken | Boolean | true | Run fastq file with kraken |
| gears_use_centrifuge | Boolean | true | Run fastq files with centrifuge |
| gears_use_kraken | Boolean | false | Run fastq files with kraken |
| gears_use_qiime_closed | Boolean | false | Run fastq files with qiime with the closed reference module |
| gears_use_qiime_open | Boolean | false | Run fastq files with qiime with the open reference module |
| gears_use_qiime_rtax | Boolean | false | Run fastq files with qiime with the rtax module |
......@@ -65,7 +67,7 @@ Command line flags for Gears are:
| -sample | --sampleid | String (**required**) | Name of sample |
| -library | --libid | String (optional) | Name of library |
If `-R2` is given, the pipeline will assume a paired-end setup. `-bam` is mutualy exclusive with the `-R1` and `-R2` flags. Either specify `-bam` or `-R1` and/or `-R2`.
If `-R2` is given, the pipeline will assume a paired-end setup. `-bam` is mutually exclusive with the `-R1` and `-R2` flags. Either specify `-bam` or `-R1` and/or `-R2`.
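
For illustration, a paired-end FASTQ run and a BAM run might look like this (sample, library, and file names are placeholders, and a config file may still be required):

~~~
$ biopet pipeline Gears -R1 sample_R1.fastq.gz -R2 sample_R2.fastq.gz -sample mySample -library myLib -run
$ biopet pipeline Gears -bam mySample.bam -sample mySample -run
~~~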
### Sample input extensions
......
# SamplesTsvToJson
# SamplesTsvToConfig
This tool enables a user to create a full sample sheet in JSON format, suitable for all our Queue pipelines, from TSV file(s).
This tool enables a user to create a full sample sheet in JSON or YAML format, suitable for all our Queue pipelines, from TSV file(s).
The tool can be called as follows:
~~~ bash
biopet tool SamplesTsvToJson
biopet tool SamplesTsvToConfig
~~~
To open the help:
......@@ -24,40 +24,15 @@ Usage: SamplesTsvToJson [options]
-t <file> | --tagFiles <file>
-o <file> | --outputFile <file>
When the extension is .yml or .yaml the output is in YAML format, otherwise it is in JSON. When no output file is given the output goes to stdout as YAML.
~~~
A user provides a TAB-separated file (TSV) with sample-specific properties, which are parsed into a config file by the tool.
For example, a user who wants to add certain properties to the description of a sample, such as the treatment a sample received, provides a TSV file with an extra column called treatment.
The resulting JSON file will have the 'treatment' property in it as well. The order of the columns is not relevant to the end result.
The resulting file will have the 'treatment' property in it as well. The order of the columns is not relevant to the end result.
The tag files work the same way, except that the values are nested under the key `tags` (see the sketch after the examples below).
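
For example (file names are illustrative, and the input flag is assumed to be `-i`/`--inputFiles` as in the tool's argument definitions), the output extension selects the format:

~~~ bash
# YAML output (extension .yml or .yaml)
biopet tool SamplesTsvToConfig -i samples.tsv -o samples.yaml

# JSON output (any other extension)
biopet tool SamplesTsvToConfig -i samples.tsv -o samples.json
~~~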
#### Example
~~~ json
{
"samples" : {
"Sample_ID_1" : {
"treatment" : "heatshock",
"libraries" : {
"Lib_ID_1" : {
"bam" : "MyFirst.bam"
}
}
},
"Sample_ID_2" : {
"treatment" : "heatshock",
"libraries" : {
"Lib_ID_2" : {
"bam" : "MySecond.bam"
}
}
}
}
}
~~~
#### Sample definition
To get the above example out of the tool one should provide 2 TSV files as follows:
......@@ -83,3 +58,45 @@ Basically anything you want to pass to your pipeline is possible.
| Sample_ID_1 | heatshock |
| Sample_ID_2 | heatshock |
#### Example
###### YAML
~~~ yaml
samples:
Sample_ID_1:
treatment: heatshock
libraries:
Lib_ID_1:
bam: MyFirst.bam
Sample_ID_2:
treatment: heatshock
libraries:
Lib_ID_2:
bam: MySecond.bam
~~~
###### JSON
~~~ json
{
"samples" : {
"Sample_ID_1" : {
"treatment" : "heatshock",
"libraries" : {
"Lib_ID_1" : {
"bam" : "MyFirst.bam"
}
}
},
"Sample_ID_2" : {
"treatment" : "heatshock",
"libraries" : {
"Lib_ID_2" : {
"bam" : "MySecond.bam"
}
}
}
}
}
~~~
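
###### Tags
A tag file follows the same TSV layout; its values end up nested under the key `tags`. As a sketch of the expected shape, a hypothetical tag TSV with a `flowcell` column would produce roughly:

~~~ yaml
samples:
  Sample_ID_1:
    tags:
      flowcell: HISEQ_001
~~~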
......@@ -21,7 +21,7 @@ pages:
- Toucan (Annotation): 'pipelines/toucan.md'
- Tools:
- AnnotateVcfWithBed: 'tools/AnnotateVcfWithBed.md'
- SamplesTsvToJson: 'tools/SamplesTsvToJson.md'
- SamplesTsvToConfig: 'tools/SamplesTsvToConfig.md'
- BedToInterval: 'tools/bedtointerval.md'
- BastyGenerateFasta: 'tools/BastyGenerateFasta.md'
......