Commit 0cef5b4b authored by Ruben Vorderman

Merge remote-tracking branch 'origin/develop' into fix-BIOPET-593

parents 23e9a4a8 8f638fe4
......@@ -149,7 +149,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
private[core] var _inputAsStdin = false
def inputAsStdin = _inputAsStdin
private[core] var _outputAsStdout = false
def outputAsStsout = _outputAsStdout
def outputAsStdout = _outputAsStdout
/**
* This operator sends stdout to `that` and combine this into 1 command line function
......
......@@ -27,7 +27,7 @@ class BiopetPipeTest extends TestNGSuite with Matchers {
def cmdLine =
"pipe1" +
(if (!inputAsStdin) " input1 " else "") +
(if (!outputAsStsout) " output1 " + "")
(if (!outputAsStdout) " output1 " + "")
}
class Pipe2 extends BiopetCommandLineFunction {
......@@ -35,7 +35,7 @@ class BiopetPipeTest extends TestNGSuite with Matchers {
def cmdLine =
"pipe2" +
(if (!inputAsStdin) " input2 " else "") +
(if (!outputAsStsout) " output2 " + "")
(if (!outputAsStdout) " output2 " + "")
}
@Test def testPipeCommands: Unit = {
......
......@@ -44,7 +44,7 @@ class Awk(val parent: Configurable) extends BiopetCommandLineFunction with Versi
executable +
required(command) +
(if (inputAsStdin) "" else required(input)) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
object Awk {
......
......@@ -36,12 +36,12 @@ class Bgzip(val parent: Configurable) extends BiopetCommandLineFunction {
override def beforeGraph(): Unit = {
super.beforeGraph()
if (input.isEmpty && !inputAsStdin) Logging.addError("Input is missing for Bgzip")
if (output == null && !outputAsStsout) Logging.addError("Output is missing for Bgzip")
if (output == null && !outputAsStdout) Logging.addError("Output is missing for Bgzip")
}
def cmdLine =
required(executable) +
conditional(f, "-f") +
" -c " + repeat(input) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
......@@ -38,7 +38,7 @@ class Cat(val parent: Configurable) extends BiopetCommandLineFunction {
def cmdLine =
required(executable) +
(if (inputAsStdin) "" else repeat(input)) +
(if (outputAsStsout) "" else (if (appending) " >> " else " > ") + required(output))
(if (outputAsStdout) "" else (if (appending) " >> " else " > ") + required(output))
}
/**
......
......@@ -34,5 +34,5 @@ class Curl(val parent: Configurable) extends BiopetCommandLineFunction with Vers
def versionRegex = """curl (\w+\.\w+\.\w+) .*""".r
def cmdLine: String =
required(executable) + required(url) + (if (outputAsStsout) "" else " > " + required(output))
required(executable) + required(url) + (if (outputAsStdout) "" else " > " + required(output))
}
......@@ -165,7 +165,7 @@ class Cutadapt(val parent: Configurable)
optional("--untrimmed-paired-output", untrimmedPairedOutput) +
// input / output
required(fastqInput) +
(if (outputAsStsout) ""
(if (outputAsStdout) ""
else
required("--output", fastqOutput) +
" > " + required(statsOutput))
......
......@@ -206,5 +206,5 @@ class Freebayes(val parent: Configurable)
conditional(debug, "--debug") +
optional("--haplotype-length", haplotypeLength) +
(if (inputAsStdin) required("--stdin") else "") +
(if (outputAsStsout) "" else optional("--vcf", outputVcf))
(if (outputAsStdout) "" else optional("--vcf", outputVcf))
}
......@@ -38,6 +38,6 @@ class GffRead(val parent: Configurable) extends BiopetCommandLineFunction {
def cmdLine =
executable +
(if (inputAsStdin) "" else required(input)) +
(if (outputAsStsout) "" else required("-o", output)) +
(if (outputAsStdout) "" else required("-o", output)) +
conditional(T, "-T")
}
......@@ -46,7 +46,7 @@ class Grep(val parent: Configurable) extends BiopetCommandLineFunction {
conditional(perlRegexp, "-P") +
required(grepFor) +
(if (inputAsStdin) "" else required(input)) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
object Grep {
......
......@@ -51,5 +51,5 @@ class GtfToGenePred(val parent: Configurable) extends BiopetCommandLineFunction
conditional(simple, "-simple") +
conditional(geneNameAsName2, "-geneNameAsName2") +
repeat(inputGtfs) +
(if (outputAsStsout) required("/dev/stdout") else required(outputGenePred))
(if (outputAsStdout) required("/dev/stdout") else required(outputGenePred))
}
......@@ -35,7 +35,7 @@ class Gzip(val parent: Configurable) extends BiopetCommandLineFunction with Vers
def cmdLine =
required(executable) + " -c " +
(if (inputAsStdin) "" else repeat(input)) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
object Gzip {
......
......@@ -46,6 +46,6 @@ class Sed(val parent: Configurable) extends BiopetCommandLineFunction with Versi
executable +
repeat("-e", expressions) +
(if (inputAsStdin) "" else required(inputFile)) +
(if (outputAsStsout) "" else " > " + required(outputFile))
(if (outputAsStdout) "" else " > " + required(outputFile))
}
......@@ -78,13 +78,13 @@ class Sickle(val parent: Configurable)
cmd +
(if (inputAsStdin) required("-f", new File("/dev/stdin")) else required("-f", inputR1)) +
required("-t", qualityType) +
(if (outputAsStsout) required("-o", new File("/dev/stdout")) else required("-o", outputR1)) +
(if (outputAsStdout) required("-o", new File("/dev/stdout")) else required("-o", outputR1)) +
optional("-q", qualityThreshold) +
optional("-l", lengthThreshold) +
conditional(noFiveprime, "-x") +
conditional(discardN, "-n") +
conditional(quiet || outputAsStsout, "--quiet") +
(if (outputAsStsout) "" else " > " + required(outputStats))
conditional(quiet || outputAsStdout, "--quiet") +
(if (outputAsStdout) "" else " > " + required(outputStats))
}
override def summaryDeps = outputStats :: super.summaryDeps
......
......@@ -39,7 +39,7 @@ class Zcat(val parent: Configurable) extends BiopetCommandLineFunction with Vers
def cmdLine =
required(executable) +
(if (inputAsStdin) "" else repeat(input)) +
(if (outputAsStsout) "" else (if (appending) " >> " else " > ") + required(output))
(if (outputAsStdout) "" else (if (appending) " >> " else " > ") + required(output))
}
object Zcat {
......
......@@ -82,6 +82,6 @@ class BcftoolsCall(val parent: Configurable) extends Bcftools {
optional("-P", P) +
conditional(X, "-X") +
conditional(Y, "-Y") +
(if (outputAsStsout) "" else required("-o", output)) +
(if (outputAsStdout) "" else required("-o", output)) +
(if (inputAsStdin) "-" else required(input))
}
......@@ -49,7 +49,7 @@ class BcftoolsMerge(val parent: Configurable) extends Bcftools {
def cmdLine =
required(executable) +
required("merge") +
(if (outputAsStsout) "" else required("-o", output)) +
(if (outputAsStdout) "" else required("-o", output)) +
conditional(forcesamples, "--force-samples") +
conditional(printheader, "--print-header") +
optional("--use-header", useheader) +
......
......@@ -59,7 +59,7 @@ class BedtoolsCoverage(val parent: Configurable) extends Bedtools with Reference
conditional(sorted, "-sorted") +
(if (sorted) required("-g", BedtoolsCoverage.getGenomeFile(referenceFai, jobTempDir))
else "") +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
......
......@@ -37,6 +37,6 @@ class BedtoolsSort(val parent: Configurable) extends Bedtools with Reference {
def cmdLine =
required(executable) + required("sort") + required("-i", input) +
optional("-faidx", faidx) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
......@@ -256,5 +256,5 @@ class Bowtie2(val parent: Configurable)
optional("-2", r2)
case _ => required("-U", R1)
}) +
(if (outputAsStsout) "" else required("-S", output))
(if (outputAsStdout) "" else required("-S", output))
}
......@@ -110,6 +110,6 @@ class BwaMem(val parent: Configurable) extends Bwa with Reference {
required(reference) +
required(R1) +
optional(R2) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
}
......@@ -151,7 +151,7 @@ class Centrifuge(val parent: Configurable)
case Some(r2) => required("-1", inputR1) + required("-2", r2)
case _ => required("-U", inputR1)
}) +
(if (outputAsStsout) "" else required("-S", output)) +
(if (outputAsStdout) "" else required("-S", output)) +
optional("--report-file", report)
/** Must return files to store into summary */
......
......@@ -260,5 +260,5 @@ class Hisat2(val parent: Configurable)
case Some(r2) => required("-1", R1) + optional("-2", r2)
case otherwise => required("-U", R1)
}) +
(if (outputAsStsout) "" else required("-S", output))
(if (outputAsStdout) "" else required("-S", output))
}
......@@ -68,7 +68,7 @@ class AddOrReplaceReadGroups(val parent: Configurable) extends Picard {
super.cmdLine +
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
(if (outputAsStdout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false)) +
required("SORT_ORDER=", sortOrder, spaceSeparated = false) +
required("RGID=", RGID, spaceSeparated = false) +
......
......@@ -39,7 +39,7 @@ class NormalizeFasta(val parent: Configurable) extends Picard {
super.cmdLine +
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
(if (outputAsStdout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false)) +
required("LINE_LENGTH=", lineLength, spaceSeparated = false) +
conditional(truncateSequenceNameAtWhitespace, "TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE=TRUE")
......
......@@ -54,6 +54,6 @@ class ReorderSam(val parent: Configurable) extends Picard with Reference {
required("REFERENCE=", reference, spaceSeparated = false) +
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
(if (outputAsStdout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false))
}
......@@ -38,7 +38,7 @@ class SortSam(val parent: Configurable) extends Picard {
override def beforeGraph() {
super.beforeGraph()
if (outputAsStsout) createIndex = false
if (outputAsStdout) createIndex = false
if (createIndex) outputIndex = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai")
}
......@@ -47,7 +47,7 @@ class SortSam(val parent: Configurable) extends Picard {
super.cmdLine +
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
(if (outputAsStdout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false)) +
required("SORT_ORDER=", sortOrder, spaceSeparated = false)
}
......
......@@ -51,7 +51,7 @@ class SortVcf(val parent: Configurable) extends Picard with Reference {
super.cmdLine +
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
(if (outputAsStdout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false)) +
required("SEQUENCE_DICTIONARY=", sequenceDictionary, spaceSeparated = false)
}
......
......@@ -31,7 +31,7 @@ class SamtoolsFlagstat(val parent: Configurable) extends Samtools {
def cmdLine =
required(executable) + required("flagstat") +
(if (inputAsStdin) "-" else required(input)) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
object SamtoolsFlagstat {
......
......@@ -59,7 +59,7 @@ class SamtoolsMpileup(val parent: Configurable) extends Samtools with Reference
conditional(disableBaq, "-B") +
conditional(u, "-u") +
conditional(v, "-v") +
(if (outputAsStsout) "" else required("-o", output)) +
(if (outputAsStdout) "" else required("-o", output)) +
(if (inputAsStdin) "-" else repeat(input))
}
......
......@@ -48,6 +48,6 @@ class SamtoolsSort(val parent: Configurable) extends Samtools {
optional("-O", outputFormat) +
required("-T", prefix) +
conditional(sortByName, "-n") +
(if (outputAsStsout) "" else required("-o", output)) +
(if (outputAsStdout) "" else required("-o", output)) +
(if (inputAsStdin) "" else required(input))
}
......@@ -47,7 +47,7 @@ class SamtoolsView(val parent: Configurable) extends Samtools {
conditional(b, "-b") +
conditional(h, "-h") +
(if (inputAsStdin) "-" else required(input)) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
object SamtoolsView {
......
......@@ -43,6 +43,6 @@ class SeqtkSample(val parent: Configurable) extends Seqtk {
optional("-s", s) +
required(input) +
(if (sample > 1) required(sample.toInt) else required(sample)) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
......@@ -106,7 +106,7 @@ class SeqtkSeq(val parent: Configurable) extends Seqtk {
conditional(flag2, "-2") +
conditional(V, "-V") +
(if (inputAsStdin) "" else required(input)) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
/**
......
package nl.lumc.sasc.biopet.extensions.taxextract
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import org.broadinstitute.gatk.utils.commandline.{Argument, Input}
/**
 * Common base for wrappers around the `taxextract` executable.
 *
 * Subclasses provide the sub command through [[subCommand]] and append their own
 * options to the string produced by [[cmdLine]].
 *
 * Created by Sander Bollen on 2-5-17.
 */
abstract class TaxExtract extends BiopetCommandLineFunction with Version {

  // Read from config key "exe" in namespace "taxextract"; falls back to "taxextract".
  executable = config("exe", namespace = "taxextract", default = "taxextract")

  /** Sub command that is placed directly after the executable. */
  def subCommand: String

  /** Report file passed to the tool with `-i`. */
  @Input
  var inputKreport: File = _

  /** Taxonomy name passed to the tool with `-n`; read from config key "taxonomy". */
  @Argument(required = true, doc = "taxonomy name to extract")
  var taxName: String = config("taxonomy", namespace = "taxextract")

  /** Shared leading part of the command line: executable, sub command, `-i` and `-n`. */
  def cmdLine: String = {
    val invocation = executable + required(subCommand)
    val sharedArguments = required("-i", inputKreport) + required("-n", taxName)
    invocation + sharedArguments
  }

  /** Command whose output is matched against [[versionRegex]]. */
  def versionCommand = s"$executable --version"

  /** Captures everything that follows ", version " as the version string. */
  def versionRegex = """.+, version (.*)""".r
}
package nl.lumc.sasc.biopet.extensions.taxextract
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Output}
/**
 * Wrapper for the `count` sub command of taxextract.
 *
 * Created by Sander Bollen on 2-5-17.
 */
class TaxExtractCount(val parent: Configurable) extends TaxExtract {

  def subCommand = "count"

  /** File that stdout is redirected into; not used when `outputAsStdout` is set. */
  @Output(required = false)
  var output: Option[File] = None

  /** Adds `--reverse` to the command line when true. */
  @Argument(required = false)
  var reverse: Boolean = false

  /** Base command line plus the optional `--reverse` flag and the stdout redirect. */
  override def cmdLine = {
    val withFlags = super.cmdLine + conditional(reverse, "--reverse")
    // Only redirect to a file when stdout is not consumed directly.
    val redirect = if (outputAsStdout) "" else " > " + required(output)
    withFlags + redirect
  }
}
package nl.lumc.sasc.biopet.extensions.taxextract
import java.io.File
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
/**
 * Wrapper for the `extract` sub command of taxextract.
 *
 * Created by Sander Bollen on 2-5-17.
 */
class TaxExtractExtract(val parent: Configurable) extends TaxExtract {

  def subCommand = "extract"

  /** Result file passed to the tool with `-r`. */
  @Input
  var centrifugeResult: File = _

  /** First fastq input, passed with `-fq1`. */
  @Input
  var fq1: File = _

  /** Optional second fastq input, passed with `-fq2`; must be set together with [[out2]]. */
  @Input(required = false)
  var fq2: Option[File] = None

  /** First output file, passed with `-o1`. */
  @Output
  var out1: File = _

  /** Optional second output file, passed with `-o2`; must be set together with [[fq2]]. */
  @Output(required = false)
  var out2: Option[File] = None

  /** Adds `--no-children` to the command line when true. */
  @Argument(required = false)
  var noChildren: Boolean = false

  /** Adds `--reverse` to the command line when true. */
  @Argument(required = false)
  var reverse: Boolean = false

  /**
   * Checks that fq2 and out2 are either both set or both unset.
   *
   * The check lives here rather than in [[cmdLine]] so that building the command line
   * string does not register errors as a side effect.
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    // Exactly one of the two being defined is the invalid combination.
    if (fq2.isDefined != out2.isDefined) {
      Logging.addError("Both fq2 and out2 must be defined if either one is defined")
    }
  }

  /** Base command line followed by the extract-specific inputs, outputs and flags. */
  override def cmdLine = {
    super.cmdLine +
      required("-r", centrifugeResult) +
      required("-fq1", fq1) +
      required("-o1", out1) +
      optional("-fq2", fq2) +
      optional("-o2", out2) +
      conditional(noChildren, "--no-children") +
      conditional(reverse, "--reverse")
  }
}
package nl.lumc.sasc.biopet.extensions.taxextract
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Output}
/**
 * Wrapper for the `tax-id` sub command of taxextract.
 *
 * Created by Sander Bollen on 2-5-17.
 */
class TaxExtractId(val parent: Configurable) extends TaxExtract {

  def subCommand = "tax-id"

  /** File that stdout is redirected into; not used when `outputAsStdout` is set. */
  @Output(required = false)
  var output: Option[File] = None

  /** Adds `--no-children` to the command line when true. */
  @Argument(required = false)
  var noChildren: Boolean = false

  /** Adds `--reverse` to the command line when true. */
  @Argument(required = false)
  var reverse: Boolean = false

  /** Base command line plus the optional flags and the stdout redirect. */
  override def cmdLine: String = {
    val flags = conditional(noChildren, "--no-children") + conditional(reverse, "--reverse")
    val withFlags = super.cmdLine + flags
    // Only redirect to a file when stdout is not consumed directly.
    val redirect = if (outputAsStdout) "" else " > " + required(output)
    withFlags + redirect
  }
}
......@@ -67,5 +67,5 @@ class VarscanMpileup2cns(val parent: Configurable) extends Varscan {
optional("--output-vcf", outputVcf) +
optional("--vcf-sample-list", vcfSampleList) +
optional("--variants", variants) +
(if (outputAsStsout) "" else " > " + required(output))
(if (outputAsStdout) "" else " > " + required(output))
}
......@@ -130,6 +130,44 @@ Carp can do broad peak-calling by using the following config:
These settings are optimized to call peaks on samples prepared using the ATAC protocol.
## Taxonomy extraction
It is possible to only align reads matching a certain taxonomy.
This is useful in situations where known contaminants exist in the sequencing files.
By default this option is **disabled**.
For technical reasons, reads that do not match any known taxonomy **cannot** be recovered.
Taxonomies are determined using [Gears](gears.md) as a sub-pipeline.
To enable taxonomy extraction, specify the following additional flags in your
config file:
| Name | Namespace | Type | Function |
| ---- | --------- | ---- | -------- |
| mapping_to_gears | mapping | String | Must be set to **all** |
| taxonomy_extract | mapping | Boolean (must be **true** for this purpose) | enable taxonomy extraction |
| taxonomy | taxextract | String | The name of the taxonomy you wish to extract |
The extraction can be fine-tuned with two additional optional config values:
| Name | Namespace | Type | Function |
| ---- | --------- | ---- | -------- |
| reverse | taxextract | Boolean | Set to true to select those reads _not_ matching the taxonomy. |
| no_children | taxextract | Boolean | Set to true to require an exact match on the taxonomy node, rather than matching the node and all of its children. |
### Example config
```yaml
extract_taxonomies: true
mapping_to_gears: all
taxextract: