Commit 38f56c02 authored by Cats

Merge remote-tracking branch 'origin/develop' into UMI-tools

parents e55983c0 9d62a9d9
@@ -11,6 +11,14 @@ that users understand how the changes affect the new version.
version 2.2.0-dev
---------------------------
+ Add `GenomicsDBImport` task for GATK.
+ Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple
+  annotation groups. The `StandardAnnotation` group is still used by default.
+ GenotypeGVCFs: allow only one input GVCF file, since the tool itself also accepts
+  only one input file.
+ Rename HaplotypeCallerGvcf to HaplotypeCaller. Add a `gvcf` option to set
+  whether the output should be a GVCF.
+ Centrifuge: Add a Krona task specific to Centrifuge.
+ Centrifuge: Fix the Centrifuge tests, in which the index files sometimes still could not be located.
+ Update parameter_meta for TALON, Centrifuge and Minimap2.
+ Centrifuge: Fix an issue where Centrifuge Inspect did not receive the correct index file location.
@@ -37,7 +37,7 @@ task Build {
Int threads = 5
String memory = "20G"
String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3"
String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5"
}
command {
@@ -107,7 +107,7 @@ task Classify {
Int threads = 4
String memory = "16G"
-String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3"
+String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5"
}
Map[String, String] inputFormatOptions = {"fastq": "-q", "fasta": "-f", "qseq": "--qseq", "raw": "-r", "sequences": "-c"}
@@ -184,7 +184,7 @@ task Inspect {
Int? across
String memory = "4G"
-String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3"
+String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5"
}
Map[String, String] outputOptions = {"fasta": "", "names": "--names", "summary": "--summary", "conversionTable": "--conversion-table", "taxonomyTree": "--taxonomy-tree", "nameTable": "--name-table", "sizeTable": "--size-table"}
@@ -296,45 +296,100 @@ task DownloadTaxonomy {
task Kreport {
input {
-String? preCommand
-File centrifugeOut
-Boolean inputIsCompressed
-String outputDir
-String suffix = "kreport"
-String prefix = "centrifuge"
-String indexPrefix
-Boolean? onlyUnique ## removed in 1.0.4
-Boolean? showZeros
-Boolean? isCountTable
-Int? minScore
-Int? minLength
-Int cores = 1
+File centrifugeClassification
+String outputPrefix
+Array[File]+ indexFiles
+Boolean noLCA = false
+Boolean showZeros = false
+Boolean isCountTable = false
+Int? minimumScore
+Int? minimumLength
String memory = "4G"
+String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5"
}
-String kreportFilePath = outputDir + "/" + prefix + "." + suffix
-command {
-set -e -o pipefail
-~{preCommand}
+command <<<
+set -e
+mkdir -p "$(dirname ~{outputPrefix})"
+indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})"
+for file in ~{sep=" " indexFiles}
+do
+ln ${file} $PWD/"$(basename ${file})"
+done
centrifuge-kreport \
--x ~{indexPrefix} \
-~{true="--only-unique" false="" onlyUnique} \
+-x $PWD/${indexBasename} \
+~{true="--no-lca" false="" noLCA} \
~{true="--show-zeros" false="" showZeros} \
~{true="--is-count-table" false="" isCountTable} \
~{"--min-score " + minScore} \
~{"--min-length " + minLength} \
~{true="<(zcat" false="" inputIsCompressed} ~{centrifugeOut}\
~{true=")" false="" inputIsCompressed} \
> ~{kreportFilePath}
~{"--min-score " + minimumScore} \
~{"--min-length " + minimumLength} \
~{centrifugeClassification} \
> ~{outputPrefix + "_kreport.tsv"}
>>>
output {
-File kreport = kreportFilePath
+File outputKreport = outputPrefix + "_kreport.tsv"
}
runtime {
-cpu: cores
memory: memory
+docker: dockerImage
}
+parameter_meta {
+# inputs
+centrifugeClassification: {description: "File with Centrifuge classification results.", category: "required"}
+outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
+indexFiles: {description: "The files of the index for the reference genomes.", category: "required"}
+noLCA: {description: "Do not report the LCA of multiple assignments, but report count fractions at the taxa.", category: "advanced"}
+showZeros: {description: "Show clades that have zero reads.", category: "advanced"}
+isCountTable: {description: "The format of the file is taxID<tab>COUNT.", category: "advanced"}
+minimumScore: {description: "Require a minimum score for reads to be counted.", category: "advanced"}
+minimumLength: {description: "Require a minimum alignment length to the read.", category: "advanced"}
+memory: {description: "The amount of memory available to the job.", category: "advanced"}
+dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+# outputs
+outputKreport: {description: "File with kraken style report."}
+}
}
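For orientation, here is a minimal sketch of how the reworked Kreport task could be called; the import path, workflow name and all file paths are hypothetical and not part of this commit.

version 1.0

import "centrifuge.wdl" as centrifuge  # hypothetical import path

workflow KreportSketch {
    call centrifuge.Kreport {
        input:
            # Classification file produced by an earlier Classify call (hypothetical path).
            centrifugeClassification = "sample1_classification.tsv",
            outputPrefix = "results/sample1",
            # All index files must be passed; the task derives the index basename
            # from the first one and hard-links them into the working directory.
            indexFiles = ["index/ref.1.cf", "index/ref.2.cf", "index/ref.3.cf"]
    }
}

Passing the index files themselves (instead of the old indexPrefix string) lets the execution engine localize them, which is exactly what the hard-link loop in the new command section relies on.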
+task KTimportTaxonomy {
+input {
+File inputFile
+String outputPrefix
+String memory = "4G"
+String dockerImage = "biocontainers/krona:v2.7.1_cv1"
+}
+command {
+set -e
+mkdir -p "$(dirname ~{outputPrefix})"
+cat ~{inputFile} | cut -f 1,3 > kronaInput.krona
+ktImportTaxonomy kronaInput.krona
+cp taxonomy.krona.html ~{outputPrefix + "_krona.html"}
+}
+output {
+File outputKronaPlot = outputPrefix + "_krona.html"
+}
+runtime {
+memory: memory
+docker: dockerImage
+}
+parameter_meta {
+# inputs
+inputFile: {description: "File with Centrifuge classification results.", category: "required"}
+outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
+memory: {description: "The amount of memory available to the job.", category: "advanced"}
+dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+# outputs
+outputKronaPlot: {description: "Krona taxonomy plot html file."}
+}
+}
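Continuing the sketch above, the new Krona task consumes the same classification file; the paths remain hypothetical.

    call centrifuge.KTimportTaxonomy {
        input:
            inputFile = "sample1_classification.tsv",  # hypothetical Centrifuge output
            outputPrefix = "results/sample1"
    }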
@@ -723,15 +723,66 @@ task GatherBqsrReports {
}
}
+task GenomicsDBImport {
+input {
+Array[File] gvcfFiles
+Array[File] gvcfFilesIndex
+Array[File]+ intervals
+String genomicsDBWorkspacePath = "genomics_db"
+String genomicsDBTarFile = "genomics_db.tar.gz"
+String? tmpDir
+String memory = "12G"
+String javaXmx = "4G"
+String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
+}
+command {
+set -e
+mkdir -p "$(dirname ~{genomicsDBWorkspacePath})"
+gatk --java-options -Xmx~{javaXmx} \
+GenomicsDBImport \
+-V ~{sep=" -V " gvcfFiles} \
+--genomicsdb-workspace-path ~{genomicsDBWorkspacePath} \
+~{"--tmp-dir " + tmpDir} \
+-L ~{sep=" -L " intervals}
+bash -c 'tar -cvzf ~{genomicsDBTarFile} ~{genomicsDBWorkspacePath}/*'
+}
+output {
+File genomicsDbTarArchive = genomicsDBTarFile
+}
+runtime {
+docker: dockerImage
+memory: memory
+}
+parameter_meta {
+gvcfFiles: {description: "The GVCF files to be merged.", category: "required"}
+gvcfFilesIndex: {description: "Indexes for the GVCF files.", category: "required"}
+intervals: {description: "The intervals over which to operate.", category: "required"}
+genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored.", category: "advanced"}
+genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored.", category: "advanced"}
+tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers.",
+category: "advanced"}
+memory: {description: "The amount of memory this job will use.", category: "advanced"}
+javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+category: "advanced"}
+dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+category: "advanced"}
+}
+}
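A minimal sketch of calling the new task, assuming it lives in an importable `gatk.wdl`; the sample and interval paths are hypothetical.

version 1.0

import "gatk.wdl" as gatk  # hypothetical import path

workflow GenomicsDBSketch {
    call gatk.GenomicsDBImport {
        input:
            gvcfFiles = ["sample1.g.vcf.gz", "sample2.g.vcf.gz"],
            gvcfFilesIndex = ["sample1.g.vcf.gz.tbi", "sample2.g.vcf.gz.tbi"],
            intervals = ["chr20.interval_list"]
    }
    # The workspace comes back as a tar archive (genomicsDbTarArchive), since a
    # GenomicsDB workspace is a directory tree and WDL outputs must be files.
}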
task GenotypeGVCFs {
input {
-Array[File]+ gvcfFiles
-Array[File]+ gvcfFilesIndex
+File gvcfFile
+File gvcfFileIndex
Array[File]+ intervals
String outputPath
File referenceFasta
File referenceFastaDict
File referenceFastaFai
+Array[String] annotationGroups = ["StandardAnnotation"]
File? dbsnpVCF
File? dbsnpVCFIndex
@@ -747,11 +798,10 @@ task GenotypeGVCFs {
GenotypeGVCFs \
-R ~{referenceFasta} \
-O ~{outputPath} \
~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \
-G StandardAnnotation \
~{"-D " + dbsnpVCF} \
~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \
--only-output-calls-starting-in-intervals \
-new-qual \
--V ~{sep=' -V ' gvcfFiles} \
+-V ~{gvcfFile} \
-L ~{sep=' -L ' intervals}
}
@@ -767,8 +817,8 @@ task GenotypeGVCFs {
}
parameter_meta {
-gvcfFiles: {description: "The GVCF files to be genotypes.", category: "required"}
-gvcfFilesIndex: {description: "The index of the input GVCF files.", category: "required"}
+gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"}
+gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"}
intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"}
outputPath: {description: "The location to write the output VCF file to.", category: "required"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.",
@@ -776,6 +826,7 @@ task GenotypeGVCFs {
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+annotationGroups: {description: "Which annotation groups will be used for the annotation.", category: "advanced"}
dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
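Continuing the sketch above, the reworked single-GVCF interface with an extra annotation group; `AS_StandardAnnotation` is one plausible addition, and all paths remain hypothetical.

    call gatk.GenotypeGVCFs {
        input:
            gvcfFile = "sample1.g.vcf.gz",
            gvcfFileIndex = "sample1.g.vcf.gz.tbi",
            intervals = ["chr20.interval_list"],
            outputPath = "results/sample1.vcf.gz",
            referenceFasta = "reference.fasta",
            referenceFastaDict = "reference.dict",
            referenceFastaFai = "reference.fasta.fai",
            # Defaults to ["StandardAnnotation"]; each entry becomes a -G flag.
            annotationGroups = ["StandardAnnotation", "AS_StandardAnnotation"]
    }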
@@ -839,20 +890,21 @@ task GetPileupSummaries {
}
# Call variants on a single sample with HaplotypeCaller to produce a GVCF
-task HaplotypeCallerGvcf {
+task HaplotypeCaller {
input {
Array[File]+ inputBams
Array[File]+ inputBamsIndex
Array[File]+? intervalList
Array[File]+? excludeIntervalList
-String gvcfPath
+String outputPath
File referenceFasta
File referenceFastaIndex
File referenceFastaDict
-Float contamination = 0.0
+Float? contamination
File? dbsnpVCF
File? dbsnpVCFIndex
Int? ploidy
+Boolean gvcf = false
String memory = "12G"
String javaXmx = "4G"
@@ -861,23 +913,23 @@ task HaplotypeCallerGvcf {
command {
set -e
mkdir -p "$(dirname ~{gvcfPath})"
mkdir -p "$(dirname ~{outputPath})"
gatk --java-options -Xmx~{javaXmx} \
HaplotypeCaller \
-R ~{referenceFasta} \
--O ~{gvcfPath} \
+-O ~{outputPath} \
-I ~{sep=" -I " inputBams} \
~{"--sample-ploidy " + ploidy} \
~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \
~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \
~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \
-contamination ~{contamination} \
-ERC GVCF
~{"-D" + dbsnpVCF} \
~{"--contamination-fraction-per-sample-file " + contamination} \
~{true="-ERC GVCF" false="" gvcf}
}
output {
-File outputGVCF = gvcfPath
-File outputGVCFIndex = gvcfPath + ".tbi"
+File outputVCF = outputPath
+File outputVCFIndex = outputPath + ".tbi"
}
runtime {
@@ -890,8 +942,9 @@ task HaplotypeCallerGvcf {
inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}
intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}
excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"}
-gvcfPath: {description: "The location to write the output GVCF to.", category: "required"}
+outputPath: {description: "The location to write the output to.", category: "required"}
ploidy: {description: "The ploidy with which the variants should be called.", category: "common"}
+gvcf: {description: "Whether the output should be a GVCF.", category: "common"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.",
category: "required"}
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
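Finally, a sketch of the renamed task in GVCF mode, with hypothetical paths as before:

    call gatk.HaplotypeCaller {
        input:
            inputBams = ["sample1.bam"],
            inputBamsIndex = ["sample1.bai"],
            outputPath = "results/sample1.g.vcf.gz",
            referenceFasta = "reference.fasta",
            referenceFastaIndex = "reference.fasta.fai",
            referenceFastaDict = "reference.dict",
            gvcf = true  # adds -ERC GVCF; leave at false for a plain VCF
    }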