Skip to content
Snippets Groups Projects
Commit 09b97388 authored by Ruben Vorderman's avatar Ruben Vorderman
Browse files

Update Picard and reevaluate use of intel inflater/deflater

parent 7b2d86fe
No related branches found
Tags v3.1.0
No related merge requests found
......@@ -29,7 +29,7 @@ task BedToIntervalList {
String javaXmx = "3G"
String memory = "4G"
Int timeMinutes = 5
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -89,7 +89,7 @@ task CollectHsMetrics {
# Additional * 2 because picard multiple metrics reads the
# reference fasta twice.
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -158,7 +158,7 @@ task CollectMultipleMetrics {
Int memoryMb = javaXmxMb + 512
# Additional * 2 because picard multiple metrics reads the reference fasta twice.
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -284,7 +284,7 @@ task CollectRnaSeqMetrics {
String memory = "9G"
# With 6 minutes per G there were several timeouts.
Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics {
String javaXmx = "3G"
String memory = "4G"
Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -404,7 +404,7 @@ task CollectVariantCallingMetrics {
String javaXmx = "8G"
String memory = "9G"
Int timeMinutes = 1440
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -453,7 +453,7 @@ task CreateSequenceDictionary {
String javaXmx = "2G"
String memory = "3G"
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -497,13 +497,15 @@ task GatherBamFiles {
String outputBamPath
Boolean createMd5File = false
Int? compressionLevel
Int compressionLevel = 1
Boolean useJdkInflater = false
Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
Int javaXmxMb = 1024
Int memoryMb = javaXmxMb + 512
# One minute per input gigabyte.
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -513,7 +515,9 @@ task GatherBamFiles {
GatherBamFiles \
INPUT=~{sep=' INPUT=' inputBams} \
OUTPUT=~{outputBamPath} \
~{"COMPRESSION_LEVEL=" + compressionLevel} \
COMPRESSION_LEVEL=~{compressionLevel} \
USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \
CREATE_INDEX=true \
CREATE_MD5_FILE=~{true="true" false="false" createMd5File}
}
......@@ -536,7 +540,9 @@ task GatherBamFiles {
inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"}
outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"}
createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"}
compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"}
javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"}
memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
......@@ -555,10 +561,14 @@ task GatherVcfs {
Array[File]+ inputVcfIndexes
String outputVcfPath = "out.vcf.gz"
Int compressionLevel = 1
Boolean useJdkInflater = false
Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
String javaXmx = "4G"
String memory = "5G"
Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -566,6 +576,10 @@ task GatherVcfs {
mkdir -p "$(dirname ~{outputVcfPath})"
picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
GatherVcfs \
COMPRESSION_LEVEL=~{compressionLevel} \
USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \
CREATE_INDEX=true \
INPUT=~{sep=' INPUT=' inputVcfs} \
OUTPUT=~{outputVcfPath}
}
......@@ -590,6 +604,10 @@ task GatherVcfs {
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"}
# outputs
outputVcf: {description: "Multiple VCF files gathered into one file."}
}
......@@ -601,14 +619,11 @@ task MarkDuplicates {
Array[File]+ inputBams
String outputBamPath
String metricsPath
Int compressionLevel = 1
Boolean createMd5File = false
Boolean useJdkInflater = true # Slightly faster than the intel one.
# Better results for compression level 1 (much smaller).
# Higher compression levels similar to intel deflater.
# NOTE: this might change in the future when the intel
# deflater is updated!
Boolean useJdkDeflater = true
Int compressionLevel = 1
Boolean useJdkInflater = false
Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
# The program default for READ_NAME_REGEX is appropriate in nearly every case.
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection.
......@@ -622,7 +637,7 @@ task MarkDuplicates {
String memoryMb = javaXmxMb + 512
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
# Task is assuming query-sorted input so that the Secondary and Supplementary reads get
......@@ -638,6 +653,8 @@ task MarkDuplicates {
OUTPUT=~{outputBamPath} \
METRICS_FILE=~{metricsPath} \
COMPRESSION_LEVEL=~{compressionLevel} \
USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \
VALIDATION_STRINGENCY=SILENT \
~{"READ_NAME_REGEX=" + read_name_regex} \
OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
......@@ -668,9 +685,9 @@ task MarkDuplicates {
outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"}
metricsPath: {description: "The location where the output metrics file should be written.", category: "required"}
compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"}
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"}
createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"}
read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"}
javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"}
memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
......@@ -692,16 +709,20 @@ task MergeVCFs {
Array[File]+ inputVCFsIndexes
String outputVcfPath
Int compressionLevel = 1
Boolean useJdkInflater = true # Slightly faster than the intel one.
Boolean useJdkInflater = false
# Better results for compression level 1 (much smaller).
# Higher compression levels similar to intel deflater.
# NOTE: this might change in the future when the intel deflater is updated!
Boolean useJdkDeflater = true
# Second NOTE: No it did not change. Only the fastest algorithm with
# worse compression is wrapped in the intel GKL. Instead of using
# one of the slightly slower but better compressing alternatives from ISA-L.
# (Which are also faster than zlib.)
Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
String javaXmx = "4G"
String memory = "5G"
Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
# Using MergeVcfs instead of GatherVcfs so we can create indices.
......@@ -757,7 +778,7 @@ task SamToFastq {
String javaXmx = "16G" # High memory default to avoid crashes.
String memory = "17G"
Int timeMinutes = 30
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
File? noneFile
}
......@@ -818,7 +839,7 @@ task ScatterIntervalList {
String javaXmx = "3G"
String memory = "4G"
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -853,13 +874,15 @@ task SortSam {
Boolean createMd5File = false
Int maxRecordsInRam = 500000
Int compressionLevel = 1
Boolean useJdkInflater = false
Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
# Default ram of 4 GB. Using 125001.0 to prevent an answer of
# 4.000000001 which gets rounded to 5.
# GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778
Int XmxGb = ceil(maxRecordsInRam / 125001.0)
Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -872,6 +895,8 @@ task SortSam {
SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \
CREATE_INDEX=true \
COMPRESSION_LEVEL=~{compressionLevel} \
USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \
VALIDATION_STRINGENCY=SILENT \
CREATE_MD5_FILE=~{true="true" false="false" createMd5File}
......@@ -896,7 +921,9 @@ task SortSam {
sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"}
createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"}
maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"}
compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"}
compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"}
XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
......@@ -917,7 +944,7 @@ task SortVcf {
String javaXmx = "8G"
String memory = "9G"
Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
......@@ -967,7 +994,7 @@ task RenameSample {
String javaXmx = "8G"
String memory = "9G"
Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2)
String dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -1014,11 +1041,13 @@ task UmiAwareMarkDuplicatesWithMateCigar {
String tempdir = "temp"
Boolean removeDuplicates = true
String umiTagName = "RX"
Int compressionLevel = 1
Boolean useJdkInflater = false
Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
String javaXmx = "8G"
String memory = "9G"
Int timeMinutes = 360
String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0"
String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0"
}
command {
......@@ -1034,6 +1063,9 @@ task UmiAwareMarkDuplicatesWithMateCigar {
TMP_DIR=~{tempdir} \
REMOVE_DUPLICATES=~{removeDuplicates} \
CREATE_INDEX=true \
COMPRESSION_LEVEL=~{compressionLevel} \
USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater}
}
output {
......@@ -1058,6 +1090,9 @@ task UmiAwareMarkDuplicatesWithMateCigar {
removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"}
umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"}
tempdir: {description: "Temporary directory.", category: "advanced"}
compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment