# Patch summary (commentary only; everything before the first "diff --git" header is preamble).
#
#   biopet.wdl          ValidateFastq additionally returns its fastq1/fastq2 inputs as
#                       validatedFastq1/validatedFastq2.
#   biopet/seqstat.wdl  New file with task Generate, which runs "<tool> Generate" and
#                       returns the file written to outputFile as `json`.
#   bwa.wdl             Mem pipes `bwa mem` through an optional bwa-postalt step into picard
#                       SortSam (followed by SetNmMdAndUqTags when bwaIndex.altIndex is
#                       defined) and returns an IndexedBamFile. Index returns a BwaIndex.
#                       BwaIndex gains the optional altIndex member.
#   centrifuge.wdl      Classify emits -U only when unpairedReads is non-empty.
#   common.wdl          ConcatenateTextFiles skips the zcat/gzip round trip when both unzip
#                       and zip are set. Adds struct FastqPair.
#   gatk.wdl            Removes the compressionLevel input from four tasks.
#   picard.wdl          New task BedToIntervalList.
#   seqtk.wdl           Sample: fraction/number merged into fractionOrNumber, twoPassMode
#                       defaults to false.
#   spades.wdl          clusterMemory is scaled by 1.2; --memory receives ceil(memoryGb).
#   star.wdl            Comment rewording only.
#
# Review fixes folded into the added lines:
#   * centrifuge.wdl: select_first([unpairedReads]) fails when unpairedReads is not supplied;
#     an empty-array fallback is added so an absent value behaves like an empty list.
#     (unpairedReads is presumably an optional array, since the replaced line used
#     defined() on it - confirm against the task's input block.)
#   * bwa.wdl: the pattern ".bam$" matched any character followed by "bam"; the dot is now
#     escaped so only a literal ".bam" suffix is rewritten to ".bai".
#   * common.wdl: grammar of the new comment.
#
# NOTE(review): the "index" blob hashes for bwa.wdl, centrifuge.wdl and common.wdl still
# describe the pre-fix post-images. Indentation and the position of blank context lines were
# reflowed from the hunk line counts and may differ from the target tree - verify before
# applying (e.g. with whitespace-tolerant apply options).
diff --git a/biopet.wdl b/biopet.wdl
index b110defd1e3167397f3d4d8022f9b697acae92af..feb963fc2745f7d523025a3258a4a9f27a0280f7 100644
--- a/biopet.wdl
+++ b/biopet.wdl
@@ -409,6 +409,8 @@ task ValidateFastq {
 
     output {
         File stderr = stderr()
+        File validatedFastq1 = fastq1
+        File? validatedFastq2 = fastq2
     }
 
     runtime {
diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..c1e83a2f86919ad181dfaa4498c0d94b96752e15
--- /dev/null
+++ b/biopet/seqstat.wdl
@@ -0,0 +1,44 @@
+version 1.0
+
+# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018
+
+task Generate {
+    input {
+        String? preCommand
+        File? toolJar
+        File fastqR1
+        File? fastqR2
+        String outputFile
+        String sample
+        String library
+        String readgroup
+
+        Int memory = 4
+        Float memoryMultiplier = 2.0
+    }
+
+    String toolCommand = if defined(toolJar)
+        then "java -Xmx" + memory + "G -jar " + toolJar
+        else "biopet-seqstat -Xmx" + memory + "G"
+
+    command {
+        set -e -o pipefail
+        ~{preCommand}
+        mkdir -p $(dirname ~{outputFile})
+        ~{toolCommand} Generate \
+        --fastqR1 ~{fastqR1} \
+        ~{"--fastqR2 " + fastqR2} \
+        --output ~{outputFile} \
+        ~{"--sample " + sample} \
+        ~{"--library " + library } \
+        ~{"--readgroup " + readgroup }
+    }
+
+    output {
+        File json = outputFile
+    }
+
+    runtime {
+        memory: ceil(memory * memoryMultiplier)
+    }
+}
\ No newline at end of file
diff --git a/bwa.wdl b/bwa.wdl
index ba0023a3194c58e85b534d06371d6ab5a5e51184..d8ce3e3239a202f93de5ce25fbacd3e75bde6371 100644
--- a/bwa.wdl
+++ b/bwa.wdl
@@ -1,5 +1,7 @@
 version 1.0
 
+import "common.wdl" as common
+
 task Mem {
     input {
         String? preCommand
@@ -9,29 +11,59 @@ task Mem {
         String outputPath
         String? readgroup
 
+        String? picardJar
+
         Int threads = 1
         Int memory = 8
+        Int picardMemory = 4
     }
 
+    String picardPrefix = if defined(picardJar)
+        then "java -Xmx" + picardMemory + "G -jar " + picardJar
+        else "picard -Xmx" + picardMemory + "G"
+
+    # Post alt script from bwa
+    String altCommand = if (defined(bwaIndex.altIndex)) then "| bwa-postalt " + bwaIndex.altIndex else ""
+
+    # setNmMdAndUqTags is only required if alt sequences are added
+    String setNmMdAndUqTagsCommand = picardPrefix + " SetNmMdAndUqTags " +
+        " INPUT=/dev/stdin OUTPUT=" + outputPath +
+        " CREATE_INDEX=true" +
+        " R=" + bwaIndex.fastaFile
+
+    String sortSamCommand = picardPrefix + " SortSam " +
+        " INPUT=/dev/stdin SORT_ORDER=coordinate " +
+        if(defined(bwaIndex.altIndex)) then " OUTPUT=/dev/stdout "
+        else " OUTPUT=" + outputPath + " CREATE_INDEX=true "
+
+    String picardCommand = if (defined(bwaIndex.altIndex)) then sortSamCommand + " | " + setNmMdAndUqTagsCommand
+        else sortSamCommand
+
+    String readgroupArg = if (defined(readgroup)) then "-R '" + readgroup + "'" else ""
+
     command {
         set -e -o pipefail
         mkdir -p $(dirname ~{outputPath})
         ~{preCommand}
         bwa mem ~{"-t " + threads} \
-        ~{"-R '" + readgroup + "'"} \
+        ~{readgroupArg} \
         ~{bwaIndex.fastaFile} \
         ~{inputR1} \
         ~{inputR2} \
-        | samtools sort --output-fmt BAM - > ~{outputPath}
+        ~{altCommand} \
+        | ~{picardCommand}
     }
 
     output {
-        File bamFile = outputPath
+        IndexedBamFile bamFile = object {
+            file: outputPath,
+            index: sub(outputPath, "\\.bam$", ".bai")
+        }
     }
 
     runtime{
         cpu: threads
-        memory: memory
+        memory: memory + picardMemory + picardMemory
     }
 }
 
@@ -62,8 +94,10 @@ task Index {
     }
 
     output {
-        File indexedFasta = outputFile
-        Array[File] indexFiles = [outputFile + ".bwt",outputFile + ".pac",outputFile + ".sa",outputFile + ".amb",outputFile + ".ann"]
+        BwaIndex outputIndex = object {
+            fastaFile: outputFile,
+            indexFiles: [outputFile + ".bwt",outputFile + ".pac",outputFile + ".sa",outputFile + ".amb",outputFile + ".ann"]
+        }
     }
 
     parameter_meta {
@@ -77,4 +111,5 @@ task Index {
 struct BwaIndex {
     File fastaFile
     Array[File] indexFiles
+    File? altIndex
 }
diff --git a/centrifuge.wdl b/centrifuge.wdl
index 4b128c33627eb4eec4facb184c4e49a321d4b4a7..6ed05eb6e55b366a78de3f6e234538ba795bf99d 100644
--- a/centrifuge.wdl
+++ b/centrifuge.wdl
@@ -101,7 +101,7 @@ task Classify {
         ~{true="-k" false="" defined(assignments)} ~{assignments} \
         ~{true="-1" false="-U" defined(read2)} ~{sep=',' read1} \
         ~{true="-2" false="" defined(read2)} ~{sep=',' read2} \
-        ~{true="-U" false="" defined(unpairedReads)} ~{sep=',' unpairedReads} \
+        ~{true="-U" false="" length(select_first([unpairedReads, []])) > 0} ~{sep=',' unpairedReads} \
         ~{"--report-file " + reportFilePath} \
         ~{"--min-hitlen " + minHitLen} \
         ~{"--min-totallen " + minTotalLen} \
@@ -197,7 +197,7 @@ task Kreport {
         String suffix = "kreport"
         String prefix = "centrifuge"
         String indexPrefix
-        Boolean? onlyUnique
+        Boolean? onlyUnique ## removed in 1.0.4
         Boolean? showZeros
         Boolean? isCountTable
         Int? minScore
diff --git a/common.wdl b/common.wdl
index d2198aff445a33f8e9a91deca5e67afb1f26ee3f..7bd1da697e678744c521a0432337a4e86714af7e 100644
--- a/common.wdl
+++ b/common.wdl
@@ -42,11 +42,14 @@ task ConcatenateTextFiles {
         Boolean zip = false
     }
 
+    # When input and output are both compressed, decompression is not needed
+    String cmdPrefix = if (unzip && !zip) then "zcat " else "cat "
+    String cmdSuffix = if (!unzip && zip) then " | gzip -c " else ""
+
     command {
         set -e -o pipefail
         ~{"mkdir -p $(dirname " + combinedFilePath + ")"}
-        ~{true='zcat' false= 'cat' unzip} ~{sep=' ' fileList} \
-        ~{true="| gzip -c" false="" zip} > ~{combinedFilePath}
+        ~{cmdPrefix} ~{sep=' ' fileList} ~{cmdSuffix} > ~{combinedFilePath}
     }
 
     output {
@@ -150,3 +153,10 @@ struct IndexedBamFile {
     File file
     File index
 }
+
+struct FastqPair {
+    File R1
+    String? R1_md5
+    File? R2
+    String? R2_md5
+}
diff --git a/gatk.wdl b/gatk.wdl
index 265d28478133b8877bc69cc29c9811f41a8463f6..5123050ab2fdbcd051739b50837d4fe8307473e8 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -12,7 +12,6 @@ task ApplyBQSR {
         File recalibrationReport
         Array[File]+ sequenceGroupInterval
         Reference reference
-        Int? compressionLevel
 
         Int memory = 4
         Float memoryMultiplier = 3.0
@@ -112,7 +111,6 @@ task CombineGVCFs {
 
         Reference reference
 
-        Int? compressionLevel #TODO This isn't being used?
         Int memory = 4
         Float memoryMultiplier = 3.0
     }
@@ -198,7 +196,6 @@ task GenotypeGVCFs {
 
         IndexedVcfFile? dbsnpVCF
 
-        Int? compressionLevel
         Int memory = 4
         Float memoryMultiplier =3.0
     }
@@ -243,7 +240,6 @@ task HaplotypeCallerGvcf {
         String gvcfPath
         Reference reference
         Float contamination = 0.0
-        Int? compressionLevel
         String? gatkJar
 
         IndexedVcfFile? dbsnpVCF
diff --git a/picard.wdl b/picard.wdl
index 9dc3709d163dc0e822b0d8fd737c4a653d58ee09..f0175ac9839dae4d22b957e2e29312d131ffaccd 100644
--- a/picard.wdl
+++ b/picard.wdl
@@ -1,5 +1,42 @@
 version 1.0
 
+task BedToIntervalList {
+    input {
+        String? preCommand
+        File? picardJar
+
+        File bedFile
+        File dict
+        String outputPath
+
+        Int memory = 4
+        Float memoryMultiplier = 3.0
+    }
+
+    String toolCommand = if defined(picardJar)
+        then "java -Xmx" + memory + "G -jar " + picardJar
+        else "picard -Xmx" + memory + "G"
+
+    command {
+        set -e -o pipefail
+        mkdir -p $(dirname "~{outputPath}")
+        ~{preCommand}
+        ~{toolCommand} \
+        BedToIntervalList \
+        I=~{bedFile} \
+        O=~{outputPath} \
+        SD=~{dict}
+    }
+
+    output {
+        File intervalList = outputPath
+    }
+
+    runtime {
+        memory: ceil(memory * memoryMultiplier)
+    }
+}
+
 task CollectMultipleMetrics {
     input {
         String? preCommand
diff --git a/seqtk.wdl b/seqtk.wdl
index 64c604a5ecfe0beba399605dcabb1bb3de0ee59a..662d7e29321372161308eccbe430680315ee7445 100644
--- a/seqtk.wdl
+++ b/seqtk.wdl
@@ -6,9 +6,8 @@ task Sample {
         String outFilePath = "subsampledReads.fq.gz"
         String? preCommand
         Int? seed
-        Boolean twoPassMode
-        Float? fraction
-        Int? number
+        Boolean twoPassMode = false
+        Float fractionOrNumber # when above 1.0 is the number of reads, otherwise it's a fraction
         Boolean zip = true
     }
 
@@ -20,8 +19,7 @@ task Sample {
         ~{"-s " + seed} \
         ~{true="-2 " false="" twoPassMode} \
         ~{sequenceFile} \
-        ~{number} \
-        ~{fraction} \
+        ~{fractionOrNumber} \
         ~{true="| gzip" false="" zip} \
         > ~{outFilePath}
     }
diff --git a/spades.wdl b/spades.wdl
index 47195f196967b8b1863b908d9c483ed49df9f978..09f4bb0af05d38d066149c4947e2d69cdf680fc6 100644
--- a/spades.wdl
+++ b/spades.wdl
@@ -32,7 +32,8 @@ task Spades {
         Int? phredOffset
     }
 
-    Int clusterMemory = ceil(memoryGb / threads)
+    Int clusterMemory = ceil(memoryGb / threads * 1.2)
+    Int memoryArg = ceil(memoryGb)
 
     command {
         set -e -o pipefail
@@ -60,7 +61,7 @@ task Spades {
         ~{true="--disable-rr" false="" disableRepeatResolution} \
         ~{"--dataset " + dataset} \
         ~{"--threads " + threads} \
-        ~{"--memory " + memoryGb} \
+        ~{"--memory " + memoryArg} \
         ~{"-k " + k} \
         ~{"--cov-cutoff " + covCutoff} \
         ~{"--phred-offset " + phredOffset}
diff --git a/star.wdl b/star.wdl
index e03f6301052b648f2b86cd126e5ecfd9bdd27aed..3a795c746c5177c8f1705da3b5e44ceb463dc892 100644
--- a/star.wdl
+++ b/star.wdl
@@ -20,7 +20,7 @@ task Star {
         Int memory = 10
     }
 
-    #TODO needs to be extended for all possible output extensions
+    # Needs to be extended for all possible output extensions
     Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"}
 
     command {