WDL task library update (unified diff, git format).

Text above the first "diff --git" header is commentary, not patch content.

Changes by file:
  LICENSE        copyright line now names the Sequencing Analysis Support Core (LUMC).
  bwa.wdl        BwaMem takes an indexFiles input; new "index" task wraps "bwa index".
  centrifuge.wdl download/build create the parent directory of their output path
                 instead of running mkdir followed by rm -d on the path itself.
  common.wdl     concatenateTextFiles gains an optional "zip" switch (pipes through
                 gzip -c) and runs with set -e -o pipefail.
  cutadapt.wdl   memory becomes Int (default 4); adapter arrays become non-empty
                 optional arrays; parent directories are created for read1output,
                 read2output and reportPath.
  fastqc.wdl     report paths are derived from the input file name instead of globbing.
  samtools.wdl   new "fastq" task wrapping "samtools fastq".
  seqtk.wdl      new file: "sample" task wrapping "seqtk sample".
  spades.wdl     new file: "spades" task wrapping "spades.py".
  unicycler.wdl  new file: "unicycler" task. finalThreads/finalMemory pass an array to
                 select_first, and the declared verbosity input is forwarded as
                 --verbosity.

NOTE(review): the blob hash on the unicycler.wdl index line was not recomputed after
the review corrections in that file; confirm before relying on it (e.g. 3-way apply).

diff --git a/LICENSE b/LICENSE
index 6e2dff36af8a00f40e4ad9e07e7c282111f72da0..ab753ec28bc50c6c26c03748188fcc7013efb274 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2018 Peter van 't Hof
+Copyright (c) Sequencing Analysis Support Core - Leiden University Medical Center
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/bwa.wdl b/bwa.wdl
index 0a8b37fa8ad3fec1d7d3400f6e9b4b5d342dba0a..ffa42313dad13f0c13df6f9c77f938567c0027a0 100644
--- a/bwa.wdl
+++ b/bwa.wdl
@@ -3,6 +3,7 @@ task BwaMem {
     File inputR1
     File? inputR2
     String referenceFasta
+    Array[File] indexFiles # These indexFiles need to be added, otherwise cromwell will not find them.
     String outputPath
     String? readgroup
 
@@ -26,3 +27,39 @@ task BwaMem {
         memory: if defined(memory) then memory else 8
     }
 }
+
+task index {
+    File fasta
+    String? preCommand
+    String? constructionAlgorithm
+    Int? blockSize
+    String? outputDir
+    String fastaFilename = basename(fasta)
+
+    command {
+        set -e -o pipefail
+        ${"mkdir -p " + outputDir}
+        ${preCommand}
+        if [[ ! '${outputDir}' = '' ]]
+        then
+            ln -sf ${fasta} ${outputDir + "/"}${fastaFilename}
+        fi
+        bwa index \
+        ${"-a " + constructionAlgorithm} \
+        ${"-b" + blockSize} \
+        ${outputDir + "/"}${fastaFilename}
+    }
+
+    output {
+        File indexBase = if (defined(outputDir)) then select_first([outputDir]) + "/" + fastaFilename else fastaFilename
+        File indexedFasta = indexBase
+        Array[File] indexFiles = [indexBase + ".bwt",indexBase + ".pac",indexBase + ".sa",indexBase + ".amb",indexBase + ".ann"]
+    }
+    parameter_meta {
+        fasta: "Fasta file to be indexed"
+        constructionAlgorithm: "-a STR BWT construction algorithm: bwtsw, is or rb2 [auto]"
+        blockSize: "-b INT block size for the bwtsw algorithm (effective with -a bwtsw) [10000000]"
+        outputDir: "index will be created in this output directory"
+    }
+}
+
diff --git a/centrifuge.wdl b/centrifuge.wdl
index 9c75f77257b7643c349b243c817499a2477a22cc..3182261156d76793f95e023581178a27026c96db 100644
--- a/centrifuge.wdl
+++ b/centrifuge.wdl
@@ -24,10 +24,7 @@ task download {
     command {
         set -e -o pipefail
         ${preCommand}
-        ${'if [ ! -f ' + seqTaxMapPath +
-        ' ]; then mkdir -p ' + seqTaxMapPath +
-        '; rm -d ' + seqTaxMapPath +
-        '; fi' }
+        ${"mkdir -p $(dirname " + seqTaxMapPath + ")"}
         ${executable} \
         -o ${libraryPath} \
         ${true='-d ' false='' defined(domain)}${sep=',' domain} \
@@ -92,8 +89,7 @@ task build {
     command {
         set -e -o pipefail
         ${preCommand}
-        mkdir -p ${centrifugeIndexBase}
-        rm -d ${centrifugeIndexBase}
+        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
         ${centrifugeBuildExecutable} \
         ${true='--large-index' false='' largeIndex} \
         ${true='--noauto' false='' noAuto} \
diff --git a/common.wdl b/common.wdl
index 79be3870be1f1d4d932564b42a8263bce1be4ba9..2ac9cb99366ab27e768c29a947bf239064193186 100644
--- a/common.wdl
+++ b/common.wdl
@@ -51,12 +51,13 @@ task concatenateTextFiles {
     Array[File] fileList
     String combinedFilePath
     Boolean? unzip=false
+    Boolean? zip=false
 
     command {
-        mkdir -p ${combinedFilePath}
-        rm -d ${combinedFilePath}
+        set -e -o pipefail
+        ${"mkdir -p $(dirname " + combinedFilePath + ")"}
         ${true='zcat' false= 'cat' unzip} ${sep=' ' fileList} \
-        > ${combinedFilePath}
+        ${true="| gzip -c" false="" zip} > ${combinedFilePath}
     }
 
     output {
diff --git a/cutadapt.wdl b/cutadapt.wdl
index f38f2c3d1ed23097b3c9522de656357fcba27dfe..6e6cd56aa2b028c9c18f08b20c60d162d1cf71f6 100644
--- a/cutadapt.wdl
+++ b/cutadapt.wdl
@@ -6,13 +6,13 @@ task cutadapt {
     String? format
     String? preCommand
     Int? cores = 1
-    String? memory = "4G"
-    Array[String]? adapter
-    Array[String]? front
-    Array[String]? anywhere
-    Array[String]? adapterRead2
-    Array[String]? frontRead2
-    Array[String]? anywhereRead2
+    Int? memory = 4
+    Array[String]+? adapter
+    Array[String]+? front
+    Array[String]+? anywhere
+    Array[String]+? adapterRead2
+    Array[String]+? frontRead2
+    Array[String]+? anywhereRead2
     Boolean? interleaved
     String? pairFilter
     Float? errorRate
@@ -59,7 +59,9 @@ task cutadapt {
 
     command {
         set -e -o pipefail
-        mkdir -p $(dirname ${read1output})
+        ${"mkdir -p $(dirname " + read1output + ")"}
+        ${"mkdir -p $(dirname " + read2output + ")"}
+        ${"mkdir -p $(dirname " + reportPath + ")"}
         ${preCommand}
         cutadapt \
         ${"--cores=" + cores} \
@@ -120,4 +122,4 @@ task cutadapt {
         cpu: select_first([cores])
         memory: select_first([memory])
     }
-}
\ No newline at end of file
+}
diff --git a/fastqc.wdl b/fastqc.wdl
index 72b4a538358a5fac7c27f6d004ff7a9a026fe4e8..f0fd2fec0371432f054aecd9624dc9bda628f292 100644
--- a/fastqc.wdl
+++ b/fastqc.wdl
@@ -15,7 +15,11 @@ task fastqc {
     File? limits
     Int? kmers
     String? dir
-
+    # Chops off the .gz extension if present.
+    String name = sub(seqFile, "\\.gz$","")
+    # This regex chops off the extension and replaces it with _fastqc for the reportdir.
+    # Just as fastqc does it.
+    String reportDir = outdirPath + "/" + sub(basename(name), "\\.[^\\.]*$", "_fastqc")
     command {
         set -e -o pipefail
         ${preCommand}
@@ -40,10 +44,10 @@ task fastqc {
     }
 
     output {
-        File rawReport = select_first(glob(outdirPath + "/*/fastqc_data.txt"))
-        File htmlReport = select_first(glob(outdirPath + "/*/fastqc_report.html"))
-        File summary = select_first(glob(outdirPath + "/*/summary.txt"))
-        Array[File] images = glob(outdirPath + "/*/Images/*.png")
+        File rawReport = reportDir + "/fastqc_data.txt"
+        File htmlReport = reportDir + "/fastqc_report.html"
+        File summary = reportDir + "/summary.txt"
+        Array[File] images = glob(reportDir + "/Images/*.png")
     }
 
     runtime {
@@ -113,4 +117,4 @@ task getConfiguration {
     runtime {
         memory: 1
     }
-}
\ No newline at end of file
+}
diff --git a/samtools.wdl b/samtools.wdl
index 249143ffa7d4215650b6b8dce6d0b2b216548d2b..a1192ac3a08419a8584c01a005cd785649a7d60d 100644
--- a/samtools.wdl
+++ b/samtools.wdl
@@ -65,3 +65,55 @@ task Flagstat {
         File flagstat = outputPath
     }
 }
+
+task fastq {
+    String? preCommand
+    File inputBam
+    String outputRead1
+    String? outputRead2
+    String? outputRead0
+    Int? includeFilter
+    Int? excludeFilter
+    Int? excludeSpecificFilter
+    Boolean? appendReadNumber
+    Boolean? outputQuality
+    Int? compressionLevel
+    Int? threads
+    Int? memory
+    Int totalThreads = select_first([threads, 1])
+
+    command {
+        ${preCommand}
+        samtools fastq \
+        ${true="-1" false="-s" defined(outputRead2)} ${outputRead1} \
+        ${"-2 " + outputRead2} \
+        ${"-0 " + outputRead0} \
+        ${"-f " + includeFilter} \
+        ${"-F " + excludeFilter} \
+        ${"-G " + excludeSpecificFilter} \
+        ${true="-N" false="-n" appendReadNumber} \
+        ${true="-O" false="" outputQuality} \
+        ${"-c " + compressionLevel} \
+        ${"--threads " + totalThreads} \
+        ${inputBam}
+    }
+    output {
+        File read1 = outputRead1
+        File? read2 = outputRead2
+        File? read0 = outputRead0
+    }
+    runtime {
+        cpu: totalThreads
+        memory: select_first([memory, 1])
+    }
+    parameter_meta {
+        preCommand: "A command that is run before the task. Can be used to activate environments"
+        inputBam: "The bam file to process."
+        outputRead1: "If only outputRead1 is given '-s' flag is assumed. Else '-1'."
+        includeFilter: "Include reads with ALL of these flags. Corresponds to '-f'"
+        excludeFilter: "Exclude reads with ONE OR MORE of these flags. Corresponds to '-F'"
+        excludeSpecificFilter: "Exclude reads with ALL of these flags. Corresponds to '-G'"
+        appendReadNumber: "Append /1 and /2 to the read name, or don't. Corresponds to '-n/N"
+
+    }
+}
diff --git a/seqtk.wdl b/seqtk.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..35fbedf368a7d62f1c9ed9643ea96fe0dd7a5a12
--- /dev/null
+++ b/seqtk.wdl
@@ -0,0 +1,26 @@
+task sample {
+    File sequenceFile
+    String? outFilePath = "subsampledReads.fq.gz"
+    String? preCommand
+    Int? seed
+    Boolean? twoPassMode
+    Float? fraction
+    Int? number
+    Boolean? zip = true
+
+    command {
+        set -e -o pipefail
+        ${'mkdir -p $(dirname ' + outFilePath + ')'}
+        ${preCommand}
+        seqtk sample \
+        ${"-s " + seed} \
+        ${true="-2 " false="" twoPassMode} \
+        ${sequenceFile} \
+        ${number} ${fraction} \
+        ${true="| gzip" false="" zip} \
+        ${"> " + outFilePath}
+    }
+    output {
+        File subsampledReads= select_first([outFilePath])
+    }
+}
\ No newline at end of file
diff --git a/spades.wdl b/spades.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..ce9de29cecb0b48d20faff177fc1a007e0e7c312
--- /dev/null
+++ b/spades.wdl
@@ -0,0 +1,80 @@
+task spades {
+    String outputDir
+    String? preCommand
+    File read1
+    File? read2
+    File? interlacedReads
+    File? sangerReads
+    File? pacbioReads
+    File? nanoporeReads
+    File? tslrContigs
+    File? trustedContigs
+    File? untrustedContigs
+    Boolean? singleCell
+    Boolean? metagenomic
+    Boolean? rna
+    Boolean? plasmid
+    Boolean? ionTorrent
+    Boolean? onlyErrorCorrection
+    Boolean? onlyAssembler
+    Boolean? careful
+    Boolean? disableGzipOutput
+    Boolean? disableRepeatResolution
+    File? dataset
+    Int? threads
+    Int? memoryGb
+    File? tmpDir
+    String? k
+    Float? covCutoff
+    Int? phredOffset
+    Int finalThreads = select_first([threads,1])
+    Int totalMemory = select_first([memoryGb, finalThreads * 16])
+    Int clusterMemory = totalMemory / finalThreads
+
+    command {
+        set -e -o pipefail
+        ${preCommand}
+        spades.py \
+        ${"-o " + outputDir} \
+        ${true="--sc" false="" singleCell} \
+        ${true="--meta" false="" metagenomic} \
+        ${true="--rna" false="" rna} \
+        ${true="--plasmid" false="" plasmid} \
+        ${true="--iontorrent" false="" ionTorrent} \
+        ${"--12 " + interlacedReads } \
+        ${true="-1" false="-s" defined(read2)} ${read1} \
+        ${"-2 " + read2 } \
+        ${"--sanger " + sangerReads } \
+        ${"--pacbio " + pacbioReads } \
+        ${"--nanopore " + nanoporeReads } \
+        ${"--tslr " + tslrContigs } \
+        ${"--trusted-contigs " + trustedContigs } \
+        ${"--untrusted-contigs " + untrustedContigs } \
+        ${true="--only-error-correction" false="" onlyErrorCorrection } \
+        ${true="--only-assembler" false="" onlyAssembler } \
+        ${true="--careful" false="" careful } \
+        ${true="--disable-gzip-output" false="" disableGzipOutput} \
+        ${true="--disable-rr" false="" disableRepeatResolution } \
+        ${"--dataset " + dataset } \
+        ${"--threads " + finalThreads} \
+        ${"--memory " + totalMemory } \
+        ${"-k " + k } \
+        ${"--cov-cutoff " + covCutoff } \
+        ${"--phred-offset " + phredOffset }
+    }
+    output {
+        Array[File] correctedReads = glob(outputDir + "/corrected/*.fastq*")
+        File scaffolds = outputDir + "/scaffolds.fasta"
+        File contigs = outputDir + "/contigs.fasta"
+        File assemblyGraphWithScaffoldsGfa = outputDir + "/assembly_graph_with_scaffolds.gfa"
+        File assemblyGraphFastg = outputDir + "/assembly_graph.fastg"
+        File contigsPaths = outputDir + "/contigs.paths"
+        File scaffoldsPaths = outputDir + "/scaffolds.paths"
+        File params = outputDir + "/params.txt"
+        File log = outputDir + "/spades.log"
+    }
+    runtime {
+        cpu: finalThreads
+        memory: clusterMemory
+    }
+}
\ No newline at end of file
diff --git a/unicycler.wdl b/unicycler.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..f7aa36a48067bcd2f880a843d1a4aa401c7f5078
--- /dev/null
+++ b/unicycler.wdl
@@ -0,0 +1,103 @@
+task unicycler {
+    String? preCommand
+    File? short1
+    File? short2
+    File? unpaired
+    File? long
+    String out
+    Int? verbosity
+    Int? minFastaLength
+    Int? keep
+    Boolean? vcf
+    Int? threads
+    Int? memory
+    Int finalThreads = select_first([threads, 1])
+    Int finalMemory = select_first([memory, 4])
+    String? mode
+    Float? minBridgeQual
+    Int? linearSeqs
+    File? spadesPath
+    Boolean? noCorrect
+    Float? minKmerFrac
+    Float? maxKmerFrac
+    Int? kmerCount
+    Float? depthFilter
+    Boolean? noMiniasm
+    File? raconPath
+    File? existingLongReadAssembly
+    Boolean? noRotate
+    File? startGenes
+    Float? startGeneId
+    Float? startGeneCov
+    String? makeblastdbPath
+    File? tblastnPath
+    Boolean? noPilon
+    File? bowtie2Path
+    File? bowtie2buildPath
+    File? samtoolsPath
+    File? pilonPath
+    File? javaPath
+    Int? minPolishSize
+    File? bcftoolsPath
+    Int? minComponentSize
+    Int? minDeadEndSize
+    File? contamination
+    String? scores
+    String? lowScore
+    command {
+        set -e -o pipefail
+        mkdir -p ${out}
+        ${preCommand}
+        unicycler \
+        ${"--short1 " + short1} \
+        ${"--short2 " + short2} \
+        ${"--unpaired " + unpaired} \
+        ${"--long " + long} \
+        --out ${out} \
+        ${"--verbosity " + verbosity} \
+        ${"--min_fasta_length " + minFastaLength} \
+        ${"--keep " + keep } \
+        ${true="--vcf" false="" vcf } \
+        ${"--threads " + finalThreads } \
+        ${"--mode " + mode } \
+        ${"--min_bridge_qual " + minBridgeQual } \
+        ${"--linear_seqs " + linearSeqs } \
+        ${"--spades_path " + spadesPath } \
+        ${true="--no_correct" false="" noCorrect } \
+        ${"--min_kmer_frac " + minKmerFrac } \
+        ${"--max_kmer_frac " + maxKmerFrac } \
+        ${"--kmer_count " + kmerCount } \
+        ${"--depth_filter " + depthFilter } \
+        ${true="--no_miniasm" false="" noMiniasm } \
+        ${"--racon_path " + raconPath } \
+        ${"--existing_long_read_assembly " + existingLongReadAssembly } \
+        ${true="--no_rotate" false="" noRotate } \
+        ${"--start_genes " + startGenes } \
+        ${"--start_gene_id " + startGeneId } \
+        ${"--start_gene_cov " + startGeneCov } \
+        ${"--makeblastdb_path " + makeblastdbPath } \
+        ${"--tblastn_path " + tblastnPath } \
+        ${true="--no_pilon" false="" noPilon } \
+        ${"--bowtie2_path " + bowtie2Path } \
+        ${"--bowtie2_build_path " + bowtie2buildPath } \
+        ${"--samtools_path " + samtoolsPath } \
+        ${"--pilon_path " + pilonPath } \
+        ${"--java_path " + javaPath } \
+        ${"--min_polish_size " + minPolishSize } \
+        ${"--bcftools_path " + bcftoolsPath } \
+        ${"--min_component_size " + minComponentSize } \
+        ${"--min_dead_end_size " + minDeadEndSize } \
+        ${"--contamination " + contamination } \
+        ${"--scores " + scores } \
+        ${"--low_score " + lowScore }
+    }
+    output {
+        File assemblyFasta = out + "/assembly.fasta"
+        File assemblyGfa = out + "/assembly.gfa"
+        File log = out + "/unicycler.log"
+    }
+    runtime {
+        cpu: finalThreads
+        memory: finalMemory
+    }
+}
\ No newline at end of file