diff --git a/bioconda.wdl b/bioconda.wdl
index f8c14e7a17eb0d86a0988c8b0288ee0206163cbc..0600ff72d79dc27065fbdb7b6e4db1950e37e710 100644
--- a/bioconda.wdl
+++ b/bioconda.wdl
@@ -20,5 +20,6 @@ task installPrefix {
     }
     output {
         File condaEnvPath=prefix
+        File condaJson=stdout()
     }
 }
diff --git a/centrifuge.wdl b/centrifuge.wdl
index 55b57739146dbc2255d81d90183ada502f7888b1..9c75f77257b7643c349b243c817499a2477a22cc 100644
--- a/centrifuge.wdl
+++ b/centrifuge.wdl
@@ -6,7 +6,7 @@ task download {
     String libraryPath
     Array[String]? domain
     String? executable = "centrifuge-download"
-    File? condaEnvironment
+    String? preCommand
     String? seqTaxMapPath
     String? database = "refseq"
     String? assemblyLevel
@@ -22,11 +22,12 @@ task download {
     # The bash statement at the beginning is to make sure
     # the directory for the SeqTaxMapPath exists.
     command {
+        set -e -o pipefail
+        ${preCommand}
         ${'if [ ! -f ' + seqTaxMapPath +
         ' ]; then mkdir -p ' + seqTaxMapPath +
         '; rm -d ' + seqTaxMapPath +
         '; fi' }
-        ${"source activate " + condaEnvironment}
         ${executable} \
         -o ${libraryPath} \
         ${true='-d ' false='' defined(domain)}${sep=',' domain} \
@@ -50,9 +51,10 @@ task download {
 task downloadTaxonomy {
     String centrifugeTaxonomyDir
     String? executable = "centrifuge-download"
-    File? condaEnvironment
+    String? preCommand
     command {
-        ${"source activate " + condaEnvironment}
+        set -e -o pipefail
+        ${preCommand}
         ${executable} \
         -o ${centrifugeTaxonomyDir} \
         taxonomy
@@ -69,7 +71,7 @@ task build {
     File taxonomyTree
     File inputFasta
     String centrifugeIndexBase
-    File? condaEnvironment
+    String? preCommand
     String? centrifugeBuildExecutable = "centrifuge-build"
     #Boolean? c = false
     Boolean? largeIndex = false
@@ -88,9 +90,10 @@ task build {
     Int? kmerCount
 
     command {
+        set -e -o pipefail
+        ${preCommand}
         mkdir -p ${centrifugeIndexBase}
         rm -d ${centrifugeIndexBase}
-        ${"source activate " + condaEnvironment}
         ${centrifugeBuildExecutable} \
         ${true='--large-index' false='' largeIndex} \
         ${true='--noauto' false='' noAuto} \
diff --git a/cutadapt.wdl b/cutadapt.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..f38f2c3d1ed23097b3c9522de656357fcba27dfe
--- /dev/null
+++ b/cutadapt.wdl
@@ -0,0 +1,131 @@
+# Runs cutadapt on single-end (read1 only) or paired-end (read1 + read2) reads.
+# Optional inputs that are left undefined are omitted from the command line.
+task cutadapt {
+    File read1
+    File? read2
+    String read1output
+    String? read2output
+    String? format
+    # Shell snippet executed before cutadapt (e.g. to activate an environment).
+    String? preCommand
+    Int? cores = 1
+    String? memory = "4G"
+    Array[String]? adapter
+    Array[String]? front
+    Array[String]? anywhere
+    Array[String]? adapterRead2
+    Array[String]? frontRead2
+    Array[String]? anywhereRead2
+    Boolean? interleaved
+    String? pairFilter
+    Float? errorRate
+    Boolean? noIndels
+    Int? times
+    Int? overlap
+    Boolean? matchReadWildcards
+    Boolean? noMatchAdapterWildcards
+    Boolean? noTrim
+    Boolean? maskAdapter
+    Int? cut
+    String? nextseqTrim
+    String? qualityCutoff
+    Int? qualityBase
+    Int? length
+    Boolean? trimN
+    String? lengthTag
+    String? stripSuffix
+    String? prefix
+    String? suffix
+    Int? minimumLength
+    Int? maximumLength
+    Int? maxN
+    Boolean? discardTrimmed
+    Boolean? discardUntrimmed
+    String? infoFilePath
+    String? restFilePath
+    String? wildcardFilePath
+    String? tooShortOutputPath
+    String? tooLongOutputPath
+    String? untrimmedOutputPath
+    String? tooShortPairedOutputPath
+    String? tooLongPairedOutputPath
+    String? untrimmedPairedOutputPath
+    Boolean? colorspace
+    Boolean? doubleEncode
+    Boolean? trimPrimer
+    Boolean? stripF3
+    Boolean? maq
+    Boolean? bwa
+    Boolean? zeroCap
+    Boolean? noZeroCap
+    # When defined, cutadapt's stdout (the report) is redirected to this path.
+    String? reportPath
+
+    # Each repeated adapter option uses its own flag as the separator, so every
+    # element of an array is passed with the same flag as the first element.
+    command {
+        set -e -o pipefail
+        mkdir -p $(dirname ${read1output})
+        ${preCommand}
+        cutadapt \
+        ${"--cores=" + cores} \
+        ${"--format " + format} \
+        ${true="-a " false="" defined(adapter)} ${sep=" -a " adapter} \
+        ${true="-A " false="" defined(adapterRead2)} ${sep=" -A " adapterRead2} \
+        ${true="-g " false="" defined(front)} ${sep=" -g " front} \
+        ${true="-G " false="" defined(frontRead2)} ${sep=" -G " frontRead2} \
+        ${true="-b " false="" defined(anywhere)} ${sep=" -b " anywhere} \
+        ${true="-B " false="" defined(anywhereRead2)} ${sep=" -B " anywhereRead2} \
+        --output ${read1output} ${"--paired-output " + read2output} \
+        ${"--too-short-output " + tooShortOutputPath} ${"--too-short-paired-output " + tooShortPairedOutputPath} \
+        ${"--too-long-output " + tooLongOutputPath} ${"--too-long-paired-output " + tooLongPairedOutputPath} \
+        ${"--untrimmed-output " + untrimmedOutputPath} ${"--untrimmed-paired-output " + untrimmedPairedOutputPath} \
+        ${"--pair-filter " + pairFilter} \
+        ${"--error-rate " + errorRate} \
+        ${"--times " + times} \
+        ${"--overlap " + overlap} \
+        ${"--cut " + cut} \
+        ${"--nextseq-trim " + nextseqTrim} \
+        ${"--quality-cutoff " + qualityCutoff} \
+        ${"--quality-base " + qualityBase} \
+        ${"--length " + length} \
+        ${"--length-tag " + lengthTag} \
+        ${"--strip-suffix " + stripSuffix} \
+        ${"--prefix " + prefix} \
+        ${"--suffix " + suffix} \
+        ${"--minimum-length " + minimumLength} \
+        ${"--maximum-length " + maximumLength} \
+        ${"--max-n " + maxN} \
+        ${true="--discard-untrimmed" false="" discardUntrimmed} \
+        ${"--info-file " + infoFilePath } \
+        ${"--rest-file " + restFilePath } \
+        ${"--wildcard-file " + wildcardFilePath} \
+        ${true="--match-read-wildcards" false="" matchReadWildcards} ${true="--no-match-adapter-wildcards" false="" noMatchAdapterWildcards} \
+        ${true="--no-trim" false="" noTrim} ${true="--mask-adapter" false="" maskAdapter} \
+        ${true="--no-indels" false="" noIndels} ${true="--trim-n" false="" trimN} \
+        ${true="--interleaved" false="" interleaved} ${true="--discard-trimmed" false="" discardTrimmed } \
+        ${true="--colorspace" false="" colorspace} ${true="--double-encode" false="" doubleEncode} ${true="--trim-primer" false="" trimPrimer} \
+        ${true="--strip-f3" false="" stripF3} ${true="--maq" false="" maq} ${true="--bwa" false="" bwa} \
+        ${true="--zero-cap" false="" zeroCap} ${true="--no-zero-cap" false="" noZeroCap} \
+        ${read1} ${read2} ${"> " + reportPath}
+    }
+    output{
+        # The report is read from reportPath when it is set, otherwise from stdout.
+        File report = if defined(reportPath) then select_first([reportPath]) else stdout()
+        File cutRead1 = read1output
+        File? cutRead2 = read2output
+        File? tooLongOutput=tooLongOutputPath
+        File? tooShortOutput=tooShortOutputPath
+        File? untrimmedOutput=untrimmedOutputPath
+        File? tooLongPairedOutput=tooLongPairedOutputPath
+        File? tooShortPairedOutput=tooShortPairedOutputPath
+        File? untrimmedPairedOutput=untrimmedPairedOutputPath
+        File? infoFile=infoFilePath
+        File? restFile=restFilePath
+        File? wildcardFile=wildcardFilePath
+    }
+    runtime {
+        cpu: select_first([cores])
+        memory: select_first([memory])
+    }
+}
\ No newline at end of file
diff --git a/fastqc.wdl b/fastqc.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..657c7929e2f4afb68cf32def2a158fab1ed98d5d
--- /dev/null
+++ b/fastqc.wdl
@@ -0,0 +1,121 @@
+# Runs FastQC on seqFile and exposes the files of the extracted report directory.
+# NOTE(review): all outputs live inside the extracted report directory, so they
+# presumably only exist when `extract` is true (the default) - confirm.
+task fastqc {
+    File seqFile
+    String outdirPath
+    String? preCommand
+    Boolean? casava
+    Boolean? nano
+    Boolean? noFilter
+    Boolean? extract = true
+    Boolean? nogroup
+    Int? minLength
+    String? format
+    Int? threads = 1
+    File? contaminants
+    File? adapters
+    File? limits
+    Int? kmers
+    String? dir
+
+    command {
+        set -e -o pipefail
+        ${preCommand}
+        mkdir -p ${outdirPath}
+        fastqc \
+        ${"--outdir " + outdirPath} \
+        ${true="--casava" false="" casava} \
+        ${true="--nano" false="" nano} \
+        ${true="--nofilter" false="" noFilter} \
+        ${true="--extract" false="" extract} \
+        ${true="--nogroup" false="" nogroup} \
+        ${"--min_length " + minLength } \
+        ${"--format " + format} \
+        ${"--threads " + threads} \
+        ${"--contaminants " + contaminants} \
+        ${"--adapters " + adapters} \
+        ${"--limits " + limits} \
+        ${"--kmers " + kmers} \
+        ${"--dir " + dir} \
+        ${seqFile}
+
+    }
+
+    output {
+        # Chops off the .gz extension if present.
+        String name = sub(seqFile, "\\.gz$","")
+        # This regex chops off the extension and replaces it with _fastqc for the reportdir.
+        # Just as fastqc does it.
+        String reportDir = outdirPath + "/" + sub(basename(name), "\\.[^\\.]*$", "_fastqc")
+        File rawReport = reportDir + "/fastqc_data.txt"
+        File htmlReport = reportDir + "/fastqc_report.html"
+        File summary = reportDir + "/summary.txt"
+        File adapterContent = reportDir + "/Images/adapter_content.png"
+        File duplicationLevels = reportDir + "/Images/duplication_levels.png"
+        File perBaseNContent = reportDir + "/Images/per_base_n_content.png"
+        File perBaseQuality = reportDir + "/Images/per_base_quality.png"
+        File perBaseSequenceContent = reportDir + "/Images/per_base_sequence_content.png"
+        File perSequenceGCContent = reportDir + "/Images/per_sequence_gc_content.png"
+        File perSequenceQuality = reportDir + "/Images/per_sequence_quality.png"
+        File perTileQuality = reportDir + "/Images/per_tile_quality.png"
+        File sequenceLengthDistribution = reportDir + "/Images/sequence_length_distribution.png"
+    }
+
+    runtime {
+        cpu: select_first([threads])
+    }
+}
+
+# Runs the extract-adapters jar on inputFile and reads back the adapter and
+# contamination lists it writes (by default inside outputDir).
+task extractAdapters {
+    File extractAdaptersFastqcJar
+    File inputFile
+    String outputDir
+    String? adapterOutputFilePath = outputDir + "/adapter.list"
+    String? contamsOutputFilePath = outputDir + "/contaminations.list"
+    Boolean? skipContams
+    File? knownContamFile
+    File? knownAdapterFile
+    Float? adapterCutoff
+    Boolean? outputAsFasta
+    command {
+        set -e -o pipefail
+        mkdir -p ${outputDir}
+        java -jar ${extractAdaptersFastqcJar} \
+        --inputFile ${inputFile} \
+        ${"--adapterOutputFile " + adapterOutputFilePath } \
+        ${"--contamsOutputFile " + contamsOutputFilePath } \
+        ${"--knownContamFile " + knownContamFile} \
+        ${"--knownAdapterFile " + knownAdapterFile} \
+        ${"--adapterCutoff " + adapterCutoff} \
+        ${true="--skipContams" false="" skipContams} \
+        ${true="--outputAsFasta" false="" outputAsFasta}
+    }
+
+    output {
+        File adapterOutputFile = select_first([adapterOutputFilePath])
+        File contamsOutputFile = select_first([contamsOutputFilePath])
+        Array[String] adapterList = read_lines(select_first([adapterOutputFilePath]))
+        Array[String] contamsList = read_lines(select_first([contamsOutputFilePath]))
+    }
+}
+
+# Writes the directory of the fastqc executable found on PATH (symlinks resolved)
+# to fastqcDirFile and exposes the files in its Configuration subdirectory.
+task getConfiguration {
+    String? preCommand
+    String? fastqcDirFile = "fastqcDir.txt"
+    command {
+        set -e -o pipefail
+        ${preCommand}
+        echo $(dirname $(readlink -f $(which fastqc))) > ${fastqcDirFile}
+    }
+    output {
+        String fastqcDir = read_string(fastqcDirFile)
+        File adapterList = fastqcDir + "/Configuration/adapter_list.txt"
+        File contaminantList = fastqcDir + "/Configuration/contaminant_list.txt"
+        File limits = fastqcDir + "/Configuration/limits.txt"
+    }
+}
\ No newline at end of file
diff --git a/ncbi.wdl b/ncbi.wdl
index 77fa0c8ce35757f8535f5a1a65f7de9b59ee56bf..f32959ec6ebe01494e2b6faf6ed580c354e6417d 100644
--- a/ncbi.wdl
+++ b/ncbi.wdl
@@ -14,11 +14,11 @@ task genomeDownload {
     String? domain = "all"
 
     String? executable = "ncbi-genome-download"
-    File? condaEnvironment
+    String? preCommand
 
     command {
         set -e -o pipefail
-        ${"source activate " + condaEnvironment}
+        ${preCommand}
         ${executable} \
         ${"--section " + section} \
         ${"--format " + format} \