Skip to content
Snippets Groups Projects
Commit 06ef73d9 authored by Ruben Vorderman's avatar Ruben Vorderman
Browse files

Merge branch 'QC' into 'develop'

Changes for flexiprep QC pipeline

See merge request sasc/wdl-tasks!1
parents 618df913 52c8fe03
No related branches found
No related tags found
No related merge requests found
......@@ -20,5 +20,6 @@ task installPrefix {
}
output {
File condaEnvPath=prefix
File condaJson=stdout()
}
}
......@@ -6,7 +6,7 @@ task download {
String libraryPath
Array[String]? domain
String? executable = "centrifuge-download"
File? condaEnvironment
String? preCommand
String? seqTaxMapPath
String? database = "refseq"
String? assemblyLevel
......@@ -22,11 +22,12 @@ task download {
# The bash statement at the beginning is to make sure
# the directory for the SeqTaxMapPath exists.
command {
set -e -o pipefail
${preCommand}
${'if [ ! -f ' + seqTaxMapPath +
' ]; then mkdir -p ' + seqTaxMapPath +
'; rm -d ' + seqTaxMapPath +
'; fi' }
${"source activate " + condaEnvironment}
${executable} \
-o ${libraryPath} \
${true='-d ' false='' defined(domain)}${sep=',' domain} \
......@@ -50,9 +51,10 @@ task download {
task downloadTaxonomy {
String centrifugeTaxonomyDir
String? executable = "centrifuge-download"
File? condaEnvironment
String? preCommand
command {
${"source activate " + condaEnvironment}
set -e -o pipefail
${preCommand}
${executable} \
-o ${centrifugeTaxonomyDir} \
taxonomy
......@@ -69,7 +71,7 @@ task build {
File taxonomyTree
File inputFasta
String centrifugeIndexBase
File? condaEnvironment
String? preCommand
String? centrifugeBuildExecutable = "centrifuge-build"
#Boolean? c = false
Boolean? largeIndex = false
......@@ -88,9 +90,10 @@ task build {
Int? kmerCount
command {
set -e -o pipefail
${preCommand}
mkdir -p ${centrifugeIndexBase}
rm -d ${centrifugeIndexBase}
${"source activate " + condaEnvironment}
${centrifugeBuildExecutable} \
${true='--large-index' false='' largeIndex} \
${true='--noauto' false='' noAuto} \
......
# Runs cutadapt on a single-end or paired-end set of reads.
#
# Required inputs:
#   read1        - first (or only) reads file.
#   read1output  - path the trimmed read1 is written to; its parent directory
#                  is created before cutadapt is started.
# Optional inputs map one-to-one onto cutadapt command line flags. Flags are
# only emitted when the corresponding input is defined (or, for booleans, true).
#   preCommand   - shell snippet executed before cutadapt (e.g. environment setup).
#   reportPath   - when defined, cutadapt's stdout is redirected to this path and
#                  that file is returned as `report`; otherwise the task's stdout
#                  is returned.
task cutadapt {
    File read1
    File? read2
    String read1output
    String? read2output
    String? format
    String? preCommand
    Int? cores = 1
    String? memory = "4G"
    Array[String]? adapter
    Array[String]? front
    Array[String]? anywhere
    Array[String]? adapterRead2
    Array[String]? frontRead2
    Array[String]? anywhereRead2
    Boolean? interleaved
    String? pairFilter
    Float? errorRate
    Boolean? noIndels
    Int? times
    Int? overlap
    Boolean? matchReadWildcards
    Boolean? noMatchAdapterWildcards
    Boolean? noTrim
    Boolean? maskAdapter
    Int? cut
    String? nextseqTrim
    String? qualityCutoff
    Int? qualityBase
    Int? length
    Boolean? trimN
    String? lengthTag
    String? stripSuffix
    String? prefix
    String? suffix
    Int? minimumLength
    Int? maximumLength
    Int? maxN
    Boolean? discardTrimmed
    Boolean? discardUntrimmed
    String? infoFilePath
    String? restFilePath
    String? wildcardFilePath
    String? tooShortOutputPath
    String? tooLongOutputPath
    String? untrimmedOutputPath
    String? tooShortPairedOutputPath
    String? tooLongPairedOutputPath
    String? untrimmedPairedOutputPath
    Boolean? colorspace
    Boolean? doubleEncode
    Boolean? trimPrimer
    Boolean? stripF3
    Boolean? maq
    Boolean? bwa
    Boolean? zeroCap
    Boolean? noZeroCap
    String? reportPath

    # Array-valued adapter inputs are rendered as "<flag> a1 <flag> a2 ...":
    # the first flag comes from the defined() check, the remaining ones from
    # the separator, so flag and separator must always use the same letter.
    command {
        set -e -o pipefail
        mkdir -p $(dirname ${read1output})
        ${preCommand}
        cutadapt \
        ${"--cores=" + cores} \
        ${"--format " + format} \
        ${true="-a " false="" defined(adapter)} ${sep=" -a " adapter} \
        ${true="-A " false="" defined(adapterRead2)} ${sep=" -A " adapterRead2} \
        ${true="-g " false="" defined(front)} ${sep=" -g " front} \
        ${true="-G " false="" defined(frontRead2)} ${sep=" -G " frontRead2} \
        ${true="-b " false="" defined(anywhere)} ${sep=" -b " anywhere} \
        ${true="-B " false="" defined(anywhereRead2)} ${sep=" -B " anywhereRead2} \
        --output ${read1output} ${"--paired-output " + read2output} \
        ${"--too-short-output " + tooShortOutputPath} ${"--too-short-paired-output " + tooShortPairedOutputPath} \
        ${"--too-long-output " + tooLongOutputPath} ${"--too-long-paired-output " + tooLongPairedOutputPath} \
        ${"--untrimmed-output " + untrimmedOutputPath} ${"--untrimmed-paired-output " + untrimmedPairedOutputPath} \
        ${"--pair-filter " + pairFilter} \
        ${"--error-rate " + errorRate} \
        ${"--times " + times} \
        ${"--overlap " + overlap} \
        ${"--cut " + cut} \
        ${"--nextseq-trim " + nextseqTrim} \
        ${"--quality-cutoff " + qualityCutoff} \
        ${"--quality-base " + qualityBase} \
        ${"--length " + length} \
        ${"--length-tag " + lengthTag} \
        ${"--strip-suffix " + stripSuffix} \
        ${"--prefix " + prefix} \
        ${"--suffix " + suffix} \
        ${"--minimum-length " + minimumLength} \
        ${"--maximum-length " + maximumLength} \
        ${"--max-n " + maxN} \
        ${true="--discard-untrimmed" false="" discardUntrimmed} \
        ${"--info-file " + infoFilePath } \
        ${"--rest-file " + restFilePath } \
        ${"--wildcard-file " + wildcardFilePath} \
        ${true="--match-read-wildcards" false="" matchReadWildcards} ${true="--no-match-adapter-wildcards" false="" noMatchAdapterWildcards} \
        ${true="--no-trim" false="" noTrim} ${true="--mask-adapter" false="" maskAdapter} \
        ${true="--no-indels" false="" noIndels} ${true="--trim-n" false="" trimN} \
        ${true="--interleaved" false="" interleaved} ${true="--discard-trimmed" false="" discardTrimmed } \
        ${true="--colorspace" false="" colorspace} ${true="--double-encode" false="" doubleEncode} \
        ${true="--trim-primer" false="" trimPrimer} \
        ${true="--strip-f3" false="" stripF3} ${true="--maq" false="" maq} ${true="--bwa" false="" bwa} \
        ${true="--zero-cap" false="" zeroCap} ${true="--no-zero-cap" false="" noZeroCap} \
        ${read1} ${read2} ${"> " + reportPath}
    }

    output {
        # The report is the redirected file when reportPath is given, stdout otherwise.
        File report = if defined(reportPath) then select_first([reportPath]) else stdout()
        File cutRead1 = read1output
        File? cutRead2 = read2output
        File? tooLongOutput=tooLongOutputPath
        File? tooShortOutput=tooShortOutputPath
        File? untrimmedOutput=untrimmedOutputPath
        File? tooLongPairedOutput=tooLongPairedOutputPath
        File? tooShortPairedOutput=tooShortPairedOutputPath
        File? untrimmedPairedOutput=untrimmedPairedOutputPath
        File? infoFile=infoFilePath
        File? restFile=restFilePath
        File? wildcardFile=wildcardFilePath
    }

    runtime {
        cpu: select_first([cores])
        memory: select_first([memory])
    }
}
\ No newline at end of file
# Runs fastqc on one sequence file and exposes the individual files of the
# resulting report directory as task outputs.
#
# Required inputs:
#   seqFile     - the sequence file that is analysed.
#   outdirPath  - output directory; it is created before fastqc is started and
#                 passed to fastqc via --outdir.
# Optional inputs are passed to the fastqc flag of the same meaning and are
# only emitted when defined (or, for booleans, when true).
#   preCommand  - shell snippet executed before fastqc (e.g. environment setup).
#   extract     - defaults to true.
#   threads     - defaults to 1 and is also used as the cpu runtime attribute.
# NOTE(review): every output below points inside the report directory, which
# presumably only exists when extract is true -- confirm before disabling it.
task fastqc {
File seqFile
String outdirPath
String? preCommand
Boolean? casava
Boolean? nano
Boolean? noFilter
Boolean? extract = true
Boolean? nogroup
Int? minLength
String? format
Int? threads = 1
File? contaminants
File? adapters
File? limits
Int? kmers
String? dir
command {
set -e -o pipefail
${preCommand}
mkdir -p ${outdirPath}
fastqc \
${"--outdir " + outdirPath} \
${true="--casava" false="" casava} \
${true="--nano" false="" nano} \
${true="--nofilter" false="" noFilter} \
${true="--extract" false="" extract} \
${true="--nogroup" false="" nogroup} \
${"--min_length " + minLength } \
${"--format " + format} \
${"--threads " + threads} \
${"--contaminants " + contaminants} \
${"--adapters " + adapters} \
${"--limits " + limits} \
${"--kmers " + kmers} \
${"--dir " + dir} \
${seqFile}
}
output {
# Chops off the .gz extension if present.
String name = sub(seqFile, "\\.gz$","")
# This regex chops off the last remaining extension of the file's basename and
# replaces it with _fastqc to get the report directory, mirroring how fastqc
# names it.
String reportDir = outdirPath + "/" + sub(basename(name), "\\.[^\\.]*$", "_fastqc")
# Text and HTML reports located directly in the report directory.
File rawReport = reportDir + "/fastqc_data.txt"
File htmlReport = reportDir + "/fastqc_report.html"
File summary = reportDir + "/summary.txt"
# Plots located in the Images subdirectory of the report directory.
File adapterContent = reportDir + "/Images/adapter_content.png"
File duplicationLevels = reportDir + "/Images/duplication_levels.png"
File perBaseNContent = reportDir + "/Images/per_base_n_content.png"
File perBaseQuality = reportDir + "/Images/per_base_quality.png"
File perBaseSequenceContent = reportDir + "/Images/per_base_sequence_content.png"
File perSequenceGCContent = reportDir + "/Images/per_sequence_gc_content.png"
File perSequenceQuality = reportDir + "/Images/per_sequence_quality.png"
File perTileQuality = reportDir + "/Images/per_tile_quality.png"
File sequenceLengthDistribution = reportDir + "/Images/sequence_length_distribution.png"
}
runtime {
# threads has a default of 1, so select_first only unwraps the optional.
cpu: select_first([threads])
}
}
# Runs the extractAdaptersFastqc jar on an input file and returns the adapter
# and contamination lists it writes, both as files and as arrays of lines.
#
# Required inputs:
#   extractAdaptersFastqcJar - the jar that is executed with `java -jar`.
#   inputFile                - passed to the tool via --inputFile.
#   outputDir                - created before the tool runs; the default output
#                              paths are placed inside it.
# Optional inputs:
#   preCommand               - shell snippet executed before java is invoked
#                              (e.g. environment setup), as in the sibling tasks.
#   adapterOutputFilePath    - defaults to outputDir + "/adapter.list".
#   contamsOutputFilePath    - defaults to outputDir + "/contaminations.list".
#   remaining inputs are forwarded to the flag of the same name when defined
#   (or, for booleans, when true).
task extractAdapters {
    File extractAdaptersFastqcJar
    File inputFile
    String outputDir
    String? preCommand
    String? adapterOutputFilePath = outputDir + "/adapter.list"
    String? contamsOutputFilePath = outputDir + "/contaminations.list"
    Boolean? skipContams
    File? knownContamFile
    File? knownAdapterFile
    Float? adapterCutoff
    Boolean? outputAsFasta

    command {
        set -e -o pipefail
        ${preCommand}
        mkdir -p ${outputDir}
        java -jar ${extractAdaptersFastqcJar} \
        --inputFile ${inputFile} \
        ${"--adapterOutputFile " + adapterOutputFilePath } \
        ${"--contamsOutputFile " + contamsOutputFilePath } \
        ${"--knownContamFile " + knownContamFile} \
        ${"--knownAdapterFile " + knownAdapterFile} \
        ${"--adapterCutoff " + adapterCutoff} \
        ${true="--skipContams" false="" skipContams} \
        ${true="--outputAsFasta" false="" outputAsFasta}
    }

    output {
        # Both path inputs carry defaults, so select_first only unwraps the optional.
        File adapterOutputFile = select_first([adapterOutputFilePath])
        File contamsOutputFile = select_first([contamsOutputFilePath])
        Array[String] adapterList = read_lines(select_first([adapterOutputFilePath]))
        Array[String] contamsList = read_lines(select_first([contamsOutputFilePath]))
    }
}
# Locates the directory of the fastqc executable found on PATH and returns the
# configuration files stored in its Configuration subdirectory.
#
# Optional inputs:
#   preCommand    - shell snippet executed before the lookup (e.g. environment
#                   setup that puts fastqc on PATH).
#   fastqcDirFile - file the resolved directory is written to; defaults to
#                   "fastqcDir.txt".
task getConfiguration {
    String? preCommand
    String? fastqcDirFile = "fastqcDir.txt"

    # Each lookup step is a separate assignment so that `set -e` aborts the
    # task when fastqc is not on PATH or the link cannot be resolved. Nested
    # inside `echo $(...)` those failures would be masked by echo's exit status.
    command {
        set -e -o pipefail
        ${preCommand}
        fastqcExecutable=$(which fastqc)
        fastqcResolved=$(readlink -f "$fastqcExecutable")
        dirname "$fastqcResolved" > ${fastqcDirFile}
    }

    output {
        String fastqcDir = read_string(fastqcDirFile)
        File adapterList = fastqcDir + "/Configuration/adapter_list.txt"
        File contaminantList = fastqcDir + "/Configuration/contaminant_list.txt"
        File limits = fastqcDir + "/Configuration/limits.txt"
    }
}
\ No newline at end of file
......@@ -14,11 +14,11 @@ task genomeDownload {
String? domain = "all"
String? executable = "ncbi-genome-download"
File? condaEnvironment
String? preCommand
command {
set -e -o pipefail
${"source activate " + condaEnvironment}
${preCommand}
${executable} \
${"--section " + section} \
${"--format " + format} \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment