diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa50280fd69ad4501cb482ea674a0db264e9168..390faf25a738576b9c977a82036d633edcbb5775 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.2.0-develop +version 4.0.0-develop --------------------------- ++ Added a log output for STAR. ++ Added report output to Hisat2. ++ Added output with all reports to gffcompare. ++ Change MultiQC inputs. It now accepts an array of report files. It does not + need access to a folder with the reports anymore. MultiQC can now be used + as a normal WDL task without hacks. + Picard: Make all outputs in `CollectMultipleMetrics` optional. This will make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. diff --git a/gffcompare.wdl b/gffcompare.wdl index 197dd9ade74cade9b67bb77bbd1eee2492a7924e..e5f62b5ec557ef05afbfcee0bb24d175a2b316b6 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -108,6 +108,7 @@ task GffCompare { File? 
missedIntrons = if debugMode then totalPrefix + ".missed_introns.gtf" else noneFile + Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) } runtime { diff --git a/hisat2.wdl b/hisat2.wdl index 7d638f1fd96492c1e538f48ea1ef60a9946f0917..5937f86d433fd4be82771bc6214e0779fbe48e95 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -31,6 +31,7 @@ task Hisat2 { String readgroup String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true + String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 4 String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G" @@ -56,6 +57,8 @@ task Hisat2 { --rg 'LB:~{library}' \ --rg 'PL:~{platform}' \ ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ + --new-summary \ + --summary-file ~{summaryFilePath} \ | samtools sort > ~{outputBam} samtools index ~{outputBam} ~{bamIndexPath} } @@ -63,6 +66,7 @@ task Hisat2 { output { File bamFile = outputBam File bamIndex = bamIndexPath + File summaryFile = summaryFilePath } runtime { @@ -82,6 +86,7 @@ task Hisat2 { readgroup: {description: "The readgroup id.", category: "required"} platform: {description: "The platform used for sequencing.", category: "advanced"} downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} + summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/multiqc.wdl b/multiqc.wdl index 3a1908a6857786e96d9bb39990fe25c47269c8f5..6a967b3f18c0fcc49a81168b97c047cc1395de89 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -23,8 +23,7 @@ version 1.0 task MultiQC { input { # Use a string here so cromwell does not relocate an 
entire analysis directory - String analysisDirectory - Array[File] dependencies = [] # This must be used in order to run multiqc after these tasks. + Array[File] reports Boolean force = false Boolean dirs = false Int? dirsDepth @@ -37,15 +36,13 @@ task MultiQC { String? tag String? ignore String? ignoreSamples - Boolean ignoreSymlinks = false File? sampleNames File? fileList Array[String]+? exclude Array[String]+? module Boolean dataDir = false - Boolean noDataDir = false String? dataFormat - Boolean zipDataDir = false + Boolean zipDataDir = true Boolean export = false Boolean flat = false Boolean interactive = true @@ -54,14 +51,44 @@ task MultiQC { Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig - Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff. - + String memory = "4G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + # This is where the reports end up. It does not need to be changed by the + # user. It is full of symbolic links, so it is not of any use to the user + # anyway. + String reportDir = "reports" + + # Below code requires python 3.6 or higher. + # This makes sure all report files are in a report directory that + # MultiQC can investigate. + # This creates files in report_dir / hashed_parent / file basename. + # By hashing the parent path we make sure there are no file collisions as + # files from the same directory end up in the same directory, while files + # from other directories get their own directory. Cromwell also uses this + # strategy. Using python's builtin hash is unique enough for these purposes. 
+ command { + python3 <<CODE + import os + from pathlib import Path + from typing import List + + reports: List[str] = ["~{sep='","' reports}"] + report_dir: Path = Path("~{reportDir}") + + for report in reports: + report_path = Path(report) + hashed_parent = str(hash(str(report_path.parent))) + new_path = report_dir / hashed_parent / report_path.name + if not new_path.parent.exists(): + new_path.parent.mkdir(parents=True) + os.symlink(report, str(new_path)) + CODE + set -e mkdir -p ~{outDir} multiqc \ @@ -77,15 +104,13 @@ task MultiQC { ~{"--tag " + tag} \ ~{"--ignore " + ignore} \ ~{"--ignore-samples" + ignoreSamples} \ - ~{true="--ignore-symlinks" false="" ignoreSymlinks} \ ~{"--sample-names " + sampleNames} \ ~{"--file-list " + fileList} \ ~{true="--exclude " false="" defined(exclude)}~{sep=" --exclude " exclude} \ ~{true="--module " false="" defined(module)}~{sep=" --module " module} \ - ~{true="--data-dir" false="" dataDir} \ - ~{true="--no-data-dir" false="" noDataDir} \ + ~{true="--data-dir" false="--no-data-dir" dataDir} \ ~{"--data-format " + dataFormat} \ - ~{true="--zip-data-dir" false="" zipDataDir} \ + ~{true="--zip-data-dir" false="" zipDataDir && dataDir} \ ~{true="--export" false="" export} \ ~{true="--flat" false="" flat} \ ~{true="--interactive" false="" interactive} \ @@ -94,7 +119,7 @@ task MultiQC { ~{false="--no-megaqc-upload" true="" megaQCUpload} \ ~{"--config " + config} \ ~{"--cl-config " + clConfig } \ - ~{analysisDirectory} + ~{reportDir} } String reportFilename = if (defined(fileName)) @@ -103,7 +128,7 @@ task MultiQC { output { File multiqcReport = outDir + "/" + reportFilename + "_report.html" - File multiqcDataDir = outDir + "/" +reportFilename + "_data" + File? 
multiqcDataDirZip = outDir + "/" +reportFilename + "_data.zip" } runtime { @@ -113,8 +138,7 @@ task MultiQC { } parameter_meta { - analysisDirectory: {description: "The directory to run MultiQC on.", category: "required"} - dependencies: {description: "This must be used in order to run multiqc after these tasks.", category: "internal_use_only"} + reports: {description: "Reports which multiqc should run on.", category: "required"} force: {description: "Equivalent to MultiQC's `--force` flag.", category: "advanced"} dirs: {description: "Equivalent to MultiQC's `--dirs` flag.", category: "advanced"} dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} @@ -127,13 +151,11 @@ task MultiQC { tag: {description: "Equivalent to MultiQC's `--tag` option.", category: "advanced"} ignore: {description: "Equivalent to MultiQC's `--ignore` option.", category: "advanced"} ignoreSamples: {description: "Equivalent to MultiQC's `--ignore-samples` option.", category: "advanced"} - ignoreSymlinks: {description: "Equivalent to MultiQC's `--ignore-symlinks` flag.", category: "advanced"} sampleNames: {description: "Equivalent to MultiQC's `--sample-names` option.", category: "advanced"} fileList: {description: "Equivalent to MultiQC's `--file-list` option.", category: "advanced"} exclude: {description: "Equivalent to MultiQC's `--exclude` option.", category: "advanced"} module: {description: "Equivalent to MultiQC's `--module` option.", category: "advanced"} - dataDir: {description: "Equivalent to MultiQC's `--data-dir` flag.", category: "advanced"} - noDataDir: {description: "Equivalent to MultiQC's `--no-data-dir` flag.", category: "advanced"} + dataDir: {description: "Whether to output a data dir. 
Sets `--data-dir` or `--no-data-dir` flag.", category: "advanced"} dataFormat: {description: "Equivalent to MultiQC's `--data-format` option.", category: "advanced"} zipDataDir: {description: "Equivalent to MultiQC's `--zip-data-dir` flag.", category: "advanced"} export: {description: "Equivalent to MultiQC's `--export` flag.", category: "advanced"} @@ -144,8 +166,6 @@ task MultiQC { megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} config: {description: "Equivalent to MultiQC's `--config` option.", category: "advanced"} clConfig: {description: "Equivalent to MultiQC's `--cl-config` option.", category: "advanced"} - finished: {description: "An array of booleans that can be used to let multiqc wait on stuff.", category: "internal_use_only"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", diff --git a/star.wdl b/star.wdl index 11fde466f110226058ecafac07e16516b48cd10e..4942f35e167a4ff14d272b3911e8bd4d68904914 100644 --- a/star.wdl +++ b/star.wdl @@ -128,6 +128,7 @@ task Star { output { File bamFile = outFileNamePrefix + "Aligned." + samOutputNames[outSAMtype] + File logFinalOut = outFileNamePrefix + "Log.final.out" } runtime {