# hmftools.wdl 44.96 KiB
version 1.0
# Copyright (c) 2020 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Runs AMBER on a tumor/reference BAM pair. Every declared output is a file
# the task expects to find under `outputDir` once the command has finished.
task Amber {
    input {
        # Reference (normal) sample.
        String referenceName
        File referenceBam
        File referenceBamIndex

        # Tumor sample.
        String tumorName
        File tumorBam
        File tumorBamIndex

        String outputDir = "./amber"
        File loci

        # Only the fasta itself is passed on the command line. The .fai and
        # .dict are not referenced there (presumably declared so they get
        # localized next to the fasta -- TODO confirm).
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict

        # Resource settings; `threads` is used both for -threads and runtime cpu.
        Int threads = 2
        String memory = "70G"
        String javaXmx = "64G"
        Int timeMinutes = 240
        String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0"
    }

    command {
        AMBER -Xmx~{javaXmx} \
        -reference ~{referenceName} \
        -reference_bam ~{referenceBam} \
        -tumor ~{tumorName} \
        -tumor_bam ~{tumorBam} \
        -output_dir ~{outputDir} \
        -threads ~{threads} \
        -ref_genome ~{referenceFasta} \
        -loci ~{loci}
    }

    output {
        File version = "~{outputDir}/amber.version"

        # Files named after the tumor sample.
        File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf"
        File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv"
        File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz"
        File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi"
        File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz"
        File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi"
        File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv"
        File tumorQc = "~{outputDir}/~{tumorName}.amber.qc"

        # Files named after the reference sample.
        File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz"
        File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi"

        # All of the above bundled into one array.
        Array[File] outputs = [
            version,
            tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex,
            tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv,
            tumorQc,
            normalSnpVcf, normalSnpVcfIndex]
    }

    runtime {
        docker: dockerImage
        cpu: threads
        memory: memory
        time_minutes: timeMinutes # !UnknownRuntimeKey
    }

    parameter_meta {
        referenceName: {description: "the name of the normal sample.", category: "required"}
        referenceBam: {description: "The normal BAM file.", category: "required"}
        referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"}
        tumorName: {description: "The name of the tumor sample.", category: "required"}
        tumorBam: {description: "The tumor BAM file.", category: "required"}
        tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"}
        outputDir: {description: "The path to the output directory.", category: "common"}
        loci: {description: "A VCF file containing likely heterozygous sites.", category: "required"}
        referenceFasta: {description: "The reference fasta file.", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
        threads: {description: "The number of threads the program will use.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs COBALT on a tumor/reference BAM pair. Every declared output is a file
# the task expects to find under `outputDir` once the command has finished.
task Cobalt {
    input {
        # Reference (normal) sample.
        String referenceName
        File referenceBam
        File referenceBamIndex

        # Tumor sample.
        String tumorName
        File tumorBam
        File tumorBamIndex

        String outputDir = "./cobalt"
        File gcProfile

        # Resource settings; `threads` is used both for -threads and runtime cpu.
        Int threads = 1
        String memory = "5G"
        String javaXmx = "4G"
        Int timeMinutes = 240
        String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0"
    }

    command {
        COBALT -Xmx~{javaXmx} \
        -reference ~{referenceName} \
        -reference_bam ~{referenceBam} \
        -tumor ~{tumorName} \
        -tumor_bam ~{tumorBam} \
        -output_dir ~{outputDir} \
        -threads ~{threads} \
        -gc_profile ~{gcProfile}
    }

    output {
        File version = "~{outputDir}/cobalt.version"

        # Files named after the reference sample.
        # NOTE(review): "Ration" in the next two names looks like a misspelling
        # of "Ratio"; the names are kept because callers reference them.
        File normalGcMedianTsv = "~{outputDir}/~{referenceName}.cobalt.gc.median.tsv"
        File normalRationMedianTsv = "~{outputDir}/~{referenceName}.cobalt.ratio.median.tsv"
        File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf"

        # Files named after the tumor sample.
        File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv"
        File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf"
        File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv"
        File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len"

        # All of the above bundled into one array.
        Array[File] outputs = [
            version,
            normalGcMedianTsv, normalRationMedianTsv, normalRationPcf,
            tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen]
    }

    runtime {
        docker: dockerImage
        cpu: threads
        memory: memory
        time_minutes: timeMinutes # !UnknownRuntimeKey
    }

    parameter_meta {
        referenceName: {description: "the name of the normal sample.", category: "required"}
        referenceBam: {description: "The normal BAM file.", category: "required"}
        referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"}
        tumorName: {description: "The name of the tumor sample.", category: "required"}
        tumorBam: {description: "The tumor BAM file.", category: "required"}
        tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"}
        outputDir: {description: "The path to the output directory.", category: "common"}
        gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"}
        threads: {description: "The number of threads the program will use.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs cuppa for one sample. The linx and purple output files are symlinked
# into a single `sampleData` directory, which is handed to cuppa as its
# sample data directory.
task Cuppa {
    input {
        Array[File]+ linxOutput
        Array[File]+ purpleOutput
        String sampleName
        Array[String]+ categories = ["DNA"]
        Array[File]+ referenceData
        File purpleSvVcf
        File purpleSvVcfIndex
        File purpleSomaticVcf
        File purpleSomaticVcfIndex
        String outputDir = "./cuppa"

        String javaXmx = "4G"
        String memory = "5G"
        Int timeMinutes = 10
        String dockerImage = "quay.io/biowdl/cuppa:1.4"
    }

    command {
        set -e
        mkdir -p sampleData ~{outputDir}
        # Collect the linx and purple files in one directory.
        ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput}
        # -ref_data_dir is the path of the first reference data file with its
        # basename removed, so all reference files are expected to be in the
        # same directory as that first file.
        cuppa -Xmx~{javaXmx} \
        -output_dir ~{outputDir} \
        -output_id ~{sampleName} \
        -categories '~{sep="," categories}' \
        -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \
        -sample_data_dir sampleData \
        -sample_data ~{sampleName} \
        -sample_sv_file ~{purpleSvVcf} \
        -sample_somatic_vcf ~{purpleSomaticVcf}
    }

    output {
        File cupData = "~{outputDir}/~{sampleName}.cup.data.csv"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
    }

    parameter_meta {
        linxOutput: {description: "The files produced by linx.", category: "required"}
        purpleOutput: {description: "The files produced by purple.", category: "required"}
        sampleName: {description: "The name of the sample.", category: "required"}
        categories: {description: "The classifiers to use.", category: "advanced"}
        referenceData: {description: "The reference data.", category: "required"}
        purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"}
        purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"}
        purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"}
        purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"}
        outputDir: {description: "The directory the output will be placed in.", category: "common"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs cuppa-chart on the cuppa data file of one sample. The chart and the
# conclusion file are expected under `outputDir`.
task CuppaChart {
    input {
        String sampleName
        File cupData
        String outputDir = "./cuppa"

        String memory = "4G"
        Int timeMinutes = 5
        String dockerImage = "quay.io/biowdl/cuppa:1.4"
    }

    command {
        set -e
        mkdir -p ~{outputDir}
        cuppa-chart \
        -sample ~{sampleName} \
        -sample_data ~{cupData} \
        -output_dir ~{outputDir}
    }

    output {
        File cuppaChart = "~{outputDir}/~{sampleName}.cuppa.chart.png"
        File cuppaConclusion = "~{outputDir}/~{sampleName}.cuppa.conclusion.txt"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
    }

    parameter_meta {
        # sampleName has no default, so it is a required input.
        sampleName: {description: "The name of the sample.", category: "required"}
        cupData: {description: "The cuppa output.", category: "required"}
        outputDir: {description: "The directory the output will be written to.", category: "common"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs the GripssApplicationKt main class from the gripss jar on a structural
# variant VCF. The output VCF and its .tbi index are expected at `outputPath`.
task GripssApplicationKt {
    input {
        File inputVcf
        String outputPath = "gripss.vcf.gz"
        String tumorName
        String referenceName
        # Only the fasta itself is passed on the command line. The .fai and
        # .dict are not referenced there (presumably declared so they get
        # localized next to the fasta -- TODO confirm).
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict
        File breakpointHotspot
        File breakendPon
        File breakpointPon

        String memory = "32G"
        String javaXmx = "31G"
        Int timeMinutes = 45
        String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0"
    }

    command {
        set -e
        # Make sure the directory for the output VCF exists, so that a nested
        # outputPath does not fail on a missing parent directory.
        mkdir -p "$(dirname ~{outputPath})"
        # The jar path is tied to the gripss version in the default dockerImage.
        java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
        -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \
        com.hartwig.hmftools.gripss.GripssApplicationKt \
        -tumor ~{tumorName} \
        -reference ~{referenceName} \
        -ref_genome ~{referenceFasta} \
        -breakpoint_hotspot ~{breakpointHotspot} \
        -breakend_pon ~{breakendPon} \
        -breakpoint_pon ~{breakpointPon} \
        -input_vcf ~{inputVcf} \
        -output_vcf ~{outputPath} \
        -paired_normal_tumor_ordinals
    }

    output {
        File outputVcf = outputPath
        File outputVcfIndex = outputPath + ".tbi"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
    }

    parameter_meta {
        inputVcf: {description: "The input VCF.", category: "required"}
        outputPath: {description: "The path where the output VCF will be written.", category: "common"}
        tumorName: {description: "The name of the tumor sample.", category: "required"}
        referenceName: {description: "The name of the normal sample.", category: "required"}
        referenceFasta: {description: "The reference fasta file.", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
        breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"}
        breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"}
        breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs the GripssHardFilterApplicationKt main class from the gripss jar on a
# VCF. The output VCF and its .tbi index are expected at `outputPath`.
task GripssHardFilterApplicationKt {
    input {
        File inputVcf
        String outputPath = "gripss_hard_filter.vcf.gz"

        String memory = "3G"
        String javaXmx = "2G"
        Int timeMinutes = 15
        String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0"
    }

    command {
        set -e
        # Make sure the directory for the output VCF exists, so that a nested
        # outputPath does not fail on a missing parent directory.
        mkdir -p "$(dirname ~{outputPath})"
        # The jar path is tied to the gripss version in the default dockerImage.
        java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
        -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \
        com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \
        -input_vcf ~{inputVcf} \
        -output_vcf ~{outputPath}
    }

    output {
        File outputVcf = outputPath
        File outputVcfIndex = outputPath + ".tbi"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
    }

    parameter_meta {
        inputVcf: {description: "The input VCF.", category: "required"}
        outputPath: {description: "The path where the output VCF will be written.", category: "common"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs health-checker on the flagstat and WGS metrics files of a tumor/normal
# pair plus the purple output directory. Both outputs are optional: the task
# looks for a `.HealthCheckSucceeded` and a `.HealthCheckFailed` file and
# exposes whichever exists.
task HealthChecker {
    input {
        String outputDir = "."
        String referenceName
        File referenceFlagstats
        File referenceMetrics
        String tumorName
        File tumorFlagstats
        File tumorMetrics
        Array[File]+ purpleOutput

        String javaXmx = "2G"
        # Must exceed javaXmx to leave room for JVM overhead. The previous
        # default of "1G" was below the 2G heap limit.
        String memory = "3G"
        Int timeMinutes = 1
        String dockerImage = "quay.io/biowdl/health-checker:3.2"
    }

    command {
        set -e
        mkdir -p ~{outputDir}
        # -purple_dir is the path of the first purple output file with its
        # basename removed, so all purple files are expected to be in the same
        # directory as that first file.
        health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
        -reference ~{referenceName} \
        -ref_flagstat_file ~{referenceFlagstats} \
        -ref_wgs_metrics_file ~{referenceMetrics} \
        -tumor ~{tumorName} \
        -tum_flagstat_file ~{tumorFlagstats} \
        -tum_wgs_metrics_file ~{tumorMetrics} \
        -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \
        -output_dir ~{outputDir}
    }

    output {
        File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded"
        File? healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
    }

    parameter_meta {
        outputDir: {description: "The path the output will be written to.", category: "common"}
        referenceName: {description: "The name of the normal sample.", category: "required"}
        referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"}
        referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"}
        tumorName: {description: "The name of the tumor sample.", category: "required"}
        tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"}
        tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"}
        purpleOutput: {description: "The files from purple's output directory.", category: "required"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs linx for one sample on a structural variant VCF and the purple output
# directory, with fusion checking, driver checking and visualisation data
# enabled. Every declared output is a file expected under `outputDir`.
task Linx {
    input {
        String sampleName
        File svVcf
        File svVcfIndex
        Array[File]+ purpleOutput
        String refGenomeVersion
        String outputDir = "./linx"
        File fragileSiteCsv
        File lineElementCsv
        File replicationOriginsBed
        File viralHostsCsv
        File knownFusionCsv
        File driverGenePanel
        # The following should be in the same directory. Only the directory of
        # geneDataCsv is passed to linx (as -gene_transcripts_dir).
        File geneDataCsv
        File proteinFeaturesCsv
        File transExonDataCsv
        File transSpliceDataCsv

        String memory = "5G"
        String javaXmx = "4G"
        Int timeMinutes = 10
        String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0"
    }

    command {
        linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
        -sample ~{sampleName} \
        -sv_vcf ~{svVcf} \
        -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \
        -ref_genome_version ~{refGenomeVersion} \
        -output_dir ~{outputDir} \
        -fragile_site_file ~{fragileSiteCsv} \
        -line_element_file ~{lineElementCsv} \
        -replication_origins_file ~{replicationOriginsBed} \
        -viral_hosts_file ~{viralHostsCsv} \
        -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
        -check_fusions \
        -known_fusion_file ~{knownFusionCsv} \
        -check_drivers \
        -driver_gene_panel ~{driverGenePanel} \
        -chaining_sv_limit 0 \
        -write_vis_data
    }

    output {
        File driverCatalog = "~{outputDir}/~{sampleName}.linx.driver.catalog.tsv"
        File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv"
        File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv"
        File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv"
        File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv"
        File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv"
        File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv"
        File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv"
        File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv"
        File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv"
        File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv"
        File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv"
        File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv"
        File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv"
        File linxVersion = "~{outputDir}/linx.version"
        # All of the above bundled into one array.
        Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion,
                               linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber,
                               linxVisFusion, linxVisGeneExon, linxVisProteinDomain,
                               linxVisSegments, linxVisSvData, linxVersion]
    }

    runtime {
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
        memory: memory
    }

    parameter_meta {
        sampleName: {description: "The name of the sample.", category: "required"}
        svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"}
        svVcfIndex: {description: "Index for the structural variants VCF file.", category: "required"}
        purpleOutput: {description: "The files produced by PURPLE.", category: "required"}
        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"}
        outputDir: {description: "The directory the outputs will be written to.", category: "common"}
        fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"}
        lineElementCsv: {description: "A list of known LINE source regions.", category: "required"}
        replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"}
        viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"}
        knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"}
        driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"}
        geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
        proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
        transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
        transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs protect for a tumor/normal pair, combining purple, linx, chord and
# virus-interpreter results with the serve actionability data. The resulting
# TSV is expected at `outputDir`/<tumorName>.protect.tsv.
task Protect {
    input {
        String refGenomeVersion
        String tumorName
        String referenceName
        Array[String]+ sampleDoids
        String outputDir = "."
        Array[File]+ serveActionability
        File doidJson
        File purplePurity
        File purpleQc
        File purpleDriverCatalogSomatic
        File purpleDriverCatalogGermline
        File purpleSomaticVariants
        File purpleSomaticVariantsIndex
        File purpleGermlineVariants
        File purpleGermlineVariantsIndex
        File purpleGeneCopyNumber
        File linxFusion
        File linxBreakend
        File linxDriversCatalog
        File chordPrediction
        File annotatedVirus

        String memory = "9G"
        String javaXmx = "8G"
        Int timeMinutes = 60
        String dockerImage = "quay.io/biowdl/protect:v1.4"
    }

    command {
        # -serve_actionability_dir is the path of the first actionability file
        # with its basename removed, so all actionability files are expected to
        # be in the same directory as that first file.
        protect -Xmx~{javaXmx} \
        -ref_genome_version ~{refGenomeVersion} \
        -tumor_sample_id ~{tumorName} \
        -reference_sample_id ~{referenceName} \
        -primary_tumor_doids '~{sep=";" sampleDoids}' \
        -output_dir ~{outputDir} \
        -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \
        -doid_json ~{doidJson} \
        -purple_purity_tsv ~{purplePurity} \
        -purple_qc_file ~{purpleQc} \
        -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \
        -purple_germline_driver_catalog_tsv ~{purpleDriverCatalogGermline} \
        -purple_somatic_variant_vcf ~{purpleSomaticVariants} \
        -purple_germline_variant_vcf ~{purpleGermlineVariants} \
        -purple_gene_copy_number_tsv ~{purpleGeneCopyNumber} \
        -linx_fusion_tsv ~{linxFusion} \
        -linx_breakend_tsv ~{linxBreakend} \
        -linx_driver_catalog_tsv ~{linxDriversCatalog} \
        -chord_prediction_txt ~{chordPrediction} \
        -annotated_virus_tsv ~{annotatedVirus}
    }

    output {
        File protectTsv = "~{outputDir}/~{tumorName}.protect.tsv"
    }

    runtime {
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
        memory: memory
    }

    parameter_meta {
        refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
        tumorName: {description: "The name of the tumor sample.", category: "required"}
        referenceName: {description: "The name of the normal sample.", category: "required"}
        sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"}
        outputDir: {description: "The directory the outputs will be written to.", category: "common"}
        serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"}
        doidJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"}
        purplePurity: {description: "The purity file generated by purple.", category: "required"}
        purpleQc: {description: "The QC file generated by purple.", category: "required"}
        purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"}
        purpleDriverCatalogGermline: {description: "The germline driver catalog generated by purple.", category: "required"}
        purpleSomaticVariants: {description: "The somatic VCF generated by purple.", category: "required"}
        purpleSomaticVariantsIndex: {description: "The index for the somatic VCF generated by purple.", category: "required"}
        purpleGermlineVariants: {description: "The germline VCF generated by purple.", category: "required"}
        purpleGermlineVariantsIndex: {description: "The index of the germline VCF generated by purple.", category: "required"}
        purpleGeneCopyNumber: {description: "The gene copy number file generated by purple.", category: "required"}
        linxFusion: {description: "The fusion file generated by linx.", category: "required"}
        linxBreakend: {description: "The breakend file generated by linx.", category: "required"}
        linxDriversCatalog: {description: "The driver catalog generated by linx.", category: "required"}
        chordPrediction: {description: "The chord prediction file.", category: "required"}
        annotatedVirus: {description: "The virus-interpreter output.", category: "required"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs PURPLE for a tumor/normal pair on the amber and cobalt output
# directories plus somatic, germline and structural variant VCFs. Outputs are
# exposed individually and grouped in three arrays: `outputs` (tables, VCFs
# and version file), `plots` (PNG files) and `circos` (circos input files).
task Purple {
    input {
        String referenceName
        String tumorName
        String outputDir = "./purple"
        Array[File]+ amberOutput
        Array[File]+ cobaltOutput
        File gcProfile
        File somaticVcf
        File germlineVcf
        File filteredSvVcf
        # Optional: -sv_recovery_vcf is only added when fullSvVcf is given.
        File? fullSvVcf
        File? fullSvVcfIndex
        # Only the fasta itself is passed on the command line. The .fai and
        # .dict are not referenced there (presumably declared so they get
        # localized next to the fasta -- TODO confirm).
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict
        File driverGenePanel
        File somaticHotspots
        File germlineHotspots

        Int threads = 1
        Int timeMinutes = 30
        String memory = "9G"
        String javaXmx = "8G"
        # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6'
        String dockerImage = "quay.io/biowdl/hmftools-purple:3.1"
    }

    command {
        # -amber and -cobalt are the paths of the first file of each array with
        # the basename removed, so each tool's files are expected to share the
        # directory of that first file.
        PURPLE -Xmx~{javaXmx} \
        -reference ~{referenceName} \
        -tumor ~{tumorName} \
        -output_dir ~{outputDir} \
        -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \
        -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \
        -gc_profile ~{gcProfile} \
        -somatic_vcf ~{somaticVcf} \
        -germline_vcf ~{germlineVcf} \
        -structural_vcf ~{filteredSvVcf} \
        ~{"-sv_recovery_vcf " + fullSvVcf} \
        -circos /usr/local/bin/circos \
        -ref_genome ~{referenceFasta} \
        -driver_catalog \
        -driver_gene_panel ~{driverGenePanel} \
        -somatic_hotspots ~{somaticHotspots} \
        -germline_hotspots ~{germlineHotspots} \
        -threads ~{threads}
    }

    output {
        # Tables, VCFs and version file.
        File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv"
        File driverCatalogGermlineTsv = "~{outputDir}/~{tumorName}.driver.catalog.germline.tsv"
        File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv"
        File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv"
        File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv"
        File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv"
        File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv"
        File purpleQc = "~{outputDir}/~{tumorName}.purple.qc"
        File purpleSegmentTsv = "~{outputDir}/~{tumorName}.purple.segment.tsv"
        File purpleSomaticClonalityTsv = "~{outputDir}/~{tumorName}.purple.somatic.clonality.tsv"
        File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv"
        File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz"
        File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi"
        File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz"
        File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi"
        File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz"
        File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi"
        # Plots.
        File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png"
        File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png"
        File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png"
        File mapPlot = "~{outputDir}/plot/~{tumorName}.map.png"
        File purityRangePlot = "~{outputDir}/plot/~{tumorName}.purity.range.png"
        File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png"
        File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png"
        File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png"
        File purpleVersion = "~{outputDir}/purple.version"
        # Circos input files.
        File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos"
        File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf"
        File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos"
        File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos"
        File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos"
        File circosGaps = "~{outputDir}/circos/gaps.txt"
        File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos"
        File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos"
        File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf"
        File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos"
        File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos"
        Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv,
                               purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc,
                               purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv,
                               purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex,
                               purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv]
        Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot,
                             segmentPlot, somaticClonalityPlot, somaticPlot]
        Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink,
                              circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap,
                              circosSnp]
    }

    runtime {
        time_minutes: timeMinutes # !UnknownRuntimeKey
        cpu: threads
        docker: dockerImage
        memory: memory
    }

    parameter_meta {
        referenceName: {description: "The name of the normal sample.", category: "required"}
        tumorName: {description: "The name of the tumor sample.", category: "required"}
        outputDir: {description: "The path to the output directory.", category: "common"}
        amberOutput: {description: "The output files of hmftools amber.", category: "required"}
        cobaltOutput: {description: "The output files of hmftools cobalt.", category: "required"}
        gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"}
        somaticVcf: {description: "The somatic variant calling results.", category: "required"}
        germlineVcf: {description: "The germline variant calling results.", category: "required"}
        filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"}
        fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "common"}
        fullSvVcfIndex: {description: "The index for the unfiltered structural variant calling results.", category: "common"}
        referenceFasta: {description: "The reference fasta file.", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
        driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"}
        somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"}
        germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"}
        threads: {description: "The number of threads the program will use.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
task Sage {
    # Runs SAGE on a tumor BAM, optionally paired with a reference (normal)
    # BAM, and exposes the resulting VCF together with its tabix index.
    input {
        String tumorName
        File tumorBam
        File tumorBamIndex
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai
        File hotspots
        File panelBed
        File highConfidenceBed
        Boolean hg38 = false
        Boolean panelOnly = false
        String outputPath = "./sage.vcf.gz"

        # Optional inputs: each one is only added to the command line when defined.
        String? referenceName
        File? referenceBam
        File? referenceBamIndex
        Int? hotspotMinTumorQual
        Int? panelMinTumorQual
        # NOTE(review): the two *MaxGermlineVaf inputs are typed Int?, which cannot
        # express fractional thresholds; confirm against the SAGE documentation
        # whether Float? would be more appropriate.
        Int? hotspotMaxGermlineVaf
        Int? hotspotMaxGermlineRelRawBaseQual
        Int? panelMaxGermlineVaf
        Int? panelMaxGermlineRelRawBaseQual
        String? mnvFilterEnabled
        File? coverageBed

        Int threads = 4
        String javaXmx = "50G"
        String memory = "51G"
        # The default time limit scales with the combined size (in G) of the
        # supplied BAM files and is divided by the number of threads.
        Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads)
        String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0"
    }

    command {
        SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
        -tumor ~{tumorName} \
        -tumor_bam ~{tumorBam} \
        ~{"-reference " + referenceName} \
        ~{"-reference_bam " + referenceBam} \
        -ref_genome ~{referenceFasta} \
        -hotspots ~{hotspots} \
        -panel_bed ~{panelBed} \
        -high_confidence_bed ~{highConfidenceBed} \
        -assembly ~{true="hg38" false="hg19" hg38} \
        ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \
        ~{"-panel_min_tumor_qual " + panelMinTumorQual} \
        ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \
        ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \
        ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \
        ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \
        ~{"-mnv_filter_enabled " + mnvFilterEnabled} \
        ~{"-coverage_bed " + coverageBed} \
        ~{true="-panel_only" false="" panelOnly} \
        -threads ~{threads} \
        -out ~{outputPath}
    }

    output {
        File outputVcf = outputPath
        # The index is expected next to the VCF, with a ".tbi" suffix.
        File outputVcfIndex = outputPath + ".tbi"
        # There are some plots as well, but in the current container the labels in the plots are just series of `□`s.
        # This seems to be a systemic issue with R generated plots in biocontainers...
    }

    runtime {
        time_minutes: timeMinutes # !UnknownRuntimeKey
        cpu: threads
        docker: dockerImage
        memory: memory
    }

    parameter_meta {
        tumorName: {description: "The name of the tumor sample.", category: "required"}
        tumorBam: {description: "The BAM file for the tumor sample.", category: "required"}
        tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"}
        referenceName: {description: "The name of the normal/reference sample.", category: "common"}
        referenceBam: {description: "The BAM file for the normal sample.", category: "common"}
        referenceBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"}
        referenceFasta: {description: "The reference fasta file.", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
        hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"}
        panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"}
        highConfidenceBed: {description: "A bed file describing high confidence mapping regions.", category: "required"}
        hg38: {description: "Whether or not the reference genome is hg38; if false, hg19 is passed as the assembly.", category: "common"}
        panelOnly: {description: "Equivalent to sage's `panel_only` flag.", category: "advanced"}
        outputPath: {description: "The path the output VCF will be written to.", category: "common"}
        hotspotMinTumorQual: {description: "Equivalent to sage's `hotspot_min_tumor_qual` option.", category: "advanced"}
        panelMinTumorQual: {description: "Equivalent to sage's `panel_min_tumor_qual` option.", category: "advanced"}
        hotspotMaxGermlineVaf: {description: "Equivalent to sage's `hotspot_max_germline_vaf` option.", category: "advanced"}
        hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"}
        panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"}
        panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_rel_raw_base_qual` option.", category: "advanced"}
        mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"}
        coverageBed: {description: "Equivalent to sage's `coverage_bed` option.", category: "advanced"}
        threads: {description: "The number of threads the program will use.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
task VirusInterpreter {
    # Runs virus-interpreter on a virusbreakend TSV and exposes the annotated
    # TSV that is written to the output directory.
    input {
        String sampleId
        File virusBreakendTsv
        File taxonomyDbTsv
        File virusInterpretationTsv
        File virusBlacklistTsv
        String outputDir = "."

        String memory = "3G"
        String javaXmx = "2G"
        Int timeMinutes = 15
        String dockerImage = "quay.io/biowdl/virus-interpreter:1.0"
    }

    command {
        virus-interpreter -Xmx~{javaXmx} \
        -sample_id ~{sampleId} \
        -virus_breakend_tsv ~{virusBreakendTsv} \
        -taxonomy_db_tsv ~{taxonomyDbTsv} \
        -virus_interpretation_tsv ~{virusInterpretationTsv} \
        -virus_blacklist_tsv ~{virusBlacklistTsv} \
        -output_dir ~{outputDir}
    }

    output {
        # The annotated TSV is expected in outputDir, named after the sample id.
        File virusAnnotatedTsv = "~{outputDir}/~{sampleId}.virus.annotated.tsv"
    }

    runtime {
        time_minutes: timeMinutes # !UnknownRuntimeKey
        docker: dockerImage
        memory: memory
    }

    parameter_meta {
        sampleId: {description: "The name of the sample.", category: "required"}
        virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"}
        taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"}
        virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"}
        virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"}
        outputDir: {description: "The directory the output will be written to.", category: "common"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}