diff --git a/CHANGELOG.md b/CHANGELOG.md
index 71309ae880503d69b4b05d3d6dda78108c8a62b3..986582ddcd7fef050f497a352b5af3f581f8a13e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ that users understand how the changes affect the new version.
 -->
 version 5.1.0-dev
 ---------------------------
++ Added a task to add SVTYPE annotations to GRIDSS results
+  (`AnnotateSvTypes`).
 + The GRIDSS task will now run tabix separately if GRIDSS doesn't produce a
   vcf index.
 + Add a script to subtract UMI's from the read name and add them as
diff --git a/bcftools.wdl b/bcftools.wdl
index 88d97cd09465fee1384edc6b498504bbd2adf917..2bf1c7323dad2b7c45ba1b91380e4f2396d80730 100644
--- a/bcftools.wdl
+++ b/bcftools.wdl
@@ -186,8 +186,8 @@ task Sort {
         String outputPath = "output.vcf.gz"
         String tmpDir = "./sorting-tmp"
 
-        String memory = "256M"
-        Int timeMinutes = 1 + ceil(size(inputFile, "G"))
+        String memory = "5G"
+        Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5
         String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2"
     }
 
diff --git a/gridss.wdl b/gridss.wdl
index 92d7df1ec9aee08f2dbef8467643d2bb1ded33c1..38daa029cfbf3ad38d23ca903661517e993743bd 100644
--- a/gridss.wdl
+++ b/gridss.wdl
@@ -79,6 +79,75 @@ task AnnotateInsertedSequence {
     }
 }
 
+task AnnotateSvTypes {
+    input {
+        File gridssVcf
+        File gridssVcfIndex
+        String outputPath = "./gridss.svtyped.vcf.bgz"
+
+        String memory = "32G"
+        String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0"
+        Int timeMinutes = 240
+    }
+
+    # R's writeVcf adds the ".bgz" suffix itself when index=TRUE, so a trailing
+    # ".bgz" is stripped from the requested path. The pattern is anchored so a
+    # ".bgz" elsewhere in the path (e.g. in a directory name) is left untouched.
+    String effectiveOutputPath = sub(outputPath, "\\.bgz$", "")
+    String index = if effectiveOutputPath != outputPath then "T" else "F"
+
+
+    # Based on https://github.com/PapenfussLab/gridss/issues/74
+    command <<<
+        set -e
+        mkdir -p "$(dirname ~{outputPath})"
+        R --vanilla << "EOF"
+        library(VariantAnnotation)
+        library(StructuralVariantAnnotation)
+
+        vcf_path <- "~{gridssVcf}"
+        out_path <- "~{effectiveOutputPath}"
+
+        # Simple SV type classifier
+        simpleEventType <- function(gr) {
+          return(ifelse(seqnames(gr) != seqnames(partner(gr)), "BND", # inter-chromosomal
+                  ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS",
+                   ifelse(strand(gr) == strand(partner(gr)), "INV",
+                    ifelse(xor(start(gr) < start(partner(gr)), strand(gr) == "-"), "DEL",
+                     "DUP")))))
+        }
+
+        header <- scanVcfHeader(vcf_path)
+        vcf <- readVcf(vcf_path, seqinfo(header))
+        gr <- breakpointRanges(vcf)
+        svtype <- simpleEventType(gr)
+        info(vcf[gr$sourceId])$SVTYPE <- svtype
+        writeVcf(vcf, out_path, index=~{index})
+        EOF
+    >>>
+
+    output {
+        File vcf = outputPath
+        File? vcfIndex = outputPath + ".tbi"
+    }
+
+    runtime {
+        memory: memory
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        gridssVcf: {description: "The VCF produced by GRIDSS.", category: "required"}
+        gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"}
+        outputPath: {description: "The path the output should be written to. A path ending in '.bgz' yields bgzipped, tabix-indexed output.", category: "common"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task GRIDSS {
     input {
         File tumorBam