# Review notes. This is free text ahead of the first "diff --git" header;
# patch tools skip text that precedes the first file header.
#
# Summary of the change set, per file:
#   biopet.wdl       BaseCounter: the jar input becomes `File toolJar`, a
#                    `File bamIndex` input is added, and `mkdir` now runs
#                    before `${preCommand}`. SampleConfig: stdout is
#                    redirected to the new `keyFilePath` input and the
#                    `keysFile` output points at that path. Default
#                    memoryMultiplier becomes 3.0 (BaseCounter was 1.5,
#                    ScatterRegions was 2.0).
#   gatk.wdl         Default memoryMultiplier 1.5 -> 3.0 in each runtime
#   picard.wdl       block touched by the hunks below.
#   htseq.wdl        The output directory is created before the tool runs.
#   stringtie.wdl    Same.
#   mergecounts.wdl  MergeCounts: `idVar`/`measurementVar` are replaced by
#                    `featureColumn`/`valueColumn`/`inputHasHeader`; the
#                    value column is renamed to the input file name with its
#                    last extension removed; R writes `outputFile` itself
#                    instead of the shell redirecting stdout.
#   samtools.wdl     Index: `bamFilePath` becomes a File and an optional
#                    `bamIndexPath` is added. Merge: always calls
#                    `samtools merge`; the single-file symlink branch is
#                    removed.
#   star.wdl         Adds task makeStarRGline and the final newline that was
#                    missing after task Star.
#
# Changes made during review. Each one rewrites a single added line in place,
# so every hunk header is unchanged:
#   1. htseq.wdl, mergecounts.wdl: the output directory is now computed with
#      `$(dirname ...)`, the form stringtie.wdl already uses in this patch.
#      The htseq line used an unanchored `sub(path, basename(path), "")`,
#      which removes the first match rather than the trailing one. Both
#      `sub` forms produce an empty string for a bare file name, leaving
#      `mkdir -p` without an operand under `set -e`.
#   2. star.wdl: makeStarRGline passes the read-group text as an argument to
#      `printf '%s'` instead of using it as the format operand, so `%` or
#      `\` in a sample/library/readgroup value is printed literally.
#
# NOTE(review): the post-image blob ids on the `index` lines of htseq.wdl,
# mergecounts.wdl and star.wdl are the original ones and no longer describe
# the edited content.
# NOTE(review): the reviewed copy had its line breaks and indentation
# collapsed. Blank lines were placed so that every hunk matches its header
# counts; indentation follows a 4-space convention and is an assumption.
# TODO confirm against the target files, or apply with
# `git apply --ignore-whitespace` / `patch -l`.

diff --git a/biopet.wdl b/biopet.wdl
index 7d0d0a5fcfe5f1cb9711a3da6b3665f34546e3a8..f9d2a6bd162cc82a8115b38f27be6c895205c682 100644
--- a/biopet.wdl
+++ b/biopet.wdl
@@ -1,9 +1,11 @@
 # PLEASE ADD TASKS IN ALPHABETIC ORDER.
 # This makes searching a lot easier.
+
 task BaseCounter {
     String? preCommand
-    String tool_jar #Should this be of type File?
+    File toolJar
     File bam
+    File bamIndex
     File refFlat
     String outputDir
     String prefix
@@ -14,9 +16,9 @@ task BaseCounter {
     Int mem = ceil(select_first([memory, 12.0]))
     command {
         set -e -o pipefail
-        ${preCommand}
         mkdir -p ${outputDir}
-        java -Xmx${mem}G -jar ${tool_jar} \
+        ${preCommand}
+        java -Xmx${mem}G -jar ${toolJar} \
         -b ${bam} \
         -r ${refFlat} \
         -o ${outputDir} \
@@ -61,7 +63,7 @@ task BaseCounter {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -121,6 +123,7 @@ task SampleConfig {
     String? preCommand
     String tool_jar
     Array[File]+ inputFiles
+    String keyFilePath
     String? sample
     String? library
     String? readgroup
@@ -141,11 +144,12 @@ task SampleConfig {
         ${"--library " + library} \
         ${"--readgroup " + readgroup} \
         ${"--jsonOutput " + jsonOutputPath} \
-        ${"--tsvOutput " + tsvOutputPath}
+        ${"--tsvOutput " + tsvOutputPath} \
+        > ${keyFilePath}
     }
 
     output {
-        File keysFile = stdout()
+        File keysFile = keyFilePath
         File? jsonOutput = jsonOutputPath
         File? tsvOutput = tsvOutputPath
     }
@@ -184,7 +188,7 @@ task ScatterRegions {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 2.0]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
diff --git a/gatk.wdl b/gatk.wdl
index bd97b427db13da28d323c0408a58f79f0ca0f275..12b03d9820f79aa8cc420e073b8a36a355ddaa71 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -34,7 +34,7 @@ task BaseRecalibrator {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -78,7 +78,7 @@ task ApplyBQSR {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -107,7 +107,7 @@ task GatherBqsrReports {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -149,7 +149,7 @@ task HaplotypeCallerGvcf {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -198,7 +198,7 @@ task GenotypeGVCFs {
     }
 
     runtime{
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -245,7 +245,7 @@ task CombineGVCFs {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -282,6 +282,6 @@ task SplitNCigarReads {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
diff --git a/htseq.wdl b/htseq.wdl
index 6376e3ebeac324848bc20fe2a73f6be9fa6a13b2..b634bf5ee0f08128729723eaf71e77536bf401f6 100644
--- a/htseq.wdl
+++ b/htseq.wdl
@@ -11,6 +11,7 @@ task HTSeqCount {
 
     command {
         set -e -o pipefail
+        mkdir -p $(dirname ${outputTable})
         ${preCommand}
         htseq-count \
         -f ${default="bam" format} \
diff --git a/mergecounts.wdl b/mergecounts.wdl
index c2373f7f02596607f1c9999ecfb35ff49aa6c43a..8cca5dd494eefdaf208bedeffa051ec0960ed6d9 100644
--- a/mergecounts.wdl
+++ b/mergecounts.wdl
@@ -3,29 +3,38 @@ task MergeCounts {
 
     Array[File] inputFiles
     String outputFile
-    String idVar
-    String measurementVar
+    Int featureColumn
+    Int valueColumn
+    Boolean inputHasHeader
 
     # Based on a script by Szymon Kielbasa/Ioannis Moustakas
     command <<<
         set -e -o pipefail
+        mkdir -p $(dirname ${outputFile})
         ${preCommand}
-        R --no-save --slave <<CODE > ${outputFile}
+        R --no-save <<CODE
             library(dplyr)
             library(reshape2)
 
             listOfFiles <- c("${sep='", "' inputFiles}")
 
+            valueI <- ${valueColumn}
+            featureI <- ${featureColumn}
+            header <- ${true="TRUE" false="FALSE" inputHasHeader}
+
             d <- do.call(rbind, lapply(listOfFiles, function(file){
-                d <- read.table(file, header=TRUE, comment.char="#")
-                colI <- grep(${measurementVar}, colnames(d))
-                colnames(d)[colI] <- strsplit(file, "/")[[1]][3]
-                d <- d %>% melt(id.vars=${idVar}, measure.vars=colI,
-                    variable.name="sample", value.name="count")
+                d <- read.table(file, sep="\t", header=header, comment.char="#")
+
+                splitPath <- strsplit(file, "/")[[1]]
+                colnames(d)[valueI] <- sub("\\\.[^\\\.]*$", "",
+                    splitPath[length(splitPath)])
+                colnames(d)[featureI] <- "feature"
+
+                d <- d %>% melt(id.vars=featureI, variable.name="sample", value.name="count")
             }))
 
-            d <- d %>% dcast(paste0(${idVar}, " ~ sample"), value.var="count")
-            write.table(d, sep="\t", quote=FALSE, row.names=FALSE)
+            d <- d %>% dcast(feature ~ sample, value.var="count")
+            write.table(d, file="${outputFile}", sep="\t", quote=FALSE, row.names=FALSE)
         CODE
     >>>
 
diff --git a/picard.wdl b/picard.wdl
index 2c638df3bc4ae1d98261393d2f74012c692eb3e0..e3bcbc1320c1ef0ce4256f7bcd19400da3273235 100644
--- a/picard.wdl
+++ b/picard.wdl
@@ -28,7 +28,7 @@ task ScatterIntervalList {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -63,7 +63,7 @@ task GatherBamFiles {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -113,7 +113,7 @@ task MarkDuplicates {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -148,7 +148,7 @@ task MergeVCFs {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
 
@@ -183,6 +183,6 @@ task SamToFastq {
     }
 
     runtime {
-        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
+        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
     }
 }
\ No newline at end of file
diff --git a/samtools.wdl b/samtools.wdl
index 59f5b0eadb9575becb1d783c6e2ddec310fbee68..ccd99cbeace3b3ce5c1641cf14a7e7f87f81a312 100644
--- a/samtools.wdl
+++ b/samtools.wdl
@@ -1,15 +1,16 @@
 task Index {
     String? preCommand
-    String bamFilePath
+    File bamFilePath
+    String? bamIndexPath
 
     command {
         set -e -o pipefail
         ${preCommand}
-        samtools index ${bamFilePath}
+        samtools index ${bamFilePath} ${bamIndexPath}
     }
 
     output {
-        File indexFile = bamFilePath + ".bai"
+        File indexFile = if defined(bamIndexPath) then select_first([bamIndexPath]) else bamFilePath + ".bai"
     }
 }
 
@@ -21,12 +22,7 @@ task Merge {
     command {
         set -e -o pipefail
         ${preCommand}
-        if [ ${length(bamFiles)} -gt 1 ]
-          then
-            samtools merge ${outputBamPath} ${sep=' ' bamFiles}
-          else
-            ln -sf ${bamFiles} ${outputBamPath}
-        fi
+        samtools merge ${outputBamPath} ${sep=' ' bamFiles}
     }
 
     output {
diff --git a/star.wdl b/star.wdl
index 32dd0565dc2511c7c3073531c5f8e28c9ba707bf..92245ecd90fba746be99d99883b467231b85faaa 100644
--- a/star.wdl
+++ b/star.wdl
@@ -47,4 +47,19 @@ task Star {
         cpu: select_first([runThreadN, 1])
         memory: select_first([memory, 10])
     }
-}
\ No newline at end of file
+}
+
+task makeStarRGline {
+    String sample
+    String library
+    String? platform
+    String readgroup
+
+    command {
+        printf '%s' '"ID:${readgroup}" "LB:${library}" "PL:${default="ILLUMINA" platform}" "SM:${sample}"'
+    }
+
+    output {
+        String rgLine = read_string(stdout())
+    }
+}
diff --git a/stringtie.wdl b/stringtie.wdl
index 5fdcd6ddedcac23e06eb4728b01289a95eaa665e..f5c6854cbacb671113fde7b4a0ac6e459e128ac1 100644
--- a/stringtie.wdl
+++ b/stringtie.wdl
@@ -10,6 +10,7 @@ task Stringtie {
 
     command {
         set -e -o pipefail
+        mkdir -p $(dirname ${assembledTranscriptsFile})
         ${preCommand}
         stringtie \
         ${"-p " + threads} \