diff --git a/htseq.wdl b/htseq.wdl index 6ea0ab45df69180d0ee22ff0e4e5f2bd294b3874..5a53f04e43dd486d6d83e2777ca469b1c43ef2bd 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -14,6 +14,7 @@ task HTSeqCount { String stranded = "no" Int memory = 12 + String dockerTag = "0.9.1--py36h7eb728f_2" } command { @@ -35,5 +36,6 @@ task HTSeqCount { runtime { memory: memory + docker: "quay.io/biocontainers/htseq:" + dockerTag } } \ No newline at end of file diff --git a/mergecounts.wdl b/mergecounts.wdl index 5de98e799a341e0f1ac3a3b4d2b6323e8e5a8c8c..ddcc251e72597eec05f10af1ed5c4cb0b945a00d 100644 --- a/mergecounts.wdl +++ b/mergecounts.wdl @@ -9,6 +9,9 @@ task MergeCounts { Int featureColumn Int valueColumn Boolean inputHasHeader + String featureAttribute = "gene_id" + File referenceGtf + Array[String]+? additionalAttributes } # Based on a script by Szymon Kielbasa/Ioannis Moustakas @@ -19,26 +22,43 @@ task MergeCounts { R --no-save <<CODE library(dplyr) library(reshape2) + library(refGenome) - listOfFiles <- c("~{sep='", "' inputFiles}") + list.of.files <- c("~{sep='", "' inputFiles}") - valueI <- ~{valueColumn} - featureI <- ~{featureColumn} + value.i <- ~{valueColumn} + feature.i <- ~{featureColumn} header <- ~{true="TRUE" false="FALSE" inputHasHeader} + feature.attribute <- "~{featureAttribute}" + additional.attributes <- c(~{true='"' false="" defined(additionalAttributes)}~{sep='", "' additionalAttributes}~{true='"' false="" defined(additionalAttributes)}) + reference.gtf <- "~{referenceGtf}" + output.path <- "~{outputFile}" - d <- do.call(rbind, lapply(listOfFiles, function(file){ + d <- do.call(rbind, lapply(list.of.files, function(file){ d <- read.table(file, sep="\t", header=header, comment.char="#") - splitPath <- strsplit(file, "/")[[1]] - colnames(d)[valueI] <- sub("\\\.[^\\\.]*$", "", - splitPath[length(splitPath)]) - colnames(d)[featureI] <- "feature" + filename <- basename(file) + colnames(d)[value.i] <- sub("\\\.[^\\\.]*\$", "", filename) + colnames(d)[feature.i] <- "feature" - d <- d %>% melt(id.vars=featureI, variable.name="sample", value.name="count") + d <- d %>% melt(id.vars=feature.i, variable.name="sample", + value.name="count") })) d <- d %>% dcast(feature ~ sample, value.var="count") - write.table(d, file="~{outputFile}", sep="\t", quote=FALSE, row.names=FALSE) + + gtf <- ensemblGenome(dirname(reference.gtf)) + read.gtf(gtf, basename(reference.gtf)) + + gtf.table <- gtf@ev\$gtf + gtf.table <- gtf.table[order(gtf.table[,feature.attribute]),] + gtf.table <- gtf.table[!duplicated(gtf.table[,feature.attribute]),] + id.table <- gtf.table[, c(feature.attribute, additional.attributes), drop=F] + output.table <- merge(id.table, d, all.y = T, by.y="feature", + by.x=feature.attribute) + + write.table(output.table, file=output.path, sep="\t", quote=FALSE, + row.names=FALSE, na="") CODE >>> @@ -48,5 +68,6 @@ task MergeCounts { runtime { memory: 4 + (2*length(inputFiles)) + docker: "biowdl/mergecounts:1.0" } } \ No newline at end of file diff --git a/stringtie.wdl b/stringtie.wdl index 57f720622c51177f61359c9416128e38eaf1136f..bce40a17dc0052886e5d4ff19bb3ecb5877ce424 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -13,6 +13,8 @@ task Stringtie { Boolean? firstStranded Boolean? secondStranded String? geneAbundanceFile + + String dockerTag = "1.3.3--py36_3" } command { @@ -37,6 +39,7 @@ task Stringtie { runtime { cpu: threads + docker: "quay.io/biocontainers/stringtie:" + dockerTag } } @@ -55,6 +58,8 @@ task Merge { Float? minimumIsoformFraction Boolean keepMergedTranscriptsWithRetainedIntrons = false String? label + + String dockerTag = "1.3.3--py36_3" } command { @@ -77,4 +82,8 @@ task Merge { output { File mergedGtfFile = outputGtfPath } + + runtime { + docker: "quay.io/biocontainers/stringtie:" + dockerTag + } }