From 225505cf347c8b9ffd92ff5f2f9bbeef1bf66b6a Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Mon, 19 Mar 2018 12:50:45 +0100
Subject: [PATCH] add merge counts

---
 mergecounts.wdl        | 19 +++++++++++++++++++
 scripts/merge_counts.R | 28 ++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 mergecounts.wdl
 create mode 100644 scripts/merge_counts.R

diff --git a/mergecounts.wdl b/mergecounts.wdl
new file mode 100644
index 0000000..c71247d
--- /dev/null
+++ b/mergecounts.wdl
@@ -0,0 +1,19 @@
+task MergeCounts {  # Runs an R script with Rscript and redirects its stdout to outputFile.
+    Array[File] inputFiles  # passed to the script space-separated, after the two column arguments
+    String outputFile  # path the command's stdout is redirected to
+    String idVar  # first positional argument given to the script
+    String measurementVar  # second positional argument given to the script
+    File script  # the script executed by Rscript
+
+    command {
+        Rscript ${script} \
+        ${idVar} \
+        ${measurementVar} \
+        ${sep=" " inputFiles} \
+        > ${outputFile}
+    }
+
+    output {
+        File mergedCounts = outputFile  # the file the command above wrote via redirection
+    }
+}
\ No newline at end of file
diff --git a/scripts/merge_counts.R b/scripts/merge_counts.R
new file mode 100644
index 0000000..8963994
--- /dev/null
+++ b/scripts/merge_counts.R
@@ -0,0 +1,28 @@
+# Author: Ioannis Moustakas, i.moustakas@lumc.nl (Based on a script by Szymon Kielbasa)
+# Modified by: Davy Cats, d.cats@lumc.nl
+# Title: Merge count files from featureCounts output
+# Use: Rscript merge_counts.R columnIDToMergeOn columnIDBeingMerged listOfFilesToBeMerged... > outputFile
+
+### Load Packages
+library(dplyr)
+library(reshape2)
+
+### Load arguments from the command line; abort with a usage message if any are missing.
+args <- commandArgs(trailingOnly=TRUE)
+if (length(args) < 3) stop("Use: Rscript merge_counts.R idVar measureVar files... > outputFile", call.=FALSE)
+idVars <- args[1]       # name of the column the tables are merged on
+measureVars <- args[2]  # pattern (regular expression) selecting the column being merged
+listOfFiles <- args[-(1:2)]  # negative index: 3:length(args) counts downwards when fewer than 3 args
+
+### Melt every file to long format (id column, sample, count) and stack the results.
+d <- do.call(rbind, lapply(listOfFiles, function(file){
+    d <- read.table(file, header=TRUE, comment.char="#")
+    colI <- grep(measureVars, colnames(d))
+    if (length(colI) != 1) stop("need exactly one column matching '", measureVars, "' in ", file, call.=FALSE)
+    colnames(d)[colI] <- strsplit(file, "/")[[1]][3]  # sample name = 3rd "/"-separated path component
+    melt(d, id.vars=idVars, measure.vars=colI, variable.name="sample", value.name="count")
+}))
+
+### Reshape to one column per sample and write the merged table to STDOUT.
+d <- d %>% dcast(paste0(idVars, " ~ sample"), value.var="count")
+write.table(d, sep="\t", quote=FALSE, row.names=FALSE)
-- 
GitLab