From 0e7a025e7ed7a5e6006924d8b7f1740063ff4131 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Wed, 27 Jan 2016 15:29:34 +0100 Subject: [PATCH] Give mergeTables an extension --- .../biopet/extensions/tools/MergeTables.scala | 4 +++- .../lumc/sasc/biopet/tools/BaseCounter.scala | 2 +- .../gentrap/measures/BaseCounts.scala | 2 +- .../measures/CufflinksMeasurement.scala | 20 ++++++++++++++++--- .../gentrap/measures/FragmentsPerGene.scala | 4 ++-- .../gentrap/measures/Measurement.scala | 1 + 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala index e1b94b874..96abaf1ed 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala @@ -85,7 +85,8 @@ object MergeTables { idCols: List[Int], valCol: Int, numHeaderLines: Int = 0, - fallback: String = "-"): MergeTables = { + fallback: String = "-", + fileExtension: Option[String] = None): MergeTables = { val job = new MergeTables(root) job.inputTables = tables job.output = outputFile @@ -93,6 +94,7 @@ object MergeTables { job.valueColumnIndex = valCol job.fallbackString = Option(fallback) job.numHeaderLines = Option(numHeaderLines) + job.fileExtension = fileExtension job } } \ No newline at end of file diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala index 8bfc3158e..a88563eb8 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala @@ -307,7 +307,7 @@ object BaseCounter extends ToolCommand { 
val exonCounts = transcript.exons.map(new RegionCount(_)) val intronCounts = if (transcript.exons.size > 1) intronRegions.allRecords.map(e => new RegionCount(e.start + 1, e.end)).toList - else Nil + else Nil def addRecord(samRecord: SAMRecord, sense: Boolean): Unit = { bamRecordBasesOverlap(samRecord, transcript.start, transcript.end, counts, sense) exonCounts.foreach(_.addRecord(samRecord, sense)) diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala index 3ad9146b6..98d1aeed3 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala @@ -29,7 +29,7 @@ class BaseCounts(val root: Configurable) extends QScript with Measurement with A def addTableAndHeatmap(countFiles: List[File], outputName: String): Unit = { val mergedTable = new File(outputDir, s"$name.$outputName.tsv") val heatmapFile = new File(outputDir, s"$name.$outputName.png") - addMergeTableJob(countFiles, mergedTable, outputName) + addMergeTableJob(countFiles, mergedTable, outputName, countFiles.head.getName.stripPrefix(jobs.head._1)) addHeatmapJob(mergedTable, heatmapFile, outputName) } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala index d1cf59fdd..2d4f40973 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala @@ -1,6 +1,6 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.measures -import nl.lumc.sasc.biopet.extensions.Cufflinks +import nl.lumc.sasc.biopet.extensions.{ 
Ln, Cufflinks } import nl.lumc.sasc.biopet.extensions.tools.MergeTables import org.broadinstitute.gatk.queue.QScript @@ -23,8 +23,22 @@ trait CufflinksMeasurement extends QScript with Measurement { id -> cufflinks } - addMergeTableJob(jobs.values.map(_.outputGenesFpkm).toList, mergeGenesFpkmTable, "genes_fpkm") - addMergeTableJob(jobs.values.map(_.outputIsoformsFpkm).toList, mergeIsoFormFpkmTable, "iso_form") + val genesFpkmFiles = jobs.toList.map { + case (id, job) => + val file = new File(job.output_dir, s"$id.genes_fpkm.counts") + add(Ln(this, job.outputGenesFpkm, file)) + file + } + + val isoFormFpkmFiles = jobs.toList.map { + case (id, job) => + val file = new File(job.output_dir, s"$id.iso_form_fpkn.counts") + add(Ln(this, job.outputIsoformsFpkm, file)) + file + } + + addMergeTableJob(genesFpkmFiles, mergeGenesFpkmTable, "genes_fpkm", ".genes_fpkm.counts") + addMergeTableJob(isoFormFpkmFiles, mergeIsoFormFpkmTable, "iso_form_fpkn", ".iso_form_fpkn.counts") addHeatmapJob(mergeGenesFpkmTable, genesFpkmHeatmap, "genes_fpkm") addHeatmapJob(mergeIsoFormFpkmTable, isoFormFpkmHeatmap, "iso_form_fpkm") diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala index a383874f1..9ed01b496 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala @@ -20,7 +20,7 @@ class FragmentsPerGene(val root: Configurable) extends QScript with Measurement val job = new HtseqCount(this) job.inputAnnotation = annotationGtf job.inputAlignment = file - job.output = new File(outputDir, s"$name.$id.counts") + job.output = new File(outputDir, s"$id.$name.counts") job.format = Option("bam") add(job) // We are forcing the sort order to be ID-sorted, since HTSeq-count often chokes 
when using position-sorting due @@ -30,7 +30,7 @@ class FragmentsPerGene(val root: Configurable) extends QScript with Measurement id -> job } - addMergeTableJob(jobs.values.map(_.output).toList, mergedTable, "fragments_per_gene") + addMergeTableJob(jobs.values.map(_.output).toList, mergedTable, "fragments_per_gene", s".$name.counts") addHeatmapJob(mergedTable, heatmap, "fragments_per_gene") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala index 095a01fdb..768fa98b1 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala @@ -41,6 +41,7 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => def addMergeTableJob(countFiles: List[File], outputFile: File, name: String, + fileExtension: String, args: MergeArgs = mergeArgs): Unit = { add(MergeTables(this, countFiles, outputFile, args.idCols, args.valCol, args.numHeaderLines, args.fallback)) -- GitLab