Commit 463d949f authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub

Merge pull request #149 from biopet/fix-BIOPET-380

Adding general strand stats to BaseCounter
parents b049eb31 37214be5
......@@ -14,16 +14,18 @@
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.{PrintWriter, File}
import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
*
*/
class BaseCounter(val parent: Configurable) extends ToolCommandFunction {
class BaseCounter(val parent: Configurable) extends ToolCommandFunction with Summarizable {
def toolObject = nl.lumc.sasc.biopet.tools.BaseCounter
@Input(doc = "Input Bed file", required = true)
......@@ -84,6 +86,8 @@ class BaseCounter(val parent: Configurable) extends ToolCommandFunction {
def strandedAntiSenseMetaExonCounts =
new File(outputDir, s"$prefix.base.metaexons.stranded.antisense.counts")
def summaryJson: File = new File(outputDir, s"$prefix.summary.json")
@Output
private var outputFiles: List[File] = Nil
......@@ -122,7 +126,8 @@ class BaseCounter(val parent: Configurable) extends ToolCommandFunction {
nonStrandedMetaExonCounts,
strandedMetaExonCounts,
strandedSenseMetaExonCounts,
strandedAntiSenseMetaExonCounts
strandedAntiSenseMetaExonCounts,
summaryJson
)
jobOutputFile = new File(outputDir, s".$prefix.basecounter.out")
if (bamFile != null) deps :+= new File(bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
......@@ -130,10 +135,16 @@ class BaseCounter(val parent: Configurable) extends ToolCommandFunction {
}
override def cmdLine =
override def cmdLine: String =
super.cmdLine +
required("--refFlat", refFlat) +
required("-b", bamFile) +
required("-o", outputDir) +
optional("--prefix", prefix)
/** Must return files to store into summary */
def summaryFiles: Map[String, File] = Map()
/** Must returns stats to store into summary */
def summaryStats: Any = ConfigUtils.fileToConfigMap(summaryJson)
}
......@@ -14,11 +14,11 @@
*/
package nl.lumc.sasc.biopet.tools
import java.io.{PrintWriter, File}
import java.io.{File, PrintWriter}
import htsjdk.samtools.{SAMRecord, SamReaderFactory}
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.intervals.{BedRecordList, BedRecord}
import nl.lumc.sasc.biopet.utils.{ConfigUtils, ToolCommand}
import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList}
import picard.annotation.{Gene, GeneAnnotationReader}
import scala.collection.JavaConversions._
......@@ -83,7 +83,8 @@ object BaseCounter extends ToolCommand {
yield runThread(cmdArgs.bamFile, genes)).toList
logger.info("Done reading bamFile")
writeGeneCounts(counts.flatMap(_.geneCounts), cmdArgs.outputDir, cmdArgs.prefix)
val geneStats =
writeGeneCounts(counts.flatMap(_.geneCounts), cmdArgs.outputDir, cmdArgs.prefix)
writeMergeExonCount(counts.flatMap(_.geneCounts), cmdArgs.outputDir, cmdArgs.prefix)
writeMergeIntronCount(counts.flatMap(_.geneCounts), cmdArgs.outputDir, cmdArgs.prefix)
writeTranscriptCounts(counts.flatMap(_.geneCounts), cmdArgs.outputDir, cmdArgs.prefix)
......@@ -95,6 +96,15 @@ object BaseCounter extends ToolCommand {
writeStrandedMetaExonsCount(counts.flatMap(_.strandedMetaExonCounts),
cmdArgs.outputDir,
cmdArgs.prefix)
val summary = Map(
"gene" -> geneStats
)
val summaryWriter = new PrintWriter(
new File(cmdArgs.outputDir, s"${cmdArgs.prefix}.summary.json"))
summaryWriter.println(ConfigUtils.mapToJson(summary).spaces2)
summaryWriter.close()
}
/**
......@@ -220,41 +230,57 @@ object BaseCounter extends ToolCommand {
* Intronic: then it's not seen as an exon on 1 of the transcripts
* Exonic + Intronic = Total
*/
def writeGeneCounts(genes: List[GeneCount], outputDir: File, prefix: String): Unit = {
def writeGeneCounts(genes: List[GeneCount],
outputDir: File,
prefix: String): Map[String, Map[String, Long]] = {
val geneTotalWriter = new PrintWriter(new File(outputDir, s"$prefix.base.gene.counts"))
val geneTotalSenseWriter = new PrintWriter(
new File(outputDir, s"$prefix.base.gene.sense.counts"))
val geneTotalAntiSenseWriter = new PrintWriter(
new File(outputDir, s"$prefix.base.gene.antisense.counts"))
var geneTotalSenseCounts = 0L
var geneTotalAntiSenseCounts = 0L
val geneExonicWriter = new PrintWriter(new File(outputDir, s"$prefix.base.gene.exonic.counts"))
val geneExonicSenseWriter = new PrintWriter(
new File(outputDir, s"$prefix.base.gene.exonic.sense.counts"))
val geneExonicAntiSenseWriter = new PrintWriter(
new File(outputDir, s"$prefix.base.gene.exonic.antisense.counts"))
var geneExonicSenseCounts = 0L
var geneExonicAntiSenseCounts = 0L
val geneIntronicWriter = new PrintWriter(
new File(outputDir, s"$prefix.base.gene.intronic.counts"))
val geneIntronicSenseWriter = new PrintWriter(
new File(outputDir, s"$prefix.base.gene.intronic.sense.counts"))
val geneIntronicAntiSenseWriter = new PrintWriter(
new File(outputDir, s"$prefix.base.gene.intronic.antisense.counts"))
var geneIntronSenseCounts = 0L
var geneIntronAntiSenseCounts = 0L
genes.sortBy(_.gene.getName).foreach { geneCount =>
geneTotalSenseCounts += geneCount.counts.senseBases
geneTotalAntiSenseCounts += geneCount.counts.antiSenseBases
geneTotalWriter.println(geneCount.gene.getName + "\t" + geneCount.counts.totalBases)
geneTotalSenseWriter.println(geneCount.gene.getName + "\t" + geneCount.counts.senseBases)
geneTotalAntiSenseWriter.println(
geneCount.gene.getName + "\t" + geneCount.counts.antiSenseBases)
geneExonicWriter.println(
geneCount.gene.getName + "\t" + geneCount.exonCounts.map(_.counts.totalBases).sum)
geneExonicSenseWriter.println(
geneCount.gene.getName + "\t" + geneCount.exonCounts.map(_.counts.senseBases).sum)
geneExonicAntiSenseWriter.println(
geneCount.gene.getName + "\t" + geneCount.exonCounts.map(_.counts.antiSenseBases).sum)
geneIntronicWriter.println(
geneCount.gene.getName + "\t" + geneCount.intronCounts.map(_.counts.totalBases).sum)
geneIntronicSenseWriter.println(
geneCount.gene.getName + "\t" + geneCount.intronCounts.map(_.counts.senseBases).sum)
geneIntronicAntiSenseWriter.println(
geneCount.gene.getName + "\t" + geneCount.intronCounts.map(_.counts.antiSenseBases).sum)
val exonSense = geneCount.exonCounts.map(_.counts.senseBases).sum
val exonAntiSense = geneCount.exonCounts.map(_.counts.antiSenseBases).sum
geneExonicSenseCounts += exonSense
geneExonicAntiSenseCounts += exonAntiSense
geneExonicWriter.println(geneCount.gene.getName + "\t" + (exonSense + exonAntiSense))
geneExonicSenseWriter.println(geneCount.gene.getName + "\t" + exonSense)
geneExonicAntiSenseWriter.println(geneCount.gene.getName + "\t" + exonAntiSense)
val intronSense = geneCount.intronCounts.map(_.counts.senseBases).sum
val intronAntiSense = geneCount.intronCounts.map(_.counts.antiSenseBases).sum
geneIntronSenseCounts += intronSense
geneExonicAntiSenseCounts += intronAntiSense
geneIntronicWriter.println(geneCount.gene.getName + "\t" + (intronSense + intronAntiSense))
geneIntronicSenseWriter.println(geneCount.gene.getName + "\t" + intronSense)
geneIntronicAntiSenseWriter.println(geneCount.gene.getName + "\t" + intronAntiSense)
}
geneTotalWriter.close()
......@@ -266,6 +292,24 @@ object BaseCounter extends ToolCommand {
geneIntronicWriter.close()
geneIntronicSenseWriter.close()
geneIntronicAntiSenseWriter.close()
Map(
"total" -> Map(
"total_bases" -> (geneTotalSenseCounts + geneTotalAntiSenseCounts),
"sense_bases" -> geneTotalSenseCounts,
"antisense_bases" -> geneTotalAntiSenseCounts
),
"exonic" -> Map(
"total_bases" -> (geneExonicSenseCounts + geneExonicAntiSenseCounts),
"sense_bases" -> geneExonicSenseCounts,
"antisense_bases" -> geneExonicAntiSenseCounts
),
"intronic" -> Map(
"total_bases" -> (geneIntronSenseCounts + geneIntronAntiSenseCounts),
"sense_bases" -> geneIntronSenseCounts,
"antisense_bases" -> geneIntronAntiSenseCounts
)
)
}
/**
......
......@@ -213,7 +213,7 @@ class BaseCounterTest extends TestNGSuite with Matchers {
bamFile.getAbsolutePath,
"-r",
refflat.getAbsolutePath))
outputDir.list().size shouldBe 34
outputDir.list().size shouldBe 35
}
}
......
......@@ -15,6 +15,7 @@
package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationRefFlat
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.extensions.tools.BaseCounter
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......@@ -27,7 +28,7 @@ class BaseCounts(val parent: Configurable)
with Measurement
with AnnotationRefFlat {
def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 0, fallback = "0")
def mergeArgs = MergeArgs(List(1), 2, fallback = "0")
/** Pipeline itself */
def biopetScript(): Unit = {
......@@ -103,6 +104,15 @@ class BaseCounts(val parent: Configurable)
addTableAndHeatmap(jobs.values.map(_.strandedAntiSenseMetaExonCounts).toList,
"strandedAntiSenseMetaExonCounts")
jobs.foreach(x => baseCounterStats = baseCounterStats ++ Map(x._1 -> x._2.summaryJson))
addSummarizable(new Summarizable {
def summaryFiles: Map[String, File] = Map()
def summaryStats: Any = baseCounterStats
}, "basecounter")
addSummaryJobs()
}
private var baseCounterStats: Map[String, File] = Map()
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment