Skip to content
Snippets Groups Projects
Commit 6d9cf7ee authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Implement baseCounter in pipeline

parent 4024e04f
No related branches found
No related tags found
No related merge requests found
...@@ -121,10 +121,7 @@ class Gentrap(val root: Configurable) extends QScript ...@@ -121,10 +121,7 @@ class Gentrap(val root: Configurable) extends QScript
lazy val fragmentsPerExon = if (expMeasures.contains(ExpMeasures.FragmentsPerExon)) lazy val fragmentsPerExon = if (expMeasures.contains(ExpMeasures.FragmentsPerExon))
Some(new FragmentsPerExon(this)) else None Some(new FragmentsPerExon(this)) else None
lazy val basesPerGene = if (expMeasures.contains(ExpMeasures.BasesPerGene)) lazy val baseCounts = if (expMeasures.contains(ExpMeasures.BaseCounts))
Some(new BasesPerGene(this)) else None
lazy val basesPerExon = if (expMeasures.contains(ExpMeasures.BasesPerExon))
Some(new FragmentsPerExon(this)) else None Some(new FragmentsPerExon(this)) else None
lazy val cufflinksBlind = if (expMeasures.contains(ExpMeasures.CufflinksBlind)) lazy val cufflinksBlind = if (expMeasures.contains(ExpMeasures.CufflinksBlind))
...@@ -136,7 +133,7 @@ class Gentrap(val root: Configurable) extends QScript ...@@ -136,7 +133,7 @@ class Gentrap(val root: Configurable) extends QScript
lazy val cufflinksStrict = if (expMeasures.contains(ExpMeasures.CufflinksStrict)) lazy val cufflinksStrict = if (expMeasures.contains(ExpMeasures.CufflinksStrict))
Some(new CufflinksStrict(this)) else None Some(new CufflinksStrict(this)) else None
def executedMeasures = (fragmentsPerGene :: fragmentsPerExon :: basesPerGene :: basesPerExon :: cufflinksBlind :: def executedMeasures = (fragmentsPerGene :: fragmentsPerExon :: baseCounts :: cufflinksBlind ::
cufflinksGuided :: cufflinksStrict :: Nil).flatten cufflinksGuided :: cufflinksStrict :: Nil).flatten
/** Whether to do simple variant calling on RNA or not */ /** Whether to do simple variant calling on RNA or not */
...@@ -232,7 +229,7 @@ object Gentrap extends PipelineCommand { ...@@ -232,7 +229,7 @@ object Gentrap extends PipelineCommand {
/** Enumeration of available expression measures */ /** Enumeration of available expression measures */
object ExpMeasures extends Enumeration { object ExpMeasures extends Enumeration {
val FragmentsPerGene, FragmentsPerExon, BasesPerGene, BasesPerExon, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value val FragmentsPerGene, FragmentsPerExon, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value
} }
/** Enumeration of available strandedness */ /** Enumeration of available strandedness */
......
package nl.lumc.sasc.biopet.pipelines.gentrap.measures package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationRefFlat
import nl.lumc.sasc.biopet.extensions.tools.BaseCounter
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
/** /**
* Created by pjvan_thof on 1/12/16. * Created by pjvan_thof on 1/12/16.
*/ */
class BasesPerExon(val root: Configurable) extends QScript with Measurement { class BaseCounts(val root: Configurable) extends QScript with Measurement with AnnotationRefFlat {
def bamToCountFile(id: String, bamFile: File): (String, File) = ???
def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
/** Pipeline itself */
def biopetScript(): Unit = {
val jobs = bamFiles.map { case (id, file) =>
val baseCounter = new BaseCounter(this)
baseCounter.bamFile = file
baseCounter.outputDir = new File(outputDir, id)
baseCounter.prefix = id
baseCounter.refFlat = annotationRefFlat
add(baseCounter)
id -> baseCounter
}
//TODO: merges
//TODO heatmaps
}
} }
package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvan_thof on 1/12/16.
*/
class BasesPerGene(val root: Configurable) extends QScript with Measurement {
//TODO: splitting on strand if strandspecific
def bamToCountFile(id: String, bamFile: File): (String, File) = ???
def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
}
...@@ -15,25 +15,25 @@ trait CufflinksMeasurement extends QScript with Measurement { ...@@ -15,25 +15,25 @@ trait CufflinksMeasurement extends QScript with Measurement {
cufflinks cufflinks
} }
private var isoFormFiles: List[File] = Nil def biopetScript(): Unit = {
val jobs = bamFiles.map { case (id, file) =>
def bamToCountFile(id: String, bamFile: File): (String, File) = { val cufflinks = makeCufflinksJob(id, file)
val cufflinks = makeCufflinksJob(id, bamFile) add(cufflinks)
add(cufflinks) id -> cufflinks
}
isoFormFiles :+= cufflinks.outputIsoformsFpkm
addMergeTableJob(jobs.values.map(_.outputGenesFpkm).toList, mergeGenesFpkmTable)
id -> cufflinks.outputGenesFpkm addMergeTableJob(jobs.values.map(_.outputIsoformsFpkm).toList, mergeIsoFormFpkmTable)
addHeatmapJob(mergeGenesFpkmTable, genesFpkmHeatmap, "genes_fpkm")
addHeatmapJob(mergeIsoFormFpkmTable, isoFormFpkmHeatmap, "iso_form_fpkm")
} }
override def biopetScript(): Unit = { def mergeGenesFpkmTable: File = new File(outputDir, s"$name.genes.fpkm.tsv")
super.biopetScript() def genesFpkmHeatmap: File = new File(outputDir, s"$name.genes.fpkm.png")
add(MergeTables(this, isoFormFiles, mergeIsoFormTable,
mergeArgs.idCols, mergeArgs.valCol, mergeArgs.numHeaderLines, mergeArgs.fallback))
}
def mergeIsoFormTable: File = new File(outputDir, s"$name.iso_form_table.tsv") def mergeIsoFormFpkmTable: File = new File(outputDir, s"$name.iso_form.fpkm.tsv")
def isoFormFpkmHeatmap: File = new File(outputDir, s"$name.iso_form.fpkm.png")
def mergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0") def mergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0")
......
...@@ -7,7 +7,8 @@ import org.broadinstitute.gatk.queue.QScript ...@@ -7,7 +7,8 @@ import org.broadinstitute.gatk.queue.QScript
* Created by pjvan_thof on 1/12/16. * Created by pjvan_thof on 1/12/16.
*/ */
class FragmentsPerExon(val root: Configurable) extends QScript with Measurement { class FragmentsPerExon(val root: Configurable) extends QScript with Measurement {
def bamToCountFile(id: String, bamFile: File): (String, File) = ???
def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
/** Pipeline itself */
def biopetScript(): Unit = ???
} }
...@@ -25,4 +25,28 @@ class FragmentsPerGene(val root: Configurable) extends QScript with Measurement ...@@ -25,4 +25,28 @@ class FragmentsPerGene(val root: Configurable) extends QScript with Measurement
} }
def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
/** Pipeline itself */
def biopetScript(): Unit = {
val jobs = bamFiles.map { case (id, file) =>
//TODO: ID sorting job
val job = new HtseqCount(this)
job.inputAnnotation = annotationGtf
job.inputAlignment = file
job.output = new File(outputDir, s"$name.$id.counts")
job.format = Option("bam")
// We are forcing the sort order to be ID-sorted, since HTSeq-count often chokes when using position-sorting due
// to its buffer not being large enough.
//TODO: ID sorting job
//job.order = Option("name")
id -> job
}
addMergeTableJob(jobs.values.map(_.output).toList, mergedTable)
addHeatmapJob(mergedTable, heatmap, "fragments_per_gene")
}
def mergedTable = new File(outputDir, s"$name.fragments_per_gene.tsv")
def heatmap = new File(outputDir, s"$name.fragments_per_gene.png")
} }
package nl.lumc.sasc.biopet.pipelines.gentrap.measures package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import java.io.File
import nl.lumc.sasc.biopet.core.Reference import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.tools.MergeTables import nl.lumc.sasc.biopet.extensions.tools.MergeTables
...@@ -27,15 +25,6 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => ...@@ -27,15 +25,6 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
/** Name of job, this is used as prefix for most of the files */ /** Name of job, this is used as prefix for most of the files */
def name: String = this.getClass.getSimpleName.toLowerCase def name: String = this.getClass.getSimpleName.toLowerCase
/** Locations of single bam count tables */
lazy val countFiles: Map[String, File] = bamFiles.map { case (id, bamFile) => bamToCountFile(id, bamFile) }
/** Location of merge count table */
def mergedCountFile = new File(outputDir, s"$name.merged.tsv")
/** Location of heatmap */
def heatpMap = new File(outputDir, s"$name.heatmap.png")
/** Class to store args for MergeTables */ /** Class to store args for MergeTables */
case class MergeArgs(idCols: List[Int], valCol: Int, numHeaderLines: Int = 0, fallback: String = "-") case class MergeArgs(idCols: List[Int], valCol: Int, numHeaderLines: Int = 0, fallback: String = "-")
...@@ -47,27 +36,26 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => ...@@ -47,27 +36,26 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
require(bamFiles.nonEmpty) require(bamFiles.nonEmpty)
} }
/** Pipeline itself */ def addMergeTableJob(countFiles: List[File],
def biopetScript(): Unit = { outputFile: File,
add(MergeTables(this, countFiles.values.toList, mergedCountFile, args: MergeArgs = mergeArgs): Unit = {
mergeArgs.idCols, mergeArgs.valCol, mergeArgs.numHeaderLines, mergeArgs.fallback)) add(MergeTables(this, countFiles, outputFile,
args.idCols, args.valCol, args.numHeaderLines, args.fallback))
}
def addHeatmapJob(countTable: File, outputFile: File, name: String): Unit = {
val job = new PlotHeatmap(qscript) val job = new PlotHeatmap(qscript)
job.input = mergedCountFile job.input = countTable
job.output = heatpMap job.output = outputFile
job.countType = Some(name) job.countType = Some(name)
add(job) add(job)
} }
/** This function should add the count table for each bamFile */
def bamToCountFile(id: String, bamFile: File): (String, File)
/** Must return a map with used settings for this pipeline */ /** Must return a map with used settings for this pipeline */
def summarySettings: Map[String, Any] = Map() def summarySettings: Map[String, Any] = Map()
/** File to put in the summary for thie pipeline */ /** File to put in the summary for thie pipeline */
def summaryFiles: Map[String, File] = Map("merged_table" -> mergedCountFile, "heatmap" -> heatpMap) ++ def summaryFiles: Map[String, File] = Map() ++ bamFiles.map { case (id, file) => s"input_bam_$id" -> file }
bamFiles.map { case (id, file) => s"input_bam_$id" -> file }
/** Name of summary output file */ /** Name of summary output file */
def summaryFile: File = new File(outputDir, s"$name.summary.json") def summaryFile: File = new File(outputDir, s"$name.summary.json")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment