Skip to content
Snippets Groups Projects
Commit 172b0345 authored by bow's avatar bow
Browse files

Update docs in main Gentrap pipeline

parent e0985baf
No related branches found
No related tags found
No related merge requests found
......@@ -41,6 +41,8 @@ import nl.lumc.sasc.biopet.tools.{ MergeTables, WipeReads }
/**
* Gentrap pipeline
* Generic transcriptome analysis pipeline
*
* @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
*/
class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript with SummaryQScript { qscript =>
......@@ -118,9 +120,11 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
}
}
/** Expression measures which are subject to TMM normalization during correlation calculation */
protected lazy val forTmmNormalization: Set[ExpMeasures.Value] =
Set(FragmentsPerGene, FragmentsPerExon, BasesPerGene, BasesPerExon)
/** Returns a QFunction to generate heatmaps */
private def makeHeatmapJob(mergeJob: Option[MergeTables], outName: String,
expMeasure: ExpMeasures.Value, isCuffIsoform: Boolean = false): Option[PlotHeatmap] =
(mergeJob.isDefined && samples.size > 2)
......@@ -142,6 +146,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val geneFragmentsCountJob =
makeMergeTableJob((s: Sample) => s.geneFragmentsCount, ".fragments_per_gene", List(1), 2, fallback = "0")
/** Heatmap job for gene fragment count */
private lazy val geneFragmentsCountHeatmapJob =
makeHeatmapJob(geneFragmentsCountJob, "fragments_per_gene", FragmentsPerGene)
......@@ -149,6 +154,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val exonFragmentsCountJob =
makeMergeTableJob((s: Sample) => s.exonFragmentsCount, ".fragments_per_exon", List(1), 2, fallback = "0")
/** Heatmap job for exon fragment count */
private lazy val exonFragmentsCountHeatmapJob =
makeHeatmapJob(exonFragmentsCountJob, "fragments_per_exon", FragmentsPerExon)
......@@ -156,6 +162,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val geneBasesCountJob =
makeMergeTableJob((s: Sample) => s.geneBasesCount, ".bases_per_gene", List(1), 2, fallback = "0")
/** Heatmap job for gene base count */
private lazy val geneBasesCountHeatmapJob =
makeHeatmapJob(geneBasesCountJob, "bases_per_gene", BasesPerGene)
......@@ -163,6 +170,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val exonBasesCountJob =
makeMergeTableJob((s: Sample) => s.exonBasesCount, ".bases_per_exon", List(1), 2, fallback = "0")
/** Heatmap job for exon base count */
private lazy val exonBasesCountHeatmapJob =
makeHeatmapJob(exonBasesCountJob, "bases_per_exon", BasesPerExon)
......@@ -170,6 +178,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val geneFpkmCufflinksStrictJob =
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksStrict, ".genes_fpkm_cufflinks_strict", List(1, 7), 10, fallback = "0.0")
/** Heatmap job for gene FPKM Cufflinks, strict mode */
private lazy val geneFpkmCufflinksStrictHeatmapJob =
makeHeatmapJob(geneFpkmCufflinksStrictJob, "genes_fpkm_cufflinks_strict", CufflinksStrict)
......@@ -177,6 +186,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val isoFpkmCufflinksStrictJob =
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksStrict, ".isoforms_fpkm_cufflinks_strict", List(1, 7), 10, fallback = "0.0")
/** Heatmap job for isoform FPKM Cufflinks, strict mode */
private lazy val isoFpkmCufflinksStrictHeatmapJob =
makeHeatmapJob(isoFpkmCufflinksStrictJob, "isoforms_fpkm_cufflinks_strict", CufflinksStrict, true)
......@@ -184,6 +194,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val geneFpkmCufflinksGuidedJob =
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksGuided, ".genes_fpkm_cufflinks_guided", List(1, 7), 10, fallback = "0.0")
/** Heatmap job for gene FPKM Cufflinks, guided mode */
private lazy val geneFpkmCufflinksGuidedHeatmapJob =
makeHeatmapJob(geneFpkmCufflinksGuidedJob, "genes_fpkm_cufflinks_guided", CufflinksGuided)
......@@ -191,6 +202,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val isoFpkmCufflinksGuidedJob =
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksGuided, ".isoforms_fpkm_cufflinks_guided", List(1, 7), 10, fallback = "0.0")
/** Heatmap job for isoform FPKM Cufflinks, guided mode */
private lazy val isoFpkmCufflinksGuidedHeatmapJob =
makeHeatmapJob(isoFpkmCufflinksGuidedJob, "isoforms_fpkm_cufflinks_guided", CufflinksGuided, true)
......@@ -198,6 +210,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val geneFpkmCufflinksBlindJob =
makeMergeTableJob((s: Sample) => s.geneFpkmCufflinksBlind, ".genes_fpkm_cufflinks_blind", List(1, 7), 10, fallback = "0.0")
/** Heatmap job for gene FPKM Cufflinks, blind mode */
private lazy val geneFpkmCufflinksBlindHeatmapJob =
makeHeatmapJob(geneFpkmCufflinksBlindJob, "genes_fpkm_cufflinks_blind", CufflinksBlind)
......@@ -205,9 +218,11 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
private lazy val isoFpkmCufflinksBlindJob =
makeMergeTableJob((s: Sample) => s.isoformFpkmCufflinksBlind, ".isoforms_fpkm_cufflinks_blind", List(1, 7), 10, fallback = "0.0")
/** Heatmap job for isoform FPKM Cufflinks, blind mode */
private lazy val isoFpkmCufflinksBlindHeatmapJob =
makeHeatmapJob(isoFpkmCufflinksBlindJob, "isoforms_fpkm_cufflinks_blind", CufflinksBlind, true)
/** Container for merge table jobs */
private lazy val mergeTableJobs: Map[String, Option[MergeTables]] = Map(
"gene_fragments_count" -> geneFragmentsCountJob,
"exon_fragments_count" -> exonFragmentsCountJob,
......@@ -221,6 +236,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
"isoform_fpkm_cufflinks_blind" -> isoFpkmCufflinksBlindJob
)
/** Container for heatmap jobs */
private lazy val heatmapJobs: Map[String, Option[PlotHeatmap]] = Map(
"gene_fragments_count_heatmap" -> geneFragmentsCountHeatmapJob,
"exon_fragments_count_heatmap" -> exonFragmentsCountHeatmapJob,
......@@ -343,10 +359,12 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
require(ribosomalRefFlat.isDefined, "rRNA intervals must be supplied if removeRibosomalReads is set")
}
/** Pipeline run for each sample */
def biopetScript(): Unit = {
addSamplesJobs()
}
/** Pipeline run for multiple samples */
def addMultiSampleJobs(): Unit = {
// merge expression tables
mergeTableJobs.values.foreach { case maybeJob => maybeJob.foreach(add(_)) }
......@@ -362,8 +380,14 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
add(pdfReportJob)
}
/** Returns a [[Sample]] object */
def makeSample(sampleId: String): Sample = new Sample(sampleId)
/**
* Gentrap sample
*
* @param sampleId Unique identifier of the sample
*/
class Sample(sampleId: String) extends AbstractSample(sampleId) with CufflinksProducer {
/** Shortcut to qscript object */
......@@ -483,6 +507,8 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
job.inputAlignment = idSortingJob.get.output
job.output = createFile(".fragments_per_gene")
job.format = Option("bam")
// We are forcing the sort order to be ID-sorted, since HTSeq-count often chokes when using position-sorting due
// to its buffer not being large enough.
job.order = Option("name")
job.stranded = strandProtocol match {
case NonSpecific => Option("no")
......@@ -521,9 +547,11 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
}
}
/** Alignment file of reads from the plus strand, only defined when run is strand-specific */
def alnFilePlusStrand: Option[File] = alnPlusStrandJobs
.collect { case jobSet => jobSet.combineJob.output }
/** Jobs for generating reads from the plus strand, only defined when run is strand-specific */
private def alnPlusStrandJobs: Option[StrandSeparationJobSet] = strandProtocol match {
case Dutp =>
val r2Job = this.allPaired
......@@ -561,9 +589,11 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
case _ => throw new IllegalStateException
}
/** Alignment file of reads from the minus strand, only defined when run is strand-specific */
def alnFileMinusStrand: Option[File] = alnMinusStrandJobs
.collect { case jobSet => jobSet.combineJob.output }
/** Jobs for generating reads from the minus, only defined when run is strand-specific */
private def alnMinusStrandJobs: Option[StrandSeparationJobSet] = strandProtocol match {
case Dutp =>
val r1Job = this.allPaired
......@@ -678,6 +708,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
makeCollectRnaSeqMetricsJob(alnFileDirty, createFile(".rna_metrics"), Option(createFile(".coverage_bias.pdf")))
}
/** Job for removing ribosomal reads */
private def wipeJob: Option[WipeReads] = removeRibosomalReads
.option {
require(ribosomalRefFlat.isDefined)
......@@ -721,6 +752,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
def allSingle: Boolean = libraries.values.forall(!_.paired)
// TODO: add warnings or other messages for config values that are hard-coded by the pipeline
/** Adds all jobs for the sample */
def addJobs(): Unit = {
// TODO: this is our requirement since it's easier to calculate base counts when all libraries are either paired or single
require(allPaired || allSingle, s"Sample $sampleId contains only single-end or paired-end libraries")
......@@ -766,12 +798,14 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
varCallJob.foreach(add(_))
}
/** Add jobs for fragments per gene counting using HTSeq */
// We are forcing the sort order to be ID-sorted, since HTSeq-count often chokes when using position-sorting due
// to its buffer not being large enough.
/** Returns a [[Library]] object */
def makeLibrary(libId: String): Library = new Library(libId)
/**
* Gentrap library
*
* @param libId Unique identifier of the library
*/
class Library(libId: String) extends AbstractLibrary(libId) {
/** Summary stats of the library */
......@@ -808,6 +842,7 @@ class Gentrap(val root: Configurable) extends QScript with MultiSampleQScript wi
job
}
/** Adds all jobs for the library */
def addJobs(): Unit = {
// create per-library alignment file
addAll(mappingJob.functions)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment