Commit 9b24ef8b authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Update TinyCap with Gentrap measures and fix some typos in the measures (from the merge)

parent 01f3bf84
...@@ -6,9 +6,9 @@ ...@@ -6,9 +6,9 @@
package nl.lumc.sasc.biopet.pipelines.gatk package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.broad._ import nl.lumc.sasc.biopet.extensions.gatk.broad._
import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaVariantcallingTrait, ShivaTrait } import nl.lumc.sasc.biopet.pipelines.shiva.ShivaTrait
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
/** /**
......
...@@ -17,6 +17,11 @@ ...@@ -17,6 +17,11 @@
<artifactId>Mapping</artifactId> <artifactId>Mapping</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Gentrap</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies> </dependencies>
</project> </project>
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.tinycap package nl.lumc.sasc.biopet.pipelines.tinycap
import java.io.File
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationGff, AnnotationGtf, AnnotationRefFlat }
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference } import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.HtseqCount import nl.lumc.sasc.biopet.pipelines.gentrap.measures.{ BaseCounts, FragmentsPerGene }
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
import nl.lumc.sasc.biopet.pipelines.tinycap.measures.FragmentsPerSmallRna
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
...@@ -12,12 +16,15 @@ import org.broadinstitute.gatk.queue.QScript ...@@ -12,12 +16,15 @@ import org.broadinstitute.gatk.queue.QScript
* Design based on work from Henk Buermans (e-Mir) * Design based on work from Henk Buermans (e-Mir)
* Implementation by wyleung started 19/01/16 * Implementation by wyleung started 19/01/16
*/ */
class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTrait with Reference { class TinyCap(val root: Configurable) extends QScript
with MultisampleMappingTrait
with AnnotationRefFlat
with AnnotationGff
with AnnotationGtf
with Reference {
qscript => qscript =>
def this() = this(null) def this() = this(null)
var annotationGff: File = config("annotation_gff")
var annotationGtf: File = config("annotation_gtf")
var annotateSam: Boolean = config("annotate_sam", default = false) var annotateSam: Boolean = config("annotate_sam", default = false)
override def defaults = Map( override def defaults = Map(
...@@ -32,6 +39,10 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra ...@@ -32,6 +39,10 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra
"generate_wig" -> true, "generate_wig" -> true,
"skip_markduplicates" -> true "skip_markduplicates" -> true
), ),
"bammetrics" -> Map(
"wgs_metrics" -> false,
"rna_metrics" -> true
),
"bowtie" -> Map( "bowtie" -> Map(
"chunkmbs" -> 256, "chunkmbs" -> 256,
"seedmms" -> 3, "seedmms" -> 3,
...@@ -54,38 +65,34 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra ...@@ -54,38 +65,34 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra
) )
) )
/** Fragments-per-gene measure, constructed lazily with this pipeline as its config root. */
lazy val fragmentsPerGene = Some(new FragmentsPerGene(this))

/** Fragments-per-small-RNA measure, constructed lazily with this pipeline as its config root. */
lazy val fragmentsPerSmallRna = Some(new FragmentsPerSmallRna(this))

/** Base-counts measure, constructed lazily with this pipeline as its config root. */
lazy val baseCounts = Some(new BaseCounts(this))

/**
 * All measures that this pipeline runs.
 *
 * Every measure in this list receives the per-sample bam files, gets its output
 * directory assigned in `init` and is added as a sub-pipeline in the multi-sample step.
 * `fragmentsPerSmallRna` has to be part of the list: it was constructed above but left
 * out here, which meant it was never fed bam files, never given an output directory and
 * never scheduled.
 */
def executedMeasures = (fragmentsPerGene :: fragmentsPerSmallRna :: baseCounts :: Nil).flatten
/**
 * Runs the parent initialisation first, then gives every executed measure its own
 * output directory: `expression_measures/<measure name>` below this pipeline's `outputDir`.
 */
override def init = {
  super.init()
  for (measure <- executedMeasures) {
    val relativePath = "expression_measures" + File.separator + measure.name
    measure.outputDir = new File(outputDir, relativePath)
  }
}
override def makeSample(id: String) = new Sample(id) override def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends super.Sample(sampleId) { class Sample(sampleId: String) extends super.Sample(sampleId) {
override def addJobs(): Unit = { override def addJobs(): Unit = {
super.addJobs() super.addJobs()
// Do expression counting for miRNA and siRNA preProcessBam.foreach { file =>
val htseqCount = new HtseqCount(qscript) executedMeasures.foreach(_.addBamfile(sampleId, file))
htseqCount.inputAlignment = bamFile.get }
htseqCount.inputAnnotation = annotationGff
htseqCount.format = Option("bam")
htseqCount.stranded = Option("yes")
htseqCount.featuretype = Option("miRNA")
htseqCount.idattr = Option("Name")
htseqCount.output = createFile("exprcount.mirna.tsv")
if (annotateSam) htseqCount.samout = Option(createFile("htseqannot.mirna.sam"))
add(htseqCount)
val htseqCountGTF = new HtseqCount(qscript)
htseqCountGTF.inputAlignment = bamFile.get
htseqCountGTF.inputAnnotation = annotationGtf
htseqCountGTF.format = Option("bam")
htseqCountGTF.stranded = Option("yes")
htseqCountGTF.output = createFile("exprcount.tsv")
if (annotateSam) htseqCountGTF.samout = Option(createFile("htseqannot.sam"))
add(htseqCountGTF)
} }
} }
override def summaryFile = new File(outputDir, "tinycap.summary.json") override def summaryFile = new File(outputDir, "tinycap.summary.json")
override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map( override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map(
"annotation_refflat" -> annotationRefFlat(),
"annotationGtf" -> annotationGtf,
"annotationGff" -> annotationGff "annotationGff" -> annotationGff
) )
...@@ -98,6 +105,7 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra ...@@ -98,6 +105,7 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra
override def addMultiSampleJobs = { override def addMultiSampleJobs = {
super.addMultiSampleJobs super.addMultiSampleJobs
executedMeasures.foreach(add)
} }
} }
......
package nl.lumc.sasc.biopet.pipelines.tinycap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationGff
import nl.lumc.sasc.biopet.extensions.HtseqCount
import nl.lumc.sasc.biopet.pipelines.gentrap.measures.Measurement
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
 * Measurement that runs one HtseqCount job per registered bam file, counting against the
 * GFF annotation with feature type "miRNA" and id attribute "Name", and then merges the
 * per-sample count tables into one table with an accompanying heatmap.
 *
 * Created by wyleung on 11-2-16.
 */
class FragmentsPerSmallRna(val root: Configurable) extends QScript with Measurement with AnnotationGff {

  /**
   * Arguments used when merging the per-sample count tables.
   * NOTE(review): `numHeaderLines = 1` is passed here; verify that the HtseqCount output
   * really starts with a header line, otherwise the first record may be dropped.
   */
  def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")

  /** Location of the merged count table for all samples */
  def mergedTable = new File(outputDir, s"$name.mirna.fragments_per_gene.tsv")

  /** Location of the heatmap image generated from the merged table */
  def heatmap = new File(outputDir, s"$name.mirna.fragments_per_gene.png")

  /**
   * Creates, configures and adds the HtseqCount job for a single bam file.
   * @param sampleId id under which the bam file was registered
   * @param bam Location of the bam file
   * @return the job that has been added to the pipeline
   */
  private def addCountJob(sampleId: String, bam: File): HtseqCount = {
    // Do expression counting for miRNA and siRNA
    val counter = new HtseqCount(this)
    counter.inputAlignment = bam
    counter.inputAnnotation = annotationGff
    counter.format = Option("bam")
    counter.stranded = Option("yes")
    counter.featuretype = Option("miRNA")
    counter.idattr = Option("Name")
    counter.output = new File(outputDir, s"$sampleId.$name.mirna.counts.tsv")
    add(counter)
    counter
  }

  /** Pipeline itself */
  def biopetScript(): Unit = {
    val countJobs = bamFiles.map {
      case (sampleId, bam) => sampleId -> addCountJob(sampleId, bam)
    }
    val countTables = countJobs.values.map(_.output).toList

    // NOTE(review): the count files end in ".mirna.counts.tsv" while the suffix passed
    // below is ".$name.mirna.counts" - confirm this is what addMergeTableJob expects.
    addMergeTableJob(countTables, mergedTable, "mirna.fragments_per_gene", s".$name.mirna.counts")
    addHeatmapJob(mergedTable, heatmap, "mirna.fragments_per_gene")

    addSummaryJobs()
  }
}
...@@ -18,13 +18,10 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics ...@@ -18,13 +18,10 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.{ File, FileOutputStream } import java.io.{ File, FileOutputStream }
import com.google.common.io.Files import com.google.common.io.Files
import nl.lumc.sasc.biopet.utils.config.Config
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
import nl.lumc.sasc.biopet.extensions.picard._ import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers import org.scalatest.Matchers
......
...@@ -16,6 +16,14 @@ trait AnnotationGtf extends BiopetQScript { qscript: QScript => ...@@ -16,6 +16,14 @@ trait AnnotationGtf extends BiopetQScript { qscript: QScript =>
file file
} }
} }
trait AnnotationGff extends BiopetQScript { qscript: QScript =>
  /**
   * GFF reference file in GFF3 format.
   *
   * Read from the "annotation_gff" config value on first access. On that first access the
   * file is also registered in `inputFiles`, together with the "annotation_gff_md5"
   * config value.
   */
  lazy val annotationGff: File = {
    val file: File = config("annotation_gff", freeVar = true)
    // A bare `:+` only builds a new collection and throws it away, so the file was never
    // registered; `:+=` stores the extended collection back into `inputFiles`.
    // NOTE(review): assumes `inputFiles` is a `var` holding an immutable collection - confirm in BiopetQScript.
    inputFiles :+= InputFile(file, config("annotation_gff_md5", freeVar = true))
    file
  }
}
trait AnnotationRefFlat extends BiopetQScript { qscript: QScript => trait AnnotationRefFlat extends BiopetQScript { qscript: QScript =>
/** GTF reference file */ /** GTF reference file */
......
...@@ -7,9 +7,9 @@ class LazyCheck[T](function: => T) { ...@@ -7,9 +7,9 @@ class LazyCheck[T](function: => T) {
private var _isSet = false private var _isSet = false
def isSet = _isSet def isSet = _isSet
lazy val value = { lazy val value = {
val chache = function val cache = function
_isSet = true _isSet = true
chache cache
} }
def apply() = value def apply() = value
def get = value def get = value
......
...@@ -177,7 +177,7 @@ class Gentrap(val root: Configurable) extends QScript ...@@ -177,7 +177,7 @@ class Gentrap(val root: Configurable) extends QScript
if (removeRibosomalReads && ribosomalRefFlat().isEmpty) if (removeRibosomalReads && ribosomalRefFlat().isEmpty)
Logging.addError("removeRibosomalReads is enabled but no ribosomalRefFlat is given") Logging.addError("removeRibosomalReads is enabled but no ribosomalRefFlat is given")
executedMeasures.foreach(x => x.outputDir = new File(outputDir, "expresion_measures" + File.separator + x.name)) executedMeasures.foreach(x => x.outputDir = new File(outputDir, "expression_measures" + File.separator + x.name))
} }
/** Pipeline run for multiple samples */ /** Pipeline run for multiple samples */
...@@ -208,8 +208,8 @@ class Gentrap(val root: Configurable) extends QScript ...@@ -208,8 +208,8 @@ class Gentrap(val root: Configurable) extends QScript
val job = new WipeReads(qscript) val job = new WipeReads(qscript)
job.inputBam = bamFile.get job.inputBam = bamFile.get
ribosomalRefFlat().foreach(job.intervalFile = _) ribosomalRefFlat().foreach(job.intervalFile = _)
job.outputBam = createFile(".cleaned.bam") job.outputBam = createFile("cleaned.bam")
job.discardedBam = createFile(".rrna.bam") job.discardedBam = createFile("rrna.bam")
add(job) add(job)
Some(job.outputBam) Some(job.outputBam)
} else bamFile } else bamFile
......
...@@ -2,7 +2,6 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.measures ...@@ -2,7 +2,6 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationRefFlat import nl.lumc.sasc.biopet.core.annotations.AnnotationRefFlat
import nl.lumc.sasc.biopet.extensions.tools.BaseCounter import nl.lumc.sasc.biopet.extensions.tools.BaseCounter
import nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
......
...@@ -14,7 +14,7 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => ...@@ -14,7 +14,7 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
/** /**
* Method to add a bamFile to the pipeline * Method to add a bamFile to the pipeline
* @param id Uniqe id used for this bam file, most likly to be a sampleName * @param id Unique id used for this bam file, most likely to be a sampleName
* @param file Location of the bam file * @param file Location of the bam file
*/ */
def addBamfile(id: String, file: File): Unit = { def addBamfile(id: String, file: File): Unit = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment