Commit 9b24ef8b authored by Wai Yi Leung
Browse files

Update TinyCap with Gentrap measures and fix some typos in the measures (from the merge)

parent 01f3bf84
......@@ -6,9 +6,9 @@
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.broad._
import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaVariantcallingTrait, ShivaTrait }
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaTrait
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
......
......@@ -17,6 +17,11 @@
<artifactId>Mapping</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Gentrap</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.tinycap
import java.io.File
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationGff, AnnotationGtf, AnnotationRefFlat }
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.HtseqCount
import nl.lumc.sasc.biopet.pipelines.gentrap.measures.{ BaseCounts, FragmentsPerGene }
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
import nl.lumc.sasc.biopet.pipelines.tinycap.measures.FragmentsPerSmallRna
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......@@ -12,12 +16,15 @@ import org.broadinstitute.gatk.queue.QScript
* Design based on work from Henk Buermans (e-Mir)
* Implementation by wyleung started 19/01/16
*/
class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTrait with Reference {
class TinyCap(val root: Configurable) extends QScript
with MultisampleMappingTrait
with AnnotationRefFlat
with AnnotationGff
with AnnotationGtf
with Reference {
qscript =>
def this() = this(null)
var annotationGff: File = config("annotation_gff")
var annotationGtf: File = config("annotation_gtf")
var annotateSam: Boolean = config("annotate_sam", default = false)
override def defaults = Map(
......@@ -32,6 +39,10 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra
"generate_wig" -> true,
"skip_markduplicates" -> true
),
"bammetrics" -> Map(
"wgs_metrics" -> false,
"rna_metrics" -> true
),
"bowtie" -> Map(
"chunkmbs" -> 256,
"seedmms" -> 3,
......@@ -54,38 +65,34 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra
)
)
lazy val fragmentsPerGene = Some(new FragmentsPerGene(this))
lazy val fragmentsPerSmallRna = Some(new FragmentsPerSmallRna(this))
lazy val baseCounts = Some(new BaseCounts(this))
def executedMeasures = (fragmentsPerGene :: baseCounts :: Nil).flatten
override def init = {
super.init()
executedMeasures.foreach(x => x.outputDir = new File(outputDir, "expression_measures" + File.separator + x.name))
}
override def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends super.Sample(sampleId) {
override def addJobs(): Unit = {
super.addJobs()
// Do expression counting for miRNA and siRNA
val htseqCount = new HtseqCount(qscript)
htseqCount.inputAlignment = bamFile.get
htseqCount.inputAnnotation = annotationGff
htseqCount.format = Option("bam")
htseqCount.stranded = Option("yes")
htseqCount.featuretype = Option("miRNA")
htseqCount.idattr = Option("Name")
htseqCount.output = createFile("exprcount.mirna.tsv")
if (annotateSam) htseqCount.samout = Option(createFile("htseqannot.mirna.sam"))
add(htseqCount)
val htseqCountGTF = new HtseqCount(qscript)
htseqCountGTF.inputAlignment = bamFile.get
htseqCountGTF.inputAnnotation = annotationGtf
htseqCountGTF.format = Option("bam")
htseqCountGTF.stranded = Option("yes")
htseqCountGTF.output = createFile("exprcount.tsv")
if (annotateSam) htseqCountGTF.samout = Option(createFile("htseqannot.sam"))
add(htseqCountGTF)
preProcessBam.foreach { file =>
executedMeasures.foreach(_.addBamfile(sampleId, file))
}
}
}
override def summaryFile = new File(outputDir, "tinycap.summary.json")
override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map(
"annotation_refflat" -> annotationRefFlat(),
"annotationGtf" -> annotationGtf,
"annotationGff" -> annotationGff
)
......@@ -98,6 +105,7 @@ class TinyCap(val root: Configurable) extends QScript with MultisampleMappingTra
override def addMultiSampleJobs = {
super.addMultiSampleJobs
executedMeasures.foreach(add)
}
}
......
package nl.lumc.sasc.biopet.pipelines.tinycap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationGff
import nl.lumc.sasc.biopet.extensions.HtseqCount
import nl.lumc.sasc.biopet.pipelines.gentrap.measures.Measurement
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
 * Expression measurement that runs HTSeq-count per BAM file on the `miRNA`
 * features of the GFF annotation (identified by their `Name` attribute), then
 * merges the per-sample counts into one table and draws a heatmap from it.
 *
 * Created by wyleung on 11-2-16.
 *
 * @param root parent configurable this measurement is attached to
 */
class FragmentsPerSmallRna(val root: Configurable) extends QScript with Measurement with AnnotationGff {

  /** Settings handed to the table merge for the HTSeq-count output files. */
  def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")

  /** Location of the table holding the merged counts of all BAM files. */
  def mergedTable = new File(outputDir, s"$name.mirna.fragments_per_gene.tsv")

  /** Location of the heatmap image generated from [[mergedTable]]. */
  def heatmap = new File(outputDir, s"$name.mirna.fragments_per_gene.png")

  /** Pipeline itself: one counting job per BAM file, followed by merge, heatmap and summary jobs. */
  def biopetScript(): Unit = {
    val countJobs = bamFiles.map {
      case (sampleId, bam) => sampleId -> addCountJob(sampleId, bam)
    }
    val countTables = countJobs.values.map(_.output).toList

    // NOTE(review): the per-sample outputs end in ".mirna.counts.tsv" while the
    // extension passed here stops at ".mirna.counts" - confirm that the merge
    // step derives the intended sample names from these file names.
    addMergeTableJob(countTables, mergedTable, "mirna.fragments_per_gene", s".$name.mirna.counts")
    addHeatmapJob(mergedTable, heatmap, "mirna.fragments_per_gene")
    addSummaryJobs()
  }

  /**
   * Configures and schedules the miRNA expression counting job for one BAM file.
   *
   * @param sampleId id under which the BAM file was registered; used as output file prefix
   * @param bam alignment file to count fragments in
   * @return the scheduled HTSeq-count job
   */
  private def addCountJob(sampleId: String, bam: File): HtseqCount = {
    val counter = new HtseqCount(this)
    counter.inputAlignment = bam
    counter.inputAnnotation = annotationGff
    counter.format = Some("bam")
    counter.stranded = Some("yes")
    // Restrict counting to miRNA features, keyed by the "Name" attribute.
    counter.featuretype = Some("miRNA")
    counter.idattr = Some("Name")
    counter.output = new File(outputDir, s"$sampleId.$name.mirna.counts.tsv")
    add(counter)
    counter
  }
}
......@@ -18,13 +18,10 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.{ File, FileOutputStream }
import com.google.common.io.Files
import nl.lumc.sasc.biopet.utils.config.Config
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
......
......@@ -16,6 +16,14 @@ trait AnnotationGtf extends BiopetQScript { qscript: QScript =>
file
}
}
/** Mixin that exposes the GFF annotation configured for a pipeline. */
trait AnnotationGff extends BiopetQScript { qscript: QScript =>
  /**
   * GFF reference file in GFF3 format, read from the `annotation_gff` config
   * value. On first access the file is also registered as a pipeline input
   * file together with the optional `annotation_gff_md5` config value.
   */
  lazy val annotationGff: File = {
    val file: File = config("annotation_gff", freeVar = true)
    // `:+` on its own only builds a new collection and throws it away, so the
    // file was never registered; `:+=` stores the extended collection back.
    // Assumes `inputFiles` is a reassignable member of BiopetQScript - TODO confirm.
    inputFiles :+= InputFile(file, config("annotation_gff_md5", freeVar = true))
    file
  }
}
trait AnnotationRefFlat extends BiopetQScript { qscript: QScript =>
/** GTF reference file */
......
......@@ -7,9 +7,9 @@ class LazyCheck[T](function: => T) {
private var _isSet = false
def isSet = _isSet
lazy val value = {
val chache = function
val cache = function
_isSet = true
chache
cache
}
def apply() = value
def get = value
......
......@@ -177,7 +177,7 @@ class Gentrap(val root: Configurable) extends QScript
if (removeRibosomalReads && ribosomalRefFlat().isEmpty)
Logging.addError("removeRibosomalReads is enabled but no ribosomalRefFlat is given")
executedMeasures.foreach(x => x.outputDir = new File(outputDir, "expresion_measures" + File.separator + x.name))
executedMeasures.foreach(x => x.outputDir = new File(outputDir, "expression_measures" + File.separator + x.name))
}
/** Pipeline run for multiple samples */
......@@ -208,8 +208,8 @@ class Gentrap(val root: Configurable) extends QScript
val job = new WipeReads(qscript)
job.inputBam = bamFile.get
ribosomalRefFlat().foreach(job.intervalFile = _)
job.outputBam = createFile(".cleaned.bam")
job.discardedBam = createFile(".rrna.bam")
job.outputBam = createFile("cleaned.bam")
job.discardedBam = createFile("rrna.bam")
add(job)
Some(job.outputBam)
} else bamFile
......
......@@ -2,7 +2,6 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationRefFlat
import nl.lumc.sasc.biopet.extensions.tools.BaseCounter
import nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......
......@@ -14,7 +14,7 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
/**
* Method to add a bamFile to the pipeline
* @param id Uniqe id used for this bam file, most likly to be a sampleName
* @param id Unique id used for this bam file, most likely to be a sampleName
* @param file Location of the bam file
*/
def addBamfile(id: String, file: File): Unit = {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment