diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala index 8bf19381ed25f995fe73bc507f5029830a3b1cb0..ffad82b7e2f5c4ab3df0f886516bcc7fb83af45c 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala @@ -121,10 +121,7 @@ class Gentrap(val root: Configurable) extends QScript lazy val fragmentsPerExon = if (expMeasures.contains(ExpMeasures.FragmentsPerExon)) Some(new FragmentsPerExon(this)) else None - lazy val basesPerGene = if (expMeasures.contains(ExpMeasures.BasesPerGene)) - Some(new BasesPerGene(this)) else None - - lazy val basesPerExon = if (expMeasures.contains(ExpMeasures.BasesPerExon)) + lazy val baseCounts = if (expMeasures.contains(ExpMeasures.BaseCounts)) Some(new FragmentsPerExon(this)) else None lazy val cufflinksBlind = if (expMeasures.contains(ExpMeasures.CufflinksBlind)) @@ -136,7 +133,7 @@ class Gentrap(val root: Configurable) extends QScript lazy val cufflinksStrict = if (expMeasures.contains(ExpMeasures.CufflinksStrict)) Some(new CufflinksStrict(this)) else None - def executedMeasures = (fragmentsPerGene :: fragmentsPerExon :: basesPerGene :: basesPerExon :: cufflinksBlind :: + def executedMeasures = (fragmentsPerGene :: fragmentsPerExon :: baseCounts :: cufflinksBlind :: cufflinksGuided :: cufflinksStrict :: Nil).flatten /** Whether to do simple variant calling on RNA or not */ @@ -232,7 +229,7 @@ object Gentrap extends PipelineCommand { /** Enumeration of available expression measures */ object ExpMeasures extends Enumeration { - val FragmentsPerGene, FragmentsPerExon, BasesPerGene, BasesPerExon, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value + val FragmentsPerGene, FragmentsPerExon, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value } /** Enumeration of available strandedness */ diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala new file mode 100644 index 0000000000000000000000000000000000000000..7a56733ea8ec669436c392f91ddc9ec0222ec2c1 --- /dev/null +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BaseCounts.scala @@ -0,0 +1,30 @@ +package nl.lumc.sasc.biopet.pipelines.gentrap.measures + +import nl.lumc.sasc.biopet.core.annotations.AnnotationRefFlat +import nl.lumc.sasc.biopet.extensions.tools.BaseCounter +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.QScript + +/** + * Created by pjvan_thof on 1/12/16. + */ +class BaseCounts(val root: Configurable) extends QScript with Measurement with AnnotationRefFlat { + + def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") + + /** Pipeline itself */ + def biopetScript(): Unit = { + val jobs = bamFiles.map { case (id, file) => + val baseCounter = new BaseCounter(this) + baseCounter.bamFile = file + baseCounter.outputDir = new File(outputDir, id) + baseCounter.prefix = id + baseCounter.refFlat = annotationRefFlat + add(baseCounter) + id -> baseCounter + } + + //TODO: merges + //TODO heatmaps + } +} diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala deleted file mode 100644 index d0feb6c8b2b9fc4c440d3ecfae601e5c3d9337a6..0000000000000000000000000000000000000000 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala +++ /dev/null @@ -1,13 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gentrap.measures - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * Created by pjvan_thof on 1/12/16. - */ -class BasesPerExon(val root: Configurable) extends QScript with Measurement { - def bamToCountFile(id: String, bamFile: File): (String, File) = ??? - - def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") -} diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala deleted file mode 100644 index d2650362f471088c2e8248974022dcb960a3e895..0000000000000000000000000000000000000000 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala +++ /dev/null @@ -1,14 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gentrap.measures - -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * Created by pjvan_thof on 1/12/16. - */ -class BasesPerGene(val root: Configurable) extends QScript with Measurement { - //TODO: splitting on strand if strandspecific - def bamToCountFile(id: String, bamFile: File): (String, File) = ??? - - def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") -} diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala index 1a9116c12d662ecca1de719f699ef72bac32fcee..229d78e6581a2627337ea6dd87d1378e219cc0e7 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksMeasurement.scala @@ -15,25 +15,25 @@ trait CufflinksMeasurement extends QScript with Measurement { cufflinks } - private var isoFormFiles: List[File] = Nil - - def bamToCountFile(id: String, bamFile: File): (String, File) = { - val cufflinks = makeCufflinksJob(id, bamFile) - add(cufflinks) - - isoFormFiles :+= cufflinks.outputIsoformsFpkm - - id -> cufflinks.outputGenesFpkm + def biopetScript(): Unit = { + val jobs = bamFiles.map { case (id, file) => + val cufflinks = makeCufflinksJob(id, file) + add(cufflinks) + id -> cufflinks + } + + addMergeTableJob(jobs.values.map(_.outputGenesFpkm).toList, mergeGenesFpkmTable) + addMergeTableJob(jobs.values.map(_.outputIsoformsFpkm).toList, mergeIsoFormFpkmTable) + + addHeatmapJob(mergeGenesFpkmTable, genesFpkmHeatmap, "genes_fpkm") + addHeatmapJob(mergeIsoFormFpkmTable, isoFormFpkmHeatmap, "iso_form_fpkm") } - override def biopetScript(): Unit = { - super.biopetScript() - - add(MergeTables(this, isoFormFiles, mergeIsoFormTable, - mergeArgs.idCols, mergeArgs.valCol, mergeArgs.numHeaderLines, mergeArgs.fallback)) - } + def mergeGenesFpkmTable: File = new File(outputDir, s"$name.genes.fpkm.tsv") + def genesFpkmHeatmap: File = new File(outputDir, s"$name.genes.fpkm.png") - def mergeIsoFormTable: File = new File(outputDir, s"$name.iso_form_table.tsv") + def mergeIsoFormFpkmTable: File = new File(outputDir, s"$name.iso_form.fpkm.tsv") + def isoFormFpkmHeatmap: File = new File(outputDir, s"$name.iso_form.fpkm.png") def mergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0") diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala index 6f1268fcc5b677ad77dff37d76a457097c53669d..dbca36eed14d1203473b5b1d5d5ddaa33846733a 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala @@ -7,7 +7,8 @@ import org.broadinstitute.gatk.queue.QScript * Created by pjvan_thof on 1/12/16. */ class FragmentsPerExon(val root: Configurable) extends QScript with Measurement { - def bamToCountFile(id: String, bamFile: File): (String, File) = ??? - def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") + + /** Pipeline itself */ + def biopetScript(): Unit = ??? } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala index a614f93e9249a9396555a6b2da85694309d75f6f..fd94996f6a797e6aee596a16b801e70cdcd87cc5 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala @@ -25,4 +25,28 @@ class FragmentsPerGene(val root: Configurable) extends QScript with Measurement } def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") + + /** Pipeline itself */ + def biopetScript(): Unit = { + val jobs = bamFiles.map { case (id, file) => + //TODO: ID sorting job + + val job = new HtseqCount(this) + job.inputAnnotation = annotationGtf + job.inputAlignment = file + job.output = new File(outputDir, s"$name.$id.counts") + job.format = Option("bam") + // We are forcing the sort order to be ID-sorted, since HTSeq-count often chokes when using position-sorting due + // to its buffer not being large enough. + //TODO: ID sorting job + //job.order = Option("name") + id -> job + } + + addMergeTableJob(jobs.values.map(_.output).toList, mergedTable) + addHeatmapJob(mergedTable, heatmap, "fragments_per_gene") + } + + def mergedTable = new File(outputDir, s"$name.fragments_per_gene.tsv") + def heatmap = new File(outputDir, s"$name.fragments_per_gene.png") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala index 1c5cc50b67bc93f561e9df7aaa36d1249f1eb707..f09cc09f6cf2fcfefcb55bb8c7296c9324514d2b 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala @@ -1,7 +1,5 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.measures -import java.io.File - import nl.lumc.sasc.biopet.core.Reference import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.tools.MergeTables @@ -27,15 +25,6 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => /** Name of job, this is used as prefix for most of the files */ def name: String = this.getClass.getSimpleName.toLowerCase - /** Locations of single bam count tables */ - lazy val countFiles: Map[String, File] = bamFiles.map { case (id, bamFile) => bamToCountFile(id, bamFile) } - - /** Location of merge count table */ - def mergedCountFile = new File(outputDir, s"$name.merged.tsv") - - /** Location of heatmap */ - def heatpMap = new File(outputDir, s"$name.heatmap.png") - /** Class to store args for MergeTables */ case class MergeArgs(idCols: List[Int], valCol: Int, numHeaderLines: Int = 0, fallback: String = "-") @@ -47,27 +36,26 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => require(bamFiles.nonEmpty) } - /** Pipeline itself */ - def biopetScript(): Unit = { - add(MergeTables(this, countFiles.values.toList, mergedCountFile, - mergeArgs.idCols, mergeArgs.valCol, mergeArgs.numHeaderLines, mergeArgs.fallback)) + def addMergeTableJob(countFiles: List[File], + outputFile: File, + args: MergeArgs = mergeArgs): Unit = { + add(MergeTables(this, countFiles, outputFile, + args.idCols, args.valCol, args.numHeaderLines, args.fallback)) + } + def addHeatmapJob(countTable: File, outputFile: File, name: String): Unit = { val job = new PlotHeatmap(qscript) - job.input = mergedCountFile - job.output = heatpMap + job.input = countTable + job.output = outputFile job.countType = Some(name) add(job) } - /** This function should add the count table for each bamFile */ - def bamToCountFile(id: String, bamFile: File): (String, File) - /** Must return a map with used settings for this pipeline */ def summarySettings: Map[String, Any] = Map() /** File to put in the summary for thie pipeline */ - def summaryFiles: Map[String, File] = Map("merged_table" -> mergedCountFile, "heatmap" -> heatpMap) ++ - bamFiles.map { case (id, file) => s"input_bam_$id" -> file } + def summaryFiles: Map[String, File] = Map() ++ bamFiles.map { case (id, file) => s"input_bam_$id" -> file } /** Name of summary output file */ def summaryFile: File = new File(outputDir, s"$name.summary.json")