FragmentsPerGene.scala 2.49 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16 17
package nl.lumc.sasc.biopet.pipelines.gentrap.measures

Peter van 't Hof's avatar
Peter van 't Hof committed
18 19
import nl.lumc.sasc.biopet.core.annotations.AnnotationGtf
import nl.lumc.sasc.biopet.extensions.HtseqCount
20
import nl.lumc.sasc.biopet.extensions.picard.SortSam
21 22 23 24
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

/**
25 26
 * Created by pjvan_thof on 1/12/16.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
27
class FragmentsPerGene(val root: Configurable) extends QScript with Measurement with AnnotationGtf {
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  def mergeArgs = MergeArgs(idCols = List(1), valCol = 2, numHeaderLines = 0, fallback = "0")
29

Peter van 't Hof's avatar
Peter van 't Hof committed
30
  override def fixedValues: Map[String, Any] = Map("htseqcount" -> Map("order" -> ""))
31

Peter van 't Hof's avatar
Peter van 't Hof committed
32
  lazy val sortOnId: Boolean = config("sort_on_id", default = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
33

34 35
  /** Pipeline itself */
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
36 37
    val jobs = bamFiles.map {
      case (id, file) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
38 39

        val bamFile = if (sortOnId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
40 41 42 43 44 45 46
          val sortSam = new SortSam(this)
          sortSam.input = file
          sortSam.output = swapExt(outputDir, file, ".bam", ".idsorted.bam")
          sortSam.sortOrder = "queryname"
          sortSam.isIntermediate = true
          add(sortSam)
          sortSam.output
Peter van 't Hof's avatar
Peter van 't Hof committed
47
        } else file
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49 50
        val job = new HtseqCount(this)
        job.inputAnnotation = annotationGtf
Peter van 't Hof's avatar
Peter van 't Hof committed
51
        job.inputAlignment = bamFile
52
        job.output = new File(outputDir, s"$id.$name.counts")
Peter van 't Hof's avatar
Peter van 't Hof committed
53
        job.format = Option("bam")
Peter van 't Hof's avatar
Peter van 't Hof committed
54
        job.order = if (sortOnId) Some("name") else Some("pos")
Peter van 't Hof's avatar
Peter van 't Hof committed
55
        add(job)
Peter van 't Hof's avatar
Peter van 't Hof committed
56
        id -> job
57 58
    }

59
    addMergeTableJob(jobs.values.map(_.output).toList, mergedTable, "fragments_per_gene", s".$name.counts")
60
    addHeatmapJob(mergedTable, heatmap, "fragments_per_gene")
Peter van 't Hof's avatar
Peter van 't Hof committed
61 62

    addSummaryJobs()
63 64 65 66
  }

  def mergedTable = new File(outputDir, s"$name.fragments_per_gene.tsv")
  def heatmap = new File(outputDir, s"$name.fragments_per_gene.png")
67
}