MultisampleMapping.scala 7.38 KB
Newer Older
1
2
3
4
5
6
7
package nl.lumc.sasc.biopet.pipelines.mapping

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.{PipelineCommand, Reference, MultiSampleQScript}
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq}
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

import scala.collection.JavaConversions._

/**
  * Created by pjvanthof on 18/12/15.
  */
class MultisampleMapping(val root: Configurable) extends QScript
  with MultiSampleQScript
  with Reference { qscript =>
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
27
28
29
30
31
  def mergeStrategy: MultisampleMapping.MergeStrategy.Value = {
    val value: String = config("merge_strategy", default = "preprocessmarkduplicates")
    MultisampleMapping.MergeStrategy.values.find(_.toString.toLowerCase == value) match {
      case Some(v) => v
      case _ => throw new IllegalArgumentException(s"merge_strategy '$value' does not exist")
    }
  }

32
33
34
35
36
  def init: Unit = {
  }

  def biopetScript: Unit = {
    addSamplesJobs() // This executes jobs for all samples
Peter van 't Hof's avatar
Peter van 't Hof committed
37
    addSummaryJobs()
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
  }

  def addMultiSampleJobs: Unit = {
    // this code will be executed after all code of all samples is executed
  }

  def summaryFile: File = new File(outputDir, "MultisamplePipeline.summary.json")

  def summaryFiles: Map[String, File] = Map()

  def summarySettings: Map[String, Any] = Map()

  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {

    def makeLibrary(id: String) = new Library(id)
    class Library(libId: String) extends AbstractLibrary(libId) {
      def summaryFiles: Map[String, File] = Map()

      def summaryStats: Map[String, Any] = Map()

      lazy val inputR1: Option[File] = MultisampleMapping.fileMustBeAbsulute(config("R1"))
      lazy val inputR2: Option[File] = MultisampleMapping.fileMustBeAbsulute(config("R2"))
      lazy val inputBam: Option[File] = MultisampleMapping.fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None)
      lazy val bamToFastq: Boolean = config("bam_to_fastq", default = false)
      lazy val correctReadgroups: Boolean = config("correct_readgroups", default = false)

      lazy val mapping = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
        val m = new Mapping(qscript)
        m.sampleId = Some(sampleId)
        m.libId = Some(libId)
        Some(m)
      } else None

      def bamFile = mapping match {
        case Some(m) => Some(m.finalBamFile)
        case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
        case _ => None
      }

Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
      def preProcessBam = bamFile

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
      def addJobs: Unit = {
        if (inputR1.isDefined) {
          mapping.foreach { m =>
            m.input_R1 = inputR1.get
            m.input_R2 = inputR2
            add(m)
          }
        } else if (inputBam.isDefined) {
          if (bamToFastq) {
            val samToFastq = SamToFastq(qscript, inputBam.get,
              new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
              new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
            samToFastq.isIntermediate = true
            qscript.add(samToFastq)
            mapping.foreach(m => {
              m.input_R1 = samToFastq.fastqR1
              m.input_R2 = Some(samToFastq.fastqR2)
              add(m)
            })
          } else {
            val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
            val readGroups = inputSam.getFileHeader.getReadGroups

            val readGroupOke = readGroups.forall(readGroup => {
              if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
              if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
              readGroup.getSample == sampleId && readGroup.getLibrary == libId
            })
            inputSam.close()

            if (!readGroupOke) {
              if (correctReadgroups) {
                logger.info("Correcting readgroups, file:" + inputBam.get)
                val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
                aorrg.RGID = s"$sampleId-$libId"
                aorrg.RGLB = libId
                aorrg.RGSM = sampleId
                aorrg.RGPL = "unknown"
                aorrg.RGPU = "na"
                aorrg.isIntermediate = true
                qscript.add(aorrg)
              } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
            } else {
              val oldBamFile: File = inputBam.get
              val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
              val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
              val baiLn = Ln(qscript, oldIndex, newIndex)
              add(baiLn)

              val bamLn = Ln(qscript, oldBamFile, bamFile.get)
              bamLn.deps :+= baiLn.output
              add(bamLn)
            }

            val bamMetrics = new BamMetrics(qscript)
            bamMetrics.sampleId = Some(sampleId)
            bamMetrics.libId = Some(libId)
            bamMetrics.inputBam = bamFile.get
            bamMetrics.outputDir = new File(libDir, "metrics")
            bamMetrics.init()
            bamMetrics.biopetScript()
            addAll(bamMetrics.functions)
            addSummaryQScript(bamMetrics)
          }
        } else logger.warn(s"Sample '$sampleId' does not have any input files")
      }
    }

    def summaryFiles: Map[String, File] = Map()

    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
153
154
155
156
157
158
159
160
161
    def executeSamplePreProcess: Boolean = libraries.flatMap(_._2.bamFile).size > 1

    def bamFile = if (libraries.flatMap(_._2.bamFile).nonEmpty &&
      mergeStrategy != MultisampleMapping.MergeStrategy.None)
      Some(new File(sampleDir, s"$sampleId.bam"))
    else None

    def preProcessBam = bamFile

162
163
    def addJobs: Unit = {
      addPerLibJobs() // This add jobs for each library
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
166
167
168
169
170
171
172
173
174
175

      mergeStrategy match {
        case MultisampleMapping.MergeStrategy.MergeSam =>
          add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get))
        case MultisampleMapping.MergeStrategy.PreProcessMergeSam =>
          add(MergeSamFiles(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get))
        case MultisampleMapping.MergeStrategy.MarkDuplicates =>
          add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get))
        case MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates =>
          add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get))
        case _ =>
      }
176
177
178
179
180
181
182
    }
  }
}

object MultisampleMapping extends PipelineCommand {

  object MergeStrategy extends Enumeration {
Peter van 't Hof's avatar
Peter van 't Hof committed
183
    val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value
184
185
186
187
188
189
190
191
192
193
194
  }

  def fileMustBeAbsulute(file: Option[File]): Option[File] = {
    if (file.forall(_.isAbsolute)) file
    else {
      Logging.addError(s"$file should be a absolute file path")
      file.map(_.getAbsoluteFile)
    }
  }

}