MultisampleMappingTrait.scala 8.97 KB
Newer Older
1
2
3
4
5
package nl.lumc.sasc.biopet.pipelines.mapping

import java.io.File

import htsjdk.samtools.SamReaderFactory
Peter van 't Hof's avatar
Peter van 't Hof committed
6
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript }
7
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq }
9
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.utils.config.Configurable
12
13
import org.broadinstitute.gatk.queue.QScript

Peter van 't Hof's avatar
Peter van 't Hof committed
14
15
import MultisampleMapping.MergeStrategy

16
17
18
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
19
20
 * Created by pjvanthof on 18/12/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
trait MultisampleMappingTrait extends MultiSampleQScript
  with Reference { qscript: QScript =>
23

Peter van 't Hof's avatar
Peter van 't Hof committed
24
  def mergeStrategy: MergeStrategy.Value = {
Peter van 't Hof's avatar
Peter van 't Hof committed
25
    val value: String = config("merge_strategy", default = "preprocessmarkduplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
26
    MergeStrategy.values.find(_.toString.toLowerCase == value) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
27
      case Some(v) => v
Peter van 't Hof's avatar
Peter van 't Hof committed
28
      case _       => throw new IllegalArgumentException(s"merge_strategy '$value' does not exist")
Peter van 't Hof's avatar
Peter van 't Hof committed
29
30
31
    }
  }

32
  def init(): Unit = {
33
34
  }

35
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
36
    addSamplesJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
37
    addSummaryJobs()
38
39
  }

40
  def addMultiSampleJobs(): Unit = {
41
42
43
44
45
    // this code will be executed after all code of all samples is executed
  }

  def summaryFile: File = new File(outputDir, "MultisamplePipeline.summary.json")

46
  def summaryFiles: Map[String, File] = Map("referenceFasta" -> referenceFasta())
47

48
49
50
  def summarySettings: Map[String, Any] = Map(
    "reference" -> referenceSummary,
    "merge_strategy" -> mergeStrategy.toString)
51
52
53
54
55
56

  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {

    def makeLibrary(id: String) = new Library(id)
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
      def summaryFiles: Map[String, File] = (inputR1.map("input_R1" -> _) :: inputR2.map("input_R2" -> _) ::
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
        inputBam.map("input_bam" -> _) :: bamFile.map("output_bam" -> _) ::
        preProcessBam.map("output_preProcessBam" -> _) :: Nil).flatten.toMap
60
61
62
63
64
65
66
67
68
69
70
71
72

      def summaryStats: Map[String, Any] = Map()

      lazy val inputR1: Option[File] = MultisampleMapping.fileMustBeAbsulute(config("R1"))
      lazy val inputR2: Option[File] = MultisampleMapping.fileMustBeAbsulute(config("R2"))
      lazy val inputBam: Option[File] = MultisampleMapping.fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None)
      lazy val bamToFastq: Boolean = config("bam_to_fastq", default = false)
      lazy val correctReadgroups: Boolean = config("correct_readgroups", default = false)

      lazy val mapping = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
        val m = new Mapping(qscript)
        m.sampleId = Some(sampleId)
        m.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
73
        m.outputDir = libDir
74
75
76
77
        Some(m)
      } else None

      def bamFile = mapping match {
Peter van 't Hof's avatar
Peter van 't Hof committed
78
        case Some(m)                 => Some(m.finalBamFile)
79
        case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
80
        case _                       => None
81
82
      }

Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
      def preProcessBam = bamFile

85
      def addJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
88
89
        inputR1.foreach(inputFiles :+= new InputFile(_, config("R1_md5")))
        inputR2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
        inputBam.foreach(inputFiles :+= new InputFile(_, config("bam_md5")))

90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
        if (inputR1.isDefined) {
          mapping.foreach { m =>
            m.input_R1 = inputR1.get
            m.input_R2 = inputR2
            add(m)
          }
        } else if (inputBam.isDefined) {
          if (bamToFastq) {
            val samToFastq = SamToFastq(qscript, inputBam.get,
              new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
              new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
            samToFastq.isIntermediate = true
            qscript.add(samToFastq)
            mapping.foreach(m => {
              m.input_R1 = samToFastq.fastqR1
              m.input_R2 = Some(samToFastq.fastqR2)
              add(m)
            })
          } else {
            val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
            val readGroups = inputSam.getFileHeader.getReadGroups

            val readGroupOke = readGroups.forall(readGroup => {
              if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
              if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
              readGroup.getSample == sampleId && readGroup.getLibrary == libId
            })
            inputSam.close()

            if (!readGroupOke) {
              if (correctReadgroups) {
                logger.info("Correcting readgroups, file:" + inputBam.get)
                val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
                aorrg.RGID = s"$sampleId-$libId"
                aorrg.RGLB = libId
                aorrg.RGSM = sampleId
                aorrg.RGPL = "unknown"
                aorrg.RGPU = "na"
                aorrg.isIntermediate = true
                qscript.add(aorrg)
              } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
            } else {
              val oldBamFile: File = inputBam.get
              val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
              val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
              val baiLn = Ln(qscript, oldIndex, newIndex)
              add(baiLn)

              val bamLn = Ln(qscript, oldBamFile, bamFile.get)
              bamLn.deps :+= baiLn.output
              add(bamLn)
            }

            val bamMetrics = new BamMetrics(qscript)
            bamMetrics.sampleId = Some(sampleId)
            bamMetrics.libId = Some(libId)
            bamMetrics.inputBam = bamFile.get
            bamMetrics.outputDir = new File(libDir, "metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
149
            add(bamMetrics)
150
151
152
153
154
          }
        } else logger.warn(s"Sample '$sampleId' does not have any input files")
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
    def summaryFiles: Map[String, File] = (bamFile.map("output_bam" -> _) ::
      preProcessBam.map("output_preProcessBam" -> _) :: Nil).flatten.toMap
157
158
159

    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
163
164
165
166
    def bamFile = if (libraries.flatMap(_._2.bamFile).nonEmpty &&
      mergeStrategy != MultisampleMapping.MergeStrategy.None)
      Some(new File(sampleDir, s"$sampleId.bam"))
    else None

    def preProcessBam = bamFile

Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
    def keepMergedFiles: Boolean = config("keep_merged_files", default = true)

Peter van 't Hof's avatar
Peter van 't Hof committed
169
    def addJobs(): Unit = {
170
      addPerLibJobs() // This add jobs for each library
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172

      mergeStrategy match {
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
        case MergeStrategy.None =>
        case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 =>
Peter van 't Hof's avatar
Peter van 't Hof committed
175
          add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*)
Peter van 't Hof's avatar
Peter van 't Hof committed
176
        case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
Peter van 't Hof's avatar
Peter van 't Hof committed
177
          add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*)
Peter van 't Hof's avatar
Peter van 't Hof committed
178
        case MergeStrategy.MergeSam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
179
          add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
180
        case MergeStrategy.PreProcessMergeSam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
181
          add(MergeSamFiles(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
182
        case MergeStrategy.MarkDuplicates =>
Peter van 't Hof's avatar
Peter van 't Hof committed
183
          add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
184
        case MergeStrategy.PreProcessMarkDuplicates =>
Peter van 't Hof's avatar
Peter van 't Hof committed
185
          add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
        case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?")
      }

189
      if (mergeStrategy != MergeStrategy.None && libraries.flatMap(_._2.bamFile).nonEmpty) {
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
192
193
194
        val bamMetrics = new BamMetrics(qscript)
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
        bamMetrics.outputDir = new File(sampleDir, "metrics")
        add(bamMetrics)
Peter van 't Hof's avatar
Peter van 't Hof committed
195
      }
196
197
198
    }
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
199
200
201
class MultisampleMapping(val root: Configurable) extends QScript with MultisampleMappingTrait {
  def this() = this(null)
}
202
203
204
205

object MultisampleMapping extends PipelineCommand {

  object MergeStrategy extends Enumeration {
Peter van 't Hof's avatar
Peter van 't Hof committed
206
    val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value
207
208
209
210
211
212
213
214
215
216
217
  }

  def fileMustBeAbsulute(file: Option[File]): Option[File] = {
    if (file.forall(_.isAbsolute)) file
    else {
      Logging.addError(s"$file should be a absolute file path")
      file.map(_.getAbsoluteFile)
    }
  }

}