MultisampleMappingTrait.scala 8.97 KB
Newer Older
1
2
3
4
5
package nl.lumc.sasc.biopet.pipelines.mapping

import java.io.File

import htsjdk.samtools.SamReaderFactory
Peter van 't Hof's avatar
Peter van 't Hof committed
6
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript }
7
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq }
9
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.utils.config.Configurable
12
13
import org.broadinstitute.gatk.queue.QScript

Peter van 't Hof's avatar
Peter van 't Hof committed
14
15
import MultisampleMapping.MergeStrategy

16
17
18
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
19
20
 * Created by pjvanthof on 18/12/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
trait MultisampleMappingTrait extends MultiSampleQScript
  with Reference { qscript: QScript =>
23

Peter van 't Hof's avatar
Peter van 't Hof committed
24
  def mergeStrategy: MergeStrategy.Value = {
Peter van 't Hof's avatar
Peter van 't Hof committed
25
    val value: String = config("merge_strategy", default = "preprocessmarkduplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
26
    MergeStrategy.values.find(_.toString.toLowerCase == value) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
27
      case Some(v) => v
Peter van 't Hof's avatar
Peter van 't Hof committed
28
      case _       => throw new IllegalArgumentException(s"merge_strategy '$value' does not exist")
Peter van 't Hof's avatar
Peter van 't Hof committed
29
30
31
    }
  }

32
  def init(): Unit = {
33
34
  }

35
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
36
    addSamplesJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
37
    addSummaryJobs()
38
39
  }

40
  def addMultiSampleJobs(): Unit = {
41
42
43
    // this code will be executed after all code of all samples is executed
  }

44
  def summaryFiles: Map[String, File] = Map("referenceFasta" -> referenceFasta())
45

46
47
48
  def summarySettings: Map[String, Any] = Map(
    "reference" -> referenceSummary,
    "merge_strategy" -> mergeStrategy.toString)
49
50
51
52
53
54

  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {

    def makeLibrary(id: String) = new Library(id)
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
55
      def summaryFiles: Map[String, File] = (inputR1.map("input_R1" -> _) :: inputR2.map("input_R2" -> _) ::
Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
        inputBam.map("input_bam" -> _) :: bamFile.map("output_bam" -> _) ::
        preProcessBam.map("output_preProcessBam" -> _) :: Nil).flatten.toMap
58
59
60

      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
63
      lazy val inputR1: Option[File] = MultisampleMapping.fileMustBeAbsolute(config("R1"))
      lazy val inputR2: Option[File] = MultisampleMapping.fileMustBeAbsolute(config("R2"))
      lazy val inputBam: Option[File] = MultisampleMapping.fileMustBeAbsolute(if (inputR1.isEmpty) config("bam") else None)
64
65
66
67
68
69
70
      lazy val bamToFastq: Boolean = config("bam_to_fastq", default = false)
      lazy val correctReadgroups: Boolean = config("correct_readgroups", default = false)

      lazy val mapping = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
        val m = new Mapping(qscript)
        m.sampleId = Some(sampleId)
        m.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
        m.outputDir = libDir
72
73
74
75
        Some(m)
      } else None

      def bamFile = mapping match {
Peter van 't Hof's avatar
Peter van 't Hof committed
76
        case Some(m)                 => Some(m.finalBamFile)
77
        case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
78
        case _                       => None
79
80
      }

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
      def preProcessBam = bamFile

83
      def addJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
86
87
        inputR1.foreach(inputFiles :+= new InputFile(_, config("R1_md5")))
        inputR2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
        inputBam.foreach(inputFiles :+= new InputFile(_, config("bam_md5")))

88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
        if (inputR1.isDefined) {
          mapping.foreach { m =>
            m.input_R1 = inputR1.get
            m.input_R2 = inputR2
            add(m)
          }
        } else if (inputBam.isDefined) {
          if (bamToFastq) {
            val samToFastq = SamToFastq(qscript, inputBam.get,
              new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
              new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
            samToFastq.isIntermediate = true
            qscript.add(samToFastq)
            mapping.foreach(m => {
              m.input_R1 = samToFastq.fastqR1
              m.input_R2 = Some(samToFastq.fastqR2)
              add(m)
            })
          } else {
            val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
            val readGroups = inputSam.getFileHeader.getReadGroups

            val readGroupOke = readGroups.forall(readGroup => {
              if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
              if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
              readGroup.getSample == sampleId && readGroup.getLibrary == libId
            })
            inputSam.close()

            if (!readGroupOke) {
              if (correctReadgroups) {
                logger.info("Correcting readgroups, file:" + inputBam.get)
                val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
                aorrg.RGID = s"$sampleId-$libId"
                aorrg.RGLB = libId
                aorrg.RGSM = sampleId
                aorrg.RGPL = "unknown"
                aorrg.RGPU = "na"
                aorrg.isIntermediate = true
                qscript.add(aorrg)
              } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
            } else {
              val oldBamFile: File = inputBam.get
              val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
              val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
              val baiLn = Ln(qscript, oldIndex, newIndex)
              add(baiLn)

              val bamLn = Ln(qscript, oldBamFile, bamFile.get)
              bamLn.deps :+= baiLn.output
              add(bamLn)
            }

            val bamMetrics = new BamMetrics(qscript)
            bamMetrics.sampleId = Some(sampleId)
            bamMetrics.libId = Some(libId)
            bamMetrics.inputBam = bamFile.get
            bamMetrics.outputDir = new File(libDir, "metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
147
            add(bamMetrics)
148
149
150
151
152
          }
        } else logger.warn(s"Sample '$sampleId' does not have any input files")
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
153
154
    def summaryFiles: Map[String, File] = (bamFile.map("output_bam" -> _) ::
      preProcessBam.map("output_preProcessBam" -> _) :: Nil).flatten.toMap
155
156
157

    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
158
159
160
161
162
163
164
    def bamFile = if (libraries.flatMap(_._2.bamFile).nonEmpty &&
      mergeStrategy != MultisampleMapping.MergeStrategy.None)
      Some(new File(sampleDir, s"$sampleId.bam"))
    else None

    def preProcessBam = bamFile

Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
    def keepMergedFiles: Boolean = config("keep_merged_files", default = true)

Peter van 't Hof's avatar
Peter van 't Hof committed
167
    def addJobs(): Unit = {
168
      addPerLibJobs() // This add jobs for each library
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170

      mergeStrategy match {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
        case MergeStrategy.None =>
        case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 =>
Peter van 't Hof's avatar
Peter van 't Hof committed
173
          add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*)
Peter van 't Hof's avatar
Peter van 't Hof committed
174
        case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
Peter van 't Hof's avatar
Peter van 't Hof committed
175
          add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*)
Peter van 't Hof's avatar
Peter van 't Hof committed
176
        case MergeStrategy.MergeSam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
177
          add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
178
        case MergeStrategy.PreProcessMergeSam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
179
          add(MergeSamFiles(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
180
        case MergeStrategy.MarkDuplicates =>
Peter van 't Hof's avatar
Peter van 't Hof committed
181
          add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
182
        case MergeStrategy.PreProcessMarkDuplicates =>
Peter van 't Hof's avatar
Peter van 't Hof committed
183
          add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = keepMergedFiles))
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
        case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?")
      }

187
      if (mergeStrategy != MergeStrategy.None && libraries.flatMap(_._2.bamFile).nonEmpty) {
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190
191
192
        val bamMetrics = new BamMetrics(qscript)
        bamMetrics.sampleId = Some(sampleId)
        bamMetrics.inputBam = preProcessBam.get
        bamMetrics.outputDir = new File(sampleDir, "metrics")
        add(bamMetrics)
Peter van 't Hof's avatar
Peter van 't Hof committed
193
      }
194
195
196
    }
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
197

Peter van 't Hof's avatar
Peter van 't Hof committed
198
199
class MultisampleMapping(val root: Configurable) extends QScript with MultisampleMappingTrait {
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201

  def summaryFile: File = new File(outputDir, "MultisamplePipeline.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
202
}
203
204
205
206

object MultisampleMapping extends PipelineCommand {

  object MergeStrategy extends Enumeration {
Peter van 't Hof's avatar
Peter van 't Hof committed
207
    val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value
208
209
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
210
  def fileMustBeAbsolute(file: Option[File]): Option[File] = {
211
212
213
214
215
216
217
218
    if (file.forall(_.isAbsolute)) file
    else {
      Logging.addError(s"$file should be a absolute file path")
      file.map(_.getAbsoluteFile)
    }
  }

}