Mapping.scala 12 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
9
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star }
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
bow's avatar
bow committed
13
import org.broadinstitute.gatk.queue.extensions.picard.{ MergeSamFiles, SortSam, AddOrReplaceReadGroups }
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
15

bow's avatar
bow committed
16
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
17
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  def this() = this(null)
bow's avatar
bow committed
19
20

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
21
  var input_R1: File = _
bow's avatar
bow committed
22
23

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
24
  var input_R2: File = _
bow's avatar
bow committed
25
26

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var outputName: String = _
bow's avatar
bow committed
28
29

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
31
32

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
34
35

  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
36
  var aligner: String = _
bow's avatar
bow committed
37
38

  @Argument(doc = "Reference", shortName = "R", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var referenceFile: File = _
bow's avatar
bow committed
40
41

  @Argument(doc = "auto chuning", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
42
  var chunking: Boolean = false
bow's avatar
bow committed
43

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  // Readgroup items
bow's avatar
bow committed
45
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  var RGID: String = _
bow's avatar
bow committed
47
48

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  var RGLB: String = _
bow's avatar
bow committed
50
51

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
52
  var RGPL: String = _
bow's avatar
bow committed
53
54

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
55
  var RGPU: String = _
bow's avatar
bow committed
56
57

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
58
  var RGSM: String = _
bow's avatar
bow committed
59
60

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
61
  var RGCN: String = _
bow's avatar
bow committed
62
63

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
64
  var RGDS: String = _
bow's avatar
bow committed
65
66

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
67
  var RGDT: Date = _
bow's avatar
bow committed
68
69

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
70
  var RGPI: Int = _
bow's avatar
bow committed
71

Peter van 't Hof's avatar
Peter van 't Hof committed
72
  var paired: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
73
  var numberChunks = 0
bow's avatar
bow committed
74

Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
bow's avatar
bow committed
77
    var inputtype: String = config("inputtype", "dna")
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
80
81
82
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
    if (referenceFile == null) referenceFile = config("referenceFile")
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
85
86
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
87

Peter van 't Hof's avatar
Peter van 't Hof committed
88
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
91
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
92
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
94
95
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98
99
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
100

101
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
102

Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
105
    if (!chunking && numberChunks > 1) chunking = true
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
bow's avatar
bow committed
106
107
108
      val chunkSize: Int = config("chunksize", (1 << 30))
      val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
      else input_R1.length
Peter van 't Hof's avatar
Peter van 't Hof committed
109
110
111
112
      if (numberChunks == 0 && configContains("numberchunks")) numberChunks = config("numberchunks")
      else if (numberChunks == 0) numberChunks = ceil(filesize.toDouble / chunkSize).toInt
      logger.debug("Chunks: " + numberChunks)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
113
  }
bow's avatar
bow committed
114

Peter van 't Hof's avatar
Peter van 't Hof committed
115
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
118
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
    val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    flexiprep.outputDir = outputDir + "flexiprep/"
bow's avatar
bow committed
120
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
123
124

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
125
126
127
128
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
129
    var chunks: Map[String, (String, String)] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
130
131
    if (chunking) for (t <- 1 to numberChunks) {
      chunks += ("chunk_" + t -> (removeGz(outputDir + "chunk_" + t + "/" + fastq_R1.getName),
bow's avatar
bow committed
132
133
134
135
136
        if (paired) removeGz(outputDir + "chunk_" + t + "/" + fastq_R2.getName) else ""))
    }
    else chunks += ("flexiprep" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
      flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))

Peter van 't Hof's avatar
Peter van 't Hof committed
137
138
139
140
141
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
142
      for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
143
      add(fastSplitter_R1)
bow's avatar
bow committed
144

Peter van 't Hof's avatar
Peter van 't Hof committed
145
146
147
148
149
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
150
        for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
151
152
153
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
154
155

    for ((chunk, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
158
159
160
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
        flexiprep.input_R1 = fastq_R1
        if (paired) flexiprep.input_R2 = fastq_R2
161
        if (chunking) flexiprep.skipSummary = true
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
164
165
166
167
168
169
170
171
        flexiprep.init
        flexiprep.runInitialJobs
        //flexiprep.biopetScript
        val flexiout = flexiprep.runTrimClip(R1, R2, outputDir + chunk, chunk)
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
172
      val chunkDir = if (chunking) outputDir + chunk + "/" else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
175
176
177
178
179
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
        bwaCommand.RG = getReadGroup
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
bow's avatar
bow committed
180
        bamFiles :+= addSortSam(List(bwaCommand.output), swapExt(chunkDir, bwaCommand.output, ".sam", ".bam"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
184
185
186
187
188
189
190
      } else if (aligner == "star") {
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true)
        add(starCommand)
        bamFiles :+= addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else if (aligner == "star-2pass") {
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true)
        addAll(star2pass._2)
        bamFiles :+= addAddOrReplaceReadGroups(List(star2pass._1), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
191
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
192
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
195

196
197
198
199
200
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
    } else if (skipMarkduplicates && chunking) bamFile = addMergeBam(bamFiles, new File(outputDir + outputName + ".bam"), outputDir)
bow's avatar
bow committed
201

202
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
  }
bow's avatar
bow committed
204
205

  def addSortSam(inputSam: List[File], outputFile: File, dir: String): File = {
206
207
208
209
210
211
212
    val sortSam = new SortSam
    sortSam.input = inputSam
    sortSam.createIndex = true
    sortSam.output = outputFile
    sortSam.memoryLimit = 2
    sortSam.nCoresRequest = 2
    sortSam.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
213
    if (!skipMarkduplicates) sortSam.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
214
    add(sortSam)
bow's avatar
bow committed
215

Peter van 't Hof's avatar
Peter van 't Hof committed
216
217
    return sortSam.output
  }
bow's avatar
bow committed
218
219

  def addMergeBam(inputSam: List[File], outputFile: File, dir: String): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
220
221
222
223
224
225
226
227
228
229
230
    val mergeSam = new MergeSamFiles
    mergeSam.input = inputSam
    mergeSam.createIndex = true
    mergeSam.output = outputFile
    mergeSam.memoryLimit = 2
    mergeSam.nCoresRequest = 2
    mergeSam.assumeSorted = true
    mergeSam.USE_THREADING = true
    mergeSam.jobResourceRequests :+= "h_vmem=4G"
    if (!skipMarkduplicates) mergeSam.isIntermediate = true
    add(mergeSam)
bow's avatar
bow committed
231

Peter van 't Hof's avatar
Peter van 't Hof committed
232
233
    return mergeSam.output
  }
bow's avatar
bow committed
234
235

  def addAddOrReplaceReadGroups(inputSam: List[File], outputFile: File, dir: String): File = {
236
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
237
238
239
240
241
242
243
244
245
246
247
248
249
250
    addOrReplaceReadGroups.input = inputSam
    addOrReplaceReadGroups.output = outputFile
    addOrReplaceReadGroups.createIndex = true
    addOrReplaceReadGroups.memoryLimit = 2
    addOrReplaceReadGroups.nCoresRequest = 2
    addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
251
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
252
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
253

Peter van 't Hof's avatar
Peter van 't Hof committed
254
255
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
256
257

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
258
259
260
261
262
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
263
264
265
266
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
267

Peter van 't Hof's avatar
Peter van 't Hof committed
268
269
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
270
271
272
273
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
bow's avatar
bow committed
274
275

  def loadFromRunConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
276
    val mapping = new Mapping(root)
bow's avatar
bow committed
277

Peter van 't Hof's avatar
Peter van 't Hof committed
278
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
279
280
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
281
282
283
284
285
286
287
288
289
290
291
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
292

293
294
295
    mapping.init
    mapping.biopetScript
    return mapping
296
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
297
}