Mapping.scala 12.9 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star }
9
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, SortSam}
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
14
import org.broadinstitute.gatk.queue.extensions.picard.{ MergeSamFiles, AddOrReplaceReadGroups }
Peter van 't Hof's avatar
Peter van 't Hof committed
15
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
16

bow's avatar
bow committed
17
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
19
  def this() = this(null)
bow's avatar
bow committed
20
21

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
22
  var input_R1: File = _
bow's avatar
bow committed
23
24

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
25
  var input_R2: File = _
bow's avatar
bow committed
26
27

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  var outputName: String = _
bow's avatar
bow committed
29
30

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
31
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
32
33

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
34
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
  @Argument(doc = "Skip metrics", shortName = "skipmetrics", required = false)
  var skipMetrics: Boolean = false
  
bow's avatar
bow committed
39
  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  var aligner: String = _
bow's avatar
bow committed
41
42

  @Argument(doc = "Reference", shortName = "R", required = false)
43
  var reference: File = _
bow's avatar
bow committed
44

45
  @Argument(doc = "Chunking", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  var chunking: Boolean = false
47
48
49
50
  
  @ClassType(classOf[Int])
  @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
  var numberChunks: Option[Int] = None
bow's avatar
bow committed
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  // Readgroup items
bow's avatar
bow committed
53
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
54
  var RGID: String = _
bow's avatar
bow committed
55
56

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
57
  var RGLB: String = _
bow's avatar
bow committed
58
59

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
60
  var RGPL: String = _
bow's avatar
bow committed
61
62

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
63
  var RGPU: String = _
bow's avatar
bow committed
64
65

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  var RGSM: String = _
bow's avatar
bow committed
67
68

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  var RGCN: String = _
bow's avatar
bow committed
70
71

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
72
  var RGDS: String = _
bow's avatar
bow committed
73
74

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
  var RGDT: Date = _
bow's avatar
bow committed
76
77

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
78
  var RGPI: Int = _
bow's avatar
bow committed
79

Peter van 't Hof's avatar
Peter van 't Hof committed
80
  var paired: Boolean = false
81
82
  
  val flexiprep = new Flexiprep(this)
bow's avatar
bow committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
bow's avatar
bow committed
86
    var inputtype: String = config("inputtype", "dna")
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
90
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
91
    if (reference == null) reference = config("reference")
Peter van 't Hof's avatar
Peter van 't Hof committed
92
93
94
95
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
96

Peter van 't Hof's avatar
Peter van 't Hof committed
97
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
98
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
99
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
100
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
101
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
102
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
103
104
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107
108
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
109

110
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
111

112
    if (!chunking && numberChunks.isDefined) chunking = true
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
115
116
117
118
119
120
121
122
123
124
      if (numberChunks.isEmpty) {
        if (configContains("numberchunks")) numberChunks = config("numberchunks", default = None)
        else {
          val chunkSize: Int = config("chunksize", (1 << 30))
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
                         else input_R1.length
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
126
  }
bow's avatar
bow committed
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
131
132
133
134
135
136
137
138
139
    if (!skipFlexiprep) {
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.sampleName = this.RGSM
      flexiprep.libraryName = this.RGLB
      flexiprep.init
      flexiprep.runInitialJobs
    }
bow's avatar
bow committed
140
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
143
144

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
145
146
147
148
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
149
    var chunks: Map[String, (String, String)] = Map()
150
    if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
151
152
153
154
155
      val chunkDir = "chunks/" + t + "/"
      chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
        if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
    } else chunks += ("flexiprep/" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
                        flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
bow's avatar
bow committed
156

Peter van 't Hof's avatar
Peter van 't Hof committed
157
158
159
160
161
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
162
      for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
163
      add(fastSplitter_R1)
bow's avatar
bow committed
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
167
168
169
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
170
        for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
173
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
174
175

    for ((chunk, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
      var R1 = fastqfile._1
      var R2 = fastqfile._2
178
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
179
      val chunkDir = if (chunking) outputDir + chunk else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
180
      if (!skipFlexiprep) {
181
        val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
184
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
185
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
189
      
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
192
193
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
194
        bwaCommand.deps = deps
195
        bwaCommand.R = getReadGroup
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
198
199
200
201
        val sortSam = SortSam(this, bwaCommand.output, chunkDir)
        if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
        add(sortSam)
        bamFiles :+= sortSam.output
Peter van 't Hof's avatar
Peter van 't Hof committed
202
      } else if (aligner == "star") {
203
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
206
        add(starCommand)
        bamFiles :+= addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else if (aligner == "star-2pass") {
207
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
210
211
        addAll(star2pass._2)
        bamFiles :+= addAddOrReplaceReadGroups(List(star2pass._1), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
212
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
213
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
216

217
218
219
220
221
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
    } else if (skipMarkduplicates && chunking) bamFile = addMergeBam(bamFiles, new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
222
    
Peter van 't Hof's avatar
Peter van 't Hof committed
223
    if (!skipMetrics) addAll(BamMetrics.apply(this, bamFile, outputDir + "metrics/").functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
224
    
225
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
226
  }
bow's avatar
bow committed
227

228
229
230
231
232
233
234
235
236
237
238
239
240
//  def addSortSam(inputSam: List[File], outputFile: File, dir: String): File = {
//    val sortSam = new SortSam
//    sortSam.input = inputSam
//    sortSam.createIndex = true
//    sortSam.output = outputFile
//    sortSam.memoryLimit = 2
//    sortSam.nCoresRequest = 2
//    sortSam.jobResourceRequests :+= "h_vmem=4G"
//    if (!skipMarkduplicates) sortSam.isIntermediate = true
//    add(sortSam)
//
//    return sortSam.output
//  }
bow's avatar
bow committed
241
242

  def addMergeBam(inputSam: List[File], outputFile: File, dir: String): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
243
244
245
246
247
248
249
250
251
252
253
    val mergeSam = new MergeSamFiles
    mergeSam.input = inputSam
    mergeSam.createIndex = true
    mergeSam.output = outputFile
    mergeSam.memoryLimit = 2
    mergeSam.nCoresRequest = 2
    mergeSam.assumeSorted = true
    mergeSam.USE_THREADING = true
    mergeSam.jobResourceRequests :+= "h_vmem=4G"
    if (!skipMarkduplicates) mergeSam.isIntermediate = true
    add(mergeSam)
bow's avatar
bow committed
254

Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
    return mergeSam.output
  }
bow's avatar
bow committed
257
258

  def addAddOrReplaceReadGroups(inputSam: List[File], outputFile: File, dir: String): File = {
259
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
262
263
264
265
266
267
268
269
270
271
272
273
    addOrReplaceReadGroups.input = inputSam
    addOrReplaceReadGroups.output = outputFile
    addOrReplaceReadGroups.createIndex = true
    addOrReplaceReadGroups.memoryLimit = 2
    addOrReplaceReadGroups.nCoresRequest = 2
    addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
274
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
275
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
276

Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
279
280

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
281
282
283
284
285
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
286
287
288
289
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
290

Peter van 't Hof's avatar
Peter van 't Hof committed
291
292
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
293
294
295
296
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
bow's avatar
bow committed
297

Peter van 't Hof's avatar
Peter van 't Hof committed
298
  def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
299
    val mapping = new Mapping(root)
bow's avatar
bow committed
300

Peter van 't Hof's avatar
Peter van 't Hof committed
301
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
302
303
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
304
305
306
307
308
309
310
311
312
313
314
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
315

316
317
318
    mapping.init
    mapping.biopetScript
    return mapping
319
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
320
}