Mapping.scala 12.6 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
9
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star }
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
bow's avatar
bow committed
14
import org.broadinstitute.gatk.queue.extensions.picard.{ MergeSamFiles, SortSam, AddOrReplaceReadGroups }
Peter van 't Hof's avatar
Peter van 't Hof committed
15
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
16

bow's avatar
bow committed
17
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
19
  def this() = this(null)
bow's avatar
bow committed
20
21

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
22
  var input_R1: File = _
bow's avatar
bow committed
23
24

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
25
  var input_R2: File = _
bow's avatar
bow committed
26
27

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  var outputName: String = _
bow's avatar
bow committed
29
30

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
31
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
32
33

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
34
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
35
36

  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
37
  var aligner: String = _
bow's avatar
bow committed
38
39

  @Argument(doc = "Reference", shortName = "R", required = false)
40
  var reference: File = _
bow's avatar
bow committed
41

42
  @Argument(doc = "Chunking", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var chunking: Boolean = false
44
45
46
47
  
  @ClassType(classOf[Int])
  @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
  var numberChunks: Option[Int] = None
bow's avatar
bow committed
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  // Readgroup items
bow's avatar
bow committed
50
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
51
  var RGID: String = _
bow's avatar
bow committed
52
53

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
54
  var RGLB: String = _
bow's avatar
bow committed
55
56

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
57
  var RGPL: String = _
bow's avatar
bow committed
58
59

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
60
  var RGPU: String = _
bow's avatar
bow committed
61
62

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
63
  var RGSM: String = _
bow's avatar
bow committed
64
65

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  var RGCN: String = _
bow's avatar
bow committed
67
68

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  var RGDS: String = _
bow's avatar
bow committed
70
71

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
72
  var RGDT: Date = _
bow's avatar
bow committed
73
74

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
  var RGPI: Int = _
bow's avatar
bow committed
76

Peter van 't Hof's avatar
Peter van 't Hof committed
77
  var paired: Boolean = false
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
bow's avatar
bow committed
81
    var inputtype: String = config("inputtype", "dna")
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
84
85
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
86
    if (reference == null) reference = config("reference")
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
90
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
94
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
98
99
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
102
103
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
104

105
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
106

107
    if (!chunking && numberChunks.isDefined) chunking = true
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
110
111
112
113
114
115
116
117
118
119
      if (numberChunks.isEmpty) {
        if (configContains("numberchunks")) numberChunks = config("numberchunks", default = None)
        else {
          val chunkSize: Int = config("chunksize", (1 << 30))
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
                         else input_R1.length
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
121
  }
bow's avatar
bow committed
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
126
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
    val flexiprep = new Flexiprep(this)
127
128
129
130
131
132
133
134
135
    if (!skipFlexiprep) {
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.sampleName = this.RGSM
      flexiprep.libraryName = this.RGLB
      flexiprep.init
      flexiprep.runInitialJobs
    }
bow's avatar
bow committed
136
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
137
138
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
139
140

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
143
144
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
145
    var chunks: Map[String, (String, String)] = Map()
146
    if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
149
150
151
      val chunkDir = "chunks/" + t + "/"
      chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
        if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
    } else chunks += ("flexiprep/" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
                        flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
bow's avatar
bow committed
152

Peter van 't Hof's avatar
Peter van 't Hof committed
153
154
155
156
157
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
158
      for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
159
      add(fastSplitter_R1)
bow's avatar
bow committed
160

Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
163
164
165
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
166
        for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
169
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
170
171

    for ((chunk, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
      var R1 = fastqfile._1
      var R2 = fastqfile._2
174
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
175
      val chunkDir = if (chunking) outputDir + chunk else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
176
      if (!skipFlexiprep) {
177
        val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
180
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
181
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
184
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
185
      
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
189
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
190
        bwaCommand.deps = deps
Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
193
        bwaCommand.RG = getReadGroup
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
bow's avatar
bow committed
194
        bamFiles :+= addSortSam(List(bwaCommand.output), swapExt(chunkDir, bwaCommand.output, ".sam", ".bam"), chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
195
      } else if (aligner == "star") {
196
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
199
        add(starCommand)
        bamFiles :+= addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else if (aligner == "star-2pass") {
200
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
203
204
        addAll(star2pass._2)
        bamFiles :+= addAddOrReplaceReadGroups(List(star2pass._1), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
205
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
206
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
209

210
211
212
213
214
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
    } else if (skipMarkduplicates && chunking) bamFile = addMergeBam(bamFiles, new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
216
217
    
    addAll(BamMetrics.apply(this, bamFile, outputDir + "metrics/").functions)
    
218
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
219
  }
bow's avatar
bow committed
220
221

  def addSortSam(inputSam: List[File], outputFile: File, dir: String): File = {
222
223
224
225
226
227
228
    val sortSam = new SortSam
    sortSam.input = inputSam
    sortSam.createIndex = true
    sortSam.output = outputFile
    sortSam.memoryLimit = 2
    sortSam.nCoresRequest = 2
    sortSam.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
229
    if (!skipMarkduplicates) sortSam.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
230
    add(sortSam)
bow's avatar
bow committed
231

Peter van 't Hof's avatar
Peter van 't Hof committed
232
233
    return sortSam.output
  }
bow's avatar
bow committed
234
235

  def addMergeBam(inputSam: List[File], outputFile: File, dir: String): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
239
240
241
242
243
244
245
246
    val mergeSam = new MergeSamFiles
    mergeSam.input = inputSam
    mergeSam.createIndex = true
    mergeSam.output = outputFile
    mergeSam.memoryLimit = 2
    mergeSam.nCoresRequest = 2
    mergeSam.assumeSorted = true
    mergeSam.USE_THREADING = true
    mergeSam.jobResourceRequests :+= "h_vmem=4G"
    if (!skipMarkduplicates) mergeSam.isIntermediate = true
    add(mergeSam)
bow's avatar
bow committed
247

Peter van 't Hof's avatar
Peter van 't Hof committed
248
249
    return mergeSam.output
  }
bow's avatar
bow committed
250
251

  def addAddOrReplaceReadGroups(inputSam: List[File], outputFile: File, dir: String): File = {
252
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255
256
257
258
259
260
261
262
263
264
265
266
    addOrReplaceReadGroups.input = inputSam
    addOrReplaceReadGroups.output = outputFile
    addOrReplaceReadGroups.createIndex = true
    addOrReplaceReadGroups.memoryLimit = 2
    addOrReplaceReadGroups.nCoresRequest = 2
    addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
267
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
268
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
269

Peter van 't Hof's avatar
Peter van 't Hof committed
270
271
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
272
273

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
276
277
278
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
279
280
281
282
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
283

Peter van 't Hof's avatar
Peter van 't Hof committed
284
285
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
286
287
288
289
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
bow's avatar
bow committed
290

Peter van 't Hof's avatar
Peter van 't Hof committed
291
  def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
292
    val mapping = new Mapping(root)
bow's avatar
bow committed
293

Peter van 't Hof's avatar
Peter van 't Hof committed
294
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
295
296
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
297
298
299
300
301
302
303
304
305
306
307
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
308

309
310
311
    mapping.init
    mapping.biopetScript
    return mapping
312
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
313
}