Mapping.scala 12.7 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star }
9
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, SortSam}
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
14
import org.broadinstitute.gatk.queue.extensions.picard.{ MergeSamFiles, AddOrReplaceReadGroups }
Peter van 't Hof's avatar
Peter van 't Hof committed
15
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
16

bow's avatar
bow committed
17
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
19
  def this() = this(null)
bow's avatar
bow committed
20
21

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
22
  var input_R1: File = _
bow's avatar
bow committed
23
24

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
25
  var input_R2: File = _
bow's avatar
bow committed
26
27

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  var outputName: String = _
bow's avatar
bow committed
29
30

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
31
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
32
33

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
34
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
35
36

  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
37
  var aligner: String = _
bow's avatar
bow committed
38
39

  @Argument(doc = "Reference", shortName = "R", required = false)
40
  var reference: File = _
bow's avatar
bow committed
41

42
  @Argument(doc = "Chunking", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var chunking: Boolean = false
44
45
46
47
  
  @ClassType(classOf[Int])
  @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
  var numberChunks: Option[Int] = None
bow's avatar
bow committed
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  // Readgroup items
bow's avatar
bow committed
50
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
51
  var RGID: String = _
bow's avatar
bow committed
52
53

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
54
  var RGLB: String = _
bow's avatar
bow committed
55
56

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
57
  var RGPL: String = _
bow's avatar
bow committed
58
59

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
60
  var RGPU: String = _
bow's avatar
bow committed
61
62

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
63
  var RGSM: String = _
bow's avatar
bow committed
64
65

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  var RGCN: String = _
bow's avatar
bow committed
67
68

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  var RGDS: String = _
bow's avatar
bow committed
70
71

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
72
  var RGDT: Date = _
bow's avatar
bow committed
73
74

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
  var RGPI: Int = _
bow's avatar
bow committed
76

Peter van 't Hof's avatar
Peter van 't Hof committed
77
  var paired: Boolean = false
78
79
  
  val flexiprep = new Flexiprep(this)
bow's avatar
bow committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
bow's avatar
bow committed
83
    var inputtype: String = config("inputtype", "dna")
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
86
87
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
88
    if (reference == null) reference = config("reference")
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
91
92
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
93

Peter van 't Hof's avatar
Peter van 't Hof committed
94
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
98
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
99
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
100
101
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
104
105
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
106

107
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
108

109
    if (!chunking && numberChunks.isDefined) chunking = true
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
112
113
114
115
116
117
118
119
120
121
      if (numberChunks.isEmpty) {
        if (configContains("numberchunks")) numberChunks = config("numberchunks", default = None)
        else {
          val chunkSize: Int = config("chunksize", (1 << 30))
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
                         else input_R1.length
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
122
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
123
  }
bow's avatar
bow committed
124

Peter van 't Hof's avatar
Peter van 't Hof committed
125
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
128
129
130
131
132
133
134
135
136
    if (!skipFlexiprep) {
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.sampleName = this.RGSM
      flexiprep.libraryName = this.RGLB
      flexiprep.init
      flexiprep.runInitialJobs
    }
bow's avatar
bow committed
137
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
138
139
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
140
141

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
144
145
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
146
    var chunks: Map[String, (String, String)] = Map()
147
    if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
148
149
150
151
152
      val chunkDir = "chunks/" + t + "/"
      chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
        if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
    } else chunks += ("flexiprep/" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
                        flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
bow's avatar
bow committed
153

Peter van 't Hof's avatar
Peter van 't Hof committed
154
155
156
157
158
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
159
      for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
160
      add(fastSplitter_R1)
bow's avatar
bow committed
161

Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
164
165
166
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
167
        for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
168
169
170
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
171
172

    for ((chunk, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
      var R1 = fastqfile._1
      var R2 = fastqfile._2
175
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
176
      val chunkDir = if (chunking) outputDir + chunk else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
177
      if (!skipFlexiprep) {
178
        val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
182
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
183
184
185
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
186
      
Peter van 't Hof's avatar
Peter van 't Hof committed
187
188
189
190
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
191
        bwaCommand.deps = deps
192
        bwaCommand.R = getReadGroup
Peter van 't Hof's avatar
Peter van 't Hof committed
193
194
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
195
196
197
198
        val sortSam = SortSam(this, bwaCommand.output, chunkDir)
        if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
        add(sortSam)
        bamFiles :+= sortSam.output
Peter van 't Hof's avatar
Peter van 't Hof committed
199
      } else if (aligner == "star") {
200
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
203
        add(starCommand)
        bamFiles :+= addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else if (aligner == "star-2pass") {
204
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
208
        addAll(star2pass._2)
        bamFiles :+= addAddOrReplaceReadGroups(List(star2pass._1), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
209
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
210
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
213

214
215
216
217
218
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
    } else if (skipMarkduplicates && chunking) bamFile = addMergeBam(bamFiles, new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
    
    addAll(BamMetrics.apply(this, bamFile, outputDir + "metrics/").functions)
    
222
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
223
  }
bow's avatar
bow committed
224

225
226
227
228
229
230
231
232
233
234
235
236
237
//  def addSortSam(inputSam: List[File], outputFile: File, dir: String): File = {
//    val sortSam = new SortSam
//    sortSam.input = inputSam
//    sortSam.createIndex = true
//    sortSam.output = outputFile
//    sortSam.memoryLimit = 2
//    sortSam.nCoresRequest = 2
//    sortSam.jobResourceRequests :+= "h_vmem=4G"
//    if (!skipMarkduplicates) sortSam.isIntermediate = true
//    add(sortSam)
//
//    return sortSam.output
//  }
bow's avatar
bow committed
238
239

  def addMergeBam(inputSam: List[File], outputFile: File, dir: String): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
240
241
242
243
244
245
246
247
248
249
250
    val mergeSam = new MergeSamFiles
    mergeSam.input = inputSam
    mergeSam.createIndex = true
    mergeSam.output = outputFile
    mergeSam.memoryLimit = 2
    mergeSam.nCoresRequest = 2
    mergeSam.assumeSorted = true
    mergeSam.USE_THREADING = true
    mergeSam.jobResourceRequests :+= "h_vmem=4G"
    if (!skipMarkduplicates) mergeSam.isIntermediate = true
    add(mergeSam)
bow's avatar
bow committed
251

Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
    return mergeSam.output
  }
bow's avatar
bow committed
254
255

  def addAddOrReplaceReadGroups(inputSam: List[File], outputFile: File, dir: String): File = {
256
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259
260
261
262
263
264
265
266
267
268
269
270
    addOrReplaceReadGroups.input = inputSam
    addOrReplaceReadGroups.output = outputFile
    addOrReplaceReadGroups.createIndex = true
    addOrReplaceReadGroups.memoryLimit = 2
    addOrReplaceReadGroups.nCoresRequest = 2
    addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
271
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
272
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
273

Peter van 't Hof's avatar
Peter van 't Hof committed
274
275
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
276
277

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
278
279
280
281
282
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
283
284
285
286
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
287

Peter van 't Hof's avatar
Peter van 't Hof committed
288
289
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
290
291
292
293
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
bow's avatar
bow committed
294

Peter van 't Hof's avatar
Peter van 't Hof committed
295
  def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
296
    val mapping = new Mapping(root)
bow's avatar
bow committed
297

Peter van 't Hof's avatar
Peter van 't Hof committed
298
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
299
300
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
301
302
303
304
305
306
307
308
309
310
311
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
312

313
314
315
    mapping.init
    mapping.biopetScript
    return mapping
316
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
317
}