Mapping.scala 12 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star , Bowtie}
9
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups}
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
15

bow's avatar
bow committed
16
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
17
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  def this() = this(null)
bow's avatar
bow committed
19
20

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
21
  var input_R1: File = _
bow's avatar
bow committed
22
23

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
24
  var input_R2: File = _
bow's avatar
bow committed
25
26

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var outputName: String = _
bow's avatar
bow committed
28
29

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
31
32

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
  @Argument(doc = "Skip metrics", shortName = "skipmetrics", required = false)
  var skipMetrics: Boolean = false
  
bow's avatar
bow committed
38
  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var aligner: String = _
bow's avatar
bow committed
40
41

  @Argument(doc = "Reference", shortName = "R", required = false)
42
  var reference: File = _
bow's avatar
bow committed
43

44
  @Argument(doc = "Chunking", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  var chunking: Boolean = false
46
47
48
49
  
  @ClassType(classOf[Int])
  @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
  var numberChunks: Option[Int] = None
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  // Readgroup items
bow's avatar
bow committed
52
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
53
  var RGID: String = _
bow's avatar
bow committed
54
55

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
56
  var RGLB: String = _
bow's avatar
bow committed
57
58

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
59
  var RGPL: String = _
bow's avatar
bow committed
60
61

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  var RGPU: String = _
bow's avatar
bow committed
63
64

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
65
  var RGSM: String = _
bow's avatar
bow committed
66
67

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
68
  var RGCN: String = _
bow's avatar
bow committed
69
70

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
  var RGDS: String = _
bow's avatar
bow committed
72
73

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
74
  var RGDT: Date = _
bow's avatar
bow committed
75
76

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
  var RGPI: Int = _
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
  var paired: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
80
  var defaultAligner = "bwa"
81
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
  
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
    if (aligner == null) aligner = config("aligner", default = defaultAligner)
86
    if (reference == null) reference = config("reference")
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
90
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
91

92
    if (RGLB == null && config.contains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
94
    if (RGSM == null && config.contains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
96
    if (RGID == null && config.contains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
98
99
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
102
103
    if (RGCN == null && config.contains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && config.contains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
104

105
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
106

107
    if (!chunking && numberChunks.isDefined) chunking = true
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
110
      if (numberChunks.isEmpty) {
111
        if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
112
113
114
115
116
117
118
119
        else {
          val chunkSize: Int = config("chunksize", (1 << 30))
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
                         else input_R1.length
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
121
  }
bow's avatar
bow committed
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
126
127
128
129
130
131
132
133
134
    if (!skipFlexiprep) {
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.sampleName = this.RGSM
      flexiprep.libraryName = this.RGLB
      flexiprep.init
      flexiprep.runInitialJobs
    }
bow's avatar
bow committed
135
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
136
137
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
138
139

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
143
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
144
    var chunks: Map[String, (String, String)] = Map()
145
    if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
146
      val chunkDir = outputDir + "chunks/" + t + "/"
Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
      chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
        if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
149
    } else chunks += (outputDir -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
Peter van 't Hof's avatar
Peter van 't Hof committed
150
                        flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
bow's avatar
bow committed
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
154
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
Peter van 't Hof's avatar
Peter van 't Hof committed
155
      for ((chunkDir, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
156
      add(fastSplitter_R1)
bow's avatar
bow committed
157

Peter van 't Hof's avatar
Peter van 't Hof committed
158
159
160
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
Peter van 't Hof's avatar
Peter van 't Hof committed
161
        for ((chunkDir, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
164
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
165

Peter van 't Hof's avatar
Peter van 't Hof committed
166
    for ((chunkDir, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
      var R1 = fastqfile._1
      var R2 = fastqfile._2
169
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
170
      if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
        val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunkDir)
        logger.debug(chunkDir + " - " + flexiout)
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
175
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
178
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
179
      
180
181
182
183
184
185
186
187
188
      val outputBam = new File(chunkDir + outputName + ".bam")
      bamFiles :+= outputBam
      aligner match {
        case "bwa" => addBwa(R1, R2, outputBam, deps)
        case "bowtie" => addBowtie(R1, R2, outputBam, deps)
        case "star" => addStar(R1, R2, outputBam, deps)
        case "star-2pass" => addStar2pass(R1, R2, outputBam, deps)
        case _ => throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
      }
189
190
      if (config("chunk_metrics", default = false))
        addAll(BamMetrics(this, outputBam, chunkDir + "metrics/").functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
191
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
192
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
193
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
194
195
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
196

197
198
199
200
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
201
202
203
204
205
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
206
    
207
    if (!skipMetrics) addAll(BamMetrics(this, bamFile, outputDir + "metrics/").functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
208
    
209
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
210
  }
bow's avatar
bow committed
211

212
213
  def addBwa(R1:File, R2:File, output:File, deps:List[File]): File = {
    val bwaCommand = new Bwa(this)
214
215
216
217
218
219
220
221
222
223
    bwaCommand.R1 = R1
    if (paired) bwaCommand.R2 = R2
    bwaCommand.deps = deps
    bwaCommand.R = getReadGroup
    bwaCommand.output = this.swapExt(output.getParent, output, ".bam", ".sam")
    add(bwaCommand, isIntermediate = true)
    val sortSam = SortSam(this, bwaCommand.output, output)
    if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
    add(sortSam)
    return sortSam.output
224
225
226
227
  }
  
  def addBowtie(R1:File, R2:File, output:File, deps:List[File]): File = {
    val bowtie = new Bowtie(this)
228
229
230
231
232
233
    bowtie.R1 = R1
    if (paired) bowtie.R2 = R2
    bowtie.deps = deps
    bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
    add(bowtie, isIntermediate = true)
    return addAddOrReplaceReadGroups(bowtie.output, output)
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
  }
  
  def addStar(R1:File, R2:File, output:File, deps:List[File]): File = {
    val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
    add(starCommand)
    return addAddOrReplaceReadGroups(starCommand.outputSam, output)
  }
  
  def addStar2pass(R1:File, R2:File, output:File, deps:List[File]): File = {
    val starCommand = Star._2pass(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
    addAll(starCommand._2)
    return addAddOrReplaceReadGroups(starCommand._1, output)
  }
  
  def addAddOrReplaceReadGroups(input: File, output: File): File = {
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
250
251
252
253
254
255
256
257
258
    addOrReplaceReadGroups.createIndex = true

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
259
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
260
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
261

Peter van 't Hof's avatar
Peter van 't Hof committed
262
263
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
264
265

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
268
269
270
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
271
272
273
274
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
275

Peter van 't Hof's avatar
Peter van 't Hof committed
276
277
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
278
279
280
}

object Mapping extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
281
  def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
282
    val mapping = new Mapping(root)
bow's avatar
bow committed
283

Peter van 't Hof's avatar
Peter van 't Hof committed
284
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
285
286
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
287
    else inputType = root.config("inputtype", "dna").getString
288
    if (inputType == "rna") mapping.defaultAligner = "star-2pass"
289
290
291
292
293
294
295
296
297
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
298

299
300
301
    mapping.init
    mapping.biopetScript
    return mapping
302
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
303
}