Mapping.scala 12.5 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star , Bowtie}
9
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups}
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
15

bow's avatar
bow committed
16
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
17
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  def this() = this(null)
bow's avatar
bow committed
19
20

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
21
  var input_R1: File = _
bow's avatar
bow committed
22
23

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
24
  var input_R2: File = _
bow's avatar
bow committed
25
26

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var outputName: String = _
bow's avatar
bow committed
28
29

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
31
32

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
  @Argument(doc = "Skip metrics", shortName = "skipmetrics", required = false)
  var skipMetrics: Boolean = false
  
bow's avatar
bow committed
38
  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var aligner: String = _
bow's avatar
bow committed
40
41

  @Argument(doc = "Reference", shortName = "R", required = false)
42
  var reference: File = _
bow's avatar
bow committed
43

44
  @Argument(doc = "Chunking", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  var chunking: Boolean = false
46
47
48
49
  
  @ClassType(classOf[Int])
  @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
  var numberChunks: Option[Int] = None
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  // Readgroup items
bow's avatar
bow committed
52
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
53
  var RGID: String = _
bow's avatar
bow committed
54
55

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
56
  var RGLB: String = _
bow's avatar
bow committed
57
58

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
59
  var RGPL: String = _
bow's avatar
bow committed
60
61

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  var RGPU: String = _
bow's avatar
bow committed
63
64

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
65
  var RGSM: String = _
bow's avatar
bow committed
66
67

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
68
  var RGCN: String = _
bow's avatar
bow committed
69
70

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
  var RGDS: String = _
bow's avatar
bow committed
72
73

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
74
  var RGDT: Date = _
bow's avatar
bow committed
75
76

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
  var RGPI: Int = _
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
  var paired: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
80
  var defaultAligner = "bwa"
81
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
  
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
    if (aligner == null) aligner = config("aligner", default = defaultAligner)
86
    if (reference == null) reference = config("reference")
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
90
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
94
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
98
99
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
102
103
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
104

105
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
106

107
    if (!chunking && numberChunks.isDefined) chunking = true
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
110
111
112
113
114
115
116
117
118
119
      if (numberChunks.isEmpty) {
        if (configContains("numberchunks")) numberChunks = config("numberchunks", default = None)
        else {
          val chunkSize: Int = config("chunksize", (1 << 30))
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
                         else input_R1.length
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
121
  }
bow's avatar
bow committed
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
126
127
128
129
130
131
132
133
134
    if (!skipFlexiprep) {
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.sampleName = this.RGSM
      flexiprep.libraryName = this.RGLB
      flexiprep.init
      flexiprep.runInitialJobs
    }
bow's avatar
bow committed
135
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
136
137
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
138
139

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
143
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
144
    var chunks: Map[String, (String, String)] = Map()
145
    if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
148
149
150
      val chunkDir = "chunks/" + t + "/"
      chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
        if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
    } else chunks += ("flexiprep/" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
                        flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
bow's avatar
bow committed
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
154
155
156
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
157
      for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
158
      add(fastSplitter_R1)
bow's avatar
bow committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
163
164
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
165
        for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167
168
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
169
170

    for ((chunk, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
      var R1 = fastqfile._1
      var R2 = fastqfile._2
173
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
174
      val chunkDir = if (chunking) outputDir + chunk else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
175
      if (!skipFlexiprep) {
176
        val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
177
178
179
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
180
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
184
      
185
186
187
188
189
190
191
192
193
      val outputBam = new File(chunkDir + outputName + ".bam")
      bamFiles :+= outputBam
      aligner match {
        case "bwa" => addBwa(R1, R2, outputBam, deps)
        case "bowtie" => addBowtie(R1, R2, outputBam, deps)
        case "star" => addStar(R1, R2, outputBam, deps)
        case "star-2pass" => addStar2pass(R1, R2, outputBam, deps)
        case _ => throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
194
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
195
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
196
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
199

200
201
202
203
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
204
205
206
207
208
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
209
    
Peter van 't Hof's avatar
Peter van 't Hof committed
210
    if (!skipMetrics) addAll(BamMetrics.apply(this, bamFile, outputDir + "metrics/").functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
211
    
212
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
213
  }
bow's avatar
bow committed
214

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
  def addBwa(R1:File, R2:File, output:File, deps:List[File]): File = {
    val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
        bwaCommand.deps = deps
        bwaCommand.R = getReadGroup
        bwaCommand.output = this.swapExt(output.getParent, output, ".bam", ".sam")
        add(bwaCommand, isIntermediate = true)
        val sortSam = SortSam(this, bwaCommand.output, output)
        if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
        add(sortSam)
        return sortSam.output
  }
  
  def addBowtie(R1:File, R2:File, output:File, deps:List[File]): File = {
    val bowtie = new Bowtie(this)
        bowtie.R1 = R1
        if (paired) bowtie.R2 = R2
        bowtie.deps = deps
        bowtie.sam_RG = getReadGroup
        bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
        add(bowtie, isIntermediate = true)
        val sortSam = SortSam(this, bowtie.output, output)
        if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
        add(sortSam)
        return sortSam.output
  }
  
  def addStar(R1:File, R2:File, output:File, deps:List[File]): File = {
    val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
    add(starCommand)
    return addAddOrReplaceReadGroups(starCommand.outputSam, output)
  }
  
  def addStar2pass(R1:File, R2:File, output:File, deps:List[File]): File = {
    val starCommand = Star._2pass(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
    addAll(starCommand._2)
    return addAddOrReplaceReadGroups(starCommand._1, output)
  }
  
  def addAddOrReplaceReadGroups(input: File, output: File): File = {
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
259
260
261
262
263
264
265
    addOrReplaceReadGroups.createIndex = true

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
266
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
267
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
268

Peter van 't Hof's avatar
Peter van 't Hof committed
269
270
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
271
272

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
273
274
275
276
277
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
278
279
280
281
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
282

Peter van 't Hof's avatar
Peter van 't Hof committed
283
284
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
285
286
287
288
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
bow's avatar
bow committed
289

Peter van 't Hof's avatar
Peter van 't Hof committed
290
  def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
291
    val mapping = new Mapping(root)
bow's avatar
bow committed
292

Peter van 't Hof's avatar
Peter van 't Hof committed
293
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
294
295
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
296
297
298
299
300
301
302
303
304
305
306
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
307

308
309
310
    mapping.init
    mapping.biopetScript
    return mapping
311
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
312
}