Mapping.scala 11.6 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star }
9
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups}
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
15

bow's avatar
bow committed
16
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
17
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  def this() = this(null)
bow's avatar
bow committed
19
20

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
21
  var input_R1: File = _
bow's avatar
bow committed
22
23

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
24
  var input_R2: File = _
bow's avatar
bow committed
25
26

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var outputName: String = _
bow's avatar
bow committed
28
29

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
31
32

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
  @Argument(doc = "Skip metrics", shortName = "skipmetrics", required = false)
  var skipMetrics: Boolean = false
  
bow's avatar
bow committed
38
  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var aligner: String = _
bow's avatar
bow committed
40
41

  @Argument(doc = "Reference", shortName = "R", required = false)
42
  var reference: File = _
bow's avatar
bow committed
43

44
  @Argument(doc = "Chunking", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  var chunking: Boolean = false
46
47
48
49
  
  @ClassType(classOf[Int])
  @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
  var numberChunks: Option[Int] = None
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  // Readgroup items
bow's avatar
bow committed
52
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
53
  var RGID: String = _
bow's avatar
bow committed
54
55

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
56
  var RGLB: String = _
bow's avatar
bow committed
57
58

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
59
  var RGPL: String = _
bow's avatar
bow committed
60
61

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  var RGPU: String = _
bow's avatar
bow committed
63
64

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
65
  var RGSM: String = _
bow's avatar
bow committed
66
67

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
68
  var RGCN: String = _
bow's avatar
bow committed
69
70

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
  var RGDS: String = _
bow's avatar
bow committed
72
73

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
74
  var RGDT: Date = _
bow's avatar
bow committed
75
76

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
  var RGPI: Int = _
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
  var paired: Boolean = false
80
81
  
  val flexiprep = new Flexiprep(this)
bow's avatar
bow committed
82

Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
bow's avatar
bow committed
85
    var inputtype: String = config("inputtype", "dna")
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
88
89
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
90
    if (reference == null) reference = config("reference")
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
93
94
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
95

Peter van 't Hof's avatar
Peter van 't Hof committed
96
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
98
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
99
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
100
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
101
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
102
103
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
107
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
108

109
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
110

111
    if (!chunking && numberChunks.isDefined) chunking = true
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
114
115
116
117
118
119
120
121
122
123
      if (numberChunks.isEmpty) {
        if (configContains("numberchunks")) numberChunks = config("numberchunks", default = None)
        else {
          val chunkSize: Int = config("chunksize", (1 << 30))
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
                         else input_R1.length
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
124
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
125
  }
bow's avatar
bow committed
126

Peter van 't Hof's avatar
Peter van 't Hof committed
127
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
130
131
132
133
134
135
136
137
138
    if (!skipFlexiprep) {
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.sampleName = this.RGSM
      flexiprep.libraryName = this.RGLB
      flexiprep.init
      flexiprep.runInitialJobs
    }
bow's avatar
bow committed
139
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
142
143

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
144
145
146
147
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
148
    var chunks: Map[String, (String, String)] = Map()
149
    if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
152
153
154
      val chunkDir = "chunks/" + t + "/"
      chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
        if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
    } else chunks += ("flexiprep/" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
                        flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
bow's avatar
bow committed
155

Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
158
159
160
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
161
      for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
162
      add(fastSplitter_R1)
bow's avatar
bow committed
163

Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
166
167
168
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
169
        for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
172
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
173
174

    for ((chunk, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
      var R1 = fastqfile._1
      var R2 = fastqfile._2
177
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
178
      val chunkDir = if (chunking) outputDir + chunk else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
179
      if (!skipFlexiprep) {
180
        val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
184
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
187
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
188
      
Peter van 't Hof's avatar
Peter van 't Hof committed
189
190
191
192
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
193
        bwaCommand.deps = deps
194
        bwaCommand.R = getReadGroup
Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
197
198
199
200
        val sortSam = SortSam(this, bwaCommand.output, chunkDir)
        if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
        add(sortSam)
        bamFiles :+= sortSam.output
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      } else if (aligner == "star") {
202
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
        add(starCommand)
204
        bamFiles :+= addAddOrReplaceReadGroups(starCommand.outputSam, chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
205
      } else if (aligner == "star-2pass") {
206
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
207
        addAll(star2pass._2)
208
        bamFiles :+= addAddOrReplaceReadGroups(star2pass._1, chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
211
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
212
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
215

216
217
218
219
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
220
221
222
223
224
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
225
    
Peter van 't Hof's avatar
Peter van 't Hof committed
226
    if (!skipMetrics) addAll(BamMetrics.apply(this, bamFile, outputDir + "metrics/").functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
227
    
228
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
229
  }
bow's avatar
bow committed
230

231
232
  def addAddOrReplaceReadGroups(inputSam: File, outputDir: String): File = {
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, inputSam, outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
233
234
235
236
237
238
239
240
241
    addOrReplaceReadGroups.createIndex = true

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
242
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
243
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
244

Peter van 't Hof's avatar
Peter van 't Hof committed
245
246
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
247
248

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251
252
253
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
254
255
256
257
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
258

Peter van 't Hof's avatar
Peter van 't Hof committed
259
260
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
261
262
263
264
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
bow's avatar
bow committed
265

Peter van 't Hof's avatar
Peter van 't Hof committed
266
  def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
267
    val mapping = new Mapping(root)
bow's avatar
bow committed
268

Peter van 't Hof's avatar
Peter van 't Hof committed
269
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
270
271
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
272
273
274
275
276
277
278
279
280
281
282
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
283

284
285
286
    mapping.init
    mapping.biopetScript
    return mapping
287
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
288
}