Mapping.scala 11.5 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
bow's avatar
bow committed
6
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
8
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star }
9
import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups}
10
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
15

bow's avatar
bow committed
16
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
17
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  def this() = this(null)
bow's avatar
bow committed
19
20

  @Input(doc = "R1 fastq file", shortName = "R1", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
21
  var input_R1: File = _
bow's avatar
bow committed
22
23

  @Input(doc = "R2 fastq file", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
24
  var input_R2: File = _
bow's avatar
bow committed
25
26

  @Argument(doc = "Output name", shortName = "outputName", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var outputName: String = _
bow's avatar
bow committed
28
29

  @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var skipFlexiprep: Boolean = false
bow's avatar
bow committed
31
32

  @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  var skipMarkduplicates: Boolean = false
bow's avatar
bow committed
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
  @Argument(doc = "Skip metrics", shortName = "skipmetrics", required = false)
  var skipMetrics: Boolean = false
  
bow's avatar
bow committed
38
  @Argument(doc = "Alginer", shortName = "ALN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var aligner: String = _
bow's avatar
bow committed
40
41

  @Argument(doc = "Reference", shortName = "R", required = false)
42
  var reference: File = _
bow's avatar
bow committed
43

44
  @Argument(doc = "Chunking", shortName = "chunking", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  var chunking: Boolean = false
46
47
48
49
  
  @ClassType(classOf[Int])
  @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
  var numberChunks: Option[Int] = None
bow's avatar
bow committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  // Readgroup items
bow's avatar
bow committed
52
  @Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
53
  var RGID: String = _
bow's avatar
bow committed
54
55

  @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
56
  var RGLB: String = _
bow's avatar
bow committed
57
58

  @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
59
  var RGPL: String = _
bow's avatar
bow committed
60
61

  @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  var RGPU: String = _
bow's avatar
bow committed
63
64

  @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
65
  var RGSM: String = _
bow's avatar
bow committed
66
67

  @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
68
  var RGCN: String = _
bow's avatar
bow committed
69
70

  @Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
  var RGDS: String = _
bow's avatar
bow committed
72
73

  @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
74
  var RGDT: Date = _
bow's avatar
bow committed
75
76

  @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
  var RGPI: Int = _
bow's avatar
bow committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
  var paired: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
80
  var defaultAligner = "bwa"
81
  val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
  
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
    if (aligner == null) aligner = config("aligner", default = defaultAligner)
86
    if (reference == null) reference = config("reference")
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
90
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
bow's avatar
bow committed
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
94
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
bow's avatar
bow committed
98
99
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")

Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
102
103
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
bow's avatar
bow committed
104

105
    if (outputName == null) outputName = RGID
bow's avatar
bow committed
106

107
    if (!chunking && numberChunks.isDefined) chunking = true
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
110
111
112
113
114
115
116
117
118
119
      if (numberChunks.isEmpty) {
        if (configContains("numberchunks")) numberChunks = config("numberchunks", default = None)
        else {
          val chunkSize: Int = config("chunksize", (1 << 30))
          val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
                         else input_R1.length
          numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
        }
      }
      logger.debug("Chunks: " + numberChunks.getOrElse(1))
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
121
  }
bow's avatar
bow committed
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
126
127
128
129
130
131
132
133
134
    if (!skipFlexiprep) {
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.sampleName = this.RGSM
      flexiprep.libraryName = this.RGLB
      flexiprep.init
      flexiprep.runInitialJobs
    }
bow's avatar
bow committed
135
    var bamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
136
137
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
bow's avatar
bow committed
138
139

    def removeGz(file: String): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
143
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
bow's avatar
bow committed
144
    var chunks: Map[String, (String, String)] = Map()
145
    if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
148
149
150
      val chunkDir = "chunks/" + t + "/"
      chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
        if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
    } else chunks += ("flexiprep/" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
                        flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
bow's avatar
bow committed
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
154
155
156
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
157
      for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
Peter van 't Hof's avatar
Peter van 't Hof committed
158
      add(fastSplitter_R1)
bow's avatar
bow committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
163
164
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
bow's avatar
bow committed
165
        for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167
168
        add(fastSplitter_R2)
      }
    }
bow's avatar
bow committed
169
170

    for ((chunk, fastqfile) <- chunks) {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
      var R1 = fastqfile._1
      var R2 = fastqfile._2
173
      var deps: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
174
      val chunkDir = if (chunking) outputDir + chunk else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
175
      if (!skipFlexiprep) {
176
        val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
177
178
179
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
180
        deps = flexiout._3
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
184
      
Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
187
188
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
189
        bwaCommand.deps = deps
190
        bwaCommand.R = getReadGroup
Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
193
194
195
196
        val sortSam = SortSam(this, bwaCommand.output, chunkDir)
        if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
        add(sortSam)
        bamFiles :+= sortSam.output
Peter van 't Hof's avatar
Peter van 't Hof committed
197
      } else if (aligner == "star") {
198
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
199
        add(starCommand)
200
        bamFiles :+= addAddOrReplaceReadGroups(starCommand.outputSam, chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      } else if (aligner == "star-2pass") {
202
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true, deps = deps)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
        addAll(star2pass._2)
204
        bamFiles :+= addAddOrReplaceReadGroups(star2pass._1, chunkDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
207
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
208
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
bow's avatar
bow committed
211

212
213
214
215
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
216
217
218
219
220
    } else if (skipMarkduplicates && chunking) {
      val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
      add(mergeSamFile)
      bamFile = mergeSamFile.output
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
221
    
Peter van 't Hof's avatar
Peter van 't Hof committed
222
    if (!skipMetrics) addAll(BamMetrics.apply(this, bamFile, outputDir + "metrics/").functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
223
    
224
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
225
  }
bow's avatar
bow committed
226

227
228
  def addAddOrReplaceReadGroups(inputSam: File, outputDir: String): File = {
    val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, inputSam, outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
231
232
233
234
235
236
237
    addOrReplaceReadGroups.createIndex = true

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
238
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
239
    add(addOrReplaceReadGroups)
bow's avatar
bow committed
240

Peter van 't Hof's avatar
Peter van 't Hof committed
241
242
    return addOrReplaceReadGroups.output
  }
bow's avatar
bow committed
243
244

  def getReadGroup(): String = {
Peter van 't Hof's avatar
Peter van 't Hof committed
245
246
247
248
249
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
250
251
252
253
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
bow's avatar
bow committed
254

Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
257
258
259
260
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
bow's avatar
bow committed
261

Peter van 't Hof's avatar
Peter van 't Hof committed
262
  def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = {
263
    val mapping = new Mapping(root)
bow's avatar
bow committed
264

Peter van 't Hof's avatar
Peter van 't Hof committed
265
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
268
269
270
271
272
273
274
275
276
277
278
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
bow's avatar
bow committed
279

280
281
282
    mapping.init
    mapping.biopetScript
    return mapping
283
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
284
}