Mapping.scala 12.3 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

3
4
import nl.lumc.sasc.biopet.function._
import nl.lumc.sasc.biopet.function.aligners._
Peter van 't Hof's avatar
Peter van 't Hof committed
5
6
import java.util.Date
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.core.config._
Peter van 't Hof's avatar
Peter van 't Hof committed
9
10
11
12
import nl.lumc.sasc.biopet.pipelines.flexiprep._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard.MarkDuplicates
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.sting.queue.extensions.picard.MergeSamFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
14
15
16
17
18
import org.broadinstitute.sting.queue.extensions.picard.SortSam
import org.broadinstitute.sting.queue.extensions.picard.AddOrReplaceReadGroups
import org.broadinstitute.sting.queue.function._
import scala.util.parsing.json._
import org.broadinstitute.sting.utils.variant._
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
20

Peter van 't Hof's avatar
Peter van 't Hof committed
21
class Mapping(val root:Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
22
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
  def this() = this(null)
  
  @Input(doc="R1 fastq file", shortName="R1",required=true)
  var input_R1: File = _
  
  @Input(doc="R2 fastq file", shortName="R2", required=false)
  var input_R2: File = _
  
  @Argument(doc="Output name", shortName="outputName", required=false)
  var outputName: String = _
  
  @Argument(doc="Skip flexiprep", shortName="skipflexiprep", required=false)
  var skipFlexiprep: Boolean = false
  
  @Argument(doc="Skip mark duplicates", shortName="skipmarkduplicates", required=false)
  var skipMarkduplicates: Boolean = false
  
  @Argument(doc="Alginer", shortName="ALN", required=false)
  var aligner: String = _
  
  @Argument(doc="Reference", shortName="R", required=false)
  var referenceFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
48
  @Argument(doc="auto chuning", shortName="chunking", required=false)
  var chunking: Boolean = false
  
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  @Argument(doc="Readgroup ID", shortName="RGID", required=false)
  var RGID: String = _
  
  @Argument(doc="Readgroup Library", shortName="RGLB", required=false)
  var RGLB: String = _
  
  @Argument(doc="Readgroup Platform", shortName="RGPL", required=false)
  var RGPL: String = _
  
  @Argument(doc="Readgroup platform unit", shortName="RGPU", required=false)
  var RGPU: String = _
  
  @Argument(doc="Readgroup sample", shortName="RGSM", required=false)
  var RGSM: String = _
  
  @Argument(doc="Readgroup sequencing center", shortName="RGCN", required=false)
  var RGCN: String = _
  
  @Argument(doc="Readgroup description", shortName="RGDS", required=false)
  var RGDS: String = _
  
  @Argument(doc="Readgroup sequencing date", shortName="RGDT", required=false)
  var RGDT: Date = _
  
  @Argument(doc="Readgroup predicted insert size", shortName="RGPI", required=false)
  var RGPI: Int = _
  
Peter van 't Hof's avatar
Peter van 't Hof committed
77
  var paired: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
78
  var numberChunks = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
81
  
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
84
85
86
87
    var inputtype:String = config("inputtype", "dna")
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
    if (referenceFile == null) referenceFile = config("referenceFile")
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90
91
92
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
94
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
98
99
100
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")    
    
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
103
104
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
105
106
    
    if (outputName == null) outputName = RGID
Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
109
110
111
112
113
114
115
116
117
    
    if (!chunking && numberChunks > 1) chunking = true
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
      val chunkSize:Int = config("chunksize", (1 << 30))
      val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3 
                     else input_R1.length
      if (numberChunks == 0 && configContains("numberchunks")) numberChunks = config("numberchunks")
      else if (numberChunks == 0) numberChunks = ceil(filesize.toDouble / chunkSize).toInt
      logger.debug("Chunks: " + numberChunks)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
118
119
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
120
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
    val flexiprep = new Flexiprep(this)
    var bamFiles:List[File] = Nil
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
    
    
    def removeGz(file:String):String = {
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
    var chunks:Map[String, (String,String)] = Map()
    if (chunking) for (t <- 1 to numberChunks) {
      chunks += ("chunk_" + t -> (removeGz(outputDir + "chunk_" + t + "/" + fastq_R1.getName),
                                  if (paired) removeGz(outputDir + "chunk_" + t + "/" + fastq_R2.getName) else ""))
    } else chunks += ("" -> (fastq_R1,fastq_R2))
    
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
      for ((chunk,fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
      add(fastSplitter_R1)
      
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
        for ((chunk,fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
        add(fastSplitter_R2)
      }
    }
    
    for ((chunk,fastqfile) <- chunks) {
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
        flexiprep.input_R1 = fastq_R1
        if (paired) flexiprep.input_R2 = fastq_R2
        flexiprep.outputDir = outputDir + "flexiprep/"
        flexiprep.init
        flexiprep.runInitialJobs
        //flexiprep.biopetScript
        val flexiout = flexiprep.runTrimClip(R1, R2, outputDir + chunk, chunk)
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
      val chunkDir = if (chunk.isEmpty) outputDir else outputDir + chunk + "/"
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
        bwaCommand.RG = getReadGroup
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
        bamFiles :+= addSortSam(List(bwaCommand.output), swapExt(chunkDir,bwaCommand.output,".sam",".bam"), chunkDir)
      } else if (aligner == "star") {
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true)
        add(starCommand)
        bamFiles :+= addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else if (aligner == "star-2pass") {
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true)
        addAll(star2pass._2)
        bamFiles :+= addAddOrReplaceReadGroups(List(star2pass._1), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
194
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
195
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
    
Peter van 't Hof's avatar
Peter van 't Hof committed
199
200
201
    var bamFile = ""
    if (!skipMarkduplicates) bamFile = addMarkDuplicates(bamFiles, new File(outputDir + outputName + ".dedup.bam"), outputDir)
    if (skipMarkduplicates && chunking) bamFile = addMergeBam(bamFiles, new File(outputDir + outputName + ".bam"), outputDir)
202
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
203
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
204
  
Peter van 't Hof's avatar
Peter van 't Hof committed
205
  def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = {
206
207
208
209
210
211
212
    val sortSam = new SortSam
    sortSam.input = inputSam
    sortSam.createIndex = true
    sortSam.output = outputFile
    sortSam.memoryLimit = 2
    sortSam.nCoresRequest = 2
    sortSam.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
213
    if (!skipMarkduplicates) sortSam.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
216
217
218
    add(sortSam)
    
    return sortSam.output
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  def addMergeBam(inputSam:List[File], outputFile:File, dir:String) : File = {
    val mergeSam = new MergeSamFiles
    mergeSam.input = inputSam
    mergeSam.createIndex = true
    mergeSam.output = outputFile
    mergeSam.memoryLimit = 2
    mergeSam.nCoresRequest = 2
    mergeSam.assumeSorted = true
    mergeSam.USE_THREADING = true
    mergeSam.jobResourceRequests :+= "h_vmem=4G"
    if (!skipMarkduplicates) mergeSam.isIntermediate = true
    add(mergeSam)
    
    return mergeSam.output
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
235
  def addAddOrReplaceReadGroups(inputSam:List[File], outputFile:File, dir:String) : File = {
236
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
237
238
239
240
241
242
243
244
245
246
247
248
249
250
    addOrReplaceReadGroups.input = inputSam
    addOrReplaceReadGroups.output = outputFile
    addOrReplaceReadGroups.createIndex = true
    addOrReplaceReadGroups.memoryLimit = 2
    addOrReplaceReadGroups.nCoresRequest = 2
    addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
251
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
252
253
    add(addOrReplaceReadGroups)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
254
255
256
    return addOrReplaceReadGroups.output
  }
  
257
  def addMarkDuplicates(input_Bams:List[File], outputFile:File, dir:String) : File = {
258
259
260
261
262
263
264
265
    val markDuplicates = new MarkDuplicates
    markDuplicates.input = input_Bams
    markDuplicates.output = outputFile
    markDuplicates.REMOVE_DUPLICATES = false
    markDuplicates.metrics = swapExt(dir,outputFile,".bam",".metrics")
    markDuplicates.outputIndex = swapExt(dir,markDuplicates.output,".bam",".bai")
    markDuplicates.memoryLimit = 2
    markDuplicates.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
268
269
270
271
272
273
274
275
276
    add(markDuplicates)
    
    return markDuplicates.output
  }
  
  def getReadGroup() : String = {
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
277
278
279
280
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
281
282
283
    
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
284
  
Peter van 't Hof's avatar
Peter van 't Hof committed
285
  def loadRunConfig(runConfig:Map[String,Any], sampleConfig:Map[String,Any], runDir: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
286
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
287
288
289
290
291
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
    else inputType = config("inputtype", "dna")
    if (inputType == "rna") aligner = config("rna_aligner", "star-2pass")
    if (runConfig.contains("R1")) input_R1 = runConfig("R1").toString
Peter van 't Hof's avatar
Peter van 't Hof committed
292
    if (runConfig.contains("R2")) input_R2 = runConfig("R2").toString
293
    paired = (input_R2 != null)
Peter van 't Hof's avatar
Peter van 't Hof committed
294
295
296
297
298
    RGLB = runConfig("ID").toString
    RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) RGCN = runConfig("CN").toString
299
300
    outputDir = runDir
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
301
}
302
303

object Mapping extends PipelineCommand {
304
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
305
}