Mapping.scala 12.1 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.config.Configurable
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import java.util.Date
Peter van 't Hof's avatar
Peter van 't Hof committed
6
import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand}
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.apps.FastqSplitter
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.function.aligners.{Bwa, Star}
9
10
import nl.lumc.sasc.biopet.function.picard.MarkDuplicates
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import org.broadinstitute.gatk.queue.extensions.picard.{MergeSamFiles, SortSam, AddOrReplaceReadGroups}
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import scala.math._
Peter van 't Hof's avatar
Peter van 't Hof committed
15

Peter van 't Hof's avatar
Peter van 't Hof committed
16
class Mapping(val root:Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
17
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
  def this() = this(null)
  
  @Input(doc="R1 fastq file", shortName="R1",required=true)
  var input_R1: File = _
  
  @Input(doc="R2 fastq file", shortName="R2", required=false)
  var input_R2: File = _
  
  @Argument(doc="Output name", shortName="outputName", required=false)
  var outputName: String = _
  
  @Argument(doc="Skip flexiprep", shortName="skipflexiprep", required=false)
  var skipFlexiprep: Boolean = false
  
  @Argument(doc="Skip mark duplicates", shortName="skipmarkduplicates", required=false)
  var skipMarkduplicates: Boolean = false
  
  @Argument(doc="Alginer", shortName="ALN", required=false)
  var aligner: String = _
  
  @Argument(doc="Reference", shortName="R", required=false)
  var referenceFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  
Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
43
  @Argument(doc="auto chuning", shortName="chunking", required=false)
  var chunking: Boolean = false
  
Peter van 't Hof's avatar
Peter van 't Hof committed
44
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  @Argument(doc="Readgroup ID", shortName="RGID", required=false)
  var RGID: String = _
  
  @Argument(doc="Readgroup Library", shortName="RGLB", required=false)
  var RGLB: String = _
  
  @Argument(doc="Readgroup Platform", shortName="RGPL", required=false)
  var RGPL: String = _
  
  @Argument(doc="Readgroup platform unit", shortName="RGPU", required=false)
  var RGPU: String = _
  
  @Argument(doc="Readgroup sample", shortName="RGSM", required=false)
  var RGSM: String = _
  
  @Argument(doc="Readgroup sequencing center", shortName="RGCN", required=false)
  var RGCN: String = _
  
  @Argument(doc="Readgroup description", shortName="RGDS", required=false)
  var RGDS: String = _
  
  @Argument(doc="Readgroup sequencing date", shortName="RGDT", required=false)
  var RGDT: Date = _
  
  @Argument(doc="Readgroup predicted insert size", shortName="RGPI", required=false)
  var RGPI: Int = _
  
Peter van 't Hof's avatar
Peter van 't Hof committed
72
  var paired: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
73
  var numberChunks = 0
Peter van 't Hof's avatar
Peter van 't Hof committed
74
75
76
  
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79
80
81
82
    var inputtype:String = config("inputtype", "dna")
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
    if (referenceFile == null) referenceFile = config("referenceFile")
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
85
86
87
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
91
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
92
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
95
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")    
    
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98
99
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
100
101
    
    if (outputName == null) outputName = RGID
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
104
105
106
107
108
109
110
111
112
    
    if (!chunking && numberChunks > 1) chunking = true
    if (!chunking) chunking = config("chunking", false)
    if (chunking) {
      val chunkSize:Int = config("chunksize", (1 << 30))
      val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3 
                     else input_R1.length
      if (numberChunks == 0 && configContains("numberchunks")) numberChunks = config("numberchunks")
      else if (numberChunks == 0) numberChunks = ceil(filesize.toDouble / chunkSize).toInt
      logger.debug("Chunks: " + numberChunks)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
115
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
118
    var fastq_R1: File = input_R1
    var fastq_R2: File = if (paired) input_R2 else ""
    val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    flexiprep.outputDir = outputDir + "flexiprep/"
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
123
124
125
126
127
128
129
130
131
132
    var bamFiles:List[File] = Nil
    var fastq_R1_output: List[File] = Nil
    var fastq_R2_output: List[File] = Nil
    
    def removeGz(file:String):String = {
      if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
      else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
      else return file
    }
    var chunks:Map[String, (String,String)] = Map()
    if (chunking) for (t <- 1 to numberChunks) {
      chunks += ("chunk_" + t -> (removeGz(outputDir + "chunk_" + t + "/" + fastq_R1.getName),
                                  if (paired) removeGz(outputDir + "chunk_" + t + "/" + fastq_R2.getName) else ""))
133
134
    } else chunks += ("flexiprep" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
                                      flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
Peter van 't Hof's avatar
Peter van 't Hof committed
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
    
    if (chunking) {
      val fastSplitter_R1 = new FastqSplitter(this)
      fastSplitter_R1.input = fastq_R1
      fastSplitter_R1.memoryLimit = 4
      fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G"
      for ((chunk,fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
      add(fastSplitter_R1)
      
      if (paired) {
        val fastSplitter_R2 = new FastqSplitter(this)
        fastSplitter_R2.input = fastq_R2
        fastSplitter_R2.memoryLimit = 4
        fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G"
        for ((chunk,fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
        add(fastSplitter_R2)
      }
    }
    
    for ((chunk,fastqfile) <- chunks) {
      var R1 = fastqfile._1
      var R2 = fastqfile._2
      if (!skipFlexiprep) {
        flexiprep.input_R1 = fastq_R1
        if (paired) flexiprep.input_R2 = fastq_R2
160
        if (chunking) flexiprep.skipSummary = true
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
163
164
165
166
167
168
169
170
        flexiprep.init
        flexiprep.runInitialJobs
        //flexiprep.biopetScript
        val flexiout = flexiprep.runTrimClip(R1, R2, outputDir + chunk, chunk)
        logger.debug(chunk + " - " + flexiout)
        R1 = flexiout._1
        if (paired) R2 = flexiout._2
        fastq_R1_output :+= R1
        fastq_R2_output :+= R2
      }
171
      val chunkDir = if (chunking) outputDir + chunk + "/" else outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
      if (aligner == "bwa") {
        val bwaCommand = new Bwa(this)
        bwaCommand.R1 = R1
        if (paired) bwaCommand.R2 = R2
        bwaCommand.RG = getReadGroup
        bwaCommand.output = new File(chunkDir + outputName + ".sam")
        add(bwaCommand, isIntermediate = true)
        bamFiles :+= addSortSam(List(bwaCommand.output), swapExt(chunkDir,bwaCommand.output,".sam",".bam"), chunkDir)
      } else if (aligner == "star") {
        val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true)
        add(starCommand)
        bamFiles :+= addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else if (aligner == "star-2pass") {
        val star2pass = Star._2pass(this, R1, if (paired) R2 else null, chunkDir, isIntermediate = true)
        addAll(star2pass._2)
        bamFiles :+= addAddOrReplaceReadGroups(List(star2pass._1), new File(chunkDir + outputName + ".bam"), chunkDir)
      } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
190
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
194
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
    }
    
195
196
197
198
199
200
    var bamFile = bamFiles.head
    if (!skipMarkduplicates) {
      bamFile = new File(outputDir + outputName + ".dedup.bam")
      add(MarkDuplicates(this, bamFiles, bamFile))
    } else if (skipMarkduplicates && chunking) bamFile = addMergeBam(bamFiles, new File(outputDir + outputName + ".bam"), outputDir)
    
201
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
202
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
203
  
Peter van 't Hof's avatar
Peter van 't Hof committed
204
  def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = {
205
206
207
208
209
210
211
    val sortSam = new SortSam
    sortSam.input = inputSam
    sortSam.createIndex = true
    sortSam.output = outputFile
    sortSam.memoryLimit = 2
    sortSam.nCoresRequest = 2
    sortSam.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
212
    if (!skipMarkduplicates) sortSam.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
215
216
217
    add(sortSam)
    
    return sortSam.output
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
  def addMergeBam(inputSam:List[File], outputFile:File, dir:String) : File = {
    val mergeSam = new MergeSamFiles
    mergeSam.input = inputSam
    mergeSam.createIndex = true
    mergeSam.output = outputFile
    mergeSam.memoryLimit = 2
    mergeSam.nCoresRequest = 2
    mergeSam.assumeSorted = true
    mergeSam.USE_THREADING = true
    mergeSam.jobResourceRequests :+= "h_vmem=4G"
    if (!skipMarkduplicates) mergeSam.isIntermediate = true
    add(mergeSam)
    
    return mergeSam.output
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
234
  def addAddOrReplaceReadGroups(inputSam:List[File], outputFile:File, dir:String) : File = {
235
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
239
240
241
242
243
244
245
246
247
248
249
    addOrReplaceReadGroups.input = inputSam
    addOrReplaceReadGroups.output = outputFile
    addOrReplaceReadGroups.createIndex = true
    addOrReplaceReadGroups.memoryLimit = 2
    addOrReplaceReadGroups.nCoresRequest = 2
    addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
250
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
251
252
    add(addOrReplaceReadGroups)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
255
256
257
258
259
260
261
    return addOrReplaceReadGroups.output
  }
  
  def getReadGroup() : String = {
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
262
263
264
265
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
268
    
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
269
270
271
272
}

object Mapping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
273
  
274
275
276
  def loadFromRunConfig(root:Configurable, runConfig:Map[String,Any], sampleConfig:Map[String,Any], runDir: String) : Mapping = {
    val mapping = new Mapping(root)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
277
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
278
279
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
    else inputType = root.config("inputtype", "dna").getString
    if (inputType == "rna") mapping.aligner = root.config("rna_aligner", "star-2pass").getString
    if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
    if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
    mapping.paired = (mapping.input_R2 != null)
    mapping.RGLB = runConfig("ID").toString
    mapping.RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
    mapping.outputDir = runDir
    
    mapping.init
    mapping.biopetScript
    return mapping
295
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
296
}