Mapping.scala 9.09 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

3
4
import nl.lumc.sasc.biopet.function._
import nl.lumc.sasc.biopet.function.aligners._
Peter van 't Hof's avatar
Peter van 't Hof committed
5
6
import java.util.Date
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.config._
Peter van 't Hof's avatar
Peter van 't Hof committed
8
9
10
11
12
13
14
15
16
17
import nl.lumc.sasc.biopet.pipelines.flexiprep._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard.MarkDuplicates
import org.broadinstitute.sting.queue.extensions.picard.SortSam
import org.broadinstitute.sting.queue.extensions.picard.AddOrReplaceReadGroups
import org.broadinstitute.sting.queue.function._
import scala.util.parsing.json._
import org.broadinstitute.sting.utils.variant._

Peter van 't Hof's avatar
Peter van 't Hof committed
18
class Mapping(val root:Configurable) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
19
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
  def this() = this(null)
  
  @Input(doc="R1 fastq file", shortName="R1",required=true)
  var input_R1: File = _
  
  @Input(doc="R2 fastq file", shortName="R2", required=false)
  var input_R2: File = _
  
  @Argument(doc="Output name", shortName="outputName", required=false)
  var outputName: String = _
  
  @Argument(doc="Skip flexiprep", shortName="skipflexiprep", required=false)
  var skipFlexiprep: Boolean = false
  
  @Argument(doc="Skip mark duplicates", shortName="skipmarkduplicates", required=false)
  var skipMarkduplicates: Boolean = false
  
  @Argument(doc="Alginer", shortName="ALN", required=false)
  var aligner: String = _
  
  @Argument(doc="Reference", shortName="R", required=false)
  var referenceFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
  
  // Readgroup items
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  @Argument(doc="Readgroup ID", shortName="RGID", required=false)
  var RGID: String = _
  
  @Argument(doc="Readgroup Library", shortName="RGLB", required=false)
  var RGLB: String = _
  
  @Argument(doc="Readgroup Platform", shortName="RGPL", required=false)
  var RGPL: String = _
  
  @Argument(doc="Readgroup platform unit", shortName="RGPU", required=false)
  var RGPU: String = _
  
  @Argument(doc="Readgroup sample", shortName="RGSM", required=false)
  var RGSM: String = _
  
  @Argument(doc="Readgroup sequencing center", shortName="RGCN", required=false)
  var RGCN: String = _
  
  @Argument(doc="Readgroup description", shortName="RGDS", required=false)
  var RGDS: String = _
  
  @Argument(doc="Readgroup sequencing date", shortName="RGDT", required=false)
  var RGDT: Date = _
  
  @Argument(doc="Readgroup predicted insert size", shortName="RGPI", required=false)
  var RGPI: Int = _
  
  
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
74
75
76
  
  var paired: Boolean = false
  
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79
80
81
82
    var inputtype:String = config("inputtype", "dna")
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
    if (referenceFile == null) referenceFile = config("referenceFile")
Peter van 't Hof's avatar
Peter van 't Hof committed
83
84
85
86
87
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
91
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
92
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
95
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")    
    
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98
99
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
100
101
    
    if (outputName == null) outputName = RGID
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
104
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107
    var fastq_R1: String = input_R1
    var fastq_R2: String = if (paired) input_R2 else ""
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
108
      val flexiprep = new Flexiprep(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
109
110
111
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.outputDir = outputDir + "flexiprep/"
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
      flexiprep.init
      flexiprep.biopetScript
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
116
117
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
      fastq_R1 = flexiprep.outputFiles("output_R1")
      if (paired) fastq_R2 = flexiprep.outputFiles("output_R2")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
118
    var bamFile:File = null
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    if (aligner == "bwa") {
Peter van 't Hof's avatar
Peter van 't Hof committed
120
      val bwaCommand = new Bwa(this)
121
122
123
124
      bwaCommand.R1 = fastq_R1
      if (paired) bwaCommand.R2 = fastq_R2
      bwaCommand.RG = getReadGroup
      bwaCommand.output = new File(outputDir + outputName + ".sam")
Peter van 't Hof's avatar
Peter van 't Hof committed
125
      add(bwaCommand, isIntermediate = true)
126
      bamFile = addSortSam(List(bwaCommand.output), swapExt(outputDir,bwaCommand.output,".sam",".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
127
    } else if (aligner == "star") {
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      val starCommand = Star(this, fastq_R1, if (paired) fastq_R2 else null, outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      add(starCommand)
130
      bamFile = addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    } else if (aligner == "star-2pass") {
Peter van 't Hof's avatar
Peter van 't Hof committed
132
      val star2pass = Star._2pass(this, fastq_R1, if (paired) fastq_R2 else null, outputDir, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
      addAll(star2pass._2)
      bamFile = addAddOrReplaceReadGroups(List(star2pass._1), new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
135
    } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
Peter van 't Hof's avatar
Peter van 't Hof committed
136
137
    
    if (!skipMarkduplicates) bamFile = addMarkDuplicates(List(bamFile), swapExt(outputDir,bamFile,".bam",".dedup.bam"), outputDir)
138
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
139
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
140
  
Peter van 't Hof's avatar
Peter van 't Hof committed
141
  def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = {
142
143
144
145
146
147
148
    val sortSam = new SortSam
    sortSam.input = inputSam
    sortSam.createIndex = true
    sortSam.output = outputFile
    sortSam.memoryLimit = 2
    sortSam.nCoresRequest = 2
    sortSam.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
149
    if (!skipMarkduplicates) sortSam.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
152
153
154
155
    add(sortSam)
    
    return sortSam.output
  }
  
  def addAddOrReplaceReadGroups(inputSam:List[File], outputFile:File, dir:String) : File = {
156
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
157
158
159
160
161
162
163
164
165
166
167
168
169
170
    addOrReplaceReadGroups.input = inputSam
    addOrReplaceReadGroups.output = outputFile
    addOrReplaceReadGroups.createIndex = true
    addOrReplaceReadGroups.memoryLimit = 2
    addOrReplaceReadGroups.nCoresRequest = 2
    addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"

    addOrReplaceReadGroups.RGID = RGID
    addOrReplaceReadGroups.RGLB = RGLB
    addOrReplaceReadGroups.RGPL = RGPL
    addOrReplaceReadGroups.RGPU = RGPU
    addOrReplaceReadGroups.RGSM = RGSM
    if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
    if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
171
    if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
    add(addOrReplaceReadGroups)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
176
    return addOrReplaceReadGroups.output
  }
  
177
  def addMarkDuplicates(input_Bams:List[File], outputFile:File, dir:String) : File = {
178
179
180
181
182
183
184
185
    val markDuplicates = new MarkDuplicates
    markDuplicates.input = input_Bams
    markDuplicates.output = outputFile
    markDuplicates.REMOVE_DUPLICATES = false
    markDuplicates.metrics = swapExt(dir,outputFile,".bam",".metrics")
    markDuplicates.outputIndex = swapExt(dir,markDuplicates.output,".bam",".bai")
    markDuplicates.memoryLimit = 2
    markDuplicates.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
188
189
190
191
192
193
194
195
196
    add(markDuplicates)
    
    return markDuplicates.output
  }
  
  def getReadGroup() : String = {
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
197
198
199
200
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
203
    
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
204
  
Peter van 't Hof's avatar
Peter van 't Hof committed
205
  def loadRunConfig(runConfig:Map[String,Any], sampleConfig:Map[String,Any], runDir: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
206
    logger.debug("Mapping runconfig: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
209
210
211
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
    else inputType = config("inputtype", "dna")
    if (inputType == "rna") aligner = config("rna_aligner", "star-2pass")
    if (runConfig.contains("R1")) input_R1 = runConfig("R1").toString
Peter van 't Hof's avatar
Peter van 't Hof committed
212
    if (runConfig.contains("R2")) input_R2 = runConfig("R2").toString
213
    paired = (input_R2 != null)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
216
217
218
    RGLB = runConfig("ID").toString
    RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) RGCN = runConfig("CN").toString
219
220
    outputDir = runDir
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
221
}
222
223

object Mapping extends PipelineCommand {
224
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
225
}