Mapping.scala 9.64 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

3
4
import nl.lumc.sasc.biopet.function._
import nl.lumc.sasc.biopet.function.aligners._
Peter van 't Hof's avatar
Peter van 't Hof committed
5
6
7
8
9
10
11
12
13
14
15
16
import java.util.Date
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.pipelines.flexiprep._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard.MarkDuplicates
import org.broadinstitute.sting.queue.extensions.picard.SortSam
import org.broadinstitute.sting.queue.extensions.picard.AddOrReplaceReadGroups
import org.broadinstitute.sting.queue.function._
import scala.util.parsing.json._
import org.broadinstitute.sting.utils.variant._

17
class Mapping(private var globalConfig: Config) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
18
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
19
20
21
22
  @Argument(doc="Config Json file",shortName="config", required=false) var configfiles: List[File] = Nil
  @Input(doc="R1 fastq file", shortName="R1",required=true) var input_R1: File = _
  @Input(doc="R2 fastq file", shortName="R2", required=false) var input_R2: File = _
  @Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _
23
  @Argument(doc="Output name", shortName="outputName", required=false) var outputName: String = _
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  @Argument(doc="Skip flexiprep", shortName="skipflexiprep", required=false) var skipFlexiprep: Boolean = false
  @Argument(doc="Skip mark duplicates", shortName="skipmarkduplicates", required=false) var skipMarkduplicates: Boolean = false
  @Argument(doc="Alginer", shortName="ALN", required=false) var aligner: String = _
  
  // Readgroup items
  @Argument(doc="Readgroup ID", shortName="RGID", required=false) var RGID: String = _
  @Argument(doc="Readgroup Library", shortName="RGLB", required=false) var RGLB: String = _
  @Argument(doc="Readgroup Platform", shortName="RGPL", required=false) var RGPL: String = _
  @Argument(doc="Readgroup platform unit", shortName="RGPU", required=false) var RGPU: String = _
  @Argument(doc="Readgroup sample", shortName="RGSM", required=false) var RGSM: String = _
  @Argument(doc="Readgroup sequencing center", shortName="RGCN", required=false) var RGCN: String = _
  @Argument(doc="Readgroup description", shortName="RGDS", required=false) var RGDS: String = _
  @Argument(doc="Readgroup sequencing date", shortName="RGDT", required=false) var RGDT: Date = _
  @Argument(doc="Readgroup predicted insert size", shortName="RGPI", required=false) var RGPI: Int = _
  
  def this() = this(new Config())
  
  var referenceFile: File = _
  var paired: Boolean = false
  
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
    config = Config.mergeConfigs(globalConfig.getAsConfig("mapping"), globalConfig)
47
    if (aligner == null) aligner = config.getAsString("aligner", "bwa")
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
    referenceFile = config.getAsString("referenceFile")
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
    
    if (RGLB == null && config.contains("RGLB")) RGLB = config.getAsString("RGLB")
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
    if (RGSM == null && config.contains("RGSM")) RGSM = config.getAsString("RGSM")
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
    if (RGID == null && config.contains("RGID")) RGID = config.getAsString("RGID")
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")    
    
    if (RGPL == null) RGPL = config.getAsString("RGPL", "illumina")
    if (RGPU == null) RGPU = config.getAsString("RGPU", "na")
    if (RGCN == null && config.contains("RGCN")) RGCN = config.getAsString("RGCN")
    if (RGDS == null && config.contains("RGDS")) RGDS = config.getAsString("RGDS")
66
67
    
    if (outputName == null) outputName = RGID
Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
  }
  
  def script() {
    this.init()
    var fastq_R1: String = input_R1
    var fastq_R2: String = if (paired) input_R2 else ""
    if (!skipFlexiprep) {
      val flexiprep = new Flexiprep(config)
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.script
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
      fastq_R1 = flexiprep.outputFiles("output_R1")
      if (paired) fastq_R2 = flexiprep.outputFiles("output_R2")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
84
    var bamFile:File = null
Peter van 't Hof's avatar
Peter van 't Hof committed
85
    if (aligner == "bwa") {
86
87
88
89
90
      val bwaCommand = new Bwa(config)
      bwaCommand.R1 = fastq_R1
      if (paired) bwaCommand.R2 = fastq_R2
      bwaCommand.RG = getReadGroup
      bwaCommand.output = new File(outputDir + outputName + ".sam")
Peter van 't Hof's avatar
Peter van 't Hof committed
91
      add(bwaCommand)
92
      bamFile = addSortSam(List(bwaCommand.output), swapExt(outputDir,bwaCommand.output,".sam",".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    } else if (aligner == "star") {
94
      val starCommand = Star(config, fastq_R1, if (paired) fastq_R2 else null, outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      add(starCommand)
96
      bamFile = addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
97
    } else if (aligner == "star-2pass") {
98
99
      val starCommand_pass1 = Star(config, fastq_R1, if (paired) fastq_R2 else null, outputDir + "star-2pass/aln-pass1/")
      starCommand_pass1.afterGraph
Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
      add(starCommand_pass1)
      
102
103
104
105
106
107
      val starCommand_reindex = new Star(config)
      starCommand_reindex.sjdbFileChrStartEnd = starCommand_pass1.outputTab
      starCommand_reindex.outputDir = qscript.outputDir + "star-2pass/re-index/" 
      starCommand_reindex.runmode = "genomeGenerate"
      starCommand_reindex.sjdbOverhang = 75
      starCommand_reindex.afterGraph
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
      add(starCommand_reindex)
      
110
111
112
      val starCommand_pass2 = Star(config, fastq_R1, if (paired) fastq_R2 else null, outputDir + "star-2pass/aln-pass2/")
      starCommand_pass2.genomeDir = starCommand_reindex.outputDir
      starCommand_pass2.afterGraph
Peter van 't Hof's avatar
Peter van 't Hof committed
113
      add(starCommand_pass2)
114
      bamFile = addAddOrReplaceReadGroups(List(starCommand_pass2.outputSam), new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
115
    } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
    
    if (!skipMarkduplicates) bamFile = addMarkDuplicates(List(bamFile), swapExt(outputDir,bamFile,".bam",".dedup.bam"), outputDir)
118
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
119
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
120
  
Peter van 't Hof's avatar
Peter van 't Hof committed
121
  def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = {
122
123
124
125
126
127
128
    val sortSam = new SortSam
    sortSam.input = inputSam
    sortSam.createIndex = true
    sortSam.output = outputFile
    sortSam.memoryLimit = 2
    sortSam.nCoresRequest = 2
    sortSam.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
131
132
133
134
    add(sortSam)
    
    return sortSam.output
  }
  
  def addAddOrReplaceReadGroups(inputSam:List[File], outputFile:File, dir:String) : File = {
135
136
137
138
139
140
141
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
      addOrReplaceReadGroups.input = inputSam
      addOrReplaceReadGroups.output = outputFile
      addOrReplaceReadGroups.createIndex = true
      addOrReplaceReadGroups.memoryLimit = 2
      addOrReplaceReadGroups.nCoresRequest = 2
      addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"
142
      
143
144
145
146
147
      addOrReplaceReadGroups.RGID = qscript.RGID
      addOrReplaceReadGroups.RGLB = qscript.RGLB
      addOrReplaceReadGroups.RGPL = qscript.RGPL
      addOrReplaceReadGroups.RGPU = qscript.RGPU
      addOrReplaceReadGroups.RGSM = qscript.RGSM
148
149
      if (RGCN != null) this.RGCN = qscript.RGCN
      if (RGDS != null) this.RGDS = qscript.RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
    add(addOrReplaceReadGroups)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
154
    return addOrReplaceReadGroups.output
  }
  
155
  def addMarkDuplicates(input_Bams:List[File], outputFile:File, dir:String) : File = {
156
157
158
159
160
161
162
163
    val markDuplicates = new MarkDuplicates
    markDuplicates.input = input_Bams
    markDuplicates.output = outputFile
    markDuplicates.REMOVE_DUPLICATES = false
    markDuplicates.metrics = swapExt(dir,outputFile,".bam",".metrics")
    markDuplicates.outputIndex = swapExt(dir,markDuplicates.output,".bam",".bai")
    markDuplicates.memoryLimit = 2
    markDuplicates.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
166
167
168
169
170
171
172
173
174
    add(markDuplicates)
    
    return markDuplicates.output
  }
  
  def getReadGroup() : String = {
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
175
176
177
178
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
    
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
182
183
  
  def loadRunConfig(runConfig:Config, sampleConfig:Config, runDir: String) {
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
    config = Config.mergeConfigs(globalConfig.getAsConfig("mapping"), globalConfig)
    val inputType = runConfig.getAsString("inputtype", config.getAsString("inputtype", "dna"))
    if (inputType == "rna") aligner = config.getAsString("rna_aligner", "star-2pass")
187
188
189
190
191
192
193
194
195
196
    input_R1 = runConfig.getAsString("R1", null)
    input_R2 = runConfig.getAsString("R2", null)
    paired = (input_R2 != null)
    RGLB = runConfig.getAsString("ID")
    RGSM = sampleConfig.get("ID").toString
    if (runConfig.contains("PL")) RGPL = runConfig.getAsString("PL")
    if (runConfig.contains("PU")) RGPU = runConfig.getAsString("PU")
    if (runConfig.contains("CN")) RGCN = runConfig.getAsString("CN")
    outputDir = runDir
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
197
}
198
199

object Mapping extends PipelineCommand {
200
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
201
}