Mapping.scala 10 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
package nl.lumc.sasc.biopet.pipelines.mapping

3
4
import nl.lumc.sasc.biopet.function._
import nl.lumc.sasc.biopet.function.aligners._
Peter van 't Hof's avatar
Peter van 't Hof committed
5
6
import java.util.Date
import nl.lumc.sasc.biopet.core._
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.config._
Peter van 't Hof's avatar
Peter van 't Hof committed
8
9
10
11
12
13
14
15
16
17
import nl.lumc.sasc.biopet.pipelines.flexiprep._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard.MarkDuplicates
import org.broadinstitute.sting.queue.extensions.picard.SortSam
import org.broadinstitute.sting.queue.extensions.picard.AddOrReplaceReadGroups
import org.broadinstitute.sting.queue.function._
import scala.util.parsing.json._
import org.broadinstitute.sting.utils.variant._

Peter van 't Hof's avatar
Peter van 't Hof committed
18
class Mapping(val globalConfig: Config, val configPath: List[String]) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
19
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
22
23
  @Argument(doc="Config Json file",shortName="config", required=false) var configfiles: List[File] = Nil
  @Input(doc="R1 fastq file", shortName="R1",required=true) var input_R1: File = _
  @Input(doc="R2 fastq file", shortName="R2", required=false) var input_R2: File = _
  @Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _
24
  @Argument(doc="Output name", shortName="outputName", required=false) var outputName: String = _
Peter van 't Hof's avatar
Peter van 't Hof committed
25
26
27
  @Argument(doc="Skip flexiprep", shortName="skipflexiprep", required=false) var skipFlexiprep: Boolean = false
  @Argument(doc="Skip mark duplicates", shortName="skipmarkduplicates", required=false) var skipMarkduplicates: Boolean = false
  @Argument(doc="Alginer", shortName="ALN", required=false) var aligner: String = _
Peter van 't Hof's avatar
Peter van 't Hof committed
28
  @Argument(doc="Reference", shortName="R", required=false) var referenceFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
29
30
31
32
33
34
35
36
37
38
39
40
  
  // Readgroup items
  @Argument(doc="Readgroup ID", shortName="RGID", required=false) var RGID: String = _
  @Argument(doc="Readgroup Library", shortName="RGLB", required=false) var RGLB: String = _
  @Argument(doc="Readgroup Platform", shortName="RGPL", required=false) var RGPL: String = _
  @Argument(doc="Readgroup platform unit", shortName="RGPU", required=false) var RGPU: String = _
  @Argument(doc="Readgroup sample", shortName="RGSM", required=false) var RGSM: String = _
  @Argument(doc="Readgroup sequencing center", shortName="RGCN", required=false) var RGCN: String = _
  @Argument(doc="Readgroup description", shortName="RGDS", required=false) var RGDS: String = _
  @Argument(doc="Readgroup sequencing date", shortName="RGDT", required=false) var RGDT: Date = _
  @Argument(doc="Readgroup predicted insert size", shortName="RGPI", required=false) var RGPI: Int = _
  
Peter van 't Hof's avatar
Peter van 't Hof committed
41
  def this() = this(new Config(), Nil)
Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
44
45
46
  
  var paired: Boolean = false
  
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
49
50
51
52
53
    //config = Config.mergeConfigs(globalConfig("mapping"), globalConfig)
    var inputtype:String = config("inputtype", "dna")
    if (aligner == null) {
      if (inputtype == "rna") aligner = config("aligner", "star-2pass")
      else aligner = config("aligner", "bwa")
    }
    if (referenceFile == null) referenceFile = config("referenceFile")
Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
56
57
58
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
    if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module")
    paired = (input_R2 != null)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
59
    if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB")
Peter van 't Hof's avatar
Peter van 't Hof committed
60
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
61
    if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM")
Peter van 't Hof's avatar
Peter van 't Hof committed
62
    else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
Peter van 't Hof's avatar
Peter van 't Hof committed
63
    if (RGID == null && configContains("RGID")) RGID = config("RGID")
Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
66
    else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
    else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")    
    
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
69
70
    if (RGPL == null) RGPL = config("RGPL", "illumina")
    if (RGPU == null) RGPU = config("RGPU", "na")
    if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN")
    if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS")
71
72
    
    if (outputName == null) outputName = RGID
Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
75
76
77
78
79
  }
  
  def script() {
    this.init()
    var fastq_R1: String = input_R1
    var fastq_R2: String = if (paired) input_R2 else ""
    if (!skipFlexiprep) {
Peter van 't Hof's avatar
Peter van 't Hof committed
80
      val flexiprep = new Flexiprep(globalConfig, configFullPath)
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
83
84
85
86
87
88
      flexiprep.input_R1 = fastq_R1
      if (paired) flexiprep.input_R2 = fastq_R2
      flexiprep.outputDir = outputDir + "flexiprep/"
      flexiprep.script
      addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
      fastq_R1 = flexiprep.outputFiles("output_R1")
      if (paired) fastq_R2 = flexiprep.outputFiles("output_R2")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    var bamFile:File = null
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    if (aligner == "bwa") {
Peter van 't Hof's avatar
Peter van 't Hof committed
91
      val bwaCommand = new Bwa(globalConfig, configFullPath)
92
93
94
95
      bwaCommand.R1 = fastq_R1
      if (paired) bwaCommand.R2 = fastq_R2
      bwaCommand.RG = getReadGroup
      bwaCommand.output = new File(outputDir + outputName + ".sam")
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      add(bwaCommand)
97
      bamFile = addSortSam(List(bwaCommand.output), swapExt(outputDir,bwaCommand.output,".sam",".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
98
    } else if (aligner == "star") {
Peter van 't Hof's avatar
Peter van 't Hof committed
99
      val starCommand = Star(globalConfig, configPath, fastq_R1, if (paired) fastq_R2 else null, outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
100
      add(starCommand)
101
      bamFile = addAddOrReplaceReadGroups(List(starCommand.outputSam), new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
102
    } else if (aligner == "star-2pass") {
Peter van 't Hof's avatar
Peter van 't Hof committed
103
      val starCommand_pass1 = Star(globalConfig, configPath, fastq_R1, if (paired) fastq_R2 else null, outputDir + "star-2pass/aln-pass1/")
104
      starCommand_pass1.afterGraph
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
      add(starCommand_pass1)
      
Peter van 't Hof's avatar
Peter van 't Hof committed
107
      val starCommand_reindex = new Star(globalConfig, configFullPath)
108
109
110
111
112
      starCommand_reindex.sjdbFileChrStartEnd = starCommand_pass1.outputTab
      starCommand_reindex.outputDir = qscript.outputDir + "star-2pass/re-index/" 
      starCommand_reindex.runmode = "genomeGenerate"
      starCommand_reindex.sjdbOverhang = 75
      starCommand_reindex.afterGraph
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
      add(starCommand_reindex)
      
Peter van 't Hof's avatar
Peter van 't Hof committed
115
      val starCommand_pass2 = Star(globalConfig, configPath, fastq_R1, if (paired) fastq_R2 else null, outputDir + "star-2pass/aln-pass2/")
116
117
      starCommand_pass2.genomeDir = starCommand_reindex.outputDir
      starCommand_pass2.afterGraph
Peter van 't Hof's avatar
Peter van 't Hof committed
118
      add(starCommand_pass2)
119
      bamFile = addAddOrReplaceReadGroups(List(starCommand_pass2.outputSam), new File(outputDir + outputName + ".bam"), outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
120
    } else throw new IllegalStateException("Option Alginer: '" + aligner + "' is not valid")
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
    
    if (!skipMarkduplicates) bamFile = addMarkDuplicates(List(bamFile), swapExt(outputDir,bamFile,".bam",".dedup.bam"), outputDir)
123
    outputFiles += ("finalBamFile" -> bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
124
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
125
  
Peter van 't Hof's avatar
Peter van 't Hof committed
126
  def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = {
127
128
129
130
131
132
133
    val sortSam = new SortSam
    sortSam.input = inputSam
    sortSam.createIndex = true
    sortSam.output = outputFile
    sortSam.memoryLimit = 2
    sortSam.nCoresRequest = 2
    sortSam.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135
136
137
138
139
    add(sortSam)
    
    return sortSam.output
  }
  
  def addAddOrReplaceReadGroups(inputSam:List[File], outputFile:File, dir:String) : File = {
140
141
142
143
144
145
146
    val addOrReplaceReadGroups = new AddOrReplaceReadGroups
      addOrReplaceReadGroups.input = inputSam
      addOrReplaceReadGroups.output = outputFile
      addOrReplaceReadGroups.createIndex = true
      addOrReplaceReadGroups.memoryLimit = 2
      addOrReplaceReadGroups.nCoresRequest = 2
      addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G"
147
      
148
149
150
151
152
      addOrReplaceReadGroups.RGID = qscript.RGID
      addOrReplaceReadGroups.RGLB = qscript.RGLB
      addOrReplaceReadGroups.RGPL = qscript.RGPL
      addOrReplaceReadGroups.RGPU = qscript.RGPU
      addOrReplaceReadGroups.RGSM = qscript.RGSM
153
154
      if (RGCN != null) this.RGCN = qscript.RGCN
      if (RGDS != null) this.RGDS = qscript.RGDS
Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
    add(addOrReplaceReadGroups)
    
Peter van 't Hof's avatar
Peter van 't Hof committed
157
158
159
    return addOrReplaceReadGroups.output
  }
  
160
  def addMarkDuplicates(input_Bams:List[File], outputFile:File, dir:String) : File = {
161
162
163
164
165
166
167
168
    val markDuplicates = new MarkDuplicates
    markDuplicates.input = input_Bams
    markDuplicates.output = outputFile
    markDuplicates.REMOVE_DUPLICATES = false
    markDuplicates.metrics = swapExt(dir,outputFile,".bam",".metrics")
    markDuplicates.outputIndex = swapExt(dir,markDuplicates.output,".bam",".bai")
    markDuplicates.memoryLimit = 2
    markDuplicates.jobResourceRequests :+= "h_vmem=4G"
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171
172
173
174
175
176
177
178
179
    add(markDuplicates)
    
    return markDuplicates.output
  }
  
  def getReadGroup() : String = {
    var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
    RG += "LB:" + RGLB + "\\t"
    RG += "PL:" + RGPL + "\\t"
    RG += "PU:" + RGPU + "\\t"
    RG += "SM:" + RGSM + "\\t"
180
181
182
183
    if (RGCN != null) RG += "CN:" + RGCN + "\\t"
    if (RGDS != null) RG += "DS" + RGDS + "\\t"
    if (RGDT != null) RG += "DT" + RGDT + "\\t"
    if (RGPI > 0) RG += "PI" + RGPI + "\\t"
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
    
    return RG.substring(0, RG.lastIndexOf("\\t"))
  }
187
  
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190
191
192
193
194
195
  def loadRunConfig(runConfig:Map[String,Any], sampleConfig:Map[String,Any], runDir: String) {
    //config = Config.mergeConfigs(globalConfig.getAsConfig("mapping"), globalConfig)
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
    else inputType = config("inputtype", "dna")
    if (inputType == "rna") aligner = config("rna_aligner", "star-2pass")
    if (runConfig.contains("R1")) input_R1 = runConfig("R1").toString
    if (runConfig.contains("R2")) input_R1 = runConfig("R2").toString
196
    paired = (input_R2 != null)
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
199
200
201
    RGLB = runConfig("ID").toString
    RGSM = sampleConfig("ID").toString
    if (runConfig.contains("PL")) RGPL = runConfig("PL").toString
    if (runConfig.contains("PU")) RGPU = runConfig("PU").toString
    if (runConfig.contains("CN")) RGCN = runConfig("CN").toString
202
203
    outputDir = runDir
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
204
}
205
206

object Mapping extends PipelineCommand {
207
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class"
208
}