Flexiprep.scala 10.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.flexiprep

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.extensions.{ Zcat, Gzip }
21
import nl.lumc.sasc.biopet.utils.config.Configurable
22
import nl.lumc.sasc.biopet.utils.IoUtils._
23
import nl.lumc.sasc.biopet.extensions.tools.{ ValidateFastq, SeqStat, FastqSync }
24

Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.QScript
26

27
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
28
29
30
  def this() = this(null)

  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
31
  var inputR1: File = _
32
33

  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false)
34
  var inputR2: Option[File] = None
35

36
37
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
38

39
40
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
41

42
43
44
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** Location of summary file */
46
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  def summaryFiles: Map[String, File] = {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
50
    Map("input_R1" -> inputR1, "output_R1" -> fastqR1Qc) ++
51
      (if (paired) Map("input_R2" -> inputR2.get, "output_R2" -> fastqR2Qc.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
52
  }
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
55
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
56

57
  var paired: Boolean = inputR2.isDefined
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
58
59
  var R1Name: String = _
  var R2Name: String = _
60

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
61
62
63
64
  var fastqcR1: Fastqc = _
  var fastqcR2: Fastqc = _
  var fastqcR1After: Fastqc = _
  var fastqcR2After: Fastqc = _
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
68
69
70
71
72
73
74
75
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
76
  /** Function that's need to be executed before the script is accessed */
77
  def init() {
78
    require(outputDir != null, "Missing output directory on flexiprep module")
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
79
    require(inputR1 != null, "Missing input R1 on flexiprep module")
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
    require(sampleId != null, "Missing sample ID on flexiprep module")
    require(libId != null, "Missing library ID on flexiprep module")
82

83
    paired = inputR2.isDefined
84

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
85
    inputFiles :+= new InputFile(inputR1)
86
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
87

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
88
    R1Name = getUncompressedFileName(inputR1)
89
    inputR2.foreach { fileR2 =>
90
      paired = true
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
91
      R2Name = getUncompressedFileName(fileR2)
92
93
94
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
95
  /** Script to add jobs */
96
97
98
  def biopetScript() {
    runInitialJobs()

99
    if (paired) runTrimClip(inputR1, inputR2, outputDir)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
100
    else runTrimClip(inputR1, outputDir)
101

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
102
103
104
    val R1Files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
    val R2Files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
    runFinalize(R1Files.toList, R2Files.toList)
105
106
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
107
  /** Add init non chunkable jobs */
108
  def runInitialJobs() {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
109
    outputFiles += ("fastq_input_R1" -> inputR1)
110
    if (paired) outputFiles += ("fastq_input_R2" -> inputR2.get)
111

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
112
113
114
115
    fastqcR1 = Fastqc(this, inputR1, new File(outputDir, R1Name + ".fastqc/"))
    add(fastqcR1)
    addSummarizable(fastqcR1, "fastqc_R1")
    outputFiles += ("fastqc_R1" -> fastqcR1.output)
116

117
    val validateFastq = new ValidateFastq(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
118
    validateFastq.r1Fastq = inputR1
119
    validateFastq.r2Fastq = inputR2
120
121
122
    validateFastq.jobOutputFile = new File(outputDir, ".validate_fastq.log.out")
    add(validateFastq)

123
124
125
126
127
128
    if (config("abort_on_corrupt_fastq", default = true)) {
      val checkValidateFastq = new CheckValidateFastq
      checkValidateFastq.inputLogFile = validateFastq.jobOutputFile
      checkValidateFastq.jobOutputFile = new File(outputDir, ".check.validate_fastq.log.out")
      add(checkValidateFastq)
    }
129

130
    if (paired) {
131
      fastqcR2 = Fastqc(this, inputR2.get, new File(outputDir, R2Name + ".fastqc/"))
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
132
133
134
      add(fastqcR2)
      addSummarizable(fastqcR2, "fastqc_R2")
      outputFiles += ("fastqc_R2" -> fastqcR2.output)
135
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
136

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
137
138
139
140
    val seqstatR1 = SeqStat(this, inputR1, outputDir)
    seqstatR1.isIntermediate = true
    add(seqstatR1)
    addSummarizable(seqstatR1, "seqstat_R1")
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142

    if (paired) {
143
      val seqstatR2 = SeqStat(this, inputR2.get, outputDir)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
144
145
146
      seqstatR2.isIntermediate = true
      add(seqstatR2)
      addSummarizable(seqstatR2, "seqstat_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
147
    }
148
149
  }

150
151
152
153
154
155
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
156

Peter van 't Hof's avatar
Peter van 't Hof committed
157
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
158
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
159
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
160

Peter van 't Hof's avatar
Peter van 't Hof committed
161
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
162
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
163
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
166
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File]) =
167
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
168

Peter van 't Hof's avatar
Peter van 't Hof committed
169
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
172
173
  def runTrimClip(R1_in: File,
                  R2_in: Option[File],
                  outDir: File,
                  chunkarg: String): (File, Option[File]) = {
174
175
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
    var R1 = R1_in
    var R2 = R2_in
178

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
179
    val qcCmdR1 = new QcCommand(this, fastqcR1)
180
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
182
    qcCmdR1.output = if (paired) new File(outDir, fastqR1Qc.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
183
    else fastqR1Qc
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
184
    qcCmdR1.deps :+= fastqcR1.output
185
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
186
    addSummarizable(qcCmdR1, "qc_command_R1")
187
188

    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
189
      val qcCmdR2 = new QcCommand(this, fastqcR2)
190
      qcCmdR2.input = R2_in.get
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      qcCmdR2.output = new File(outDir, fastqR2Qc.get.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
192
      qcCmdR2.read = "R2"
Peter van 't Hof's avatar
Peter van 't Hof committed
193
      addSummarizable(qcCmdR2, "qc_command_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
194
195
196

      qcCmdR1.compress = false
      qcCmdR2.compress = false
197
198
199

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
      fqSync.inputFastq1 = qcCmdR1.output
      fqSync.inputFastq2 = qcCmdR2.output
Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
      fqSync.outputFastq1 = new File(outDir, fastqR1Qc.getName)
      fqSync.outputFastq2 = new File(outDir, fastqR2Qc.get.getName)
204
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
208
209
210

      val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {
        override def configName = "qc-cmd"

        override def beforeGraph(): Unit = {
          fqSync.beforeGraph()
Peter van 't Hof's avatar
Peter van 't Hof committed
211
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
214
215
216
217
218
219
          super.beforeGraph()
        }

        override def beforeCmd(): Unit = {
          qcCmdR1.beforeCmd()
          qcCmdR2.beforeCmd()
          fqSync.beforeCmd()
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
220
          commands.foreach(addPipeJob)
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
223
224
          super.beforeCmd()
        }
      }

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
225
226
      pipe.deps ::= fastqcR1.output
      pipe.deps ::= fastqcR2.output
Peter van 't Hof's avatar
Peter van 't Hof committed
227
228
229
      pipe.isIntermediate = !keepQcFastqFiles
      add(pipe)

230
231
232
233
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
234
235
236
237
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
238

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
239
240
241
    val seqstatR1After = SeqStat(this, R1, outDir)
    add(seqstatR1After)
    addSummarizable(seqstatR1After, "seqstat_R1_qc")
242
243

    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
244
245
246
      val seqstatR2After = SeqStat(this, R2.get, outDir)
      add(seqstatR2After)
      addSummarizable(seqstatR2After, "seqstat_R2_qc")
247
248
249
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
250
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
251
    (R1, R2)
252
253
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
254
  /** Adds last non chunkable jobs */
255
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
256
257
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
258

Peter van 't Hof's avatar
Peter van 't Hof committed
259
    if (fastq_R1.length > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
262
263
264
265
266
267
      val zcat = new Zcat(this)
      zcat.input = fastq_R1
      add(zcat | new Gzip(this) > fastqR1Qc)
      if (paired) {
        val zcat = new Zcat(this)
        zcat.input = fastq_R2
        add(zcat | new Gzip(this) > fastqR2Qc.get)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
268
    }
269

270
271
272
273
274
275
    val validateFastq = new ValidateFastq(this)
    validateFastq.r1Fastq = fastqR1Qc
    validateFastq.r2Fastq = fastqR2Qc
    validateFastq.jobOutputFile = new File(outputDir, ".validate_fastq.qc.log.out")
    add(validateFastq)

276
277
278
279
280
281
    if (config("abort_on_corrupt_fastq", default = true)) {
      val checkValidateFastq = new CheckValidateFastq
      checkValidateFastq.inputLogFile = validateFastq.jobOutputFile
      checkValidateFastq.jobOutputFile = new File(outputDir, ".check.validate_fastq.qc.log.out")
      add(checkValidateFastq)
    }
282

Peter van 't Hof's avatar
Peter van 't Hof committed
283
284
    outputFiles += ("output_R1_gzip" -> fastqR1Qc)
    if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
285

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
286
287
288
    fastqcR1After = Fastqc(this, fastqR1Qc, new File(outputDir, R1Name + ".qc.fastqc/"))
    add(fastqcR1After)
    addSummarizable(fastqcR1After, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
289

Peter van 't Hof's avatar
Peter van 't Hof committed
290
    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
291
292
293
      fastqcR2After = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2Name + ".qc.fastqc/"))
      add(fastqcR2After)
      addSummarizable(fastqcR2After, "fastqc_R2_qc")
294
295
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
296
    addSummaryJobs()
297
298
299
300
  }
}

object Flexiprep extends PipelineCommand