Flexiprep.scala 10.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.flexiprep

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.extensions.{ Zcat, Gzip }
21
import nl.lumc.sasc.biopet.utils.config.Configurable
22
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
23

Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.QScript
25

26
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
27
28
29
30
31
32
  def this() = this(null)

  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
  var input_R1: File = _

  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false)
33
  var input_R2: Option[File] = None
34

35
36
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
37

38
39
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
40

41
42
43
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  /** Location of summary file */
45
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
48
  def summaryFiles: Map[String, File] = {
49
50
    Map("input_R1" -> input_R1, "output_R1" -> fastqR1Qc) ++
      (if (paired) Map("input_R2" -> input_R2.get, "output_R2" -> fastqR2Qc.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
51
  }
52

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
54
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
55

56
  var paired: Boolean = input_R2.isDefined
57
58
59
60
61
62
63
64
65
66
  var R1_ext: String = _
  var R2_ext: String = _
  var R1_name: String = _
  var R2_name: String = _

  var fastqc_R1: Fastqc = _
  var fastqc_R2: Fastqc = _
  var fastqc_R1_after: Fastqc = _
  var fastqc_R2_after: Fastqc = _

Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
69
70
71
72
73
74
75
76
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

77
78
79
80
81
  /** Possible compression extensions to trim from input files. */
  private val zipExtensions = Set(".gz", ".gzip")

  /**
   * Given a file object and a set of compression extensions, return the filename without any of the compression
bow's avatar
bow committed
82
   * extensions.
83
84
   *
   * Examples:
bow's avatar
bow committed
85
86
   *  - my_file.fq.gz returns "my_file.fq"
   *  - my_other_file.fastq returns "my_file.fastq"
87
88
89
   *
   * @param f Input file object.
   * @param exts Possible compression extensions to trim.
bow's avatar
bow committed
90
   * @return Filename without compression extension.
91
   */
bow's avatar
bow committed
92
  private def getUncompressedName(f: File, exts: Set[String] = zipExtensions): String =
93
94
95
96
    exts.foldLeft(f.toString) { case (fname, ext) =>
      if (fname.toLowerCase.endsWith(ext)) fname.dropRight(ext.length)
      else fname
    }
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
  /** Function that's need to be executed before the script is accessed */
99
  def init() {
100
    require(outputDir != null, "Missing output directory on flexiprep module")
101
    require(input_R1 != null, "Missing input R1 on flexiprep module")
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
    require(sampleId != null, "Missing sample ID on flexiprep module")
    require(libId != null, "Missing library ID on flexiprep module")
104

105
    paired = input_R2.isDefined
106

Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
109

bow's avatar
bow committed
110
    R1_name = getUncompressedName(input_R1)
111
    input_R2.foreach { fileR2 =>
Peter van 't Hof's avatar
Peter van 't Hof committed
112
        paired = true
bow's avatar
bow committed
113
        R2_name = getUncompressedName(fileR2)
114
115
116
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
117
  /** Script to add jobs */
118
119
120
  def biopetScript() {
    runInitialJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
121
    if (paired) runTrimClip(input_R1, input_R2, outputDir)
122
    else runTrimClip(input_R1, outputDir)
123
124
125
126
127
128

    val R1_files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
    val R2_files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
    runFinalize(R1_files.toList, R2_files.toList)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
129
  /** Add init non chunkable jobs */
130
  def runInitialJobs() {
131
132
    outputFiles += ("fastq_input_R1" -> input_R1)
    if (paired) outputFiles += ("fastq_input_R2" -> input_R2.get)
133

Peter van 't Hof's avatar
Peter van 't Hof committed
134
    fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/"))
135
    add(fastqc_R1)
136
    addSummarizable(fastqc_R1, "fastqc_R1")
137
138
139
    outputFiles += ("fastqc_R1" -> fastqc_R1.output)

    if (paired) {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
      fastqc_R2 = Fastqc(this, input_R2.get, new File(outputDir, R2_name + ".fastqc/"))
141
      add(fastqc_R2)
142
      addSummarizable(fastqc_R2, "fastqc_R2")
143
144
      outputFiles += ("fastqc_R2" -> fastqc_R2.output)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
145
146
147
148
149
150
151
152
153
154
155
156

    val seqstat_R1 = SeqStat(this, input_R1, outputDir)
    seqstat_R1.isIntermediate = true
    add(seqstat_R1)
    addSummarizable(seqstat_R1, "seqstat_R1")

    if (paired) {
      val seqstat_R2 = SeqStat(this, input_R2.get, outputDir)
      seqstat_R2.isIntermediate = true
      add(seqstat_R2)
      addSummarizable(seqstat_R2, "seqstat_R2")
    }
157
158
  }

159
160
161
162
163
164
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
165

Peter van 't Hof's avatar
Peter van 't Hof committed
166
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
167
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
168
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
169

Peter van 't Hof's avatar
Peter van 't Hof committed
170
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
171
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
172
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
173

Peter van 't Hof's avatar
Peter van 't Hof committed
174
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
175
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File]) =
176
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
177

Peter van 't Hof's avatar
Peter van 't Hof committed
178
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
181
182
  def runTrimClip(R1_in: File,
                  R2_in: Option[File],
                  outDir: File,
                  chunkarg: String): (File, Option[File]) = {
183
184
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
    var R1 = R1_in
    var R2 = R2_in
187

188
189
    val qcCmdR1 = new QcCommand(this, fastqc_R1)
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
190
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
191
    qcCmdR1.output = if (paired) new File(outDir, fastqR1Qc.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
192
    else fastqR1Qc
Peter van 't Hof's avatar
Peter van 't Hof committed
193
    qcCmdR1.deps :+= fastqc_R1.output
194
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
195
    addSummarizable(qcCmdR1, "qc_command_R1")
196
197

    if (paired) {
198
199
      val qcCmdR2 = new QcCommand(this, fastqc_R2)
      qcCmdR2.input = R2_in.get
Peter van 't Hof's avatar
Peter van 't Hof committed
200
      qcCmdR2.output = new File(outDir, fastqR2Qc.get.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      qcCmdR2.read = "R2"
Peter van 't Hof's avatar
Peter van 't Hof committed
202
      addSummarizable(qcCmdR2, "qc_command_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
203
204
205

      qcCmdR1.compress = false
      qcCmdR2.compress = false
206
207
208

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
      fqSync.inputFastq1 = qcCmdR1.output
      fqSync.inputFastq2 = qcCmdR2.output
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
      fqSync.outputFastq1 = new File(outDir, fastqR1Qc.getName)
      fqSync.outputFastq2 = new File(outDir, fastqR2Qc.get.getName)
213
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236

      val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {
        override def configName = "qc-cmd"

        override def beforeGraph(): Unit = {
          fqSync.beforeGraph()
          super.beforeGraph()
        }

        override def beforeCmd(): Unit = {
          qcCmdR1.beforeCmd()
          qcCmdR2.beforeCmd()
          fqSync.beforeCmd()
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
          super.beforeCmd()
        }
      }

      pipe.deps ::= fastqc_R1.output
      pipe.deps ::= fastqc_R2.output
      pipe.isIntermediate = !keepQcFastqFiles
      add(pipe)

237
238
239
240
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
241
242
243
244
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
245

bow's avatar
bow committed
246
    val seqstat_R1_after = SeqStat(this, R1, outDir)
247
    add(seqstat_R1_after)
248
    addSummarizable(seqstat_R1_after, "seqstat_R1_qc")
249
250

    if (paired) {
bow's avatar
bow committed
251
      val seqstat_R2_after = SeqStat(this, R2.get, outDir)
252
      add(seqstat_R2_after)
253
      addSummarizable(seqstat_R2_after, "seqstat_R2_qc")
254
255
256
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
257
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
258
    (R1, R2)
259
260
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
261
  /** Adds last non chunkable jobs */
262
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
263
264
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
265

Peter van 't Hof's avatar
Peter van 't Hof committed
266
    if (fastq_R1.length > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
267
268
269
270
271
272
273
274
      val zcat = new Zcat(this)
      zcat.input = fastq_R1
      add(zcat | new Gzip(this) > fastqR1Qc)
      if (paired) {
        val zcat = new Zcat(this)
        zcat.input = fastq_R2
        add(zcat | new Gzip(this) > fastqR2Qc.get)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
275
    }
276

Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
    outputFiles += ("output_R1_gzip" -> fastqR1Qc)
    if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
279

Peter van 't Hof's avatar
Peter van 't Hof committed
280
281
282
    fastqc_R1_after = Fastqc(this, fastqR1Qc, new File(outputDir, R1_name + ".qc.fastqc/"))
    add(fastqc_R1_after)
    addSummarizable(fastqc_R1_after, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
283

Peter van 't Hof's avatar
Peter van 't Hof committed
284
285
286
287
    if (paired) {
      fastqc_R2_after = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2_name + ".qc.fastqc/"))
      add(fastqc_R2_after)
      addSummarizable(fastqc_R2_after, "fastqc_R2_qc")
288
289
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
290
    addSummaryJobs()
291
292
293
294
  }
}

object Flexiprep extends PipelineCommand