Flexiprep.scala 10.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.flexiprep

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.extensions.{ Zcat, Gzip }
21
import nl.lumc.sasc.biopet.utils.config.Configurable
22
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import org.broadinstitute.gatk.queue.QScript
24

25
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
26
27
28
29
30
31
  def this() = this(null)

  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
  var input_R1: File = _

  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false)
32
  var input_R2: Option[File] = None
33

34
35
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
36

37
38
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
39

40
41
42
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  /** Location of summary file */
44
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
47
  def summaryFiles: Map[String, File] = {
48
49
    Map("input_R1" -> input_R1, "output_R1" -> fastqR1Qc) ++
      (if (paired) Map("input_R2" -> input_R2.get, "output_R2" -> fastqR2Qc.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
50
  }
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
53
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
54

55
  var paired: Boolean = input_R2.isDefined
56
57
58
59
60
61
62
63
64
65
  var R1_ext: String = _
  var R2_ext: String = _
  var R1_name: String = _
  var R2_name: String = _

  var fastqc_R1: Fastqc = _
  var fastqc_R2: Fastqc = _
  var fastqc_R1_after: Fastqc = _
  var fastqc_R2_after: Fastqc = _

Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
68
69
70
71
72
73
74
75
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
76
  /** Function that's need to be executed before the script is accessed */
77
  def init() {
78
    require(outputDir != null, "Missing output directory on flexiprep module")
79
    require(input_R1 != null, "Missing input R1 on flexiprep module")
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
    require(sampleId != null, "Missing sample ID on flexiprep module")
    require(libId != null, "Missing library ID on flexiprep module")
82

83
    paired = input_R2.isDefined
84

Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
87

88
89
90
    if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
    else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
    else R1_name = input_R1.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
91
    R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length)
92
93
    R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext))

Peter van 't Hof's avatar
Peter van 't Hof committed
94
    input_R2 match {
Peter van 't Hof's avatar
Peter van 't Hof committed
95
      case Some(fileR2) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98
99
        paired = true
        if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz"))
        else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip"))
        else R2_name = fileR2.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
100
        R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length)
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
        R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext))
      case _ =>
103
104
105
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
106
  /** Script to add jobs */
107
108
109
  def biopetScript() {
    runInitialJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
110
    if (paired) runTrimClip(input_R1, input_R2, outputDir)
111
    else runTrimClip(input_R1, outputDir)
112
113
114
115
116
117

    val R1_files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
    val R2_files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
    runFinalize(R1_files.toList, R2_files.toList)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
118
  /** Add init non chunkable jobs */
119
  def runInitialJobs() {
120
121
    outputFiles += ("fastq_input_R1" -> input_R1)
    if (paired) outputFiles += ("fastq_input_R2" -> input_R2.get)
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123
    fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/"))
124
    add(fastqc_R1)
125
    addSummarizable(fastqc_R1, "fastqc_R1")
126
127
128
    outputFiles += ("fastqc_R1" -> fastqc_R1.output)

    if (paired) {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      fastqc_R2 = Fastqc(this, input_R2.get, new File(outputDir, R2_name + ".fastqc/"))
130
      add(fastqc_R2)
131
      addSummarizable(fastqc_R2, "fastqc_R2")
132
133
      outputFiles += ("fastqc_R2" -> fastqc_R2.output)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135
136
137
138
139
140
141
142
143
144
145

    val seqstat_R1 = SeqStat(this, input_R1, outputDir)
    seqstat_R1.isIntermediate = true
    add(seqstat_R1)
    addSummarizable(seqstat_R1, "seqstat_R1")

    if (paired) {
      val seqstat_R2 = SeqStat(this, input_R2.get, outputDir)
      seqstat_R2.isIntermediate = true
      add(seqstat_R2)
      addSummarizable(seqstat_R2, "seqstat_R2")
    }
146
147
  }

148
149
150
151
152
153
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
154

Peter van 't Hof's avatar
Peter van 't Hof committed
155
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
158

Peter van 't Hof's avatar
Peter van 't Hof committed
159
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
162

Peter van 't Hof's avatar
Peter van 't Hof committed
163
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
164
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File], List[File]) =
165
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
166

Peter van 't Hof's avatar
Peter van 't Hof committed
167
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
168
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File, chunkarg: String): (File, Option[File], List[File]) = {
169
170
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
    var R1 = R1_in
    var R2 = R2_in
173
    def deps: List[File] = Nil
174

175
176
    val qcCmdR1 = new QcCommand(this, fastqc_R1)
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
177
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
178
    qcCmdR1.output = if (paired) new File(fastqR1Qc.getAbsolutePath.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
179
    else fastqR1Qc
180
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    addSummarizable(qcCmdR1, "qc_command_R1")
182
183

    if (paired) {
184
185
      val qcCmdR2 = new QcCommand(this, fastqc_R2)
      qcCmdR2.input = R2_in.get
Peter van 't Hof's avatar
Peter van 't Hof committed
186
      qcCmdR2.output = new File(fastqR2Qc.get.getAbsolutePath.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      qcCmdR2.read = "R2"
Peter van 't Hof's avatar
Peter van 't Hof committed
188
      addSummarizable(qcCmdR2, "qc_command_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
189
190
191

      qcCmdR1.compress = false
      qcCmdR2.compress = false
192
193
194

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
      fqSync.inputFastq1 = qcCmdR1.output
      fqSync.inputFastq2 = qcCmdR2.output
197
198
      fqSync.outputFastq1 = fastqR1Qc
      fqSync.outputFastq2 = fastqR2Qc.get
199
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
      //add(fqSync)

      val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {

        override def defaultThreads = 4
        override def defaultCoreMemory = 4.0
        override def configName = "qc-cmd"

        override def beforeGraph(): Unit = {
          fqSync.beforeGraph()
          super.beforeGraph()
        }

        override def beforeCmd(): Unit = {
          qcCmdR1.beforeCmd()
          qcCmdR2.beforeCmd()
          fqSync.beforeCmd()
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
          super.beforeCmd()
        }
      }

      pipe.deps ::= fastqc_R1.output
      pipe.deps ::= fastqc_R2.output
      pipe.isIntermediate = !keepQcFastqFiles
      add(pipe)

227
228
229
230
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
231
232
233
234
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
235

bow's avatar
bow committed
236
    val seqstat_R1_after = SeqStat(this, R1, outDir)
237
    add(seqstat_R1_after)
238
    addSummarizable(seqstat_R1_after, "seqstat_R1_qc")
239
240

    if (paired) {
bow's avatar
bow committed
241
      val seqstat_R2_after = SeqStat(this, R2.get, outDir)
242
      add(seqstat_R2_after)
243
      addSummarizable(seqstat_R2_after, "seqstat_R2_qc")
244
245
246
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
247
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
248
    (R1, R2, deps)
249
250
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
251
  /** Adds last non chunkable jobs */
252
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
253
254
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
255

256
    if (!skipTrim || !skipClip) {
257
258
259
260
      if (fastq_R1.length > 1) {
        add(Zcat(this, fastq_R1, fastqR1Qc) | new Gzip(this) > fastqR1Qc)
        if (paired) add(Zcat(this, fastq_R2, fastqR2Qc.get) | new Gzip(this) > fastqR2Qc.get)
      }
261

262
263
      outputFiles += ("output_R1_gzip" -> fastqR1Qc)
      if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
264

265
      fastqc_R1_after = Fastqc(this, fastqR1Qc, new File(outputDir, R1_name + ".qc.fastqc/"))
266
      add(fastqc_R1_after)
267
      addSummarizable(fastqc_R1_after, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
268

269
      if (paired) {
270
        fastqc_R2_after = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2_name + ".qc.fastqc/"))
271
        add(fastqc_R2_after)
272
        addSummarizable(fastqc_R2_after, "fastqc_R2_qc")
273
274
275
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
276
    addSummaryJobs()
277
278
279
280
  }
}

object Flexiprep extends PipelineCommand