Flexiprep.scala 10.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.flexiprep

18
19
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
22
23
import nl.lumc.sasc.biopet.extensions.{ Pbzip2, Zcat, Gzip, Sickle }
import nl.lumc.sasc.biopet.utils.config.Configurable
24
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.QScript
26

27
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
28
29
30
31
32
33
  def this() = this(null)

  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
  var input_R1: File = _

  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false)
34
  var input_R2: Option[File] = None
35

36
37
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
38

39
40
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
41

42
43
44
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** Location of summary file */
46
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  def summaryFiles: Map[String, File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
52
53
54
    if (!skipTrim || !skipClip)
      Map("input_R1" -> input_R1, "output_R1" -> outputFiles("output_R1_gzip")) ++
        (if (paired) Map("input_R2" -> input_R2.get, "output_R2" -> outputFiles("output_R2_gzip")) else Map())
    else Map("input_R1" -> input_R1) ++
      (if (paired) Map("input_R2" -> input_R2.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
55
  }
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
58
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
59

60
  var paired: Boolean = input_R2.isDefined
61
62
63
64
65
66
67
68
69
70
  var R1_ext: String = _
  var R2_ext: String = _
  var R1_name: String = _
  var R2_name: String = _

  var fastqc_R1: Fastqc = _
  var fastqc_R2: Fastqc = _
  var fastqc_R1_after: Fastqc = _
  var fastqc_R2_after: Fastqc = _

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
75
76
77
78
79
80
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
81
  /** Function that's need to be executed before the script is accessed */
82
  def init() {
83
    require(outputDir != null, "Missing output directory on flexiprep module")
84
    require(input_R1 != null, "Missing input R1 on flexiprep module")
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    require(sampleId != null, "Missing sample ID on flexiprep module")
    require(libId != null, "Missing library ID on flexiprep module")
87

88
    paired = input_R2.isDefined
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
92

93
94
95
    if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
    else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
    else R1_name = input_R1.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length)
97
98
    R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext))

Peter van 't Hof's avatar
Peter van 't Hof committed
99
    input_R2 match {
Peter van 't Hof's avatar
Peter van 't Hof committed
100
      case Some(fileR2) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
103
104
        paired = true
        if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz"))
        else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip"))
        else R2_name = fileR2.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
105
        R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length)
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
        R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext))
      case _ =>
108
109
110
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
111
  /** Script to add jobs */
112
113
114
  def biopetScript() {
    runInitialJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
115
    val out = if (paired) runTrimClip(outputFiles("fastq_input_R1"), Some(outputFiles("fastq_input_R2")), outputDir)
116
117
118
119
120
121
122
    else runTrimClip(outputFiles("fastq_input_R1"), outputDir)

    val R1_files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
    val R2_files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
    runFinalize(R1_files.toList, R2_files.toList)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
123
  /** Add init non chunkable jobs */
124
  def runInitialJobs() {
125
126
    outputFiles += ("fastq_input_R1" -> input_R1)
    if (paired) outputFiles += ("fastq_input_R2" -> input_R2.get)
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
    fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/"))
129
    add(fastqc_R1)
130
    addSummarizable(fastqc_R1, "fastqc_R1")
131
132
133
    outputFiles += ("fastqc_R1" -> fastqc_R1.output)

    if (paired) {
Peter van 't Hof's avatar
Peter van 't Hof committed
134
      fastqc_R2 = Fastqc(this, input_R2.get, new File(outputDir, R2_name + ".fastqc/"))
135
      add(fastqc_R2)
136
      addSummarizable(fastqc_R2, "fastqc_R2")
137
138
      outputFiles += ("fastqc_R2" -> fastqc_R2.output)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
141
142
143
144
145
146
147
148
149
150

    val seqstat_R1 = SeqStat(this, input_R1, outputDir)
    seqstat_R1.isIntermediate = true
    add(seqstat_R1)
    addSummarizable(seqstat_R1, "seqstat_R1")

    if (paired) {
      val seqstat_R2 = SeqStat(this, input_R2.get, outputDir)
      seqstat_R2.isIntermediate = true
      add(seqstat_R2)
      addSummarizable(seqstat_R2, "seqstat_R2")
    }
151
152
  }

153
154
155
156
157
158
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
163

Peter van 't Hof's avatar
Peter van 't Hof committed
164
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
167

Peter van 't Hof's avatar
Peter van 't Hof committed
168
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
169
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File], List[File]) =
170
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
171

Peter van 't Hof's avatar
Peter van 't Hof committed
172
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
173
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File, chunkarg: String): (File, Option[File], List[File]) = {
174
175
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
    var R1 = R1_in
    var R2 = R2_in
178
    def deps: List[File] = Nil
179

180
181
    val qcCmdR1 = new QcCommand(this, fastqc_R1)
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
182
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
183
    qcCmdR1.output = if (paired) new File(fastqR1Qc.getAbsolutePath.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
184
    else fastqR1Qc
185
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
186
    addSummarizable(qcCmdR1, "qc_command_R1")
187
188

    if (paired) {
189
190
      val qcCmdR2 = new QcCommand(this, fastqc_R2)
      qcCmdR2.input = R2_in.get
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      qcCmdR2.output = new File(fastqR2Qc.get.getAbsolutePath.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
192
      qcCmdR2.read = "R2"
Peter van 't Hof's avatar
Peter van 't Hof committed
193
      addSummarizable(qcCmdR2, "qc_command_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
194
195
196

      qcCmdR1.compress = false
      qcCmdR2.compress = false
197
198
199

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
      fqSync.inputFastq1 = qcCmdR1.output
      fqSync.inputFastq2 = qcCmdR2.output
202
203
      fqSync.outputFastq1 = fastqR1Qc
      fqSync.outputFastq2 = fastqR2Qc.get
204
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
      //add(fqSync)

      val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {

        override def defaultThreads = 4
        override def defaultCoreMemory = 4.0
        override def configName = "qc-cmd"

        override def beforeGraph(): Unit = {
          fqSync.beforeGraph()
          super.beforeGraph()
        }

        override def beforeCmd(): Unit = {
          qcCmdR1.beforeCmd()
          qcCmdR2.beforeCmd()
          fqSync.beforeCmd()
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
          super.beforeCmd()
        }
      }

      pipe.deps ::= fastqc_R1.output
      pipe.deps ::= fastqc_R2.output
      pipe.isIntermediate = !keepQcFastqFiles
      add(pipe)

232
233
234
235
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
236
237
238
239
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
240

bow's avatar
bow committed
241
    val seqstat_R1_after = SeqStat(this, R1, outDir)
242
    add(seqstat_R1_after)
243
    addSummarizable(seqstat_R1_after, "seqstat_R1_qc")
244
245

    if (paired) {
bow's avatar
bow committed
246
      val seqstat_R2_after = SeqStat(this, R2.get, outDir)
247
      add(seqstat_R2_after)
248
      addSummarizable(seqstat_R2_after, "seqstat_R2_qc")
249
250
251
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
252
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
253
    (R1, R2, deps)
254
255
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
256
  /** Adds last non chunkable jobs */
257
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
258
259
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
260

261
    if (!skipTrim || !skipClip) {
262
263
264
265
      if (fastq_R1.length > 1) {
        add(Zcat(this, fastq_R1, fastqR1Qc) | new Gzip(this) > fastqR1Qc)
        if (paired) add(Zcat(this, fastq_R2, fastqR2Qc.get) | new Gzip(this) > fastqR2Qc.get)
      }
266

267
268
      outputFiles += ("output_R1_gzip" -> fastqR1Qc)
      if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
269

270
      fastqc_R1_after = Fastqc(this, fastqR1Qc, new File(outputDir, R1_name + ".qc.fastqc/"))
271
      add(fastqc_R1_after)
272
      addSummarizable(fastqc_R1_after, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
273

274
      if (paired) {
275
        fastqc_R2_after = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2_name + ".qc.fastqc/"))
276
        add(fastqc_R2_after)
277
        addSummarizable(fastqc_R2_after, "fastqc_R2_qc")
278
279
280
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
281
    addSummaryJobs()
282
283
284
285
  }
}

object Flexiprep extends PipelineCommand