Flexiprep.scala 10.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.flexiprep

18
19
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
22
23
import nl.lumc.sasc.biopet.extensions.{ Pbzip2, Zcat, Gzip, Sickle }
import nl.lumc.sasc.biopet.utils.config.Configurable
24
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.QScript
26

27
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
28
29
30
31
32
33
  def this() = this(null)

  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
  var input_R1: File = _

  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false)
34
  var input_R2: Option[File] = None
35

36
37
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
38

39
40
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
41

42
43
44
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** Location of summary file */
46
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  def summaryFiles: Map[String, File] = {
50
51
    Map("input_R1" -> input_R1, "output_R1" -> fastqR1Qc) ++
      (if (paired) Map("input_R2" -> input_R2.get, "output_R2" -> fastqR2Qc.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
52
  }
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
55
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
56

57
  var paired: Boolean = input_R2.isDefined
58
59
60
61
62
63
64
65
66
67
  var R1_ext: String = _
  var R2_ext: String = _
  var R1_name: String = _
  var R2_name: String = _

  var fastqc_R1: Fastqc = _
  var fastqc_R2: Fastqc = _
  var fastqc_R1_after: Fastqc = _
  var fastqc_R2_after: Fastqc = _

Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
70
71
72
73
74
75
76
77
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
78
  /** Function that's need to be executed before the script is accessed */
79
  def init() {
80
    require(outputDir != null, "Missing output directory on flexiprep module")
81
    require(input_R1 != null, "Missing input R1 on flexiprep module")
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
    require(sampleId != null, "Missing sample ID on flexiprep module")
    require(libId != null, "Missing library ID on flexiprep module")
84

85
    paired = input_R2.isDefined
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
89

90
91
92
    if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
    else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
    else R1_name = input_R1.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length)
94
95
    R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext))

Peter van 't Hof's avatar
Peter van 't Hof committed
96
    input_R2 match {
Peter van 't Hof's avatar
Peter van 't Hof committed
97
      case Some(fileR2) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
98
99
100
101
        paired = true
        if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz"))
        else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip"))
        else R2_name = fileR2.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
102
        R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length)
Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
        R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext))
      case _ =>
105
106
107
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
108
  /** Script to add jobs */
109
110
111
  def biopetScript() {
    runInitialJobs()

112
113
    val out = if (paired) runTrimClip(input_R1, input_R2, outputDir)
    else runTrimClip(input_R1, outputDir)
114
115
116
117
118
119

    val R1_files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
    val R2_files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
    runFinalize(R1_files.toList, R2_files.toList)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
120
  /** Add init non chunkable jobs */
121
  def runInitialJobs() {
122
123
    outputFiles += ("fastq_input_R1" -> input_R1)
    if (paired) outputFiles += ("fastq_input_R2" -> input_R2.get)
124

Peter van 't Hof's avatar
Peter van 't Hof committed
125
    fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/"))
126
    add(fastqc_R1)
127
    addSummarizable(fastqc_R1, "fastqc_R1")
128
129
130
    outputFiles += ("fastqc_R1" -> fastqc_R1.output)

    if (paired) {
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      fastqc_R2 = Fastqc(this, input_R2.get, new File(outputDir, R2_name + ".fastqc/"))
132
      add(fastqc_R2)
133
      addSummarizable(fastqc_R2, "fastqc_R2")
134
135
      outputFiles += ("fastqc_R2" -> fastqc_R2.output)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
136
137
138
139
140
141
142
143
144
145
146
147

    val seqstat_R1 = SeqStat(this, input_R1, outputDir)
    seqstat_R1.isIntermediate = true
    add(seqstat_R1)
    addSummarizable(seqstat_R1, "seqstat_R1")

    if (paired) {
      val seqstat_R2 = SeqStat(this, input_R2.get, outputDir)
      seqstat_R2.isIntermediate = true
      add(seqstat_R2)
      addSummarizable(seqstat_R2, "seqstat_R2")
    }
148
149
  }

150
151
152
153
154
155
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
156

Peter van 't Hof's avatar
Peter van 't Hof committed
157
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
158
159
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
160

Peter van 't Hof's avatar
Peter van 't Hof committed
161
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
166
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File], List[File]) =
167
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
168

Peter van 't Hof's avatar
Peter van 't Hof committed
169
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
170
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File, chunkarg: String): (File, Option[File], List[File]) = {
171
172
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
    var R1 = R1_in
    var R2 = R2_in
175
    def deps: List[File] = Nil
176

177
178
    val qcCmdR1 = new QcCommand(this, fastqc_R1)
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
179
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
180
    qcCmdR1.output = if (paired) new File(fastqR1Qc.getAbsolutePath.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    else fastqR1Qc
182
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
183
    addSummarizable(qcCmdR1, "qc_command_R1")
184
185

    if (paired) {
186
187
      val qcCmdR2 = new QcCommand(this, fastqc_R2)
      qcCmdR2.input = R2_in.get
Peter van 't Hof's avatar
Peter van 't Hof committed
188
      qcCmdR2.output = new File(fastqR2Qc.get.getAbsolutePath.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
189
      qcCmdR2.read = "R2"
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      addSummarizable(qcCmdR2, "qc_command_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
193

      qcCmdR1.compress = false
      qcCmdR2.compress = false
194
195
196

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
      fqSync.inputFastq1 = qcCmdR1.output
      fqSync.inputFastq2 = qcCmdR2.output
199
200
      fqSync.outputFastq1 = fastqR1Qc
      fqSync.outputFastq2 = fastqR2Qc.get
201
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
      //add(fqSync)

      val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {

        override def defaultThreads = 4
        override def defaultCoreMemory = 4.0
        override def configName = "qc-cmd"

        override def beforeGraph(): Unit = {
          fqSync.beforeGraph()
          super.beforeGraph()
        }

        override def beforeCmd(): Unit = {
          qcCmdR1.beforeCmd()
          qcCmdR2.beforeCmd()
          fqSync.beforeCmd()
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
          super.beforeCmd()
        }
      }

      pipe.deps ::= fastqc_R1.output
      pipe.deps ::= fastqc_R2.output
      pipe.isIntermediate = !keepQcFastqFiles
      add(pipe)

229
230
231
232
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
233
234
235
236
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
237

bow's avatar
bow committed
238
    val seqstat_R1_after = SeqStat(this, R1, outDir)
239
    add(seqstat_R1_after)
240
    addSummarizable(seqstat_R1_after, "seqstat_R1_qc")
241
242

    if (paired) {
bow's avatar
bow committed
243
      val seqstat_R2_after = SeqStat(this, R2.get, outDir)
244
      add(seqstat_R2_after)
245
      addSummarizable(seqstat_R2_after, "seqstat_R2_qc")
246
247
248
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
249
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
250
    (R1, R2, deps)
251
252
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
253
  /** Adds last non chunkable jobs */
254
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
255
256
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
257

258
    if (!skipTrim || !skipClip) {
259
260
261
262
      if (fastq_R1.length > 1) {
        add(Zcat(this, fastq_R1, fastqR1Qc) | new Gzip(this) > fastqR1Qc)
        if (paired) add(Zcat(this, fastq_R2, fastqR2Qc.get) | new Gzip(this) > fastqR2Qc.get)
      }
263

264
265
      outputFiles += ("output_R1_gzip" -> fastqR1Qc)
      if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
266

267
      fastqc_R1_after = Fastqc(this, fastqR1Qc, new File(outputDir, R1_name + ".qc.fastqc/"))
268
      add(fastqc_R1_after)
269
      addSummarizable(fastqc_R1_after, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
270

271
      if (paired) {
272
        fastqc_R2_after = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2_name + ".qc.fastqc/"))
273
        add(fastqc_R2_after)
274
        addSummarizable(fastqc_R2_after, "fastqc_R2_qc")
275
276
277
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
278
    addSummaryJobs()
279
280
281
282
  }
}

object Flexiprep extends PipelineCommand