Flexiprep.scala 9.78 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.flexiprep

18
19
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag }
22
23
import nl.lumc.sasc.biopet.extensions.{ Pbzip2, Zcat, Gzip, Sickle }
import nl.lumc.sasc.biopet.utils.config.Configurable
24
import nl.lumc.sasc.biopet.extensions.tools.{ SeqStat, FastqSync }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.QScript
26

27
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
28
29
30
31
32
33
  def this() = this(null)

  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
  var input_R1: File = _

  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false)
34
  var input_R2: Option[File] = None
35

36
37
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
38

39
40
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
41

42
43
44
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** Location of summary file */
46
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  def summaryFiles: Map[String, File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
52
53
54
    if (!skipTrim || !skipClip)
      Map("input_R1" -> input_R1, "output_R1" -> outputFiles("output_R1_gzip")) ++
        (if (paired) Map("input_R2" -> input_R2.get, "output_R2" -> outputFiles("output_R2_gzip")) else Map())
    else Map("input_R1" -> input_R1) ++
      (if (paired) Map("input_R2" -> input_R2.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
55
  }
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
58
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
59

60
  var paired: Boolean = input_R2.isDefined
61
62
63
64
65
66
67
68
69
70
  var R1_ext: String = _
  var R2_ext: String = _
  var R1_name: String = _
  var R2_name: String = _

  var fastqc_R1: Fastqc = _
  var fastqc_R2: Fastqc = _
  var fastqc_R1_after: Fastqc = _
  var fastqc_R2_after: Fastqc = _

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
75
76
77
78
79
80
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
81
  /** Function that's need to be executed before the script is accessed */
82
  def init() {
83
    require(outputDir != null, "Missing output directory on flexiprep module")
84
    require(input_R1 != null, "Missing input R1 on flexiprep module")
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
    require(sampleId != null, "Missing sample ID on flexiprep module")
    require(libId != null, "Missing library ID on flexiprep module")
87

88
    paired = input_R2.isDefined
89

Peter van 't Hof's avatar
Peter van 't Hof committed
90
91
    inputFiles :+= new InputFile(input_R1)
    input_R2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
92

93
94
95
    if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
    else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
    else R1_name = input_R1.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.length)
97
98
    R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext))

Peter van 't Hof's avatar
Peter van 't Hof committed
99
    input_R2 match {
Peter van 't Hof's avatar
Peter van 't Hof committed
100
      case Some(fileR2) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
103
104
        paired = true
        if (fileR2.endsWith(".gz")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gz"))
        else if (fileR2.endsWith(".gzip")) R2_name = fileR2.getName.substring(0, fileR2.getName.lastIndexOf(".gzip"))
        else R2_name = fileR2.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
105
        R2_ext = R2_name.substring(R2_name.lastIndexOf("."), R2_name.length)
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
        R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext))
      case _ =>
108
109
110
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
111
  /** Script to add jobs */
112
113
114
  def biopetScript() {
    runInitialJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
115
    val out = if (paired) runTrimClip(outputFiles("fastq_input_R1"), Some(outputFiles("fastq_input_R2")), outputDir)
116
117
118
119
120
121
122
    else runTrimClip(outputFiles("fastq_input_R1"), outputDir)

    val R1_files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
    val R2_files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
    runFinalize(R1_files.toList, R2_files.toList)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
123
  /** Add init non chunkable jobs */
124
  def runInitialJobs() {
125
126
    outputFiles += ("fastq_input_R1" -> input_R1)
    if (paired) outputFiles += ("fastq_input_R2" -> input_R2.get)
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
    fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/"))
129
    add(fastqc_R1)
130
    addSummarizable(fastqc_R1, "fastqc_R1")
131
132
133
    outputFiles += ("fastqc_R1" -> fastqc_R1.output)

    if (paired) {
Peter van 't Hof's avatar
Peter van 't Hof committed
134
      fastqc_R2 = Fastqc(this, input_R2.get, new File(outputDir, R2_name + ".fastqc/"))
135
      add(fastqc_R2)
136
      addSummarizable(fastqc_R2, "fastqc_R2")
137
138
      outputFiles += ("fastqc_R2" -> fastqc_R2.output)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
141
142
143
144
145
146
147
148
149
150

    val seqstat_R1 = SeqStat(this, input_R1, outputDir)
    seqstat_R1.isIntermediate = true
    add(seqstat_R1)
    addSummarizable(seqstat_R1, "seqstat_R1")

    if (paired) {
      val seqstat_R2 = SeqStat(this, input_R2.get, outputDir)
      seqstat_R2.isIntermediate = true
      add(seqstat_R2)
      addSummarizable(seqstat_R2, "seqstat_R2")
    }
151
152
  }

153
154
155
156
157
158
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
163

Peter van 't Hof's avatar
Peter van 't Hof committed
164
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File], List[File]) =
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
167

Peter van 't Hof's avatar
Peter van 't Hof committed
168
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
169
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File], List[File]) =
170
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
171

Peter van 't Hof's avatar
Peter van 't Hof committed
172
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
173
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File, chunkarg: String): (File, Option[File], List[File]) = {
174
175
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
    var R1 = R1_in
    var R2 = R2_in
178
    def deps: List[File] = Nil
179

180
181
    val qcCmdR1 = new QcCommand(this, fastqc_R1)
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
182
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
183
184
    qcCmdR1.output = if (paired) new File("/dev/stdout")
    else fastqR1Qc
185
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
186
187

    if (paired) {
188
189
      val qcCmdR2 = new QcCommand(this, fastqc_R2)
      qcCmdR2.input = R2_in.get
190
      qcCmdR2.output = new File("/dev/stdout")
Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
193
194
      qcCmdR2.read = "R2"

      qcCmdR1.compress = false
      qcCmdR2.compress = false
195
196
197

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
198
199
      fqSync.inputFastq1 = Right(qcCmdR1)
      fqSync.inputFastq2 = Right(qcCmdR2)
200
201
      fqSync.outputFastq1 = fastqR1Qc
      fqSync.outputFastq2 = fastqR2Qc.get
202
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
203
      fqSync.isIntermediate = !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
      fqSync.deps ::= fastqc_R1.output
      fqSync.deps ::= fastqc_R2.output
206
207
208
209
210
      add(fqSync)
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
211
212
213
214
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
215

bow's avatar
bow committed
216
    val seqstat_R1_after = SeqStat(this, R1, outDir)
217
    add(seqstat_R1_after)
218
    addSummarizable(seqstat_R1_after, "seqstat_R1_qc")
219
220

    if (paired) {
bow's avatar
bow committed
221
      val seqstat_R2_after = SeqStat(this, R2.get, outDir)
222
      add(seqstat_R2_after)
223
      addSummarizable(seqstat_R2_after, "seqstat_R2_qc")
224
225
226
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
227
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
228
    (R1, R2, deps)
229
230
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
231
  /** Adds last non chunkable jobs */
232
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
233
234
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
235

236
    if (!skipTrim || !skipClip) {
237
238
239
240
      if (fastq_R1.length > 1) {
        add(Zcat(this, fastq_R1, fastqR1Qc) | new Gzip(this) > fastqR1Qc)
        if (paired) add(Zcat(this, fastq_R2, fastqR2Qc.get) | new Gzip(this) > fastqR2Qc.get)
      }
241

242
243
      outputFiles += ("output_R1_gzip" -> fastqR1Qc)
      if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
244

245
      fastqc_R1_after = Fastqc(this, fastqR1Qc, new File(outputDir, R1_name + ".qc.fastqc/"))
246
      add(fastqc_R1_after)
247
      addSummarizable(fastqc_R1_after, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
248

249
      if (paired) {
250
        fastqc_R2_after = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2_name + ".qc.fastqc/"))
251
        add(fastqc_R2_after)
252
        addSummarizable(fastqc_R2_after, "fastqc_R2_qc")
253
254
255
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
256
    addSummaryJobs()
257
258
259
260
  }
}

object Flexiprep extends PipelineCommand