Flexiprep.scala 10.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.flexiprep

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag }
19
import nl.lumc.sasc.biopet.extensions.{ Gzip, Zcat }
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.utils.IoUtils._
22 23
import nl.lumc.sasc.biopet.extensions.tools.{ FastqSync, SeqStat, ValidateFastq }
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.QScript
25

26
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
27 28
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
29
  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", fullName = "inputR1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
30
  var inputR1: File = _
31

Peter van 't Hof's avatar
Peter van 't Hof committed
32
  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", fullName = "inputR2", required = false)
33
  var inputR2: Option[File] = None
34

35 36
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
37

38 39
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
40

41 42 43
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  /** Location of summary file */
45
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
48
  def summaryFiles: Map[String, File] = {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
49
    Map("input_R1" -> inputR1, "output_R1" -> fastqR1Qc) ++
50
      (if (paired) Map("input_R2" -> inputR2.get, "output_R2" -> fastqR2Qc.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
51
  }
52

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
54
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
55

56
  var paired: Boolean = inputR2.isDefined
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
57 58
  var R1Name: String = _
  var R2Name: String = _
59

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
60 61 62 63
  var fastqcR1: Fastqc = _
  var fastqcR2: Fastqc = _
  var fastqcR1After: Fastqc = _
  var fastqcR2After: Fastqc = _
64

Peter van 't Hof's avatar
Peter van 't Hof committed
65 66 67 68 69 70 71 72 73 74
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
75
  /** Function that's need to be executed before the script is accessed */
76
  def init() {
77 78 79
    if (inputR1 == null) Logging.addError("Missing input R1 on flexiprep module")
    if (sampleId == null || sampleId == None) Logging.addError("Missing sample ID on flexiprep module")
    if (libId == null || libId == None) Logging.addError("Missing library ID on flexiprep module")
80

81
    paired = inputR2.isDefined
82

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
83
    inputFiles :+= new InputFile(inputR1)
84
    inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
85

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
86
    R1Name = getUncompressedFileName(inputR1)
87
    inputR2.foreach { fileR2 =>
88
      paired = true
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
89
      R2Name = getUncompressedFileName(fileR2)
90 91 92
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
93
  /** Script to add jobs */
94 95 96
  def biopetScript() {
    runInitialJobs()

97
    if (paired) runTrimClip(inputR1, inputR2, outputDir)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
98
    else runTrimClip(inputR1, outputDir)
99

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
100 101 102
    val R1Files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
    val R2Files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
    runFinalize(R1Files.toList, R2Files.toList)
103 104
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
105
  /** Add init non chunkable jobs */
106
  def runInitialJobs() {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
107
    outputFiles += ("fastq_input_R1" -> inputR1)
108
    if (paired) outputFiles += ("fastq_input_R2" -> inputR2.get)
109

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
110 111 112 113
    fastqcR1 = Fastqc(this, inputR1, new File(outputDir, R1Name + ".fastqc/"))
    add(fastqcR1)
    addSummarizable(fastqcR1, "fastqc_R1")
    outputFiles += ("fastqc_R1" -> fastqcR1.output)
114

115
    val validateFastq = new ValidateFastq(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
116
    validateFastq.r1Fastq = inputR1
117
    validateFastq.r2Fastq = inputR2
118 119 120
    validateFastq.jobOutputFile = new File(outputDir, ".validate_fastq.log.out")
    add(validateFastq)

121 122 123 124 125 126
    if (config("abort_on_corrupt_fastq", default = true)) {
      val checkValidateFastq = new CheckValidateFastq
      checkValidateFastq.inputLogFile = validateFastq.jobOutputFile
      checkValidateFastq.jobOutputFile = new File(outputDir, ".check.validate_fastq.log.out")
      add(checkValidateFastq)
    }
127

128
    if (paired) {
129
      fastqcR2 = Fastqc(this, inputR2.get, new File(outputDir, R2Name + ".fastqc/"))
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
130 131 132
      add(fastqcR2)
      addSummarizable(fastqcR2, "fastqc_R2")
      outputFiles += ("fastqc_R2" -> fastqcR2.output)
133
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
134

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
135 136 137 138
    val seqstatR1 = SeqStat(this, inputR1, outputDir)
    seqstatR1.isIntermediate = true
    add(seqstatR1)
    addSummarizable(seqstatR1, "seqstat_R1")
Peter van 't Hof's avatar
Peter van 't Hof committed
139 140

    if (paired) {
141
      val seqstatR2 = SeqStat(this, inputR2.get, outputDir)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
142 143 144
      seqstatR2.isIntermediate = true
      add(seqstatR2)
      addSummarizable(seqstatR2, "seqstat_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
145
    }
146 147
  }

148 149 150 151 152 153
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
154

Peter van 't Hof's avatar
Peter van 't Hof committed
155
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
156
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
157
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
158

Peter van 't Hof's avatar
Peter van 't Hof committed
159
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
160
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
161
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
162

Peter van 't Hof's avatar
Peter van 't Hof committed
163
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
164
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File]) =
165
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
166

Peter van 't Hof's avatar
Peter van 't Hof committed
167
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
168 169 170 171
  def runTrimClip(R1_in: File,
                  R2_in: Option[File],
                  outDir: File,
                  chunkarg: String): (File, Option[File]) = {
172 173
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
174 175
    var R1 = R1_in
    var R2 = R2_in
176

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
177
    val qcCmdR1 = new QcCommand(this, fastqcR1)
178
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
179
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
180
    qcCmdR1.output = if (paired) new File(outDir, fastqR1Qc.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    else fastqR1Qc
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
182
    qcCmdR1.deps :+= fastqcR1.output
183
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
184
    addSummarizable(qcCmdR1, "qc_command_R1")
185 186

    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
187
      val qcCmdR2 = new QcCommand(this, fastqcR2)
188
      qcCmdR2.input = R2_in.get
Peter van 't Hof's avatar
Peter van 't Hof committed
189
      qcCmdR2.output = new File(outDir, fastqR2Qc.get.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      qcCmdR2.read = "R2"
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      addSummarizable(qcCmdR2, "qc_command_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
192 193 194

      qcCmdR1.compress = false
      qcCmdR2.compress = false
195 196 197

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
198 199
      fqSync.inputFastq1 = qcCmdR1.output
      fqSync.inputFastq2 = qcCmdR2.output
Peter van 't Hof's avatar
Peter van 't Hof committed
200 201
      fqSync.outputFastq1 = new File(outDir, fastqR1Qc.getName)
      fqSync.outputFastq2 = new File(outDir, fastqR2Qc.get.getName)
202
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
203 204

      val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {
Sander Bollen's avatar
Sander Bollen committed
205
        override def configNamespace = "qc-cmd"
Peter van 't Hof's avatar
Peter van 't Hof committed
206 207 208

        override def beforeGraph(): Unit = {
          fqSync.beforeGraph()
Peter van 't Hof's avatar
Peter van 't Hof committed
209
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
210 211 212 213 214 215 216 217
          super.beforeGraph()
        }

        override def beforeCmd(): Unit = {
          qcCmdR1.beforeCmd()
          qcCmdR2.beforeCmd()
          fqSync.beforeCmd()
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
218
          commands.foreach(addPipeJob)
Peter van 't Hof's avatar
Peter van 't Hof committed
219 220 221 222
          super.beforeCmd()
        }
      }

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
223 224
      pipe.deps ::= fastqcR1.output
      pipe.deps ::= fastqcR2.output
Peter van 't Hof's avatar
Peter van 't Hof committed
225 226 227
      pipe.isIntermediate = !keepQcFastqFiles
      add(pipe)

228 229 230 231
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
232 233 234 235
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
236

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
237 238 239
    val seqstatR1After = SeqStat(this, R1, outDir)
    add(seqstatR1After)
    addSummarizable(seqstatR1After, "seqstat_R1_qc")
240 241

    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
242 243 244
      val seqstatR2After = SeqStat(this, R2.get, outDir)
      add(seqstatR2After)
      addSummarizable(seqstatR2After, "seqstat_R2_qc")
245 246 247
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
248
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
249
    (R1, R2)
250 251
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
252
  /** Adds last non chunkable jobs */
253
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
254 255
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
256

Peter van 't Hof's avatar
Peter van 't Hof committed
257
    if (fastq_R1.length > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
258 259 260 261 262 263 264 265
      val zcat = new Zcat(this)
      zcat.input = fastq_R1
      add(zcat | new Gzip(this) > fastqR1Qc)
      if (paired) {
        val zcat = new Zcat(this)
        zcat.input = fastq_R2
        add(zcat | new Gzip(this) > fastqR2Qc.get)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
266
    }
267

268 269 270 271 272 273
    val validateFastq = new ValidateFastq(this)
    validateFastq.r1Fastq = fastqR1Qc
    validateFastq.r2Fastq = fastqR2Qc
    validateFastq.jobOutputFile = new File(outputDir, ".validate_fastq.qc.log.out")
    add(validateFastq)

274 275 276 277 278 279
    if (config("abort_on_corrupt_fastq", default = true)) {
      val checkValidateFastq = new CheckValidateFastq
      checkValidateFastq.inputLogFile = validateFastq.jobOutputFile
      checkValidateFastq.jobOutputFile = new File(outputDir, ".check.validate_fastq.qc.log.out")
      add(checkValidateFastq)
    }
280

Peter van 't Hof's avatar
Peter van 't Hof committed
281 282
    outputFiles += ("output_R1_gzip" -> fastqR1Qc)
    if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
283

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
284 285 286
    fastqcR1After = Fastqc(this, fastqR1Qc, new File(outputDir, R1Name + ".qc.fastqc/"))
    add(fastqcR1After)
    addSummarizable(fastqcR1After, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
287

Peter van 't Hof's avatar
Peter van 't Hof committed
288
    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
289 290 291
      fastqcR2After = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2Name + ".qc.fastqc/"))
      add(fastqcR2After)
      addSummarizable(fastqcR2After, "fastqc_R2_qc")
292 293
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
294
    addSummaryJobs()
295 296 297 298
  }
}

object Flexiprep extends PipelineCommand