Flexiprep.scala 11.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.flexiprep

Peter van 't Hof's avatar
Peter van 't Hof committed
17 18 19
import nl.lumc.sasc.biopet.core.summary.{Summarizable, SummaryQScript}
import nl.lumc.sasc.biopet.core.{BiopetFifoPipe, PipelineCommand, SampleLibraryTag}
import nl.lumc.sasc.biopet.extensions.{Gzip, Zcat}
20
import nl.lumc.sasc.biopet.utils.config.Configurable
21
import nl.lumc.sasc.biopet.utils.IoUtils._
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.extensions.tools.{FastqSync, SeqStat, ValidateFastq}
23
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.QScript
25

26
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
27 28
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
29
  @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", fullName = "inputR1", required = true)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
30
  var inputR1: File = _
31

Peter van 't Hof's avatar
Peter van 't Hof committed
32
  @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", fullName = "inputR2", required = false)
33
  var inputR2: Option[File] = None
34

35 36
  /** Skip Trim fastq files */
  var skipTrim: Boolean = config("skip_trim", default = false)
37

38 39
  /** Skip Clip fastq files */
  var skipClip: Boolean = config("skip_clip", default = false)
40

41 42 43
  /** Make a final fastq files, by default only when flexiprep is the main pipeline */
  var keepQcFastqFiles: Boolean = config("keepQcFastqFiles", default = root == null)

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  /** Location of summary file */
45
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  /** Returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
48
  def summaryFiles: Map[String, File] = {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
49
    Map("input_R1" -> inputR1, "output_R1" -> fastqR1Qc) ++
50
      (if (paired) Map("input_R2" -> inputR2.get, "output_R2" -> fastqR2Qc.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
51
  }
52

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** returns settings to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
54
  def summarySettings = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired)
55

56
  var paired: Boolean = inputR2.isDefined
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
57 58
  var R1Name: String = _
  var R2Name: String = _
59

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
60 61 62 63
  var fastqcR1: Fastqc = _
  var fastqcR2: Fastqc = _
  var fastqcR1After: Fastqc = _
  var fastqcR2After: Fastqc = _
64

Peter van 't Hof's avatar
Peter van 't Hof committed
65 66 67 68 69 70 71 72 73 74
  override def reportClass = {
    val flexiprepReport = new FlexiprepReport(this)
    flexiprepReport.outputDir = new File(outputDir, "report")
    flexiprepReport.summaryFile = summaryFile
    flexiprepReport.args = Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
    Some(flexiprepReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
75
  /** Function that's need to be executed before the script is accessed */
76
  def init() {
77 78 79
    if (inputR1 == null) Logging.addError("Missing input R1 on flexiprep module")
    if (sampleId == null || sampleId == None) Logging.addError("Missing sample ID on flexiprep module")
    if (libId == null || libId == None) Logging.addError("Missing library ID on flexiprep module")
80

81 82 83
    if (inputR1 == null) Logging.addError("Missing input R1 on flexiprep module")
    else {
      paired = inputR2.isDefined
84

85 86
      inputFiles :+= new InputFile(inputR1)
      inputR2.foreach(inputFiles :+= new InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
87

88 89 90 91 92
      R1Name = getUncompressedFileName(inputR1)
      inputR2.foreach { fileR2 =>
        paired = true
        R2Name = getUncompressedFileName(fileR2)
      }
93 94 95
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
96
  /** Script to add jobs */
97
  def biopetScript() {
98 99
    if (inputR1 != null) {
      runInitialJobs()
100

101 102
      if (paired) runTrimClip(inputR1, inputR2, outputDir)
      else runTrimClip(inputR1, outputDir)
103

104 105 106 107
      val R1Files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v
      val R2Files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v
      runFinalize(R1Files.toList, R2Files.toList)
    }
108 109
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
110
  /** Add init non chunkable jobs */
111
  def runInitialJobs() {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
112
    outputFiles += ("fastq_input_R1" -> inputR1)
113
    if (paired) outputFiles += ("fastq_input_R2" -> inputR2.get)
114

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
115 116 117 118
    fastqcR1 = Fastqc(this, inputR1, new File(outputDir, R1Name + ".fastqc/"))
    add(fastqcR1)
    addSummarizable(fastqcR1, "fastqc_R1")
    outputFiles += ("fastqc_R1" -> fastqcR1.output)
119

120
    val validateFastq = new ValidateFastq(this)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
121
    validateFastq.r1Fastq = inputR1
122
    validateFastq.r2Fastq = inputR2
123 124 125
    validateFastq.jobOutputFile = new File(outputDir, ".validate_fastq.log.out")
    add(validateFastq)

126 127 128 129 130 131
    if (config("abort_on_corrupt_fastq", default = true)) {
      val checkValidateFastq = new CheckValidateFastq
      checkValidateFastq.inputLogFile = validateFastq.jobOutputFile
      checkValidateFastq.jobOutputFile = new File(outputDir, ".check.validate_fastq.log.out")
      add(checkValidateFastq)
    }
132

133
    if (paired) {
134
      fastqcR2 = Fastqc(this, inputR2.get, new File(outputDir, R2Name + ".fastqc/"))
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
135 136 137
      add(fastqcR2)
      addSummarizable(fastqcR2, "fastqc_R2")
      outputFiles += ("fastqc_R2" -> fastqcR2.output)
138
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
139

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
140 141 142 143
    val seqstatR1 = SeqStat(this, inputR1, outputDir)
    seqstatR1.isIntermediate = true
    add(seqstatR1)
    addSummarizable(seqstatR1, "seqstat_R1")
Peter van 't Hof's avatar
Peter van 't Hof committed
144 145

    if (paired) {
146
      val seqstatR2 = SeqStat(this, inputR2.get, outputDir)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
147 148 149
      seqstatR2.isIntermediate = true
      add(seqstatR2)
      addSummarizable(seqstatR2, "seqstat_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    }
151 152
  }

153 154 155 156 157 158
  def fastqR1Qc = if (paired)
    new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
  else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
  def fastqR2Qc = if (paired)
    Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
  else None
Peter van 't Hof's avatar
Peter van 't Hof committed
159

Peter van 't Hof's avatar
Peter van 't Hof committed
160
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
161
  def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
162
    runTrimClip(R1_in, None, outDir, chunk)
Peter van 't Hof's avatar
Peter van 't Hof committed
163

Peter van 't Hof's avatar
Peter van 't Hof committed
164
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
165
  def runTrimClip(R1_in: File, outDir: File): (File, Option[File]) =
Peter van 't Hof's avatar
Peter van 't Hof committed
166
    runTrimClip(R1_in, None, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
167

Peter van 't Hof's avatar
Peter van 't Hof committed
168
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
169
  def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File]) =
170
    runTrimClip(R1_in, R2_in, outDir, "")
Peter van 't Hof's avatar
Peter van 't Hof committed
171

Peter van 't Hof's avatar
Peter van 't Hof committed
172
  /** Adds all chunkable jobs of flexiprep */
Peter van 't Hof's avatar
Peter van 't Hof committed
173 174 175 176
  def runTrimClip(R1_in: File,
                  R2_in: Option[File],
                  outDir: File,
                  chunkarg: String): (File, Option[File]) = {
177 178
    val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"

Peter van 't Hof's avatar
Peter van 't Hof committed
179 180
    var R1 = R1_in
    var R2 = R2_in
181

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
182
    val qcCmdR1 = new QcCommand(this, fastqcR1)
183
    qcCmdR1.input = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
184
    qcCmdR1.read = "R1"
Peter van 't Hof's avatar
Peter van 't Hof committed
185
    qcCmdR1.output = if (paired) new File(outDir, fastqR1Qc.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
186
    else fastqR1Qc
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
187
    qcCmdR1.deps :+= fastqcR1.output
188
    qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
189
    addSummarizable(qcCmdR1, "qc_command_R1")
190 191

    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
192
      val qcCmdR2 = new QcCommand(this, fastqcR2)
193
      qcCmdR2.input = R2_in.get
Peter van 't Hof's avatar
Peter van 't Hof committed
194
      qcCmdR2.output = new File(outDir, fastqR2Qc.get.getName.stripSuffix(".gz"))
Peter van 't Hof's avatar
Peter van 't Hof committed
195
      qcCmdR2.read = "R2"
Peter van 't Hof's avatar
Peter van 't Hof committed
196
      addSummarizable(qcCmdR2, "qc_command_R2")
Peter van 't Hof's avatar
Peter van 't Hof committed
197 198 199

      qcCmdR1.compress = false
      qcCmdR2.compress = false
200 201 202

      val fqSync = new FastqSync(this)
      fqSync.refFastq = R1_in
Peter van 't Hof's avatar
Peter van 't Hof committed
203 204
      fqSync.inputFastq1 = qcCmdR1.output
      fqSync.inputFastq2 = qcCmdR2.output
Peter van 't Hof's avatar
Peter van 't Hof committed
205 206
      fqSync.outputFastq1 = new File(outDir, fastqR1Qc.getName)
      fqSync.outputFastq2 = new File(outDir, fastqR2Qc.get.getName)
207
      fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
208

Peter van 't Hof's avatar
Peter van 't Hof committed
209 210
      val pipe = new BiopetFifoPipe(this, fqSync :: Nil) with Summarizable {
        override def configNamespace = "qc_cmd"
Peter van 't Hof's avatar
Peter van 't Hof committed
211 212 213

        override def beforeGraph(): Unit = {
          fqSync.beforeGraph()
Peter van 't Hof's avatar
Peter van 't Hof committed
214
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
215 216 217 218 219 220 221 222
          super.beforeGraph()
        }

        override def beforeCmd(): Unit = {
          qcCmdR1.beforeCmd()
          qcCmdR2.beforeCmd()
          fqSync.beforeCmd()
          commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
223
          commands.foreach(addPipeJob)
Peter van 't Hof's avatar
Peter van 't Hof committed
224 225
          super.beforeCmd()
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
226 227 228 229 230 231 232 233

        /** Must return files to store into summary */
        def summaryFiles: Map[String, File] = Map()

        /** Must returns stats to store into summary */
        def summaryStats: Any = Map()

        override def summaryDeps = qcCmdR1.summaryDeps ::: qcCmdR2.summaryDeps ::: super.summaryDeps
Peter van 't Hof's avatar
Peter van 't Hof committed
234 235
      }

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
236 237
      pipe.deps ::= fastqcR1.output
      pipe.deps ::= fastqcR2.output
Peter van 't Hof's avatar
Peter van 't Hof committed
238
      pipe.isIntermediate = !keepQcFastqFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
239
      addSummarizable(pipe, "qc_cmd")
Peter van 't Hof's avatar
Peter van 't Hof committed
240 241
      add(pipe)

242 243 244 245
      addSummarizable(fqSync, "fastq_sync")
      outputFiles += ("syncStats" -> fqSync.outputStats)
      R1 = fqSync.outputFastq1
      R2 = Some(fqSync.outputFastq2)
246 247 248 249
    } else {
      add(qcCmdR1)
      R1 = qcCmdR1.output
    }
250

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
251 252 253
    val seqstatR1After = SeqStat(this, R1, outDir)
    add(seqstatR1After)
    addSummarizable(seqstatR1After, "seqstat_R1_qc")
254 255

    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
256 257 258
      val seqstatR2After = SeqStat(this, R2.get, outDir)
      add(seqstatR2After)
      addSummarizable(seqstatR2After, "seqstat_R2_qc")
259 260 261
    }

    outputFiles += (chunk + "output_R1" -> R1)
Peter van 't Hof's avatar
Peter van 't Hof committed
262
    if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
263
    (R1, R2)
264 265
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
266
  /** Adds last non chunkable jobs */
267
  def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
268 269
    if (fastq_R1.length != fastq_R2.length && paired)
      throw new IllegalStateException("R1 and R2 file number is not the same")
270

Peter van 't Hof's avatar
Peter van 't Hof committed
271
    if (fastq_R1.length > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
272 273 274 275 276 277 278 279
      val zcat = new Zcat(this)
      zcat.input = fastq_R1
      add(zcat | new Gzip(this) > fastqR1Qc)
      if (paired) {
        val zcat = new Zcat(this)
        zcat.input = fastq_R2
        add(zcat | new Gzip(this) > fastqR2Qc.get)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
280
    }
281

282 283 284 285 286 287
    val validateFastq = new ValidateFastq(this)
    validateFastq.r1Fastq = fastqR1Qc
    validateFastq.r2Fastq = fastqR2Qc
    validateFastq.jobOutputFile = new File(outputDir, ".validate_fastq.qc.log.out")
    add(validateFastq)

288 289 290 291 292 293
    if (config("abort_on_corrupt_fastq", default = true)) {
      val checkValidateFastq = new CheckValidateFastq
      checkValidateFastq.inputLogFile = validateFastq.jobOutputFile
      checkValidateFastq.jobOutputFile = new File(outputDir, ".check.validate_fastq.qc.log.out")
      add(checkValidateFastq)
    }
294

Peter van 't Hof's avatar
Peter van 't Hof committed
295 296
    outputFiles += ("output_R1_gzip" -> fastqR1Qc)
    if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
297

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
298 299 300
    fastqcR1After = Fastqc(this, fastqR1Qc, new File(outputDir, R1Name + ".qc.fastqc/"))
    add(fastqcR1After)
    addSummarizable(fastqcR1After, "fastqc_R1_qc")
Peter van 't Hof's avatar
Peter van 't Hof committed
301

Peter van 't Hof's avatar
Peter van 't Hof committed
302
    if (paired) {
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
303 304 305
      fastqcR2After = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2Name + ".qc.fastqc/"))
      add(fastqcR2After)
      addSummarizable(fastqcR2After, "fastqc_R2_qc")
306 307
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
308
    addSummaryJobs()
309 310 311 312
  }
}

object Flexiprep extends PipelineCommand