Commit c6c7c899 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Refactor flexiprep, more pipes are used now

parent 8e950ef8
......@@ -24,7 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for zcat */
class Zcat(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Zipped file", required = true)
var input: File = _
var input: List[File] = _
@Output(doc = "Unzipped file", required = true)
var output: File = _
......@@ -46,6 +46,13 @@ object Zcat {
/** Returns Zcat with input and output files */
def apply(root: Configurable, input: File, output: File): Zcat = {
val zcat = new Zcat(root)
zcat.input = input :: Nil
zcat.output = output
zcat
}
def apply(root: Configurable, input: List[File], output: File): Zcat = {
val zcat = new Zcat(root)
zcat.input = input
zcat.output = output
......
......@@ -15,6 +15,8 @@
*/
package nl.lumc.sasc.biopet.pipelines.flexiprep
import java.io.File
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.{ Pbzip2, Zcat, Gzip, Sickle }
......@@ -120,8 +122,8 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
/** Add init non chunkable jobs */
def runInitialJobs() {
outputFiles += ("fastq_input_R1" -> extractIfNeeded(input_R1, outputDir))
if (paired) outputFiles += ("fastq_input_R2" -> extractIfNeeded(input_R2.get, outputDir))
outputFiles += ("fastq_input_R1" -> input_R1)
if (paired) outputFiles += ("fastq_input_R2" -> input_R2.get)
fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/"))
add(fastqc_R1)
......@@ -148,7 +150,12 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
}
}
//TODO: Refactor need to combine all this functions
def fastqR1Qc = if (paired)
new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.sync.fq.gz")
else new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R1.qc.fq.gz")
def fastqR2Qc = if (paired)
Some(new File(outputDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.R2.qc.sync.fq.gz"))
else None
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File], List[File]) =
......@@ -169,92 +176,42 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
var R1 = R1_in
var R2 = R2_in
var deps_R1 = R1 :: Nil
var deps_R2 = if (paired) R2.get :: Nil else Nil
def deps: List[File] = deps_R1 ::: deps_R2
val seqtkSeq_R1 = SeqtkSeq(this, R1, swapExt(outDir, R1, R1_ext, ".sanger" + R1_ext), fastqc_R1)
seqtkSeq_R1.isIntermediate = true
add(seqtkSeq_R1)
R1 = seqtkSeq_R1.output
deps_R1 ::= R1
if (paired) {
val seqtkSeq_R2 = SeqtkSeq(this, R2.get, swapExt(outDir, R2.get, R2_ext, ".sanger" + R2_ext), fastqc_R2)
seqtkSeq_R2.isIntermediate = true
add(seqtkSeq_R2)
R2 = Some(seqtkSeq_R2.output)
deps_R2 ::= R2.get
}
if (!skipClip) { // Adapter clipping
def deps: List[File] = Nil
val cutadapt_R1 = Cutadapt(this, R1, swapExt(outDir, R1, R1_ext, ".clip" + R1_ext))
cutadapt_R1.fastqc = fastqc_R1
cutadapt_R1.isIntermediate = true
cutadapt_R1.deps = deps_R1
add(cutadapt_R1)
addSummarizable(cutadapt_R1, "clipping_R1")
R1 = cutadapt_R1.fastq_output
deps_R1 ::= R1
outputFiles += ("cutadapt_R1_stats" -> cutadapt_R1.stats_output)
val qcCmdR1 = new QcCommand(this, fastqc_R1)
qcCmdR1.input = R1_in
qcCmdR1.output = new File(outDir, s"${sampleId}-${libId}.R1.qc.fq.gz")
qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
add(qcCmdR1)
if (paired) {
val cutadapt_R2 = Cutadapt(this, R2.get, swapExt(outDir, R2.get, R2_ext, ".clip" + R2_ext))
outputFiles += ("cutadapt_R2_stats" -> cutadapt_R2.stats_output)
cutadapt_R2.fastqc = fastqc_R2
cutadapt_R2.isIntermediate = true
cutadapt_R2.deps = deps_R2
add(cutadapt_R2)
addSummarizable(cutadapt_R2, "clipping_R2")
R2 = Some(cutadapt_R2.fastq_output)
deps_R2 ::= R2.get
val qcCmdR2 = new QcCommand(this, fastqc_R2)
qcCmdR2.input = R2_in.get
qcCmdR2.output = new File(outDir, s"${sampleId}-${libId}.R2.qc.fq.gz")
qcCmdR2.isIntermediate = true
add(qcCmdR2)
val fqSync = new FastqSync(this)
fqSync.refFastq = cutadapt_R1.fastq_input
fqSync.inputFastq1 = cutadapt_R1.fastq_output
fqSync.inputFastq2 = cutadapt_R2.fastq_output
fqSync.outputFastq1 = swapExt(outDir, R1, R1_ext, ".sync" + R1_ext)
fqSync.outputFastq2 = swapExt(outDir, R2.get, R2_ext, ".sync" + R2_ext)
fqSync.outputStats = swapExt(outDir, R1, R1_ext, ".sync.stats")
fqSync.isIntermediate = true
fqSync.deps :::= deps
fqSync.refFastq = R1_in
fqSync.inputFastq1 = qcCmdR1.output
fqSync.inputFastq2 = qcCmdR2.output
fqSync.outputFastq1 = fastqR1Qc
fqSync.outputFastq2 = fastqR2Qc.get
fqSync.outputStats = new File(outDir, s"${sampleId}-${libId}.sync.stats")
fqSync.isIntermediate = !keepQcFastqFiles
add(fqSync)
addSummarizable(fqSync, "fastq_sync")
outputFiles += ("syncStats" -> fqSync.outputStats)
R1 = fqSync.outputFastq1
R2 = Some(fqSync.outputFastq2)
deps_R1 ::= R1
deps_R2 ::= R2.get
}
}
if (!skipTrim) { // Quality trimming
val sickle = new Sickle(this)
sickle.input_R1 = R1
sickle.output_R1 = swapExt(outDir, R1, R1_ext, ".trim" + R1_ext)
if (paired) {
sickle.input_R2 = R2.get
sickle.output_R2 = swapExt(outDir, R2.get, R2_ext, ".trim" + R2_ext)
sickle.output_singles = swapExt(outDir, R2.get, R2_ext, ".trim.singles" + R1_ext)
}
sickle.output_stats = swapExt(outDir, R1, R1_ext, ".trim.stats")
sickle.deps = deps
sickle.isIntermediate = true
add(sickle)
addSummarizable(sickle, "trimming")
R1 = sickle.output_R1
if (paired) R2 = Some(sickle.output_R2)
}
} else R1 = qcCmdR1.output
val seqstat_R1_after = SeqStat(this, R1, outDir)
seqstat_R1_after.deps = deps_R1
add(seqstat_R1_after)
addSummarizable(seqstat_R1_after, "seqstat_R1_qc")
if (paired) {
val seqstat_R2_after = SeqStat(this, R2.get, outDir)
seqstat_R2_after.deps = deps_R2
add(seqstat_R2_after)
addSummarizable(seqstat_R2_after, "seqstat_R2_qc")
}
......@@ -266,23 +223,24 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
/** Adds last non chunkable jobs */
def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) {
if (fastq_R1.length != fastq_R2.length && paired) throw new IllegalStateException("R1 and R2 file number is not the same")
val R1 = new File(outputDir, R1_name + ".qc" + R1_ext + ".gz")
val R2 = new File(outputDir, R2_name + ".qc" + R2_ext + ".gz")
if (fastq_R1.length != fastq_R2.length && paired)
throw new IllegalStateException("R1 and R2 file number is not the same")
if (!skipTrim || !skipClip) {
add(Gzip(this, fastq_R1, R1), !keepQcFastqFiles)
if (paired) add(Gzip(this, fastq_R2, R2), !keepQcFastqFiles)
if (fastq_R1.length > 1) {
add(Zcat(this, fastq_R1, fastqR1Qc) | new Gzip(this) > fastqR1Qc)
if (paired) add(Zcat(this, fastq_R2, fastqR2Qc.get) | new Gzip(this) > fastqR2Qc.get)
}
outputFiles += ("output_R1_gzip" -> R1)
if (paired) outputFiles += ("output_R2_gzip" -> R2)
outputFiles += ("output_R1_gzip" -> fastqR1Qc)
if (paired) outputFiles += ("output_R2_gzip" -> fastqR2Qc.get)
fastqc_R1_after = Fastqc(this, R1, new File(outputDir, R1_name + ".qc.fastqc/"))
fastqc_R1_after = Fastqc(this, fastqR1Qc, new File(outputDir, R1_name + ".qc.fastqc/"))
add(fastqc_R1_after)
addSummarizable(fastqc_R1_after, "fastqc_R1_qc")
if (paired) {
fastqc_R2_after = Fastqc(this, R2, new File(outputDir, R2_name + ".qc.fastqc/"))
fastqc_R2_after = Fastqc(this, fastqR2Qc.get, new File(outputDir, R2_name + ".qc.fastqc/"))
add(fastqc_R2_after)
addSummarizable(fastqc_R2_after, "fastqc_R2_qc")
}
......@@ -290,25 +248,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
addSummaryJobs()
}
/** Extracts file if file is compressed */
def extractIfNeeded(file: File, runDir: File): File = {
if (file == null) file
else if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
var newFile: File = swapExt(runDir, file, ".gz", "")
if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
val zcatCommand = Zcat(this, file, newFile)
zcatCommand.isIntermediate = true
add(zcatCommand)
newFile
} else if (file.getName.endsWith(".bz2")) {
val newFile = swapExt(runDir, file, ".bz2", "")
val pbzip2 = Pbzip2(this, file, newFile)
pbzip2.isIntermediate = true
add(pbzip2)
newFile
} else file
}
}
object Flexiprep extends PipelineCommand
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment