Commit f4f1f4cb authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Fix chunking

parent aca99bf8
......@@ -3,7 +3,7 @@ package nl.lumc.sasc.biopet.extensions.samtools
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Created by pjvanthof on 22/09/15.
......
......@@ -153,29 +153,31 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
else None
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File], List[File]) =
def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File]) =
runTrimClip(R1_in, None, outDir, chunk)
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, outDir: File): (File, Option[File], List[File]) =
def runTrimClip(R1_in: File, outDir: File): (File, Option[File]) =
runTrimClip(R1_in, None, outDir, "")
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File], List[File]) =
def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File]) =
runTrimClip(R1_in, R2_in, outDir, "")
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File, chunkarg: String): (File, Option[File], List[File]) = {
def runTrimClip(R1_in: File,
R2_in: Option[File],
outDir: File,
chunkarg: String): (File, Option[File]) = {
val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"
var R1 = R1_in
var R2 = R2_in
def deps: List[File] = Nil
val qcCmdR1 = new QcCommand(this, fastqc_R1)
qcCmdR1.input = R1_in
qcCmdR1.read = "R1"
qcCmdR1.output = if (paired) new File(fastqR1Qc.getAbsolutePath.stripSuffix(".gz"))
qcCmdR1.output = if (paired) new File(outDir, fastqR1Qc.getName.stripSuffix(".gz"))
else fastqR1Qc
qcCmdR1.deps :+= fastqc_R1.output
qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
......@@ -184,7 +186,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
if (paired) {
val qcCmdR2 = new QcCommand(this, fastqc_R2)
qcCmdR2.input = R2_in.get
qcCmdR2.output = new File(fastqR2Qc.get.getAbsolutePath.stripSuffix(".gz"))
qcCmdR2.output = new File(outDir, fastqR2Qc.get.getName.stripSuffix(".gz"))
qcCmdR2.read = "R2"
addSummarizable(qcCmdR2, "qc_command_R2")
......@@ -195,8 +197,8 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
fqSync.refFastq = R1_in
fqSync.inputFastq1 = qcCmdR1.output
fqSync.inputFastq2 = qcCmdR2.output
fqSync.outputFastq1 = fastqR1Qc
fqSync.outputFastq2 = fastqR2Qc.get
fqSync.outputFastq1 = new File(outDir, fastqR1Qc.getName)
fqSync.outputFastq2 = new File(outDir, fastqR2Qc.get.getName)
fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {
......@@ -242,7 +244,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
outputFiles += (chunk + "output_R1" -> R1)
if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
(R1, R2, deps)
(R1, R2)
}
/** Adds last non chunkable jobs */
......
......@@ -204,13 +204,11 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
for ((chunkDir, fastqfile) <- chunks) {
var R1 = fastqfile._1
var R2 = fastqfile._2
var deps: List[File] = Nil
if (!skipFlexiprep) {
val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
logger.debug(chunkDir + " - " + flexiout)
R1 = flexiout._1
if (paired) R2 = flexiout._2
deps = flexiout._3
fastq_R1_output :+= R1
R2.foreach(R2 => fastq_R2_output :+= R2)
}
......@@ -218,15 +216,15 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val outputBam = new File(chunkDir, outputName + ".bam")
bamFiles :+= outputBam
aligner match {
case "bwa-mem" => addBwaMem(R1, R2, outputBam, deps)
case "bwa-aln" => addBwaAln(R1, R2, outputBam, deps)
case "bowtie" => addBowtie(R1, R2, outputBam, deps)
case "gsnap" => addGsnap(R1, R2, outputBam, deps)
case "bwa-mem" => addBwaMem(R1, R2, outputBam)
case "bwa-aln" => addBwaAln(R1, R2, outputBam)
case "bowtie" => addBowtie(R1, R2, outputBam)
case "gsnap" => addGsnap(R1, R2, outputBam)
// TODO: make TopHat here accept multiple input files
case "tophat" => addTophat(R1, R2, outputBam, deps)
case "stampy" => addStampy(R1, R2, outputBam, deps)
case "star" => addStar(R1, R2, outputBam, deps)
case "star-2pass" => addStar2pass(R1, R2, outputBam, deps)
case "tophat" => addTophat(R1, R2, outputBam)
case "stampy" => addStampy(R1, R2, outputBam)
case "star" => addStar(R1, R2, outputBam)
case "star-2pass" => addStar2pass(R1, R2, outputBam)
case _ => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
}
if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
......@@ -267,10 +265,9 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
/** Add bwa aln jobs */
def addBwaAln(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addBwaAln(R1: File, R2: Option[File], output: File): File = {
val bwaAlnR1 = new BwaAln(this)
bwaAlnR1.fastq = R1
bwaAlnR1.deps = deps
bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
bwaAlnR1.isIntermediate = true
add(bwaAlnR1)
......@@ -278,7 +275,6 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val samFile: File = if (paired) {
val bwaAlnR2 = new BwaAln(this)
bwaAlnR2.fastq = R2.get
bwaAlnR2.deps = deps
bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
bwaAlnR2.isIntermediate = true
add(bwaAlnR2)
......@@ -313,11 +309,10 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
/** Adds bwa mem jobs */
def addBwaMem(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addBwaMem(R1: File, R2: Option[File], output: File): File = {
val bwaCommand = new BwaMem(this)
bwaCommand.R1 = R1
if (paired) bwaCommand.R2 = R2.get
bwaCommand.deps = deps
bwaCommand.R = Some(getReadGroupBwa)
val sortSam = new SortSam(this)
sortSam.output = output
......@@ -325,10 +320,9 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
output
}
def addGsnap(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addGsnap(R1: File, R2: Option[File], output: File): File = {
val gsnapCommand = new Gsnap(this)
gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
gsnapCommand.deps = deps
gsnapCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
gsnapCommand.isIntermediate = true
add(gsnapCommand)
......@@ -341,13 +335,12 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addAddOrReplaceReadGroups(reorderSam.output, output)
}
def addTophat(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addTophat(R1: File, R2: Option[File], output: File): File = {
// TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
val tophat = new Tophat(this)
tophat.R1 = tophat.R1 :+ R1
if (paired) tophat.R2 = tophat.R2 :+ R2.get
tophat.output_dir = new File(outputDir, "tophat_out")
tophat.deps = deps
// always output BAM
tophat.no_convert_bam = false
// and always keep input ordering
......@@ -384,7 +377,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addAddOrReplaceReadGroups(reorderSam.output, output)
}
/** Adds stampy jobs */
def addStampy(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addStampy(R1: File, R2: Option[File], output: File): File = {
var RG: String = "ID:" + readgroupId + ","
RG += "SM:" + sampleId.get + ","
......@@ -399,7 +392,6 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val stampyCmd = new Stampy(this)
stampyCmd.R1 = R1
if (paired) stampyCmd.R2 = R2.get
stampyCmd.deps = deps
stampyCmd.readgroup = RG
stampyCmd.sanger = true
stampyCmd.output = this.swapExt(output.getParent, output, ".bam", ".sam")
......@@ -412,11 +404,10 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
/** Adds bowtie jobs */
def addBowtie(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addBowtie(R1: File, R2: Option[File], output: File): File = {
val bowtie = new Bowtie(this)
bowtie.R1 = R1
if (paired) bowtie.R2 = R2
bowtie.deps = deps
bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
bowtie.isIntermediate = true
add(bowtie)
......@@ -424,15 +415,15 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
/** Adds Star jobs */
def addStar(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
val starCommand = Star(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
def addStar(R1: File, R2: Option[File], output: File): File = {
val starCommand = Star(this, R1, R2, outputDir, isIntermediate = true)
add(starCommand)
addAddOrReplaceReadGroups(starCommand.outputSam, output)
}
/** Adds Start 2 pass jobs */
def addStar2pass(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
val starCommand = Star._2pass(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
def addStar2pass(R1: File, R2: Option[File], output: File): File = {
val starCommand = Star._2pass(this, R1, R2, outputDir, isIntermediate = true)
addAll(starCommand._2)
addAddOrReplaceReadGroups(starCommand._1, output)
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment