Commit 867eac4c authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'fix-gears_single_end' into 'develop'

Fix for gears single end bam files

Fixes #287 

See merge request !337
parents 3fb5cf10 c28c5c82
...@@ -69,8 +69,8 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers ...@@ -69,8 +69,8 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
optional("--threads", nCoresRequest) + optional("--threads", nCoresRequest) +
conditional(quick, "--quick") + conditional(quick, "--quick") +
optional("--min_hits", minHits) + optional("--min_hits", minHits) +
optional("--unclassified-out ", unclassified_out.get) + optional("--unclassified-out ", unclassified_out) +
optional("--classified-out ", classified_out.get) + optional("--classified-out ", classified_out) +
required("--output", output) + required("--output", output) +
conditional(preLoad, "--preload") + conditional(preLoad, "--preload") +
conditional(paired, "--paired") + conditional(paired, "--paired") +
......
...@@ -89,10 +89,10 @@ object SeqStat extends ToolCommand { ...@@ -89,10 +89,10 @@ object SeqStat extends ToolCommand {
(qual_low_boundery < 59, qual_high_boundery > 74) match { (qual_low_boundery < 59, qual_high_boundery > 74) match {
case (false, true) => phredEncoding = Solexa case (false, true) => phredEncoding = Solexa
// TODO: check this later on // TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa // complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa // but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K) // New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
case (true, true) => phredEncoding = Solexa case (true, true) => phredEncoding = Solexa
// this is definite a sanger sequence, the lower end is sanger only // this is definite a sanger sequence, the lower end is sanger only
case (true, false) => phredEncoding = Sanger case (true, false) => phredEncoding = Sanger
...@@ -181,7 +181,7 @@ object SeqStat extends ToolCommand { ...@@ -181,7 +181,7 @@ object SeqStat extends ToolCommand {
quals ++= mutable.ArrayBuffer.fill(baseStats(pos).qual.length - quals.length)(0) quals ++= mutable.ArrayBuffer.fill(baseStats(pos).qual.length - quals.length)(0)
} }
if (nucs.length <= baseStats(pos).nucs.length) { if (nucs.length <= baseStats(pos).nucs.length) {
nucs ++= mutable.ArrayBuffer.fill( baseStats(pos).nucs.length - nucs.length )(0) nucs ++= mutable.ArrayBuffer.fill(baseStats(pos).nucs.length - nucs.length)(0)
} }
// count into the quals // count into the quals
baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value } baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value }
......
...@@ -21,21 +21,26 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS ...@@ -21,21 +21,26 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS
) )
) )
lazy val paired: Boolean = config("paired_bam", default = true)
def init(): Unit = { def init(): Unit = {
require(bamFile != null) require(bamFile != null)
if (outputName == null) outputName = bamFile.getName.stripSuffix(".bam") if (outputName == null) outputName = bamFile.getName.stripSuffix(".bam")
} }
def fastqUnmappedR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz") def fastqUnmappedR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz")
def fastqUnmappedR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz") def fastqUnmappedR2 = if (paired) Some(new File(outputDir, s"$outputName.unmapped.R2.fq.gz"))
else None
def fastqUnmappedSingletons = new File(outputDir, s"$outputName.unmapped.singletons.fq.gz") def fastqUnmappedSingletons = new File(outputDir, s"$outputName.unmapped.singletons.fq.gz")
def biopetScript(): Unit = { def biopetScript(): Unit = {
val samtoolsViewSelectUnmapped = new SamtoolsView(this) val samtoolsViewSelectUnmapped = new SamtoolsView(this)
samtoolsViewSelectUnmapped.input = bamFile samtoolsViewSelectUnmapped.input = bamFile
samtoolsViewSelectUnmapped.b = true samtoolsViewSelectUnmapped.b = true
samtoolsViewSelectUnmapped.output = swapExt(outputDir, bamFile, ".bam", "unmapped.bam") samtoolsViewSelectUnmapped.h = true
samtoolsViewSelectUnmapped.f = List("12") samtoolsViewSelectUnmapped.output = swapExt(outputDir, bamFile, ".bam", ".unmapped.bam")
if (paired) samtoolsViewSelectUnmapped.f = List("12")
else samtoolsViewSelectUnmapped.f = List("4")
samtoolsViewSelectUnmapped.isIntermediate = true samtoolsViewSelectUnmapped.isIntermediate = true
add(samtoolsViewSelectUnmapped) add(samtoolsViewSelectUnmapped)
...@@ -43,9 +48,11 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS ...@@ -43,9 +48,11 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS
val samToFastq = new SamToFastq(this) val samToFastq = new SamToFastq(this)
samToFastq.input = samtoolsViewSelectUnmapped.output samToFastq.input = samtoolsViewSelectUnmapped.output
samToFastq.fastqR1 = fastqUnmappedR1 samToFastq.fastqR1 = fastqUnmappedR1
samToFastq.fastqR2 = fastqUnmappedR2 if (paired) {
samToFastq.fastqUnpaired = fastqUnmappedSingletons samToFastq.fastqR2 = fastqUnmappedR2.get
samToFastq.isIntermediate = true samToFastq.fastqUnpaired = fastqUnmappedSingletons
}
samToFastq.isIntermediate = !config("keep_unmapped_fastq", default = false).asBoolean
add(samToFastq) add(samToFastq)
} }
} }
...@@ -5,6 +5,7 @@ import java.io.{ File, PrintWriter } ...@@ -5,6 +5,7 @@ import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.SampleLibraryTag import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken } import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken }
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
...@@ -32,10 +33,27 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi ...@@ -32,10 +33,27 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
.stripSuffix(".fastq") .stripSuffix(".fastq")
} }
lazy val krakenConvertToFasta: Boolean = config("kraken_discard_quality", default = false)
protected def fastqToFasta(input: File): File = {
val seqtk = new SeqtkSeq(this)
seqtk.input = input
seqtk.output = new File(outputDir, input.getName + ".fasta")
seqtk.A = true
seqtk.isIntermediate = true
add(seqtk)
seqtk.output
}
def biopetScript(): Unit = { def biopetScript(): Unit = {
// start kraken // start kraken
val (fqR1, fqR2) = if (krakenConvertToFasta)
(fastqToFasta(fastqR1), fastqR2.map(fastqToFasta))
else (fastqR1, fastqR2)
val krakenAnalysis = new Kraken(this) val krakenAnalysis = new Kraken(this)
krakenAnalysis.input = fastqR1 :: fastqR2.toList krakenAnalysis.input = fqR1 :: fqR2.toList
krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw") krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
krakenAnalysis.paired = fastqR2.isDefined krakenAnalysis.paired = fastqR2.isDefined
...@@ -79,7 +97,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi ...@@ -79,7 +97,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
/** Statistics shown in the summary file */ /** Statistics shown in the summary file */
def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match { def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match {
case Some(file) => Map("input_R1" -> file) case Some(file) => Map("input_R2" -> file)
case _ => Map() case _ => Map()
}) })
} }
......
...@@ -97,7 +97,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi ...@@ -97,7 +97,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
extract.bamFile = bam extract.bamFile = bam
extract.outputName = outputName extract.outputName = outputName
add(extract) add(extract)
executeFlexiprep(extract.fastqUnmappedR1, Some(extract.fastqUnmappedR2)) executeFlexiprep(extract.fastqUnmappedR1, extract.fastqUnmappedR2)
case _ => throw new IllegalArgumentException("Missing input files") case _ => throw new IllegalArgumentException("Missing input files")
} }
...@@ -159,6 +159,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi ...@@ -159,6 +159,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
def summaryFiles: Map[String, File] = Map.empty ++ def summaryFiles: Map[String, File] = Map.empty ++
(if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++ (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
(if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++ (if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++
(if (fastqR2.isDefined) Map("input_R2" -> fastqR2.get) else Map()) ++
outputFiles outputFiles
} }
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment