Commit 867eac4c authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'fix-gears_single_end' into 'develop'

Fix for gears single end bam files

Fixes #287 

See merge request !337
parents 3fb5cf10 c28c5c82
......@@ -69,8 +69,8 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
optional("--threads", nCoresRequest) +
conditional(quick, "--quick") +
optional("--min_hits", minHits) +
optional("--unclassified-out ", unclassified_out.get) +
optional("--classified-out ", classified_out.get) +
optional("--unclassified-out ", unclassified_out) +
optional("--classified-out ", classified_out) +
required("--output", output) +
conditional(preLoad, "--preload") +
conditional(paired, "--paired") +
......
......@@ -89,10 +89,10 @@ object SeqStat extends ToolCommand {
(qual_low_boundery < 59, qual_high_boundery > 74) match {
case (false, true) => phredEncoding = Solexa
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
case (true, true) => phredEncoding = Solexa
// this is definite a sanger sequence, the lower end is sanger only
case (true, false) => phredEncoding = Sanger
......@@ -181,7 +181,7 @@ object SeqStat extends ToolCommand {
quals ++= mutable.ArrayBuffer.fill(baseStats(pos).qual.length - quals.length)(0)
}
if (nucs.length <= baseStats(pos).nucs.length) {
nucs ++= mutable.ArrayBuffer.fill( baseStats(pos).nucs.length - nucs.length )(0)
nucs ++= mutable.ArrayBuffer.fill(baseStats(pos).nucs.length - nucs.length)(0)
}
// count into the quals
baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value }
......
......@@ -21,21 +21,26 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS
)
)
lazy val paired: Boolean = config("paired_bam", default = true)
def init(): Unit = {
require(bamFile != null)
if (outputName == null) outputName = bamFile.getName.stripSuffix(".bam")
}
def fastqUnmappedR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz")
def fastqUnmappedR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
def fastqUnmappedR2 = if (paired) Some(new File(outputDir, s"$outputName.unmapped.R2.fq.gz"))
else None
def fastqUnmappedSingletons = new File(outputDir, s"$outputName.unmapped.singletons.fq.gz")
def biopetScript(): Unit = {
val samtoolsViewSelectUnmapped = new SamtoolsView(this)
samtoolsViewSelectUnmapped.input = bamFile
samtoolsViewSelectUnmapped.b = true
samtoolsViewSelectUnmapped.output = swapExt(outputDir, bamFile, ".bam", "unmapped.bam")
samtoolsViewSelectUnmapped.f = List("12")
samtoolsViewSelectUnmapped.h = true
samtoolsViewSelectUnmapped.output = swapExt(outputDir, bamFile, ".bam", ".unmapped.bam")
if (paired) samtoolsViewSelectUnmapped.f = List("12")
else samtoolsViewSelectUnmapped.f = List("4")
samtoolsViewSelectUnmapped.isIntermediate = true
add(samtoolsViewSelectUnmapped)
......@@ -43,9 +48,11 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS
val samToFastq = new SamToFastq(this)
samToFastq.input = samtoolsViewSelectUnmapped.output
samToFastq.fastqR1 = fastqUnmappedR1
samToFastq.fastqR2 = fastqUnmappedR2
samToFastq.fastqUnpaired = fastqUnmappedSingletons
samToFastq.isIntermediate = true
if (paired) {
samToFastq.fastqR2 = fastqUnmappedR2.get
samToFastq.fastqUnpaired = fastqUnmappedSingletons
}
samToFastq.isIntermediate = !config("keep_unmapped_fastq", default = false).asBoolean
add(samToFastq)
}
}
......@@ -5,6 +5,7 @@ import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken }
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -32,10 +33,27 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
.stripSuffix(".fastq")
}
lazy val krakenConvertToFasta: Boolean = config("kraken_discard_quality", default = false)
protected def fastqToFasta(input: File): File = {
val seqtk = new SeqtkSeq(this)
seqtk.input = input
seqtk.output = new File(outputDir, input.getName + ".fasta")
seqtk.A = true
seqtk.isIntermediate = true
add(seqtk)
seqtk.output
}
def biopetScript(): Unit = {
// start kraken
val (fqR1, fqR2) = if (krakenConvertToFasta)
(fastqToFasta(fastqR1), fastqR2.map(fastqToFasta))
else (fastqR1, fastqR2)
val krakenAnalysis = new Kraken(this)
krakenAnalysis.input = fastqR1 :: fastqR2.toList
krakenAnalysis.input = fqR1 :: fqR2.toList
krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
krakenAnalysis.paired = fastqR2.isDefined
......@@ -79,7 +97,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
/** Statistics shown in the summary file */
def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match {
case Some(file) => Map("input_R1" -> file)
case Some(file) => Map("input_R2" -> file)
case _ => Map()
})
}
......
......@@ -97,7 +97,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
extract.bamFile = bam
extract.outputName = outputName
add(extract)
executeFlexiprep(extract.fastqUnmappedR1, Some(extract.fastqUnmappedR2))
executeFlexiprep(extract.fastqUnmappedR1, extract.fastqUnmappedR2)
case _ => throw new IllegalArgumentException("Missing input files")
}
......@@ -159,6 +159,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
def summaryFiles: Map[String, File] = Map.empty ++
(if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
(if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++
(if (fastqR2.isDefined) Map("input_R2" -> fastqR2.get) else Map()) ++
outputFiles
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment