Skip to content
Snippets Groups Projects
Commit 0bc09ac2 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch '361-fix-wipereads-extension' into 'develop'

Fixing WipeReads

Closes #361

See merge request !428
parents 736e0109 8b40a367
Branches
Tags
No related merge requests found
...@@ -18,9 +18,8 @@ import java.io.File ...@@ -18,9 +18,8 @@ import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
// TODO: finish implementation for usage in pipelines
/** /**
* WipeReads function class for usage in Biopet pipelines * WipeReads function class for usage in Biopet pipelines
* *
...@@ -36,9 +35,52 @@ class WipeReads(val root: Configurable) extends ToolCommandFunction { ...@@ -36,9 +35,52 @@ class WipeReads(val root: Configurable) extends ToolCommandFunction {
@Input(doc = "Interval file", shortName = "r", required = true) @Input(doc = "Interval file", shortName = "r", required = true)
var intervalFile: File = null var intervalFile: File = null
@Argument(doc = "Minimum MAPQ of reads in target region to remove (default: 0)")
var minMapQ: Option[Int] = config("min_mapq")
@Argument(doc = "Read group IDs to be removed (default: remove reads from all read groups)")
var readgroup: Set[String] = config("read_group", default = Nil)
@Argument(doc = "Whether to remove multiple-mapped reads outside the target regions (default: yes)")
var limitRemoval: Boolean = config("limit_removal", default = false)
@Argument(doc = "Whether to index output BAM file or not")
var noMakeIndex: Boolean = config("no_make_index", default = false)
@Argument(doc = "GTF feature containing intervals (default: exon)")
var featureType: Option[String] = config("feature_type")
@Argument(doc = "Expected maximum number of reads in target regions (default: 7e7)")
var bloomSize: Option[Long] = config("bloom_size")
@Argument(doc = "False positive rate (default: 4e-7)")
var falsePositive: Option[Long] = config("false_positive")
@Output(doc = "Output BAM", shortName = "o", required = true) @Output(doc = "Output BAM", shortName = "o", required = true)
var outputBam: File = null var outputBam: File = null
@Output(required = false)
private var outputIndex: Option[File] = None
@Output(doc = "BAM containing discarded reads", shortName = "f", required = false) @Output(doc = "BAM containing discarded reads", shortName = "f", required = false)
var discardedBam: File = null var discardedBam: Option[File] = None
override def beforeGraph() {
super.beforeGraph()
if (!noMakeIndex) outputIndex = Some(new File(outputBam.getPath.stripSuffix(".bam") + ".bai"))
}
override def cmdLine = super.cmdLine +
required("-I", inputBam) +
required("-r", intervalFile) +
required("-o", outputBam) +
optional("-f", discardedBam) +
optional("-Q", minMapQ) +
readgroup.foreach(rg => required("-G", rg)) +
conditional(limitRemoval, "--limit_removal") +
conditional(noMakeIndex, "--no_make_index") +
optional("-t", featureType) +
optional("--bloom_size", bloomSize) +
optional("--false_positive", falsePositive)
} }
...@@ -119,6 +119,10 @@ class Gentrap(val root: Configurable) extends QScript ...@@ -119,6 +119,10 @@ class Gentrap(val root: Configurable) extends QScript
"mapping" -> Map( "mapping" -> Map(
"aligner" -> "gsnap", "aligner" -> "gsnap",
"skip_markduplicates" -> true "skip_markduplicates" -> true
),
"wipereads" -> Map(
"limit_removal" -> true,
"no_make_index" -> false
) )
) )
...@@ -209,7 +213,7 @@ class Gentrap(val root: Configurable) extends QScript ...@@ -209,7 +213,7 @@ class Gentrap(val root: Configurable) extends QScript
job.inputBam = bamFile.get job.inputBam = bamFile.get
ribosomalRefFlat().foreach(job.intervalFile = _) ribosomalRefFlat().foreach(job.intervalFile = _)
job.outputBam = createFile("cleaned.bam") job.outputBam = createFile("cleaned.bam")
job.discardedBam = createFile("rrna.bam") job.discardedBam = Some(createFile("rrna.bam"))
add(job) add(job)
Some(job.outputBam) Some(job.outputBam)
} else bamFile } else bamFile
......
...@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetPipe } ...@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetPipe }
import nl.lumc.sasc.biopet.extensions._ import nl.lumc.sasc.biopet.extensions._
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2 import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
import nl.lumc.sasc.biopet.extensions.tools.BaseCounter import nl.lumc.sasc.biopet.extensions.tools.{ BaseCounter, WipeReads }
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings import org.broadinstitute.gatk.queue.QSettings
...@@ -71,10 +71,6 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O ...@@ -71,10 +71,6 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
.toMap .toMap
) )
val validExpressionMeasures = Set(
"fragments_per_gene", "fragments_per_exon", "base_counts",
"cufflinks_strict", "cufflinks_guided", "cufflinks_blind")
@DataProvider(name = "expMeasuresstrandProtocol") @DataProvider(name = "expMeasuresstrandProtocol")
def expMeasuresStrandProtocolProvider = { def expMeasuresStrandProtocolProvider = {
...@@ -89,27 +85,23 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O ...@@ -89,27 +85,23 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
} yield makeSamplesConfig(sampleNum, libNum, libType) } yield makeSamplesConfig(sampleNum, libNum, libType)
val strandProtocols = Array("non_specific", "dutp") val strandProtocols = Array("non_specific", "dutp")
// get all possible combinations of expression measures
val expressionMeasures = validExpressionMeasures
//.subsets
//.map(_.toList)
.toArray
for { for {
sampleConfig <- sampleConfigs.toArray sampleConfig <- sampleConfigs.toArray
//expressionMeasure <- expressionMeasures
strandProtocol <- strandProtocols strandProtocol <- strandProtocols
} yield Array(sampleConfig, List(expressionMeasure), strandProtocol) removeRiboReads <- Array(true, false)
} yield Array(sampleConfig, List(expressionMeasure), strandProtocol, removeRiboReads)
} }
@Test(dataProvider = "expMeasuresstrandProtocol") @Test(dataProvider = "expMeasuresstrandProtocol")
def testGentrap(sampleConfig: SamplesConfig, expMeasures: List[String], strandProtocol: String) = { def testGentrap(sampleConfig: SamplesConfig, expMeasures: List[String], strandProtocol: String, removeRiboReads: Boolean) = {
val settings = Map( val settings = Map(
"output_dir" -> GentrapTest.outputDir, "output_dir" -> GentrapTest.outputDir,
"gsnap" -> Map("db" -> "test", "dir" -> "test"), "gsnap" -> Map("db" -> "test", "dir" -> "test"),
"expression_measures" -> expMeasures, "expression_measures" -> expMeasures,
"strand_protocol" -> strandProtocol "strand_protocol" -> strandProtocol,
"remove_ribosomal_reads" -> removeRiboReads
) ++ aligner.map("aligner" -> _) ) ++ aligner.map("aligner" -> _)
val config = ConfigUtils.mergeMaps(settings ++ sampleConfig, Map(GentrapTest.executables.toSeq: _*)) val config = ConfigUtils.mergeMaps(settings ++ sampleConfig, Map(GentrapTest.executables.toSeq: _*))
val gentrap: Gentrap = initPipeline(config) val gentrap: Gentrap = initPipeline(config)
...@@ -146,6 +138,9 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O ...@@ -146,6 +138,9 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
assert(gentrap.functions.exists(_.isInstanceOf[Ln])) assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
} }
gentrap.removeRibosomalReads shouldBe removeRiboReads
gentrap.functions.exists(_.isInstanceOf[WipeReads]) shouldBe removeRiboReads
val classMap = Map( val classMap = Map(
"gsnap" -> classOf[Gsnap], "gsnap" -> classOf[Gsnap],
"tophat" -> classOf[Tophat], "tophat" -> classOf[Tophat],
...@@ -196,6 +191,7 @@ object GentrapTest { ...@@ -196,6 +191,7 @@ object GentrapTest {
"annotation_gtf" -> (outputDir + File.separator + "ref.fa"), "annotation_gtf" -> (outputDir + File.separator + "ref.fa"),
"annotation_bed" -> (outputDir + File.separator + "ref.fa"), "annotation_bed" -> (outputDir + File.separator + "ref.fa"),
"annotation_refflat" -> (outputDir + File.separator + "ref.fa"), "annotation_refflat" -> (outputDir + File.separator + "ref.fa"),
"ribosome_refflat" -> (outputDir + File.separator + "ref.fa"),
"varscan_jar" -> "test", "varscan_jar" -> "test",
"rscript" -> Map("exe" -> "test") "rscript" -> Map("exe" -> "test")
) ++ Seq( ) ++ Seq(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment