Commit 0bc09ac2 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch '361-fix-wipereads-extension' into 'develop'

Fixing WipeReads

Closes #361

See merge request !428
parents 736e0109 8b40a367
......@@ -18,9 +18,8 @@ import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
// TODO: finish implementation for usage in pipelines
/**
* WipeReads function class for usage in Biopet pipelines
*
......@@ -36,9 +35,52 @@ class WipeReads(val root: Configurable) extends ToolCommandFunction {
@Input(doc = "Interval file", shortName = "r", required = true)
var intervalFile: File = null
@Argument(doc = "Minimum MAPQ of reads in target region to remove (default: 0)")
var minMapQ: Option[Int] = config("min_mapq")
@Argument(doc = "Read group IDs to be removed (default: remove reads from all read groups)")
var readgroup: Set[String] = config("read_group", default = Nil)
@Argument(doc = "Whether to remove multiple-mapped reads outside the target regions (default: yes)")
var limitRemoval: Boolean = config("limit_removal", default = false)
@Argument(doc = "Whether to index output BAM file or not")
var noMakeIndex: Boolean = config("no_make_index", default = false)
@Argument(doc = "GTF feature containing intervals (default: exon)")
var featureType: Option[String] = config("feature_type")
@Argument(doc = "Expected maximum number of reads in target regions (default: 7e7)")
var bloomSize: Option[Long] = config("bloom_size")
@Argument(doc = "False positive rate (default: 4e-7)")
var falsePositive: Option[Long] = config("false_positive")
@Output(doc = "Output BAM", shortName = "o", required = true)
var outputBam: File = null
@Output(required = false)
private var outputIndex: Option[File] = None
@Output(doc = "BAM containing discarded reads", shortName = "f", required = false)
var discardedBam: File = null
var discardedBam: Option[File] = None
override def beforeGraph() {
super.beforeGraph()
if (!noMakeIndex) outputIndex = Some(new File(outputBam.getPath.stripSuffix(".bam") + ".bai"))
}
override def cmdLine = super.cmdLine +
required("-I", inputBam) +
required("-r", intervalFile) +
required("-o", outputBam) +
optional("-f", discardedBam) +
optional("-Q", minMapQ) +
readgroup.foreach(rg => required("-G", rg)) +
conditional(limitRemoval, "--limit_removal") +
conditional(noMakeIndex, "--no_make_index") +
optional("-t", featureType) +
optional("--bloom_size", bloomSize) +
optional("--false_positive", falsePositive)
}
......@@ -119,6 +119,10 @@ class Gentrap(val root: Configurable) extends QScript
"mapping" -> Map(
"aligner" -> "gsnap",
"skip_markduplicates" -> true
),
"wipereads" -> Map(
"limit_removal" -> true,
"no_make_index" -> false
)
)
......@@ -209,7 +213,7 @@ class Gentrap(val root: Configurable) extends QScript
job.inputBam = bamFile.get
ribosomalRefFlat().foreach(job.intervalFile = _)
job.outputBam = createFile("cleaned.bam")
job.discardedBam = createFile("rrna.bam")
job.discardedBam = Some(createFile("rrna.bam"))
add(job)
Some(job.outputBam)
} else bamFile
......
......@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetPipe }
import nl.lumc.sasc.biopet.extensions._
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
import nl.lumc.sasc.biopet.extensions.tools.BaseCounter
import nl.lumc.sasc.biopet.extensions.tools.{ BaseCounter, WipeReads }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
......@@ -71,10 +71,6 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
.toMap
)
val validExpressionMeasures = Set(
"fragments_per_gene", "fragments_per_exon", "base_counts",
"cufflinks_strict", "cufflinks_guided", "cufflinks_blind")
@DataProvider(name = "expMeasuresstrandProtocol")
def expMeasuresStrandProtocolProvider = {
......@@ -89,27 +85,23 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
} yield makeSamplesConfig(sampleNum, libNum, libType)
val strandProtocols = Array("non_specific", "dutp")
// get all possible combinations of expression measures
val expressionMeasures = validExpressionMeasures
//.subsets
//.map(_.toList)
.toArray
for {
sampleConfig <- sampleConfigs.toArray
//expressionMeasure <- expressionMeasures
strandProtocol <- strandProtocols
} yield Array(sampleConfig, List(expressionMeasure), strandProtocol)
removeRiboReads <- Array(true, false)
} yield Array(sampleConfig, List(expressionMeasure), strandProtocol, removeRiboReads)
}
@Test(dataProvider = "expMeasuresstrandProtocol")
def testGentrap(sampleConfig: SamplesConfig, expMeasures: List[String], strandProtocol: String) = {
def testGentrap(sampleConfig: SamplesConfig, expMeasures: List[String], strandProtocol: String, removeRiboReads: Boolean) = {
val settings = Map(
"output_dir" -> GentrapTest.outputDir,
"gsnap" -> Map("db" -> "test", "dir" -> "test"),
"expression_measures" -> expMeasures,
"strand_protocol" -> strandProtocol
"strand_protocol" -> strandProtocol,
"remove_ribosomal_reads" -> removeRiboReads
) ++ aligner.map("aligner" -> _)
val config = ConfigUtils.mergeMaps(settings ++ sampleConfig, Map(GentrapTest.executables.toSeq: _*))
val gentrap: Gentrap = initPipeline(config)
......@@ -146,6 +138,9 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
}
gentrap.removeRibosomalReads shouldBe removeRiboReads
gentrap.functions.exists(_.isInstanceOf[WipeReads]) shouldBe removeRiboReads
val classMap = Map(
"gsnap" -> classOf[Gsnap],
"tophat" -> classOf[Tophat],
......@@ -196,6 +191,7 @@ object GentrapTest {
"annotation_gtf" -> (outputDir + File.separator + "ref.fa"),
"annotation_bed" -> (outputDir + File.separator + "ref.fa"),
"annotation_refflat" -> (outputDir + File.separator + "ref.fa"),
"ribosome_refflat" -> (outputDir + File.separator + "ref.fa"),
"varscan_jar" -> "test",
"rscript" -> Map("exe" -> "test")
) ++ Seq(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment