Commit 337a2abe authored by bow's avatar bow
Browse files

Finalize WipeReads command line parser and its tests

parent 41406ae4
/**
* Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
* Author: Wibowo Arindrarto <w.arindrarto@lumc.nl>
* @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
*/
package nl.lumc.sasc.biopet.core.apps
import java.io.File
import java.io.{ File, IOException }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
class WipeReads(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
......@@ -21,56 +22,95 @@ class WipeReads(val root: Configurable) extends BiopetJavaCommandLineFunction {
@Output(doc = "Output BAM", shortName = "o", required = true)
var outputBAM: File = _
}
object WipeReads {
type OptionMap = Map[String, Any]
def main(args: Array[String]): Unit = {
object Strand extends Enumeration {
type Strand = Value
val Plus, Minus, Ignore = Value
}
// simple command line parser. adapted from @pjotrp's answer at
// http://stackoverflow.com/questions/2315912/scala-best-way-to-parse-command-line-parameters-cli
if (args.length == 0) {
println(usage)
System.exit(1)
def checkInputFile(inFile: File): File =
if (inFile.exists)
inFile
else
throw new IOException("Input file " + inFile.getPath + " not found")
def checkInputBAM(inBAM: File): File = {
// input BAM must have a .bam.bai index
if (new File(inBAM.getPath + ".bai").exists)
checkInputFile(inBAM)
else
throw new IOException("Index for input BAM file " + inBAM.getPath + " not found")
}
def parseOption(opts: OptionMap, list: List[String]): OptionMap =
list match {
case Nil
=> opts
case ("--inputBAM" | "-I") :: value :: tail if !opts.contains("inputBAM")
=> parseOption(opts ++ Map("inputBAM" -> checkInputBAM(new File(value))), tail)
case ("--targetRegions" | "-l") :: value :: tail if !opts.contains("targetRegions")
=> parseOption(opts ++ Map("targetRegions" -> checkInputFile(new File(value))), tail)
case ("--outputBAM" | "-o") :: value :: tail if !opts.contains("outputBAM")
=> parseOption(opts ++ Map("outputBAM" -> new File(value)), tail)
case ("--minOverlapFraction" | "-f") :: value :: tail if !opts.contains("minOverlapFraction")
=> parseOption(opts ++ Map("minOverlapFraction" -> value.toDouble), tail)
case ("--minMapQ" | "-Q") :: value :: tail if !opts.contains("minMapQ")
=> parseOption(opts ++ Map("minMapQ" -> value.toInt), tail)
case ("--strand" | "-s") :: (value @ ("plus" | "minus" | "ignore")) :: tail if !opts.contains("strand")
=> parseOption(opts ++ Map("strand" -> Strand.withName(value.capitalize)), tail)
case ("--makeIndex") :: tail
=> parseOption(opts ++ Map("makeIndex" -> true), tail)
case ("--limitToRegion" | "-limit") :: tail
=> parseOption(opts ++ Map("limitToRegion" -> true), tail)
// TODO: better way to parse multiple flag values?
case ("--readGroup" | "-RG") :: value :: tail if !opts.contains("readGroup")
=> parseOption(opts ++ Map("readGroup" -> value.split(",")), tail)
case option :: tail
=> throw new IllegalArgumentException("Unexpected or duplicate option flag: " + option)
}
val argList = args.toList
def nextOption(map: OptionMap, list: List[String]): OptionMap =
list match {
case ("--inputBAM" | "-I") :: value :: tail => nextOption(map ++ Map("inputBAM" -> new File(value)), tail)
// TODO: add other flags
case option :: tail => map
}
def validateOption(opts: OptionMap): Unit = {
// TODO: better way to check for required arguments ~ use scalaz.Validation?
if (opts.get("inputBAM") == None)
throw new IllegalArgumentException("Input BAM not supplied")
if (opts.get("targetRegions") == None)
throw new IllegalArgumentException("Target regions file not supplied")
}
val options = nextOption(Map(), argList)
def main(args: Array[String]): Unit = {
val inputBAM = options.get("inputBAM") match {
case Some(value) => value
// TODO: make exception object more specific
case None => throw new Exception("Required input flag '--inputBAM / -I' not found")
if (args.length == 0) {
println(usage)
System.exit(1)
}
val options = parseOption(Map(), args.toList)
validateOption(options)
}
val usage: String = """
|usage: java -cp BiopetFramework.jar nl.lumc.sasc.biopet-core.apps.WipeReads [options] -I input -r regions -o output
|
|WipeReads - Tool for reads removal from an indexed BAM file.
|
|positional arguments:
| -I,--inputBAM input BAM file, must be indexed
| -r,--regions input BED file
| -o,--outputBAM output BAM file
|
|optional arguments:
| -f,--fraction_overlap Minimum overlap of reads and target regions
|
|This tool will remove BAM records that overlaps a set of given regions.
|By default, if the removed reads are also mapped to other regions outside
|the given ones, they will also be removed.
""".stripMargin
val usage: String =
"""
|usage: java -cp BiopetFramework.jar nl.lumc.sasc.biopet-core.apps.WipeReads [options] -I input -l regions -o output
|
|WipeReads - Tool for reads removal from an indexed BAM file.
|
|positional arguments:
| -I,--inputBAM Input BAM file, must be indexed with '.bam.bai' extension
| -l,--targetRegions Input BED file
| -o,--outputBAM Output BAM file
|
|optional arguments:
| -f,--minOverlapFraction Minimum overlap of reads and target regions
|
|This tool will remove BAM records that overlaps a set of given regions.
|By default, if the removed reads are also mapped to other regions outside
|the given ones, they will also be removed.
""".stripMargin
}
chrQ 290 320 rRNA01 0 +
chrQ 450 480 rRNA02 0 -
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:1000
@RG ID:002 DS:single-end reads SM:WipeReadsTestCase
r02 0 chrQ 50 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:002
r01 16 chrQ 290 60 10M * 0 0 GGGGGAAAAA GGGGGGGGGG RG:Z:002
r04 0 chrQ 450 60 10M * 0 0 CGTACGTACG EEFFGGHHII RG:Z:002
r03 16 chrQ 690 60 10M * 0 0 CCCCCTTTTT HHHHHHHHHH RG:Z:002
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:002
/**
* Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
* @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
*/
package nl.lumc.sasc.biopet.core.apps
import java.nio.file.Paths
import java.io.{ File, IOException }
import org.scalatest.Assertions
import org.testng.annotations.Test
class WipeReadsUnitTest extends Assertions {
import WipeReads._
private def resourcePath(p: String): String =
Paths.get(getClass.getResource(p).toURI).toString
val bam01 = resourcePath("/single.bam")
val bed01 = resourcePath("/rrna01.bed")
val minArgList = List("-I", bam01.toString, "-l", bed01.toString, "-o", "mock.bam")
@Test def testOptMinimum() = {
val opts = parseOption(Map(), minArgList)
assert(opts.contains("inputBAM"))
assert(opts.contains("targetRegions"))
assert(opts.contains("outputBAM"))
}
@Test def testOptMissingBAI() = {
val pathBAM = File.createTempFile("WipeReads", java.util.UUID.randomUUID.toString)
assert(pathBAM.exists)
val argList = List(
"--inputBAM", pathBAM.toPath.toString,
"--targetRegions", bed01.toString,
"--outputBAM", "mock.bam")
val thrown = intercept[IOException] {
parseOption(Map(), argList)
}
assert(thrown.getMessage === "Index for input BAM file " + pathBAM + " not found")
pathBAM.deleteOnExit()
}
@Test def testOptMissingRegions() = {
val pathRegion = "/i/dont/exist.bed"
val argList = List(
"--inputBAM", bam01,
"--targetRegions", pathRegion,
"--outputBAM", "mock.bam"
)
val thrown = intercept[IOException] {
parseOption(Map(), argList)
}
assert(thrown.getMessage === "Input file " + pathRegion + " not found")
}
@Test def testOptUnexpected() = {
val argList = List("--strand", "sense") ::: minArgList
val thrown = intercept[IllegalArgumentException] {
parseOption(Map(), argList)
}
assert(thrown.getMessage === "Unexpected or duplicate option flag: --strand")
}
@Test def testOptMinOverlapFraction() = {
val argList = List("--minOverlapFraction", "0.8") ::: minArgList
val opts = parseOption(Map(), argList)
assert(opts("minOverlapFraction") == 0.8)
}
@Test def testOptMinMapQ() = {
val argList = List("--minMapQ", "13") ::: minArgList
val opts = parseOption(Map(), argList)
assert(opts("minMapQ") == 13)
}
@Test def testOptStrandPlus() = {
val argList = List("--strand", "plus") ::: minArgList
val opts = parseOption(Map(), argList)
assert(opts("strand") == Strand.Plus)
}
@Test def testOptStrandMinus() = {
val argList = List("--strand", "minus") ::: minArgList
val opts = parseOption(Map(), argList)
assert(opts("strand") == Strand.Minus)
}
@Test def testOptStrandIgnore() = {
val argList = List("--strand", "ignore") ::: minArgList
val opts = parseOption(Map(), argList)
assert(opts("strand") == Strand.Ignore)
}
@Test def testOptMakeIndex() = {
val argList = List("--makeIndex") ::: minArgList
val opts = parseOption(Map(), argList)
assert(opts("makeIndex") == true) // why can't we evaluate directly??
}
@Test def testOptLimitToRegion() = {
val argList = List("--limitToRegion") ::: minArgList
val opts = parseOption(Map(), argList)
assert(opts("limitToRegion") == true)
}
@Test def testOptSingleReadGroup() = {
val argList = List("--readGroup", "g1") ::: minArgList
val opts = parseOption(Map(), argList)
assert(Array("g1").sameElements(opts("readGroup")))
}
@Test def testOptMultipleReadGroup() = {
val argList = List("--readGroup", "g1,g2") ::: minArgList
val opts = parseOption(Map(), argList)
assert(Array("g1", "g2").sameElements(opts("readGroup")))
}
}
/*
TODO: missing whole argument
*/
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment