Commit 04599e18 authored by bow's avatar bow
Browse files

Add initial BAM writing function in WipeReads

parent c16a5040
......@@ -11,6 +11,8 @@ import scala.io.Source
import com.twitter.algebird.{ BF, BloomFilter }
import htsjdk.samtools.SAMFileReader
import htsjdk.samtools.SAMFileReader.QueryInterval
import htsjdk.samtools.SAMFileWriter
import htsjdk.samtools.SAMFileWriterFactory
import htsjdk.samtools.SAMRecord
import org.apache.commons.io.FilenameUtils.getExtension
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -165,6 +167,34 @@ object WipeReads {
}
}
// TODO: implement filtered reads BAM output and stats file output
def writeFilteredBAM(filterFunc: (SAMRecord => Boolean), inBAM: File, outBAM: File,
writeIndex: Boolean = true, async: Boolean = true,
filteredOutBAM: File = null) = {
val factory = new SAMFileWriterFactory()
.setCreateIndex(writeIndex)
.setUseAsyncIo(async)
val templateBAM = new SAMFileReader(inBAM)
val targetBAM = factory.makeBAMWriter(templateBAM.getFileHeader, true, outBAM)
val filteredBAM =
if (filteredOutBAM != null)
factory.makeBAMWriter(templateBAM.getFileHeader, true, outBAM)
else
null
try {
for (rec: SAMRecord <- templateBAM.asScala) {
if (!filterFunc(rec)) targetBAM.addAlignment(rec)
else if (filteredBAM != null) filteredBAM.addAlignment(rec)
}
} finally {
templateBAM.close()
targetBAM.close()
if (filteredBAM != null) filteredBAM.close()
}
}
def parseOption(opts: OptionMap, list: List[String]): OptionMap =
list match {
case Nil
......
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:10000
@RG ID:001 DS:paired-end reads SM:WipeReadsTestCase
r02 99 chrQ 50 60 10M = 90 50 TACGTACGTA EEFFGGHHII RG:Z:001
r02 147 chrQ 90 60 10M = 50 -50 ATGCATGCAT EEFFGGHHII RG:Z:001
r01 163 chrQ 150 60 10M = 190 50 AAAAAGGGGG GGGGGGGGGG RG:Z:001
r01 83 chrQ 190 60 10M = 150 -50 GGGGGAAAAA GGGGGGGGGG RG:Z:001
r01 163 chrQ 250 60 10M = 290 50 AAAAAGGGGG GGGGGGGGGG RG:Z:001
r01 83 chrQ 290 60 10M = 250 -50 GGGGGAAAAA GGGGGGGGGG RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
r06 4 * 0 0 * * 0 0 GCGCGCGCGC HIHIHIHIHI RG:Z:001
@HD VN:1.0 SO:coordinate
@SQ SN:chrQ LN:10000
@RG ID:001 DS:single-end reads SM:WipeReadsTestCase
r02 0 chrQ 50 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:001
r01 16 chrQ 190 60 10M * 0 0 TACGTACGTA EEFFGGHHII RG:Z:001
r01 16 chrQ 290 60 10M * 0 0 GGGGGAAAAA GGGGGGGGGG RG:Z:001
r06 4 * 0 0 * * 0 0 ATATATATAT HIHIHIHIHI RG:Z:001
......@@ -6,7 +6,9 @@ package nl.lumc.sasc.biopet.core.apps
import java.nio.file.Paths
import java.io.{ File, IOException }
import scala.collection.JavaConverters._
import htsjdk.samtools.{ SAMFileReader, SAMRecord }
import org.scalatest.Assertions
import org.testng.annotations.Test
......@@ -20,8 +22,10 @@ class WipeReadsUnitTest extends Assertions {
val sbam01 = new File(resourcePath("/single01.bam"))
val sbam02 = new File(resourcePath("/single02.bam"))
val sbam03 = new File(resourcePath("/single03.bam"))
val pbam01 = new File(resourcePath("/paired01.bam"))
val pbam02 = new File(resourcePath("/paired02.bam"))
val pbam03 = new File(resourcePath("/paired03.bam"))
val bed01 = new File(resourcePath("/rrna01.bed"))
val minArgList = List("-I", sbam01.toString, "-l", bed01.toString, "-o", "mock.bam")
......@@ -206,6 +210,36 @@ class WipeReadsUnitTest extends Assertions {
assert(isFilteredOut("r01"))
}
@Test def testWriteSingleBAMDefault() = {
val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
val outBAM = File.createTempFile("WipeReads", java.util.UUID.randomUUID.toString + ".bam")
val outBAMIndex = new File(outBAM.getAbsolutePath.stripSuffix(".bam") + ".bai")
outBAM.deleteOnExit()
outBAMIndex.deleteOnExit()
writeFilteredBAM(mockFilterOutFunc, sbam01, outBAM)
val exp = new SAMFileReader(sbam03).asScala
val obs = new SAMFileReader(outBAM).asScala
val res = for ( (e,o) <- exp.zip(obs)) yield e.getSAMString === o.getSAMString
assert(res.reduceLeft(_ && _))
assert(outBAM.exists)
assert(outBAMIndex.exists)
}
@Test def testWritePairBAMDefault() = {
val mockFilterOutFunc = (r: SAMRecord) => Set("r03", "r04", "r05").contains(r.getReadName)
val outBAM = File.createTempFile("WipeReads", java.util.UUID.randomUUID.toString + ".bam")
val outBAMIndex = new File(outBAM.getAbsolutePath.stripSuffix(".bam") + ".bai")
outBAM.deleteOnExit()
outBAMIndex.deleteOnExit()
writeFilteredBAM(mockFilterOutFunc, pbam01, outBAM)
val exp = new SAMFileReader(pbam03).asScala
val obs = new SAMFileReader(outBAM).asScala
val res = for ( (e,o) <- exp.zip(obs)) yield e.getSAMString === o.getSAMString
assert(res.reduceLeft(_ && _))
assert(outBAM.exists)
assert(outBAMIndex.exists)
}
@Test def testOptMinimum() = {
val opts = parseOption(Map(), minArgList)
assert(opts.contains("inputBAM"))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment