From 1b93ce1c44350f98cc98c2d5dbb9e8c732356589 Mon Sep 17 00:00:00 2001 From: bow <bow@bow.web.id> Date: Fri, 31 Oct 2014 18:29:42 +0100 Subject: [PATCH] ExtractAlignedFastq fix for FASTQ records with descriptions --- .../sasc/biopet/tools/ExtractAlignedFastq.scala | 13 +++++++++---- biopet-framework/src/test/resources/paired01a.fq | 2 +- biopet-framework/src/test/resources/paired01b.fq | 2 +- .../biopet/tools/ExtractAlignedFastqUnitTest.scala | 8 ++++---- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala index 7f7fe2239..8ceb27793 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -21,6 +21,9 @@ object ExtractAlignedFastq extends ToolCommand { type FastqInput = (FastqRecord, Option[FastqRecord]) + /** function to get FastqRecord ID */ + def fastqId(rec: FastqRecord) = rec.getReadHeader.split(" ")(0) + /** * Function to create iterator over Interval given input interval string * @@ -113,11 +116,12 @@ object ExtractAlignedFastq extends ToolCommand { ) (pair: FastqInput) => pair._2 match { - case None => selected.contains(pair._1.getReadHeader) + case None => selected.contains(fastqId(pair._1)) case Some(x) => - require(commonSuffixLength < pair._1.getReadHeader.length) - require(commonSuffixLength < x.getReadHeader.length) - selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength)) + val rec1Id = fastqId(pair._1) + require(commonSuffixLength < rec1Id.length) + require(commonSuffixLength < fastqId(x).length) + selected.contains(rec1Id.dropRight(commonSuffixLength)) } } @@ -224,6 +228,7 @@ object ExtractAlignedFastq extends ToolCommand { minMapQ = commandArgs.minMapQ, commonSuffixLength = commandArgs.commonSuffixLength) + logger.info("Writing to output file(s) ...") (commandArgs.inputFastq2, commandArgs.outputFastq2) match { case (None, None) => extractReads(memFunc, diff --git a/biopet-framework/src/test/resources/paired01a.fq b/biopet-framework/src/test/resources/paired01a.fq index 530a1bb9b..d1fcedf37 100644 --- a/biopet-framework/src/test/resources/paired01a.fq +++ b/biopet-framework/src/test/resources/paired01a.fq @@ -1,4 +1,4 @@ -@r01/1 +@r01/1 hello A + H diff --git a/biopet-framework/src/test/resources/paired01b.fq b/biopet-framework/src/test/resources/paired01b.fq index 72cf9246d..b93021532 100644 --- a/biopet-framework/src/test/resources/paired01b.fq +++ b/biopet-framework/src/test/resources/paired01b.fq @@ -1,4 +1,4 @@ -@r01/2 +@r01/2 hello T + I diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala index 4cd7205c0..e60af8466 100644 --- a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala +++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala @@ -189,7 +189,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat } @Test def testWriteSingleFastqDefault() = { - val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(recs._1.getReadHeader) + val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(fastqId(recs._1)) val in1 = new FastqReader(resourceFile("/single01.fq")) val mo1 = mock[BasicFastqWriter] val obs = inOrd(mo1) @@ -201,15 +201,15 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat @Test def testWritePairFastqDefault() = { val mockSet = Set("r01/1", "r01/2", "r03/1", "r03/2") - val memFunc = (recs: FastqInput) => mockSet.contains(recs._1.getReadHeader) || mockSet.contains(recs._2.get.getReadHeader) + val memFunc = (recs: FastqInput) => mockSet.contains(fastqId(recs._1)) || mockSet.contains(fastqId(recs._2.get)) val in1 = new FastqReader(resourceFile("/paired01a.fq")) val in2 = new FastqReader(resourceFile("/paired01b.fq")) val mo1 = mock[BasicFastqWriter] val mo2 = mock[BasicFastqWriter] val obs = inOrd(mo1, mo2) extractReads(memFunc, in1, mo1, in2, mo2) - obs.verify(mo1).write(new FastqRecord("r01/1", "A", "", "H")) - obs.verify(mo2).write(new FastqRecord("r01/2", "T", "", "I")) + obs.verify(mo1).write(new FastqRecord("r01/1 hello", "A", "", "H")) + obs.verify(mo2).write(new FastqRecord("r01/2 hello", "T", "", "I")) obs.verify(mo1).write(new FastqRecord("r03/1", "G", "", "H")) obs.verify(mo2).write(new FastqRecord("r03/2", "C", "", "I")) verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord]) -- GitLab