From 1b93ce1c44350f98cc98c2d5dbb9e8c732356589 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Fri, 31 Oct 2014 18:29:42 +0100
Subject: [PATCH] ExtractAlignedFastq fix for FASTQ records with descriptions

---
 .../sasc/biopet/tools/ExtractAlignedFastq.scala     | 13 +++++++++----
 biopet-framework/src/test/resources/paired01a.fq    |  2 +-
 biopet-framework/src/test/resources/paired01b.fq    |  2 +-
 .../biopet/tools/ExtractAlignedFastqUnitTest.scala  |  8 ++++----
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
index 7f7fe2239..8ceb27793 100644
--- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
+++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
@@ -21,6 +21,9 @@ object ExtractAlignedFastq extends ToolCommand {
 
   type FastqInput = (FastqRecord, Option[FastqRecord])
 
+  /** Returns the FASTQ record ID: the read header up to (excluding) its first space; "" when the header is blank or starts with a space. */
+  def fastqId(rec: FastqRecord): String = rec.getReadHeader.takeWhile(_ != ' ')
+
   /**
    * Function to create iterator over Interval given input interval string
    *
@@ -113,11 +116,12 @@ object ExtractAlignedFastq extends ToolCommand {
       )
 
     (pair: FastqInput) => pair._2 match {
-      case None => selected.contains(pair._1.getReadHeader)
+      case None => selected.contains(fastqId(pair._1))
       case Some(x) =>
-        require(commonSuffixLength < pair._1.getReadHeader.length)
-        require(commonSuffixLength < x.getReadHeader.length)
-        selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength))
+        val rec1Id = fastqId(pair._1)
+        require(commonSuffixLength < rec1Id.length)
+        require(commonSuffixLength < fastqId(x).length)
+        selected.contains(rec1Id.dropRight(commonSuffixLength))
     }
   }
 
@@ -224,6 +228,7 @@ object ExtractAlignedFastq extends ToolCommand {
       minMapQ = commandArgs.minMapQ,
       commonSuffixLength = commandArgs.commonSuffixLength)
 
+    logger.info("Writing to output file(s) ...")
     (commandArgs.inputFastq2, commandArgs.outputFastq2) match {
 
       case (None, None) => extractReads(memFunc,
diff --git a/biopet-framework/src/test/resources/paired01a.fq b/biopet-framework/src/test/resources/paired01a.fq
index 530a1bb9b..d1fcedf37 100644
--- a/biopet-framework/src/test/resources/paired01a.fq
+++ b/biopet-framework/src/test/resources/paired01a.fq
@@ -1,4 +1,4 @@
-@r01/1
+@r01/1 hello
 A
 +
 H
diff --git a/biopet-framework/src/test/resources/paired01b.fq b/biopet-framework/src/test/resources/paired01b.fq
index 72cf9246d..b93021532 100644
--- a/biopet-framework/src/test/resources/paired01b.fq
+++ b/biopet-framework/src/test/resources/paired01b.fq
@@ -1,4 +1,4 @@
-@r01/2
+@r01/2 hello
 T
 +
 I
diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala
index 4cd7205c0..e60af8466 100644
--- a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala
+++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala
@@ -189,7 +189,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
   }
 
   @Test def testWriteSingleFastqDefault() = {
-    val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(recs._1.getReadHeader)
+    val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(fastqId(recs._1))
     val in1 = new FastqReader(resourceFile("/single01.fq"))
     val mo1 = mock[BasicFastqWriter]
     val obs = inOrd(mo1)
@@ -201,15 +201,15 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
 
   @Test def testWritePairFastqDefault() = {
     val mockSet = Set("r01/1", "r01/2", "r03/1", "r03/2")
-    val memFunc = (recs: FastqInput) => mockSet.contains(recs._1.getReadHeader) || mockSet.contains(recs._2.get.getReadHeader)
+    val memFunc = (recs: FastqInput) => mockSet.contains(fastqId(recs._1)) || mockSet.contains(fastqId(recs._2.get))
     val in1 = new FastqReader(resourceFile("/paired01a.fq"))
     val in2 = new FastqReader(resourceFile("/paired01b.fq"))
     val mo1 = mock[BasicFastqWriter]
     val mo2 = mock[BasicFastqWriter]
     val obs = inOrd(mo1, mo2)
     extractReads(memFunc, in1, mo1, in2, mo2)
-    obs.verify(mo1).write(new FastqRecord("r01/1", "A", "", "H"))
-    obs.verify(mo2).write(new FastqRecord("r01/2", "T", "", "I"))
+    obs.verify(mo1).write(new FastqRecord("r01/1 hello", "A", "", "H"))
+    obs.verify(mo2).write(new FastqRecord("r01/2 hello", "T", "", "I"))
     obs.verify(mo1).write(new FastqRecord("r03/1", "G", "", "H"))
     obs.verify(mo2).write(new FastqRecord("r03/2", "C", "", "I"))
     verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord])
-- 
GitLab