diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala
index 721ba8575bc8c7edff7854163fba57a0afe3e474..c640da9bc3b82cf1495abd4d8af69b6d1c8bbed5 100644
--- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala
@@ -331,6 +331,15 @@ class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Refer
   def versionRegex = """.* version (.*)""".r
   def versionCommand = executable + " --version"
 
+  /** Enables --gunzip when no decompression flag is set and all (at least one) inputs end in .gz */
+  override def beforeGraph(): Unit = {
+    super.beforeGraph()
+    if ((!gunzip && !bunzip2) && input.nonEmpty && input.forall(_.getName.endsWith(".gz"))) {
+      logger.info("Fastq with .gz extension found, enabled --gunzip option")
+      gunzip = true
+    }
+  }
+
   def cmdLine = {
     required(executable) +
       optional("--dir", dir) +
diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
index d4e6996de89b9a62a3b35f9ea894907882b4484f..e56393b38eb62499c39469b452baefc818812e28 100644
--- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
@@ -26,11 +26,12 @@ import scala.collection.JavaConverters._
 object FastqSync extends ToolCommand {
 
   /** Regex for capturing read ID ~ taking into account its read pair mark (if present) */
-  private val idRegex = "[_/][12]\\s??|\\s".r
+  private val idRegex = """[_/][12]$""".r
 
   /** Implicit class to allow for lazy retrieval of FastqRecord ID without any read pair mark */
   private implicit class FastqPair(fq: FastqRecord) {
-    lazy val fragId = idRegex.split(fq.getReadHeader)(0)
+    // Keep only the text before the first whitespace, then drop a trailing read pair mark
+    lazy val fragId = idRegex.replaceFirstIn(fq.getReadHeader.takeWhile(!_.isWhitespace), "")
   }
 
   /**
diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala
index 565498eb6ed0732c1c796f4cf82d91d77a55815b..bfd649f1df2f4160b8846067399a074e8231c872 100644
--- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala
+++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqSyncTest.scala
@@ -188,6 +188,32 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
     numKept shouldBe 1
   }
 
+  @Test(dataProvider = "mockProvider")
+  def testSeqSolexa(refMock: FastqReader, aMock: FastqReader, bMock: FastqReader,
+                    aOutMock: AsyncFastqWriter, bOutMock: AsyncFastqWriter) = {
+
+    when(refMock.iterator) thenReturn recordsOver(
+      "SOLEXA12_24:6:117:1388:2001/2",
+      "SOLEXA12_24:6:96:470:1965/2",
+      "SOLEXA12_24:6:35:1209:2037/2")
+    when(aMock.iterator) thenReturn recordsOver(
+      "SOLEXA12_24:6:96:470:1965/1",
+      "SOLEXA12_24:6:35:1209:2037/1")
+    when(bMock.iterator) thenReturn recordsOver(
+      "SOLEXA12_24:6:117:1388:2001/2",
+      "SOLEXA12_24:6:96:470:1965/2")
+    val obs = inOrd(aOutMock, bOutMock)
+
+    val (numDiscard1, numDiscard2, numKept) = syncFastq(refMock, aMock, bMock, aOutMock, bOutMock)
+
+    obs.verify(aOutMock).write(new FastqRecord("SOLEXA12_24:6:96:470:1965/1", "A", "", "H"))
+    obs.verify(bOutMock).write(new FastqRecord("SOLEXA12_24:6:96:470:1965/2", "A", "", "H"))
+
+    numDiscard1 shouldBe 1
+    numDiscard2 shouldBe 1
+    numKept shouldBe 1
+  }
+
   @Test(dataProvider = "mockProvider")
   def testSeqABShorterPairMarkSlash(refMock: FastqReader, aMock: FastqReader, bMock: FastqReader,
                                     aOutMock: AsyncFastqWriter, bOutMock: AsyncFastqWriter) = {
diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
index 2ef18c8f67e1ea784f8aa6d42d5daa688a4675a6..266f6bc3b14b9a7aa0c7b89650c6c1aa625054b8 100644
--- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
+++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
@@ -324,10 +324,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
   }
 
   def addGsnap(R1: File, R2: Option[File], output: File): File = {
-    val zcatR1 = extractIfNeeded(R1, output.getParentFile)
-    val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
     val gsnapCommand = new Gsnap(this)
-    gsnapCommand.input = if (paired) List(zcatR1._2, zcatR2.get._2) else List(zcatR1._2)
+    gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
     gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
 
     val reorderSam = new ReorderSam(this)
@@ -335,11 +333,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
     reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
 
     val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
-    val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) ::
-      Some(gsnapCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
-    pipe.threadsCorrection = -1
-    zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
-    zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
+    val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
+    pipe.threadsCorrection = -2
     add(pipe)
     ar._2
   }