Commit 0a897753 authored by Sander van der Zeeuw
Browse files

Change Gsnap piping and fix a bug in FastqSync

parent a53e20d9
...@@ -331,6 +331,14 @@ class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Refer ...@@ -331,6 +331,14 @@ class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Refer
def versionRegex = """.* version (.*)""".r def versionRegex = """.* version (.*)""".r
def versionCommand = executable + " --version" def versionCommand = executable + " --version"
/**
 * Extends the parent `beforeGraph()` hook with automatic detection of gzipped input.
 *
 * When neither `gunzip` nor `bunzip2` has been set, and there is at least one input file and
 * every input file name ends in ".gz", `gunzip` is switched on and an info message is logged.
 */
override def beforeGraph(): Unit = {
  super.beforeGraph()
  // `forall` is vacuously true on an empty collection, so require at least one input file
  // before concluding that gzipped fastq files were found.
  if (!gunzip && !bunzip2 && input.nonEmpty && input.forall(_.getName.endsWith(".gz"))) {
    logger.info("Fastq with .gz extension found, enabled --gunzip option")
    gunzip = true
  }
}
def cmdLine = { def cmdLine = {
required(executable) + required(executable) +
optional("--dir", dir) + optional("--dir", dir) +
......
...@@ -26,11 +26,11 @@ import scala.collection.JavaConverters._ ...@@ -26,11 +26,11 @@ import scala.collection.JavaConverters._
object FastqSync extends ToolCommand { object FastqSync extends ToolCommand {
/** Regex for capturing read ID ~ taking into account its read pair mark (if present) */ /** Regex for capturing read ID ~ taking into account its read pair mark (if present) */
private val idRegex = "[_/][12]\\s??|\\s".r private val idRegex = """[_/][12]$""".r
/** Implicit class to allow for lazy retrieval of FastqRecord ID without any read pair mark */ /** Implicit class to allow for lazy retrieval of FastqRecord ID without any read pair mark */
private implicit class FastqPair(fq: FastqRecord) { private implicit class FastqPair(fq: FastqRecord) {
lazy val fragId = idRegex.split(fq.getReadHeader)(0) lazy val fragId = idRegex.split(fq.getReadHeader.split(" ")(0))(0)
} }
/** /**
......
...@@ -188,6 +188,32 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers { ...@@ -188,6 +188,32 @@ class FastqSyncTest extends TestNGSuite with MockitoSugar with Matchers {
numKept shouldBe 1 numKept shouldBe 1
} }
/**
 * Syncs reads whose IDs end in a "/1" or "/2" pair mark.
 *
 * Only the read "SOLEXA12_24:6:96:470:1965" occurs in the reference, A and B inputs, so exactly
 * that pair is expected to be written, with one record discarded on each side.
 */
@Test(dataProvider = "mockProvider")
def testSeqSolexa(refMock: FastqReader, aMock: FastqReader, bMock: FastqReader,
                  aOutMock: AsyncFastqWriter, bOutMock: AsyncFastqWriter) = {
  // Reference input: three reads, all carrying the "/2" mark.
  when(refMock.iterator).thenReturn(recordsOver(
    "SOLEXA12_24:6:117:1388:2001/2",
    "SOLEXA12_24:6:96:470:1965/2",
    "SOLEXA12_24:6:35:1209:2037/2"))
  // Input A: lacks the first reference read.
  when(aMock.iterator).thenReturn(recordsOver(
    "SOLEXA12_24:6:96:470:1965/1",
    "SOLEXA12_24:6:35:1209:2037/1"))
  // Input B: lacks the last reference read.
  when(bMock.iterator).thenReturn(recordsOver(
    "SOLEXA12_24:6:117:1388:2001/2",
    "SOLEXA12_24:6:96:470:1965/2"))

  val writeOrder = inOrd(aOutMock, bOutMock)
  val (discarded1, discarded2, kept) = syncFastq(refMock, aMock, bMock, aOutMock, bOutMock)

  // The shared pair must be written to the A writer first, then to the B writer.
  val expectedA = new FastqRecord("SOLEXA12_24:6:96:470:1965/1", "A", "", "H")
  writeOrder.verify(aOutMock).write(expectedA)
  val expectedB = new FastqRecord("SOLEXA12_24:6:96:470:1965/2", "A", "", "H")
  writeOrder.verify(bOutMock).write(expectedB)

  discarded1 shouldBe 1
  discarded2 shouldBe 1
  kept shouldBe 1
}
@Test(dataProvider = "mockProvider") @Test(dataProvider = "mockProvider")
def testSeqABShorterPairMarkSlash(refMock: FastqReader, aMock: FastqReader, bMock: FastqReader, def testSeqABShorterPairMarkSlash(refMock: FastqReader, aMock: FastqReader, bMock: FastqReader,
aOutMock: AsyncFastqWriter, bOutMock: AsyncFastqWriter) = { aOutMock: AsyncFastqWriter, bOutMock: AsyncFastqWriter) = {
......
...@@ -324,10 +324,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S ...@@ -324,10 +324,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
} }
def addGsnap(R1: File, R2: Option[File], output: File): File = { def addGsnap(R1: File, R2: Option[File], output: File): File = {
val zcatR1 = extractIfNeeded(R1, output.getParentFile)
val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
val gsnapCommand = new Gsnap(this) val gsnapCommand = new Gsnap(this)
gsnapCommand.input = if (paired) List(zcatR1._2, zcatR2.get._2) else List(zcatR1._2) gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam") gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
val reorderSam = new ReorderSam(this) val reorderSam = new ReorderSam(this)
...@@ -335,11 +333,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S ...@@ -335,11 +333,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam") reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
val ar = addAddOrReplaceReadGroups(reorderSam.output, output) val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) :: val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
Some(gsnapCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten) pipe.threadsCorrection = -2
pipe.threadsCorrection = -1
zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
add(pipe) add(pipe)
ar._2 ar._2
} }
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment