Commit 4fe9ee8c authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Refactor seqstat and add (few) tests.. wip

parent 52a76c4f
......@@ -37,6 +37,20 @@ object Seqstat extends ToolCommand {
var phred_encoding: String = "sanger"
var phred_correction: Int = 33
val reportValues = List(1, 10, 20, 30, 40, 50, 60)
// the base quality for each position on the reads
var quals: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
// generate the baseHistogram and readHistogram
var baseHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var readHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucleotideHistoMap: mutable.Map[Char, Long] = mutable.Map()
var baseQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
var readQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
case class Args(fastq: File = new File("")) extends AbstractArgs
class OptParser extends AbstractOptParser {
......@@ -125,30 +139,16 @@ object Seqstat extends ToolCommand {
readQuals(avgQual) += 1
}
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
logger.info("Start")
val reader = new FastqReader(commandArgs.fastq)
for (read <- reader.iterator.asScala) {
def seqStat(fqreader: FastqReader): (Long) = {
var numReads: Long = 0
for (read <- fqreader.iterator.asScala) {
procesRead(read)
numReads += 1
}
numReads
}
val reportValues = List(1, 10, 20, 30, 40, 50, 60)
// the base quality for each position on the reads
var quals: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
// generate the baseHistogram and readHistogram
var baseHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var readHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucleotideHistoMap: mutable.Map[Char, Long] = mutable.Map()
var baseQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
var readQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
def summarize(): Unit = {
// for every position to the max length of any read
for (pos <- 0 until baseStats.length) {
// list all qualities at this particular position `pos`
......@@ -163,9 +163,7 @@ object Seqstat extends ToolCommand {
baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value }
// count N into nucs
baseStats(pos).nuc.zipWithIndex foreach { case (value, index) => nucs(index) += value }
}
detectPhredEncoding(quals)
for (pos <- 0 until nucs.length) {
......@@ -209,6 +207,18 @@ object Seqstat extends ToolCommand {
readQualHistoMap += (pos -> readHistogram(pos))
}
}
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
logger.info("Start")
val reader = new FastqReader(commandArgs.fastq)
seqStat(reader)
summarize()
logger.debug(nucs)
// logger.debug(baseStats)
logger.info("Done")
......
/**
* Copyright (c) 2015 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
* @author Wai Yi Leung <w.y.leung@lumc.nl>
*/
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import htsjdk.samtools.fastq.{ AsyncFastqWriter, FastqReader, FastqRecord }
import nl.lumc.sasc.biopet.tools.FastqSync._
import org.mockito.Mockito.{ inOrder => inOrd, when }
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
import scala.collection.JavaConverters._
class SeqstatTest extends TestNGSuite with MockitoSugar with Matchers {
import nl.lumc.sasc.biopet.tools.Seqstat._
private def resourceFile(p: String): File =
new File(resourcePath(p))
private def resourcePath(p: String): String =
Paths.get(getClass.getResource(p).toURI).toString
// Helper functions to create iterator over FastqRecords given its IDs as Ints
// Record with 'A' and Qual==39 (SangerEncoding)
private def recordsOver(ids: String*): java.util.Iterator[FastqRecord] = ids
.map(x => new FastqRecord(x, "A", "", "H"))
.toIterator.asJava
private def solexaRecordsOver(ids: String*): java.util.Iterator[FastqRecord] = ids
.map(x => new FastqRecord(x, "ACTGTNCGATAG", "", "abcde;ABCDEF"))
.toIterator.asJava
@DataProvider(name = "mockReaderProvider")
def mockReaderProvider() =
Array(
Array(mock[FastqReader])
)
@Test(dataProvider = "mockReaderProvider")
def testDefault(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3")
}
@Test(dataProvider = "mockReaderProvider")
def testSeqCountReads(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
}
@Test(dataProvider = "mockReaderProvider")
def testSeqQuality(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
}
@Test(dataProvider = "mockReaderProvider")
def testEncodingDetectionSanger(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
summarize()
phred_correction shouldBe 33
phred_encoding shouldBe "sanger"
}
@Test(dataProvider = "mockReaderProvider")
def testEncodingDetectionSolexa(refMock: FastqReader) = {
when(refMock.iterator) thenReturn solexaRecordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
summarize()
phred_correction shouldBe 64
phred_encoding shouldBe "solexa"
}
@Test def testArgsMinimum() = {
val args = Array(
"-i", resourcePath("/paired01a.fq"))
val parsed = parseArgs(args)
parsed.fastq shouldBe resourceFile("/paired01a.fq")
}
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment