Skip to content
Snippets Groups Projects
Commit 4fe9ee8c authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Refactor seqstat and add (few) tests.. wip

parent 52a76c4f
No related branches found
No related tags found
No related merge requests found
......@@ -37,6 +37,20 @@ object Seqstat extends ToolCommand {
var phred_encoding: String = "sanger"
var phred_correction: Int = 33
val reportValues = List(1, 10, 20, 30, 40, 50, 60)
// the base quality for each position on the reads
var quals: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
// generate the baseHistogram and readHistogram
var baseHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var readHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucleotideHistoMap: mutable.Map[Char, Long] = mutable.Map()
var baseQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
var readQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
case class Args(fastq: File = new File("")) extends AbstractArgs
class OptParser extends AbstractOptParser {
......@@ -125,30 +139,16 @@ object Seqstat extends ToolCommand {
readQuals(avgQual) += 1
}
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
logger.info("Start")
val reader = new FastqReader(commandArgs.fastq)
for (read <- reader.iterator.asScala) {
def seqStat(fqreader: FastqReader): (Long) = {
var numReads: Long = 0
for (read <- fqreader.iterator.asScala) {
procesRead(read)
numReads += 1
}
numReads
}
val reportValues = List(1, 10, 20, 30, 40, 50, 60)
// the base quality for each position on the reads
var quals: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
// generate the baseHistogram and readHistogram
var baseHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var readHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucleotideHistoMap: mutable.Map[Char, Long] = mutable.Map()
var baseQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
var readQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
def summarize(): Unit = {
// for every position to the max length of any read
for (pos <- 0 until baseStats.length) {
// list all qualities at this particular position `pos`
......@@ -163,9 +163,7 @@ object Seqstat extends ToolCommand {
baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value }
// count N into nucs
baseStats(pos).nuc.zipWithIndex foreach { case (value, index) => nucs(index) += value }
}
detectPhredEncoding(quals)
for (pos <- 0 until nucs.length) {
......@@ -209,6 +207,18 @@ object Seqstat extends ToolCommand {
readQualHistoMap += (pos -> readHistogram(pos))
}
}
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
logger.info("Start")
val reader = new FastqReader(commandArgs.fastq)
seqStat(reader)
summarize()
logger.debug(nucs)
// logger.debug(baseStats)
logger.info("Done")
......
/**
* Copyright (c) 2015 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
* @author Wai Yi Leung <w.y.leung@lumc.nl>
*/
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import htsjdk.samtools.fastq.{ AsyncFastqWriter, FastqReader, FastqRecord }
import nl.lumc.sasc.biopet.tools.FastqSync._
import org.mockito.Mockito.{ inOrder => inOrd, when }
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
import scala.collection.JavaConverters._
class SeqstatTest extends TestNGSuite with MockitoSugar with Matchers {
import nl.lumc.sasc.biopet.tools.Seqstat._
private def resourceFile(p: String): File =
new File(resourcePath(p))
private def resourcePath(p: String): String =
Paths.get(getClass.getResource(p).toURI).toString
// Helper functions to create iterator over FastqRecords given its IDs as Ints
// Record with 'A' and Qual==39 (SangerEncoding)
private def recordsOver(ids: String*): java.util.Iterator[FastqRecord] = ids
.map(x => new FastqRecord(x, "A", "", "H"))
.toIterator.asJava
private def solexaRecordsOver(ids: String*): java.util.Iterator[FastqRecord] = ids
.map(x => new FastqRecord(x, "ACTGTNCGATAG", "", "abcde;ABCDEF"))
.toIterator.asJava
@DataProvider(name = "mockReaderProvider")
def mockReaderProvider() =
Array(
Array(mock[FastqReader])
)
@Test(dataProvider = "mockReaderProvider")
def testDefault(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3")
}
@Test(dataProvider = "mockReaderProvider")
def testSeqCountReads(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
}
@Test(dataProvider = "mockReaderProvider")
def testSeqQuality(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
}
@Test(dataProvider = "mockReaderProvider")
def testEncodingDetectionSanger(refMock: FastqReader) = {
when(refMock.iterator) thenReturn recordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
summarize()
phred_correction shouldBe 33
phred_encoding shouldBe "sanger"
}
@Test(dataProvider = "mockReaderProvider")
def testEncodingDetectionSolexa(refMock: FastqReader) = {
when(refMock.iterator) thenReturn solexaRecordsOver("1", "2", "3", "4", "5")
val (numReads) = seqStat(refMock)
numReads shouldBe 5
summarize()
phred_correction shouldBe 64
phred_encoding shouldBe "solexa"
}
@Test def testArgsMinimum() = {
val args = Array(
"-i", resourcePath("/paired01a.fq"))
val parsed = parseArgs(args)
parsed.fastq shouldBe resourceFile("/paired01a.fq")
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment