Commit 23f42d6b authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Using Enum for the Fq Encodings

parent a3ea2dcc
......@@ -32,10 +32,17 @@ class Seqstat(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
}
object FqEncoding extends Enumeration {
type FqEncoding = Value
val Sanger = Value(33, "Sanger")
val Solexa = Value(64, "Solexa")
val Unknown = Value(0, "Unknown")
}
object Seqstat extends ToolCommand {
import FqEncoding._
var phred_encoding: String = "sanger"
var phred_correction: Int = 33
var phredEncoding: FqEncoding.Value = Sanger
val reportValues = List(1, 10, 20, 30, 40, 50, 60)
......@@ -77,6 +84,10 @@ object Seqstat extends ToolCommand {
.parse(args, Args())
.getOrElse(sys.exit(1))
/**
*
* @param quals Computed quality histogram [flat]
*/
def detectPhredEncoding(quals: mutable.ArrayBuffer[Long]): Unit = {
// substract 1 on high value, because we start from index 0
val l_qual = quals.takeWhile(_ == 0).length
......@@ -84,38 +95,22 @@ object Seqstat extends ToolCommand {
(l_qual < 59, h_qual > 74) match {
case (false, true) => {
phred_correction = 64
phred_encoding = "solexa"
phredEncoding = Solexa
}
case (true, true) => {
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the h_qual exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
phred_correction = 64
phred_encoding = "solexa"
phredEncoding = Solexa
}
case (true, false) => {
// this is definite a sanger sequence, the lower end is sanger only
phred_correction = 33
phred_encoding = "sanger"
phredEncoding = Sanger
}
case (_, _) => {
phred_correction = 0
phred_encoding = "unknown"
phredEncoding = Unknown
}
}
// if (h_qual > 74) {
// phred_correction = 64
// phred_encoding = "solexa"
// } else if (l_qual < 59) {
// phred_correction = 33
// phred_encoding = "sanger"
// }
}
def transformPhredEncoding(qualMap: Map[Int, Int]): (Map[Int, Int]) = {
qualMap map { case (k, v) => (k - phred_correction, v) }
}
// Setting up the internal storage for the statistics gathered for each read
......@@ -226,7 +221,7 @@ object Seqstat extends ToolCommand {
}
for (pos <- 0 until quals.length) {
var key: Int = pos - phred_correction
var key: Int = pos - phredEncoding.id
if (key > 0) {
// count till the max of baseHistogram.length
for (histokey <- 0 until key + 1) {
......@@ -240,7 +235,7 @@ object Seqstat extends ToolCommand {
}
for (pos <- 0 until readStats.qual.length) {
var key: Int = pos - phred_correction
var key: Int = pos - phredEncoding.id
if (key > 0) {
// count till the max of baseHistogram.length
for (histokey <- 0 until key + 1) {
......@@ -292,12 +287,12 @@ object Seqstat extends ToolCommand {
("len_min", readStats.lengths.takeWhile(_ == 0).length),
("len_max", readStats.lengths.length - 1),
("num_qual_gte", readQualHistoMap.toMap),
("qual_encoding", phred_encoding)
("qual_encoding", phredEncoding.toString.toLowerCase)
))
))
)
val json_report: Json = ConfigUtils.mapToJson(report)
println(json_report.spaces2)
val jsonReport: Json = ConfigUtils.mapToJson(report)
println(jsonReport.spaces2)
}
}
......@@ -37,34 +37,6 @@ class SeqstatSolexaTest extends TestNGSuite with MockitoSugar with Matchers {
Array(
Array(mock[FastqReader])
)
//
// @Test(dataProvider = "mockReaderProvider", groups = Array("solexa"))
// def testEncodingDetectionSolexa(refMock: FastqReader) = {
// when(refMock.iterator) thenReturn solexaRecordsOver("1", "2", "3", "4", "5")
//
// val seqstat = Seqstat
// val numReads = seqstat.seqStat(refMock)
// numReads shouldBe 5
// seqstat.summarize()
//
// seqstat.phred_correction shouldBe 64
// seqstat.phred_encoding shouldBe "solexa"
// }
//
// @Test(dataProvider = "mockReaderProvider", groups = Array("solexa"))
// def testBaseCount(refMock: FastqReader) = {
// when(refMock.iterator) thenReturn solexaRecordsOver("1", "2", "3", "4", "5")
//
// val seqstat = Seqstat
// val numReads = seqstat.seqStat(refMock)
// numReads shouldBe 5
//
// seqstat.summarize()
//
// seqstat.quals.sum shouldBe 50
// seqstat.readStats.withN shouldBe 5
// seqstat.nucleotideHistoMap('N') shouldBe 10
// }
@Test def testArgsMinimum() = {
val args = Array(
......
......@@ -19,6 +19,7 @@ import scala.collection.JavaConverters._
class SeqstatTest extends TestNGSuite with MockitoSugar with Matchers {
import nl.lumc.sasc.biopet.tools.Seqstat._
import nl.lumc.sasc.biopet.tools.FqEncoding._
private def resourceFile(p: String): File =
new File(resourcePath(p))
......@@ -60,21 +61,18 @@ class SeqstatTest extends TestNGSuite with MockitoSugar with Matchers {
// numReads shouldBe 1
seqstat.summarize()
// TODO: divide into more testcases, instead of having in 1,
// currently not possible because the Seqstat is a bit state dependent?
seqstat.phred_correction shouldBe 33
seqstat.phred_encoding shouldBe "sanger"
seqstat.phredEncoding shouldBe Sanger
}
@Test(dataProvider = "mockReaderProvider", groups = Array("nucleocount"), singleThreaded = true, dependsOnGroups = Array("phredscore"))
def testEncodingNucleotideCount(fqMock: FastqReader) = {
val seqstat = Seqstat
baseHistogram(40) shouldEqual 5
baseHistogram(39) shouldEqual 10
baseHistogram(34) shouldEqual 15
baseHistogram(33) shouldEqual 20
baseHistogram(0) shouldEqual 20
nucleotideHistoMap('N') shouldEqual 5
nucleotideHistoMap('A') shouldEqual 5
nucleotideHistoMap('C') shouldEqual 5
nucleotideHistoMap('T') shouldEqual 5
nucleotideHistoMap('G') shouldEqual 5
}
@Test(dataProvider = "mockReaderProvider", groups = Array("basehistogram"), singleThreaded = true, dependsOnGroups = Array("nucleocount"))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment