Commit 18118db3 authored by wyleung's avatar wyleung
Browse files

refactoring

parent 4b96fb53
......@@ -32,7 +32,8 @@ object Seqstat extends ToolCommand {
var phred_encoding: String = "sanger"
var phred_correction: Int = 33
case class ReadStats(length: Int, n_bases: Int, has_N: Int, avg_qual: Map[Int, Int], qualstat: Map[Int, Int],
case class ReadStats(length: Int, n_bases: Int, has_N: Int,
avg_qual: Map[Int, Int], qualstat: Map[Char, Int],
total_reads: Int, length_max: Int, length_min: Int,
bases_total: Long)
......@@ -43,10 +44,10 @@ object Seqstat extends ToolCommand {
* @param record FastqRecord
*/
def analyseFastQRecord(record: FastqRecord): ReadStats = {
var (qualstat, avg_qual, numBases, n_bases, readlength) = (Map(0 -> 0), Map(0 -> 0), 0, 0, 0)
var (avg_qual, numBases, n_bases, readlength) = (Map(0 -> 0), 0, 0, 0)
qualstat = histogramReadQual(record.getBaseQualityString)
avg_qual = Map(qualstat.view.map { case (k, v) => k * v }.toList.foldLeft(0)(_ + _) / qualstat.values.sum -> 1)
val qualstat = histogramReadQual(record.getBaseQualityString).toMap
avg_qual = Map(qualstat.view.map { case (k, v) => k.toInt * v }.toList.foldLeft(0)(_ + _) / qualstat.values.sum -> 1)
n_bases = record.getReadString.count(_ == 'N')
readlength = record.getReadString.length
......@@ -76,38 +77,15 @@ object Seqstat extends ToolCommand {
.asScala.toList
.map(record => analyseFastQRecord(record)).reduceLeft(statReducer)
// val it = fqf.iterator.asScala
// while (it.hasNext) {
// val record = it.next()
//
// numRecords += 1
// numBases += record.getReadString.length
//
// val recordNbases: Int = record.getReadString.count(_ == 'N')
// if (recordNbases > 0) {
// numRecordsWithN += 1
// numBasesWithN += recordNbases
// }
//
// // compute the histogram of qualities for this read
// val qualStat = histogramReadQual(record.getBaseQualityString)
// // merge the matrixes
// baseQuals = baseQuals |+| qualStat
//
// // compute the per read avg quality
// val AvgReadQual = qualStat.view.map { case (k, v) => k * v }.toList.foldLeft(0)(_ + _) / qualStat.values.sum
// readQuals = readQuals |+| Map(AvgReadQual -> 1)
//
// readlength_max = if (record.length() > readlength_max) record.length() else readlength_max;
// readlength_min = if (record.length() < readlength_min) record.length() else readlength_min;
//
// }
// the qualities are in the 'char' format, convert to int first
baseQuals = result.qualstat
readQuals = result.avg_qual
// detect and set the Phred encoding
detectPhredEncoding(result.qualstat)
detectPhredEncoding(baseQuals)
// transform the keys
baseQuals = transformPhredEncoding(result.qualstat)
readQuals = transformPhredEncoding(result.avg_qual)
baseQuals = transformPhredEncoding(baseQuals)
readQuals = transformPhredEncoding(readQuals)
(result, baseQuals, readQuals)
}
......@@ -116,13 +94,14 @@ object Seqstat extends ToolCommand {
* Creates histogram from a (quality) string
* - qualString, string containing the qualities
*
* @param qualString String of the source FastQ file
* @param qualstring String of the source FastQ file
*/
def histogramReadQual(qualstring: String): (Map[Int, Int]) = {
def histogramReadQual(qualstring: String): Map[Char, Int] = {
// http://langref.org/scala/maps/algorithms/histogram
qualstring.foldLeft(Map[Int, Int]()) {
(m, c) => m.updated(c.toInt, m.getOrElse(c.toInt, 0) + 1)
}
// qualstring.foldLeft(Map[Int, Int]()) {
// (m, c) => m.updated(c.toInt, m.getOrElse(c.toInt, 0) + 1)
// }
qualstring.groupBy(identity).mapValues(_.size)
}
case class Args(fastq: File = new File("")) extends AbstractArgs
......@@ -180,8 +159,8 @@ object Seqstat extends ToolCommand {
res.avg_qual |+| nxt.avg_qual,
res.qualstat |+| nxt.qualstat,
res.total_reads + nxt.total_reads,
List(res.length_max, nxt.length_max).max,
List(res.length_min, nxt.length_min).min,
res.length_max max nxt.length_max,
res.length_min min nxt.length_min,
res.bases_total + nxt.bases_total
)
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment