Commit 34dca7bc authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'fix-validator' into 'develop'

Fix validator

This looses the fastq validator

See merge request !344
parents 41a5ea2a b0f1be5a
......@@ -10,6 +10,7 @@ import scala.collection.mutable.ListBuffer
/**
* Created by sajvanderzeeuw on 2-2-16.
* Modified by pjvan_thof
*/
object ValidateFastq extends ToolCommand {
/**
......@@ -63,13 +64,13 @@ object ValidateFastq extends ToolCommand {
//Here we check if the readnames of both files are concordant, and if the sequence content are correct DNA/RNA sequences
recordR2 match {
case Some(recordR2) => // Paired End
validFastqRecord(recordR2)
duplicateCheck(recordR2, lastRecordR2)
checkMate(recordR1, recordR2)
case Some(r2) => // Paired End
validFastqRecord(r2)
duplicateCheck(r2, lastRecordR2)
checkMate(recordR1, r2)
case _ => // Single end
}
if (counter % 1e5 == 0) logger.info(counter + " reads processed")
if (counter % 1e5 == 0) logger.info(counter + (if (recordR2.isDefined) " pairs" else " reads") + " processed")
lastRecordR1 = Some(recordR1)
lastRecordR2 = recordR2
}
......@@ -78,9 +79,12 @@ object ValidateFastq extends ToolCommand {
if (readFq2.map(_.hasNext) == Some(true))
throw new IllegalStateException("R2 contains more reads then R1")
logger.info(s"Possible quality encodings found: ${getPossibleEncodings.mkString(", ")}")
getPossibleEncodings match {
case l if l.nonEmpty => logger.info(s"Possible quality encodings found: ${l.mkString(", ")}")
case _ => logger.warn(s"No possible quality encodings found")
}
logger.info(s"Done processing ${counter} fastq records, no errors found")
logger.info(s"Done processing $counter fastq records, no errors found")
} catch {
case e: IllegalStateException =>
logger.error(s"Error found at readnumber: $counter, linenumber ${(counter * 4) - 3}")
......@@ -96,9 +100,9 @@ object ValidateFastq extends ToolCommand {
private[tools] var maxQual: Option[Char] = None
/**
*
* @param record
* @throws IllegalStateException
* This method checks if the encoding in a fastq record is correct
* @param record The fastq record to check
* @throws IllegalStateException Throws this when an error is ofund during checking
*/
private[tools] def checkQualEncoding(record: FastqRecord): Unit = {
val min = record.getBaseQualityString.min
......@@ -114,21 +118,21 @@ object ValidateFastq extends ToolCommand {
}
/**
*
* @return
* @throws IllegalStateException
* This method returns the possible encodings till now
* @return List of possible encodings
* @throws IllegalStateException Throws this when an error is ofund during checking
*/
private[tools] def getPossibleEncodings: List[String] = {
val buffer: ListBuffer[String] = ListBuffer()
(minQual, maxQual) match {
case (Some(min), Some(max)) =>
if (min < '!' || max > '~')
throw new IllegalStateException(s"Quality is out of ascii range 33-126. minQual: '$min', maxQual: '$max'")
if (min >= '!' && max <= 'I') buffer += "Sanger"
if (min >= ';' && max <= 'h') buffer += "Solexa"
if (min >= '@' && max <= 'h') buffer += "Illumina 1.3+"
if (min >= 'C' && max <= 'h') buffer += "Illumina 1.5+"
if (min >= '!' && max <= 'J') buffer += "Illumina 1.8+"
if (buffer.isEmpty)
throw new IllegalStateException(s"No possible quality encoding found. minQual: '$min', maxQual: '$max'")
case _ =>
}
buffer.toList
......@@ -138,9 +142,9 @@ object ValidateFastq extends ToolCommand {
/**
* This function checks for duplicates.
* @param current
* @param before
* @throws IllegalStateException
* @param current currect fastq record
* @param before fastq record before the current record
* @throws IllegalStateException Throws this when an error is ofund during checking
*/
def duplicateCheck(current: FastqRecord, before: Option[FastqRecord]): Unit = {
if (before.exists(_.getReadHeader == current.getReadHeader))
......@@ -148,9 +152,9 @@ object ValidateFastq extends ToolCommand {
}
/**
*
* @param record
* @throws IllegalStateException
* This method will check if fastq record is correct
* @param record Fastq record to check
* @throws IllegalStateException Throws this when an error is ofund during checking
*/
def validFastqRecord(record: FastqRecord): Unit = {
checkQualEncoding(record)
......@@ -158,15 +162,15 @@ object ValidateFastq extends ToolCommand {
case allowedBases(m) =>
case _ => throw new IllegalStateException(s"Non IUPAC symbols identified")
}
if (record.getReadString.size != record.getBaseQualityString.size)
if (record.getReadString.length != record.getBaseQualityString.length)
throw new IllegalStateException(s"Sequence length does not match quality length")
}
/**
*
* @param r1
* @param r2
* @throws IllegalStateException
* This method checks if the pair is the same ID
* @param r1 R1 fastq record
* @param r2 R2 fastq record
* @throws IllegalStateException Throws this when an error is ofund during checking
*/
def checkMate(r1: FastqRecord, r2: FastqRecord): Unit = {
val id1 = r1.getReadHeader.takeWhile(_ != ' ')
......
package nl.lumc.sasc.biopet.tools
import java.io.{ OutputStream, PrintStream, ByteArrayOutputStream }
import java.nio.file.Paths
import htsjdk.samtools.fastq.FastqRecord
import nl.lumc.sasc.biopet.utils.Logging
import org.apache.log4j.{ FileAppender, Appender }
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
import scala.collection.JavaConversions._
/**
* This class test ValidateFatq
*
* Created by pjvan_thof on 2/17/16.
*/
class ValidateFastqTest extends TestNGSuite with Matchers {
@Test
def testCheckMate: Unit = {
def testCheckMate(): Unit = {
ValidateFastq.checkMate(new FastqRecord("read_1", "ATCG", "", "AAAA"), new FastqRecord("read_1", "ATCG", "", "AAAA"))
intercept[IllegalStateException] {
......@@ -27,7 +24,7 @@ class ValidateFastqTest extends TestNGSuite with Matchers {
}
@Test
def testDuplicateCheck: Unit = {
def testDuplicateCheck(): Unit = {
ValidateFastq.duplicateCheck(new FastqRecord("read_1", "ATCG", "", "AAAA"), None)
ValidateFastq.duplicateCheck(new FastqRecord("read_1", "ATCG", "", "AAAA"), Some(new FastqRecord("read_2", "ATCG", "", "AAAA")))
......@@ -58,38 +55,41 @@ class ValidateFastqTest extends TestNGSuite with Matchers {
}
@Test
def testGetPossibleEncodingsFail: Unit = {
intercept[IllegalStateException] {
ValidateFastq.minQual = Some('!')
ValidateFastq.maxQual = Some('h')
ValidateFastq.getPossibleEncodings
}
def testGetPossibleEncodingsFail(): Unit = {
ValidateFastq.minQual = Some('!')
ValidateFastq.maxQual = Some('h')
ValidateFastq.getPossibleEncodings shouldBe Nil
}
@Test
def testCheckQualEncoding: Unit = {
def testCheckQualEncoding(): Unit = {
ValidateFastq.minQual = None
ValidateFastq.maxQual = None
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "AAAA"))
ValidateFastq.getPossibleEncodings should not be Nil
intercept[IllegalStateException] {
ValidateFastq.minQual = None
ValidateFastq.maxQual = None
ValidateFastq.minQual = None
ValidateFastq.maxQual = None
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "A!hA"))
}
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "A!hA"))
ValidateFastq.getPossibleEncodings shouldBe Nil
ValidateFastq.minQual = None
ValidateFastq.maxQual = None
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "hhhh"))
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "!!!!"))
ValidateFastq.getPossibleEncodings shouldBe Nil
intercept[IllegalStateException] {
ValidateFastq.minQual = None
ValidateFastq.maxQual = None
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "hhhh"))
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "!!!!"))
ValidateFastq.checkQualEncoding(new FastqRecord("read_1", "ATCG", "", "!! !!"))
}
}
@Test
def testValidFastqRecord: Unit = {
def testValidFastqRecord(): Unit = {
ValidateFastq.minQual = None
ValidateFastq.maxQual = None
ValidateFastq.validFastqRecord(new FastqRecord("read_1", "ATCG", "", "AAAA"))
......@@ -107,7 +107,7 @@ class ValidateFastqTest extends TestNGSuite with Matchers {
Paths.get(getClass.getResource(p).toURI).toString
@Test
def testMain: Unit = {
def testMain(): Unit = {
ValidateFastq.minQual = None
ValidateFastq.maxQual = None
val r1 = resourcePath("/paired01a.fq")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment