Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
34dca7bc
Commit
34dca7bc
authored
Mar 08, 2016
by
Wai Yi Leung
Browse files
Merge branch 'fix-validator' into 'develop'
Fix validator. This loosens the fastq validator. See merge request !344
parents
41a5ea2a
b0f1be5a
Changes
2
Hide whitespace changes
Inline
Side-by-side
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala
View file @
34dca7bc
...
...
@@ -10,6 +10,7 @@ import scala.collection.mutable.ListBuffer
/**
* Created by sajvanderzeeuw on 2-2-16.
* Modified by pjvan_thof
*/
object
ValidateFastq
extends
ToolCommand
{
/**
...
...
@@ -63,13 +64,13 @@ object ValidateFastq extends ToolCommand {
//Here we check if the readnames of both files are concordant, and if the sequence content are correct DNA/RNA sequences
recordR2
match
{
case
Some
(
r
ecordR
2
)
=>
// Paired End
validFastqRecord
(
r
ecordR
2
)
duplicateCheck
(
r
ecordR
2
,
lastRecordR2
)
checkMate
(
recordR1
,
r
ecordR
2
)
case
Some
(
r2
)
=>
// Paired End
validFastqRecord
(
r2
)
duplicateCheck
(
r2
,
lastRecordR2
)
checkMate
(
recordR1
,
r2
)
case
_
=>
// Single end
}
if
(
counter
%
1
e5
==
0
)
logger
.
info
(
counter
+
" reads processed"
)
if
(
counter
%
1
e5
==
0
)
logger
.
info
(
counter
+
(
if
(
recordR2
.
isDefined
)
" pairs"
else
" reads
"
)
+
"
processed"
)
lastRecordR1
=
Some
(
recordR1
)
lastRecordR2
=
recordR2
}
...
...
@@ -78,9 +79,12 @@ object ValidateFastq extends ToolCommand {
if
(
readFq2
.
map
(
_
.
hasNext
)
==
Some
(
true
))
throw
new
IllegalStateException
(
"R2 contains more reads then R1"
)
logger
.
info
(
s
"Possible quality encodings found: ${getPossibleEncodings.mkString("
,
")}"
)
getPossibleEncodings
match
{
case
l
if
l
.
nonEmpty
=>
logger
.
info
(
s
"Possible quality encodings found: ${l.mkString("
,
")}"
)
case
_
=>
logger
.
warn
(
s
"No possible quality encodings found"
)
}
logger
.
info
(
s
"Done processing $
{
counter
}
fastq records, no errors found"
)
logger
.
info
(
s
"Done processing $counter fastq records, no errors found"
)
}
catch
{
case
e
:
IllegalStateException
=>
logger
.
error
(
s
"Error found at readnumber: $counter, linenumber ${(counter * 4) - 3}"
)
...
...
@@ -96,9 +100,9 @@ object ValidateFastq extends ToolCommand {
private
[
tools
]
var
maxQual
:
Option
[
Char
]
=
None
/**
*
* @param record
* @throws IllegalStateException
*
This method checks if the encoding in a fastq record is correct
* @param record
The fastq record to check
* @throws IllegalStateException
Throws this when an error is found during checking
*/
private
[
tools
]
def
checkQualEncoding
(
record
:
FastqRecord
)
:
Unit
=
{
val
min
=
record
.
getBaseQualityString
.
min
...
...
@@ -114,21 +118,21 @@ object ValidateFastq extends ToolCommand {
}
/**
*
* @return
* @throws IllegalStateException
*
This method returns the possible encodings till now
* @return
List of possible encodings
* @throws IllegalStateException
Throws this when an error is found during checking
*/
private
[
tools
]
def
getPossibleEncodings
:
List
[
String
]
=
{
val
buffer
:
ListBuffer
[
String
]
=
ListBuffer
()
(
minQual
,
maxQual
)
match
{
case
(
Some
(
min
),
Some
(
max
))
=>
if
(
min
<
'!'
||
max
>
'~'
)
throw
new
IllegalStateException
(
s
"Quality is out of ascii range 33-126. minQual: '$min', maxQual: '$max'"
)
if
(
min
>=
'!'
&&
max
<=
'I'
)
buffer
+=
"Sanger"
if
(
min
>=
';'
&&
max
<=
'h'
)
buffer
+=
"Solexa"
if
(
min
>=
'@'
&&
max
<=
'h'
)
buffer
+=
"Illumina 1.3+"
if
(
min
>=
'C'
&&
max
<=
'h'
)
buffer
+=
"Illumina 1.5+"
if
(
min
>=
'!'
&&
max
<=
'J'
)
buffer
+=
"Illumina 1.8+"
if
(
buffer
.
isEmpty
)
throw
new
IllegalStateException
(
s
"No possible quality encoding found. minQual: '$min', maxQual: '$max'"
)
case
_
=>
}
buffer
.
toList
...
...
@@ -138,9 +142,9 @@ object ValidateFastq extends ToolCommand {
/**
* This function checks for duplicates.
* @param current
* @param before
* @throws IllegalStateException
* @param current
current fastq record
* @param before
fastq record before the current record
* @throws IllegalStateException
Throws this when an error is found during checking
*/
def
duplicateCheck
(
current
:
FastqRecord
,
before
:
Option
[
FastqRecord
])
:
Unit
=
{
if
(
before
.
exists
(
_
.
getReadHeader
==
current
.
getReadHeader
))
...
...
@@ -148,9 +152,9 @@ object ValidateFastq extends ToolCommand {
}
/**
*
* @param record
* @throws IllegalStateException
*
This method will check if fastq record is correct
* @param record
Fastq record to check
* @throws IllegalStateException
Throws this when an error is found during checking
*/
def
validFastqRecord
(
record
:
FastqRecord
)
:
Unit
=
{
checkQualEncoding
(
record
)
...
...
@@ -158,15 +162,15 @@ object ValidateFastq extends ToolCommand {
case
allowedBases
(
m
)
=>
case
_
=>
throw
new
IllegalStateException
(
s
"Non IUPAC symbols identified"
)
}
if
(
record
.
getReadString
.
size
!=
record
.
getBaseQualityString
.
size
)
if
(
record
.
getReadString
.
length
!=
record
.
getBaseQualityString
.
length
)
throw
new
IllegalStateException
(
s
"Sequence length does not match quality length"
)
}
/**
*
* @param r1
* @param r2
* @throws IllegalStateException
*
This method checks if the pair is the same ID
* @param r1
R1 fastq record
* @param r2
R2 fastq record
* @throws IllegalStateException
Throws this when an error is found during checking
*/
def
checkMate
(
r1
:
FastqRecord
,
r2
:
FastqRecord
)
:
Unit
=
{
val
id1
=
r1
.
getReadHeader
.
takeWhile
(
_
!=
' '
)
...
...
public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/ValidateFastqTest.scala
View file @
34dca7bc
package
nl.lumc.sasc.biopet.tools
import
java.io.
{
OutputStream
,
PrintStream
,
ByteArrayOutputStream
}
import
java.nio.file.Paths
import
htsjdk.samtools.fastq.FastqRecord
import
nl.lumc.sasc.biopet.utils.Logging
import
org.apache.log4j.
{
FileAppender
,
Appender
}
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.
{
DataProvider
,
Test
}
import
scala.collection.JavaConversions._
/**
* This class tests ValidateFastq
*
* Created by pjvan_thof on 2/17/16.
*/
class
ValidateFastqTest
extends
TestNGSuite
with
Matchers
{
@Test
def
testCheckMate
:
Unit
=
{
def
testCheckMate
()
:
Unit
=
{
ValidateFastq
.
checkMate
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"AAAA"
),
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"AAAA"
))
intercept
[
IllegalStateException
]
{
...
...
@@ -27,7 +24,7 @@ class ValidateFastqTest extends TestNGSuite with Matchers {
}
@Test
def
testDuplicateCheck
:
Unit
=
{
def
testDuplicateCheck
()
:
Unit
=
{
ValidateFastq
.
duplicateCheck
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"AAAA"
),
None
)
ValidateFastq
.
duplicateCheck
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"AAAA"
),
Some
(
new
FastqRecord
(
"read_2"
,
"ATCG"
,
""
,
"AAAA"
)))
...
...
@@ -58,38 +55,41 @@ class ValidateFastqTest extends TestNGSuite with Matchers {
}
@Test
def
testGetPossibleEncodingsFail
:
Unit
=
{
intercept
[
IllegalStateException
]
{
ValidateFastq
.
minQual
=
Some
(
'!'
)
ValidateFastq
.
maxQual
=
Some
(
'h'
)
ValidateFastq
.
getPossibleEncodings
}
def
testGetPossibleEncodingsFail
()
:
Unit
=
{
ValidateFastq
.
minQual
=
Some
(
'!'
)
ValidateFastq
.
maxQual
=
Some
(
'h'
)
ValidateFastq
.
getPossibleEncodings
shouldBe
Nil
}
@Test
def
testCheckQualEncoding
:
Unit
=
{
def
testCheckQualEncoding
()
:
Unit
=
{
ValidateFastq
.
minQual
=
None
ValidateFastq
.
maxQual
=
None
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"AAAA"
))
ValidateFastq
.
getPossibleEncodings
should
not
be
Nil
intercept
[
IllegalStateException
]
{
ValidateFastq
.
minQual
=
None
ValidateFastq
.
maxQual
=
None
ValidateFastq
.
minQual
=
None
ValidateFastq
.
maxQual
=
None
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"A!hA"
))
}
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"A!hA"
))
ValidateFastq
.
getPossibleEncodings
shouldBe
Nil
ValidateFastq
.
minQual
=
None
ValidateFastq
.
maxQual
=
None
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"hhhh"
))
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"!!!!"
))
ValidateFastq
.
getPossibleEncodings
shouldBe
Nil
intercept
[
IllegalStateException
]
{
ValidateFastq
.
minQual
=
None
ValidateFastq
.
maxQual
=
None
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"hhhh"
))
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"!!!!"
))
ValidateFastq
.
checkQualEncoding
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"!! !!"
))
}
}
@Test
def
testValidFastqRecord
:
Unit
=
{
def
testValidFastqRecord
()
:
Unit
=
{
ValidateFastq
.
minQual
=
None
ValidateFastq
.
maxQual
=
None
ValidateFastq
.
validFastqRecord
(
new
FastqRecord
(
"read_1"
,
"ATCG"
,
""
,
"AAAA"
))
...
...
@@ -107,7 +107,7 @@ class ValidateFastqTest extends TestNGSuite with Matchers {
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
@Test
def
testMain
:
Unit
=
{
def
testMain
()
:
Unit
=
{
ValidateFastq
.
minQual
=
None
ValidateFastq
.
maxQual
=
None
val
r1
=
resourcePath
(
"/paired01a.fq"
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment