Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
bb3991e7
Commit
bb3991e7
authored
Feb 16, 2016
by
Peter van 't Hof
Browse files
Added quality encoding check
parent
89ed20eb
Changes
1
Hide whitespace changes
Inline
Side-by-side
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckFastqPairs.scala
View file @
bb3991e7
...
...
@@ -6,6 +6,7 @@ import htsjdk.samtools.fastq.{ FastqRecord, FastqReader }
import
nl.lumc.sasc.biopet.utils.ToolCommand
import
scala.collection.JavaConversions._
import
scala.collection.mutable.ListBuffer
/**
* Created by sajvanderzeeuw on 2-2-16.
...
...
@@ -84,15 +85,58 @@ object CheckFastqPairs extends ToolCommand {
logger
.
error
(
e
.
getMessage
)
}
logger
.
info
(
s
"Possible quality encodings found: ${getPossibleEncodings.mkString("
,
")}"
)
//close both iterators
readFq1
.
close
()
readFq2
.
foreach
(
_
.
close
())
}
private
var
minQual
:
Option
[
Char
]
=
None
private
var
maxQual
:
Option
[
Char
]
=
None
/**
*
* @param record
* @throws IllegalStateException
*/
private
[
tools
]
def
checkQualEncoding
(
record
:
FastqRecord
)
:
Unit
=
{
val
min
=
record
.
getBaseQualityString
.
min
val
max
=
record
.
getBaseQualityString
.
max
if
(!
minQual
.
exists
(
_
<=
min
))
{
minQual
=
Some
(
min
)
getPossibleEncodings
}
if
(!
maxQual
.
exists
(
_
>=
max
))
{
maxQual
=
Some
(
max
)
getPossibleEncodings
}
}
/**
*
* @return
* @throws IllegalStateException
*/
private
[
tools
]
def
getPossibleEncodings
:
List
[
String
]
=
{
val
buffer
:
ListBuffer
[
String
]
=
ListBuffer
()
(
minQual
,
maxQual
)
match
{
case
(
Some
(
min
),
Some
(
max
))
=>
if
(
min
>=
'!'
&&
max
<=
'I'
)
buffer
+=
"Sanger"
if
(
min
>=
';'
&&
max
<=
'h'
)
buffer
+=
"Solexa"
if
(
min
>=
'@'
&&
max
<=
'h'
)
buffer
+=
"Illumina 1.3+"
if
(
min
>=
'C'
&&
max
<=
'h'
)
buffer
+=
"Illumina 1.5+"
if
(
min
>=
'!'
&&
max
<=
'J'
)
buffer
+=
"Illumina 1.8+"
if
(
buffer
.
isEmpty
)
throw
new
IllegalStateException
(
"No possible quality encoding found"
)
case
_
=>
}
buffer
.
toList
}
val
allowedBases
=
"""([actgnACTGN+]+)"""
.
r
/**
*
*
This function checks for duplicates.
* @param current
* @param before
* @throws IllegalStateException
...
...
@@ -108,6 +152,7 @@ object CheckFastqPairs extends ToolCommand {
* @throws IllegalStateException
*/
def
validFastqRecord
(
record
:
FastqRecord
)
:
Unit
=
{
checkQualEncoding
(
record
)
record
.
getReadString
match
{
case
allowedBases
(
m
)
=>
case
_
=>
throw
new
IllegalStateException
(
s
"Non IUPAC symbols identified"
)
...
...
@@ -130,6 +175,4 @@ object CheckFastqPairs extends ToolCommand {
id1
.
stripSuffix
(
".1"
)
==
id2
.
stripSuffix
(
".2"
)))
throw
new
IllegalStateException
(
s
"sequenceHeaders does not match. R1: '${r1.getReadHeader}', R2: '${r2.getReadHeader}'"
)
}
//TODO: check duplicate read ideas in both R1 and R2
}
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment