From 734801f6c59a3e63a759e378e2dcf0ec9a19cdc0 Mon Sep 17 00:00:00 2001 From: sajvanderzeeuw <s.a.j.van_der_zeeuw@lumc.nl> Date: Fri, 5 Feb 2016 10:48:34 +0100 Subject: [PATCH] add DNA base check (testing still needed) --- .../sasc/biopet/tools/CheckFastqPairs.scala | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckFastqPairs.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckFastqPairs.scala index d3b45590f..e4c78dd32 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckFastqPairs.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckFastqPairs.scala @@ -55,15 +55,30 @@ object CheckFastqPairs extends ToolCommand { //Getting R2 record, None if it's single end val recordR2 = readFq2.map(_.next()) - //Here we check if the readnames of both files are concordant + //Here we check if the readnames of both files are concordant, and if the sequence content are correct DNA/RNA sequences recordR2 match { case Some(recordR2) => // Paired End val readHeader = recordR1.getReadHeader val readHeader2 = recordR2.getReadHeader + val readSeq = recordR1.getReadString + val readSeq2 = recordR2.getReadString val id1 = readHeader.takeWhile(_ != ' ') val id2 = readHeader2.takeWhile(_ != ' ') - if (counter % 1e5 == 0) logger.info(counter + " reads processed") + if (counter % 1e4 == 0) logger.info(counter + " reads processed") + + + val allowedBases = """([actgnACTGN+]+)""".r + + val validBases: Boolean = readSeq match { + case allowedBases(m) => true + case _ => throw new IllegalStateException(s"Non IUPAC symbols identified '${(counter*4)-3}'") + } + + val validBases2: Boolean = readSeq2 match { + case allowedBases(m) => true + case _ => throw new IllegalStateException(s"Non IUPAC symbols identified '${(counter*4)-3}'") + } if (id1 == id2){ -- GitLab