From 9fe0e57a670ebd8f5fa8f0ecde9003152f333bfb Mon Sep 17 00:00:00 2001 From: bow <bow@bow.web.id> Date: Fri, 6 Feb 2015 14:42:32 +0100 Subject: [PATCH] Parse module status in Flexiprep FastQC wrapper --- .../biopet/pipelines/flexiprep/Fastqc.scala | 31 ++++++++++++------- .../pipelines/flexiprep/FastqcV0101Test.scala | 8 +++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index 196ce03fd..5e2fc66dd 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -34,6 +34,8 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils */ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) { + protected case class FastQCModule(name: String, status: String, lines: Seq[String]) + /** Default FastQC output directory containing actual results */ // this is a def instead of a val since the value depends on the variable `output`, which is null on class creation def outputDir: File = new File(output.getAbsolutePath.stripSuffix(".zip")) @@ -47,11 +49,11 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r * * @return Mapping of FastQC module names and its contents as array of strings (one item per line) * @throws FileNotFoundException if the FastQC data file can not be found. - * @throws IllegalStateException if the module mapping is empty. + * @throws IllegalStateException if the module lines have no content or mapping is empty. */ @throws(classOf[FileNotFoundException]) @throws(classOf[IllegalStateException]) - def qcModules: Map[String, Array[String]] = { + def qcModules: Map[String, FastQCModule] = { val fqModules = Source.fromFile(dataFile) // drop all the characters before the first module delimiter (i.e. '>>') @@ -64,14 +66,18 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r .map { case (modString) => // module name is in the first line, without '>>' and before the tab character - val modName = modString - // so we take all characters in the first line - .takeWhile(_ != '\n') - // and drop all characters that equals '>' + val Array(firstLine, otherLines) = modString + // drop all '>>' character (start of module) .dropWhile(_ == '>') - // and take all characters before the tab - .takeWhile(_ != '\t') - modName -> modString.split('\n') + // split first line and others + .split("\n", 2) + // and slice them + .slice(0, 2) + // extract module name and module status + val Array(modName, modStatus) = firstLine + .split("\t", 2) + .slice(0, 2) + modName -> FastQCModule(modName, modStatus, otherLines.split("\n").toSeq) } .toMap @@ -89,6 +95,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r @throws(classOf[NoSuchElementException]) def encoding: String = qcModules("Basic Statistics") + .lines .dropWhile(!_.startsWith("Encoding")) .head .stripPrefix("Encoding\t") @@ -117,10 +124,10 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r } val found = qcModules.get("Overrepresented sequences") match { - case None => Array.empty[String] - case Some(modLines) => + case None => Seq.empty[String] + case Some(qcModule) => for ( - line <- modLines if !(line.startsWith("#") || line.startsWith(">")); + line <- qcModule.lines if !(line.startsWith("#") || line.startsWith(">")); values = line.split("\t") if values.size >= 4 ) yield values(3) } diff --git a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala index b9b922f4c..0951bea84 100644 --- a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala +++ b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala @@ -53,6 +53,14 @@ class FastqcV0101Test extends TestNGSuite with Matchers { fqc.qcModules.keySet should contain("Kmer Content") } + @Test def testSingleQcModule() = { + val fqc = new Fastqc(null) + fqc.output = outputv0101 + fqc.qcModules("Basic Statistics").name should ===("Basic Statistics") + fqc.qcModules("Basic Statistics").status should ===("pass") + fqc.qcModules("Basic Statistics").lines.size shouldBe 8 + } + @Test def testEncoding() = { val fqc = new Fastqc(null) fqc.output = outputv0101 -- GitLab