Skip to content
Snippets Groups Projects
Commit 1b221835 authored by bow's avatar bow
Browse files

Refactor and document fastqc data file parsing function

parent c378965a
Branches
Tags
No related merge requests found
...@@ -25,27 +25,45 @@ import scalaz._, Scalaz._ ...@@ -25,27 +25,45 @@ import scalaz._, Scalaz._
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) { class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) {
def getDataBlock(name: String): Array[String] = { // Based on Fastqc v0.10.1
/**
* FastQC QC modules.
*
* @return Mapping of FastQC module names and its contents as array of strings (one item per line)
* @throws FileNotFoundException if the FastQC data file can not be found.
* @throws IllegalStateException if the module mapping is empty.
*/
@throws(classOf[FileNotFoundException])
@throws(classOf[IllegalStateException])
protected lazy val qcModules: Map[String, Array[String]] = {
val outputDir = output.getAbsolutePath.stripSuffix(".zip") val outputDir = output.getAbsolutePath.stripSuffix(".zip")
val dataFile = new File(outputDir + "/fastqc_data.txt") val dataFile = new File(outputDir, "fastqc_data.txt")
if (!dataFile.exists) return null
val data = Source.fromFile(dataFile).mkString val fqModules = Source.fromFile(dataFile)
for (block <- data.split(">>END_MODULE\n")) { // drop all the characters before the first module delimiter (i.e. '>>')
val b = if (block.startsWith("##FastQC")) block.substring(block.indexOf("\n") + 1) else block .dropWhile(_ != '>')
if (b.startsWith(">>" + name)) // pull everything into a string
return for (line <- b.split("\n")) .mkString
yield line // split into modules
} .split(">>END_MODULE\n")
return null // make map of module name -> module lines
} .map {
case (modString) =>
// module name is in the first line, without '>>' and before the tab character
val modName = modString
// so we take all characters in the first line
.takeWhile(_ != '\n')
// and drop all characters that equals '>'
.dropWhile(_ == '>')
// and take all characters before the tab
.takeWhile(_ != '\t')
modName -> modString.split('\n')
}
.toMap
def getEncoding: String = { if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString)
val block = getDataBlock("Basic Statistics") else fqModules
if (block == null) return null
for (
line <- block if (line.startsWith("Encoding"))
) return line.stripPrefix("Encoding\t")
return null // Could be default Sanger with a warning in the log
} }
protected case class Sequence(name: String, seq: String) protected case class Sequence(name: String, seq: String)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment