Commit 26bc48af authored by Peter van 't Hof
Browse files

Fix fastqc dry run bug

parent ed0f3f1a
......@@ -52,19 +52,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
* @throws FileNotFoundException if the FastQC data file can not be found.
* @throws IllegalStateException if the module lines have no content or mapping is empty.
*/
@throws(classOf[FileNotFoundException])
@throws(classOf[IllegalStateException])
def qcModules: Map[String, FastQCModule] = {
val fqModules = Source.fromFile(dataFile)
// drop all the characters before the first module delimiter (i.e. '>>')
.dropWhile(_ != '>')
// pull everything into a string
.mkString
// split into modules
.split(">>END_MODULE\n")
// make map of module name -> module lines
.map {
val fqModules = Source.fromFile(dataFile)
// drop all the characters before the first module delimiter (i.e. '>>')
.dropWhile(_ != '>')
// pull everything into a string
.mkString
// split into modules
.split(">>END_MODULE\n")
// make map of module name -> module lines
.map {
case (modString) =>
// module name is in the first line, without '>>' and before the tab character
val Array(firstLine, otherLines) = modString
......@@ -80,10 +77,10 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
.slice(0, 2)
modName -> FastQCModule(modName, modStatus, otherLines.split("\n").toSeq)
}
.toMap
.toMap
if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString)
else fqModules
if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString)
else fqModules
}
/**
......@@ -93,14 +90,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
* @throws NoSuchElementException when the "Basic Statistics" key does not exist in the mapping or
* when a line starting with "Encoding" does not exist.
*/
@throws(classOf[NoSuchElementException])
def encoding: String = {
  // When the FastQC data file does not exist (e.g. the tool has not actually
  // been run yet) there is nothing to parse, so report an empty encoding
  // instead of failing on the missing file.
  if (!dataFile.exists()) ""
  else {
    // Look the module up explicitly so a missing key fails with a message
    // naming the file, rather than the bare "key not found" from Map.apply.
    val basicStats = qcModules.getOrElse(
      "Basic Statistics",
      throw new NoSuchElementException(
        "No 'Basic Statistics' module in FastQC data file " + dataFile.toString))

    basicStats.lines
      // first line starting with "Encoding"; `find` avoids calling `.head`
      // on a possibly empty sequence
      .find(_.startsWith("Encoding"))
      .getOrElse(throw new NoSuchElementException(
        "No 'Encoding' line in FastQC data file " + dataFile.toString))
      // keep only the value: drop the leading key and any trailing tab
      .stripPrefix("Encoding\t")
      .stripSuffix("\t")
  }
}
/** Case class representing a known adapter sequence */
protected case class AdapterSequence(name: String, seq: String)
......@@ -111,32 +110,33 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
* @return a [[Set]] of [[AdapterSequence]] objects.
*/
def foundAdapters: Set[AdapterSequence] = {

  /**
   * Returns the adapter and/or contaminant sequences listed in the given file.
   *
   * Lines starting with '#' are skipped; remaining lines are split on runs of
   * tabs and must yield at least two fields (name, sequence) to be used.
   * The underlying source is always closed, also when reading fails.
   */
  def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match {
    case None => Set.empty[AdapterSequence]
    case Some(f) =>
      val source = Source.fromFile(f)
      try {
        // `.toSet` forces the lazy line iterator before the source is closed
        (for {
          line <- source.getLines()
          if !line.startsWith("#")
          values = line.split("\t+")
          if values.size >= 2
        } yield AdapterSequence(values(0), values(1))).toSet
      } finally source.close()
  }

  // Without a FastQC data file there are no results to inspect
  // (e.g. the tool has not actually been run yet), so nothing can be found.
  if (!dataFile.exists()) Set.empty[AdapterSequence]
  else {
    // Fourth tab-separated column of every data line in the
    // "Overrepresented sequences" module; header ('#') and module
    // delimiter ('>') lines are ignored.
    // NOTE(review): presumably this column holds FastQC's "possible source"
    // description -- confirm against the fastqc_data.txt format.
    val found = qcModules.get("Overrepresented sequences") match {
      case None => Seq.empty[String]
      case Some(qcModule) =>
        for {
          line <- qcModule.lines
          if !(line.startsWith("#") || line.startsWith(">"))
          values = line.split("\t")
          if values.size >= 4
        } yield values(3)
    }

    // select full sequences from known adapters and contaminants
    // based on overrepresented sequences results
    (getFastqcSeqs(adapters) ++ getFastqcSeqs(contaminants))
      .filter(known => found.exists(_.startsWith(known.name)))
  }
}
/** Summary of the FastQC run, stored in a [[Json]] object */
......
......@@ -32,6 +32,7 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk.
case s if (s.contains("Illumina <1.3")) => Option(64)
case s if (s.contains("Illumina 1.3")) => Option(64)
case s if (s.contains("Illumina 1.5")) => Option(64)
case _ => None
}
if (Q != None) V = true
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment