Skip to content
Snippets Groups Projects
Commit 26bc48af authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Fix fastqc dry run bug

parent ed0f3f1a
No related branches found
No related tags found
No related merge requests found
......@@ -52,19 +52,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
* @throws FileNotFoundException if the FastQC data file can not be found.
* @throws IllegalStateException if the module lines have no content or mapping is empty.
*/
@throws(classOf[FileNotFoundException])
@throws(classOf[IllegalStateException])
def qcModules: Map[String, FastQCModule] = {
val fqModules = Source.fromFile(dataFile)
// drop all the characters before the first module delimiter (i.e. '>>')
.dropWhile(_ != '>')
// pull everything into a string
.mkString
// split into modules
.split(">>END_MODULE\n")
// make map of module name -> module lines
.map {
val fqModules = Source.fromFile(dataFile)
// drop all the characters before the first module delimiter (i.e. '>>')
.dropWhile(_ != '>')
// pull everything into a string
.mkString
// split into modules
.split(">>END_MODULE\n")
// make map of module name -> module lines
.map {
case (modString) =>
// module name is in the first line, without '>>' and before the tab character
val Array(firstLine, otherLines) = modString
......@@ -80,10 +77,10 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
.slice(0, 2)
modName -> FastQCModule(modName, modStatus, otherLines.split("\n").toSeq)
}
.toMap
.toMap
if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString)
else fqModules
if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString)
else fqModules
}
/**
......@@ -93,14 +90,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
* @throws NoSuchElementException when the "Basic Statistics" key does not exist in the mapping or
* when a line starting with "Encoding" does not exist.
*/
@throws(classOf[NoSuchElementException])
def encoding: String = {
  // The data file may not exist yet when this is called (the commit this hunk
  // belongs to is titled "Fix fastqc dry run bug"); in that case report an
  // empty encoding instead of failing while trying to read the file.
  if (!dataFile.exists()) ""
  else
    // FIXME: not safe -- qcModules(...) throws NoSuchElementException when the
    // "Basic Statistics" module is missing from the parsed data file.
    qcModules("Basic Statistics")
      .lines
      // first line that starts with "Encoding"; same exception type as the
      // former dropWhile(...).head, but with a message that names the problem
      .find(_.startsWith("Encoding"))
      .getOrElse(throw new NoSuchElementException(
        "No line starting with 'Encoding' in the 'Basic Statistics' module of " + dataFile.toString))
      // keep only the value column
      .stripPrefix("Encoding\t")
      .stripSuffix("\t")
}
/** Case class representing a known adapter sequence */
protected case class AdapterSequence(name: String, seq: String)
......@@ -111,32 +110,33 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
* @return a [[Set]] of [[AdapterSequence]] objects.
*/
def foundAdapters: Set[AdapterSequence] = {

  // Reads a tab-separated file of known sequences ("name<TAB>sequence").
  // Lines starting with '#' and lines with fewer than two columns are skipped.
  // Returns an empty set when no file is given.
  def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match {
    case None => Set.empty[AdapterSequence]
    case Some(f) =>
      val source = Source.fromFile(f)
      // .toSet forces the lazy line iterator before the handle is closed
      try {
        (for {
          line <- source.getLines()
          if !line.startsWith("#")
          values = line.split("\t+")
          if values.size >= 2
        } yield AdapterSequence(values(0), values(1))).toSet
      } finally source.close()
  }

  // Without a data file there is nothing to parse, so no adapters are reported
  // (the commit this hunk belongs to is titled "Fix fastqc dry run bug").
  if (!dataFile.exists()) Set.empty[AdapterSequence]
  else {
    // fourth column of every data line in the "Overrepresented sequences" module;
    // header ('#') and module delimiter ('>') lines are ignored
    val found = qcModules.get("Overrepresented sequences") match {
      case None => Seq.empty[String]
      case Some(qcModule) =>
        for (
          line <- qcModule.lines if !(line.startsWith("#") || line.startsWith(">"));
          values = line.split("\t") if values.size >= 4
        ) yield values(3)
    }

    // select full sequences from known adapters and contaminants
    // based on overrepresented sequences results
    (getFastqcSeqs(adapters) ++ getFastqcSeqs(contaminants))
      .filter(x => found.exists(_.startsWith(x.name)))
  }
}
/** Summary of the FastQC run, stored in a [[Json]] object */
......
......@@ -32,6 +32,7 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk.
case s if (s.contains("Illumina <1.3")) => Option(64)
case s if (s.contains("Illumina 1.3")) => Option(64)
case s if (s.contains("Illumina 1.5")) => Option(64)
case _ => None
}
if (Q != None) V = true
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment