Commit 353dc276 authored by bow's avatar bow
Browse files

Refactor and document adapter retrieval from FastQC

parent 6ae2579a
......@@ -33,7 +33,7 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada
override def beforeCmd() {
super.beforeCmd
val foundAdapters = fastqc.getFoundAdapters.map(_.seq)
val foundAdapters = fastqc.foundAdapters.map(_.seq)
if (default_clip_mode == "3") opt_adapter ++= foundAdapters
else if (default_clip_mode == "5") opt_front ++= foundAdapters
else if (default_clip_mode == "both") opt_anywhere ++= foundAdapters
......
......@@ -80,28 +80,43 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
.head
.stripPrefix("Encoding\t")
/**
 * Case class representing a known adapter (or contaminant) sequence.
 *
 * Instances are built from the first two fields of lines read from the
 * adapter and contaminant files.
 *
 * @param name label of the sequence; compared as a prefix against the fourth
 *             tab-separated field of the "Overrepresented sequences" module lines
 * @param seq  the sequence string itself; this is the value handed to Cutadapt
 */
protected case class AdapterSequence(name: String, seq: String)
/**
* Retrieves the known adapter and contaminant sequences that match the overrepresented sequences found by FastQC.
*
* @return a [[Set]] of [[AdapterSequence]] objects.
*/
lazy val foundAdapters: Set[AdapterSequence] = {
/** Returns a list of adapter and/or contaminant sequences known to FastQC */
def getFastqcSeqs(file: File): Set[AdapterSequence] =
if (file != null) {
(for (
line <- Source.fromFile(file).getLines(); if line.startsWith("#");
values = line.split("\t*") if values.size >= 2
) yield Sequence(values(0), values(1))).toList
} else Nil
) yield AdapterSequence(values(0), values(1))).toSet
} else Set.empty[AdapterSequence]
val found = qcModules.get("Overrepresented sequences") match {
case None => Array.empty[String]
case Some(modLines) =>
for (
line <- modLines if !line.startsWith("#");
values = line.split("\t") if values.size >= 4
) yield values(3)
}
val seqs = getSeqs(adapters) ::: getSeqs(contaminants)
val block = getDataBlock("Overrepresented sequences")
if (block == null) return Nil
// select full sequences from known adapters and contaminants
// based on overrepresented sequences results
(getFastqcSeqs(adapters) ++ getFastqcSeqs(contaminants))
.filter(x => found.exists(_.startsWith(x.name)))
}
val found = for (
line <- block if !line.startsWith("#");
values = line.split("\t") if values.size >= 4
) yield values(3)
/** Summary of the FastQC run, stored in a [[Json]] object */
def summary: Json = {
seqs.filter(x => found.exists(_.startsWith(x.name)))
}
val outputDir: String = output.getAbsolutePath.stripSuffix(".zip")
val outputMap =
Map("plot_duplication_levels" -> "Images/duplication_levels.png",
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment