Commit c7340695, authored by Peter van 't Hof, committed by GitHub

Merge pull request #25 from biopet/fix-BIOPET-576

Fix adapters fetch from fastqc
parents 7812d950 674dc69e
...@@ -75,8 +75,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction with Vers ...@@ -75,8 +75,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction with Vers
// otherwise, check if adapters are already present (depending on FastQC version) // otherwise, check if adapters are already present (depending on FastQC version)
case None => case None =>
val defaultAdapters = getVersion match { val defaultAdapters = getVersion match {
case Some("v0.11.2") => Option(new File(fastqcDir + "/Configuration/adapter_list.txt")) case Some(v) if v.contains("v0.11") => Option(new File(fastqcDir + "/Configuration/adapter_list.txt"))
case _ => None case _ => None
} }
defaultAdapters.collect { case adp => config("adapters", default = adp) } defaultAdapters.collect { case adp => config("adapters", default = adp) }
} }
......
...@@ -155,6 +155,8 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r ...@@ -155,6 +155,8 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
*/ */
def foundAdapters: Set[AdapterSequence] = { def foundAdapters: Set[AdapterSequence] = {
if (dataFile.exists) { // On a dry run this file does not yet exist if (dataFile.exists) { // On a dry run this file does not yet exist
val modules = qcModules
/** Returns a list of adapter and/or contaminant sequences known to FastQC */ /** Returns a list of adapter and/or contaminant sequences known to FastQC */
def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match { def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match {
case None => Set.empty[AdapterSequence] case None => Set.empty[AdapterSequence]
...@@ -170,7 +172,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r ...@@ -170,7 +172,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
val adapterSet = getFastqcSeqs(adapters) val adapterSet = getFastqcSeqs(adapters)
val contaminantSet = getFastqcSeqs(contaminants) val contaminantSet = getFastqcSeqs(contaminants)
val foundAdapterNames: Seq[String] = qcModules.get("Overrepresented sequences") match { val foundAdapterNames: Seq[String] = modules.get("Overrepresented sequences") match {
case None => Seq.empty[String] case None => Seq.empty[String]
case Some(qcModule) => case Some(qcModule) =>
for ( for (
...@@ -181,7 +183,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r ...@@ -181,7 +183,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
// select full sequences from known adapters and contaminants // select full sequences from known adapters and contaminants
// based on overrepresented sequences results // based on overrepresented sequences results
val fromKnownList: Set[AdapterSequence] = (adapterSet ++ contaminantSet) val fromKnownList: Set[AdapterSequence] = contaminantSet
.filter(x => foundAdapterNames.exists(_.startsWith(x.name))) .filter(x => foundAdapterNames.exists(_.startsWith(x.name)))
val fromKnownListRC: Set[AdapterSequence] = if (enableRCtrimming) fromKnownList.map { val fromKnownListRC: Set[AdapterSequence] = if (enableRCtrimming) fromKnownList.map {
...@@ -191,7 +193,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r ...@@ -191,7 +193,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
// list all sequences found by FastQC // list all sequences found by FastQC
val fastQCFoundSequences: Seq[AdapterSequence] = if (sensitiveAdapterSearch) { val fastQCFoundSequences: Seq[AdapterSequence] = if (sensitiveAdapterSearch) {
qcModules.get("Overrepresented sequences") match { modules.get("Overrepresented sequences") match {
case None => Seq.empty case None => Seq.empty
case Some(qcModule) => case Some(qcModule) =>
for ( for (
...@@ -199,17 +201,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r ...@@ -199,17 +201,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
values = line.split("\t") if values.size >= 4 values = line.split("\t") if values.size >= 4
) yield AdapterSequence(values(3), values(0)) ) yield AdapterSequence(values(3), values(0))
} }
} else { } else Seq()
Seq.empty
}
// we only want to keep adapter sequences which are known by FastQC val foundAdapters = modules.get("Adapter Content").map { x =>
// sequences such as "Adapter01 (100% over 12bp)" are valid because "Adapter01" is in FastQC val header = x.lines.head.split("\t").tail.zipWithIndex
fastQCFoundSequences.filter(x => { val lines = x.lines.tail.map(_.split("\t").tail)
(adapterSet ++ contaminantSet).count(y => x.name.startsWith(y.name)) == 1 val found = header.filter(h => lines.exists(x => x(h._2).toFloat > 0)).map(_._1)
}) adapterSet.filter(x => found.contains(x.name))
}
fromKnownList ++ fastQCFoundSequences ++ fromKnownListRC fromKnownList ++ fastQCFoundSequences ++ fromKnownListRC ++ foundAdapters.getOrElse(Seq())
} else Set() } else Set()
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment