diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index 50ee00b2c5625226a00e0985bbe761ded375237c..6ef82870e44e1384488d198bde3a52007c6f726f 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -52,19 +52,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r * @throws FileNotFoundException if the FastQC data file can not be found. * @throws IllegalStateException if the module lines have no content or mapping is empty. */ - @throws(classOf[FileNotFoundException]) - @throws(classOf[IllegalStateException]) def qcModules: Map[String, FastQCModule] = { - - val fqModules = Source.fromFile(dataFile) - // drop all the characters before the first module delimiter (i.e. '>>') - .dropWhile(_ != '>') - // pull everything into a string - .mkString - // split into modules - .split(">>END_MODULE\n") - // make map of module name -> module lines - .map { + val fqModules = Source.fromFile(dataFile) + // drop all the characters before the first module delimiter (i.e. '>>') + .dropWhile(_ != '>') + // pull everything into a string + .mkString + // split into modules + .split(">>END_MODULE\n") + // make map of module name -> module lines + .map { case (modString) => // module name is in the first line, without '>>' and before the tab character val Array(firstLine, otherLines) = modString @@ -80,10 +77,10 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r .slice(0, 2) modName -> FastQCModule(modName, modStatus, otherLines.split("\n").toSeq) } - .toMap + .toMap - if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString) - else fqModules + if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString) + else fqModules } /** @@ -93,14 +90,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r * @throws NoSuchElementException when the "Basic Statistics" key does not exist in the mapping or * when a line starting with "Encoding" does not exist. */ - @throws(classOf[NoSuchElementException]) - def encoding: String = - qcModules("Basic Statistics") + def encoding: String = { + if (dataFile.exists()) + qcModules("Basic Statistics") //FIXME: not save .lines .dropWhile(!_.startsWith("Encoding")) .head .stripPrefix("Encoding\t") .stripSuffix("\t") + else "" + } /** Case class representing a known adapter sequence */ protected case class AdapterSequence(name: String, seq: String) @@ -111,32 +110,33 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r * @return a [[Set]] of [[AdapterSequence]] objects. */ def foundAdapters: Set[AdapterSequence] = { + if (dataFile.exists()) { + /** Returns a list of adapter and/or contaminant sequences known to FastQC */ + def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match { + case None => Set.empty[AdapterSequence] + case Some(f) => + (for { + line <- Source.fromFile(f).getLines() + if !line.startsWith("#") + values = line.split("\t+") + if values.size >= 2 + } yield AdapterSequence(values(0), values(1))).toSet + } + + val found = qcModules.get("Overrepresented sequences") match { + case None => Seq.empty[String] + case Some(qcModule) => + for ( + line <- qcModule.lines if !(line.startsWith("#") || line.startsWith(">")); + values = line.split("\t") if values.size >= 4 + ) yield values(3) + } - /** Returns a list of adapter and/or contaminant sequences known to FastQC */ - def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match { - case None => Set.empty[AdapterSequence] - case Some(f) => - (for { - line <- Source.fromFile(f).getLines() - if !line.startsWith("#") - values = line.split("\t+") - if values.size >= 2 - } yield AdapterSequence(values(0), values(1))).toSet - } - - val found = qcModules.get("Overrepresented sequences") match { - case None => Seq.empty[String] - case Some(qcModule) => - for ( - line <- qcModule.lines if !(line.startsWith("#") || line.startsWith(">")); - values = line.split("\t") if values.size >= 4 - ) yield values(3) - } - - // select full sequences from known adapters and contaminants - // based on overrepresented sequences results - (getFastqcSeqs(adapters) ++ getFastqcSeqs(contaminants)) - .filter(x => found.exists(_.startsWith(x.name))) + // select full sequences from known adapters and contaminants + // based on overrepresented sequences results + (getFastqcSeqs(adapters) ++ getFastqcSeqs(contaminants)) + .filter(x => found.exists(_.startsWith(x.name))) + } else Set() } /** Summary of the FastQC run, stored in a [[Json]] object */ diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala index 0fdeee289de9672d917264e3e9dc2d556f6bc48b..b89f3ebc83ab00a90b53f0644f80a3e1848b5e1f 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala @@ -32,6 +32,7 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk. case s if (s.contains("Illumina <1.3")) => Option(64) case s if (s.contains("Illumina 1.3")) => Option(64) case s if (s.contains("Illumina 1.5")) => Option(64) + case _ => None } if (Q != None) V = true }