Commit e7541377 authored by bow
Browse files

Initial fix for #238

The issue is caused by the Cutadapt summary not being aware of the
clipped sequence names. This merge request extends the existing Cutadapt
wrapper in the Flexiprep module and makes it so that the wrapper is
aware of FastQC.
parent 8e1b1d70
* Copyright (c) 2015 Leiden University Medical Center and contributors
* (see file for details).
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package nl.lumc.sasc.biopet.pipelines.flexiprep
import nl.lumc.sasc.biopet.utils.config.Configurable
/**
 * Cutadapt wrapper specific for Flexiprep.
 *
 * This wrapper overrides the summary part so that instead of only displaying the clipped adapters, the sequence names
 * are also displayed. In Flexiprep the sequences will always have names since they are detected by FastQC from a list
 * of known adapters / contaminants.
 *
 * @param root Configurable object from which this wrapper is initialized.
 * @param fastqc Fastqc wrapper that contains adapter information.
 * @param readName Read label; it is interpolated into the 'clipping_<readName>' part of this wrapper's error messages.
 */
class Cutadapt(root: Configurable, fastqc: Fastqc, readName: String) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) {
/**
 * Clipped adapter names from FastQC.
 *
 * NOTE(review): presumably a lookup from adapter sequence to adapter name, since it is queried with
 * `.get(seq)` in `summaryStats`; the mapping expression appears cut off in this view -- confirm against
 * the full file.
 */
protected def seqToName = fastqc.foundAdapters
.map(adapter => adapter.seq ->
/**
 * Summary statistics of the parent Cutadapt wrapper with the "adapters" entry replaced.
 *
 * Every (sequence, count) pair stored under "adapters" is re-keyed by the name that `seqToName` holds for
 * that sequence, yielding name -> (sequence, count) pairs. All other entries of the parent statistics are
 * passed through unchanged.
 *
 * @return the parent statistics, updated so that "adapters" holds the re-keyed adapter counts.
 * @throws IllegalStateException when a clipped sequence has no entry in `seqToName`.
 * @throws RuntimeException when the parent statistics have no "adapters" key.
 */
override def summaryStats: Map[String, Any] = {
val initStats = super.summaryStats
// Adapter name paired with its (sequence, count), built from the parent's "adapters" entry
val adapterCounts = initStats.get("adapters") match {
// "adapters" key found in statistics
case Some(v) => v match {
// NOTE(review): the type arguments of Map are erased at runtime, so this pattern only verifies that the
// value is a Map; the String / Int element types are not checked here.
case m: Map[String, Int] => m.toSeq
.map {
case (seq, count) =>
seqToName.get(seq) match {
// adapter sequence is found by FastQC
case Some(n) => n -> (seq, count)
// adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter
// sequences come from FastQC
case None =>
throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in 'clipping_$readName'.")
// FastQC found no adapters
// NOTE(review): the next line looks incomplete in this view (the call that wraps the message seems to be
// missing) -- confirm against the full file. The branch evaluates to an empty name -> (sequence, count) map.
case otherwise =>"No adapters found for summarizing in 'clipping_$readName'.")
Map.empty[String, (String, Int)]
// "adapters" key not found ~ something went wrong in our part
case None => throw new RuntimeException(s"Required key 'adapters' not found in stats entry 'clipping_$readName'.")
// Replace only the "adapters" entry; every other statistic from the parent is kept as is
initStats.updated("adapters", adapterCounts)
......@@ -19,7 +19,7 @@ import
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, Summarizable }
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.extensions.{ Cat, Gzip, Sickle, Cutadapt }
import nl.lumc.sasc.biopet.extensions.{ Cat, Gzip, Sickle }
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
......@@ -93,7 +93,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman
clip = if (!flexiprep.skipClip) {
val foundAdapters =
if (foundAdapters.nonEmpty) {
val cutadapt = new Cutadapt(root)
val cutadapt = new Cutadapt(root, fastqc, read)
cutadapt.fastq_input = seqtk.output
cutadapt.fastq_output = new File(output.getParentFile, input.getName + ".cutadapt.fq")
cutadapt.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment