Commit ac55539b authored by Peter van 't Hof
Browse files

Merge branch 'feature-update-cutadapt' into 'develop'

Update Cutadapt wrapper

Updated the cutadapt wrapper to reflect cutadapt 1.9 and added missing options (which were already supported in cutadapt 1.5 but were not available in the extension).

Closes #243 

See merge request !310
parents defe2788 3049d572
......@@ -26,18 +26,19 @@ import scala.collection.mutable
import scala.io.Source
/**
* Extension for cutadept
* Based on version 1.5
* Extension for cutadapt
* Started with version 1.5
* Updated to version 1.9 (18-01-2016 by wyleung)
*/
class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable with Version {
@Input(doc = "Input fastq file")
var fastq_input: File = _
var fastqInput: File = _
@Output
var fastq_output: File = _
var fastqOutput: File = _
@Output(doc = "Output statistics file")
var stats_output: File = _
var statsOutput: File = _
executable = config("exe", default = "cutadapt")
def versionCommand = executable + " --version"
......@@ -46,28 +47,121 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
/** Name of the key containing clipped adapters information in the summary stats. */
def adaptersStatsName = "adapters"
var default_clip_mode: String = config("default_clip_mode", default = "3")
var opt_adapter: Set[String] = config("adapter", default = Nil)
var opt_anywhere: Set[String] = config("anywhere", default = Nil)
var opt_front: Set[String] = config("front", default = Nil)
var opt_discard: Boolean = config("discard", default = false)
var opt_minimum_length: Int = config("minimum_length", 1)
var opt_maximum_length: Option[Int] = config("maximum_length")
var defaultClipMode: String = config("default_clip_mode", default = "3")
var adapter: Set[String] = config("adapter", default = Nil)
var anywhere: Set[String] = config("anywhere", default = Nil)
var front: Set[String] = config("front", default = Nil)
var errorRate: Option[Double] = config("error_rate")
var noIndels: Boolean = config("no_indels", default = false)
var times: Option[Int] = config("times")
var overlap: Option[Int] = config("overlap")
var matchReadWildcards: Boolean = config("match_read_wildcards", default = false)
var noMatchAdapterWildcards: Boolean = config("no_match_adapter_wildcards", default = false) // specific for 1.9
/** Options for filtering of processed reads */
var discard: Boolean = config("discard", default = false)
var trimmedOnly: Boolean = config("trimmed_only", default = false)
var minimumLength: Int = config("minimum_length", 1)
var maximumLength: Option[Int] = config("maximum_length")
var noTrim: Boolean = config("no_trim", default = false)
var maxN: Option[Int] = config("max_n") // specific for 1.9
var maskAdapter: Boolean = config("mask_adapter", default = false)
/** Options that influence what gets output to where */
var quiet: Boolean = config("quiet", default = false)
// var output: File // see up @Output
var infoFile: Option[File] = config("info_file")
var restFile: Option[File] = config("rest_file")
var wildcardFile: Option[File] = config("wildcard_file")
var tooShortOutput: Option[File] = config("too_short_output")
var tooLongOutput: Option[File] = config("too_long_output")
var untrimmedOutput: Option[File] = config("untrimmed_output")
/** Additional read modifications */
var cut: Option[Int] = config("cut")
var qualityCutoff: Option[String] = config("quality_cutoff")
var qualityBase: Option[Int] = config("quality_base")
var trimN: Boolean = config("trim_n", default = false)
var prefix: Option[String] = config("prefix")
var suffix: Option[String] = config("suffix")
var stripSuffix: Set[String] = config("strip_suffix")
var lengthTag: Option[String] = config("length_tag")
/** Colorspace options */
var colorspace: Boolean = config("colorspace", default = false)
var doubleEncode: Boolean = config("double_encode", default = false)
var trimPrimer: Boolean = config("trim_primer", default = false)
var stripF3: Boolean = config("strip_f3", default = false)
var maq: Boolean = config("maq", default = false)
var bwa: Boolean = config("bwa", default = false)
var noZeroCap: Boolean = config("no_zero_cap", default = false)
var zeroCap: Boolean = config("zero_cap", default = false)
/** Paired end options */
var peAdapter: Set[String] = config("pe_adapter", default = Nil)
var peAdapterFront: Set[String] = config("pe_adapter_front", default = Nil)
var peAdapterBoth: Set[String] = config("pe_adapter_both", default = Nil)
var peCut: Boolean = config("pe_cut", default = false)
var pairedOutput: Option[File] = config("paired_output")
var interleaved: Boolean = config("interleaved", default = false)
var untrimmedPairedOutput: Option[File] = config("untrimmed_paired_output")
/**
 * Returns the command line to execute.
 *
 * The string is built by concatenating, in order: the executable, the options
 * that influence how adapters are found, the read-filtering options, the
 * output-routing options, the additional read modifications, the colorspace
 * options, the paired-end options and finally the input / output arguments.
 *
 * NOTE(review): this listing appears to be a diff rendering without +/- markers;
 * lines using the `opt_*` / snake_case names (`opt_adapter`, `fastq_input`,
 * `stats_output`, ...) sit next to the camelCase lines (`adapter`, `fastqInput`,
 * `statsOutput`, ...) that emit the same flags.
 */
def cmdLine = required(executable) +
// options
repeat("-a", opt_adapter) +
repeat("-b", opt_anywhere) +
repeat("-g", opt_front) +
conditional(opt_discard, "--discard") +
optional("-m", opt_minimum_length) +
optional("-M", opt_maximum_length) +
// Options that influence how the adapters are found
repeat("-a", adapter) +
repeat("-b", anywhere) +
repeat("-g", front) +
optional("--error-rate", errorRate) +
conditional(noIndels, "--no-indels") +
optional("--times", times) +
optional("--overlap", overlap) +
conditional(matchReadWildcards, "--match-read-wildcards") +
conditional(noMatchAdapterWildcards, "--no-match-adapter-wildcards") +
// Options for filtering of processed reads
conditional(discard, "--discard") +
conditional(trimmedOnly, "--trimmed-only") +
optional("-m", minimumLength) +
optional("-M", maximumLength) +
conditional(noTrim, "--no-trim") +
optional("--max-n", maxN) +
conditional(maskAdapter, "--mask-adapter") +
// Options that influence what gets output to where
conditional(quiet, "--quiet") +
optional("--info-file", infoFile) +
optional("--rest-file", restFile) +
optional("--wildcard-file", wildcardFile) +
optional("--too-short-output", tooShortOutput) +
optional("--too-long-output", tooLongOutput) +
optional("--untrimmed-output", untrimmedOutput) +
// Additional read modifications
optional("--cut", cut) +
optional("--quality-cutoff", qualityCutoff) +
// NOTE(review): `qualityBase` is declared with the other read-modification
// options but no flag is emitted for it in this method — confirm whether that is intended.
conditional(trimN, "--trim-n") +
optional("--prefix", prefix) +
optional("--suffix", suffix) +
// NOTE(review): `stripSuffix` is declared as Set[String] yet is passed to optional(...),
// whereas the other Set-typed options (adapter, anywhere, front, peAdapter*) use repeat(...) — confirm.
optional("--strip-suffix", stripSuffix) +
optional("--length-tag", lengthTag) +
// Colorspace options
conditional(colorspace, "--colorspace") +
conditional(doubleEncode, "--double-encode") +
conditional(trimPrimer, "--trim-primer") +
conditional(stripF3, "--strip-f3") +
conditional(maq, "--maq") +
conditional(bwa, "--bwa") +
conditional(noZeroCap, "--no-zero-cap") +
conditional(zeroCap, "--zero-cap") +
// Paired-end options
// NOTE(review): `peCut` is declared with the paired-end options but is not emitted below — confirm.
repeat("-A", peAdapter) +
repeat("-G", peAdapterFront) +
repeat("-B", peAdapterBoth) +
conditional(interleaved, "--interleaved") +
optional("--paired-output", pairedOutput) +
optional("--untrimmed-paired-output", untrimmedPairedOutput) +
// input / output
// The `+ " > " + required(...)` tail is part of the `else` branch: when
// `outputAsStsout` is true the whole parenthesised expression is the empty
// string, so neither `--output` nor the stats redirect is appended.
required(fastq_input) +
(if (outputAsStsout) "" else required("--output", fastq_output) +
" > " + required(stats_output))
required(fastqInput) +
(if (outputAsStsout) "" else required("--output", fastqOutput) +
" > " + required(statsOutput))
/** Output summary stats */
def summaryStats: Map[String, Any] = {
......@@ -79,7 +173,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
val stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0)
val adapter_stats: mutable.Map[String, Int] = mutable.Map()
if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines()) {
if (statsOutput.exists) for (line <- Source.fromFile(statsOutput).getLines()) {
line match {
case trimR(m) => stats += ("trimmed" -> m.toInt)
case tooShortR(m) => stats += ("tooshort" -> m.toInt)
......
......@@ -46,16 +46,16 @@ class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.e
// adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter
// sequences come from FastQC
case _ =>
throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastq_input'.")
throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastqInput'.")
}
// FastQC found no adapters
case otherwise =>
;
logger.debug(s"No adapters found for summarizing in '$fastq_input'.")
logger.debug(s"No adapters found for summarizing in '$fastqInput'.")
None
}
// "adapters" key not found ~ something went wrong in our part
case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastq_input'.")
case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastqInput'.")
}
initStats.updated(adaptersStatsName, adapterCounts)
}
......
......@@ -102,12 +102,12 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman
val foundAdapters = fastqc.foundAdapters.map(_.seq)
if (foundAdapters.nonEmpty) {
val cutadapt = new Cutadapt(root, fastqc)
cutadapt.fastq_input = seqtk.output
cutadapt.fastq_output = new File(output.getParentFile, input.getName + ".cutadapt.fq")
cutadapt.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats")
if (cutadapt.default_clip_mode == "3") cutadapt.opt_adapter ++= foundAdapters
else if (cutadapt.default_clip_mode == "5") cutadapt.opt_front ++= foundAdapters
else if (cutadapt.default_clip_mode == "both") cutadapt.opt_anywhere ++= foundAdapters
cutadapt.fastqInput = seqtk.output
cutadapt.fastqOutput = new File(output.getParentFile, input.getName + ".cutadapt.fq")
cutadapt.statsOutput = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats")
if (cutadapt.defaultClipMode == "3") cutadapt.adapter ++= foundAdapters
else if (cutadapt.defaultClipMode == "5") cutadapt.front ++= foundAdapters
else if (cutadapt.defaultClipMode == "both") cutadapt.anywhere ++= foundAdapters
addPipeJob(cutadapt)
Some(cutadapt)
} else None
......@@ -117,7 +117,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman
val sickle = new Sickle(root)
sickle.output_stats = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.trim.stats")
sickle.input_R1 = clip match {
case Some(c) => c.fastq_output
case Some(c) => c.fastqOutput
case _ => seqtk.output
}
sickle.output_R1 = new File(output.getParentFile, input.getName + ".sickle.fq")
......@@ -127,7 +127,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman
val outputFile = (clip, trim) match {
case (_, Some(t)) => t.output_R1
case (Some(c), _) => c.fastq_output
case (Some(c), _) => c.fastqOutput
case _ => seqtk.output
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment