Skip to content
Snippets Groups Projects
Commit fbed4e7c authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'develop' into feature-fix_missing_values

parents 7e4ca301 1047c24a
No related branches found
No related tags found
No related merge requests found
Showing
with 1264 additions and 78 deletions
......@@ -25,16 +25,16 @@ import nl.lumc.sasc.biopet.core.config.Configurable
class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Contaminants", required = false)
var contaminants: File = _
var contaminants: Option[File] = None
@Input(doc = "Adapters", required = false)
var adapters: File = _
var adapters: Option[File] = None
@Input(doc = "Fastq file", shortName = "FQ")
var fastqfile: File = _
var fastqfile: File = null
@Output(doc = "Output", shortName = "out")
var output: File = _
var output: File = null
executable = config("exe", default = "fastqc")
var java_exe: String = config("exe", default = "java", submodule = "java", freeVar = false)
......@@ -50,17 +50,31 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
override def afterGraph {
this.checkExecutable
if (contaminants == null) {
val fastqcDir = executable.substring(0, executable.lastIndexOf("/"))
val defaultContams = getVersion match {
case "v0.11.2" => new File(fastqcDir + "/Configuration/contaminant_list.txt")
case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt")
}
val defaultAdapters = getVersion match {
case "v0.11.2" => new File(fastqcDir + "/Configuration/adapter_list.txt")
case _ => null
}
contaminants = config("contaminants", default = defaultContams)
val fastqcDir = executable.substring(0, executable.lastIndexOf("/"))
contaminants = contaminants match {
// user-defined contaminants file take precedence
case userDefinedValue @ Some(_) => userDefinedValue
// otherwise, use default contaminants file (depending on FastQC version)
case None =>
val defaultContams = getVersion match {
case "v0.11.2" => new File(fastqcDir + "/Configuration/contaminant_list.txt")
case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt")
}
config("contaminants", default = defaultContams)
}
adapters = adapters match {
// user-defined contaminants file take precedence
case userDefinedValue @ Some(_) => userDefinedValue
// otherwise, check if adapters are already present (depending on FastQC version)
case None =>
val defaultAdapters = getVersion match {
case "v0.11.2" => Option(new File(fastqcDir + "/Configuration/adapter_list.txt"))
case _ => None
}
defaultAdapters.collect { case adp => config("adapters", default = adp) }
}
}
......@@ -74,6 +88,6 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
conditional(noextract, "--noextract") +
conditional(extract, "--extract") +
conditional(quiet, "--quiet") +
required("-o", output.getParent()) +
required("-o", output.getParent) +
required(fastqfile)
}
......@@ -39,5 +39,17 @@
<artifactId>BiopetFramework</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.11</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
......@@ -33,14 +33,14 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada
override def beforeCmd() {
super.beforeCmd
val foundAdapters = fastqc.getFoundAdapters.map(_.seq)
val foundAdapters = fastqc.foundAdapters.map(_.seq)
if (default_clip_mode == "3") opt_adapter ++= foundAdapters
else if (default_clip_mode == "5") opt_front ++= foundAdapters
else if (default_clip_mode == "both") opt_anywhere ++= foundAdapters
}
override def cmdLine = {
if (!opt_adapter.isEmpty || !opt_anywhere.isEmpty || !opt_front.isEmpty) {
if (opt_adapter.nonEmpty || opt_anywhere.nonEmpty || opt_front.nonEmpty) {
analysisName = getClass.getSimpleName
super.cmdLine
} else {
......
......@@ -16,82 +16,154 @@
package nl.lumc.sasc.biopet.pipelines.flexiprep
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import java.io.{ File, FileNotFoundException }
import scala.io.Source
import argonaut._, Argonaut._
import scalaz._, Scalaz._
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.utils.ConfigUtils
/**
* FastQC wrapper with added functionality for the Flexiprep pipeline
*
* This wrapper implements additional methods for parsing FastQC output files and aggregating everything in a summary
* object. The current implementation is based on FastQC v0.10.1.
*/
class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) {
def getDataBlock(name: String): Array[String] = { // Based on Fastqc v0.10.1
val outputDir = output.getAbsolutePath.stripSuffix(".zip")
val dataFile = new File(outputDir + "/fastqc_data.txt")
if (!dataFile.exists) return null
val data = Source.fromFile(dataFile).mkString
for (block <- data.split(">>END_MODULE\n")) {
val b = if (block.startsWith("##FastQC")) block.substring(block.indexOf("\n") + 1) else block
if (b.startsWith(">>" + name))
return for (line <- b.split("\n"))
yield line
}
return null
}
def getEncoding: String = {
val block = getDataBlock("Basic Statistics")
if (block == null) return null
for (
line <- block if (line.startsWith("Encoding"))
) return line.stripPrefix("Encoding\t")
return null // Could be default Sanger with a warning in the log
/** Class for storing a single FastQC module result */
protected case class FastQCModule(name: String, status: String, lines: Seq[String])
/** Default FastQC output directory containing actual results */
// this is a def instead of a val since the value depends on the variable `output`, which is null on class creation
def outputDir: File = new File(output.getAbsolutePath.stripSuffix(".zip"))
/** Default FastQC output data file */
// this is a def instead of a val since the value depends on the variable `output`, which is null on class creation
def dataFile: File = new File(outputDir, "fastqc_data.txt")
/**
* FastQC QC modules.
*
* @return Mapping of FastQC module names and its contents as array of strings (one item per line)
* @throws FileNotFoundException if the FastQC data file can not be found.
* @throws IllegalStateException if the module lines have no content or mapping is empty.
*/
@throws(classOf[FileNotFoundException])
@throws(classOf[IllegalStateException])
def qcModules: Map[String, FastQCModule] = {
val fqModules = Source.fromFile(dataFile)
// drop all the characters before the first module delimiter (i.e. '>>')
.dropWhile(_ != '>')
// pull everything into a string
.mkString
// split into modules
.split(">>END_MODULE\n")
// make map of module name -> module lines
.map {
case (modString) =>
// module name is in the first line, without '>>' and before the tab character
val Array(firstLine, otherLines) = modString
// drop all '>>' character (start of module)
.dropWhile(_ == '>')
// split first line and others
.split("\n", 2)
// and slice them
.slice(0, 2)
// extract module name and module status
val Array(modName, modStatus) = firstLine
.split("\t", 2)
.slice(0, 2)
modName -> FastQCModule(modName, modStatus, otherLines.split("\n").toSeq)
}
.toMap
if (fqModules.isEmpty) throw new IllegalStateException("Empty FastQC data file " + dataFile.toString)
else fqModules
}
protected case class Sequence(name: String, seq: String)
def getFoundAdapters: List[Sequence] = {
def getSeqs(file: File) = {
if (file != null) {
(for (
line <- Source.fromFile(file).getLines(); if line.startsWith("#");
values = line.split("\t*") if values.size >= 2
) yield Sequence(values(0), values(1))).toList
} else Nil
}
/**
* Retrieves the FASTQ file encoding as computed by FastQC.
*
* @return encoding name
* @throws NoSuchElementException when the "Basic Statistics" key does not exist in the mapping or
* when a line starting with "Encoding" does not exist.
*/
@throws(classOf[NoSuchElementException])
def encoding: String =
qcModules("Basic Statistics")
.lines
.dropWhile(!_.startsWith("Encoding"))
.head
.stripPrefix("Encoding\t")
.stripSuffix("\t")
/** Case class representing a known adapter sequence */
protected case class AdapterSequence(name: String, seq: String)
val seqs = getSeqs(adapters) ::: getSeqs(contaminants)
/**
* Retrieves overrepresented sequences found by FastQ.
*
* @return a [[Set]] of [[AdapterSequence]] objects.
*/
def foundAdapters: Set[AdapterSequence] = {
val block = getDataBlock("Overrepresented sequences")
if (block == null) return Nil
/** Returns a list of adapter and/or contaminant sequences known to FastQC */
def getFastqcSeqs(file: Option[File]): Set[AdapterSequence] = file match {
case None => Set.empty[AdapterSequence]
case Some(f) =>
(for {
line <- Source.fromFile(f).getLines()
if !line.startsWith("#")
values = line.split("\t+")
if values.size >= 2
} yield AdapterSequence(values(0), values(1))).toSet
}
val found = for (
line <- block if !line.startsWith("#");
values = line.split("\t") if values.size >= 4
) yield values(3)
val found = qcModules.get("Overrepresented sequences") match {
case None => Seq.empty[String]
case Some(qcModule) =>
for (
line <- qcModule.lines if !(line.startsWith("#") || line.startsWith(">"));
values = line.split("\t") if values.size >= 4
) yield values(3)
}
seqs.filter(x => found.exists(_.startsWith(x.name)))
// select full sequences from known adapters and contaminants
// based on overrepresented sequences results
(getFastqcSeqs(adapters) ++ getFastqcSeqs(contaminants))
.filter(x => found.exists(_.startsWith(x.name)))
}
def getSummary: Json = {
val subfixs = Map("plot_duplication_levels" -> "Images/duplication_levels.png",
"plot_kmer_profiles" -> "Images/kmer_profiles.png",
"plot_per_base_gc_content" -> "Images/per_base_gc_content.png",
"plot_per_base_n_content" -> "Images/per_base_n_content.png",
"plot_per_base_quality" -> "Images/per_base_quality.png",
"plot_per_base_sequence_content" -> "Images/per_base_sequence_content.png",
"plot_per_sequence_gc_content" -> "Images/per_sequence_gc_content.png",
"plot_per_sequence_quality" -> "Images/per_sequence_quality.png",
"plot_sequence_length_distribution" -> "Images/sequence_length_distribution.png",
"fastqc_data" -> "fastqc_data.txt")
val dir = output.getAbsolutePath.stripSuffix(".zip") + "/"
var outputMap: Map[String, Map[String, String]] = Map()
for ((k, v) <- subfixs) outputMap += (k -> Map("path" -> (dir + v)))
val temp = ("" := outputMap) ->: jEmptyObject
return temp.fieldOrEmptyObject("")
/** Summary of the FastQC run, stored in a [[Json]] object */
def summary: Json = {
val outputMap =
Map("plot_duplication_levels" -> "Images/duplication_levels.png",
"plot_kmer_profiles" -> "Images/kmer_profiles.png",
"plot_per_base_gc_content" -> "Images/per_base_gc_content.png",
"plot_per_base_n_content" -> "Images/per_base_n_content.png",
"plot_per_base_quality" -> "Images/per_base_quality.png",
"plot_per_base_sequence_content" -> "Images/per_base_sequence_content.png",
"plot_per_sequence_gc_content" -> "Images/per_sequence_gc_content.png",
"plot_per_sequence_quality" -> "Images/per_sequence_quality.png",
"plot_sequence_length_distribution" -> "Images/sequence_length_distribution.png",
"fastqc_data" -> "fastqc_data.txt")
.map {
case (name, relPath) =>
name -> Map("path" -> (outputDir + File.separator + relPath))
}
ConfigUtils.mapToJson(outputMap)
}
}
object Fastqc {
def apply(root: Configurable, fastqfile: File, outDir: String): Fastqc = {
val fastqcCommand = new Fastqc(root)
fastqcCommand.fastqfile = fastqfile
......@@ -102,6 +174,6 @@ object Fastqc {
//if (filename.endsWith(".fq")) filename = filename.substring(0,filename.size - 3)
fastqcCommand.output = new File(outDir + "/" + filename + "_fastqc.zip")
fastqcCommand.afterGraph
return fastqcCommand
fastqcCommand
}
}
......@@ -201,7 +201,7 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co
def fastqcSummary(fastqc: Fastqc): Option[Json] = {
if (fastqc == null) return None
else return Option(fastqc.getSummary)
else return Option(fastqc.summary)
}
def clipstatSummary(): Option[Json] = {
......
......@@ -25,7 +25,7 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk.
override def beforeCmd {
super.beforeCmd
if (fastqc != null && Q == None) {
val encoding = fastqc.getEncoding
val encoding = fastqc.encoding
Q = encoding match {
case null => None
case s if (s.contains("Sanger / Illumina 1.9")) => None
......
# This file contains a list of potential contaminants which are
# frequently found in high throughput sequencing reactions. These
# are mostly sequences of adapters / primers used in the various
# sequencing chemistries.
#
# Please DO NOT rely on these sequences to design your own oligos, some
# of them are truncated at ambiguous positions, and none of them are
# definitive sequences from the manufacturers so don't blame us if you
# try to use them and they don't work.
#
# You can add more sequences to the file by putting one line per entry
# and specifying a name[tab]sequence. If the contaminant you add is
# likely to be of use to others please consider sending it to the FastQ
# authors, either via a bug report at www.bioinformatics.bbsrc.ac.uk/bugzilla/
# or by directly emailing simon.andrews@bbsrc.ac.uk so other users of
# the program can benefit.
Illumina Single End Adapter 1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
Illumina Single End Adapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Adapter 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Adapter 2 GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
Illumina Paried End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End PCR Primer 2 CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
Illumina Paried End Sequencing Primer 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Sequencing Primer 2 CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
Illumina DpnII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGAC
Illumina DpnII expression Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina DpnII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina DpnII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina DpnII expression Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina NlaIII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina NlaIII expression Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina NlaIII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina NlaIII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina NlaIII expression Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina Small RNA Adapter 1 GTTCAGAGTTCTACAGTCCGACGATC
Illumina Small RNA Adapter 2 TGGAATTCTCGGGTGCCAAGG
Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina Multiplexing Adapter 1 GATCGGAAGAGCACACGTCT
Illumina Multiplexing Adapter 2 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing PCR Primer 1.01 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing PCR Primer 2.01 GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
Illumina Multiplexing Read1 Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing Index Sequencing Primer GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
Illumina Multiplexing Read2 Sequencing Primer GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
Illumina PCR Primer Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC
Illumina PCR Primer Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC
Illumina PCR Primer Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC
Illumina PCR Primer Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC
Illumina PCR Primer Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC
Illumina PCR Primer Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC
Illumina PCR Primer Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC
Illumina PCR Primer Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC
Illumina PCR Primer Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC
Illumina PCR Primer Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC
Illumina PCR Primer Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC
Illumina PCR Primer Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC
Illumina DpnII Gex Adapter 1 GATCGTCGGACTGTAGAACTCTGAAC
Illumina DpnII Gex Adapter 1.01 ACAGGTTCAGAGTTCTACAGTCCGAC
Illumina DpnII Gex Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina DpnII Gex Adapter 2.01 TCGTATGCCGTCTTCTGCTTG
Illumina DpnII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina DpnII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina DpnII Gex Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina NlaIII Gex Adapter 1.01 TCGGACTGTAGAACTCTGAAC
Illumina NlaIII Gex Adapter 1.02 ACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina NlaIII Gex Adapter 2.01 CAAGCAGAAGACGGCATACGA
Illumina NlaIII Gex Adapter 2.02 TCGTATGCCGTCTTCTGCTTG
Illumina NlaIII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina NlaIII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina NlaIII Gex Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
Illumina 5p RNA Adapter GTTCAGAGTTCTACAGTCCGACGATC
Illumina RNA Adapter1 TGGAATTCTCGGGTGCCAAGG
Illumina Small RNA 3p Adapter 1 ATCTCGTATGCCGTCTTCTGCTTG
Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
TruSeq Universal Adapter AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
TruSeq Adapter, Index 1 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 2 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 3 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 4 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 5 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 6 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 7 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 8 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 9 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 10 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 11 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 12 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG
Illumina RNA RT Primer GCCTTGGCACCCGAGAATTCCA
Illumina RNA PCR Primer AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA
RNA PCR Primer, Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 13 CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 14 CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 15 CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 16 CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 17 CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 18 CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 19 CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 20 CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 21 CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 22 CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 23 CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 24 CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 25 CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 26 CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 27 CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 28 CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 29 CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 30 CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 31 CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 32 CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 33 CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 34 CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 35 CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 36 CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 37 CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 38 CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 39 CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 40 CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 41 CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 42 CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 43 CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 44 CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 45 CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 46 CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 47 CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 48 CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
ABI Dynabead EcoP Oligo CTGATCTAGAGGTACCGGATCCCAGCAGT
ABI Solid3 Adapter A CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG
ABI Solid3 Adapter B CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT
ABI Solid3 5' AMP Primer CCACTACGCCTCCGCTTTCCTCTCTATG
ABI Solid3 3' AMP Primer CTGCCCCGGGTTCCTCATTCT
ABI Solid3 EF1 alpha Sense Primer CATGTGTGTTGAGAGCTTC
ABI Solid3 EF1 alpha Antisense Primer GAAAACCAAAGTGGTCCAC
ABI Solid3 GAPDH Forward Primer TTAGCACCCCTGGCCAAGG
ABI Solid3 GAPDH Reverse Primer CTTACTCCTTGGAGGCCATG
This diff is collapsed.
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.flexiprep
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
class FastqcV0101Test extends TestNGSuite with Matchers {
/** Returns the absolute path to test resource directory as a File object */
private val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString)
/** Given a resource file name, returns the the absolute path to it as a File object */
private def resourceFile(p: String): File = new File(resourceDir, p)
/** Mock output file of a FastQC v0.10.1 run */
// the file doesn't actually exist, we just need it so the outputDir value can be computed correctly
private val outputv0101: File = resourceFile("v0101.fq_fastqc.zip")
@Test def testOutputDir() = {
val fqc = new Fastqc(null)
fqc.output = outputv0101
fqc.outputDir shouldBe new File(resourceDir, "v0101.fq_fastqc")
}
@Test def testQcModules() = {
val fqc = new Fastqc(null)
fqc.output = outputv0101
// 11 QC modules
fqc.qcModules.size shouldBe 11
// first module
fqc.qcModules.keySet should contain("Basic Statistics")
// mid (6th module)
fqc.qcModules.keySet should contain("Per sequence GC content")
// last module
fqc.qcModules.keySet should contain("Kmer Content")
}
@Test def testSingleQcModule() = {
val fqc = new Fastqc(null)
fqc.output = outputv0101
fqc.qcModules("Basic Statistics").name should ===("Basic Statistics")
fqc.qcModules("Basic Statistics").status should ===("pass")
fqc.qcModules("Basic Statistics").lines.size shouldBe 8
}
@Test def testEncoding() = {
val fqc = new Fastqc(null)
fqc.output = outputv0101
fqc.encoding shouldBe "Sanger / Illumina 1.9"
}
@Test def testFoundAdapter() = {
val fqc = new Fastqc(null)
fqc.output = outputv0101
fqc.contaminants = Option(resourceFile("fqc_contaminants_v0101.txt"))
val adapters = fqc.foundAdapters
adapters.size shouldBe 1
adapters.head.name should ===("TruSeq Adapter, Index 1")
// from fqc_contaminants_v0101.txt
adapters.head.seq should ===("GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG")
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment