Skip to content
Snippets Groups Projects
Commit e980fd75 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'feature-cutadapt-adapterreporting' into 'develop'

Fix reporting of found Adapters with counts by Cutadapt in Json.

The adapters used by Cutadapt were not reported in the JSON. This fix solves that.

fixes #319 
fixes #325 

See merge request !369
parents 9e3d9653 d01a6749
No related branches found
No related tags found
No related merge requests found
......@@ -24,6 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.collection.mutable
import scala.io.Source
import scala.util.matching.Regex
/**
* Extension for cutadapt
......@@ -163,6 +164,51 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
(if (outputAsStsout) "" else required("--output", fastqOutput) +
" > " + required(statsOutput))
/**
 * Parses the per-adapter sections of a cutadapt report.
 *
 * The report is cut into one section per "=== Adapter N ===" header. For every section
 * that contains a "Sequence" line, the adapter sequence and its "Trimmed: N times" count
 * are read, followed by the "Overview of removed sequences" histograms (clipped length to
 * number of reads).
 *
 * @param statsOutput file holding the report text written by cutadapt
 * @return map keyed by adapter sequence; each value is a map with
 *         "count" (Int, times the adapter was trimmed) and
 *         "histogram" (map of "5p" / "3p" to a map of clipped length -> read count).
 *         A section whose header line cannot be parsed is reported under the empty
 *         sequence "" with count 0.
 */
def extractClippedAdapters(statsOutput: File): Map[String, Any] = {
  // histogram row: "<length>\t<count>\t<remaining columns>"
  val histoCountRow: Regex = """([\d]+)\t([\d]+)\t.*""".r
  // inside a character class '|' is a literal, so the bases are listed without it
  val adapterR = """Sequence: ([CTAG]+);.*Trimmed: ([\d]+) times\.""".r
  // tolerate CRLF reports: a trailing '\r' would defeat the full-line matches above
  val lineBreak = "\r?\n"

  // read the whole report, releasing the file handle even when reading fails
  val statsFile = Source.fromFile(statsOutput)
  val report: String = try statsFile.mkString finally statsFile.close()

  val adapterRawStats: Array[String] = report
    .split("=== Adapter [\\d]+ ===")
    .filter(_.contains("Sequence"))

  adapterRawStats.map { adapterBlock =>
    // identify the adapter sequence and count; when several lines match, the last one wins
    val (adapterName, adapterCount) = adapterBlock.split(lineBreak)
      .collect { case adapterR(sequence, count) => (sequence, count.toInt) }
      .lastOption
      .getOrElse(("", 0))

    // parse the blocks that give the histogram of clipped bases and from which end
    val counts = adapterBlock.split("Overview of removed sequences ")
      .filter(_.contains("length"))
      .map { clipSideRawStats =>
        val clipSideLabel = if (clipSideRawStats.contains("5'")) "5p" else "3p"
        val histogramValues = clipSideRawStats.split(lineBreak).collect {
          case histoCountRow(length, count) => length.toInt -> count.toInt
        }
        clipSideLabel -> histogramValues.toMap
      }

    adapterName -> Map[String, Any](
      "count" -> adapterCount,
      "histogram" -> counts.toMap
    )
  }.toMap
}
/** Output summary stats */
def summaryStats: Map[String, Any] = {
/**
......@@ -177,7 +223,6 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
val tooLongR = """.* that were too long: *([,\d]+) .*""".r
val tooManyN = """.* with too many N: *([,\d]+) .*""".r
val adapterR = """Sequence ([C|T|A|G]*);.*Trimmed: ([,\d]+) times.""".r
val basePairsProcessed = """Total basepairs processed: *([,\d]+) bp""".r
val basePairsWritten = """Total written \(filtered\): *([,\d]+) bp .*""".r
......@@ -192,24 +237,28 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
"bpoutput" -> 0,
"toomanyn" -> 0
)
val adapterStats: mutable.Map[String, Long] = mutable.Map()
// extract the adapters with its histogram
val adapterStats = if (statsOutput.exists) {
extractClippedAdapters(statsOutput)
} else Map.empty
if (statsOutput.exists) {
val statsFile = Source.fromFile(statsOutput)
for (line <- statsFile.getLines()) {
line match {
case processedReads(m) => stats("processed") = m.replaceAll(",", "").toLong
case withAdapters(m) => stats("withadapters") = m.replaceAll(",", "").toLong
case readsPassingFilters(m) => stats("passingfilters") = m.replaceAll(",", "").toLong
case tooShortR(m) => stats("tooshort") = m.replaceAll(",", "").toLong
case tooLongR(m) => stats("toolong") = m.replaceAll(",", "").toLong
case tooManyN(m) => stats("toomanyn") = m.replaceAll(",", "").toLong
case basePairsProcessed(m) => stats("bpinput") = m.replaceAll(",", "").toLong
case basePairsWritten(m) => stats("bpoutput") = m.replaceAll(",", "").toLong
case adapterR(adapter, count) => adapterStats += (adapter -> count.toLong)
case _ =>
case processedReads(m) => stats("processed") = m.replaceAll(",", "").toLong
case withAdapters(m) => stats("withadapters") = m.replaceAll(",", "").toLong
case readsPassingFilters(m) => stats("passingfilters") = m.replaceAll(",", "").toLong
case tooShortR(m) => stats("tooshort") = m.replaceAll(",", "").toLong
case tooLongR(m) => stats("toolong") = m.replaceAll(",", "").toLong
case tooManyN(m) => stats("toomanyn") = m.replaceAll(",", "").toLong
case basePairsProcessed(m) => stats("bpinput") = m.replaceAll(",", "").toLong
case basePairsWritten(m) => stats("bpoutput") = m.replaceAll(",", "").toLong
case _ =>
}
}
statsFile.close()
}
val cleanReads = stats("processed") - stats("withadapters")
......@@ -223,8 +272,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
"num_reads_discarded_too_long" -> stats("toolong"),
"num_reads_discarded_many_n" -> stats("toomanyn"),
"num_bases_input" -> stats("bpinput"),
"num_based_output" -> stats("bpoutput"),
adaptersStatsName -> adapterStats.toMap
"num_bases_output" -> stats("bpoutput"),
adaptersStatsName -> adapterStats
)
}
......
......@@ -16,6 +16,7 @@
package nl.lumc.sasc.biopet.pipelines.flexiprep
import nl.lumc.sasc.biopet.utils.config.Configurable
import scala.collection.JavaConversions._
/**
* Cutadapt wrapper specific for Flexiprep.
......@@ -41,23 +42,26 @@ class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.e
val adapterCounts: Map[String, Any] = initStats.get(adaptersStatsName) match {
// "adapters" key found in statistics
case Some(m: Map[_, _]) => m.flatMap {
case (seq: String, count) =>
seqToNameMap.get(seq) match {
case (adapterSequence: String, adapterStats: Map[_, _]) =>
seqToNameMap.get(adapterSequence) match {
// adapter sequence is found by FastQC
case Some(n) => Some(n -> Map("sequence" -> seq, "count" -> count))
case Some(adapterSeqName) => {
Some(adapterSeqName ->
Map("sequence" -> adapterSequence, "stats" -> adapterStats.toMap)
)
}
// adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter
// sequences come from FastQC
case _ =>
throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastqInput'.")
throw new IllegalStateException(s"Adapter '$adapterSequence' is clipped but not found by FastQC in '$fastqInput'.")
}
// FastQC found no adapters
case otherwise =>
;
logger.debug(s"No adapters found for summarizing in '$fastqInput'.")
None
}
// "adapters" key not found ~ something went wrong in our part
case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastqInput'.")
case _ => throw new RuntimeException(s"Required key '${adaptersStatsName}' not found in stats entry '${fastqInput}'.")
}
initStats.updated(adaptersStatsName, adapterCounts)
}
......
This is cutadapt 1.9.1 with Python 2.7.6
Command line parameters: -b CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC -b GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG -b CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC -b GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG --error-rate 0.2 --times 2 -m 15 ct_r1.fq.gz.seqtk.fq --output ct_r1.fq.gz.cutadapt.fq
Trimming 4 adapters with at most 20.0% errors in single-end mode ...
Finished in 0.19 s (189 us/read; 0.32 M reads/minute).
=== Summary ===
Total reads processed: 1,000
Reads with adapters: 440 (44.0%)
Reads that were too short: 15 (1.5%)
Reads written (passing filters): 985 (98.5%)
Total basepairs processed: 100,000 bp
Total written (filtered): 89,423 bp (89.4%)
=== Adapter 1 ===
Sequence: CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC; Type: variable 5'/3'; Length: 63; Trimmed: 94 times.
18 times, it overlapped the 5' end of a read
76 times, it overlapped the 3' end or was within the read
No. of allowed errors:
0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12
Overview of removed sequences (5')
length count expect max.err error counts
3 8 15.6 0 8
4 3 3.9 0 2 1
5 2 1.0 1 0 2
6 4 0.2 1 1 3
9 1 0.0 1 0 0 1
Overview of removed sequences (3' or within)
length count expect max.err error counts
3 13 15.6 0 13
4 19 3.9 0 3 16
5 21 1.0 1 0 21
6 18 0.2 1 1 17
7 2 0.1 1 0 2
9 1 0.0 1 0 0 1
11 1 0.0 2 0 0 1
12 1 0.0 2 0 0 1
=== Adapter 2 ===
Sequence: GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG; Type: variable 5'/3'; Length: 63; Trimmed: 340 times.
117 times, it overlapped the 5' end of a read
223 times, it overlapped the 3' end or was within the read
No. of allowed errors:
0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12
Overview of removed sequences (5')
length count expect max.err error counts
3 14 15.6 0 14
4 29 3.9 0 6 23
5 32 1.0 1 3 29
6 36 0.2 1 0 36
8 1 0.0 1 0 1
9 1 0.0 1 0 0 1
10 1 0.0 2 0 0 1
11 2 0.0 2 0 0 2
37 1 0.0 7 0 0 0 0 0 1
Overview of removed sequences (3' or within)
length count expect max.err error counts
3 18 15.6 0 18
4 9 3.9 0 5 4
5 15 1.0 1 8 7
6 10 0.2 1 8 2
7 7 0.1 1 5 2
8 10 0.0 1 9 1
9 6 0.0 1 5 1
10 8 0.0 2 5 0 3
11 4 0.0 2 4
12 4 0.0 2 4
13 9 0.0 2 9
14 4 0.0 2 3 0 1
15 7 0.0 3 7
16 2 0.0 3 2
17 4 0.0 3 2 1 0 1
18 2 0.0 3 2
19 2 0.0 3 2
20 2 0.0 4 0 1 1
21 7 0.0 4 6 1
22 7 0.0 4 7
23 2 0.0 4 2
24 3 0.0 4 3
25 5 0.0 5 5
26 5 0.0 5 5
27 8 0.0 5 8
28 6 0.0 5 5 1
29 2 0.0 5 2
30 5 0.0 6 5
31 3 0.0 6 3
32 8 0.0 6 8
33 1 0.0 6 1
34 5 0.0 6 0 5
35 2 0.0 7 0 0 0 0 0 0 2
36 3 0.0 7 0 0 0 0 0 0 3
37 4 0.0 7 0 0 0 0 0 0 0 2 2
38 2 0.0 7 0 0 0 0 0 0 0 0 0 2
39 4 0.0 7 0 0 0 0 1 0 0 0 0 3
40 3 0.0 8 0 0 0 0 0 0 0 3
41 1 0.0 8 0 0 0 0 0 0 0 1
42 4 0.0 8 0 0 0 0 0 0 0 0 4
43 5 0.0 8 0 0 0 0 0 0 0 0 0 5
44 3 0.0 8 0 0 0 0 0 0 0 0 0 0 3
46 1 0.0 9 0 0 0 0 0 0 0 0 0 0 1
49 1 0.0 9 0 0 0 0 0 1
=== Adapter 3 ===
Sequence: CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC; Type: variable 5'/3'; Length: 63; Trimmed: 0 times.
=== Adapter 4 ===
Sequence: GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG; Type: variable 5'/3'; Length: 63; Trimmed: 82 times.
15 times, it overlapped the 5' end of a read
67 times, it overlapped the 3' end or was within the read
No. of allowed errors:
0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12
Overview of removed sequences (5')
length count expect max.err error counts
26 1 0.0 5 0 1
61 2 0.0 12 0 0 0 2
64 11 0.0 12 0 0 0 11
72 1 0.0 12 0 0 0 0 0 0 0 0 0 0 0 1
Overview of removed sequences (3' or within)
length count expect max.err error counts
45 3 0.0 9 0 0 0 3
46 2 0.0 9 0 0 0 2
47 3 0.0 9 0 0 0 3
48 3 0.0 9 0 0 0 3
49 2 0.0 9 0 0 0 2
50 3 0.0 10 0 0 0 3
51 2 0.0 10 0 0 0 2
52 6 0.0 10 0 0 0 6
53 1 0.0 10 0 0 0 1
54 5 0.0 10 0 0 0 4 0 1
56 2 0.0 11 0 0 0 2
57 2 0.0 11 0 0 0 2
58 2 0.0 11 0 0 0 2
59 3 0.0 11 0 0 0 2 0 0 0 0 0 1
61 1 0.0 12 0 0 0 0 0 1
62 3 0.0 12 0 0 0 2 1
63 1 0.0 12 0 0 0 0 1
66 3 0.0 12 0 0 0 3
67 3 0.0 12 0 0 0 3
70 1 0.0 12 0 0 0 1
72 1 0.0 12 0 0 0 1
80 1 0.0 12 0 0 0 1
99 14 0.0 12 0 0 0 14
# This file contains a list of potential contaminants which are
# frequently found in high throughput sequencing reactions. These
# are mostly sequences of adapters / primers used in the various
# sequencing chemistries.
#
# Please DO NOT rely on these sequences to design your own oligos, some
# of them are truncated at ambiguous positions, and none of them are
# definitive sequences from the manufacturers so don't blame us if you
# try to use them and they don't work.
#
# You can add more sequences to the file by putting one line per entry
# and specifying a name[tab]sequence. If the contaminant you add is
# likely to be of use to others please consider sending it to the FastQ
# authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
# or by directly emailing simon.andrews@babraham.ac.uk so other users of
# the program can benefit.
Illumina Single End Adapter 1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
Illumina Single End Adapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Adapter 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Adapter 2 GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
Illumina Paried End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End PCR Primer 2 CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
Illumina Paried End Sequencing Primer 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Sequencing Primer 2 CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
Illumina DpnII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGAC
Illumina DpnII expression Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina DpnII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina DpnII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina DpnII expression Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina NlaIII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina NlaIII expression Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina NlaIII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina NlaIII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina NlaIII expression Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina Small RNA Adapter 1 GTTCAGAGTTCTACAGTCCGACGATC
Illumina Small RNA Adapter 2 TGGAATTCTCGGGTGCCAAGG
Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina Multiplexing Adapter 1 GATCGGAAGAGCACACGTCT
Illumina Multiplexing Adapter 2 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing PCR Primer 1.01 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing PCR Primer 2.01 GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
Illumina Multiplexing Read1 Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing Index Sequencing Primer GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
Illumina Multiplexing Read2 Sequencing Primer GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
Illumina PCR Primer Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC
Illumina PCR Primer Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC
Illumina PCR Primer Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC
Illumina PCR Primer Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC
Illumina PCR Primer Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC
Illumina PCR Primer Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC
Illumina PCR Primer Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC
Illumina PCR Primer Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC
Illumina PCR Primer Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC
Illumina PCR Primer Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC
Illumina PCR Primer Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC
Illumina PCR Primer Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC
Illumina DpnII Gex Adapter 1 GATCGTCGGACTGTAGAACTCTGAAC
Illumina DpnII Gex Adapter 1.01 ACAGGTTCAGAGTTCTACAGTCCGAC
Illumina DpnII Gex Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina DpnII Gex Adapter 2.01 TCGTATGCCGTCTTCTGCTTG
Illumina DpnII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina DpnII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina DpnII Gex Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina NlaIII Gex Adapter 1.01 TCGGACTGTAGAACTCTGAAC
Illumina NlaIII Gex Adapter 1.02 ACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina NlaIII Gex Adapter 2.01 CAAGCAGAAGACGGCATACGA
Illumina NlaIII Gex Adapter 2.02 TCGTATGCCGTCTTCTGCTTG
Illumina NlaIII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina NlaIII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina NlaIII Gex Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
Illumina 5p RNA Adapter GTTCAGAGTTCTACAGTCCGACGATC
Illumina RNA Adapter1 TGGAATTCTCGGGTGCCAAGG
Illumina Small RNA 3p Adapter 1 ATCTCGTATGCCGTCTTCTGCTTG
Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
TruSeq Universal Adapter AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
TruSeq Adapter, Index 1 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 2 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 3 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 4 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 5 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 6 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 7 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 8 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 9 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 10 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 11 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 12 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 13 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 14 GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 15 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 16 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 18 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 19 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 20 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 21 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 22 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 23 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCACTCTTCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 25 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 27 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTCTCGTATGCCGTCTTCTGCTTG
Illumina RNA RT Primer GCCTTGGCACCCGAGAATTCCA
Illumina RNA PCR Primer AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA
RNA PCR Primer, Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 13 CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 14 CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 15 CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 16 CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 17 CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 18 CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 19 CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 20 CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 21 CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 22 CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 23 CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 24 CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 25 CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 26 CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 27 CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 28 CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 29 CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 30 CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 31 CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 32 CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 33 CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 34 CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 35 CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 36 CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 37 CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 38 CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 39 CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 40 CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 41 CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 42 CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 43 CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 44 CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 45 CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 46 CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 47 CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 48 CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
ABI Dynabead EcoP Oligo CTGATCTAGAGGTACCGGATCCCAGCAGT
ABI Solid3 Adapter A CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG
ABI Solid3 Adapter B CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT
ABI Solid3 5' AMP Primer CCACTACGCCTCCGCTTTCCTCTCTATG
ABI Solid3 3' AMP Primer CTGCCCCGGGTTCCTCATTCT
ABI Solid3 EF1 alpha Sense Primer CATGTGTGTTGAGAGCTTC
ABI Solid3 EF1 alpha Antisense Primer GAAAACCAAAGTGGTCCAC
ABI Solid3 GAPDH Forward Primer TTAGCACCCCTGGCCAAGG
ABI Solid3 GAPDH Reverse Primer CTTACTCCTTGGAGGCCATG
====
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
====
##FastQC 0.10.1
##FastQC 0.11.2
>>Basic Statistics pass
#Measure Value
Filename ct_r1.fq
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1000
Filtered Sequences 0
Sequence length 100
%GC 53
#Measure Value
Filename ct_r1.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1000
Sequences flagged as poor quality 0
Sequence length 100
%GC 53
>>END_MODULE
>>Per base sequence quality fail
#Base Mean Median Lower Quartile Upper Quartile 10th Percentile 90th Percentile
......@@ -36,26 +20,111 @@ Sequence length 100
7 35.783 37.0 35.0 37.0 35.0 37.0
8 36.008 37.0 35.0 37.0 35.0 37.0
9 37.706 39.0 37.0 39.0 35.0 39.0
10-14 37.857600000000005 39.2 37.2 39.4 34.8 39.4
15-19 38.9788 40.2 38.0 41.0 35.0 41.0
20-24 38.8246 40.0 38.0 41.0 34.8 41.0
25-29 38.589600000000004 40.0 38.0 41.0 34.4 41.0
30-34 38.3568 40.0 38.0 41.0 33.8 41.0
35-39 38.1592 40.0 37.4 41.0 33.6 41.0
40-44 37.4808 39.8 36.0 41.0 32.6 41.0
45-49 36.9478 39.0 35.0 40.8 31.2 41.0
50-54 35.845600000000005 37.8 34.6 40.0 29.4 41.0
55-59 34.739 36.6 33.6 40.0 27.4 41.0
60-64 34.1336 35.4 33.4 38.6 27.2 40.2
65-69 32.7464 35.0 32.6 37.2 24.6 39.6
70-74 29.3478 34.0 29.6 35.6 2.0 38.6
75-79 27.4908 33.2 26.4 35.0 2.0 36.6
80-84 25.893000000000008 33.0 21.8 35.0 2.0 35.4
85-89 25.031799999999997 32.4 16.2 34.6 2.0 35.0
90-94 23.9446 31.4 6.4 34.0 2.0 35.0
95-99 22.9358 30.4 2.0 34.0 2.0 35.0
10-11 37.709 39.0 37.0 39.0 35.0 39.0
12-13 37.6135 39.0 37.0 39.0 35.0 39.0
14-15 38.793 40.0 38.0 41.0 34.5 41.0
16-17 39.033500000000004 40.5 38.0 41.0 35.0 41.0
18-19 38.942 40.0 38.0 41.0 35.0 41.0
20-21 38.888 40.0 38.0 41.0 35.0 41.0
22-23 38.807 40.0 38.0 41.0 35.0 41.0
24-25 38.702 40.0 38.0 41.0 34.0 41.0
26-27 38.65 40.0 38.0 41.0 34.5 41.0
28-29 38.4885 40.0 38.0 41.0 34.5 41.0
30-31 38.307 40.0 38.0 41.0 34.0 41.0
32-33 38.433499999999995 40.0 38.0 41.0 34.0 41.0
34-35 38.3425 40.0 38.0 41.0 33.5 41.0
36-37 38.1185 40.0 37.5 41.0 33.5 41.0
38-39 38.088499999999996 40.0 37.0 41.0 33.5 41.0
40-41 37.555 40.0 36.0 41.0 32.5 41.0
42-43 37.504999999999995 40.0 36.0 41.0 33.0 41.0
44-45 37.167 39.0 35.5 41.0 32.0 41.0
46-47 36.980999999999995 39.0 35.0 41.0 31.0 41.0
48-49 36.8635 39.0 35.0 40.5 31.0 41.0
50-51 36.4125 38.5 35.0 40.0 30.5 41.0
52-53 35.528000000000006 37.5 34.5 40.0 28.5 41.0
54-55 34.925 37.0 33.5 40.0 27.5 41.0
56-57 34.8735 37.0 34.0 40.0 27.5 41.0
58-59 34.7225 36.0 33.5 40.0 28.0 41.0
60-61 34.67400000000001 36.0 34.0 39.0 28.5 40.5
62-63 33.841499999999996 35.0 33.0 38.5 26.5 40.0
64-65 33.549 35.0 33.0 38.0 26.0 40.0
66-67 32.971999999999994 35.0 33.0 37.0 26.0 40.0
68-69 32.1635 35.0 32.0 37.0 22.5 39.0
70-71 30.002000000000002 34.0 30.5 36.0 2.0 39.0
72-73 29.0695 34.0 29.0 35.5 2.0 38.5
74-75 28.641 34.0 29.0 35.0 2.0 38.0
76-77 27.8495 33.0 27.5 35.0 2.0 36.0
78-79 26.5345 33.0 24.0 35.0 2.0 36.5
80-81 26.140500000000003 33.0 23.0 35.0 2.0 36.0
82-83 25.784 33.0 21.5 35.0 2.0 35.0
84-85 25.6115 33.0 20.0 35.0 2.0 35.0
86-87 25.1755 33.0 17.0 35.0 2.0 35.0
88-89 24.600499999999997 31.5 13.5 34.0 2.0 35.0
90-91 24.088 31.5 6.5 34.0 2.0 35.0
92-93 24.16 32.0 8.5 34.0 2.0 35.0
94-95 23.02 30.0 2.0 34.0 2.0 35.0
96-97 23.183 30.5 2.0 34.0 2.0 35.0
98-99 22.75 30.5 2.0 34.0 2.0 35.0
100 21.984 30.0 2.0 34.0 2.0 35.0
>>END_MODULE
>>Per tile sequence quality pass
#Tile Base Mean
1101 1 0.0
1101 2 0.0
1101 3 0.0
1101 4 0.0
1101 5 0.0
1101 6 0.0
1101 7 0.0
1101 8 0.0
1101 9 0.0
1101 10-11 0.0
1101 12-13 0.0
1101 14-15 0.0
1101 16-17 0.0
1101 18-19 0.0
1101 20-21 0.0
1101 22-23 0.0
1101 24-25 0.0
1101 26-27 0.0
1101 28-29 0.0
1101 30-31 0.0
1101 32-33 0.0
1101 34-35 0.0
1101 36-37 0.0
1101 38-39 0.0
1101 40-41 0.0
1101 42-43 0.0
1101 44-45 0.0
1101 46-47 0.0
1101 48-49 0.0
1101 50-51 0.0
1101 52-53 0.0
1101 54-55 0.0
1101 56-57 0.0
1101 58-59 0.0
1101 60-61 0.0
1101 62-63 0.0
1101 64-65 0.0
1101 66-67 0.0
1101 68-69 0.0
1101 70-71 0.0
1101 72-73 0.0
1101 74-75 0.0
1101 76-77 0.0
1101 78-79 0.0
1101 80-81 0.0
1101 82-83 0.0
1101 84-85 0.0
1101 86-87 0.0
1101 88-89 0.0
1101 90-91 0.0
1101 92-93 0.0
1101 94-95 0.0
1101 96-97 0.0
1101 98-99 0.0
1101 100 0.0
>>END_MODULE
>>Per sequence quality scores pass
#Quality Count
11 1.0
......@@ -99,57 +168,53 @@ Sequence length 100
7 20.9 24.7 32.6 21.8
8 20.0 27.200000000000003 30.0 22.8
9 24.5 21.5 27.800000000000004 26.200000000000003
10-14 25.22 23.28 26.26 25.240000000000002
15-19 26.44 21.34 26.1 26.119999999999997
20-24 25.240000000000002 22.1 24.6 28.060000000000002
25-29 24.62 22.06 25.119999999999997 28.199999999999996
30-34 26.240000000000002 21.44 24.279999999999998 28.04
35-39 24.8 22.439999999999998 24.34 28.42
40-44 25.8 22.84 23.9 27.46
45-49 26.26 22.64 23.66 27.439999999999998
50-54 26.72 22.58 23.18 27.52
55-59 25.019999999999996 22.58 24.38 28.02
60-64 26.251501802162597 22.00640768922707 23.28794553464157 28.454144973968766
65-69 25.683829444891394 23.873692679002414 23.049074818986323 27.39340305711987
70-74 25.554134697357206 25.44757033248082 21.717817561807333 27.28047740835465
75-79 25.818501428257523 23.643155350472423 23.071852340145025 27.466490881125026
80-84 26.973532796317606 23.95857307249712 21.74913693901036 27.318757192174914
85-89 25.452016689847014 24.849327770050998 22.624014835419565 27.07464070468243
90-94 24.547101449275363 22.35054347826087 24.139492753623188 28.962862318840582
95-99 25.318837549655026 24.231653773782146 23.186284758519758 27.263223918043067
10-11 25.15 24.0 27.55 23.3
12-13 26.200000000000003 22.3 24.65 26.85
14-15 24.75 21.95 26.3 27.0
16-17 25.4 21.7 26.55 26.35
18-19 27.650000000000002 21.6 25.85 24.9
20-21 24.8 21.8 24.3 29.099999999999998
22-23 25.900000000000002 23.05 24.15 26.900000000000002
24-25 24.85 21.4 25.900000000000002 27.85
26-27 24.7 20.849999999999998 25.0 29.45
28-29 24.4 23.3 24.95 27.35
30-31 27.35 20.95 25.15 26.55
32-33 24.9 22.05 23.400000000000002 29.65
34-35 25.6 22.15 25.900000000000002 26.35
36-37 24.95 21.2 23.400000000000002 30.45
38-39 24.8 23.35 23.7 28.15
40-41 27.0 23.35 23.599999999999998 26.05
42-43 25.15 22.35 23.799999999999997 28.7
44-45 26.200000000000003 20.7 24.3 28.799999999999997
46-47 26.3 24.0 23.150000000000002 26.55
48-49 25.5 23.3 24.05 27.150000000000002
50-51 27.55 22.75 23.7 26.0
52-53 24.45 23.400000000000002 23.1 29.049999999999997
54-55 27.450000000000003 21.85 23.0 27.700000000000003
56-57 25.85 22.15 23.5 28.499999999999996
58-59 24.05 22.75 25.6 27.6
60-61 25.25 20.95 23.45 30.349999999999998
62-63 27.3 21.9 23.7 27.1
64-65 26.178535606820464 24.57372116349047 22.617853560682047 26.629889669007024
66-67 25.7 23.75 22.05 28.499999999999996
68-69 25.405679513184587 23.52941176470588 24.036511156186613 27.028397565922923
70-71 25.159574468085104 23.085106382978722 23.138297872340424 28.617021276595743
72-73 26.031065881092662 26.513122656668454 20.51419389394751 26.941617568291377
74-75 25.197680548234054 26.56826568265683 21.929362150764366 26.304691618344755
76-77 25.911812738160044 23.51660315732172 24.550898203592812 26.02068590092542
78-79 26.16345062429058 22.985244040862657 21.793416572077184 29.05788876276958
80-81 26.98324022346369 25.474860335195533 21.005586592178773 26.536312849162012
82-83 26.46370023419204 24.355971896955502 22.131147540983605 27.049180327868854
84-85 26.124567474048444 23.18339100346021 22.145328719723185 28.546712802768166
86-87 25.976331360946748 25.443786982248522 22.36686390532544 26.21301775147929
88-89 25.503742084052966 23.54634427173287 23.316062176165804 27.63385146804836
90-91 23.832052040212893 21.525724423418097 25.901833234772326 28.74039030159669
92-93 24.525139664804467 22.849162011173185 23.743016759776538 28.88268156424581
94-95 25.161987041036717 24.028077753779698 22.4622030237581 28.347732181425485
96-97 25.37393162393162 24.412393162393162 23.664529914529915 26.549145299145298
98-99 25.67703109327984 23.620862587763288 22.71815446339017 27.9839518555667
100 24.0 26.0 21.9 28.1
>>END_MODULE
>>Per base GC content fail
#Base %GC
1 71.01303911735206
2 64.1
3 73.3
4 65.3
5 55.800000000000004
6 87.3
7 42.699999999999996
8 42.8
9 50.7
10-14 50.46000000000001
15-19 52.559999999999995
20-24 53.300000000000004
25-29 52.82
30-34 54.279999999999994
35-39 53.22
40-44 53.26
45-49 53.7
50-54 54.24
55-59 53.04
60-64 54.70564677613135
65-69 53.07723250201126
70-74 52.834612105711855
75-79 53.28499230938255
80-84 54.29228998849251
85-89 52.526657394529444
90-94 53.509963768115945
95-99 52.5820614676981
100 52.1
>>END_MODULE
>>Per sequence GC content fail
#GC Content Count
0 0.0
......@@ -265,24 +330,51 @@ Sequence length 100
7 0.0
8 0.0
9 0.0
10-14 0.0
15-19 0.0
20-24 0.0
25-29 0.0
30-34 0.0
35-39 0.0
40-44 0.0
45-49 0.0
50-54 0.0
55-59 0.0
60-64 0.12
65-69 0.5599999999999999
70-74 6.16
75-79 8.98
80-84 13.100000000000001
85-89 13.719999999999999
90-94 11.68
95-99 4.34
10-11 0.0
12-13 0.0
14-15 0.0
16-17 0.0
18-19 0.0
20-21 0.0
22-23 0.0
24-25 0.0
26-27 0.0
28-29 0.0
30-31 0.0
32-33 0.0
34-35 0.0
36-37 0.0
38-39 0.0
40-41 0.0
42-43 0.0
44-45 0.0
46-47 0.0
48-49 0.0
50-51 0.0
52-53 0.0
54-55 0.0
56-57 0.0
58-59 0.0
60-61 0.0
62-63 0.0
64-65 0.3
66-67 0.0
68-69 1.4000000000000001
70-71 6.0
72-73 6.65
74-75 5.1499999999999995
76-77 8.15
78-79 11.899999999999999
80-81 10.5
82-83 14.6
84-85 13.3
86-87 15.5
88-89 13.15
90-91 15.45
92-93 10.5
94-95 7.3999999999999995
96-97 6.4
98-99 0.3
100 0.0
>>END_MODULE
>>Sequence Length Distribution pass
......@@ -290,565 +382,85 @@ Sequence length 100
100 1000.0
>>END_MODULE
>>Sequence Duplication Levels pass
#Total Duplicate Percentage 3.4
#Duplication Level Relative count
1 100.0
2 0.4140786749482402
3 0.0
4 0.0
5 0.0
6 0.0
7 0.0
8 0.0
9 0.0
10++ 0.2070393374741201
#Total Deduplicated Percentage 97.2
#Duplication Level Percentage of deduplicated Percentage of total
1 99.38271604938271 96.6
2 0.411522633744856 0.8
3 0.0 0.0
4 0.0 0.0
5 0.0 0.0
6 0.0 0.0
7 0.0 0.0
8 0.0 0.0
9 0.0 0.0
>10 0.205761316872428 2.6
>50 0.0 0.0
>100 0.0 0.0
>500 0.0 0.0
>1k 0.0 0.0
>5k 0.0 0.0
>10k+ 0.0 0.0
>>END_MODULE
>>Overrepresented sequences fail
#Sequence Count Percentage Possible Source
AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTAT 14 1.4000000000000001 TruSeq Adapter, Index 1 (97% over 36bp)
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATG 12 1.2 TruSeq Adapter, Index 1 (97% over 36bp)
AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTAT 14 1.4000000000000001 TruSeq Adapter, Index 18 (97% over 37bp)
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATG 12 1.2 TruSeq Adapter, Index 18 (97% over 37bp)
AGGGGGAATGATGGTTGTCTTTGGATATACTACAGCGATGGCTATTGAGG 2 0.2 No Hit
GGCTTGTTTTATTTTAATGGCTGATCTATGTAATCACAGAGGCCAGTATG 2 0.2 No Hit
GTGGGGTGGTGTTTGTGGGGGACTTCATCATCTCAGGCTTCCCAGGGTCC 2 0.2 No Hit
CGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATGCCG 2 0.2 TruSeq Adapter, Index 1 (96% over 33bp)
CGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATGCCG 2 0.2 TruSeq Adapter, Index 18 (97% over 34bp)
>>END_MODULE
>>Adapter Content fail
#Position Illumina Universal Adapter Illumina Small RNA Adapter Nextera Transposase Sequence
1 1.4 0.0 0.0
2 1.4 0.0 0.0
3 1.4 0.0 0.0
4 1.4 0.0 0.0
5 1.4 0.0 0.0
6 1.4 0.0 0.0
7 1.4 0.0 0.0
8 1.4 0.0 0.0
9 1.4 0.0 0.0
10-11 1.4 0.0 0.0
12-13 1.4 0.0 0.0
14-15 1.4 0.0 0.0
16-17 1.4 0.0 0.0
18-19 1.4 0.0 0.0
20-21 1.5 0.0 0.0
22-23 1.5 0.0 0.0
24-25 1.5 0.0 0.0
26-27 1.5 0.0 0.0
28-29 1.6 0.0 0.0
30-31 1.7 0.0 0.0
32-33 1.9 0.0 0.0
34-35 2.4 0.0 0.0
36-37 2.45 0.0 0.0
38-39 2.95 0.0 0.0
40-41 3.25 0.0 0.0
42-43 3.75 0.0 0.0
44-45 4.2 0.0 0.0
46-47 4.9 0.0 0.0
48-49 5.699999999999999 0.0 0.0
50-51 6.300000000000001 0.0 0.0
52-53 6.949999999999999 0.0 0.0
54-55 7.65 0.0 0.0
56-57 8.399999999999999 0.0 0.0
58-59 9.350000000000001 0.0 0.0
60-61 9.899999999999999 0.0 0.0
62-63 10.600000000000001 0.0 0.0
64-65 11.3 0.0 0.0
66-67 12.0 0.0 0.0
68-69 13.05 0.0 0.0
70-71 13.6 0.0 0.0
72-73 14.5 0.0 0.0
74-75 15.55 0.0 0.0
76-77 16.15 0.0 0.0
78-79 17.2 0.0 0.0
80-81 17.700000000000003 0.0 0.0
82-83 18.15 0.0 0.0
84-85 18.75 0.0 0.0
86-87 19.799999999999997 0.0 0.0
88 20.6 0.0 0.0
>>END_MODULE
>>Kmer Content fail
#Sequence Count Obs/Exp Overall Obs/Exp Max Max Obs/Exp Position
AAAAA 385 7.3597403 68.038994 65-69
AGATC 435 5.4375157 23.135067 1
GAAGA 375 5.258809 32.443344 6
GGAAG 420 5.044668 33.345257 5
TCCAG 475 4.8355613 14.131038 2
AAGAG 320 4.487517 25.954676 7
CCAGG 475 4.4180827 17.21471 3
GAGCA 380 4.3399205 21.1377 9
AGCAC 395 4.2895336 15.0741825 7
CTCCA 415 4.0171337 12.105032 95-96
AGAGC 340 3.883087 21.137697 8
TTTTT 280 3.8749053 8.964593 10-14
CTTCT 370 3.8646336 11.598914 55-59
CTGAA 305 3.812511 13.130004 90-94
CGGAA 320 3.65467 26.422123 5
ACCAG 335 3.6379597 10.049457 7
TCTGA 310 3.6325634 12.308498 90-94
CACAC 340 3.5108058 14.806036 85-89
ATCGG 325 3.4795394 24.768969 3
TCGGA 320 3.426008 19.815174 3
GATCG 320 3.426008 19.815174 1
CGTCT 355 3.387832 11.578538 85-89
CTGCT 355 3.387832 17.662533 3
GCACA 310 3.3664696 15.0741825 8
TCTTC 320 3.3423858 7.7326093 50-54
CAGCA 305 3.3121717 10.049455 6
GAACT 260 3.2500093 13.130004 90-94
GTCTG 320 3.2116532 12.65067 90-94
CAGGA 280 3.197836 15.8532715 3
AACTC 265 3.1497202 23.781752 95-96
TGAAC 250 3.125009 13.130004 90-94
CCAGC 350 3.0954454 6.6359653 95-96
AGTCA 240 3.0000086 10.41078 25-29
CACCA 290 2.9945107 6.079907 70-74
TGCTG 295 2.960743 9.2877 2
CAGAT 230 2.875008 11.040063 70-74
CTTCC 315 2.8583732 10.916445 30-34
CACGT 280 2.8504362 12.351324 85-89
CAGGG 290 2.8367646 22.630535 9
ACACG 260 2.8234906 13.175687 85-89
TTCCA 250 2.7855206 9.279795 30-34
TTCTT 230 2.765239 6.6755276 50-54
AGCAG 240 2.7410026 15.853272 2
TTCTG 240 2.6363494 10.165324 55-59
ACTCC 270 2.6135564 14.526036 95-96
GCCAG 280 2.6043434 8.607355 1
ACGTC 255 2.595933 10.105629 85-89
GATCT 220 2.5779483 8.675031 40-44
TCTGC 265 2.5289452 13.2469015 2
AAGAT 160 2.4557784 12.783248 35-39
ATCTC 220 2.4512577 9.279794 40-44
CAGTC 240 2.4432309 8.554544 90-94
TCCAA 205 2.4365761 10.999062 7
CTTTT 200 2.4045558 16.688818 6
TTCCT 230 2.40234 9.665762 7
CCAGT 235 2.3923304 9.4206915 25-29
TTTCT 195 2.3444414 16.688818 8
CTGGG 255 2.3383298 6.004135 80-84
TGCTT 210 2.3068056 10.165323 4
TCTTT 190 2.284328 5.5629396 15-19
TTTTC 190 2.2843277 11.125878 7
GGGGG 255 2.2468696 16.307867 2
AGGAA 160 2.2437584 19.466007 5
GTCAC 220 2.2396283 10.184532 95-96
TCACT 200 2.2284167 8.360176 95-96
CACTT 200 2.2284167 10.3108835 30-34
GAAAA 135 2.2103586 10.606119 60-64
ACTTC 195 2.172706 9.279794 30-34
TTGAA 150 2.1582448 11.9834385 60-64
CTCCT 235 2.1324375 16.794533 4
TCCTC 235 2.1324372 8.397265 5
ATCTT 165 2.11616 7.1210704 10-14
GGGGA 205 2.1089406 14.2801 3
ACACA 165 2.092039 11.7331705 8
TGCAG 195 2.0877237 9.907587 5
GACCA 190 2.0633202 10.049455 6
AGGGG 200 2.057503 9.520067 1
CCTCC 260 2.049668 14.590484 5
AGGAG 170 2.0418897 5.557543 2
TCCTT 195 2.0367663 14.498643 4
GTCTT 185 2.032186 15.247986 7
GCTGG 220 2.0173824 8.485845 1
CCAGA 185 2.0090222 5.3284492 70-74
CCTGG 230 2.0054333 8.068818 3
GCAGG 205 2.005299 9.052214 3
GGACC 215 1.9997637 8.607355 5
TTCAT 155 1.987908 5.934226 2
CCTTT 190 1.9845415 14.498643 5
TTTCC 190 1.9845415 5.799457 15-19
TGGCA 185 1.980661 14.861383 2
TCTTG 180 1.977262 10.165323 5
CCAAG 180 1.9547247 9.044511 35-39
CTTCA 175 1.9498644 10.310883 6
CAAGA 145 1.933477 12.339583 35-39
CTGGA 180 1.9271295 9.907587 6
GGCTG 210 1.9256833 16.97169 2
AATGA 125 1.918577 7.677627 95-96
TGAAA 125 1.918577 15.623971 60-64
GCTTC 200 1.9086379 13.2469015 2
GTCCA 185 1.8833237 14.131036 1
AGAAA 115 1.882898 7.5757995 7
TGGGG 195 1.8805519 13.386638 1
TTCTC 180 1.880092 5.799457 25-29
CTTGA 160 1.8748715 8.675031 60-64
ACAAA 120 1.8682072 5.762797 40-44
TCTCG 195 1.8609219 8.831266 5
GGGAC 190 1.8585701 9.052216 5
TGAGG 165 1.8578365 5.209824 2
TGAAG 140 1.8404517 6.082693 2
CATCT 165 1.8384434 5.155441 4
CACTG 180 1.8324232 9.4206915 6
CTGCA 180 1.8324231 5.3465896 90-94
GCTGC 210 1.8310483 8.068819 1
GCAGA 160 1.8273348 10.568848 3
CCTTC 200 1.8148402 8.397265 9
AGGGA 150 1.8016673 6.0081544 95-96
TTTCA 140 1.7955297 7.1210704 15-19
CACAG 165 1.7918309 5.432139 95-96
AAACA 115 1.7903653 7.6389136 70-74
ATTTT 120 1.7715117 13.661307 6
TTTTG 140 1.7701824 17.551357 7
GGGGC 210 1.7594293 11.629828 3
GATTT 130 1.7534488 12.481857 6
CAAAT 120 1.7513192 6.7527947 50-54
GAGGG 170 1.7488776 9.520067 1
GAAGG 145 1.7416117 6.0081544 95-96
CATTT 135 1.7314036 5.9342256 5
ATTTC 135 1.7314036 5.9342256 7
CCTCT 190 1.7240983 8.397266 1
ATCCA 145 1.7234317 5.49953 4
GCAGC 185 1.7207267 6.9789357 95-96
TCCTG 180 1.717774 13.2469 2
CTCTG 180 1.717774 13.2469 2
AAAAC 110 1.7125233 7.6389136 70-74
CTTGG 170 1.7061908 9.2877 2
AAAAT 95 1.7024158 8.291661 9
TCACC 175 1.693972 8.957724 8
TCCAC 175 1.693972 8.957724 5
GAGAA 120 1.6828189 6.488669 6
TCTCC 185 1.6787271 5.038359 55-59
GAGCC 180 1.6742208 8.607355 9
TCATC 150 1.6713123 5.1554413 2
AGACA 125 1.6667906 6.169792 2
TGATG 135 1.6636823 11.404236 9
GGGAG 160 1.6460025 9.520067 1
AGCCA 150 1.6289369 6.029673 10-14
ATGCC 160 1.6288207 8.478622 45-49
CTCGT 170 1.6223421 8.831266 3
GAGGA 135 1.6215005 11.115086 3
TGTTG 140 1.6173534 10.690706 2
CTCAT 145 1.6156021 5.1554418 2
CAGGT 150 1.6059413 9.907587 4
GCTTG 160 1.6058266 9.2877 60-64
GGGTC 175 1.6047363 12.728768 2
TCATT 125 1.6031516 5.934226 9
GTTGA 130 1.6020645 5.702118 1
ACAGA 120 1.6001189 10.005068 95-96
GGAGG 155 1.5945649 9.520067 2
GGGGT 165 1.5912362 13.386638 1
TGGGA 140 1.5763463 10.419649 2
GGATG 140 1.5763462 15.629472 6
GCCTC 190 1.575248 7.672287 2
CCTGC 190 1.5752479 11.508429 2
GCTCC 190 1.5752479 11.508429 6
TCTCT 150 1.5667434 5.224736 95-96
GGGAA 130 1.561445 11.115086 4
TCCAT 140 1.5598917 10.3108835 8
GGCTT 155 1.5556445 13.93155 1
TTGAT 115 1.5511277 6.240928 4
CATCA 130 1.5451456 5.49953 2
AGAGA 110 1.542584 6.488669 9
AGGAC 135 1.541814 6.341309 55-59
GTATG 125 1.5404466 9.123388 45-49
AACAT 105 1.5324043 13.5055895 9
AGCTC 150 1.5270194 9.4206915 5
TTTGT 120 1.5172992 17.551357 8
GATGA 115 1.5117996 6.082693 5
GAGAT 115 1.5117996 6.082693 4
AGGAT 115 1.5117996 12.165386 4
TGAGA 115 1.5117996 6.082693 5
CTGGT 150 1.5054625 9.2877 4
GCTGT 150 1.5054625 18.5754 3
TTCAC 135 1.504181 10.310883 7
CCCAG 170 1.5035021 12.276537 2
CAGTG 140 1.4988785 9.907587 5
CTCCC 190 1.4978343 7.295242 1
CCCTG 180 1.4923402 11.5084305 2
CAGAG 130 1.4847097 7.398194 20-24
CTTTG 135 1.4829465 10.165323 2
CAAAA 95 1.4789973 7.203496 9
TCTCA 130 1.4484707 5.1554413 8
GAATG 110 1.4460692 12.165386 7
GGAAT 110 1.4460692 12.165386 5
TTTGG 125 1.4440656 5.345353 7
GGCCT 165 1.4386805 12.103227 1
GCTCT 150 1.4314783 6.1818867 20-24
TCTGT 130 1.4280226 15.247986 3
CTGTT 130 1.4280226 15.247986 4
AGGTT 115 1.4172109 11.404235 8
TTGAG 115 1.4172107 5.702117 4
TTTGA 105 1.416247 7.4891143 10-14
ATCTG 120 1.4061534 5.4218936 2
GGTCT 140 1.4050984 9.287701 6
TTTTA 95 1.4024467 7.384491 95-96
GGGTG 145 1.3983592 13.386638 2
GGCAC 150 1.3951839 8.607355 4
AAAGA 85 1.3917071 7.5757985 8
AAGAA 85 1.3917071 5.254889 75-79
TTGTT 110 1.3908576 5.850453 4
GGAGA 115 1.3812783 5.557543 3
ATGAC 110 1.3750039 6.252721 95-96
TGTTC 125 1.3730987 10.165325 5
GGGCA 140 1.3694727 9.052216 4
ATGAT 95 1.3668885 6.6574664 6
CCACT 140 1.3551775 5.3746343 30-34
TGGCT 135 1.3549163 13.931552 3
GATGG 120 1.3511539 10.419648 9
TCGTA 115 1.3475639 5.421894 40-44
TGTCA 115 1.3475639 5.421894 5
GCTGA 125 1.3382844 9.907587 6
CAGAA 100 1.3334324 5.6025352 90-94
CCAAA 105 1.3312978 5.8665853 8
GGGCT 145 1.3296387 12.728768 1
TAGGA 100 1.3146083 12.165386 4
GACAG 115 1.313397 5.2844243 1
GGTCC 150 1.3078917 8.068819 6
CCATC 135 1.3067783 8.957724 9
AAATG 85 1.3046323 7.101804 6
TTCAA 95 1.2997144 6.330293 9
CGTAT 110 1.2889742 8.675031 45-49
TGACT 110 1.2889742 5.421894 3
TATGC 110 1.2889739 8.67503 45-49
GCCCT 155 1.2850707 7.672287 3
TGGGC 140 1.283789 8.485846 7
ACTTT 100 1.2825212 5.9342256 1
ATGTT 95 1.2813665 6.2409286 1
ATTTG 95 1.2813663 12.481856 9
TGGTT 110 1.2707777 5.345353 5
TGGTG 120 1.2666163 9.767722 7
GTTTT 100 1.2644161 5.8504534 6
GCCTG 145 1.2642952 12.103229 1
TTGCT 115 1.2632507 6.0991945 50-54
CCACC 150 1.2614243 7.7821474 5
GGACA 110 1.2562928 15.853274 6
GAAGC 110 1.2562928 10.568849 9
TGACA 100 1.2500036 5.7837667 9
GACAT 100 1.2500035 11.567533 7
TGGAA 95 1.248878 6.082693 5
ACAGC 115 1.2488517 10.049455 5
AATCC 105 1.2480024 5.499531 7
TGCCT 130 1.2406145 8.831266 3
AGGTG 110 1.2385577 5.209824 4
GTGGC 135 1.2379395 12.728768 1
CATGT 105 1.2303842 5.4218936 1
TAGAT 85 1.2230055 6.0453725 90-94
CCCTC 155 1.2219174 7.295242 4
GCCGT 140 1.2206988 8.068819 3
AGTTT 90 1.2139261 6.2409286 7
TTTAG 90 1.213926 6.240928 8
TTGGG 115 1.2138406 9.767722 2
ACCTC 125 1.20998 8.957724 1
AGCAA 90 1.2000892 6.169792 9
CAAAG 90 1.2000891 6.169791 5
AAAGC 90 1.2000891 6.169791 6
ACAGG 105 1.1991886 10.568849 8
AGGCA 105 1.1991886 5.712891 95-96
ATCAG 95 1.1875033 5.7837663 6
ATGAG 90 1.1831475 6.082693 25-29
CAGTT 100 1.1717947 5.1698627 85-89
ATGCT 100 1.1717947 5.421894 8
TCAAT 85 1.1629024 6.3302937 10-14
TGTGT 100 1.1552525 10.690706 3
GCCCA 130 1.1497369 12.276536 1
TGATT 85 1.1464858 12.481857 5
TGCTC 120 1.1451827 8.831267 4
TGTCC 120 1.1451827 13.2469015 2
TCCCC 145 1.143084 7.295242 2
AAGGC 100 1.1420842 5.493164 65-69
CAACA 90 1.1411123 5.8665853 8
CACAA 90 1.1411123 11.7331705 9
ACATC 95 1.129145 5.4995303 8
AAGCT 90 1.1250031 6.2527194 95-96
GAAAG 80 1.1218792 12.977338 7
AAGGA 80 1.1218792 6.488669 3
GCACT 110 1.1198142 9.4206915 5
CCTGA 110 1.119814 9.420691 9
ACCTT 100 1.1142083 5.1554418 7
GTCAT 95 1.113205 5.421894 1
TGATC 95 1.113205 10.843788 5
TCATG 95 1.113205 5.421894 3
TGGAT 90 1.1091216 5.702118 9
GTGGG 115 1.1090435 8.924425 1
CTGTG 110 1.1040058 9.2877 4
GCTTT 100 1.0984789 5.4947696 95-96
TGTCT 100 1.0984789 10.165323 5
TTGGT 95 1.0974898 5.345353 4
CTGTC 115 1.0974668 17.662535 4
CAGAC 100 1.0859579 5.0247273 5
GGAAC 95 1.0849801 5.2844243 6
CCTCG 130 1.0778012 7.672287 6
GCGGC 135 1.075477 7.372196 1
ATAAA 60 1.0752101 8.291662 7
GGGAT 95 1.0696635 10.419649 3
CATCC 110 1.0647823 8.957723 3
ACAGT 85 1.062503 5.7837663 4
ACTGA 85 1.062503 11.567533 7
GTTGG 100 1.0555136 9.767722 1
TGTGG 100 1.0555136 9.767722 5
GGAAA 75 1.0517617 19.466007 6
GTGAA 80 1.0516868 6.082693 1
GAAGT 80 1.0516866 6.082693 5
GTCTC 110 1.0497508 8.831267 1
CGGCT 120 1.046313 8.068818 1
TTTAT 70 1.0333818 5.4645233 10-14
GACAC 95 1.0316601 10.049455 7
GGCAA 90 1.0278759 10.56885 3
TCATA 75 1.0260904 6.330293 5
ATTCA 75 1.0260903 6.3302927 7
TAACA 70 1.0216029 6.7527957 8
GGTCA 95 1.0170963 9.907589 3
ATGGC 95 1.0170962 9.907587 1
TCAGG 95 1.0170962 9.907587 8
GGTGA 90 1.0133655 15.629474 3
TGTTT 80 1.0115329 5.8504534 5
TGAAT 70 1.007181 6.6574664 5
ATTGA 70 1.0071809 6.6574664 7
AAGTT 70 1.0071809 6.6574664 6
TTGCC 105 1.0020349 8.831267 2
CTTGC 105 1.0020349 8.831267 6
GCAAA 75 1.0000744 6.169792 4
CATAG 80 1.0000029 6.2527204 95-96
GACTT 85 0.99602544 5.421894 1
CTGAT 85 0.99602544 5.421894 4
CTTGT 90 0.988631 10.165323 3
AATGG 75 0.98595625 6.082693 8
AAGGT 75 0.9859562 6.0826926 4
GATGT 80 0.98588586 5.7021174 7
GGATT 80 0.98588586 11.404235 5
GGCGG 115 0.96349704 7.753219 1
AGAGG 80 0.9608892 5.557543 8
GAGGT 85 0.95706743 5.2098246 3
ATGGG 85 0.9570673 5.209824 1
CCGTC 115 0.95343953 7.672287 4
TAGCA 75 0.9375027 5.7837667 1
ACATG 75 0.9375026 5.7837663 2
TTGCA 80 0.93743575 5.421894 4
GTTCA 80 0.93743575 5.421894 6
ATGTC 80 0.93743575 5.421894 5
TTCAG 80 0.93743575 5.421894 8
TTGAC 80 0.9374356 5.4218936 2
GTTCT 85 0.93370706 5.0826616 1
TTGTC 85 0.93370706 5.0826616 9
TTTGC 85 0.93370706 5.0826616 3
ATGGT 75 0.924268 5.7021174 4
ATGAA 60 0.920917 7.1018047 9
AGATG 70 0.92022586 6.082693 5
GCTCA 90 0.91621155 5.092265 95-96
AGTGC 85 0.9100334 9.907587 2
AGGGT 80 0.90076935 10.419649 1
GTAGG 80 0.90076923 10.419648 6
AGTGG 80 0.90076923 5.209824 2
TAAAA 50 0.89600843 8.291662 8
CACAT 75 0.89143026 5.499531 6
CCATT 80 0.89136666 10.3108835 9
ATACT 65 0.8892783 6.330293 9
ACATT 65 0.88927823 6.3302927 7
GCGGG 105 0.87971467 7.753219 2
ACACC 85 0.8777014 9.555587 9
CATAA 60 0.8756596 6.7527947 6
ACCCT 90 0.8711856 13.436585 1
GAACA 65 0.8667311 6.169792 7
ACTGC 85 0.8653109 5.092265 95-96
GGTAT 70 0.86265016 17.106354 6
AGTTG 70 0.86265016 5.702118 7
GAGAC 75 0.85656327 5.2844243 1
GTGTC 85 0.8530954 13.93155 1
GTTGC 85 0.8530954 9.2877 1
ATAGA 55 0.84417385 7.1018047 8
GAAAT 55 0.84417385 7.1018047 5
CATTC 75 0.83565605 5.155441 6
TCACA 70 0.83200157 5.499531 3
TGCGG 90 0.8252928 8.485845 3
GCATT 70 0.8202563 5.421894 4
GAACC 75 0.8144686 5.0247283 6
CTCGA 80 0.81441027 9.420691 6
GAATC 65 0.8125023 5.7837667 6
TACAG 65 0.81250226 11.567533 7
TGGTA 65 0.80103225 11.404236 5
AAGAC 60 0.80005944 6.169791 8
CAAGG 70 0.7994591 5.2844243 2
ATGTA 55 0.7913565 6.6574664 4
AATGT 55 0.7913565 6.6574664 3
CGGCA 85 0.7906042 8.607354 2
GAGAG 65 0.7807225 5.557543 8
ACCAT 65 0.7725729 5.499531 8
TTCTA 60 0.7695128 5.934226 9
TAGAA 50 0.7674308 7.1018047 9
GCATC 75 0.7635097 9.4206915 1
GTTCC 80 0.76345515 8.831267 6
AGCTT 65 0.76166654 5.421894 1
TTAGC 65 0.76166654 5.421894 9
CTGTA 65 0.76166654 5.421894 2
ACTTG 65 0.7616664 5.4218936 2
GTGCT 75 0.7527313 9.287701 3
ATCAT 55 0.7524662 6.3302927 3
GTTTG 65 0.7509141 5.345353 9
GTGTT 65 0.7509141 10.690706 1
GTCAA 60 0.75000215 11.5675335 6
AATGC 60 0.75000215 6.252721 95-96
CAAGT 60 0.7500021 5.7837663 9
GCAAT 60 0.7500021 5.7837663 4
GCAAG 65 0.74235487 5.2844243 1
AGTGT 60 0.7394144 5.7021174 1
TTAGG 60 0.7394144 5.702118 7
AGCGG 75 0.73364604 9.052214 1
ATCCT 65 0.72423524 5.155441 4
ACTCT 65 0.72423524 5.155441 9
AGTGA 55 0.7230346 6.082693 6
AATAA 40 0.71680677 8.291662 6
AACCT 60 0.71314424 5.4995303 1
ATTCT 55 0.70538664 5.9342256 7
AGTCT 60 0.7030768 5.421894 3
GTGCA 65 0.69590795 9.907589 6
AAAGT 45 0.69068766 7.101804 8
AACTG 55 0.6875019 5.7837663 1
CGAAG 60 0.68525064 5.2844243 4
GATTG 55 0.67779654 5.702118 6
GTGAT 55 0.67779654 11.404236 4
TGTTA 50 0.67440337 12.481857 5
TTGTA 50 0.6744033 6.240928 9
TATTG 50 0.6744033 6.240928 7
CTCTA 60 0.6685249 5.1554413 7
TACCT 60 0.66852486 10.310882 8
ATGGA 50 0.65730417 6.082693 8
ATACA 45 0.6567447 6.7527957 6
ATCAA 45 0.65674466 6.7527947 9
TGTAA 45 0.6474735 6.6574664 7
GCGGT 70 0.6418945 8.485846 4
GGCCG 80 0.63731974 7.372196 2
GGTTT 55 0.63538885 10.690706 9
TTGTG 55 0.63538885 5.345353 1
TATAT 40 0.62991583 7.2865515 8
CCTGT 65 0.62030727 8.831266 3
GTGAG 55 0.6192789 5.2098246 1
TAGGG 55 0.61927885 5.209824 8
GAGTT 50 0.6161787 5.7021174 6
ATGTG 50 0.6161787 5.702118 2
GAATA 40 0.61394465 7.1018047 6
CTGCG 70 0.6103493 8.068818 2
CGGTG 65 0.59604484 8.485845 2
TAAGG 45 0.5915738 6.082693 9
AAGTG 45 0.5915737 6.0826926 1
TATTT 40 0.5905039 6.8306537 8
GGCAT 55 0.5888452 14.861383 3
GTATC 50 0.5858973 5.421894 4
ATAAC 40 0.5837731 13.505591 7
TTACT 45 0.57713455 5.934226 9
GTATA 40 0.575532 13.314933 7
GAGTG 50 0.5629808 5.209824 1
GTACA 45 0.5625016 5.7837667 6
ATAGC 45 0.5625016 5.7837667 9
TCTAC 50 0.5571041 5.1554413 8
GCGAG 55 0.53800714 9.052216 1
ACGGG 55 0.5380071 9.052214 1
GATAA 35 0.5372016 7.1018047 6
AATAG 35 0.5372016 7.101805 7
CAACT 45 0.53485817 5.4995303 6
CATAC 45 0.53485817 5.4995303 5
GATTC 45 0.52730757 5.421894 6
AGGTA 40 0.5258433 12.165386 5
CGGTC 60 0.52315664 8.068819 5
ACGAG 45 0.51393795 5.2844243 7
TATTC 40 0.5130085 5.9342256 7
CTAAA 35 0.51080143 6.7527957 9
TACAA 35 0.51080143 5.402236 35-39
CCTTA 45 0.5013937 5.1554413 6
CAGTA 40 0.50000143 5.7837667 4
GTGTA 40 0.49294293 5.702118 4
TAACT 35 0.47884214 6.330293 8
CTTAA 35 0.47884214 6.330293 7
CTATA 35 0.47884214 6.330293 4
TTAAC 35 0.47884214 6.330293 8
TATCA 35 0.4788421 6.3302927 5
TCAAC 40 0.47542948 5.499531 7
ACTCA 40 0.47542942 5.49953 8
TTAGT 35 0.47208238 10.120425 95-96
TGTAT 35 0.47208238 6.2409286 3
ATTGT 35 0.47208235 6.240928 8
GTTAC 40 0.46871787 5.421894 6
TGTAC 40 0.46871787 10.843788 7
AGAGT 35 0.46011293 6.082693 5
AGTAG 35 0.46011293 6.082693 5
CTCCG 55 0.45599285 7.672287 6
GGTAG 40 0.45038468 5.2098246 2
TTTAC 35 0.44888243 5.9342256 8
CTACT 40 0.44568333 5.1554418 4
AACTA 30 0.4378298 6.7527947 9
TATAG 30 0.43164897 6.6574664 5
ATATA 25 0.4199739 7.7728767 9
CTCAA 35 0.41600078 5.499531 9
TATAC 30 0.4104361 6.3302927 5
ACTAT 30 0.4104361 6.3302927 6
TACTA 30 0.4104361 6.3302927 5
TCGAT 35 0.41012815 10.843788 7
ACGTT 35 0.41012815 5.421894 4
CGAAA 30 0.40002972 6.169792 9
GTAAG 30 0.3943825 6.082693 8
ATAGG 30 0.3943825 6.082693 3
TCCTA 35 0.38997287 5.1554413 5
TTACC 35 0.38997287 5.1554413 7
ACCGA 35 0.3800853 5.0247273 7
GCATA 30 0.37500107 5.7837667 1
TCGAA 30 0.37500107 5.7837667 4
GCTAA 30 0.37500107 5.7837667 8
TAGGT 30 0.3697072 5.7021174 7
GTTAG 30 0.3697072 5.702118 6
CAATA 25 0.36485815 6.7527947 5
ATACC 30 0.35657212 5.499531 6
GACGA 30 0.3426253 5.284424 6
AAGCG 30 0.3426253 10.568848 7
GTTTA 25 0.33720168 6.2409286 7
GTATT 25 0.33720168 12.481857 6
AGATA 20 0.30697232 7.1018047 5
CGTCA 30 0.30540386 9.420691 5
CCTAA 25 0.29714343 5.499531 7
TACCA 25 0.2971434 5.49953 9
TGCTA 25 0.29294866 5.421894 7
TACGT 25 0.29294863 5.4218936 9
AGACG 25 0.2855211 5.284425 9
CCTAT 25 0.2785521 5.1554418 3
TAAGC 20 0.25000072 5.7837667 9
CTAAG 20 0.25000072 5.7837667 8
CGATT 20 0.23435894 5.421894 9
GGGTA 20 0.22519234 5.2098246 2
ACGCA 20 0.21719159 5.0247273 5
GCGAA 15 0.17131266 5.284425 3
CGAAC 15 0.16289368 5.0247273 5
>>Kmer Content pass
>>END_MODULE
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.flexiprep
import java.io.File
import org.testng.annotations.Test
/**
 * Tests for the clipped-adapter reporting of the Cutadapt wrapper.
 *
 * Extends FastqcV0101Test so that its resource helpers (`resourceFile`, `outputv0101`)
 * can be used to build the Fastqc instance that Cutadapt is constructed with.
 *
 * All `@Test` methods declare an explicit `Unit` result type: TestNG skips test methods
 * that return a value, and the result type of a trailing matcher expression depends on
 * the ScalaTest version in use.
 */
class CutadaptTest extends FastqcV0101Test {

  /** Mock output file of a Cutadapt 1.9 run */
  private[flexiprep] val cutadaptOut: File = resourceFile("ct-test.R1.clip.stats")

  /**
   * Creates a Fastqc instance whose output is the mock FastQC archive and whose
   * contaminants file is the `fqc_contaminants_v0112.txt` test resource.
   */
  def testFastQCinstance: Fastqc = {
    val fqc = new Fastqc(null)
    fqc.output = outputv0101
    fqc.contaminants = Option(resourceFile("fqc_contaminants_v0112.txt"))
    //    fqc.beforeGraph()
    fqc
  }

  /** Creates a Cutadapt instance whose stats output is the mock Cutadapt report. */
  def testCutadaptInst: Cutadapt = {
    val caExe = new Cutadapt(null, testFastQCinstance)
    caExe.statsOutput = cutadaptOut
    caExe
  }

  /** Checks the adapters, counts and clipping histograms extracted from the mock report. */
  @Test def testAdapterFound(): Unit = {
    val cutadapt = testCutadaptInst
    val adapters = cutadapt.extractClippedAdapters(cutadaptOut)
    adapters.keys.size shouldBe 4

    // adapter that was found: total count plus a length -> count histogram per clipped end
    adapters.get("CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC") shouldBe Some(
      Map(
        "count" -> 94,
        "histogram" -> Map(
          "5p" -> Map(5 -> 2, 6 -> 4, 9 -> 1, 3 -> 8, 4 -> 3),
          "3p" -> Map(5 -> 21, 6 -> 18, 9 -> 1, 12 -> 1, 7 -> 2, 3 -> 13, 11 -> 1, 4 -> 19)
        )
      )
    )

    // adapter that was never trimmed: reported with a zero count and an empty histogram
    adapters.get("CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC") shouldBe Some(
      Map(
        "count" -> 0,
        "histogram" -> Map()
      )
    )
  }

  /** Checks the keys and values of the summary statistics built from the mock report. */
  @Test def testSummary(): Unit = {
    val cutadapt = testCutadaptInst
    val summary = cutadapt.summaryStats

    // exact key set; this also pins the number of entries (10)
    summary.keySet shouldBe Set("num_bases_input", "num_reads_input", "num_reads_output",
      "num_reads_with_adapters", "num_reads_affected", "num_reads_discarded_too_long",
      "adapters", "num_reads_discarded_many_n", "num_reads_discarded_too_short", "num_bases_output")

    // match instead of an unchecked cast so that a wrong value type gives a readable failure
    summary("adapters") match {
      case adapters: Map[_, _] => adapters.size shouldBe 4
      case other               => fail(s"Expected 'adapters' to be a Map, but found: $other")
    }

    summary("num_bases_input") shouldBe 100000
    summary("num_reads_input") shouldBe 1000
    summary("num_reads_output") shouldBe 985
    summary("num_reads_with_adapters") shouldBe 440
    summary("num_reads_affected") shouldBe 425
    summary("num_reads_discarded_too_long") shouldBe 0
    summary("num_reads_discarded_many_n") shouldBe 0
    summary("num_reads_discarded_too_short") shouldBe 15
    summary("num_bases_output") shouldBe 89423
  }
}
......@@ -25,14 +25,14 @@ import org.testng.annotations.Test
class FastqcV0101Test extends TestNGSuite with Matchers {
/** Returns the absolute path to test resource directory as a File object */
private val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString)
private[flexiprep] val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString)
/** Given a resource file name, returns the absolute path to it as a File object */
private def resourceFile(p: String): File = new File(resourceDir, p)
private[flexiprep] def resourceFile(p: String): File = new File(resourceDir, p)
/** Mock output file of a FastQC v0.10.1 run */
// the file doesn't actually exist, we just need it so the outputDir value can be computed correctly
private val outputv0101: File = resourceFile("v0101.fq_fastqc.zip")
private[flexiprep] val outputv0101: File = resourceFile("v0101.fq_fastqc.zip")
@Test def testOutputDir() = {
val fqc = new Fastqc(null)
......@@ -44,7 +44,7 @@ class FastqcV0101Test extends TestNGSuite with Matchers {
val fqc = new Fastqc(null)
fqc.output = outputv0101
// 11 QC modules
fqc.qcModules.size shouldBe 11
fqc.qcModules.size shouldBe 12
// first module
fqc.qcModules.keySet should contain("Basic Statistics")
// mid (6th module)
......@@ -83,4 +83,23 @@ class FastqcV0101Test extends TestNGSuite with Matchers {
adapters.last.seq shouldEqual "GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
}
/**
 * Checks that the per-base sequence quality parsed from the mock FastQC output
 * has 55 entries and that one of them is keyed "54-55".
 *
 * The result type is declared as `Unit` explicitly: TestNG skips test methods that
 * return a value, so it must not depend on what the last matcher expression returns.
 */
@Test def testPerBaseSequenceQuality(): Unit = {
  val fqc = new Fastqc(null)
  fqc.output = outputv0101
  val perBaseSequenceQuality = fqc.perBaseSequenceQuality
  perBaseSequenceQuality.size shouldBe 55
  perBaseSequenceQuality.keys should contain("54-55")
}
/**
 * Checks that the per-base sequence content parsed from the mock FastQC output
 * has 55 entries and that one of them is keyed "1".
 *
 * The result type is declared as `Unit` explicitly: TestNG skips test methods that
 * return a value, so it must not depend on what the last matcher expression returns.
 */
@Test def testPerBaseSequenceContent(): Unit = {
  val fqc = new Fastqc(null)
  fqc.output = outputv0101
  val perBaseSequenceContent: Map[String, Map[String, Double]] = fqc.perBaseSequenceContent
  perBaseSequenceContent.size shouldBe 55
  perBaseSequenceContent.keys should contain("1")
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment