Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
fbed4e7c
Commit
fbed4e7c
authored
Feb 10, 2015
by
Peter van 't Hof
Browse files
Merge branch 'develop' into feature-fix_missing_values
parents
7e4ca301
1047c24a
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala
View file @
fbed4e7c
...
...
@@ -25,16 +25,16 @@ import nl.lumc.sasc.biopet.core.config.Configurable
class
Fastqc
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
{
@Input
(
doc
=
"Contaminants"
,
required
=
false
)
var
contaminants
:
File
=
_
var
contaminants
:
Option
[
File
]
=
None
@Input
(
doc
=
"Adapters"
,
required
=
false
)
var
adapters
:
File
=
_
var
adapters
:
Option
[
File
]
=
None
@Input
(
doc
=
"Fastq file"
,
shortName
=
"FQ"
)
var
fastqfile
:
File
=
_
var
fastqfile
:
File
=
null
@Output
(
doc
=
"Output"
,
shortName
=
"out"
)
var
output
:
File
=
_
var
output
:
File
=
null
executable
=
config
(
"exe"
,
default
=
"fastqc"
)
var
java_exe
:
String
=
config
(
"exe"
,
default
=
"java"
,
submodule
=
"java"
,
freeVar
=
false
)
...
...
@@ -50,17 +50,31 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
override
def
afterGraph
{
this
.
checkExecutable
if
(
contaminants
==
null
)
{
val
fastqcDir
=
executable
.
substring
(
0
,
executable
.
lastIndexOf
(
"/"
))
val
defaultContams
=
getVersion
match
{
case
"v0.11.2"
=>
new
File
(
fastqcDir
+
"/Configuration/contaminant_list.txt"
)
case
_
=>
new
File
(
fastqcDir
+
"/Contaminants/contaminant_list.txt"
)
}
val
defaultAdapters
=
getVersion
match
{
case
"v0.11.2"
=>
new
File
(
fastqcDir
+
"/Configuration/adapter_list.txt"
)
case
_
=>
null
}
contaminants
=
config
(
"contaminants"
,
default
=
defaultContams
)
val
fastqcDir
=
executable
.
substring
(
0
,
executable
.
lastIndexOf
(
"/"
))
contaminants
=
contaminants
match
{
// a user-defined contaminants file takes precedence
case
userDefinedValue
@
Some
(
_
)
=>
userDefinedValue
// otherwise, use default contaminants file (depending on FastQC version)
case
None
=>
val
defaultContams
=
getVersion
match
{
case
"v0.11.2"
=>
new
File
(
fastqcDir
+
"/Configuration/contaminant_list.txt"
)
case
_
=>
new
File
(
fastqcDir
+
"/Contaminants/contaminant_list.txt"
)
}
config
(
"contaminants"
,
default
=
defaultContams
)
}
adapters
=
adapters
match
{
// a user-defined adapters file takes precedence
case
userDefinedValue
@
Some
(
_
)
=>
userDefinedValue
// otherwise, check if adapters are already present (depending on FastQC version)
case
None
=>
val
defaultAdapters
=
getVersion
match
{
case
"v0.11.2"
=>
Option
(
new
File
(
fastqcDir
+
"/Configuration/adapter_list.txt"
))
case
_
=>
None
}
defaultAdapters
.
collect
{
case
adp
=>
config
(
"adapters"
,
default
=
adp
)
}
}
}
...
...
@@ -74,6 +88,6 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
conditional
(
noextract
,
"--noextract"
)
+
conditional
(
extract
,
"--extract"
)
+
conditional
(
quiet
,
"--quiet"
)
+
required
(
"-o"
,
output
.
getParent
()
)
+
required
(
"-o"
,
output
.
getParent
)
+
required
(
fastqfile
)
}
public/flexiprep/pom.xml
View file @
fbed4e7c
...
...
@@ -39,5 +39,17 @@
<artifactId>
BiopetFramework
</artifactId>
<version>
${project.version}
</version>
</dependency>
<dependency>
<groupId>
org.testng
</groupId>
<artifactId>
testng
</artifactId>
<version>
6.8
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.scalatest
</groupId>
<artifactId>
scalatest_2.11
</artifactId>
<version>
2.2.1
</version>
<scope>
test
</scope>
</dependency>
</dependencies>
</project>
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
View file @
fbed4e7c
...
...
@@ -33,14 +33,14 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada
override
def
beforeCmd
()
{
super
.
beforeCmd
val
foundAdapters
=
fastqc
.
getF
oundAdapters
.
map
(
_
.
seq
)
val
foundAdapters
=
fastqc
.
f
oundAdapters
.
map
(
_
.
seq
)
if
(
default_clip_mode
==
"3"
)
opt_adapter
++=
foundAdapters
else
if
(
default_clip_mode
==
"5"
)
opt_front
++=
foundAdapters
else
if
(
default_clip_mode
==
"both"
)
opt_anywhere
++=
foundAdapters
}
override
def
cmdLine
=
{
if
(
!
opt_adapter
.
is
Empty
||
!
opt_anywhere
.
is
Empty
||
!
opt_front
.
is
Empty
)
{
if
(
opt_adapter
.
non
Empty
||
opt_anywhere
.
non
Empty
||
opt_front
.
non
Empty
)
{
analysisName
=
getClass
.
getSimpleName
super
.
cmdLine
}
else
{
...
...
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala
View file @
fbed4e7c
...
...
@@ -16,82 +16,154 @@
package
nl.lumc.sasc.biopet.pipelines.flexiprep
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
java.io.
{
File
,
FileNotFoundException
}
import
scala.io.Source
import
argonaut._
,
Argonaut
.
_
import
scalaz._
,
Scalaz
.
_
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.utils.ConfigUtils
/**
* FastQC wrapper with added functionality for the Flexiprep pipeline
*
* This wrapper implements additional methods for parsing FastQC output files and aggregating everything in a summary
* object. The current implementation is based on FastQC v0.10.1.
*/
class
Fastqc
(
root
:
Configurable
)
extends
nl
.
lumc
.
sasc
.
biopet
.
extensions
.
Fastqc
(
root
)
{
def
getDataBlock
(
name
:
String
)
:
Array
[
String
]
=
{
// Based on Fastqc v0.10.1
val
outputDir
=
output
.
getAbsolutePath
.
stripSuffix
(
".zip"
)
val
dataFile
=
new
File
(
outputDir
+
"/fastqc_data.txt"
)
if
(!
dataFile
.
exists
)
return
null
val
data
=
Source
.
fromFile
(
dataFile
).
mkString
for
(
block
<-
data
.
split
(
">>END_MODULE\n"
))
{
val
b
=
if
(
block
.
startsWith
(
"##FastQC"
))
block
.
substring
(
block
.
indexOf
(
"\n"
)
+
1
)
else
block
if
(
b
.
startsWith
(
">>"
+
name
))
return
for
(
line
<-
b
.
split
(
"\n"
))
yield
line
}
return
null
}
def
getEncoding
:
String
=
{
val
block
=
getDataBlock
(
"Basic Statistics"
)
if
(
block
==
null
)
return
null
for
(
line
<-
block
if
(
line
.
startsWith
(
"Encoding"
))
)
return
line
.
stripPrefix
(
"Encoding\t"
)
return
null
// Could be default Sanger with a warning in the log
/** Class for storing a single FastQC module result */
protected
case
class
FastQCModule
(
name
:
String
,
status
:
String
,
lines
:
Seq
[
String
])
/** Default FastQC output directory containing actual results */
// this is a def instead of a val since the value depends on the variable `output`, which is null on class creation
def
outputDir
:
File
=
new
File
(
output
.
getAbsolutePath
.
stripSuffix
(
".zip"
))
/** Default FastQC output data file */
// this is a def instead of a val since the value depends on the variable `output`, which is null on class creation
def
dataFile
:
File
=
new
File
(
outputDir
,
"fastqc_data.txt"
)
/**
* FastQC QC modules.
*
* @return Mapping of FastQC module names to their contents (module status and lines, one item per line)
* @throws FileNotFoundException if the FastQC data file cannot be found.
* @throws IllegalStateException if the module lines have no content or mapping is empty.
*/
@throws
(
classOf
[
FileNotFoundException
])
@throws
(
classOf
[
IllegalStateException
])
def
qcModules
:
Map
[
String
,
FastQCModule
]
=
{
val
fqModules
=
Source
.
fromFile
(
dataFile
)
// drop all the characters before the first module delimiter (i.e. '>>')
.
dropWhile
(
_
!=
'>'
)
// pull everything into a string
.
mkString
// split into modules
.
split
(
">>END_MODULE\n"
)
// make map of module name -> module lines
.
map
{
case
(
modString
)
=>
// module name is in the first line, without '>>' and before the tab character
val
Array
(
firstLine
,
otherLines
)
=
modString
// drop the leading '>' characters (start of module)
.
dropWhile
(
_
==
'>'
)
// split first line and others
.
split
(
"\n"
,
2
)
// and slice them
.
slice
(
0
,
2
)
// extract module name and module status
val
Array
(
modName
,
modStatus
)
=
firstLine
.
split
(
"\t"
,
2
)
.
slice
(
0
,
2
)
modName
->
FastQCModule
(
modName
,
modStatus
,
otherLines
.
split
(
"\n"
).
toSeq
)
}
.
toMap
if
(
fqModules
.
isEmpty
)
throw
new
IllegalStateException
(
"Empty FastQC data file "
+
dataFile
.
toString
)
else
fqModules
}
protected
case
class
Sequence
(
name
:
String
,
seq
:
String
)
def
getFoundAdapters
:
List
[
Sequence
]
=
{
def
getSeqs
(
file
:
File
)
=
{
if
(
file
!=
null
)
{
(
for
(
line
<-
Source
.
fromFile
(
file
).
getLines
();
if
line
.
startsWith
(
"#"
);
values
=
line
.
split
(
"\t*"
)
if
values
.
size
>=
2
)
yield
Sequence
(
values
(
0
),
values
(
1
))).
toList
}
else
Nil
}
/**
* Retrieves the FASTQ file encoding as computed by FastQC.
*
* @return encoding name
* @throws NoSuchElementException when the "Basic Statistics" key does not exist in the mapping or
* when a line starting with "Encoding" does not exist.
*/
@throws
(
classOf
[
NoSuchElementException
])
def
encoding
:
String
=
qcModules
(
"Basic Statistics"
)
.
lines
.
dropWhile
(!
_
.
startsWith
(
"Encoding"
))
.
head
.
stripPrefix
(
"Encoding\t"
)
.
stripSuffix
(
"\t"
)
/** Case class representing a known adapter sequence */
protected
case
class
AdapterSequence
(
name
:
String
,
seq
:
String
)
val
seqs
=
getSeqs
(
adapters
)
:::
getSeqs
(
contaminants
)
/**
* Retrieves overrepresented sequences found by FastQC.
*
* @return a [[Set]] of [[AdapterSequence]] objects.
*/
def
foundAdapters
:
Set
[
AdapterSequence
]
=
{
val
block
=
getDataBlock
(
"Overrepresented sequences"
)
if
(
block
==
null
)
return
Nil
/** Returns a list of adapter and/or contaminant sequences known to FastQC */
def
getFastqcSeqs
(
file
:
Option
[
File
])
:
Set
[
AdapterSequence
]
=
file
match
{
case
None
=>
Set
.
empty
[
AdapterSequence
]
case
Some
(
f
)
=>
(
for
{
line
<-
Source
.
fromFile
(
f
).
getLines
()
if
!
line
.
startsWith
(
"#"
)
values
=
line
.
split
(
"\t+"
)
if
values
.
size
>=
2
}
yield
AdapterSequence
(
values
(
0
),
values
(
1
))).
toSet
}
val
found
=
for
(
line
<-
block
if
!
line
.
startsWith
(
"#"
);
values
=
line
.
split
(
"\t"
)
if
values
.
size
>=
4
)
yield
values
(
3
)
val
found
=
qcModules
.
get
(
"Overrepresented sequences"
)
match
{
case
None
=>
Seq
.
empty
[
String
]
case
Some
(
qcModule
)
=>
for
(
line
<-
qcModule
.
lines
if
!(
line
.
startsWith
(
"#"
)
||
line
.
startsWith
(
">"
));
values
=
line
.
split
(
"\t"
)
if
values
.
size
>=
4
)
yield
values
(
3
)
}
seqs
.
filter
(
x
=>
found
.
exists
(
_
.
startsWith
(
x
.
name
)))
// select full sequences from known adapters and contaminants
// based on overrepresented sequences results
(
getFastqcSeqs
(
adapters
)
++
getFastqcSeqs
(
contaminants
))
.
filter
(
x
=>
found
.
exists
(
_
.
startsWith
(
x
.
name
)))
}
def
getSummary
:
Json
=
{
val
subfixs
=
Map
(
"plot_duplication_levels"
->
"Images/duplication_levels.png"
,
"plot_kmer_profiles"
->
"Images/kmer_profiles.png"
,
"plot_per_base_gc_content"
->
"Images/per_base_gc_content.png"
,
"plot_per_base_n_content"
->
"Images/per_base_n_content.png"
,
"plot_per_base_quality"
->
"Images/per_base_quality.png"
,
"plot_per_base_sequence_content"
->
"Images/per_base_sequence_content.png"
,
"plot_per_sequence_gc_content"
->
"Images/per_sequence_gc_content.png"
,
"plot_per_sequence_quality"
->
"Images/per_sequence_quality.png"
,
"plot_sequence_length_distribution"
->
"Images/sequence_length_distribution.png"
,
"fastqc_data"
->
"fastqc_data.txt"
)
val
dir
=
output
.
getAbsolutePath
.
stripSuffix
(
".zip"
)
+
"/"
var
outputMap
:
Map
[
String
,
Map
[
String
,
String
]]
=
Map
()
for
((
k
,
v
)
<-
subfixs
)
outputMap
+=
(
k
->
Map
(
"path"
->
(
dir
+
v
)))
val
temp
=
(
""
:=
outputMap
)
->:
jEmptyObject
return
temp
.
fieldOrEmptyObject
(
""
)
/** Summary of the FastQC run, stored in a [[Json]] object */
def
summary
:
Json
=
{
val
outputMap
=
Map
(
"plot_duplication_levels"
->
"Images/duplication_levels.png"
,
"plot_kmer_profiles"
->
"Images/kmer_profiles.png"
,
"plot_per_base_gc_content"
->
"Images/per_base_gc_content.png"
,
"plot_per_base_n_content"
->
"Images/per_base_n_content.png"
,
"plot_per_base_quality"
->
"Images/per_base_quality.png"
,
"plot_per_base_sequence_content"
->
"Images/per_base_sequence_content.png"
,
"plot_per_sequence_gc_content"
->
"Images/per_sequence_gc_content.png"
,
"plot_per_sequence_quality"
->
"Images/per_sequence_quality.png"
,
"plot_sequence_length_distribution"
->
"Images/sequence_length_distribution.png"
,
"fastqc_data"
->
"fastqc_data.txt"
)
.
map
{
case
(
name
,
relPath
)
=>
name
->
Map
(
"path"
->
(
outputDir
+
File
.
separator
+
relPath
))
}
ConfigUtils
.
mapToJson
(
outputMap
)
}
}
object
Fastqc
{
def
apply
(
root
:
Configurable
,
fastqfile
:
File
,
outDir
:
String
)
:
Fastqc
=
{
val
fastqcCommand
=
new
Fastqc
(
root
)
fastqcCommand
.
fastqfile
=
fastqfile
...
...
@@ -102,6 +174,6 @@ object Fastqc {
//if (filename.endsWith(".fq")) filename = filename.substring(0,filename.size - 3)
fastqcCommand
.
output
=
new
File
(
outDir
+
"/"
+
filename
+
"_fastqc.zip"
)
fastqcCommand
.
afterGraph
return
fastqcCommand
fastqcCommand
}
}
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala
View file @
fbed4e7c
...
...
@@ -201,7 +201,7 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co
def
fastqcSummary
(
fastqc
:
Fastqc
)
:
Option
[
Json
]
=
{
if
(
fastqc
==
null
)
return
None
else
return
Option
(
fastqc
.
getS
ummary
)
else
return
Option
(
fastqc
.
s
ummary
)
}
def
clipstatSummary
()
:
Option
[
Json
]
=
{
...
...
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala
View file @
fbed4e7c
...
...
@@ -25,7 +25,7 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk.
override
def
beforeCmd
{
super
.
beforeCmd
if
(
fastqc
!=
null
&&
Q
==
None
)
{
val
encoding
=
fastqc
.
getE
ncoding
val
encoding
=
fastqc
.
e
ncoding
Q
=
encoding
match
{
case
null
=>
None
case
s
if
(
s
.
contains
(
"Sanger / Illumina 1.9"
))
=>
None
...
...
public/flexiprep/src/test/resources/fqc_contaminants_v0101.txt
0 → 100644
View file @
fbed4e7c
# This file contains a list of potential contaminants which are
# frequently found in high throughput sequencing reactions. These
# are mostly sequences of adapters / primers used in the various
# sequencing chemistries.
#
# Please DO NOT rely on these sequences to design your own oligos, some
# of them are truncated at ambiguous positions, and none of them are
# definitive sequences from the manufacturers so don't blame us if you
# try to use them and they don't work.
#
# You can add more sequences to the file by putting one line per entry
# and specifying a name[tab]sequence. If the contaminant you add is
# likely to be of use to others please consider sending it to the FastQ
# authors, either via a bug report at www.bioinformatics.bbsrc.ac.uk/bugzilla/
# or by directly emailing simon.andrews@bbsrc.ac.uk so other users of
# the program can benefit.
Illumina Single End Adapter 1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
Illumina Single End Adapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Adapter 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Adapter 2 GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
Illumina Paried End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End PCR Primer 2 CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
Illumina Paried End Sequencing Primer 1 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Paired End Sequencing Primer 2 CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
Illumina DpnII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGAC
Illumina DpnII expression Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina DpnII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina DpnII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina DpnII expression Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina NlaIII expression Adapter 1 ACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina NlaIII expression Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina NlaIII expression PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina NlaIII expression PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina NlaIII expression Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina Small RNA Adapter 1 GTTCAGAGTTCTACAGTCCGACGATC
Illumina Small RNA Adapter 2 TGGAATTCTCGGGTGCCAAGG
Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina Multiplexing Adapter 1 GATCGGAAGAGCACACGTCT
Illumina Multiplexing Adapter 2 ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing PCR Primer 1.01 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing PCR Primer 2.01 GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
Illumina Multiplexing Read1 Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT
Illumina Multiplexing Index Sequencing Primer GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
Illumina Multiplexing Read2 Sequencing Primer GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
Illumina PCR Primer Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC
Illumina PCR Primer Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC
Illumina PCR Primer Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC
Illumina PCR Primer Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC
Illumina PCR Primer Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC
Illumina PCR Primer Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC
Illumina PCR Primer Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC
Illumina PCR Primer Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC
Illumina PCR Primer Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC
Illumina PCR Primer Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC
Illumina PCR Primer Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC
Illumina PCR Primer Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC
Illumina DpnII Gex Adapter 1 GATCGTCGGACTGTAGAACTCTGAAC
Illumina DpnII Gex Adapter 1.01 ACAGGTTCAGAGTTCTACAGTCCGAC
Illumina DpnII Gex Adapter 2 CAAGCAGAAGACGGCATACGA
Illumina DpnII Gex Adapter 2.01 TCGTATGCCGTCTTCTGCTTG
Illumina DpnII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina DpnII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina DpnII Gex Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
Illumina NlaIII Gex Adapter 1.01 TCGGACTGTAGAACTCTGAAC
Illumina NlaIII Gex Adapter 1.02 ACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina NlaIII Gex Adapter 2.01 CAAGCAGAAGACGGCATACGA
Illumina NlaIII Gex Adapter 2.02 TCGTATGCCGTCTTCTGCTTG
Illumina NlaIII Gex PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina NlaIII Gex PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina NlaIII Gex Sequencing Primer CCGACAGGTTCAGAGTTCTACAGTCCGACATG
Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA
Illumina 5p RNA Adapter GTTCAGAGTTCTACAGTCCGACGATC
Illumina RNA Adapter1 TGGAATTCTCGGGTGCCAAGG
Illumina Small RNA 3p Adapter 1 ATCTCGTATGCCGTCTTCTGCTTG
Illumina Small RNA PCR Primer 1 CAAGCAGAAGACGGCATACGA
Illumina Small RNA PCR Primer 2 AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
Illumina Small RNA Sequencing Primer CGACAGGTTCAGAGTTCTACAGTCCGACGATC
TruSeq Universal Adapter AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
TruSeq Adapter, Index 1 GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 2 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 3 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 4 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 5 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 6 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 7 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 8 GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 9 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 10 GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 11 GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG
TruSeq Adapter, Index 12 GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG
Illumina RNA RT Primer GCCTTGGCACCCGAGAATTCCA
Illumina RNA PCR Primer AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA
RNA PCR Primer, Index 1 CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 2 CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 3 CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 4 CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 5 CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 6 CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 7 CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 8 CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 9 CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 10 CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 11 CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 12 CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 13 CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 14 CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 15 CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 16 CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 17 CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 18 CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 19 CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 20 CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 21 CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 22 CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 23 CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 24 CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 25 CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 26 CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 27 CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 28 CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 29 CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 30 CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 31 CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 32 CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 33 CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 34 CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 35 CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 36 CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 37 CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 38 CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 39 CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 40 CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 41 CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 42 CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 43 CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 44 CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 45 CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 46 CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 47 CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
RNA PCR Primer, Index 48 CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
ABI Dynabead EcoP Oligo CTGATCTAGAGGTACCGGATCCCAGCAGT
ABI Solid3 Adapter A CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG
ABI Solid3 Adapter B CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT
ABI Solid3 5' AMP Primer CCACTACGCCTCCGCTTTCCTCTCTATG
ABI Solid3 3' AMP Primer CTGCCCCGGGTTCCTCATTCT
ABI Solid3 EF1 alpha Sense Primer CATGTGTGTTGAGAGCTTC
ABI Solid3 EF1 alpha Antisense Primer GAAAACCAAAGTGGTCCAC
ABI Solid3 GAPDH Forward Primer TTAGCACCCCTGGCCAAGG
ABI Solid3 GAPDH Reverse Primer CTTACTCCTTGGAGGCCATG
public/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt
0 → 100644
View file @
fbed4e7c
This diff is collapsed.
Click to expand it.
public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala
0 → 100644
View file @
fbed4e7c
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.pipelines.flexiprep
import
java.io.File
import
java.nio.file.Paths
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
class
FastqcV0101Test
extends
TestNGSuite
with
Matchers
{
/** Returns the absolute path to the test resource directory as a File object */
private
val
resourceDir
:
File
=
new
File
(
Paths
.
get
(
getClass
.
getResource
(
"/"
).
toURI
).
toString
)
/** Given a resource file name, returns the absolute path to it as a File object */
private
def
resourceFile
(
p
:
String
)
:
File
=
new
File
(
resourceDir
,
p
)
/** Mock output file of a FastQC v0.10.1 run */
// the file doesn't actually exist, we just need it so the outputDir value can be computed correctly
private
val
outputv0101
:
File
=
resourceFile
(
"v0101.fq_fastqc.zip"
)
@Test
def
testOutputDir
()
=
{
val
fqc
=
new
Fastqc
(
null
)
fqc
.
output
=
outputv0101
fqc
.
outputDir
shouldBe
new
File
(
resourceDir
,
"v0101.fq_fastqc"
)
}
@Test
def
testQcModules
()
=
{
val
fqc
=
new
Fastqc
(
null
)
fqc
.
output
=
outputv0101
// 11 QC modules
fqc
.
qcModules
.
size
shouldBe
11
// first module
fqc
.
qcModules
.
keySet
should
contain
(
"Basic Statistics"
)
// mid (6th module)
fqc
.
qcModules
.
keySet
should
contain
(
"Per sequence GC content"
)
// last module
fqc
.
qcModules
.
keySet
should
contain
(
"Kmer Content"
)
}
@Test
def
testSingleQcModule
()
=
{