Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
c7340695
Commit
c7340695
authored
Mar 01, 2017
by
Peter van 't Hof
Committed by
GitHub
Mar 01, 2017
Browse files
Merge pull request #25 from biopet/fix-BIOPET-576
Fix adapters fetch from fastqc
parents
7812d950
674dc69e
Changes
2
Hide whitespace changes
Inline
Side-by-side
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala
View file @
c7340695
...
...
@@ -75,8 +75,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction with Vers
// otherwise, check if adapters are already present (depending on FastQC version)
case
None
=>
val
defaultAdapters
=
getVersion
match
{
case
Some
(
"v0.11.2"
)
=>
Option
(
new
File
(
fastqcDir
+
"/Configuration/adapter_list.txt"
))
case
_
=>
None
case
Some
(
v
)
if
v
.
contains
(
"v0.11"
)
=>
Option
(
new
File
(
fastqcDir
+
"/Configuration/adapter_list.txt"
))
case
_
=>
None
}
defaultAdapters
.
collect
{
case
adp
=>
config
(
"adapters"
,
default
=
adp
)
}
}
...
...
flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala
View file @
c7340695
...
...
@@ -155,6 +155,8 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
*/
def
foundAdapters
:
Set
[
AdapterSequence
]
=
{
if
(
dataFile
.
exists
)
{
// On a dry run this file does not yet exist
val
modules
=
qcModules
/** Returns a list of adapter and/or contaminant sequences known to FastQC */
def
getFastqcSeqs
(
file
:
Option
[
File
])
:
Set
[
AdapterSequence
]
=
file
match
{
case
None
=>
Set
.
empty
[
AdapterSequence
]
...
...
@@ -170,7 +172,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
val
adapterSet
=
getFastqcSeqs
(
adapters
)
val
contaminantSet
=
getFastqcSeqs
(
contaminants
)
val
foundAdapterNames
:
Seq
[
String
]
=
qcModules
.
get
(
"Overrepresented sequences"
)
match
{
val
foundAdapterNames
:
Seq
[
String
]
=
modules
.
get
(
"Overrepresented sequences"
)
match
{
case
None
=>
Seq
.
empty
[
String
]
case
Some
(
qcModule
)
=>
for
(
...
...
@@ -181,7 +183,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
// select full sequences from known adapters and contaminants
// based on overrepresented sequences results
val
fromKnownList
:
Set
[
AdapterSequence
]
=
(
adapterSet
++
contaminantSet
)
val
fromKnownList
:
Set
[
AdapterSequence
]
=
contaminantSet
.
filter
(
x
=>
foundAdapterNames
.
exists
(
_
.
startsWith
(
x
.
name
)))
val
fromKnownListRC
:
Set
[
AdapterSequence
]
=
if
(
enableRCtrimming
)
fromKnownList
.
map
{
...
...
@@ -191,7 +193,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
// list all sequences found by FastQC
val
fastQCFoundSequences
:
Seq
[
AdapterSequence
]
=
if
(
sensitiveAdapterSearch
)
{
qcModules
.
get
(
"Overrepresented sequences"
)
match
{
modules
.
get
(
"Overrepresented sequences"
)
match
{
case
None
=>
Seq
.
empty
case
Some
(
qcModule
)
=>
for
(
...
...
@@ -199,17 +201,16 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
values
=
line
.
split
(
"\t"
)
if
values
.
size
>=
4
)
yield
AdapterSequence
(
values
(
3
),
values
(
0
))
}
}
else
{
Seq
.
empty
}
}
else
Seq
()
// we only want to keep adapter sequences which are known by FastQC
// sequences such as "Adapter01 (100% over 12bp)" are valid because "Adapter01" is in FastQC
fastQCFoundSequences
.
filter
(
x
=>
{
(
adapterSet
++
contaminantSet
).
count
(
y
=>
x
.
name
.
startsWith
(
y
.
name
))
==
1
})
val
foundAdapters
=
modules
.
get
(
"Adapter Content"
).
map
{
x
=>
val
header
=
x
.
lines
.
head
.
split
(
"\t"
).
tail
.
zipWithIndex
val
lines
=
x
.
lines
.
tail
.
map
(
_
.
split
(
"\t"
).
tail
)
val
found
=
header
.
filter
(
h
=>
lines
.
exists
(
x
=>
x
(
h
.
_2
).
toFloat
>
0
)).
map
(
_
.
_1
)
adapterSet
.
filter
(
x
=>
found
.
contains
(
x
.
name
))
}
fromKnownList
++
fastQCFoundSequences
++
fromKnownListRC
fromKnownList
++
fastQCFoundSequences
++
fromKnownListRC
++
foundAdapters
.
getOrElse
(
Seq
())
}
else
Set
()
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment