Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
867eac4c
Commit
867eac4c
authored
Mar 04, 2016
by
Wai Yi Leung
Browse files
Merge branch 'fix-gears_single_end' into 'develop'
Fix for gears single end bam files Fixes #287 See merge request !337
parents
3fb5cf10
c28c5c82
Changes
5
Hide whitespace changes
Inline
Side-by-side
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala
View file @
867eac4c
...
...
@@ -69,8 +69,8 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
optional
(
"--threads"
,
nCoresRequest
)
+
conditional
(
quick
,
"--quick"
)
+
optional
(
"--min_hits"
,
minHits
)
+
optional
(
"--unclassified-out "
,
unclassified_out
.
get
)
+
optional
(
"--classified-out "
,
classified_out
.
get
)
+
optional
(
"--unclassified-out "
,
unclassified_out
)
+
optional
(
"--classified-out "
,
classified_out
)
+
required
(
"--output"
,
output
)
+
conditional
(
preLoad
,
"--preload"
)
+
conditional
(
paired
,
"--paired"
)
+
...
...
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala
View file @
867eac4c
...
...
@@ -89,10 +89,10 @@ object SeqStat extends ToolCommand {
(
qual_low_boundery
<
59
,
qual_high_boundery
>
74
)
match
{
case
(
false
,
true
)
=>
phredEncoding
=
Solexa
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
case
(
true
,
true
)
=>
phredEncoding
=
Solexa
// this is definite a sanger sequence, the lower end is sanger only
case
(
true
,
false
)
=>
phredEncoding
=
Sanger
...
...
@@ -181,7 +181,7 @@ object SeqStat extends ToolCommand {
quals
++=
mutable
.
ArrayBuffer
.
fill
(
baseStats
(
pos
).
qual
.
length
-
quals
.
length
)(
0
)
}
if
(
nucs
.
length
<=
baseStats
(
pos
).
nucs
.
length
)
{
nucs
++=
mutable
.
ArrayBuffer
.
fill
(
baseStats
(
pos
).
nucs
.
length
-
nucs
.
length
)(
0
)
nucs
++=
mutable
.
ArrayBuffer
.
fill
(
baseStats
(
pos
).
nucs
.
length
-
nucs
.
length
)(
0
)
}
// count into the quals
baseStats
(
pos
).
qual
.
zipWithIndex
foreach
{
case
(
value
,
index
)
=>
quals
(
index
)
+=
value
}
...
...
public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/ExtractUnmappedReads.scala
View file @
867eac4c
...
...
@@ -21,21 +21,26 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS
)
)
lazy
val
paired
:
Boolean
=
config
(
"paired_bam"
,
default
=
true
)
def
init
()
:
Unit
=
{
require
(
bamFile
!=
null
)
if
(
outputName
==
null
)
outputName
=
bamFile
.
getName
.
stripSuffix
(
".bam"
)
}
def
fastqUnmappedR1
=
new
File
(
outputDir
,
s
"$outputName.unmapped.R1.fq.gz"
)
def
fastqUnmappedR2
=
new
File
(
outputDir
,
s
"$outputName.unmapped.R2.fq.gz"
)
def
fastqUnmappedR2
=
if
(
paired
)
Some
(
new
File
(
outputDir
,
s
"$outputName.unmapped.R2.fq.gz"
))
else
None
def
fastqUnmappedSingletons
=
new
File
(
outputDir
,
s
"$outputName.unmapped.singletons.fq.gz"
)
def
biopetScript
()
:
Unit
=
{
val
samtoolsViewSelectUnmapped
=
new
SamtoolsView
(
this
)
samtoolsViewSelectUnmapped
.
input
=
bamFile
samtoolsViewSelectUnmapped
.
b
=
true
samtoolsViewSelectUnmapped
.
output
=
swapExt
(
outputDir
,
bamFile
,
".bam"
,
"unmapped.bam"
)
samtoolsViewSelectUnmapped
.
f
=
List
(
"12"
)
samtoolsViewSelectUnmapped
.
h
=
true
samtoolsViewSelectUnmapped
.
output
=
swapExt
(
outputDir
,
bamFile
,
".bam"
,
".unmapped.bam"
)
if
(
paired
)
samtoolsViewSelectUnmapped
.
f
=
List
(
"12"
)
else
samtoolsViewSelectUnmapped
.
f
=
List
(
"4"
)
samtoolsViewSelectUnmapped
.
isIntermediate
=
true
add
(
samtoolsViewSelectUnmapped
)
...
...
@@ -43,9 +48,11 @@ class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQS
val
samToFastq
=
new
SamToFastq
(
this
)
samToFastq
.
input
=
samtoolsViewSelectUnmapped
.
output
samToFastq
.
fastqR1
=
fastqUnmappedR1
samToFastq
.
fastqR2
=
fastqUnmappedR2
samToFastq
.
fastqUnpaired
=
fastqUnmappedSingletons
samToFastq
.
isIntermediate
=
true
if
(
paired
)
{
samToFastq
.
fastqR2
=
fastqUnmappedR2
.
get
samToFastq
.
fastqUnpaired
=
fastqUnmappedSingletons
}
samToFastq
.
isIntermediate
=
!
config
(
"keep_unmapped_fastq"
,
default
=
false
).
asBoolean
add
(
samToFastq
)
}
}
public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala
View file @
867eac4c
...
...
@@ -5,6 +5,7 @@ import java.io.{ File, PrintWriter }
import
nl.lumc.sasc.biopet.core.SampleLibraryTag
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.extensions.kraken.
{
KrakenReport
,
Kraken
}
import
nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
import
nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
nl.lumc.sasc.biopet.utils.config.Configurable
...
...
@@ -32,10 +33,27 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
.
stripSuffix
(
".fastq"
)
}
lazy
val
krakenConvertToFasta
:
Boolean
=
config
(
"kraken_discard_quality"
,
default
=
false
)
protected
def
fastqToFasta
(
input
:
File
)
:
File
=
{
val
seqtk
=
new
SeqtkSeq
(
this
)
seqtk
.
input
=
input
seqtk
.
output
=
new
File
(
outputDir
,
input
.
getName
+
".fasta"
)
seqtk
.
A
=
true
seqtk
.
isIntermediate
=
true
add
(
seqtk
)
seqtk
.
output
}
def
biopetScript
()
:
Unit
=
{
// start kraken
val
(
fqR1
,
fqR2
)
=
if
(
krakenConvertToFasta
)
(
fastqToFasta
(
fastqR1
),
fastqR2
.
map
(
fastqToFasta
))
else
(
fastqR1
,
fastqR2
)
val
krakenAnalysis
=
new
Kraken
(
this
)
krakenAnalysis
.
input
=
f
ast
qR1
::
f
ast
qR2
.
toList
krakenAnalysis
.
input
=
fqR1
::
fqR2
.
toList
krakenAnalysis
.
output
=
new
File
(
outputDir
,
s
"$outputName.krkn.raw"
)
krakenAnalysis
.
paired
=
fastqR2
.
isDefined
...
...
@@ -79,7 +97,7 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
/** Statistics shown in the summary file */
def
summaryFiles
:
Map
[
String
,
File
]
=
outputFiles
+
(
"input_R1"
->
fastqR1
)
++
(
fastqR2
match
{
case
Some
(
file
)
=>
Map
(
"input_R
1
"
->
file
)
case
Some
(
file
)
=>
Map
(
"input_R
2
"
->
file
)
case
_
=>
Map
()
})
}
...
...
public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala
View file @
867eac4c
...
...
@@ -97,7 +97,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
extract
.
bamFile
=
bam
extract
.
outputName
=
outputName
add
(
extract
)
executeFlexiprep
(
extract
.
fastqUnmappedR1
,
Some
(
extract
.
fastqUnmappedR2
)
)
executeFlexiprep
(
extract
.
fastqUnmappedR1
,
extract
.
fastqUnmappedR2
)
case
_
=>
throw
new
IllegalArgumentException
(
"Missing input files"
)
}
...
...
@@ -159,6 +159,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
.
empty
++
(
if
(
bamFile
.
isDefined
)
Map
(
"input_bam"
->
bamFile
.
get
)
else
Map
())
++
(
if
(
fastqR1
.
isDefined
)
Map
(
"input_R1"
->
fastqR1
.
get
)
else
Map
())
++
(
if
(
fastqR2
.
isDefined
)
Map
(
"input_R2"
->
fastqR2
.
get
)
else
Map
())
++
outputFiles
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment