Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
biopet.biopet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
biopet.biopet
Commits
5e2ceed3
Commit
5e2ceed3
authored
Apr 19, 2017
by
Peter van 't Hof
Committed by
GitHub
Apr 19, 2017
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #75 from biopet/fix-BIOPET-650
Adding Sambamba as merge step
parents
02292fea
b9cb7c04
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
72 additions
and
23 deletions
+72
-23
basty/src/test/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala
...scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala
+1
-1
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala
...la/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala
+1
-1
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala
...umc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala
+19
-7
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala
.../lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala
+19
-4
mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala
...mc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala
+20
-6
mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala
...asc/biopet/pipelines/mapping/MultisampleMappingTest.scala
+11
-3
shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala
...scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala
+1
-1
No files found.
basty/src/test/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala
View file @
5e2ceed3
...
...
@@ -98,7 +98,7 @@ class BastyTest extends TestNGSuite with Matchers {
val
numberLibs
=
(
if
(
sample1
)
1
else
0
)
+
(
if
(
sample2
)
2
else
0
)
val
numberSamples
=
(
if
(
sample1
)
1
else
0
)
+
(
if
(
sample2
)
1
else
0
)
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
MarkDuplicates
])
shouldBe
(
numberLibs
+
(
if
(
sample2
)
1
else
0
)
)
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
MarkDuplicates
])
shouldBe
(
numberLibs
+
numberSamples
)
// Gatk preprocess
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
IndelRealigner
])
shouldBe
(
numberLibs
*
(
if
(
realign
)
1
else
0
)
+
(
if
(
sample2
&&
realign
)
1
else
0
))
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala
View file @
5e2ceed3
...
...
@@ -25,6 +25,6 @@ abstract class Sambamba extends BiopetCommandLineFunction with Version {
executable
=
config
(
"exe"
,
default
=
"sambamba"
,
namespace
=
"sambamba"
,
freeVar
=
false
)
def
versionCommand
=
executable
def
versionRegex
=
"""sambamba v(.*)"""
.
r
def
versionRegex
=
"""sambamba v
?
(.*)"""
.
r
override
def
versionExitcode
=
List
(
0
,
1
)
}
\ No newline at end of file
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala
View file @
5e2ceed3
...
...
@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambemba markdup */
class
SambambaMarkdup
(
val
parent
:
Configurable
)
extends
Sambamba
{
override
def
defaultThreads
=
4
@Input
(
doc
=
"Bam File"
)
var
input
:
File
=
_
...
...
@@ -32,17 +31,29 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
var
removeDuplicates
:
Boolean
=
config
(
"remove_duplicates"
,
default
=
false
)
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val
compressionLevel
:
Option
[
Int
]
=
config
(
"compression_level"
,
default
=
6
)
val
hashTableSize
:
Option
[
Int
]
=
config
(
"hash-table-size"
,
default
=
262144
)
val
overflowListSize
:
Option
[
Int
]
=
config
(
"overflow-list-size"
,
default
=
200000
)
val
ioBufferSize
:
Option
[
Int
]
=
config
(
"io-buffer-size"
,
default
=
128
)
val
compressionLevel
:
Option
[
Int
]
=
config
(
"compression_level"
)
val
hashTableSize
:
Option
[
Int
]
=
config
(
"hash-table-size"
)
val
overflowListSize
:
Option
[
Int
]
=
config
(
"overflow-list-size"
)
val
ioBufferSize
:
Option
[
Int
]
=
config
(
"io-buffer-size"
)
val
showProgress
:
Boolean
=
config
(
"show-progress"
,
default
=
true
)
override
def
defaultThreads
=
4
override
def
defaultCoreMemory
=
4.0
@Output
private
var
indexOutput
:
File
=
_
override
def
beforeGraph
()
:
Unit
=
{
indexOutput
=
new
File
(
output
+
".bai"
)
}
/** Returns command to execute */
def
cmdLine
=
required
(
executable
)
+
def
cmdLine
:
String
=
required
(
executable
)
+
required
(
"markdup"
)
+
conditional
(
removeDuplicates
,
"--remove-duplicates"
)
+
optional
(
"-t"
,
nCoresRequest
)
+
optional
(
"-l"
,
compressionLevel
)
+
conditional
(
showProgress
,
"--show-progress"
)
+
optional
(
"--hash-table-size="
,
hashTableSize
,
spaceSeparated
=
false
)
+
optional
(
"--overflow-list-size="
,
overflowListSize
,
spaceSeparated
=
false
)
+
optional
(
"--io-buffer-size="
,
ioBufferSize
,
spaceSeparated
=
false
)
+
...
...
@@ -51,10 +62,11 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
}
object
SambambaMarkdup
{
def
apply
(
root
:
Configurable
,
input
:
File
,
output
:
File
)
:
SambambaMarkdup
=
{
def
apply
(
root
:
Configurable
,
input
:
File
,
output
:
File
,
isIntermediate
:
Boolean
=
false
)
:
SambambaMarkdup
=
{
val
markdup
=
new
SambambaMarkdup
(
root
)
markdup
.
input
=
input
markdup
.
output
=
output
markdup
.
isIntermediate
=
isIntermediate
markdup
}
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala
View file @
5e2ceed3
...
...
@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambemba merge */
class
SambambaMerge
(
val
parent
:
Configurable
)
extends
Sambamba
{
override
def
defaultThreads
=
4
@Input
(
doc
=
"Bam File[s]"
)
var
input
:
List
[
File
]
=
Nil
...
...
@@ -30,13 +29,29 @@ class SambambaMerge(val parent: Configurable) extends Sambamba {
var
output
:
File
=
_
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val
compressionLevel
:
Option
[
Int
]
=
config
(
"compression_level"
,
default
=
6
)
val
compressionLevel
:
Option
[
Int
]
=
config
(
"compression_level"
)
val
header
:
Boolean
=
config
(
"header"
,
default
=
false
)
val
showProgress
:
Boolean
=
config
(
"show-progress"
,
default
=
true
)
val
filter
:
Option
[
String
]
=
config
(
"filter"
)
override
def
defaultThreads
=
4
override
def
defaultCoreMemory
=
4.0
@Output
private
var
indexOutput
:
File
=
_
override
def
beforeGraph
()
:
Unit
=
{
indexOutput
=
new
File
(
output
+
".bai"
)
}
/** Returns command to execute */
def
cmdLine
=
required
(
executable
)
+
def
cmdLine
:
String
=
required
(
executable
)
+
required
(
"merge"
)
+
optional
(
"-t"
,
nCoresRequest
)
+
optional
(
"-l"
,
compressionLevel
)
+
optional
(
"-F"
,
filter
)
+
conditional
(
header
,
"--header"
)
+
conditional
(
showProgress
,
"--show-progress"
)
+
required
(
output
)
+
repeat
(
""
,
input
)
repeat
(
input
)
}
mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala
View file @
5e2ceed3
...
...
@@ -19,7 +19,7 @@ import java.io.File
import
htsjdk.samtools.SamReaderFactory
import
htsjdk.samtools.reference.FastaSequenceFile
import
nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import
nl.lumc.sasc.biopet.core.
{
PipelineCommand
,
Reference
,
MultiSampleQScript
}
import
nl.lumc.sasc.biopet.core.
{
MultiSampleQScript
,
PipelineCommand
,
Reference
}
import
nl.lumc.sasc.biopet.extensions.Ln
import
nl.lumc.sasc.biopet.extensions.picard._
import
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
...
...
@@ -28,8 +28,8 @@ import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
import
MultisampleMapping.MergeStrategy
import
nl.lumc.sasc.biopet.extensions.sambamba.
{
SambambaMarkdup
,
SambambaMerge
}
import
scala.collection.JavaConversions._
...
...
@@ -125,7 +125,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript
}
else
None
def
bamFile
:
Option
[
File
]
=
mapping
match
{
case
Some
(
m
)
=>
Some
(
m
.
final
BamFile
)
case
Some
(
m
)
=>
Some
(
m
.
merged
BamFile
)
case
_
if
inputBam
.
isDefined
=>
Some
(
new
File
(
libDir
,
s
"$sampleId-$libId.bam"
))
case
_
=>
None
}
...
...
@@ -247,9 +247,9 @@ trait MultisampleMappingTrait extends MultiSampleQScript
mergeStrategy
match
{
case
MergeStrategy
.
None
=>
case
(
MergeStrategy
.
MergeSam
|
MergeStrategy
.
MarkDuplicates
)
if
libraries
.
flatMap
(
_
.
_2
.
bamFile
).
size
==
1
=>
case
(
MergeStrategy
.
MergeSam
)
if
libraries
.
flatMap
(
_
.
_2
.
bamFile
).
size
==
1
=>
add
(
Ln
.
linkBamFile
(
qscript
,
libraries
.
flatMap
(
_
.
_2
.
bamFile
).
head
,
bamFile
.
get
)
:
_
*
)
case
(
MergeStrategy
.
PreProcessMergeSam
|
MergeStrategy
.
PreProcessMarkDuplicates
)
if
libraries
.
flatMap
(
_
.
_2
.
preProcessBam
).
size
==
1
=>
case
(
MergeStrategy
.
PreProcessMergeSam
)
if
libraries
.
flatMap
(
_
.
_2
.
preProcessBam
).
size
==
1
=>
add
(
Ln
.
linkBamFile
(
qscript
,
libraries
.
flatMap
(
_
.
_2
.
preProcessBam
).
head
,
bamFile
.
get
)
:
_
*
)
case
MergeStrategy
.
MergeSam
=>
add
(
MergeSamFiles
(
qscript
,
libraries
.
flatMap
(
_
.
_2
.
bamFile
).
toList
,
bamFile
.
get
,
isIntermediate
=
!
keepMergedFiles
))
...
...
@@ -259,6 +259,20 @@ trait MultisampleMappingTrait extends MultiSampleQScript
add
(
MarkDuplicates
(
qscript
,
libraries
.
flatMap
(
_
.
_2
.
bamFile
).
toList
,
bamFile
.
get
,
isIntermediate
=
!
keepMergedFiles
))
case
MergeStrategy
.
PreProcessMarkDuplicates
=>
add
(
MarkDuplicates
(
qscript
,
libraries
.
flatMap
(
_
.
_2
.
preProcessBam
).
toList
,
bamFile
.
get
,
isIntermediate
=
!
keepMergedFiles
))
case
MergeStrategy
.
PreProcessSambambaMarkdup
=>
val
mergedBam
=
if
(
libraries
.
flatMap
(
_
.
_2
.
bamFile
).
size
==
1
)
{
add
(
Ln
.
linkBamFile
(
qscript
,
libraries
.
flatMap
(
_
.
_2
.
preProcessBam
).
head
,
new
File
(
sampleDir
,
"merged.bam"
))
:
_
*
)
libraries
.
flatMap
(
_
.
_2
.
preProcessBam
).
head
}
else
{
val
merge
=
new
SambambaMerge
(
qscript
)
merge
.
input
=
libraries
.
flatMap
(
_
.
_2
.
preProcessBam
).
toList
merge
.
output
=
new
File
(
sampleDir
,
"merged.bam"
)
merge
.
isIntermediate
=
true
add
(
merge
)
merge
.
output
}
add
(
SambambaMarkdup
(
qscript
,
mergedBam
,
bamFile
.
get
,
isIntermediate
=
!
keepMergedFiles
))
add
(
Ln
(
qscript
,
bamFile
.
get
+
".bai"
,
bamFile
.
get
.
getAbsolutePath
.
stripSuffix
(
".bam"
)
+
".bai"
))
case
_
=>
throw
new
IllegalStateException
(
"This should not be possible, unimplemented MergeStrategy?"
)
}
...
...
@@ -301,7 +315,7 @@ class MultisampleMapping(val parent: Configurable) extends QScript with Multisam
object
MultisampleMapping
extends
PipelineCommand
{
object
MergeStrategy
extends
Enumeration
{
val
None
,
MergeSam
,
MarkDuplicates
,
PreProcessMergeSam
,
PreProcessMarkDuplicates
=
Value
val
None
,
MergeSam
,
MarkDuplicates
,
PreProcessMergeSam
,
PreProcessMarkDuplicates
,
PreProcessSambambaMarkdup
=
Value
}
/** When file is not absolute an error is raise att the end of the script of a pipeline */
...
...
mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala
View file @
5e2ceed3
...
...
@@ -20,6 +20,7 @@ import com.google.common.io.Files
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
nl.lumc.sasc.biopet.extensions.centrifuge.Centrifuge
import
nl.lumc.sasc.biopet.extensions.picard.
{
MarkDuplicates
,
MergeSamFiles
}
import
nl.lumc.sasc.biopet.extensions.sambamba.SambambaMarkdup
import
nl.lumc.sasc.biopet.utils.
{
ConfigUtils
,
Logging
}
import
nl.lumc.sasc.biopet.utils.config.Config
import
org.apache.commons.io.FileUtils
...
...
@@ -91,16 +92,22 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
pipeline
.
script
()
val
numberFastqLibs
=
(
if
(
sample1
)
1
else
0
)
+
(
if
(
sample2
)
2
else
0
)
+
(
if
(
sample3
&&
bamToFastq
)
1
else
0
)
+
(
if
(
sample4
&&
bamToFastq
)
1
else
0
)
val
numberSamples
=
(
if
(
sample1
)
1
else
0
)
+
(
if
(
sample2
)
1
else
0
)
val
numberSamples
=
(
if
(
sample1
)
1
else
0
)
+
(
if
(
sample2
)
1
else
0
)
+
(
if
(
sample3
)
1
else
0
)
+
(
if
(
sample4
)
1
else
0
)
val
pipesJobs
=
pipeline
.
functions
.
filter
(
_
.
isInstanceOf
[
BiopetCommandLineFunction
])
.
flatMap
(
_
.
asInstanceOf
[
BiopetCommandLineFunction
].
pipesJobs
)
if
(
merge
==
MultisampleMapping
.
MergeStrategy
.
PreProcessMarkDuplicates
)
{
""
}
import
MultisampleMapping.MergeStrategy
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
MarkDuplicates
])
shouldBe
(
numberFastqLibs
+
(
if
(
sample2
&&
(
merge
==
MergeStrategy
.
MarkDuplicates
||
merge
==
MergeStrategy
.
PreProcessMarkDuplicates
))
1
else
0
))
(
if
(
merge
==
MergeStrategy
.
MarkDuplicates
||
merge
==
MergeStrategy
.
PreProcessMarkDuplicates
)
numberSamples
else
0
))
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
MergeSamFiles
])
shouldBe
(
(
if
(
sample2
&&
(
merge
==
MergeStrategy
.
MergeSam
||
merge
==
MergeStrategy
.
PreProcessMergeSam
))
1
else
0
))
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
SambambaMarkdup
])
shouldBe
(
if
(
merge
==
MergeStrategy
.
PreProcessSambambaMarkdup
)
numberSamples
else
0
)
pipeline
.
samples
.
foreach
{
case
(
sampleName
,
sample
)
=>
if
(
merge
==
MergeStrategy
.
None
)
sample
.
bamFile
shouldBe
None
...
...
@@ -211,6 +218,7 @@ object MultisampleMappingTestTrait {
"sickle"
->
Map
(
"exe"
->
"test"
),
"cutadapt"
->
Map
(
"exe"
->
"test"
),
"bwa"
->
Map
(
"exe"
->
"test"
),
"sambamba"
->
Map
(
"exe"
->
"test"
),
"samtools"
->
Map
(
"exe"
->
"test"
),
"igvtools"
->
Map
(
"exe"
->
"test"
,
"igvtools_jar"
->
"test"
),
"wigtobigwig"
->
Map
(
"exe"
->
"test"
),
...
...
@@ -232,7 +240,7 @@ object MultisampleMappingTestTrait {
)))
val
sample2
=
Map
(
"samples"
->
Map
(
"sample
3
"
->
Map
(
"libraries"
->
Map
(
"samples"
->
Map
(
"sample
2
"
->
Map
(
"libraries"
->
Map
(
"lib1"
->
Map
(
"R1"
->
inputTouch
(
"2_1_R1.fq"
),
"R2"
->
inputTouch
(
"2_1_R2.fq"
)
...
...
shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala
View file @
5e2ceed3
...
...
@@ -99,7 +99,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers {
val
numberLibs
=
(
if
(
sample1
)
1
else
0
)
+
(
if
(
sample2
)
2
else
0
)
val
numberSamples
=
(
if
(
sample1
)
1
else
0
)
+
(
if
(
sample2
)
1
else
0
)
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
MarkDuplicates
])
shouldBe
(
numberLibs
+
(
if
(
sample2
)
1
else
0
)
)
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
MarkDuplicates
])
shouldBe
(
numberLibs
+
numberSamples
)
// Gatk preprocess
pipeline
.
functions
.
count
(
_
.
isInstanceOf
[
IndelRealigner
])
shouldBe
(
numberLibs
*
(
if
(
realign
)
1
else
0
)
+
(
if
(
sample2
&&
realign
)
1
else
0
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment