Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
764fcb73
Commit
764fcb73
authored
Dec 22, 2016
by
Peter van 't Hof
Browse files
Merge remote-tracking branch 'remotes/origin/develop' into feature-singlefile
parents
8c52cdc2
725ed53a
Changes
7
Hide whitespace changes
Inline
Side-by-side
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BamStats.scala
View file @
764fcb73
...
...
@@ -6,7 +6,7 @@ import nl.lumc.sasc.biopet.core.summary.Summarizable
import
nl.lumc.sasc.biopet.core.
{
Reference
,
ToolCommandFunction
}
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.Input
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
* Created by pjvanthof on 18/11/2016.
...
...
@@ -36,6 +36,9 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
}
}
@Output
private
var
outputFiles
:
List
[
File
]
=
Nil
def
bamstatsSummary
:
File
=
new
File
(
outputDir
,
"bamstats.summary.json"
)
def
flagstatSummaryFile
(
contig
:
Option
[
String
]
=
None
)
:
File
=
getOutputFile
(
"flagstats.summary.json"
,
contig
)
def
mappingQualityFile
(
contig
:
Option
[
String
]
=
None
)
:
File
=
getOutputFile
(
"mapping_quality.tsv"
,
contig
)
...
...
@@ -44,6 +47,10 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
/**
 * Prepares this job before the graph is built.
 *
 * Calls the parent hook, adds the BAM index to `deps`, appends the summary,
 * flagstat, mapping-quality and `clipingFile()` files to `outputFiles`, points
 * `jobOutputFile` at ".bamstats.out" inside `outputDir`, and fills in
 * `reference` from `referenceFasta()` when it has not been set.
 */
override def beforeGraph() {
  super.beforeGraph()
  // The dot is escaped so only a literal ".bam" suffix is rewritten to ".bai";
  // an unescaped "." is a regex wildcard and would also match e.g. "xbam".
  deps :+= new File(bamFile.getAbsolutePath.replaceAll("\\.bam$", ".bai"))
  outputFiles :+= bamstatsSummary
  outputFiles :+= flagstatSummaryFile()
  outputFiles :+= mappingQualityFile()
  outputFiles :+= clipingFile()
  jobOutputFile = new File(outputDir, ".bamstats.out")
  if (reference == null) reference = referenceFasta()
}
...
...
docs/pipelines/mapping.md
View file @
764fcb73
...
...
@@ -58,6 +58,7 @@ All other values should be provided in the config. Specific config values toward
| readgroup_sequencing_center | String (optional) | Read group sequencing center |
| readgroup_description | String (optional) | Read group description |
| predicted_insertsize | Integer (optional) | Read group predicted insert size |
| keep_final_bam_file | Boolean (default true) | When set to false, the pipeline may remove the final bam file once no other job requires it |
It is possible to provide any config value as a command line argument as well, using the
`-cv`
flag.
E.g.
`-cv reference=<path/to/reference>`
would set value
`reference`
.
...
...
mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
View file @
764fcb73
...
...
@@ -93,19 +93,23 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
/** Readgroup predicted insert size */
protected
var
predictedInsertsize
:
Option
[
Int
]
=
config
(
"predicted_insertsize"
)
val
keepFinalBamFile
:
Boolean
=
config
(
"keep_final_bam_file"
,
default
=
true
)
protected
var
paired
:
Boolean
=
false
val
flexiprep
=
new
Flexiprep
(
this
)
def
finalBamFile
:
File
=
new
File
(
outputDir
,
outputName
+
".final.bam"
)
def
finalBamFile
:
File
=
if
(
skipMarkduplicates
)
{
new
File
(
outputDir
,
outputName
+
".bam"
)
}
else
new
File
(
outputDir
,
outputName
+
".dedup.bam"
)
/** location of summary file */
def
summaryFile
=
new
File
(
outputDir
,
sampleId
.
getOrElse
(
"x"
)
+
"-"
+
libId
.
getOrElse
(
"x"
)
+
".summary.json"
)
override
def
defaults
=
Map
(
override
def
defaults
:
Map
[
String
,
Any
]
=
Map
(
"gsnap"
->
Map
(
"batch"
->
4
),
"star"
->
Map
(
"outsamunmapped"
->
"Within"
)
)
override
def
fixedValues
=
Map
(
override
def
fixedValues
:
Map
[
String
,
Any
]
=
Map
(
"gsnap"
->
Map
(
"format"
->
"sam"
),
"bowtie"
->
Map
(
"sam"
->
true
)
)
...
...
@@ -255,11 +259,13 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
var
bamFile
=
bamFiles
.
head
if
(!
skipMarkduplicates
)
{
bamFile
=
new
File
(
outputDir
,
outputName
+
".dedup.bam"
)
val
md
=
MarkDuplicates
(
this
,
bamFiles
,
bamFile
)
val
md
=
MarkDuplicates
(
this
,
bamFiles
,
finalBamFile
)
md
.
isIntermediate
=
!
keepFinalBamFile
add
(
md
)
addSummarizable
(
md
,
"mark_duplicates"
)
}
else
if
(
skipMarkduplicates
&&
chunking
)
{
val
mergeSamFile
=
MergeSamFiles
(
this
,
bamFiles
,
new
File
(
outputDir
,
outputName
+
".merge.bam"
))
val
mergeSamFile
=
MergeSamFiles
(
this
,
bamFiles
,
finalBamFile
)
mergeSamFile
.
isIntermediate
=
!
keepFinalBamFile
add
(
mergeSamFile
)
bamFile
=
mergeSamFile
.
output
}
...
...
@@ -270,9 +276,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addSummaryQScript
(
bamMetrics
)
}
add
(
Ln
(
this
,
swapExt
(
outputDir
,
bamFile
,
".bam"
,
".bai"
),
swapExt
(
outputDir
,
finalBamFile
,
".bam"
,
".bai"
)))
add
(
Ln
(
this
,
bamFile
,
finalBamFile
))
outputFiles
+=
(
"finalBamFile"
->
finalBamFile
.
getAbsoluteFile
)
outputFiles
+=
(
"finalBamFile"
->
finalBamFile
)
if
(
config
(
"unmapped_to_gears"
,
default
=
false
).
asBoolean
)
{
val
gears
=
new
GearsSingle
(
this
)
...
...
@@ -331,7 +335,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
val
sortSam
=
SortSam
(
this
,
samFile
,
output
)
if
(
chunking
||
!
skipMarkduplicates
)
sortSam
.
isIntermediate
=
tru
e
sortSam
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFil
e
add
(
sortSam
)
sortSam
.
output
}
...
...
@@ -345,7 +349,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val
sortSam
=
new
SortSam
(
this
)
sortSam
.
output
=
output
val
pipe
=
bwaCommand
|
sortSam
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
pipe
.
threadsCorrection
=
-
1
add
(
pipe
)
output
...
...
@@ -363,6 +367,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val
ar
=
addAddOrReplaceReadGroups
(
reorderSam
.
output
,
output
)
val
pipe
=
new
BiopetFifoPipe
(
this
,
gsnapCommand
::
ar
.
_1
::
reorderSam
::
Nil
)
pipe
.
threadsCorrection
=
-
2
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
add
(
pipe
)
ar
.
_2
}
...
...
@@ -386,7 +391,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val
sortSam
=
new
SortSam
(
this
)
sortSam
.
output
=
output
val
pipe
=
hisat2
|
sortSam
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
pipe
.
threadsCorrection
=
1
add
(
pipe
)
...
...
@@ -430,9 +435,11 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val
reorderSam
=
new
ReorderSam
(
this
)
reorderSam
.
input
=
mergeSamFile
.
output
reorderSam
.
output
=
swapExt
(
output
.
getParent
,
output
,
".merge.bam"
,
".reordered.bam"
)
reorderSam
.
isIntermediate
=
true
add
(
reorderSam
)
val
ar
=
addAddOrReplaceReadGroups
(
reorderSam
.
output
,
output
)
ar
.
_1
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
add
(
ar
.
_1
)
ar
.
_2
}
...
...
@@ -459,7 +466,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
stampyCmd
.
isIntermediate
=
true
add
(
stampyCmd
)
val
sortSam
=
SortSam
(
this
,
stampyCmd
.
output
,
output
)
if
(
chunking
||
!
skipMarkduplicates
)
sortSam
.
isIntermediate
=
tru
e
sortSam
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFil
e
add
(
sortSam
)
sortSam
.
output
}
...
...
@@ -478,6 +485,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val
ar
=
addAddOrReplaceReadGroups
(
bowtie
.
output
,
output
)
val
pipe
=
new
BiopetFifoPipe
(
this
,
(
Some
(
bowtie
)
::
Some
(
ar
.
_1
)
::
Nil
).
flatten
)
pipe
.
threadsCorrection
=
-
1
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
add
(
pipe
)
ar
.
_2
}
...
...
@@ -495,7 +503,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val
sortSam
=
new
SortSam
(
this
)
sortSam
.
output
=
output
val
pipe
=
bowtie2
|
sortSam
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
pipe
.
threadsCorrection
=
-
1
add
(
pipe
)
output
...
...
@@ -517,6 +525,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
pipe
.
threadsCorrection
=
-
3
zcatR1
.
_1
.
foreach
(
x
=>
pipe
.
threadsCorrection
-=
1
)
zcatR2
.
foreach
(
_
.
_1
.
foreach
(
x
=>
pipe
.
threadsCorrection
-=
1
))
pipe
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
add
(
pipe
)
reorderSam
.
output
}
...
...
@@ -531,6 +540,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val
starCommand
=
Star
.
_2pass
(
this
,
zcatR1
.
_2
,
zcatR2
.
map
(
_
.
_2
),
outputDir
,
isIntermediate
=
true
)
addAll
(
starCommand
.
_2
)
val
ar
=
addAddOrReplaceReadGroups
(
starCommand
.
_1
,
output
)
ar
.
_1
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFile
add
(
ar
.
_1
)
ar
.
_2
}
...
...
@@ -547,7 +557,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addOrReplaceReadGroups
.
RGSM
=
sampleId
.
get
if
(
readgroupSequencingCenter
.
isDefined
)
addOrReplaceReadGroups
.
RGCN
=
readgroupSequencingCenter
.
get
if
(
readgroupDescription
.
isDefined
)
addOrReplaceReadGroups
.
RGDS
=
readgroupDescription
.
get
if
(!
skipMarkduplicates
)
addOrReplaceReadGroups
.
isIntermediate
=
tru
e
addOrReplaceReadGroups
.
isIntermediate
=
chunking
||
!
skipMarkduplicates
||
!
keepFinalBamFil
e
(
addOrReplaceReadGroups
,
addOrReplaceReadGroups
.
output
)
}
...
...
mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala
View file @
764fcb73
...
...
@@ -83,10 +83,10 @@ trait MultisampleMappingTrait extends MultiSampleQScript
"merge_strategy"
->
mergeStrategy
.
toString
)
def
makeSample
(
id
:
String
)
=
new
Sample
(
id
)
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
sample
=>
def
makeLibrary
(
id
:
String
)
=
new
Library
(
id
)
class
Library
(
libId
:
String
)
extends
AbstractLibrary
(
libId
)
{
class
Library
(
libId
:
String
)
extends
AbstractLibrary
(
libId
)
{
lib
=>
/** By default the bam files are put in the summary; more files can be added here */
def
summaryFiles
:
Map
[
String
,
File
]
=
(
inputR1
.
map
(
"input_R1"
->
_
)
::
inputR2
.
map
(
"input_R2"
->
_
)
::
...
...
@@ -101,22 +101,28 @@ trait MultisampleMappingTrait extends MultiSampleQScript
lazy
val
bamToFastq
:
Boolean
=
config
(
"bam_to_fastq"
,
default
=
false
)
lazy
val
correctReadgroups
:
Boolean
=
config
(
"correct_readgroups"
,
default
=
false
)
lazy
val
mapping
=
if
(
inputR1
.
isDefined
||
(
inputBam
.
isDefined
&&
bamToFastq
))
{
val
m
=
new
Mapping
(
qscript
)
def
keepFinalBamfile
=
samples
(
sampleId
).
libraries
.
size
==
1
/**
 * Mapping pipeline for this library, when there is something to map.
 *
 * Defined when an R1 input is present, or when an input BAM is present and
 * `bamToFastq` is set; `None` otherwise.
 */
lazy val mapping: Option[Mapping] =
  if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
    val m: Mapping = new Mapping(qscript) {
      override def configNamespace = "mapping"
      // The key has to be spelled exactly as Mapping reads it
      // ("keep_final_bam_file"); with any other spelling this default is
      // never looked up and the per-library setting has no effect.
      override def defaults: Map[String, Any] =
        super.defaults ++ Map("keep_final_bam_file" -> keepFinalBamfile)
    }
    m.sampleId = Some(sampleId)
    m.libId = Some(libId)
    m.outputDir = libDir
    Some(m)
  } else None
def
bamFile
=
mapping
match
{
def
bamFile
:
Option
[
File
]
=
mapping
match
{
case
Some
(
m
)
=>
Some
(
m
.
finalBamFile
)
case
_
if
inputBam
.
isDefined
=>
Some
(
new
File
(
libDir
,
s
"$sampleId-$libId.bam"
))
case
_
=>
None
}
/** By default the preProcessBam is the same as the normal bamFile. A pipeline can override this if there are preprocess steps */
def
preProcessBam
=
bamFile
def
preProcessBam
:
Option
[
File
]
=
bamFile
/** This method can be extended to add jobs to the pipeline; pipelines that do so must call the super implementation of this function */
def
addJobs
()
:
Unit
=
{
...
...
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala
View file @
764fcb73
...
...
@@ -86,6 +86,8 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait
c
&&
br
.
knownSites
.
nonEmpty
}
override
def
keepFinalBamfile
=
super
.
keepFinalBamfile
&&
!
useIndelRealigner
&&
!
useBaseRecalibration
override
def
preProcessBam
=
if
(
useIndelRealigner
&&
useBaseRecalibration
)
bamFile
.
map
(
swapExt
(
libDir
,
_
,
".bam"
,
".realign.baserecal.bam"
))
else
if
(
useIndelRealigner
)
bamFile
.
map
(
swapExt
(
libDir
,
_
,
".bam"
,
".realign.bam"
))
...
...
@@ -179,7 +181,7 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait
override
def
addMultiSampleJobs
()
=
{
super
.
addMultiSampleJobs
()
addAll
(
dbsnpVcfFile
.
map
(
Shiva
.
makeValidateVcfJobs
(
this
,
_
,
referenceFasta
())).
getOrElse
(
Nil
))
addAll
(
dbsnpVcfFile
.
map
(
Shiva
.
makeValidateVcfJobs
(
this
,
_
,
referenceFasta
()
,
new
File
(
outputDir
,
".validate"
)
)).
getOrElse
(
Nil
))
multisampleVariantCalling
.
foreach
(
vc
=>
{
vc
.
outputDir
=
new
File
(
outputDir
,
"variantcalling"
)
...
...
@@ -262,16 +264,18 @@ object Shiva extends PipelineCommand {
// This is used to only execute 1 validation per vcf file
private
var
validateVcfSeen
:
Set
[(
File
,
File
)]
=
Set
()
def
makeValidateVcfJobs
(
root
:
Configurable
,
vcfFile
:
File
,
referenceFile
:
File
)
:
List
[
QFunction
]
=
{
def
makeValidateVcfJobs
(
root
:
Configurable
,
vcfFile
:
File
,
referenceFile
:
File
,
outputDir
:
File
)
:
List
[
QFunction
]
=
{
if
(
validateVcfSeen
.
contains
((
vcfFile
,
referenceFile
)))
Nil
else
{
validateVcfSeen
++=
Set
((
vcfFile
,
referenceFile
))
val
validateVcf
=
new
ValidateVcf
(
root
)
validateVcf
.
inputVcf
=
vcfFile
validateVcf
.
reference
=
referenceFile
validateVcf
.
jobOutputFile
=
new
File
(
outputDir
,
vcfFile
.
getAbsolutePath
+
".validateVcf.out"
)
val
checkValidateVcf
=
new
CheckValidateVcf
checkValidateVcf
.
inputLogFile
=
validateVcf
.
jobOutputFile
checkValidateVcf
.
jobOutputFile
=
new
File
(
outputDir
,
vcfFile
.
getAbsolutePath
+
".checkValidateVcf.out"
)
List
(
validateVcf
,
checkValidateVcf
)
}
...
...
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala
View file @
764fcb73
...
...
@@ -85,7 +85,7 @@ class ShivaVariantcalling(val root: Configurable) extends QScript
require
(
inputBams
.
nonEmpty
,
"No input bams found"
)
require
(
callers
.
nonEmpty
,
"must select at least 1 variantcaller, choices are: "
+
callersList
.
map
(
_
.
name
).
mkString
(
", "
))
addAll
(
dbsnpVcfFile
.
map
(
Shiva
.
makeValidateVcfJobs
(
this
,
_
,
referenceFasta
())).
getOrElse
(
Nil
))
addAll
(
dbsnpVcfFile
.
map
(
Shiva
.
makeValidateVcfJobs
(
this
,
_
,
referenceFasta
()
,
new
File
(
outputDir
,
".validate"
)
)).
getOrElse
(
Nil
))
val
cv
=
new
CombineVariants
(
qscript
)
cv
.
out
=
finalFile
...
...
shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaploTypeCallerGvcfTest.scala
View file @
764fcb73
...
...
@@ -37,11 +37,11 @@ class HaploTypeCallerGvcfTest extends TestNGSuite with Matchers {
}
/**
 * Creates one temporary ".bam" file per sample name.
 *
 * @param samples sample names to create placeholder files for
 * @return map from each sample name to its temporary file; every file is
 *         registered with `deleteOnExit()`
 */
def createInputMap(samples: List[String]): Map[String, File] =
  samples.map { sample =>
    // File.createTempFile rejects prefixes shorter than three characters, so a
    // fixed lead-in keeps short sample names such as "a" from throwing.
    val file = File.createTempFile(s"sample_$sample.", ".bam")
    file.deleteOnExit()
    sample -> file
  }.toMap
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment