Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
8685e51a
Commit
8685e51a
authored
Sep 05, 2014
by
Peter van 't Hof
Browse files
Give all GATK functions their own class
parent
83c9a8e5
Changes
17
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala
View file @
8685e51a
package
nl.lumc.sasc.biopet.core
import
java.io.BufferedInputStream
//
import java.io.BufferedInputStream
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.queue.QException
import
org.broadinstitute.gatk.queue.function.CommandLineFunction
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
}
import
scala.io.Source
//
import scala.io.Source
import
scala.sys.process.
{
Process
,
ProcessLogger
}
import
scala.util.matching.Regex
import
java.io.FileInputStream
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala
View file @
8685e51a
...
...
@@ -8,6 +8,7 @@ class ConfigValue(val requestIndex: ConfigValueIndex, val foundIndex: ConfigValu
/** Returns `value` converted by `Configurable.any2double`. */
def getDouble = {
  Configurable.any2double(value)
}
/** Returns `value` converted by `Configurable.any2list`. */
def getList = {
  Configurable.any2list(value)
}
/**
 * Returns `value` as a list of files: each string produced by
 * `Configurable.any2stringList` is wrapped in a `java.io.File`.
 */
def getFileList: List[File] =
  Configurable.any2stringList(value).map(path => new File(path))
/** Returns `value` converted by `Configurable.any2stringList`. */
def getStringList: List[String] = {
  Configurable.any2stringList(value)
}
/** Returns `value` converted by `Configurable.any2map`. */
def getMap = {
  Configurable.any2map(value)
}
/** Returns `value` converted by `Configurable.any2boolean`. */
def getBoolean = {
  Configurable.any2boolean(value)
}
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `AnalyzeCovariates` extension.
 *
 * Adds nothing of its own; it only mixes `GatkGeneral` into the Queue class.
 *
 * @param root parent `Configurable` this function is created for
 */
class AnalyzeCovariates(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.AnalyzeCovariates
  with GatkGeneral
object AnalyzeCovariates {
  /**
   * Builds an [[AnalyzeCovariates]] function with its three file arguments set.
   *
   * @param root   parent `Configurable` passed to the constructor
   * @param before file assigned to the `before` argument
   * @param after  file assigned to the `after` argument
   * @param plots  file assigned to the `plots` argument
   * @return the configured function
   */
  def apply(root: Configurable, before: File, after: File, plots: File): AnalyzeCovariates = {
    val analyzer = new AnalyzeCovariates(root)
    analyzer.before = before
    analyzer.after = after
    analyzer.plots = plots
    analyzer
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `ApplyRecalibration` extension.
 *
 * On construction it takes `scattercount` from the config when present, sets
 * `nt` from `getThreads(3)` and sets `memoryLimit` to twice the thread count.
 * `ts_filter_level` is read from the config later, in `afterGraph`.
 *
 * @param root parent `Configurable` this function is created for
 */
class ApplyRecalibration(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }

  nt = Option(getThreads(3))
  // Memory limit is two units per thread; falls back to one thread if nt is empty.
  memoryLimit = Option(nt.getOrElse(1) * 2)

  /** Runs the parent hook, then reads `ts_filter_level` from the config. */
  override def afterGraph: Unit = {
    super.afterGraph
    ts_filter_level = config("ts_filter_level")
  }
}
object ApplyRecalibration {
  /**
   * Builds an [[ApplyRecalibration]] function in SNP or INDEL mode.
   *
   * The mode also decides the default registered for `ts_filter_level`:
   * 99.0 for INDEL and 99.5 for SNP.
   *
   * @param root          parent `Configurable` passed to the constructor
   * @param input         file appended to the `input` argument
   * @param output        file assigned to `out`
   * @param recal_file    file assigned to `recal_file`
   * @param tranches_file file assigned to `tranches_file`
   * @param indel         `true` for INDEL mode, `false` (default) for SNP mode
   * @return the configured function
   */
  def apply(root: Configurable, input: File, output: File, recal_file: File, tranches_file: File,
            indel: Boolean = false): ApplyRecalibration = {
    val recalibration =
      if (indel) {
        new ApplyRecalibration(root) {
          mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
          defaults ++= Map("ts_filter_level" -> 99.0)
        }
      } else {
        new ApplyRecalibration(root) {
          mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
          defaults ++= Map("ts_filter_level" -> 99.5)
        }
      }

    recalibration.input :+= input
    recalibration.recal_file = recal_file
    recalibration.tranches_file = tranches_file
    recalibration.out = output
    recalibration
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `BaseRecalibrator` extension.
 *
 * On construction it takes `scattercount` from the config when present and,
 * when a `dbsnp` entry exists, prepends that file to `knownSites`.
 *
 * @param root parent `Configurable` this function is created for
 */
class BaseRecalibrator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }

  if (config.contains("dbsnp")) {
    knownSites +:= new File(config("dbsnp"))
  }
}
object BaseRecalibrator {
  /**
   * Builds a [[BaseRecalibrator]] function for a single input file.
   *
   * @param root   parent `Configurable` passed to the constructor
   * @param input  file appended to `input_file`
   * @param output file assigned to `out`
   * @return the configured function
   */
  def apply(root: Configurable, input: File, output: File): BaseRecalibrator = {
    val recalibrator = new BaseRecalibrator(root)
    recalibrator.input_file :+= input
    recalibrator.out = output
    recalibrator
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `CombineGVCFs` extension.
 *
 * On construction it takes `scattercount` from the config when present.
 *
 * @param root parent `Configurable` this function is created for
 */
class CombineGVCFs(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.CombineGVCFs
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
object CombineGVCFs {
  /**
   * Builds a [[CombineGVCFs]] function.
   *
   * @param root   parent `Configurable` passed to the constructor
   * @param input  files assigned to `variant`
   * @param output file assigned to `o`
   * @return the configured function
   */
  def apply(root: Configurable, input: List[File], output: File): CombineGVCFs = {
    val combiner = new CombineGVCFs(root)
    combiner.variant = input
    combiner.o = output
    combiner
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `GenotypeGVCFs` extension.
 *
 * On construction it:
 *  - appends the configured `annotation` list (default: FisherStrand,
 *    QualByDepth, ChromosomeCounts) to `annotation`;
 *  - takes `dbsnp` and `scattercount` from the config when present;
 *  - sets `stand_call_conf` with a default of 20 when `inputtype` is "rna"
 *    and 30 otherwise (`inputtype` itself defaults to "dna");
 *  - sets `stand_emit_conf` with a default of 0.
 *
 * @param root parent `Configurable` this function is created for
 */
class GenotypeGVCFs(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs
  with GatkGeneral {

  annotation ++= config("annotation", default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")).getStringList

  if (config.contains("dbsnp")) dbsnp = config("dbsnp")

  // The presence check must use the same key and scope as the read below.
  // It previously checked ("scattercount", "genotypegvcfs") but read plain
  // "scattercount", unlike every sibling GATK wrapper.
  if (config.contains("scattercount")) scatterCount = config("scattercount")

  if (config("inputtype", default = "dna").getString == "rna") {
    stand_call_conf = config("stand_call_conf", default = 20)
  } else {
    stand_call_conf = config("stand_call_conf", default = 30)
  }
  // The emit threshold default does not depend on the input type.
  stand_emit_conf = config("stand_emit_conf", default = 0)
}
object GenotypeGVCFs {
  /**
   * Builds a [[GenotypeGVCFs]] function.
   *
   * @param root      parent `Configurable` passed to the constructor
   * @param gvcfFiles files assigned to `variant`
   * @param output    file assigned to `out`
   * @return the configured function
   */
  def apply(root: Configurable, gvcfFiles: List[File], output: File): GenotypeGVCFs = {
    val genotyper = new GenotypeGVCFs(root)
    genotyper.variant = gvcfFiles
    genotyper.out = output
    genotyper
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala
View file @
8685e51a
...
...
@@ -5,7 +5,7 @@ import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class
HaplotypeCaller
(
val
root
:
Configurable
)
extends
org
.
broadinstitute
.
gatk
.
queue
.
extensions
.
gatk
.
HaplotypeCaller
with
GatkGeneral
{
min_mapping_quality_score
=
config
(
"minMappingQualityScore"
,
default
=
20
)
if
(
config
.
contains
(
"scattercount"
,
"haplotypecaller"
))
scatterCount
=
config
(
"scattercount"
)
if
(
config
.
contains
(
"scattercount"
))
scatterCount
=
config
(
"scattercount"
)
if
(
config
.
contains
(
"dbsnp"
))
this
.
dbsnp
=
config
(
"dbsnp"
)
nct
=
config
(
"threads"
,
default
=
3
)
bamOutput
=
config
(
"bamOutput"
)
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `IndelRealigner` extension.
 *
 * On construction it takes `scattercount` from the config when present.
 *
 * @param root parent `Configurable` this function is created for
 */
class IndelRealigner(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
object IndelRealigner {
  /**
   * Builds an [[IndelRealigner]] function whose output lands in `outputDir`.
   *
   * The output name is the input file name with a trailing ".bam" removed
   * (if present) and ".realign.bam" appended.
   *
   * @param root            parent `Configurable` passed to the constructor
   * @param input           file appended to `input_file`
   * @param targetIntervals file assigned to `targetIntervals`
   * @param outputDir       directory the output file is created in
   * @return the configured function
   */
  def apply(root: Configurable, input: File, targetIntervals: File, outputDir: String): IndelRealigner = {
    val realigner = new IndelRealigner(root)
    realigner.input_file :+= input
    realigner.targetIntervals = targetIntervals

    val outputName = input.getName.stripSuffix(".bam") + ".realign.bam"
    realigner.out = new File(outputDir, outputName)
    realigner
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `PrintReads` extension.
 *
 * On construction it takes `scattercount` from the config when present.
 *
 * @param root parent `Configurable` this function is created for
 */
class PrintReads(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.PrintReads
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
object PrintReads {
  /**
   * Builds a [[PrintReads]] function for a single input file.
   *
   * @param root   parent `Configurable` passed to the constructor
   * @param input  file appended to `input_file`
   * @param output file assigned to `out`
   * @return the configured function
   */
  def apply(root: Configurable, input: File, output: File): PrintReads = {
    val printReads = new PrintReads(root)
    printReads.input_file :+= input
    printReads.out = output
    printReads
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `RealignerTargetCreator` extension.
 *
 * Overrides `defaultVmem` with "5G" and, on construction, takes
 * `scattercount` from the config when present.
 *
 * @param root parent `Configurable` this function is created for
 */
class RealignerTargetCreator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.RealignerTargetCreator
  with GatkGeneral {

  override val defaultVmem = "5G"

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
object RealignerTargetCreator {
  /**
   * Builds a [[RealignerTargetCreator]] function whose output lands in `outputDir`.
   *
   * The output name is the input file name with a trailing ".bam" removed
   * (if present) and ".realign.intervals" appended.
   *
   * @param root      parent `Configurable` passed to the constructor
   * @param input     file appended to `input_file`
   * @param outputDir directory the output file is created in
   * @return the configured function
   */
  def apply(root: Configurable, input: File, outputDir: String): RealignerTargetCreator = {
    val targetCreator = new RealignerTargetCreator(root)
    targetCreator.input_file :+= input

    val intervalsName = input.getName.stripSuffix(".bam") + ".realign.intervals"
    targetCreator.out = new File(outputDir, intervalsName)
    targetCreator
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `SelectVariants` extension.
 *
 * On construction it takes `scattercount` from the config when present.
 *
 * @param root parent `Configurable` this function is created for
 */
class SelectVariants(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.SelectVariants
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
object SelectVariants {
  /**
   * Builds a [[SelectVariants]] function.
   *
   * @param root   parent `Configurable` passed to the constructor
   * @param input  file assigned to `variant`
   * @param output file assigned to `out`
   * @return the configured function
   */
  def apply(root: Configurable, input: File, output: File): SelectVariants = {
    val selector = new SelectVariants(root)
    selector.variant = input
    selector.out = output
    selector
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet wrapper around the GATK Queue `VariantAnnotator` extension.
 *
 * On construction it takes `scattercount` from the config when present and
 * always assigns `dbsnp` from the config (no presence check).
 *
 * @param root parent `Configurable` this function is created for
 */
class VariantAnnotator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.VariantAnnotator
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }

  dbsnp = config("dbsnp")
}
object VariantAnnotator {
  /**
   * Builds a [[VariantAnnotator]] function.
   *
   * @param root     parent `Configurable` passed to the constructor
   * @param input    file assigned to `variant`
   * @param bamFiles files assigned to `input_file`
   * @param output   file assigned to `out`
   * @return the configured function
   */
  def apply(root: Configurable, input: File, bamFiles: List[File], output: File): VariantAnnotator = {
    val annotator = new VariantAnnotator(root)
    annotator.variant = input
    annotator.input_file = bamFiles
    annotator.out = output
    annotator
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala
0 → 100644
View file @
8685e51a
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
/**
 * Biopet wrapper around the GATK Queue `VariantRecalibrator` extension.
 *
 * On construction it:
 *  - sets `nt` from `getThreads(4)` and `memoryLimit` to twice the thread count;
 *  - appends the `dbsnp` file as a known, non-training resource when configured;
 *  - sets `an` from the `annotation` config entry (default: QD, DP, FS,
 *    ReadPosRankSum, MQRankSum);
 *  - assigns `minNumBadVariants` and `maxGaussians` from the config.
 *
 * @param root parent `Configurable` this function is created for
 */
class VariantRecalibrator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.VariantRecalibrator
  with GatkGeneral {

  nt = Option(getThreads(4))
  // Memory limit is two units per thread; falls back to one thread if nt is empty.
  memoryLimit = Option(nt.getOrElse(1) * 2)

  if (config.contains("dbsnp")) {
    resource :+= new TaggedFile(config("dbsnp").getString, "known=true,training=false,truth=false,prior=2.0")
  }

  an = config("annotation", default = List("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum")).getStringList
  minNumBadVariants = config("minnumbadvariants")
  maxGaussians = config("maxgaussians")
}
object VariantRecalibrator {
  /**
   * Builds a [[VariantRecalibrator]] function in SNP or INDEL mode.
   *
   * INDEL mode appends the `mills` resource when configured. SNP mode
   * prepends the `hapmap`, `omni` and `1000G` resources, in that order, for
   * each one that is configured. Both modes register a default for
   * `ts_filter_level` (99.0 for INDEL, 99.5 for SNP).
   *
   * @param root          parent `Configurable` passed to the constructor
   * @param input         file appended to the `input` argument
   * @param recal_file    file assigned to `recal_file`
   * @param tranches_file file assigned to `tranches_file`
   * @param indel         `true` for INDEL mode, `false` (default) for SNP mode
   * @return the configured function
   */
  def apply(root: Configurable, input: File, recal_file: File, tranches_file: File,
            indel: Boolean = false): VariantRecalibrator = {
    val recalibrator =
      if (indel) {
        new VariantRecalibrator(root) {
          mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
          defaults ++= Map("ts_filter_level" -> 99.0)
          if (config.contains("mills")) {
            resource :+= new TaggedFile(config("mills").getString, "known=false,training=true,truth=true,prior=12.0")
          }
        }
      } else {
        new VariantRecalibrator(root) {
          mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
          defaults ++= Map("ts_filter_level" -> 99.5)
          if (config.contains("hapmap")) {
            resource +:= new TaggedFile(config("hapmap").getString, "known=false,training=true,truth=true,prior=15.0")
          }
          if (config.contains("omni")) {
            resource +:= new TaggedFile(config("omni").getString, "known=false,training=true,truth=true,prior=12.0")
          }
          if (config.contains("1000G")) {
            resource +:= new TaggedFile(config("1000G").getString, "known=false,training=true,truth=false,prior=10.0")
          }
        }
      }

    recalibrator.input :+= input
    recalibrator.recal_file = recal_file
    recalibrator.tranches_file = tranches_file
    recalibrator
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala
View file @
8685e51a
...
...
@@ -2,8 +2,8 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.gatk.
{
GenotypeGVCFs
,
SelectVariants
}
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.extensions.gatk.
{
CommandLineGATK
,
GenotypeGVCFs
,
SelectVariants
}
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
,
Argument
}
class
GatkGenotyping
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
...
...
@@ -43,40 +43,16 @@ class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript
}
}
trait
gatkArguments
extends
CommandLineGATK
{
this
.
reference_sequence
=
reference
this
.
memoryLimit
=
2
this
.
jobResourceRequests
:+=
"h_vmem=4G"
}
def
addGenotypeGVCFs
(
gvcfFiles
:
List
[
File
],
outputFile
:
File
)
:
File
=
{
val
genotypeGVCFs
=
new
GenotypeGVCFs
()
with
gatkArguments
{
this
.
variant
=
gvcfFiles
if
(
config
.
contains
(
"dbsnp"
))
this
.
dbsnp
=
config
(
"dbsnp"
)
if
(
config
.
contains
(
"scattercount"
,
submodule
=
"genotypegvcfs"
))
this
.
scatterCount
=
config
(
"scattercount"
,
submodule
=
"genotypegvcfs"
)
this
.
out
=
outputFile
if
(
config
(
"inputtype"
,
"dna"
).
getString
==
"rna"
)
{
this
.
stand_call_conf
=
config
(
"stand_call_conf"
,
default
=
20
,
submodule
=
"haplotypecaller"
)
this
.
stand_emit_conf
=
config
(
"stand_emit_conf"
,
default
=
20
,
submodule
=
"haplotypecaller"
)
}
else
{
this
.
stand_call_conf
=
config
(
"stand_call_conf"
,
default
=
30
,
submodule
=
"haplotypecaller"
)
this
.
stand_emit_conf
=
config
(
"stand_emit_conf"
,
default
=
30
,
submodule
=
"haplotypecaller"
)
}
}
val
genotypeGVCFs
=
GenotypeGVCFs
(
this
,
gvcfFiles
,
outputFile
)
add
(
genotypeGVCFs
)
return
genotypeGVCFs
.
out
}
def
addSelectVariants
(
inputFile
:
File
,
samples
:
List
[
String
],
outputDir
:
String
,
name
:
String
)
{
val
selectVariants
=
new
SelectVariants
with
gatkArguments
{
this
.
variant
=
inputFile
for
(
sample
<-
samples
)
this
.
sample_name
:+=
sample
this
.
excludeNonVariants
=
true
if
(
config
.
contains
(
"scattercount"
,
submodule
=
"selectvariants"
))
this
.
scatterCount
=
config
(
"scattercount"
,
submodule
=
"selectvariants"
)
this
.
out
=
outputDir
+
name
+
".vcf"
}
val
selectVariants
=
SelectVariants
(
this
,
inputFile
,
outputDir
+
name
+
".vcf"
)
selectVariants
.
excludeNonVariants
=
true
for
(
sample
<-
samples
)
selectVariants
.
sample_name
:+=
sample
add
(
selectVariants
)
}
}
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
View file @
8685e51a
...
...
@@ -4,10 +4,9 @@ import nl.lumc.sasc.biopet.core.MultiSampleQScript
import
nl.lumc.sasc.biopet.core.PipelineCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
java.io.File
import
nl.lumc.sasc.biopet.extensions.gatk.
{
ApplyRecalibration
,
CombineGVCFs
,
VariantAnnotator
,
VariantRecalibrator
}
import
nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.extensions.gatk._
import
org.broadinstitute.gatk.queue.extensions.picard._
import
org.broadinstitute.gatk.utils.commandline.
{
Argument
}
class
GatkPipeline
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
...
...
@@ -45,7 +44,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
//SampleWide jobs
if
(
mergeGvcfs
&&
gvcfFiles
.
size
>
0
)
{
val
newFile
=
outputDir
+
"merged.gvcf.vcf"
addCombineGVCFs
(
gvcfFiles
,
newFile
)
add
(
CombineGVCFs
(
this
,
gvcfFiles
,
newFile
)
)
gvcfFiles
=
List
(
newFile
)
}
...
...
@@ -109,21 +108,16 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
def
addSnpVariantRecalibrator
(
inputVcf
:
File
,
dir
:
String
)
:
File
=
{
val
snpVariantRecalibrator
=
getVariantRecalibrator
(
"snp"
)
snpVariantRecalibrator
.
input
+:=
inputVcf
snpVariantRecalibrator
.
recal_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".snp.recal"
)
snpVariantRecalibrator
.
tranches_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".snp.tranches"
)
if
(!
snpVariantRecalibrator
.
resource
.
isEmpty
)
{
add
(
snpVariantRecalibrator
)
val
snpApplyRecalibration
=
getApplyRecalibration
(
"snp"
)
snpApplyRecalibration
.
input
+:=
inputVcf
snpApplyRecalibration
.
recal_file
=
snpVariantRecalibrator
.
recal_file
snpApplyRecalibration
.
tranches_file
=
snpVariantRecalibrator
.
tranches_file
snpApplyRecalibration
.
out
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".snp.recal.vcf"
)
add
(
snpApplyRecalibration
)
return
snpApplyRecalibration
.
out
val
snpRecal
=
VariantRecalibrator
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
),
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
),
indel
=
false
)
if
(!
snpRecal
.
resource
.
isEmpty
)
{
add
(
snpRecal
)
val
snpApply
=
ApplyRecalibration
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
),
snpRecal
.
recal_file
,
snpRecal
.
tranches_file
,
indel
=
false
)
add
(
snpApply
)
return
snpApply
.
out
}
else
{
logger
.
warn
(
"Skipped snp Recalibration, resource is missing"
)
return
inputVcf
...
...
@@ -131,103 +125,27 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
def
addIndelVariantRecalibrator
(
inputVcf
:
File
,
dir
:
String
)
:
File
=
{
val
indelVariantRecalibrator
=
getVariantRecalibrator
(
"indel"
)
indelVariantRecalibrator
.
input
+:=
inputVcf
indelVariantRecalibrator
.
recal_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
)
indelVariantRecalibrator
.
tranches_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
)
if
(!
indelVariantRecalibrator
.
resource
.
isEmpty
)
{
add
(
indelVariantRecalibrator
)
val
indelApplyRecalibration
=
getApplyRecalibration
(
"indel"
)
indelApplyRecalibration
.
input
+:=
inputVcf
indelApplyRecalibration
.
recal_file
=
indelVariantRecalibrator
.
recal_file
indelApplyRecalibration
.
tranches_file
=
indelVariantRecalibrator
.
tranches_file
indelApplyRecalibration
.
out
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
)
add
(
indelApplyRecalibration
)
return
indelApplyRecalibration
.
out
val
indelRecal
=
VariantRecalibrator
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
),
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
),
indel
=
true
)
if
(!
indelRecal
.
resource
.
isEmpty
)
{
add
(
indelRecal
)
val
indelApply
=
ApplyRecalibration
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
),
indelRecal
.
recal_file
,
indelRecal
.
tranches_file
,
indel
=
true
)
add
(
indelApply
)
return
indelApply
.
out
}
else
{
logger
.
warn
(
"Skipped indel Recalibration, resource is missing"
)
return
inputVcf
}
}
def
getVariantRecalibrator
(
mode_arg
:
String
)
:
VariantRecalibrator
=
{
val
variantRecalibrator
=
new
VariantRecalibrator
()
with
gatkArguments
{
if
(
mode_arg
==
"indel"
)
{
this
.
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
if
(
config
.
contains
(
"mills"
,
submodule
=
"variantrecalibrator"
))
this
.
resource
:+=
new
TaggedFile
(
config
(
"mills"
,
submodule
=
"variantrecalibrator"
).
getString
,
"known=false,training=true,truth=true,prior=12.0"
)
}
else
{
// SNP
this
.
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
if
(
config
.
contains
(
"hapmap"
,
submodule
=
"variantrecalibrator"
))
this
.
resource
+:=
new
TaggedFile
(
config
(
"hapmap"
,
submodule
=
"variantrecalibrator"
).
getString
,
"known=false,training=true,truth=true,prior=15.0"
)
if
(
config
.
contains
(
"omni"
,
submodule
=
"variantrecalibrator"
))
this
.
resource
+:=
new
TaggedFile
(
config
(
"omni"
,
submodule
=
"variantrecalibrator"
).
getString
,
"known=false,training=true,truth=true,prior=12.0"
)
if
(
config
.
contains
(
"1000G"
,
submodule
=
"variantrecalibrator"
))
this
.
resource
+:=
new
TaggedFile
(
config
(
"1000G"
,
submodule
=
"variantrecalibrator"
).
getString
,
"known=false,training=true,truth=false,prior=10.0"
)
}
if
(
config
.
contains
(
"dbsnp"
,
submodule
=
"variantrecalibrator"
))
this
.
resource
:+=
new
TaggedFile
(
config
(
"dbsnp"
,
submodule
=
"variantrecalibrator"
).
getString
,
"known=true,training=false,truth=false,prior=2.0"
)
this
.
nt
=
4
this
.
memoryLimit
=
nt
*
2
this
.
an
=
Seq
(
"QD"
,
"DP"
,
"FS"
,
"ReadPosRankSum"
,
"MQRankSum"
)
if
(
config
.
contains
(
"minnumbadvariants"
,
submodule
=
"variantrecalibrator"
))
this
.
minNumBadVariants
=
config
(
"minnumbadvariants"
,
submodule
=
"variantrecalibrator"
)
if
(
config
.
contains
(
"maxgaussians"
,
submodule
=
"variantrecalibrator"
))
this
.
maxGaussians
=
config
(
"maxgaussians"
,
submodule
=
"variantrecalibrator"
)
}
return
variantRecalibrator
}
def
getApplyRecalibration
(
mode_arg
:
String
)
:
ApplyRecalibration
=
{
val
applyRecalibration
=
new
ApplyRecalibration
()
with
gatkArguments
{
if
(
mode_arg
==
"indel"
)
{
this
.
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
this
.
ts_filter_level
=
config
(
"ts_filter_level"
,
default
=
99.0
,
submodule
=
"applyrecalibration"
)
}
else
{
// SNP
this
.
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
this
.
ts_filter_level
=
config
(
"ts_filter_level"
,
default
=
99.5
,
submodule
=
"applyrecalibration"
)
}
this
.
nt
=
3
this
.
memoryLimit
=
nt
*
2
if
(
config
.
contains
(
"scattercount"
,
submodule
=
"applyrecalibration"
))
this
.
scatterCount
=
config
(
"scattercount"
,
submodule
=
"applyrecalibration"
)
}
return
applyRecalibration
}
def
addVariantAnnotator
(
inputvcf
:
File
,
bamfiles
:
List
[
File
],
dir
:
String
)
:
File
=
{
val
variantAnnotator
=
new
VariantAnnotator
with
gatkArguments
{
this
.
variant
=
inputvcf
this
.
input_file
=
bamfiles
this
.
dbsnp
=
config
(
"dbsnp"
,
submodule
=
"variantannotator"
)
this
.
out
=
swapExt
(
dir
,
inputvcf
,
".vcf"
,
".anotated.vcf"
)
if
(
config
.
contains
(
"scattercount"
,
submodule
=
"variantannotator"
))
this
.
scatterCount
=
config
(
"scattercount"
,
submodule
=
"variantannotator"
)
}
val
variantAnnotator
=
VariantAnnotator
(
this
,
inputvcf
,
bamfiles
,
swapExt
(
dir
,
inputvcf
,
".vcf"
,
".anotated.vcf"
))
add
(
variantAnnotator
)
return
variantAnnotator
.
out
}
def
addCombineGVCFs
(
input
:
List
[
File
],
output
:
File
)
:
File
=
{
val
combineGVCFs
=
new
CombineGVCFs
with
gatkArguments
{
this
.
variant
=
input
this
.
o
=
output
if
(
config
.
contains
(
"scattercount"
,
submodule
=
"variantannotator"
))
this
.
scatterCount
=
config
(
"scattercount"
,
submodule
=
"combinegvcfs"
)
}
add
(
combineGVCFs
)
return
output
}
trait
gatkArguments
extends
CommandLineGATK
{
this
.
reference_sequence
=
reference
this
.
memoryLimit
=
2
this
.
jobResourceRequests
:+=
"h_vmem=4G"
}
}
object
GatkPipeline
extends
PipelineCommand
{
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
View file @
8685e51a
...
...
@@ -2,13 +2,9 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions._
import
nl.lumc.sasc.biopet.extensions.gatk.HaplotypeCaller
import
nl.lumc.sasc.biopet.extensions.gatk.
{
AnalyzeCovariates
,
BaseRecalibrator
,
GenotypeGVCFs
,
HaplotypeCaller
,
IndelRealigner
,
PrintReads
,
RealignerTargetCreator
}
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.extensions.gatk.
{
BaseRecalibrator
,
CommandLineGATK
,
IndelRealigner
,
PrintReads
,
RealignerTargetCreator
,
GenotypeGVCFs
,
AnalyzeCovariates
}
import
org.broadinstitute.gatk.queue.function._
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
,
Argument
}
import
org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class
GatkVariantcalling
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
...
...
@@ -53,69 +49,29 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
if
(
gvcfMode
&&
singleGenotyping
)
addGenotypeGVCFs
(
List
(
outputFile
),
outputDir
)
}
trait
gatkArguments
extends
CommandLineGATK
{
this
.
reference_sequence
=
reference
this
.
memoryLimit
=
2
this
.
jobResourceRequests
:+=
"h_vmem=4G"
}
def
addIndelRealign
(
inputBam
:
File
,
dir
:
String
)
:
File
=
{
val
realignerTargetCreator
=
new
RealignerTargetCreator
with
gatkArguments
{
this
.
I
:+=
inputBam
this
.
o
=
swapExt
(
dir
,
inputBam
,
".bam"
,
".realign.intervals"
)
this
.
jobResourceRequests
:+=
"h_vmem=5G"
if
(
config
.
contains
(
"scattercount"
,
"realignertargetcreator"
))
this
.
scatterCount
=
config
(
"scattercount"
,
1
,
"realignertargetcreator"
)
}