Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
09190f82
Commit
09190f82
authored
May 27, 2014
by
Peter van 't Hof
Browse files
Added general trait (BiopetQScript) that now have multisample functions
parent
5d0b4fc9
Changes
6
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/java/nl/lumc/sasc/biopet/core/BiopetQScript.scala
0 → 100644
View file @
09190f82
package
nl.lumc.sasc.biopet.core
//import org.broadinstitute.sting.queue.QScript
import
java.io.File
import
org.broadinstitute.sting.queue.util.Logging
trait
BiopetQScript
extends
Logging
{
var
config
:
Config
=
_
var
outputFiles
:
Map
[
String
,
File
]
=
Map
()
def
runSamplesJobs
:
Map
[
String
,
Map
[
String
,
File
]]
=
{
var
output
:
Map
[
String
,
Map
[
String
,
File
]]
=
Map
()
if
(
config
.
contains
(
"samples"
))
for
((
key
,
value
)
<-
config
.
getAsMap
(
"samples"
))
{
var
sample
:
Config
=
config
.
getAsConfig
(
"samples"
).
getAsConfig
(
key
)
if
(!
sample
.
contains
(
"ID"
))
sample
.
map
+=
(
"ID"
->
key
)
if
(
sample
.
getAsString
(
"ID"
)
==
key
)
{
var
files
:
Map
[
String
,
List
[
File
]]
=
runSingleSampleJobs
(
sample
)
}
else
logger
.
warn
(
"Key is not the same as ID on value for sample"
)
}
else
logger
.
warn
(
"No Samples found in config"
)
return
output
}
def
runSingleSampleJobs
(
sample
:
String
)
:
Map
[
String
,
List
[
File
]]
={
return
runSingleSampleJobs
(
config
.
getAsConfig
(
"samples"
).
getAsConfig
(
sample
))
}
def
runSingleSampleJobs
(
sampleConfig
:
Config
)
:
Map
[
String
,
List
[
File
]]
=
{
logger
.
debug
(
"Default sample function, function 'runSingleSampleJobs' not defined in pipeline"
)
runRunsJobs
(
sampleConfig
)
return
Map
()
}
def
runRunsJobs
(
sampleConfig
:
Config
)
:
Map
[
String
,
Map
[
String
,
File
]]
=
{
var
output
:
Map
[
String
,
Map
[
String
,
File
]]
=
Map
()
val
sampleID
=
sampleConfig
.
getAsString
(
"ID"
)
if
(
sampleConfig
.
contains
(
"runs"
))
for
(
key
<-
sampleConfig
.
getAsMap
(
"runs"
).
keySet
)
{
var
run
=
sampleConfig
.
getAsConfig
(
"runs"
).
getAsConfig
(
key
)
if
(!
run
.
contains
(
"ID"
))
run
.
map
+=
(
"ID"
->
key
)
if
(
run
.
getAsString
(
"ID"
)
==
key
)
{
output
+=
key
->
runSingleRunJobs
(
run
,
sampleConfig
)
}
else
logger
.
warn
(
"Key is not the same as ID on value for run of sample: "
+
sampleID
)
}
else
logger
.
warn
(
"No runs found in config for sample: "
+
sampleID
)
return
output
}
def
runSingleRunJobs
(
runConfig
:
Config
,
sampleConfig
:
Config
)
:
Map
[
String
,
File
]
=
{
logger
.
debug
(
"Default run function, function 'runSingleRunJobs' not defined in pipeline"
)
return
Map
()
}
}
biopet-framework/src/main/java/nl/lumc/sasc/biopet/core/Config.scala
View file @
09190f82
...
...
@@ -4,7 +4,7 @@ import scala.util.parsing.json._
import
java.io.File
import
org.broadinstitute.sting.queue.util.Logging
class
Config
(
private
var
map
:
Map
[
String
,
Any
])
extends
Logging
{
class
Config
(
var
map
:
Map
[
String
,
Any
])
extends
Logging
{
def
this
()
=
{
this
(
Map
())
logger
.
info
(
"Init phase of config"
)
...
...
flexiprep/src/main/java/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
View file @
09190f82
...
...
@@ -11,18 +11,17 @@ import scala.util.parsing.json._
import
org.broadinstitute.sting.commandline._
import
nl.lumc.sasc.biopet.pipelines.flexiprep.scripts._
class
Flexiprep
(
private
var
globalConfig
:
Config
)
extends
QScript
{
class
Flexiprep
(
private
var
globalConfig
:
Config
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
new
Config
())
@Argument
(
doc
=
"Config Json file"
,
shortName
=
"config"
,
required
=
false
)
va
r
configfiles
:
List
[
File
]
=
Nil
@Argument
(
doc
=
"Config Json file"
,
shortName
=
"config"
,
required
=
false
)
va
l
configfiles
:
List
[
File
]
=
Nil
@Input
(
doc
=
"R1 fastq file (gzipped allowed)"
,
shortName
=
"R1"
,
required
=
true
)
var
input_R1
:
File
=
_
@Input
(
doc
=
"R2 fastq file (gzipped allowed)"
,
shortName
=
"R2"
,
required
=
false
)
var
input_R2
:
File
=
_
@Argument
(
doc
=
"Output directory"
,
shortName
=
"outputDir"
,
required
=
true
)
var
outputDir
:
String
=
_
@Argument
(
doc
=
"Skip Trim fastq files"
,
shortName
=
"skiptrim"
,
required
=
false
)
var
skipTrim
:
Boolean
=
false
@Argument
(
doc
=
"Skip Clip fastq files"
,
shortName
=
"skipclip"
,
required
=
false
)
var
skipClip
:
Boolean
=
false
var
config
:
Config
=
_
var
outputFiles
:
Map
[
String
,
File
]
=
Map
()
//var outputFiles:Map[String,File] = Map()
var
paired
:
Boolean
=
(
input_R2
!=
null
)
def
init
()
{
...
...
@@ -38,7 +37,6 @@ class Flexiprep(private var globalConfig: Config) extends QScript {
def
script
()
{
init
()
runInitialFastqc
()
outputFiles
+=
(
"output_R1"
->
zcatIfNeeded
(
input_R1
,
outputDir
))
...
...
@@ -53,7 +51,6 @@ class Flexiprep(private var globalConfig: Config) extends QScript {
results
=
runTrimClip
(
outputFiles
(
"output_R1"
),
outputDir
)
outputFiles
+=
(
"output_R1"
->
results
(
"output_R1"
))
}
runFinalFastqc
()
}
...
...
gatk/examples/test.json
View file @
09190f82
...
...
@@ -14,10 +14,10 @@
"1000G"
:
"/home/pjvan_thof/pipelines/test/test"
,
"mills"
:
"/home/pjvan_thof/pipelines/test/test"
},
"
S
amples"
:
{
"
s
amples"
:
{
"test"
:
{
"ID"
:
"test"
,
"
R
uns"
:
{
"
r
uns"
:
{
"1"
:
{
"ID"
:
"1"
,
"R1"
:
"/home/pjvan_thof/pipelines/test/test.fastq"
...
...
gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala
View file @
09190f82
...
...
@@ -12,23 +12,19 @@ import org.broadinstitute.sting.queue.function._
import
scala.util.parsing.json._
import
org.broadinstitute.sting.utils.variant._
class
Gatk
(
private
var
globalConfig
:
Config
)
extends
QScript
{
class
Gatk
(
private
var
globalConfig
:
Config
)
extends
QScript
with
BiopetQScript
{
qscript
=>
def
this
()
=
this
(
new
Config
())
@Argument
(
doc
=
"Config Json file"
,
shortName
=
"config"
)
var
configfiles
:
List
[
File
]
=
Nil
@Argument
(
doc
=
"Only Sample"
,
shortName
=
"sample"
,
required
=
false
)
var
onlySample
:
String
=
_
@Argument
(
doc
=
"Only Sample"
,
shortName
=
"sample"
,
required
=
false
)
var
onlySample
:
String
=
""
@Argument
(
doc
=
"Output directory"
,
shortName
=
"outputDir"
,
required
=
true
)
var
outputDir
:
String
=
_
def
this
()
=
this
(
new
Config
())
var
config
:
Config
=
_
//
var config: Config = _
var
referenceFile
:
File
=
_
var
dbsnp
:
File
=
_
var
gvcfFiles
:
List
[
File
]
=
Nil
trait
gatkArguments
extends
CommandLineGATK
{
this
.
reference_sequence
=
referenceFile
this
.
memoryLimit
=
2
this
.
jobResourceRequests
:+=
"h_vmem=4G"
}
def
init
()
{
for
(
file
<-
configfiles
)
globalConfig
.
loadConfigFile
(
file
)
config
=
Config
.
mergeConfigs
(
globalConfig
.
getAsConfig
(
"gatk"
),
globalConfig
)
...
...
@@ -40,154 +36,46 @@ class Gatk(private var globalConfig: Config) extends QScript {
}
def
script
()
{
this
.
init
()
if
(
config
.
contains
(
"Samples"
))
for
((
key
,
value
)
<-
config
.
getAsMap
(
"Samples"
))
{
if
(
onlySample
==
null
||
onlySample
==
key
)
{
var
sample
:
Config
=
config
.
getAsConfig
(
"Samples"
).
getAsConfig
(
key
)
if
(
sample
.
getAsString
(
"ID"
)
==
key
)
{
var
files
:
Map
[
String
,
List
[
File
]]
=
sampleJobs
(
sample
)
if
(
files
.
contains
(
"gvcf"
))
for
(
file
<-
files
(
"gvcf"
))
gvcfFiles
:+=
file
}
else
logger
.
warn
(
"Key is not the same as ID on value for sample"
)
}
else
logger
.
info
(
"Skipping Sample: "
+
key
)
}
else
logger
.
warn
(
"No Samples found in config"
)
if
(
onlySample
==
null
)
{
init
()
if
(
onlySample
.
isEmpty
)
{
runSamplesJobs
//SampleWide jobs
if
(
gvcfFiles
.
size
>
0
)
{
val
genotypeGVCFs
=
new
GenotypeGVCFs
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"genotypegvcfs"
),
qscript
.
config
)
this
.
variant
=
gvcfFiles
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
this
.
out
=
new
File
(
outputDir
,
"final.vcf"
)
}
add
(
genotypeGVCFs
)
//Snp recal
val
snpVariantRecalibrator
=
new
VariantRecalibrator
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"variantrecalibrator"
),
qscript
.
config
)
this
.
input
+:=
genotypeGVCFs
.
out
this
.
nt
=
4
this
.
memoryLimit
=
2
*
nt
this
.
recal_file
=
swapExt
(
genotypeGVCFs
.
out
,
".vcf"
,
".snp.recal"
)
this
.
tranches_file
=
swapExt
(
genotypeGVCFs
.
out
,
".vcf"
,
".snp.tranches"
)
this
.
resource
=
Seq
(
new
TaggedFile
(
config
.
getAsString
(
"hapmap"
),
"known=false,training=true,truth=true,prior=15.0"
),
new
TaggedFile
(
config
.
getAsString
(
"omni"
),
"known=false,training=true,truth=true,prior=12.0"
),
new
TaggedFile
(
config
.
getAsString
(
"1000G"
),
"known=false,training=true,truth=false,prior=10.0"
),
new
TaggedFile
(
config
.
getAsString
(
"dbsnp"
),
"known=true,training=false,truth=false,prior=2.0"
))
this
.
an
=
Seq
(
"QD"
,
"MQ"
,
"MQRankSum"
,
"ReadPosRankSum"
,
"FS"
,
"DP"
,
"InbreedingCoeff"
)
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
}
add
(
snpVariantRecalibrator
)
val
snpApplyRecalibration
=
new
ApplyRecalibration
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"applyrecalibration"
),
qscript
.
config
)
this
.
input
+:=
genotypeGVCFs
.
out
this
.
recal_file
=
snpVariantRecalibrator
.
recal_file
this
.
tranches_file
=
snpVariantRecalibrator
.
tranches_file
this
.
out
=
swapExt
(
genotypeGVCFs
.
out
,
".vcf"
,
".snp.recal.vcf"
)
this
.
ts_filter_level
=
99.5
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
this
.
nt
=
3
this
.
memoryLimit
=
2
*
nt
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
}
add
(
snpApplyRecalibration
)
//indel recal
val
indelVariantRecalibrator
=
new
VariantRecalibrator
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"variantrecalibrator"
),
qscript
.
config
)
this
.
input
+:=
genotypeGVCFs
.
out
this
.
nt
=
4
this
.
memoryLimit
=
2
*
nt
this
.
recal_file
=
swapExt
(
genotypeGVCFs
.
out
,
".vcf"
,
".indel.recal"
)
this
.
tranches_file
=
swapExt
(
genotypeGVCFs
.
out
,
".vcf"
,
".indel.tranches"
)
this
.
resource
:+=
new
TaggedFile
(
config
.
getAsString
(
"mills"
),
"known=false,training=true,truth=true,prior=12.0"
)
this
.
resource
:+=
new
TaggedFile
(
config
.
getAsString
(
"dbsnp"
),
"known=true,training=false,truth=false,prior=2.0"
)
this
.
an
=
Seq
(
"QD"
,
"DP"
,
"FS"
,
"ReadPosRankSum"
,
"MQRankSum"
,
"InbreedingCoeff"
)
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
}
add
(
indelVariantRecalibrator
)
val
indelApplyRecalibration
=
new
ApplyRecalibration
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"applyrecalibration"
),
qscript
.
config
)
this
.
input
+:=
genotypeGVCFs
.
out
this
.
recal_file
=
indelVariantRecalibrator
.
recal_file
this
.
tranches_file
=
indelVariantRecalibrator
.
tranches_file
this
.
out
=
swapExt
(
genotypeGVCFs
.
out
,
".vcf"
,
".indel.recal.vcf"
)
this
.
ts_filter_level
=
99.0
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
this
.
nt
=
3
this
.
memoryLimit
=
2
*
nt
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
}
add
(
indelApplyRecalibration
)
var
vcfFile
=
addGenotypeGVCFs
(
gvcfFiles
,
outputDir
+
"recalibration/"
)
vcfFile
=
addSnpVariantRecalibrator
(
vcfFile
,
outputDir
+
"recalibration/"
)
vcfFile
=
addIndelVariantRecalibrator
(
vcfFile
,
outputDir
+
"recalibration/"
)
}
else
logger
.
warn
(
"No gVCFs to genotype"
)
}
}
else
runSingleSampleJobs
(
onlySample
)
}
// Called for each sample
def
s
ampleJobs
(
sampleConfig
:
Config
)
:
Map
[
String
,
List
[
File
]]
=
{
override
def
runSingleS
ampleJobs
(
sampleConfig
:
Config
)
:
Map
[
String
,
List
[
File
]]
=
{
var
outputFiles
:
Map
[
String
,
List
[
File
]]
=
Map
()
outputFiles
+=
(
"FinalBams"
->
List
())
var
runs
:
List
[
Map
[
String
,
File
]]
=
Nil
if
(
sampleConfig
.
contains
(
"ID"
))
{
var
sampleID
:
String
=
sampleConfig
.
getAsString
(
"ID"
)
this
.
logger
.
info
(
"Starting generate jobs for sample: "
+
sampleID
)
for
(
key
<-
sampleConfig
.
getAsMap
(
"Runs"
).
keySet
)
{
var
runConfig
=
sampleConfig
.
getAsConfig
(
"Runs"
).
getAsConfig
(
key
)
var
run
:
Map
[
String
,
File
]
=
runJobs
(
runConfig
,
sampleConfig
)
var
FinalBams
:
List
[
File
]
=
outputFiles
(
"FinalBams"
)
if
(
run
.
contains
(
"FinalBam"
))
FinalBams
:+=
run
(
"FinalBam"
)
else
logger
.
warn
(
"No Final bam for Sample: "
+
sampleID
+
" Run: "
+
runConfig
)
outputFiles
+=
(
"FinalBams"
->
FinalBams
)
runs
+:=
run
}
// Variant calling
val
haplotypeCaller
=
new
HaplotypeCaller
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"haplotypecaller"
),
qscript
.
config
)
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
this
.
input_file
=
outputFiles
(
"FinalBams"
)
this
.
out
=
new
File
(
outputDir
,
sampleID
+
"/"
+
sampleID
+
".gvcf.vcf"
)
if
(
dbsnp
!=
null
)
this
.
dbsnp
=
qscript
.
dbsnp
this
.
nct
=
3
this
.
memoryLimit
=
this
.
nct
*
2
// GVCF options
this
.
emitRefConfidence
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
haplotypecaller
.
HaplotypeCaller
.
ReferenceConfidenceMode
.
GVCF
this
.
variant_index_type
=
GATKVCFIndexType
.
LINEAR
this
.
variant_index_parameter
=
128000
}
if
(
haplotypeCaller
.
input_file
.
size
>
0
)
{
add
(
haplotypeCaller
)
outputFiles
+=
(
"gvcf"
->
List
(
haplotypeCaller
.
out
))
}
}
else
{
this
.
logger
.
warn
(
"Sample in config missing ID, skipping sample"
)
var
runBamfiles
:
List
[
File
]
=
List
()
var
sampleID
:
String
=
sampleConfig
.
getAsString
(
"ID"
)
for
((
run
,
runFiles
)
<-
runRunsJobs
(
sampleConfig
))
{
runBamfiles
+:=
runFiles
(
"FinalBam"
)
}
outputFiles
+=
(
"FinalBams"
->
runBamfiles
)
if
(
runBamfiles
.
size
>
0
)
{
var
gvcfFile
=
addHaplotypeCaller
(
runBamfiles
,
new
File
(
outputDir
+
sampleID
+
"/"
+
sampleID
+
".gvcf.vcf"
))
outputFiles
+=
(
"gvcf"
->
List
(
gvcfFile
))
gvcfFiles
:+=
gvcfFile
}
else
logger
.
warn
(
"No bamfiles for variant calling for sample: "
+
sampleID
)
return
outputFiles
}
// Called for each run from a sample
def
r
unJobs
(
runConfig
:
Config
,
sampleConfig
:
Config
)
:
Map
[
String
,
File
]
=
{
override
def
runSingleR
unJobs
(
runConfig
:
Config
,
sampleConfig
:
Config
)
:
Map
[
String
,
File
]
=
{
var
outputFiles
:
Map
[
String
,
File
]
=
Map
()
val
runID
:
String
=
runConfig
.
getAsString
(
"ID"
)
val
fastq_R1
:
String
=
runConfig
.
getAsString
(
"R1"
,
null
)
val
fastq_R2
:
String
=
runConfig
.
getAsString
(
"R2"
,
null
)
val
paired
:
Boolean
=
(
fastq_R2
!=
null
)
val
sampleID
:
String
=
sampleConfig
.
get
(
"ID"
).
toString
if
(
fastq_R1
!=
null
)
{
val
runDir
:
String
=
outputDir
+
sampleID
+
"/run_"
+
runID
+
"/"
val
runDir
:
String
=
outputDir
+
sampleID
+
"/run_"
+
runID
+
"/"
if
(
runConfig
.
contains
(
"R1"
))
{
val
mapping
=
new
Mapping
(
config
)
mapping
.
input_R1
=
fastq_R1
if
(
paired
)
mapping
.
input_R2
=
fastq_R2
mapping
.
outputDir
=
runDir
+
"mapping/"
mapping
.
RGSM
=
sampleID
mapping
.
RGLB
=
runID
if
(
runConfig
.
contains
(
"PL"
))
mapping
.
RGPL
=
runConfig
.
getAsString
(
"PL"
)
if
(
runConfig
.
contains
(
"PU"
))
mapping
.
RGPU
=
runConfig
.
getAsString
(
"PU"
)
if
(
runConfig
.
contains
(
"CN"
))
mapping
.
RGCN
=
runConfig
.
getAsString
(
"CN"
)
mapping
.
loadRunConfig
(
runConfig
,
sampleConfig
,
runDir
)
mapping
.
script
addAll
(
mapping
.
functions
)
// Add functions of mapping to curent function pool
...
...
@@ -195,24 +83,24 @@ class Gatk(private var globalConfig: Config) extends QScript {
bamFile
=
addBaseRecalibrator
(
bamFile
,
runDir
)
// Base recalibrator
outputFiles
+=
(
"FinalBam"
->
bamFile
)
}
else
this
.
logger
.
error
(
"Sample: "
+
sampleID
+
": No R1 found for run
s
: "
+
runConfig
)
}
else
this
.
logger
.
error
(
"Sample: "
+
sampleID
+
": No R1 found for run: "
+
runConfig
)
return
outputFiles
}
def
addMarkDuplicates
(
inputBams
:
List
[
File
],
outputFile
:
File
,
dir
:
String
)
:
File
=
{
val
markDuplicates
=
new
MarkDuplicates
{
this
.
input
=
inputBams
this
.
output
=
outputFile
this
.
REMOVE_DUPLICATES
=
false
this
.
metrics
=
swapExt
(
dir
,
outputFile
,
".bam"
,
".metrics"
)
this
.
outputIndex
=
swapExt
(
dir
,
this
.
output
,
".bam"
,
".bai"
)
this
.
memoryLimit
=
2
this
.
jobResourceRequests
:+=
"h_vmem=4G"
}
add
(
markDuplicates
)
// def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = {
// val markDuplicates = new MarkDuplicates {
// this.input = inputBams
// this.output = outputFile
// this.REMOVE_DUPLICATES = false
// this.metrics = swapExt(dir,outputFile,".bam",".metrics")
// this.outputIndex = swapExt(dir,this.output,".bam",".bai")
// this.memoryLimit = 2
// this.jobResourceRequests :+= "h_vmem=4G"
// }
// add(markDuplicates)
//
// return markDuplicates.output
// }
return
markDuplicates
.
output
}
def
addIndelRealign
(
inputBam
:
File
,
dir
:
String
)
:
File
=
{
val
realignerTargetCreator
=
new
RealignerTargetCreator
with
gatkArguments
{
...
...
@@ -257,6 +145,109 @@ class Gatk(private var globalConfig: Config) extends QScript {
return
printReads
.
o
}
def
addHaplotypeCaller
(
bamfiles
:
List
[
File
],
outputfile
:
File
)
:
File
=
{
val
haplotypeCaller
=
new
HaplotypeCaller
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"haplotypecaller"
),
qscript
.
config
)
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
this
.
input_file
=
bamfiles
this
.
out
=
outputfile
if
(
dbsnp
!=
null
)
this
.
dbsnp
=
qscript
.
dbsnp
this
.
nct
=
3
this
.
memoryLimit
=
this
.
nct
*
2
// GVCF options
this
.
emitRefConfidence
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
haplotypecaller
.
HaplotypeCaller
.
ReferenceConfidenceMode
.
GVCF
this
.
variant_index_type
=
GATKVCFIndexType
.
LINEAR
this
.
variant_index_parameter
=
128000
}
add
(
haplotypeCaller
)
return
haplotypeCaller
.
out
}
def
addSnpVariantRecalibrator
(
inputVcf
:
File
,
dir
:
String
)
:
File
=
{
val
snpVariantRecalibrator
=
new
VariantRecalibrator
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"variantrecalibrator"
),
qscript
.
config
)
this
.
input
+:=
inputVcf
this
.
nt
=
4
this
.
memoryLimit
=
2
*
nt
this
.
recal_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".snp.recal"
)
this
.
tranches_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".snp.tranches"
)
this
.
resource
=
Seq
(
new
TaggedFile
(
config
.
getAsString
(
"hapmap"
),
"known=false,training=true,truth=true,prior=15.0"
),
new
TaggedFile
(
config
.
getAsString
(
"omni"
),
"known=false,training=true,truth=true,prior=12.0"
),
new
TaggedFile
(
config
.
getAsString
(
"1000G"
),
"known=false,training=true,truth=false,prior=10.0"
),
new
TaggedFile
(
config
.
getAsString
(
"dbsnp"
),
"known=true,training=false,truth=false,prior=2.0"
))
this
.
an
=
Seq
(
"QD"
,
"MQ"
,
"MQRankSum"
,
"ReadPosRankSum"
,
"FS"
,
"DP"
,
"InbreedingCoeff"
)
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
}
add
(
snpVariantRecalibrator
)
val
snpApplyRecalibration
=
new
ApplyRecalibration
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"applyrecalibration"
),
qscript
.
config
)
this
.
input
+:=
inputVcf
this
.
recal_file
=
snpVariantRecalibrator
.
recal_file
this
.
tranches_file
=
snpVariantRecalibrator
.
tranches_file
this
.
out
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".snp.recal.vcf"
)
this
.
ts_filter_level
=
99.5
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
this
.
nt
=
3
this
.
memoryLimit
=
2
*
nt
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
}
add
(
snpApplyRecalibration
)
return
snpApplyRecalibration
.
out
}
def
addIndelVariantRecalibrator
(
inputVcf
:
File
,
dir
:
String
)
:
File
=
{
val
indelVariantRecalibrator
=
new
VariantRecalibrator
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"variantrecalibrator"
),
qscript
.
config
)
this
.
input
+:=
inputVcf
this
.
nt
=
4
this
.
memoryLimit
=
2
*
nt
this
.
recal_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
)
this
.
tranches_file
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
)
this
.
resource
:+=
new
TaggedFile
(
config
.
getAsString
(
"mills"
),
"known=false,training=true,truth=true,prior=12.0"
)
this
.
resource
:+=
new
TaggedFile
(
config
.
getAsString
(
"dbsnp"
),
"known=true,training=false,truth=false,prior=2.0"
)
this
.
an
=
Seq
(
"QD"
,
"DP"
,
"FS"
,
"ReadPosRankSum"
,
"MQRankSum"
,
"InbreedingCoeff"
)
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
}
add
(
indelVariantRecalibrator
)
val
indelApplyRecalibration
=
new
ApplyRecalibration
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"applyrecalibration"
),
qscript
.
config
)
this
.
input
+:=
inputVcf
this
.
recal_file
=
indelVariantRecalibrator
.
recal_file
this
.
tranches_file
=
indelVariantRecalibrator
.
tranches_file
this
.
out
=
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
)
this
.
ts_filter_level
=
99.0
this
.
mode
=
org
.
broadinstitute
.
sting
.
gatk
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
this
.
nt
=
3
this
.
memoryLimit
=
2
*
nt
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
}
add
(
indelApplyRecalibration
)
return
indelApplyRecalibration
.
out
}
def
addGenotypeGVCFs
(
gvcfFiles
:
List
[
File
],
dir
:
String
)
:
File
=
{
val
genotypeGVCFs
=
new
GenotypeGVCFs
()
with
gatkArguments
{
val
config
:
Config
=
Config
.
mergeConfigs
(
qscript
.
config
.
getAsConfig
(
"genotypegvcfs"
),
qscript
.
config
)
this
.
variant
=
gvcfFiles
if
(
config
.
contains
(
"scattercount"
))
this
.
scatterCount
=
config
.
getAsInt
(
"scattercount"
)
this
.
out
=
new
File
(
outputDir
,
"genotype.vcf"
)
}
add
(
genotypeGVCFs
)
return
genotypeGVCFs
.
out
}
trait
gatkArguments
extends
CommandLineGATK
{
this
.
reference_sequence
=
referenceFile
this
.
memoryLimit
=
2
this
.
jobResourceRequests
:+=
"h_vmem=4G"
}
}
object
Gatk
extends
PipelineCommand
{
...
...
mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
View file @
09190f82
...
...
@@ -14,7 +14,7 @@ import org.broadinstitute.sting.queue.function._
import
scala.util.parsing.json._
import
org.broadinstitute.sting.utils.variant._
class
Mapping
(
private
var
globalConfig
:
Config
)
extends
QScript
{
class
Mapping
(
private
var
globalConfig
:
Config
)
extends
QScript
with
BiopetQScript
{
qscript
=>
@Argument
(
doc
=
"Config Json file"
,
shortName
=
"config"
,
required
=
false
)
var
configfiles
:
List
[
File
]
=
Nil
@Input
(
doc
=
"R1 fastq file"
,
shortName
=
"R1"
,
required
=
true
)
var
input_R1
:
File
=
_
...
...
@@ -38,15 +38,13 @@ class Mapping(private var globalConfig: Config) extends QScript {
def
this
()
=
this
(
new
Config
())
var
config
:
Config
=
_
var
referenceFile
:
File
=
_
var
outputFiles
:
Map
[
String
,
File
]
=
Map
()
var
paired
:
Boolean
=
false
def
init
()
{
for
(
file
<-
configfiles
)
globalConfig
.
loadConfigFile
(
file
)
config
=
Config
.
mergeConfigs
(
globalConfig
.
getAsConfig
(
"mapping"
),
globalConfig
)
if
(
aligner
==
null
)
aligner
=
"bwa"
if
(
aligner
==
null
)
aligner
=
config
.
getAsString
(
"aligner"
,
"bwa"
)
referenceFile
=
config
.
getAsString
(
"referenceFile"
)
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on mapping module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
...
...
@@ -157,13 +155,25 @@ class Mapping(private var globalConfig: Config) extends QScript {
RG
+=
"PL:"
+
RGPL
+
"\\t"
RG
+=
"PU:"
+
RGPU
+
"\\t"
RG
+=
"SM:"
+
RGSM
+
"\\t"
RG
+=
"CN:"
+
RGCN
+
"\\t"
RG
+=
"DS"
+
RGDS
+
"\\t"
RG
+=
"DT"
+
RGDT
+
"\\t"
RG
+=
"PI"
+
RGPI
+
"\\t"
if
(
RGCN
!=
null
)
RG
+=
"CN:"
+
RGCN
+
"\\t"
if
(
RGDS
!=
null
)
RG
+=
"DS"
+
RGDS
+
"\\t"
if
(
RGDT
!=
null
)
RG
+=
"DT"
+
RGDT
+
"\\t"
if
(
RGPI
>
0
)
RG
+=
"PI"
+
RGPI
+
"\\t"
return
RG
.
substring
(
0
,
RG
.
lastIndexOf
(
"\\t"
))
}
def
loadRunConfig
(
runConfig
:
Config
,
sampleConfig
:
Config
,
runDir
:
String
)
{
input_R1
=
runConfig
.
getAsString
(
"R1"
,
null
)
input_R2
=
runConfig
.
getAsString
(
"R2"
,
null
)
paired
=
(
input_R2
!=
null
)
RGLB
=
runConfig
.
getAsString
(
"ID"
)
RGSM
=
sampleConfig
.
get
(
"ID"
).
toString
if
(
runConfig
.
contains
(
"PL"
))
RGPL
=
runConfig
.
getAsString
(
"PL"
)
if
(
runConfig
.
contains
(
"PU"
))
RGPU
=
runConfig
.
getAsString
(
"PU"
)
if
(
runConfig
.
contains
(
"CN"
))
RGCN
=
runConfig
.
getAsString
(
"CN"
)
outputDir
=
runDir
}
}
object
Mapping
extends
PipelineCommand
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment