Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
050af98d
Commit
050af98d
authored
Nov 29, 2015
by
Peter van 't Hof
Browse files
Remove old code
parent
88d47fcf
Changes
5
Hide whitespace changes
Inline
Side-by-side
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala
deleted
100644 → 0
View file @
88d47fcf
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
org.broadinstitute.gatk.queue.QScript
import
scala.util.Random
class
GatkBenchmarkGenotyping
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"Sample gvcf file"
)
var
sampleGvcf
:
File
=
_
@Argument
(
doc
=
"SampleName"
,
required
=
true
)
var
sampleName
:
String
=
_
@Input
(
doc
=
"Gvcf files"
,
shortName
=
"I"
,
required
=
false
)
var
gvcfFiles
:
List
[
File
]
=
Nil
var
reference
:
File
=
config
(
"reference"
)
@Argument
(
doc
=
"Dbsnp"
,
shortName
=
"dbsnp"
,
required
=
false
)
var
dbsnp
:
File
=
config
(
"dbsnp"
)
def
init
()
{
if
(
config
.
contains
(
"gvcffiles"
))
for
(
file
<-
config
(
"gvcffiles"
).
asList
)
gvcfFiles
::=
file
.
toString
}
def
biopetScript
()
{
var
todoGvcfs
=
gvcfFiles
var
gvcfPool
:
List
[
File
]
=
Nil
addGenotypingPipeline
(
gvcfPool
)
while
(
todoGvcfs
.
nonEmpty
)
{
val
index
=
Random
.
nextInt
(
todoGvcfs
.
size
)
gvcfPool
::=
todoGvcfs
(
index
)
addGenotypingPipeline
(
gvcfPool
)
todoGvcfs
=
todoGvcfs
.
filter
(
b
=>
b
!=
todoGvcfs
(
index
))
}
}
def
addGenotypingPipeline
(
gvcfPool
:
List
[
File
])
{
val
gatkGenotyping
=
new
GatkGenotyping
(
this
)
gatkGenotyping
.
inputGvcfs
=
sampleGvcf
::
gvcfPool
gatkGenotyping
.
samples
:+=
sampleName
gatkGenotyping
.
outputDir
=
new
File
(
outputDir
,
"samples_"
+
gvcfPool
.
size
)
gatkGenotyping
.
init
()
gatkGenotyping
.
biopetScript
()
addAll
(
gatkGenotyping
.
functions
)
}
}
object
GatkBenchmarkGenotyping
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala
deleted
100644 → 0
View file @
88d47fcf
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.extensions.gatk.broad.
{
GenotypeGVCFs
,
SelectVariants
}
import
org.broadinstitute.gatk.queue.QScript
class
GatkGenotyping
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"Gvcf files"
,
shortName
=
"I"
)
var
inputGvcfs
:
List
[
File
]
=
Nil
@Argument
(
doc
=
"Reference"
,
shortName
=
"R"
,
required
=
false
)
var
reference
:
File
=
config
(
"reference"
)
@Argument
(
doc
=
"Dbsnp"
,
shortName
=
"dbsnp"
,
required
=
false
)
var
dbsnp
:
File
=
config
(
"dbsnp"
)
@Argument
(
doc
=
"OutputName"
,
required
=
false
)
var
outputName
:
String
=
"genotype"
@Output
(
doc
=
"OutputFile"
,
shortName
=
"O"
,
required
=
false
)
var
outputFile
:
File
=
_
@Argument
(
doc
=
"Samples"
,
shortName
=
"sample"
,
required
=
false
)
var
samples
:
List
[
String
]
=
Nil
def
init
()
{
require
(
outputName
!=
null
,
"Outputname is null"
)
if
(
outputFile
==
null
)
outputFile
=
new
File
(
outputDir
,
outputName
+
".vcf.gz"
)
}
def
biopetScript
()
{
addGenotypeGVCFs
(
inputGvcfs
,
outputFile
)
if
(
samples
.
nonEmpty
)
{
if
(
samples
.
size
>
1
)
addSelectVariants
(
outputFile
,
samples
,
new
File
(
outputDir
,
"samples/"
),
"all"
)
for
(
sample
<-
samples
)
addSelectVariants
(
outputFile
,
List
(
sample
),
new
File
(
outputDir
,
"samples/"
),
sample
)
}
}
def
addGenotypeGVCFs
(
gvcfFiles
:
List
[
File
],
outputFile
:
File
)
:
File
=
{
val
genotypeGVCFs
=
GenotypeGVCFs
(
this
,
gvcfFiles
,
outputFile
)
add
(
genotypeGVCFs
)
genotypeGVCFs
.
out
}
def
addSelectVariants
(
inputFile
:
File
,
samples
:
List
[
String
],
outputDir
:
File
,
name
:
String
)
{
val
selectVariants
=
SelectVariants
(
this
,
inputFile
,
new
File
(
outputDir
,
name
+
".vcf.gz"
))
selectVariants
.
excludeNonVariants
=
true
for
(
sample
<-
samples
)
selectVariants
.
sample_name
:+=
sample
add
(
selectVariants
)
}
}
object
GatkGenotyping
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
deleted
100644 → 0
View file @
88d47fcf
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package
nl.lumc.sasc.biopet.pipelines.gatk
import
htsjdk.samtools.SamReaderFactory
import
nl.lumc.sasc.biopet.core.
{
MultiSampleQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.extensions.gatk.broad.
{
CombineGVCFs
,
CombineVariants
}
import
nl.lumc.sasc.biopet.extensions.picard.
{
AddOrReplaceReadGroups
,
SamToFastq
}
import
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import
nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
import
nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import
org.broadinstitute.gatk.queue.QScript
import
scala.collection.JavaConversions._
class
GatkPipeline
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
with
SummaryQScript
{
qscript
=>
def
this
()
=
this
(
null
)
@Argument
(
doc
=
"Skip Genotyping step"
,
shortName
=
"skipgenotyping"
,
required
=
false
)
var
skipGenotyping
:
Boolean
=
config
(
"skip_genotyping"
,
default
=
false
)
/** Merge gvcfs */
var
mergeGvcfs
:
Boolean
=
config
(
"merge_gvcfs"
,
default
=
false
)
/** Joint variantcalling */
var
jointVariantcalling
:
Boolean
=
config
(
"joint_variantcalling"
,
default
=
false
)
/** Joint genotyping */
var
jointGenotyping
:
Boolean
=
config
(
"joint_genotyping"
,
default
=
false
)
var
singleSampleCalling
=
config
(
"single_sample_calling"
,
default
=
true
)
var
reference
:
File
=
config
(
"reference"
)
var
useAllelesOption
:
Boolean
=
config
(
"use_alleles_option"
,
default
=
false
)
val
externalGvcfs
=
config
(
"external_gvcfs_files"
,
default
=
Nil
).
asFileList
def
summaryFile
=
new
File
(
outputDir
,
"GatkPipeline.summary.json"
)
//TODO: Add summary
def
summaryFiles
=
Map
()
//TODO: Add summary
def
summarySettings
=
Map
()
def
makeSample
(
id
:
String
)
=
new
Sample
(
id
)
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
//TODO: Add summary
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
//TODO: Add summary
def
summaryStats
:
Map
[
String
,
Any
]
=
Map
()
def
makeLibrary
(
id
:
String
)
=
new
Library
(
id
)
class
Library
(
libId
:
String
)
extends
AbstractLibrary
(
libId
)
{
//TODO: Add summary
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
//TODO: Add summary
def
summaryStats
:
Map
[
String
,
Any
]
=
Map
()
val
mapping
=
new
Mapping
(
qscript
)
mapping
.
sampleId
=
Some
(
sampleId
)
mapping
.
libId
=
Some
(
libId
)
mapping
.
outputDir
=
libDir
/** Library variantcalling */
val
gatkVariantcalling
=
new
GatkVariantcalling
(
qscript
)
gatkVariantcalling
.
doublePreProces
=
false
gatkVariantcalling
.
sampleID
=
sampleId
gatkVariantcalling
.
outputDir
=
new
File
(
libDir
,
"variantcalling"
)
protected
def
addJobs
()
:
Unit
=
{
val
bamFile
:
Option
[
File
]
=
if
(
config
.
contains
(
"R1"
))
{
mapping
.
input_R1
=
config
(
"R1"
)
mapping
.
input_R2
=
config
(
"R2"
)
mapping
.
init
()
mapping
.
biopetScript
()
addAll
(
mapping
.
functions
)
// Add functions of mapping to curent function pool
Some
(
mapping
.
finalBamFile
)
}
else
if
(
config
.
contains
(
"bam"
))
{
var
bamFile
:
File
=
config
(
"bam"
)
if
(!
bamFile
.
exists
)
throw
new
IllegalStateException
(
"Bam in config does not exist, file: "
+
bamFile
)
if
(
config
(
"bam_to_fastq"
,
default
=
false
).
asBoolean
)
{
val
samToFastq
=
SamToFastq
(
qscript
,
bamFile
,
libDir
+
sampleId
+
"-"
+
libId
+
".R1.fastq"
,
libDir
+
sampleId
+
"-"
+
libId
+
".R2.fastq"
)
samToFastq
.
isIntermediate
=
true
qscript
.
add
(
samToFastq
)
mapping
.
input_R1
=
samToFastq
.
fastqR1
mapping
.
input_R2
=
Some
(
samToFastq
.
fastqR2
)
mapping
.
init
()
mapping
.
biopetScript
()
addAll
(
mapping
.
functions
)
// Add functions of mapping to curent function pool
Some
(
mapping
.
finalBamFile
)
}
else
{
var
readGroupOke
=
true
val
inputSam
=
SamReaderFactory
.
makeDefault
.
open
(
bamFile
)
val
header
=
inputSam
.
getFileHeader
.
getReadGroups
for
(
readGroup
<-
inputSam
.
getFileHeader
.
getReadGroups
)
{
if
(
readGroup
.
getSample
!=
sampleId
)
logger
.
warn
(
"Sample ID readgroup in bam file is not the same"
)
if
(
readGroup
.
getLibrary
!=
libId
)
logger
.
warn
(
"Library ID readgroup in bam file is not the same"
)
if
(
readGroup
.
getSample
!=
sampleId
||
readGroup
.
getLibrary
!=
libId
)
readGroupOke
=
false
}
inputSam
.
close
()
if
(!
readGroupOke
)
{
if
(
config
(
"correct_readgroups"
,
default
=
false
))
{
logger
.
info
(
"Correcting readgroups, file:"
+
bamFile
)
val
aorrg
=
AddOrReplaceReadGroups
(
qscript
,
bamFile
,
new
File
(
libDir
+
sampleId
+
"-"
+
libId
+
".bam"
))
aorrg
.
RGID
=
sampleId
+
"-"
+
libId
aorrg
.
RGLB
=
libId
aorrg
.
RGSM
=
sampleId
aorrg
.
isIntermediate
=
true
qscript
.
add
(
aorrg
)
bamFile
=
aorrg
.
output
}
else
throw
new
IllegalStateException
(
"Sample readgroup and/or library of input bamfile is not correct, file: "
+
bamFile
+
"\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this"
)
}
addAll
(
BamMetrics
(
qscript
,
bamFile
,
libDir
+
"metrics/"
).
functions
)
Some
(
bamFile
)
}
}
else
{
logger
.
error
(
"Sample: "
+
sampleId
+
": No R1 found for run: "
+
libId
)
None
}
if
(
bamFile
.
isDefined
)
{
gatkVariantcalling
.
inputBams
=
List
(
bamFile
.
get
)
gatkVariantcalling
.
variantcalling
=
config
(
"library_variantcalling"
,
default
=
false
)
gatkVariantcalling
.
init
()
gatkVariantcalling
.
biopetScript
()
addAll
(
gatkVariantcalling
.
functions
)
}
addSummaryQScript
(
mapping
)
}
}
/** sample variantcalling */
val
gatkVariantcalling
=
new
GatkVariantcalling
(
qscript
)
gatkVariantcalling
.
sampleID
=
sampleId
gatkVariantcalling
.
outputDir
=
new
File
(
sampleDir
,
"variantcalling"
)
protected
def
addJobs
()
:
Unit
=
{
addPerLibJobs
()
gatkVariantcalling
.
inputBams
=
libraries
.
map
(
_
.
_2
.
mapping
.
finalBamFile
).
toList
gatkVariantcalling
.
preProcesBams
=
false
if
(!
singleSampleCalling
)
{
gatkVariantcalling
.
useHaplotypecaller
=
false
gatkVariantcalling
.
useUnifiedGenotyper
=
false
}
gatkVariantcalling
.
init
()
gatkVariantcalling
.
biopetScript
()
addAll
(
gatkVariantcalling
.
functions
)
gatkVariantcalling
.
inputBams
.
foreach
(
x
=>
addAll
(
Bam2Wig
(
qscript
,
x
).
functions
))
}
}
def
init
()
{
}
val
multisampleVariantcalling
=
new
GatkVariantcalling
(
this
)
{
override
def
configName
=
"gatkvariantcalling"
override
def
configPath
:
List
[
String
]
=
super
.
configPath
:::
"multisample"
::
Nil
}
def
biopetScript
()
:
Unit
=
{
addSamplesJobs
()
addSummaryJobs
()
}
def
addMultiSampleJobs
()
:
Unit
=
{
val
gvcfFiles
:
List
[
File
]
=
if
(
mergeGvcfs
&&
externalGvcfs
.
size
+
samples
.
size
>
1
)
{
val
newFile
=
new
File
(
outputDir
,
"merged.gvcf.vcf.gz"
)
add
(
CombineGVCFs
(
this
,
externalGvcfs
++
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
gvcfFile
),
newFile
))
List
(
newFile
)
}
else
externalGvcfs
++
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
gvcfFile
)
if
(!
skipGenotyping
&&
gvcfFiles
.
nonEmpty
)
{
if
(
jointGenotyping
)
{
val
gatkGenotyping
=
new
GatkGenotyping
(
this
)
gatkGenotyping
.
inputGvcfs
=
gvcfFiles
gatkGenotyping
.
outputDir
=
new
File
(
outputDir
,
"genotyping"
)
gatkGenotyping
.
init
()
gatkGenotyping
.
biopetScript
()
addAll
(
gatkGenotyping
.
functions
)
var
vcfFile
=
gatkGenotyping
.
outputFile
}
}
else
logger
.
warn
(
"No gVCFs to genotype"
)
if
(
jointVariantcalling
)
{
val
allBamfiles
=
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
bamFiles
).
toList
.
fold
(
Nil
)(
_
++
_
)
val
allRawVcfFiles
=
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
rawVcfFile
).
filter
(
_
!=
null
).
toList
val
gatkVariantcalling
=
new
GatkVariantcalling
(
this
)
{
override
def
configName
=
"gatkvariantcalling"
override
def
configPath
:
List
[
String
]
=
super
.
configPath
:::
"multisample"
::
Nil
}
if
(
gatkVariantcalling
.
useMpileup
)
{
val
cvRaw
=
CombineVariants
(
this
,
allRawVcfFiles
.
toList
,
new
File
(
outputDir
,
"variantcalling/multisample.raw.vcf.gz"
))
add
(
cvRaw
)
gatkVariantcalling
.
rawVcfInput
=
cvRaw
.
out
}
multisampleVariantcalling
.
preProcesBams
=
false
multisampleVariantcalling
.
doublePreProces
=
false
multisampleVariantcalling
.
inputBams
=
allBamfiles
.
toList
multisampleVariantcalling
.
outputDir
=
new
File
(
outputDir
,
"variantcalling"
)
multisampleVariantcalling
.
outputName
=
"multisample"
multisampleVariantcalling
.
init
()
multisampleVariantcalling
.
biopetScript
()
addAll
(
multisampleVariantcalling
.
functions
)
if
(
config
(
"inputtype"
,
default
=
"dna"
).
asString
!=
"rna"
&&
config
(
"recalibration"
,
default
=
false
).
asBoolean
)
{
val
recalibration
=
new
GatkVariantRecalibration
(
this
)
recalibration
.
inputVcf
=
multisampleVariantcalling
.
scriptOutput
.
finalVcfFile
recalibration
.
bamFiles
=
allBamfiles
recalibration
.
outputDir
=
new
File
(
outputDir
,
"recalibration"
)
recalibration
.
init
()
recalibration
.
biopetScript
()
}
}
}
}
object
GatkPipeline
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala
deleted
100644 → 0
View file @
88d47fcf
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.extensions.gatk.broad.
{
ApplyRecalibration
,
VariantAnnotator
,
VariantRecalibrator
}
import
org.broadinstitute.gatk.queue.QScript
class
GatkVariantRecalibration
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"input vcf file"
,
shortName
=
"I"
)
var
inputVcf
:
File
=
_
@Input
(
doc
=
"input vcf file"
,
shortName
=
"BAM"
,
required
=
false
)
var
bamFiles
:
List
[
File
]
=
Nil
@Output
(
doc
=
"output vcf file"
,
shortName
=
"out"
)
var
outputVcf
:
File
=
_
def
init
()
{
require
(
inputVcf
!=
null
,
"Missing Output directory on gatk module"
)
}
def
biopetScript
()
{
var
vcfFile
:
File
=
if
(
bamFiles
.
nonEmpty
)
addVariantAnnotator
(
inputVcf
,
bamFiles
,
outputDir
)
else
inputVcf
vcfFile
=
addSnpVariantRecalibrator
(
vcfFile
,
outputDir
)
vcfFile
=
addIndelVariantRecalibrator
(
vcfFile
,
outputDir
)
}
def
addSnpVariantRecalibrator
(
inputVcf
:
File
,
dir
:
File
)
:
File
=
{
val
snpRecal
=
VariantRecalibrator
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
),
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
),
indel
=
false
)
if
(
snpRecal
.
resource
.
nonEmpty
)
{
add
(
snpRecal
)
val
snpApply
=
ApplyRecalibration
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
),
snpRecal
.
recal_file
,
snpRecal
.
tranches_file
,
indel
=
false
)
add
(
snpApply
)
snpApply
.
out
}
else
{
logger
.
warn
(
"Skipped snp Recalibration, resource is missing"
)
inputVcf
}
}
def
addIndelVariantRecalibrator
(
inputVcf
:
File
,
dir
:
File
)
:
File
=
{
val
indelRecal
=
VariantRecalibrator
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
),
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
),
indel
=
true
)
if
(
indelRecal
.
resource
.
nonEmpty
)
{
add
(
indelRecal
)
val
indelApply
=
ApplyRecalibration
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
),
indelRecal
.
recal_file
,
indelRecal
.
tranches_file
,
indel
=
true
)
add
(
indelApply
)
indelApply
.
out
}
else
{
logger
.
warn
(
"Skipped indel Recalibration, resource is missing"
)
inputVcf
}
}
def
addVariantAnnotator
(
inputvcf
:
File
,
bamfiles
:
List
[
File
],
dir
:
File
)
:
File
=
{
val
variantAnnotator
=
VariantAnnotator
(
this
,
inputvcf
,
bamfiles
,
swapExt
(
dir
,
inputvcf
,
".vcf"
,
".anotated.vcf"
))
add
(
variantAnnotator
)
variantAnnotator
.
out
}
}
object
GatkVariantRecalibration
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
deleted
100644 → 0
View file @
88d47fcf
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package
nl.lumc.sasc.biopet.pipelines.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.extensions.Ln
import
nl.lumc.sasc.biopet.extensions.gatk.broad._
import
nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import
nl.lumc.sasc.biopet.extensions.tools.
{
MergeAlleles
,
MpileupToVcf
,
VcfFilter
,
VcfStats
}
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import
scala.collection.SortedMap
import
scala.language.reflectiveCalls
class
GatkVariantcalling
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
val
scriptOutput
=
new
GatkVariantcalling
.
ScriptOutput
@Input
(
doc
=
"Bam files (should be deduped bams)"
,
shortName
=
"BAM"
)
var
inputBams
:
List
[
File
]
=
Nil
@Input
(
doc
=
"Raw vcf file"
,
shortName
=
"raw"
)
var
rawVcfInput
:
File
=
_
@Argument
(
doc
=
"Reference"
,
shortName
=
"R"
,
required
=
false
)
var
reference
:
File
=
config
(
"reference"
)
@Argument
(
doc
=
"OutputName"
,
required
=
false
)
var
outputName
:
String
=
_
@Argument
(
doc
=
"Sample name"
,
required
=
false
)
var
sampleID
:
String
=
_
var
preProcesBams
:
Boolean
=
config
(
"pre_proces_bams"
,
default
=
true
)
var
variantcalling
:
Boolean
=
true
var
doublePreProces
:
Boolean
=
config
(
"double_pre_proces"
,
default
=
true
)
var
useHaplotypecaller
:
Boolean
=
config
(
"use_haplotypecaller"
,
default
=
true
)
var
useUnifiedGenotyper
:
Boolean
=
config
(
"use_unifiedgenotyper"
,
default
=
false
)
var
useAllelesOption
:
Boolean
=
config
(
"use_alleles_option"
,
default
=
false
)
var
useMpileup
:
Boolean
=
config
(
"use_mpileup"
,
default
=
true
)
var
useIndelRealigner
:
Boolean
=
config
(
"use_indel_realign"
,
default
=
true
)
var
useBaseRecalibration
:
Boolean
=
config
(
"use_base_recalibration"
,
default
=
true
)
def
init
()
{
if
(
outputName
==
null
&&
sampleID
!=
null
)
outputName
=
sampleID
else
if
(
outputName
==
null
)
outputName
=
config
(
"output_name"
,
default
=
"noname"
)
val
baseRecalibrator
=
new
BaseRecalibrator
(
this
)
if
(
preProcesBams
&&
useBaseRecalibration
&&
baseRecalibrator
.
knownSites
.
isEmpty
)
{
logger
.
warn
(
"No Known site found, skipping base recalibration"
)
useBaseRecalibration
=
false
}
}
private
def
doublePreProces
(
files
:
List
[
File
])
:
List
[
File
]
=
{
if
(
files
.
isEmpty
)
throw
new
IllegalStateException
(
"Files can't be empty"
)
else
if
(!
doublePreProces
)
files
else
if
(
files
.
size
==
1
)
{
val
bamFile
=
new
File
(
outputDir
,
files
.
head
.
getName
)
if
(
bamFile
!=
files
.
head
)
{
val
oldIndex
:
File
=
new
File
(
files
.
head
.
getAbsolutePath
.
stripSuffix
(
".bam"
)
+
".bai"
)
val
newIndex
:
File
=
swapExt
(
outputDir
,
bamFile
,
".bam"
,
".bai"
)
val
baiLn
=
Ln
(
this
,
oldIndex
,
newIndex
)
add
(
baiLn
)
val
bamLn
=
Ln
(
this
,
files
.
head
,
bamFile
)
bamLn
.
deps
:+=
baiLn
.
output
add
(
bamLn
)
}
List
(
bamFile
)
}
else
{
val
markDup
=
MarkDuplicates
(
this
,
files
,
new
File
(
outputDir
,
outputName
+
".dedup.bam"
))
markDup
.
isIntermediate
=
useIndelRealigner
add
(
markDup
)
if
(
useIndelRealigner
)
{
List
(
addIndelRealign
(
markDup
.
output
,
outputDir
,
isIntermediate
=
false
))
}
else
{
List
(
markDup
.
output
)
}
}
}
def
biopetScript
()
{
scriptOutput
.
bamFiles
=
{
doublePreProces
(
if
(
preProcesBams
)
{
for
(
inputBam
<-
inputBams
)
yield
{
var
bamFile
=
inputBam
if
(
useIndelRealigner
)
bamFile
=
addIndelRealign
(
bamFile
,
outputDir
,
isIntermediate
=
useBaseRecalibration
)
if
(
useBaseRecalibration
)
bamFile
=
addBaseRecalibrator
(
bamFile
,
outputDir
,
isIntermediate
=
inputBams
.
size
>
1
)
bamFile
}
}
else
{
inputBams
})
}
if
(
variantcalling
)
{
var
mergBuffer
:
SortedMap
[
String
,
File
]
=
SortedMap
()