Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
88b4b2a4
Commit
88b4b2a4
authored
Nov 22, 2014
by
Peter van 't Hof
Browse files
Added protected artifact for gatk pipelines
parent
6441dfd0
Changes
10
Hide whitespace changes
Inline
Side-by-side
protected/biopet-gatk-pipelines/.gitignore
0 → 100644
View file @
88b4b2a4
/target/
\ No newline at end of file
protected/biopet-gatk-pipelines/pom.xml
0 → 100644
View file @
88b4b2a4
<project
xmlns=
"http://maven.apache.org/POM/4.0.0"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=
"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
BiopetGatkPipelines
</artifactId>
<version>
0.2.0-DEV
</version>
<packaging>
jar
</packaging>
<inceptionYear>
2014
</inceptionYear>
<name>
BiopetGatkPipelines
</name>
<url>
http://maven.apache.org
</url>
<properties>
<project.build.sourceEncoding>
UTF-8
</project.build.sourceEncoding>
<sting.unpack.phase>
prepare-package
</sting.unpack.phase>
<sting.shade.phase>
package
</sting.shade.phase>
<app.main.class>
nl.lumc.sasc.biopet.core.BiopetExecutable
</app.main.class>
</properties>
<dependencies>
<dependency>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
BiopetFramework
</artifactId>
<version>
0.2.0-DEV
</version>
</dependency>
<dependency>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
BiopetGatkExtensions
</artifactId>
<version>
0.2.0-DEV
</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>
src/main/resources
</directory>
<includes>
<include>
**/*
</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
<groupId>
org.scala-tools
</groupId>
<artifactId>
maven-scala-plugin
</artifactId>
<version>
2.15.2
</version>
<executions>
<execution>
<id>
scala-compile
</id>
<goals>
<goal>
compile
</goal>
<goal>
testCompile
</goal>
</goals>
<configuration>
<args>
<arg>
-dependencyfile
</arg>
<arg>
${project.build.directory}/.scala_dependencies
</arg>
<arg>
-deprecation
</arg>
<arg>
-feature
</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-jar-plugin
</artifactId>
<version>
2.5
</version>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>
true
</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>
true
</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-compiler-plugin
</artifactId>
<version>
2.3.2
</version>
<configuration>
<showDeprecation>
true
</showDeprecation>
</configuration>
</plugin>
</plugins>
</build>
</project>
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala
0 → 100644
View file @
88b4b2a4
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
}
import
scala.util.Random
class
GatkBenchmarkGenotyping
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"Sample gvcf file"
)
var
sampleGvcf
:
File
=
_
@Argument
(
doc
=
"SampleName"
,
required
=
true
)
var
sampleName
:
String
=
_
@Input
(
doc
=
"Gvcf files"
,
shortName
=
"I"
,
required
=
false
)
var
gvcfFiles
:
List
[
File
]
=
Nil
var
reference
:
File
=
config
(
"reference"
)
@Argument
(
doc
=
"Dbsnp"
,
shortName
=
"dbsnp"
,
required
=
false
)
var
dbsnp
:
File
=
config
(
"dbsnp"
)
def
init
()
{
if
(
config
.
contains
(
"gvcffiles"
))
for
(
file
<-
config
(
"gvcffiles"
).
asList
)
{
gvcfFiles
::=
file
.
toString
}
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on gatk module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
}
def
biopetScript
()
{
var
todoGvcfs
=
gvcfFiles
var
gvcfPool
:
List
[
File
]
=
Nil
addGenotypingPipeline
(
gvcfPool
)
while
(
todoGvcfs
.
size
>
0
)
{
val
index
=
Random
.
nextInt
(
todoGvcfs
.
size
)
gvcfPool
::=
todoGvcfs
(
index
)
addGenotypingPipeline
(
gvcfPool
)
todoGvcfs
=
todoGvcfs
.
filter
(
b
=>
b
!=
todoGvcfs
(
index
))
}
}
def
addGenotypingPipeline
(
gvcfPool
:
List
[
File
])
{
val
gatkGenotyping
=
new
GatkGenotyping
(
this
)
gatkGenotyping
.
inputGvcfs
=
sampleGvcf
::
gvcfPool
gatkGenotyping
.
samples
:+=
sampleName
gatkGenotyping
.
outputDir
=
outputDir
+
"samples_"
+
gvcfPool
.
size
+
"/"
gatkGenotyping
.
init
gatkGenotyping
.
biopetScript
addAll
(
gatkGenotyping
.
functions
)
}
}
object
GatkBenchmarkGenotyping
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala
0 → 100644
View file @
88b4b2a4
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.gatk.
{
GenotypeGVCFs
,
SelectVariants
}
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
,
Argument
}
class
GatkGenotyping
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"Gvcf files"
,
shortName
=
"I"
)
var
inputGvcfs
:
List
[
File
]
=
Nil
@Argument
(
doc
=
"Reference"
,
shortName
=
"R"
,
required
=
false
)
var
reference
:
File
=
config
(
"reference"
)
@Argument
(
doc
=
"Dbsnp"
,
shortName
=
"dbsnp"
,
required
=
false
)
var
dbsnp
:
File
=
config
(
"dbsnp"
)
@Argument
(
doc
=
"OutputName"
,
required
=
false
)
var
outputName
:
String
=
"genotype"
@Output
(
doc
=
"OutputFile"
,
shortName
=
"O"
,
required
=
false
)
var
outputFile
:
File
=
_
@Argument
(
doc
=
"Samples"
,
shortName
=
"sample"
,
required
=
false
)
var
samples
:
List
[
String
]
=
Nil
def
init
()
{
if
(
outputFile
==
null
)
outputFile
=
outputDir
+
outputName
+
".vcf.gz"
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on gatk module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
}
def
biopetScript
()
{
addGenotypeGVCFs
(
inputGvcfs
,
outputFile
)
if
(!
samples
.
isEmpty
)
{
if
(
samples
.
size
>
1
)
addSelectVariants
(
outputFile
,
samples
,
outputDir
+
"samples/"
,
"all"
)
for
(
sample
<-
samples
)
addSelectVariants
(
outputFile
,
List
(
sample
),
outputDir
+
"samples/"
,
sample
)
}
}
def
addGenotypeGVCFs
(
gvcfFiles
:
List
[
File
],
outputFile
:
File
)
:
File
=
{
val
genotypeGVCFs
=
GenotypeGVCFs
(
this
,
gvcfFiles
,
outputFile
)
add
(
genotypeGVCFs
)
return
genotypeGVCFs
.
out
}
def
addSelectVariants
(
inputFile
:
File
,
samples
:
List
[
String
],
outputDir
:
String
,
name
:
String
)
{
val
selectVariants
=
SelectVariants
(
this
,
inputFile
,
outputDir
+
name
+
".vcf.gz"
)
selectVariants
.
excludeNonVariants
=
true
for
(
sample
<-
samples
)
selectVariants
.
sample_name
:+=
sample
add
(
selectVariants
)
}
}
object
GatkGenotyping
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
0 → 100644
View file @
88b4b2a4
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.MultiSampleQScript
import
nl.lumc.sasc.biopet.core.PipelineCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
htsjdk.samtools.SamReaderFactory
import
scala.collection.JavaConversions._
import
java.io.File
import
nl.lumc.sasc.biopet.extensions.gatk.
{
CombineVariants
,
CombineGVCFs
}
import
nl.lumc.sasc.biopet.extensions.picard.AddOrReplaceReadGroups
import
nl.lumc.sasc.biopet.extensions.picard.SamToFastq
import
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import
nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.utils.commandline.
{
Argument
}
class
GatkPipeline
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
def
this
()
=
this
(
null
)
@Argument
(
doc
=
"Only Sample"
,
shortName
=
"sample"
,
required
=
false
)
val
onlySample
:
List
[
String
]
=
Nil
@Argument
(
doc
=
"Skip Genotyping step"
,
shortName
=
"skipgenotyping"
,
required
=
false
)
var
skipGenotyping
:
Boolean
=
false
@Argument
(
doc
=
"Merge gvcfs"
,
shortName
=
"mergegvcfs"
,
required
=
false
)
var
mergeGvcfs
:
Boolean
=
false
@Argument
(
doc
=
"Joint variantcalling"
,
shortName
=
"jointVariantCalling"
,
required
=
false
)
var
jointVariantcalling
:
Boolean
=
config
(
"joint_variantcalling"
,
default
=
false
)
@Argument
(
doc
=
"Joint genotyping"
,
shortName
=
"jointGenotyping"
,
required
=
false
)
var
jointGenotyping
:
Boolean
=
config
(
"joint_genotyping"
,
default
=
false
)
var
singleSampleCalling
=
config
(
"single_sample_calling"
,
default
=
true
)
var
reference
:
File
=
config
(
"reference"
,
required
=
true
)
var
dbsnp
:
File
=
config
(
"dbsnp"
)
var
gvcfFiles
:
List
[
File
]
=
Nil
var
finalBamFiles
:
List
[
File
]
=
Nil
var
useAllelesOption
:
Boolean
=
config
(
"use_alleles_option"
,
default
=
false
)
class
LibraryOutput
extends
AbstractLibraryOutput
{
var
mappedBamFile
:
File
=
_
var
variantcalling
:
GatkVariantcalling.ScriptOutput
=
_
}
class
SampleOutput
extends
AbstractSampleOutput
{
var
variantcalling
:
GatkVariantcalling.ScriptOutput
=
_
}
def
init
()
{
if
(
config
.
contains
(
"target_bed"
))
{
defaults
++=
Map
(
"gatk"
->
Map
((
"intervals"
->
config
(
"target_bed"
).
asStringList
)))
}
if
(
config
.
contains
(
"gvcfFiles"
))
for
(
file
<-
config
(
"gvcfFiles"
).
asList
)
gvcfFiles
:+=
file
.
toString
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on gatk module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
}
val
multisampleVariantcalling
=
new
GatkVariantcalling
(
this
)
{
override
protected
lazy
val
configName
=
"gatkvariantcalling"
override
def
configPath
:
List
[
String
]
=
"multisample"
::
super
.
configPath
}
def
biopetScript
()
{
if
(
onlySample
.
isEmpty
)
{
runSamplesJobs
//SampleWide jobs
if
(
mergeGvcfs
&&
gvcfFiles
.
size
>
0
)
{
val
newFile
=
outputDir
+
"merged.gvcf.vcf.gz"
add
(
CombineGVCFs
(
this
,
gvcfFiles
,
newFile
))
gvcfFiles
=
List
(
newFile
)
}
if
(!
skipGenotyping
&&
gvcfFiles
.
size
>
0
)
{
if
(
jointGenotyping
)
{
val
gatkGenotyping
=
new
GatkGenotyping
(
this
)
gatkGenotyping
.
inputGvcfs
=
gvcfFiles
gatkGenotyping
.
outputDir
=
outputDir
+
"genotyping/"
gatkGenotyping
.
init
gatkGenotyping
.
biopetScript
addAll
(
gatkGenotyping
.
functions
)
var
vcfFile
=
gatkGenotyping
.
outputFile
}
}
else
logger
.
warn
(
"No gVCFs to genotype"
)
if
(
jointVariantcalling
)
{
val
allBamfiles
=
for
(
(
sampleID
,
sampleOutput
)
<-
samplesOutput
;
file
<-
sampleOutput
.
variantcalling
.
bamFiles
)
yield
file
val
allRawVcfFiles
=
for
((
sampleID
,
sampleOutput
)
<-
samplesOutput
)
yield
sampleOutput
.
variantcalling
.
rawFilterVcfFile
val
gatkVariantcalling
=
new
GatkVariantcalling
(
this
)
{
override
protected
lazy
val
configName
=
"gatkvariantcalling"
override
def
configPath
:
List
[
String
]
=
"multisample"
::
super
.
configPath
}
if
(
gatkVariantcalling
.
useMpileup
)
{
val
cvRaw
=
CombineVariants
(
this
,
allRawVcfFiles
.
toList
,
outputDir
+
"variantcalling/multisample.raw.vcf.gz"
)
add
(
cvRaw
)
gatkVariantcalling
.
rawVcfInput
=
cvRaw
.
out
}
multisampleVariantcalling
.
preProcesBams
=
false
multisampleVariantcalling
.
doublePreProces
=
false
multisampleVariantcalling
.
inputBams
=
allBamfiles
.
toList
multisampleVariantcalling
.
outputDir
=
outputDir
+
"variantcalling"
multisampleVariantcalling
.
outputName
=
"multisample"
multisampleVariantcalling
.
init
multisampleVariantcalling
.
biopetScript
addAll
(
multisampleVariantcalling
.
functions
)
if
(
config
(
"inputtype"
,
default
=
"dna"
).
asString
!=
"rna"
&&
config
(
"recalibration"
,
default
=
false
).
asBoolean
)
{
val
recalibration
=
new
GatkVariantRecalibration
(
this
)
recalibration
.
inputVcf
=
multisampleVariantcalling
.
scriptOutput
.
finalVcfFile
recalibration
.
bamFiles
=
finalBamFiles
recalibration
.
outputDir
=
outputDir
+
"recalibration/"
recalibration
.
init
recalibration
.
biopetScript
}
}
}
else
for
(
sample
<-
onlySample
)
runSingleSampleJobs
(
sample
)
}
// Called for each sample
def
runSingleSampleJobs
(
sampleConfig
:
Map
[
String
,
Any
])
:
SampleOutput
=
{
val
sampleOutput
=
new
SampleOutput
var
libraryBamfiles
:
List
[
File
]
=
List
()
val
sampleID
:
String
=
sampleConfig
(
"ID"
).
toString
sampleOutput
.
libraries
=
runLibraryJobs
(
sampleConfig
)
val
sampleDir
=
globalSampleDir
+
sampleID
for
((
libraryID
,
libraryOutput
)
<-
sampleOutput
.
libraries
)
{
libraryBamfiles
++=
libraryOutput
.
variantcalling
.
bamFiles
}
if
(
libraryBamfiles
.
size
>
0
)
{
finalBamFiles
++=
libraryBamfiles
val
gatkVariantcalling
=
new
GatkVariantcalling
(
this
)
gatkVariantcalling
.
inputBams
=
libraryBamfiles
gatkVariantcalling
.
outputDir
=
sampleDir
+
"/variantcalling/"
gatkVariantcalling
.
preProcesBams
=
false
if
(!
singleSampleCalling
)
{
gatkVariantcalling
.
useHaplotypecaller
=
false
gatkVariantcalling
.
useUnifiedGenotyper
=
false
}
gatkVariantcalling
.
sampleID
=
sampleID
gatkVariantcalling
.
init
gatkVariantcalling
.
biopetScript
addAll
(
gatkVariantcalling
.
functions
)
sampleOutput
.
variantcalling
=
gatkVariantcalling
.
scriptOutput
gvcfFiles
:+=
gatkVariantcalling
.
scriptOutput
.
gvcfFile
}
else
logger
.
warn
(
"No bamfiles for variant calling for sample: "
+
sampleID
)
return
sampleOutput
}
// Called for each run from a sample
def
runSingleLibraryJobs
(
runConfig
:
Map
[
String
,
Any
],
sampleConfig
:
Map
[
String
,
Any
])
:
LibraryOutput
=
{
val
libraryOutput
=
new
LibraryOutput
val
runID
:
String
=
runConfig
(
"ID"
).
toString
val
sampleID
:
String
=
sampleConfig
(
"ID"
).
toString
val
runDir
:
String
=
globalSampleDir
+
sampleID
+
"/run_"
+
runID
+
"/"
var
inputType
=
""
if
(
runConfig
.
contains
(
"inputtype"
))
inputType
=
runConfig
(
"inputtype"
).
toString
else
inputType
=
config
(
"inputtype"
,
default
=
"dna"
).
toString
if
(
runConfig
.
contains
(
"R1"
))
{
val
mapping
=
Mapping
.
loadFromLibraryConfig
(
this
,
runConfig
,
sampleConfig
,
runDir
)
addAll
(
mapping
.
functions
)
// Add functions of mapping to curent function pool
libraryOutput
.
mappedBamFile
=
mapping
.
outputFiles
(
"finalBamFile"
)
}
else
if
(
runConfig
.
contains
(
"bam"
))
{
var
bamFile
=
new
File
(
runConfig
(
"bam"
).
toString
)
if
(!
bamFile
.
exists
)
throw
new
IllegalStateException
(
"Bam in config does not exist, file: "
+
bamFile
)
if
(
config
(
"bam_to_fastq"
,
default
=
false
).
asBoolean
)
{
val
samToFastq
=
SamToFastq
(
this
,
bamFile
,
runDir
+
sampleID
+
"-"
+
runID
+
".R1.fastq"
,
runDir
+
sampleID
+
"-"
+
runID
+
".R2.fastq"
)
add
(
samToFastq
,
isIntermediate
=
true
)
val
mapping
=
Mapping
.
loadFromLibraryConfig
(
this
,
runConfig
,
sampleConfig
,
runDir
,
startJobs
=
false
)
mapping
.
input_R1
=
samToFastq
.
fastqR1
mapping
.
input_R2
=
samToFastq
.
fastqR2
mapping
.
init
mapping
.
biopetScript
addAll
(
mapping
.
functions
)
// Add functions of mapping to curent function pool
libraryOutput
.
mappedBamFile
=
mapping
.
outputFiles
(
"finalBamFile"
)
}
else
{
var
readGroupOke
=
true
val
inputSam
=
SamReaderFactory
.
makeDefault
.
open
(
bamFile
)
val
header
=
inputSam
.
getFileHeader
.
getReadGroups
for
(
readGroup
<-
inputSam
.
getFileHeader
.
getReadGroups
)
{
if
(
readGroup
.
getSample
!=
sampleID
)
logger
.
warn
(
"Sample ID readgroup in bam file is not the same"
)
if
(
readGroup
.
getLibrary
!=
runID
)
logger
.
warn
(
"Library ID readgroup in bam file is not the same"
)
if
(
readGroup
.
getSample
!=
sampleID
||
readGroup
.
getLibrary
!=
runID
)
readGroupOke
=
false
}
inputSam
.
close
if
(!
readGroupOke
)
{
if
(
config
(
"correct_readgroups"
,
default
=
false
))
{
logger
.
info
(
"Correcting readgroups, file:"
+
bamFile
)
val
aorrg
=
AddOrReplaceReadGroups
(
this
,
bamFile
,
new
File
(
runDir
+
sampleID
+
"-"
+
runID
+
".bam"
))
aorrg
.
RGID
=
sampleID
+
"-"
+
runID
aorrg
.
RGLB
=
runID
aorrg
.
RGSM
=
sampleID
if
(
runConfig
.
contains
(
"PL"
))
aorrg
.
RGPL
=
runConfig
(
"PL"
).
toString
else
aorrg
.
RGPL
=
"illumina"
if
(
runConfig
.
contains
(
"PU"
))
aorrg
.
RGPU
=
runConfig
(
"PU"
).
toString
else
aorrg
.
RGPU
=
"na"
if
(
runConfig
.
contains
(
"CN"
))
aorrg
.
RGCN
=
runConfig
(
"CN"
).
toString
add
(
aorrg
,
isIntermediate
=
true
)
bamFile
=
aorrg
.
output
}
else
throw
new
IllegalStateException
(
"Readgroup sample and/or library of input bamfile is not correct, file: "
+
bamFile
+
"\nPossible to set 'correct_readgroups' to true on config to automatic fix this"
)
}
addAll
(
BamMetrics
(
this
,
bamFile
,
runDir
+
"metrics/"
).
functions
)
libraryOutput
.
mappedBamFile
=
bamFile
}
}
else
logger
.
error
(
"Sample: "
+
sampleID
+
": No R1 found for run: "
+
runConfig
)
val
gatkVariantcalling
=
new
GatkVariantcalling
(
this
)
gatkVariantcalling
.
inputBams
=
List
(
libraryOutput
.
mappedBamFile
)
gatkVariantcalling
.
outputDir
=
runDir
gatkVariantcalling
.
variantcalling
=
config
(
"library_variantcalling"
,
default
=
false
)
gatkVariantcalling
.
preProcesBams
=
true
gatkVariantcalling
.
sampleID
=
sampleID
gatkVariantcalling
.
init
gatkVariantcalling
.
biopetScript
addAll
(
gatkVariantcalling
.
functions
)
libraryOutput
.
variantcalling
=
gatkVariantcalling
.
scriptOutput
return
libraryOutput
}
}
object
GatkPipeline
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala
0 → 100644
View file @
88b4b2a4
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.BiopetQScript
import
nl.lumc.sasc.biopet.core.PipelineCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.gatk.ApplyRecalibration
import
nl.lumc.sasc.biopet.extensions.gatk.VariantAnnotator
import
nl.lumc.sasc.biopet.extensions.gatk.VariantRecalibrator
import
org.broadinstitute.gatk.queue.QScript
class
GatkVariantRecalibration
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
@Input
(
doc
=
"input vcf file"
,
shortName
=
"I"
)
var
inputVcf
:
File
=
_
@Input
(
doc
=
"input vcf file"
,
shortName
=
"BAM"
,
required
=
false
)
var
bamFiles
:
List
[
File
]
=
Nil
@Output
(
doc
=
"output vcf file"
,
shortName
=
"out"
)
var
outputVcf
:
File
=
_
def
init
()
{
if
(
inputVcf
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on gatk module"
)
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on gatk module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
}
def
biopetScript
()
{
var
vcfFile
:
File
=
if
(!
bamFiles
.
isEmpty
)
addVariantAnnotator
(
inputVcf
,
bamFiles
,
outputDir
)
else
inputVcf
vcfFile
=
addSnpVariantRecalibrator
(
vcfFile
,
outputDir
)
vcfFile
=
addIndelVariantRecalibrator
(
vcfFile
,
outputDir
)
}
def
addSnpVariantRecalibrator
(
inputVcf
:
File
,
dir
:
String
)
:
File
=
{
val
snpRecal
=
VariantRecalibrator
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
),
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
),
indel
=
false
)
if
(!
snpRecal
.
resource
.
isEmpty
)
{
add
(
snpRecal
)
val
snpApply
=
ApplyRecalibration
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
),
snpRecal
.
recal_file
,
snpRecal
.
tranches_file
,
indel
=
false
)
add
(
snpApply
)
return
snpApply
.
out
}
else
{
logger
.
warn
(
"Skipped snp Recalibration, resource is missing"
)
return
inputVcf
}
}
def
addIndelVariantRecalibrator
(
inputVcf
:
File
,
dir
:
String
)
:
File
=
{
val
indelRecal
=
VariantRecalibrator
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal"
),
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.tranches"
),
indel
=
true
)
if
(!
indelRecal
.
resource
.
isEmpty
)
{
add
(
indelRecal
)
val
indelApply
=
ApplyRecalibration
(
this
,
inputVcf
,
swapExt
(
dir
,
inputVcf
,
".vcf"
,
".indel.recal.vcf"
),
indelRecal
.
recal_file
,
indelRecal
.
tranches_file
,
indel
=
true
)
add
(
indelApply
)
return
indelApply
.
out
}
else
{
logger
.
warn
(
"Skipped indel Recalibration, resource is missing"
)
return
inputVcf
}
}
def
addVariantAnnotator
(
inputvcf
:
File
,
bamfiles
:
List
[
File
],
dir
:
String
)
:
File
=
{
val
variantAnnotator
=
VariantAnnotator
(
this
,
inputvcf
,
bamfiles
,
swapExt
(
dir
,
inputvcf
,
".vcf"
,
".anotated.vcf"
))
add
(
variantAnnotator
)
return
variantAnnotator
.
out
}
}
object
GatkVariantRecalibration
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
0 → 100644
View file @
88b4b2a4
package
nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
java.io.File
import
nl.lumc.sasc.biopet.tools.
{
MpileupToVcf
,
VcfFilter
,
MergeAlleles
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.gatk.
{
AnalyzeCovariates
,
BaseRecalibrator
,
GenotypeGVCFs
,
HaplotypeCaller
,
IndelRealigner
,
PrintReads
,
RealignerTargetCreator
,
SelectVariants
,
CombineVariants
,
UnifiedGenotyper
}
import
nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
}
import
scala.collection.SortedMap
import
scala.language.reflectiveCalls
class
GatkVariantcalling
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
def
this
()
=
this
(
null
)
val
scriptOutput
=
new
GatkVariantcalling
.
ScriptOutput
@Input
(
doc
=
"Bam files (should be deduped bams)"
,
shortName
=
"BAM"
)
var
inputBams
:
List
[
File
]
=
Nil
@Input
(
doc
=
"Raw vcf file"
,
shortName
=
"raw"
)
var
rawVcfInput
:
File
=
_
@Argument
(
doc
=
"Reference"
,
shortName
=
"R"
,
required
=
false
)
var
reference
:
File
=
config
(
"reference"
,
required
=
true
)