Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
22b4b757
Commit
22b4b757
authored
Sep 22, 2014
by
Peter van 't Hof
Browse files
Added filter step
parent
f0fd2b99
Changes
3
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/VcfFilter.scala
0 → 100644
View file @
22b4b757
package
nl.lumc.sasc.biopet.core.apps
import
htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import
htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import
htsjdk.variant.vcf.VCFFileReader
import
java.io.File
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Output
,
Input
}
import
scala.collection.JavaConversions._
class
VcfFilter
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input vcf"
,
shortName
=
"I"
,
required
=
true
)
var
inputVcf
:
File
=
_
@Output
(
doc
=
"Output vcf"
,
shortName
=
"o"
,
required
=
false
)
var
outputVcf
:
File
=
_
var
minSampleDepth
:
Option
[
Int
]
=
_
var
minTotalDepth
:
Option
[
Int
]
=
_
var
minAlternateDepth
:
Option
[
Int
]
=
_
var
minSamplesPass
:
Option
[
Int
]
=
_
override
val
defaultVmem
=
"8G"
memoryLimit
=
Option
(
4.0
)
override
def
afterGraph
{
minSampleDepth
=
config
(
"min_sample_depth"
)
minTotalDepth
=
config
(
"min_total_depth"
)
minAlternateDepth
=
config
(
"min_alternate_depth"
)
minSamplesPass
=
config
(
"min_samples_pass"
)
}
override
def
commandLine
=
super
.
commandLine
+
required
(
"-I"
,
inputVcf
)
+
required
(
"-o"
,
outputVcf
)
+
optional
(
"-minSampleDepth"
,
minSampleDepth
)
+
optional
(
"-minTotalDepth"
,
minTotalDepth
)
+
optional
(
"-minAlternateDepth"
,
minAlternateDepth
)
+
optional
(
"-minSamplesPass"
,
minSamplesPass
)
}
object
VcfFilter
{
var
inputVcf
:
File
=
_
var
outputVcf
:
File
=
_
var
minSampleDepth
=
-
1
var
minTotalDepth
=
-
1
var
minAlternateDepth
=
-
1
var
minSamplesPass
=
0
/**
* @param args the command line arguments
*/
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
for
(
t
<-
0
until
args
.
size
)
{
args
(
t
)
match
{
case
"-I"
=>
inputVcf
=
new
File
(
args
(
t
+
1
))
case
"-o"
=>
outputVcf
=
new
File
(
args
(
t
+
1
))
case
"-minSampleDepth"
=>
minSampleDepth
=
args
(
t
+
1
).
toInt
case
"-minTotalDepth"
=>
minTotalDepth
=
args
(
t
+
1
).
toInt
case
"-minAlternateDepth"
=>
minAlternateDepth
=
args
(
t
+
1
).
toInt
case
"-minSamplesPass"
=>
minSamplesPass
=
args
(
t
+
1
).
toInt
case
_
=>
}
}
if
(
inputVcf
==
null
)
throw
new
IllegalStateException
(
"No inputVcf, use -I"
)
if
(
outputVcf
==
null
)
throw
new
IllegalStateException
(
"No outputVcf, use -o"
)
val
reader
=
new
VCFFileReader
(
inputVcf
)
val
header
=
reader
.
getFileHeader
val
builder
=
new
VariantContextWriterBuilder
().
setOutputFile
(
outputVcf
)
val
writer
=
new
AsyncVariantContextWriter
(
builder
.
build
)
writer
.
writeHeader
(
header
)
for
(
record
<-
reader
)
{
val
chr
=
record
.
getChr
val
pos
=
record
.
getStart
val
genotypes
=
for
(
genotype
<-
record
.
getGenotypes
)
yield
{
genotype
.
getDP
>=
minSampleDepth
&&
List
(
genotype
.
getAD
:_
*
).
tail
.
count
(
_
>=
minAlternateDepth
)
>
0
}
if
(
record
.
getAttributeAsInt
(
"DP"
,
-
1
)
>=
minTotalDepth
&&
genotypes
.
count
(
_
==
true
)
>=
minSamplesPass
)
{
writer
.
add
(
record
)
}
}
reader
.
close
writer
.
close
}
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
View file @
22b4b757
...
...
@@ -199,7 +199,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
gatkVariantcalling
.
biopetScript
addAll
(
gatkVariantcalling
.
functions
)
libraryOutput
.
variantcalling
=
gatkVariantcalling
.
scriptOutput
//libraryOutput.preProcesBamFile = gatkVariantcalling.outputFiles("final_bam")
return
libraryOutput
}
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
View file @
22b4b757
...
...
@@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
java.io.File
import
nl.lumc.sasc.biopet.core.apps.MpileupToVcf
import
nl.lumc.sasc.biopet.core.apps.
{
MpileupToVcf
,
VcfFilter
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.gatk.
{
AnalyzeCovariates
,
BaseRecalibrator
,
GenotypeGVCFs
,
HaplotypeCaller
,
IndelRealigner
,
PrintReads
,
RealignerTargetCreator
}
import
nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
...
...
@@ -90,10 +90,16 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
add
(
m2v
)
scriptOutput
.
rawVcfFile
=
m2v
.
output
val
vcfFilter
=
new
VcfFilter
(
this
)
vcfFilter
.
defaults
++=
Map
(
"min_sample_depth"
->
8
,
"min_alternate_depth"
->
4
,
"min_samples_pass"
->
1
)
vcfFilter
.
inputVcf
=
m2v
.
output
vcfFilter
.
outputVcf
=
this
.
swapExt
(
outputDir
,
m2v
.
output
,
".vcf"
,
".filter.vcf.gz"
)
add
(
vcfFilter
)
val
hcAlleles
=
new
HaplotypeCaller
(
this
)
hcAlleles
.
input_file
=
scriptOutput
.
bamFiles
hcAlleles
.
out
=
outputDir
+
outputName
+
".genotype_raw_alleles.vcf.gz"
hcAlleles
.
alleles
=
m2v
.
output
hcAlleles
.
alleles
=
vcfFilter
.
output
Vcf
hcAlleles
.
genotyping_mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
genotyper
.
GenotypingOutputMode
.
GENOTYPE_GIVEN_ALLELES
add
(
hcAlleles
)
scriptOutput
.
rawGenotypeVcf
=
hcAlleles
.
out
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment