Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
610e9ad5
Commit
610e9ad5
authored
Dec 04, 2014
by
bow
Browse files
Merge branch 'feature-genotype_filter' into 'develop'
Feature genotype filter For #92 See merge request !62
parents
54d50322
4a4fa9be
Changes
3
Hide whitespace changes
Inline
Side-by-side
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
View file @
610e9ad5
...
...
@@ -49,9 +49,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
def
init
()
{
if
(
config
.
contains
(
"target_bed"
))
{
defaults
++=
Map
(
"gatk"
->
Map
((
"intervals"
->
config
(
"target_bed"
).
asStringList
)))
}
if
(
config
.
contains
(
"gvcfFiles"
))
for
(
file
<-
config
(
"gvcfFiles"
).
asList
)
gvcfFiles
:+=
file
.
toString
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala
View file @
610e9ad5
...
...
@@ -35,8 +35,7 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction
override
val
defaultVmem
=
"6G"
memoryLimit
=
Option
(
2.0
)
if
(
config
.
contains
(
"target_bed"
))
defaults
++=
Map
(
"samtoolsmpileup"
->
Map
(
"interval_bed"
->
config
(
"target_bed"
).
asStringList
.
head
,
"disable_baq"
->
true
,
"min_map_quality"
->
1
))
defaults
++=
Map
(
"samtoolsmpileup"
->
Map
(
"disable_baq"
->
true
,
"min_map_quality"
->
1
))
override
def
afterGraph
{
super
.
afterGraph
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala
View file @
610e9ad5
...
...
@@ -2,7 +2,8 @@ package nl.lumc.sasc.biopet.tools
import
htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import
htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import
htsjdk.variant.vcf.VCFFileReader
import
htsjdk.variant.vcf.
{
VCFHeader
,
VCFFileReader
}
import
htsjdk.variant.variantcontext.VariantContext
import
java.io.File
import
java.util.ArrayList
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
...
...
@@ -48,81 +49,189 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
}
object
VcfFilter
extends
ToolCommand
{
case
class
Args
(
inputVcf
:
File
=
null
,
outputVcf
:
File
=
null
,
minSampleDepth
:
Int
=
-
1
,
minTotalDepth
:
Int
=
-
1
,
minAlternateDepth
:
Int
=
-
1
,
minSamplesPass
:
Int
=
0
,
minBamAlternateDepth
:
Int
=
0
,
filterRefCalls
:
Boolean
=
false
)
extends
AbstractArgs
case
class
Args
(
inputVcf
:
File
=
null
,
outputVcf
:
File
=
null
,
minQualscore
:
Option
[
Double
]
=
None
,
minSampleDepth
:
Int
=
-
1
,
minTotalDepth
:
Int
=
-
1
,
minAlternateDepth
:
Int
=
-
1
,
minSamplesPass
:
Int
=
0
,
minBamAlternateDepth
:
Int
=
0
,
mustHaveVariant
:
List
[
String
]
=
Nil
,
denovoInSample
:
String
=
null
,
diffGenotype
:
List
[(
String
,
String
)]
=
Nil
,
filterHetVarToHomVar
:
List
[(
String
,
String
)]
=
Nil
,
filterRefCalls
:
Boolean
=
false
,
filterNoCalls
:
Boolean
=
false
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'I'
,
"inputVcf"
)
required
()
maxOccurs
(
1
)
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
inputVcf
=
x
)
}
}
text
(
"Input vcf file"
)
opt
[
File
](
'o'
,
"outputVcf"
)
required
()
maxOccurs
(
1
)
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
outputVcf
=
x
)
}
text
(
"
o
utput file
, default to stdout
"
)
opt
[
Int
](
"minSampleDepth"
)
unbounded
()
action
{
(
x
,
c
)
=>
}
text
(
"
O
utput
vcf
file"
)
opt
[
Int
](
"minSampleDepth"
)
unbounded
()
valueName
(
"<int>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
minSampleDepth
=
x
)
}
opt
[
Int
](
"minTotalDepth"
)
unbounded
()
action
{
(
x
,
c
)
=>
}
text
(
"Min value for DP in genotype fields"
)
opt
[
Int
](
"minTotalDepth"
)
unbounded
()
valueName
(
"<int>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
minTotalDepth
=
x
)
}
opt
[
Int
](
"minAlternateDepth"
)
unbounded
()
action
{
(
x
,
c
)
=>
}
text
(
"Min value of DP field in INFO fields"
)
opt
[
Int
](
"minAlternateDepth"
)
unbounded
()
valueName
(
"<int>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
minAlternateDepth
=
x
)
}
opt
[
Int
](
"minSamplesPass"
)
unbounded
()
action
{
(
x
,
c
)
=>
}
text
(
"Min value of AD field in genotype fields"
)
opt
[
Int
](
"minSamplesPass"
)
unbounded
()
valueName
(
"<int>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
minSamplesPass
=
x
)
}
opt
[
Int
](
"minBamAlternateDepth"
)
unbounded
()
action
{
(
x
,
c
)
=>
}
text
(
"Min number opf samples to pass --minAlternateDepth, --minBamAlternateDepth and --minSampleDepth"
)
opt
[
Int
](
"minBamAlternateDepth"
)
unbounded
()
valueName
(
"<int>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
minBamAlternateDepth
=
x
)
}
}
// TODO: Convert this to more generic filter
opt
[
String
](
"denovoInSample"
)
maxOccurs
(
1
)
unbounded
()
valueName
(
"<sample>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
denovoInSample
=
x
)
}
text
(
"Only show variants that contain unique alleles in compete set for given sample"
)
opt
[
String
](
"mustHaveVariant"
)
unbounded
()
valueName
(
"<sample>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
mustHaveVariant
=
x
::
c
.
mustHaveVariant
)
}
text
(
"Given sample must have 1 alternative allele"
)
opt
[
String
](
"diffGenotype"
)
unbounded
()
valueName
(
"<sample:sample>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
diffGenotype
=
(
x
.
split
(
":"
)(
0
),
x
.
split
(
":"
)(
1
))
::
c
.
diffGenotype
)
}
validate
{
x
=>
if
(
x
.
split
(
":"
).
length
==
2
)
success
else
failure
(
"--notSameGenotype should be in this format: sample:sample"
)
}
text
(
"Given samples must have a different genotype"
)
opt
[
String
](
"filterHetVarToHomVar"
)
unbounded
()
valueName
(
"<sample:sample>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
filterHetVarToHomVar
=
(
x
.
split
(
":"
)(
0
),
x
.
split
(
":"
)(
1
))
::
c
.
filterHetVarToHomVar
)
}
validate
{
x
=>
if
(
x
.
split
(
":"
).
length
==
2
)
success
else
failure
(
"--filterHetVarToHomVar should be in this format: sample:sample"
)
}
text
(
"If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filterd"
)
opt
[
Unit
](
"filterRefCalls"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
filterRefCalls
=
true
)
}
}
text
(
"Filter when there are only ref calls"
)
opt
[
Unit
](
"filterNoCalls"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
filterNoCalls
=
true
)
}
text
(
"Filter when there are only no calls"
)
opt
[
Double
](
"minQualscore"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
minQualscore
=
Some
(
x
))
}
text
(
"Min qual score"
)
}
var
commandArgs
:
Args
=
_
/**
* @param args the command line arguments
*/
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
val
argsParser
=
new
OptParser
val
commandArgs
:
Args
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
sys
.
exit
(
1
)
commandArgs
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
sys
.
exit
(
1
)
val
reader
=
new
VCFFileReader
(
commandArgs
.
inputVcf
,
false
)
val
header
=
reader
.
getFileHeader
val
writer
=
new
AsyncVariantContextWriter
(
new
VariantContextWriterBuilder
().
setOutputFile
(
commandArgs
.
outputVcf
).
build
)
writer
.
writeHeader
(
header
)
val
bamADFields
=
(
for
(
line
<-
header
.
getInfoHeaderLines
if
line
.
getID
.
startsWith
(
"BAM-AD-"
))
yield
line
.
getID
).
toList
val
bamDPFields
=
(
for
(
line
<-
header
.
getInfoHeaderLines
if
line
.
getID
.
startsWith
(
"BAM-DP-"
))
yield
line
.
getID
).
toList
for
(
record
<-
reader
)
{
val
genotypes
=
for
(
genotype
<-
record
.
getGenotypes
)
yield
{
val
DP
=
if
(
genotype
.
hasDP
)
genotype
.
getDP
else
-
1
val
AD
=
if
(
genotype
.
hasAD
)
List
(
genotype
.
getAD
:
_
*
)
else
Nil
DP
>=
commandArgs
.
minSampleDepth
&&
(
if
(!
AD
.
isEmpty
)
AD
.
tail
.
count
(
_
>=
commandArgs
.
minAlternateDepth
)
>
0
else
true
)
&&
!(
commandArgs
.
filterRefCalls
&&
genotype
.
isHomRef
)
if
(
minQualscore
(
record
)
&&
filterRefCalls
(
record
)
&&
filterNoCalls
(
record
)
&&
minTotalDepth
(
record
)
&&
minSampleDepth
(
record
)
&&
minAlternateDepth
(
record
)
&&
minBamAlternateDepth
(
record
,
header
)
&&
mustHaveVariant
(
record
)
&&
notSameGenotype
(
record
)
&&
filterHetVarToHomVar
(
record
)
&&
denovoInSample
(
record
))
{
writer
.
add
(
record
)
}
}
reader
.
close
writer
.
close
}
def
minQualscore
(
record
:
VariantContext
)
:
Boolean
=
{
if
(
commandArgs
.
minQualscore
.
isEmpty
)
return
true
record
.
getPhredScaledQual
>=
commandArgs
.
minQualscore
.
get
}
def
filterRefCalls
(
record
:
VariantContext
)
:
Boolean
=
{
if
(
commandArgs
.
filterNoCalls
)
record
.
getGenotypes
.
exists
(
g
=>
!
g
.
isHomRef
)
else
true
}
def
filterNoCalls
(
record
:
VariantContext
)
:
Boolean
=
{
if
(
commandArgs
.
filterNoCalls
)
record
.
getGenotypes
.
exists
(
g
=>
!
g
.
isNoCall
)
else
true
}
def
minTotalDepth
(
record
:
VariantContext
)
:
Boolean
=
{
record
.
getAttributeAsInt
(
"DP"
,
-
1
)
>=
commandArgs
.
minTotalDepth
}
val
bamADvalues
=
(
for
(
field
<-
bamADFields
)
yield
{
record
.
getAttribute
(
field
,
new
ArrayList
)
match
{
case
t
:
ArrayList
[
_
]
if
t
.
length
>
1
=>
{
for
(
i
<-
1
until
t
.
size
)
yield
{
t
(
i
)
match
{
case
a
:
Int
=>
a
>
commandArgs
.
minBamAlternateDepth
case
a
:
String
=>
a
.
toInt
>
commandArgs
.
minBamAlternateDepth
case
_
=>
false
}
def
minSampleDepth
(
record
:
VariantContext
)
:
Boolean
=
{
record
.
getGenotypes
.
count
(
genotype
=>
{
val
DP
=
if
(
genotype
.
hasDP
)
genotype
.
getDP
else
-
1
DP
>=
commandArgs
.
minSampleDepth
})
>=
commandArgs
.
minSamplesPass
}
def
minAlternateDepth
(
record
:
VariantContext
)
:
Boolean
=
{
record
.
getGenotypes
.
count
(
genotype
=>
{
val
AD
=
if
(
genotype
.
hasAD
)
List
(
genotype
.
getAD
:
_
*
)
else
Nil
if
(!
AD
.
isEmpty
)
AD
.
tail
.
count
(
_
>=
commandArgs
.
minAlternateDepth
)
>
0
else
true
})
>=
commandArgs
.
minSamplesPass
}
def
minBamAlternateDepth
(
record
:
VariantContext
,
header
:
VCFHeader
)
:
Boolean
=
{
val
bamADFields
=
(
for
(
line
<-
header
.
getInfoHeaderLines
if
line
.
getID
.
startsWith
(
"BAM-AD-"
))
yield
line
.
getID
).
toList
val
bamADvalues
=
(
for
(
field
<-
bamADFields
)
yield
{
record
.
getAttribute
(
field
,
new
ArrayList
)
match
{
case
t
:
ArrayList
[
_
]
if
t
.
length
>
1
=>
{
for
(
i
<-
1
until
t
.
size
)
yield
{
t
(
i
)
match
{
case
a
:
Int
=>
a
>
commandArgs
.
minBamAlternateDepth
case
a
:
String
=>
a
.
toInt
>
commandArgs
.
minBamAlternateDepth
case
_
=>
false
}
}
case
_
=>
List
(
false
)
}
}).
flatten
case
_
=>
List
(
false
)
}
}).
flatten
if
(
record
.
getAttributeAsInt
(
"DP"
,
-
1
)
>=
commandArgs
.
minTotalDepth
&&
genotypes
.
count
(
_
==
true
)
>=
commandArgs
.
minSamplesPass
&&
(
commandArgs
.
minBamAlternateDepth
<=
0
||
bamADvalues
.
count
(
_
==
true
)
>=
commandArgs
.
minSamplesPass
))
writer
.
add
(
record
)
return
commandArgs
.
minBamAlternateDepth
<=
0
||
bamADvalues
.
count
(
_
==
true
)
>=
commandArgs
.
minSamplesPass
}
def
mustHaveVariant
(
record
:
VariantContext
)
:
Boolean
=
{
return
!
commandArgs
.
mustHaveVariant
.
map
(
record
.
getGenotype
(
_
)).
exists
(
a
=>
a
.
isHomRef
||
a
.
isNoCall
)
}
def
notSameGenotype
(
record
:
VariantContext
)
:
Boolean
=
{
for
((
sample1
,
sample2
)
<-
commandArgs
.
diffGenotype
)
{
val
genotype1
=
record
.
getGenotype
(
sample1
)
val
genotype2
=
record
.
getGenotype
(
sample2
)
if
(
genotype1
.
sameGenotype
(
genotype2
))
return
false
}
reader
.
close
writer
.
close
return
true
}
def
filterHetVarToHomVar
(
record
:
VariantContext
)
:
Boolean
=
{
for
((
sample1
,
sample2
)
<-
commandArgs
.
filterHetVarToHomVar
)
{
val
genotype1
=
record
.
getGenotype
(
sample1
)
val
genotype2
=
record
.
getGenotype
(
sample2
)
if
(
genotype1
.
isHet
&&
!
genotype1
.
getAlleles
.
forall
(
_
.
isNonReference
))
{
for
(
allele
<-
genotype1
.
getAlleles
if
allele
.
isNonReference
)
{
if
(
genotype2
.
getAlleles
.
forall
(
_
.
basesMatch
(
allele
)))
return
false
}
}
}
return
true
}
def
denovoInSample
(
record
:
VariantContext
)
:
Boolean
=
{
if
(
commandArgs
.
denovoInSample
==
null
)
return
true
val
genotype
=
record
.
getGenotype
(
commandArgs
.
denovoInSample
)
for
(
allele
<-
genotype
.
getAlleles
if
allele
.
isNonReference
)
{
for
(
g
<-
record
.
getGenotypes
if
g
.
getSampleName
!=
commandArgs
.
denovoInSample
)
{
if
(
g
.
getAlleles
.
exists
(
_
.
basesMatch
(
allele
)))
return
false
}
}
return
true
}
}
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment