Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
6a965b1c
Commit
6a965b1c
authored
May 29, 2016
by
bow
Browse files
Merge branch 'feature-unique_sample_filter' into 'develop'
Added more filter options fixes #316 See merge request !410
parents
ece09a76
02936179
Changes
2
Hide whitespace changes
Inline
Side-by-side
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala
View file @
6a965b1c
...
...
@@ -32,6 +32,11 @@ object VcfFilter extends ToolCommand {
}
}
/**
 * Groups the simple on/off filter switches of the tool.
 *
 * @param uniqueOnly     when set, a record is kept only if `hasUniqeSample` holds for it
 * @param sharedOnly     when set, a record is kept only if `allSamplesVariant` holds for it
 * @param filterRefCalls when set, a record is kept only if `hasNonRefCalls` holds for it
 * @param filterNoCalls  when set, a record is kept only if `hasCalls` holds for it
 */
case class BooleanArgs(
  uniqueOnly: Boolean = false,
  sharedOnly: Boolean = false,
  filterRefCalls: Boolean = false,
  filterNoCalls: Boolean = false
)
case
class
Args
(
inputVcf
:
File
=
null
,
outputVcf
:
File
=
null
,
invertedOutputVcf
:
Option
[
File
]
=
None
,
...
...
@@ -43,17 +48,17 @@ object VcfFilter extends ToolCommand {
mustHaveVariant
:
List
[
String
]
=
Nil
,
calledIn
:
List
[
String
]
=
Nil
,
mustHaveGenotype
:
List
[(
String
,
GenotypeType
)]
=
Nil
,
deNovo
InSample
:
String
=
null
,
uniqueVariant
InSample
:
String
=
null
,
resToDom
:
List
[
Trio
]
=
Nil
,
trioCompound
:
List
[
Trio
]
=
Nil
,
deNovoTrio
:
List
[
Trio
]
=
Nil
,
trioLossOfHet
:
List
[
Trio
]
=
Nil
,
booleanArgs
:
BooleanArgs
=
BooleanArgs
(),
diffGenotype
:
List
[(
String
,
String
)]
=
Nil
,
filterHetVarToHomVar
:
List
[(
String
,
String
)]
=
Nil
,
filterRefCalls
:
Boolean
=
false
,
filterNoCalls
:
Boolean
=
false
,
iDset
:
Set
[
String
]
=
Set
(),
minGenomeQuality
:
Int
=
0
)
extends
AbstractArgs
minGenomeQuality
:
Int
=
0
)
extends
AbstractArgs
{
}
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'I'
,
"inputVcf"
)
required
()
maxOccurs
1
valueName
"<file>"
action
{
(
x
,
c
)
=>
...
...
@@ -84,7 +89,7 @@ object VcfFilter extends ToolCommand {
c
.
copy
(
trioCompound
=
new
Trio
(
x
)
::
c
.
trioCompound
)
}
text
"Only shows variants where child is a compound variant combined from both parants"
opt
[
String
](
"deNovoInSample"
)
maxOccurs
1
unbounded
()
valueName
"<sample>"
action
{
(
x
,
c
)
=>
c
.
copy
(
deNovo
InSample
=
x
)
c
.
copy
(
uniqueVariant
InSample
=
x
)
}
text
"Only show variants that contain unique alleles in complete set for given sample"
opt
[
String
](
"deNovoTrio"
)
unbounded
()
valueName
"<child:father:mother>"
action
{
(
x
,
c
)
=>
c
.
copy
(
deNovoTrio
=
new
Trio
(
x
)
::
c
.
deNovoTrio
)
...
...
@@ -113,11 +118,17 @@ object VcfFilter extends ToolCommand {
}
validate
{
x
=>
if
(
x
.
split
(
":"
).
length
==
2
)
success
else
failure
(
"--filterHetVarToHomVar should be in this format: sample:sample"
)
}
text
"If variants in sample 1 are heterogeneous and alternative alleles are homogeneous in sample 2 variants are filtered"
opt
[
Unit
](
"filterRefCalls"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
filterRefCalls
=
true
)
c
.
copy
(
booleanArgs
=
c
.
booleanArgs
.
copy
(
filterRefCalls
=
true
)
)
}
text
"Filter when there are only ref calls"
opt
[
Unit
](
"filterNoCalls"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
filterNoCalls
=
true
)
c
.
copy
(
booleanArgs
=
c
.
booleanArgs
.
copy
(
filterNoCalls
=
true
)
)
}
text
"Filter when there are only no calls"
opt
[
Unit
](
"uniqueOnly"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
booleanArgs
=
c
.
booleanArgs
.
copy
(
uniqueOnly
=
true
))
}
text
"Filter when there more then 1 sample have this variant"
opt
[
Unit
](
"sharedOnly"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
booleanArgs
=
c
.
booleanArgs
.
copy
(
sharedOnly
=
true
))
}
text
"Filter when not all samples have this variant"
opt
[
Double
](
"minQualScore"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
minQualScore
=
Some
(
x
))
}
text
"Min qual score"
...
...
@@ -136,7 +147,7 @@ object VcfFilter extends ToolCommand {
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
logger
.
info
(
"Start"
)
val
argsParser
=
new
OptParser
val
cmdArgs
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
(
throw
new
IllegalArgumentException
)
val
cmdArgs
=
argsParser
.
parse
(
args
,
Args
())
.
getOrElse
{
throw
new
IllegalArgumentException
}
val
reader
=
new
VCFFileReader
(
cmdArgs
.
inputVcf
,
false
)
val
header
=
reader
.
getFileHeader
...
...
@@ -158,8 +169,10 @@ object VcfFilter extends ToolCommand {
var
counterLeft
=
0
for
(
record
<-
reader
)
{
if
(
cmdArgs
.
minQualScore
.
map
(
minQualscore
(
record
,
_
)).
getOrElse
(
true
)
&&
(!
cmdArgs
.
filterRefCalls
||
hasNonRefCalls
(
record
))
&&
(!
cmdArgs
.
filterNoCalls
||
hasCalls
(
record
))
&&
(!
cmdArgs
.
booleanArgs
.
filterRefCalls
||
hasNonRefCalls
(
record
))
&&
(!
cmdArgs
.
booleanArgs
.
filterNoCalls
||
hasCalls
(
record
))
&&
(!
cmdArgs
.
booleanArgs
.
uniqueOnly
||
hasUniqeSample
(
record
))
&&
(!
cmdArgs
.
booleanArgs
.
sharedOnly
||
allSamplesVariant
(
record
))
&&
hasMinTotalDepth
(
record
,
cmdArgs
.
minTotalDepth
)
&&
hasMinSampleDepth
(
record
,
cmdArgs
.
minSampleDepth
,
cmdArgs
.
minSamplesPass
)
&&
minAlternateDepth
(
record
,
cmdArgs
.
minAlternateDepth
,
cmdArgs
.
minSamplesPass
)
&&
...
...
@@ -172,7 +185,7 @@ object VcfFilter extends ToolCommand {
cmdArgs
.
filterHetVarToHomVar
.
isEmpty
||
cmdArgs
.
filterHetVarToHomVar
.
forall
(
x
=>
filterHetVarToHomVar
(
record
,
x
.
_1
,
x
.
_2
))
)
&&
denovo
InSample
(
record
,
cmdArgs
.
deNovo
InSample
)
&&
uniqueVariant
InSample
(
record
,
cmdArgs
.
uniqueVariant
InSample
)
&&
denovoTrio
(
record
,
cmdArgs
.
deNovoTrio
)
&&
denovoTrio
(
record
,
cmdArgs
.
trioLossOfHet
,
onlyLossHet
=
true
)
&&
resToDom
(
record
,
cmdArgs
.
resToDom
)
&&
...
...
@@ -194,6 +207,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if given samples are called
*
* @param record VCF record
* @param samples Samples that need to be called in this record
* @return false when filters fail
...
...
@@ -205,16 +219,20 @@ object VcfFilter extends ToolCommand {
/**
 * Checks if given genotypes for given samples are there
 *
 * @param record VCF record
 * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype)
 * @return false when filter fails
 */
def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = {
  // Every (sample, expected type) pair has to match; an empty list passes trivially.
  // The check is evaluated exactly once (the former duplicate, discarded evaluation is gone).
  samplesGenotypes.forall {
    case (sample, expectedType) => record.getGenotype(sample).getType == expectedType
  }
}
/**
* Checks if record has at least minQualScore
*
* @param record VCF record
* @param minQualScore Minimal quality score
* @return false when filters fail
...
...
@@ -233,6 +251,16 @@ object VcfFilter extends ToolCommand {
record
.
getGenotypes
.
exists
(
g
=>
!
g
.
isNoCall
)
}
/**
 * Checks if at least one sample in the record passes `uniqueVariantInSample`.
 *
 * @param record VCF record
 * @return true when `uniqueVariantInSample` holds for any sample name of the record
 */
def hasUniqeSample(record: VariantContext): Boolean = {
  val sampleNames = record.getSampleNames
  sampleNames.exists(sampleName => uniqueVariantInSample(record, sampleName))
}
/**
 * Checks if every genotype in the record carries a variant.
 *
 * A genotype counts as a variant when it is not non-informative and has at
 * least one allele that is non-reference and not a no-call.
 *
 * @param record VCF record
 * @return true when all genotypes of the record qualify
 */
def allSamplesVariant(record: VariantContext): Boolean = {
  record.getGenotypes.forall { genotype =>
    // Alleles are only inspected for informative genotypes, as before.
    if (genotype.isNonInformative) false
    else genotype.getAlleles.exists(allele => allele.isNonReference && !allele.isNoCall)
  }
}
/** Returns true when the DP INFO field is at least the given value */
def
hasMinTotalDepth
(
record
:
VariantContext
,
minTotalDepth
:
Int
)
:
Boolean
=
{
record
.
getAttributeAsInt
(
"DP"
,
-
1
)
>=
minTotalDepth
...
...
@@ -240,6 +268,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if the DP genotype field has a minimal value
*
* @param record VCF record
* @param minSampleDepth minimal depth
* @param minSamplesPass Minimal number of samples to pass filter
...
...
@@ -254,6 +283,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if the non-ref AD genotype field has a minimal value
*
* @param record VCF record
* @param minAlternateDepth minimal depth
* @param minSamplesPass Minimal number of samples to pass filter
...
...
@@ -268,6 +298,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if genome quality field has minimum value
*
* @param record VCF record
* @param minGQ smallest GQ allowed
* @param minSamplesPass number of samples to consider
...
...
@@ -282,6 +313,7 @@ object VcfFilter extends ToolCommand {
/**
* Checks if the given samples have a variant in this record
*
* @param record VCF record
* @param mustHaveVariant List of samples that should have this variant
* @return true if filter passed
...
...
@@ -311,11 +343,12 @@ object VcfFilter extends ToolCommand {
}
/** Checks if given sample have alternative alleles that are unique in the VCF record */
def
denovo
InSample
(
record
:
VariantContext
,
sample
:
String
)
:
Boolean
=
{
def
uniqueVariant
InSample
(
record
:
VariantContext
,
sample
:
String
)
:
Boolean
=
{
if
(
sample
==
null
)
return
true
val
genotype
=
record
.
getGenotype
(
sample
)
if
(
genotype
.
isNoCall
)
return
false
for
(
allele
<-
genotype
.
getAlleles
)
{
if
(
genotype
.
getAlleles
.
forall
(
_
.
isReference
))
return
false
for
(
allele
<-
genotype
.
getAlleles
if
allele
.
isNonReference
)
{
for
(
g
<-
record
.
getGenotypes
if
g
.
getSampleName
!=
sample
)
{
if
(
g
.
getAlleles
.
exists
(
_
.
basesMatch
(
allele
)))
return
false
}
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala
View file @
6a965b1c
...
...
@@ -70,24 +70,22 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
/**
* This should simply not raise an exception
*/
val
tmp
=
File
.
createTempFile
(
"VCfFilter"
,
".vcf
.gz
"
)
val
tmp
=
File
.
createTempFile
(
"VCfFilter"
,
".vcf"
)
tmp
.
deleteOnExit
()
val
tmpPath
=
tmp
.
getAbsolutePath
val
arguments
:
Array
[
String
]
=
Array
(
"-I"
,
veppedPath
,
"-o"
,
tmpPath
,
val
arguments
:
Array
[
String
]
=
Array
(
"-I"
,
veppedPath
,
"-o"
,
tmp
.
getAbsolutePath
,
"--mustHaveGenotype"
,
"Sample_101:HET"
)
main
(
arguments
)
val
size
=
new
VCFFileReader
(
new
File
(
tmpPath
)
,
false
).
size
val
size
=
new
VCFFileReader
(
tmp
,
false
).
size
size
shouldBe
1
val
tmp2
=
File
.
createTempFile
(
"VcfFilter"
,
".vcf.gz"
)
tmp2
.
deleteOnExit
()
val
tmpPath2
=
tmp2
.
getAbsolutePath
val
arguments2
:
Array
[
String
]
=
Array
(
"-I"
,
veppedPath
,
"-o"
,
tmpPath2
,
val
arguments2
:
Array
[
String
]
=
Array
(
"-I"
,
veppedPath
,
"-o"
,
tmp2
.
getAbsolutePath
,
"--mustHaveGenotype"
,
"Sample_101:HOM_VAR"
)
main
(
arguments2
)
val
size2
=
new
VCFFileReader
(
new
File
(
tmpPath2
)
,
false
).
size
val
size2
=
new
VCFFileReader
(
tmp2
,
false
).
size
size2
shouldBe
0
}
...
...
@@ -209,9 +207,9 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
val
reader
=
new
VCFFileReader
(
vepped
,
false
)
val
record
=
reader
.
iterator
().
next
()
denovo
InSample
(
record
,
"Sample_101"
)
shouldBe
false
denovo
InSample
(
record
,
"Sample_102"
)
shouldBe
false
denovo
InSample
(
record
,
"Sample_103"
)
shouldBe
false
uniqueVariant
InSample
(
record
,
"Sample_101"
)
shouldBe
false
uniqueVariant
InSample
(
record
,
"Sample_102"
)
shouldBe
false
uniqueVariant
InSample
(
record
,
"Sample_103"
)
shouldBe
false
}
@Test
def
testResToDom
()
=
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment