Skip to content
Snippets Groups Projects
Commit d5dd7f1a authored by van den Berg's avatar van den Berg
Browse files

Add option to filter records based on read depth

parent ec9bba93
No related branches found
No related tags found
2 merge requests!6Merge testing into master,!5Merge new testing code into devel
Pipeline #2742 failed
......@@ -135,11 +135,15 @@ technologies. E.g, when comparing a WES VCF file vs a SNP array, this
tool can be quite useful.
Output is a simple JSON file listing counts of concordant and discordant
alleles.
alleles and some other metrics. It is also possible to output the discordant
VCF records.
Multisample VCF files are allowed; the samples to be evaluated have to be set
through a CLI argument.
Variants from the `--call-vcf` are filtered to have a Genotype Quality (GQ) of
at least 30 by default. This can be overruled by specifying `--min-qual 0`.
The optional flag `--min-depth` can be used to set the minimum read coverage.
#### Usage
......@@ -156,6 +160,8 @@ Options:
called multiple times [required]
-s, --stats PATH Path to output stats json file
-dc, --discordant PATH Path to output discordant VCF file
-mq, --min-qual FLOAT Minimum quality of variants to consider
-md, --min-depth FLOAT Minimum depth of variants to consider
--help Show this message and exit.
```
......
......@@ -36,12 +36,16 @@ from .gcoverage import RefRecord, region_coverages
@click.option("-dc", "--discordant", type=click.Path(writable=True),
help="Path to output discordant VCF file",
required=False)
def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples, stats,
discordant):
@click.option("-mq", "--min-qual", type=float,
help="Minimum quality of variants to consider", default=30)
@click.option("-md", "--min-depth", type=float,
help="Minimum depth of variants to consider", default=0)
def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples,
min_qual, min_depth, stats, discordant):
c_vcf = VCF(call_vcf, gts012=True)
p_vcf = VCF(positive_vcf, gts012=True)
st, disc = site_concordancy(c_vcf, p_vcf, call_samples,
positive_samples)
positive_samples, min_qual, min_depth)
# Write the stats json file
with click.open_file(stats, 'w') as fout:
print(json.dumps(st), file=fout)
......
......@@ -15,7 +15,8 @@ def site_concordancy(call_vcf: VCF,
positive_vcf: VCF,
call_samples: List[str],
positive_samples: List[str],
min_gq: float = 30) -> Dict[str, float]:
min_gq: float = 30,
min_dp: float = 0) -> Dict[str, float]:
"""
Calculate concordance between sites of two call sets,
of which one contains known true positives.
......@@ -32,9 +33,11 @@ def site_concordancy(call_vcf: VCF,
:param call_vcf: The VCF to evaluate. Must have gts012=True
:param positive_vcf: The VCF file containing true positive sites.
Must have gts012=True
:param call_samples: List of samples in `call_vcf` to consider
:param call_samples: List of samples in `call_vcf` to consider.
:param positive_samples: List of samples in `positive_vcf` to consider.
Must have same length than `call_samples`
:param min_qg: Minimum quality of variants to consider.
:param min_dp: Minimum depth of variants to consider.
:raises: ValueError if sample lists are not of same size.
......@@ -58,7 +61,8 @@ def site_concordancy(call_vcf: VCF,
"alleles_concordant": 0,
"alleles_discordant": 0,
"alleles_no_call": 0,
"alleles_low_qual": 0
"alleles_low_qual": 0,
"alleles_low_depth": 0
}
discordant_count = 0
discordant_records = list()
......@@ -91,10 +95,19 @@ def site_concordancy(call_vcf: VCF,
p_gt = pos_record.gt_types[p_s]
c_gt = call_record.gt_types[c_s]
# If the site does not pass the quality requirements
c_gq = call_record.gt_quals[c_s]
if c_gq < min_gq:
d['alleles_low_qual'] += 2
elif p_gt == 0 and c_gt == 0:
c_dp = call_record.gt_depths[c_s]
if c_gq < min_gq or c_dp < min_dp:
if c_gq < min_gq:
d['alleles_low_qual'] += 2
if c_dp < min_dp:
d['alleles_low_depth'] += 2
continue
# If the site passed the quality requirements
if p_gt == 0 and c_gt == 0:
# both homozygous reference
d['alleles_concordant'] += 2
d['alleles_hom_ref_concordant'] += 2
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment