Commits (35)
......@@ -4,4 +4,5 @@ build:
- docker
- pip install --upgrade pip setuptools wheel
- pip install '.'
- pip install cython numpy
- tox
......@@ -135,11 +135,15 @@ technologies. E.g., when comparing a WES VCF file vs a SNP array, this
tool can be quite useful.
Output is a simple JSON file listing counts of concordant and discordant
alleles and some other metrics. It is also possible to output the discordant
VCF records.
Multisample VCF files are allowed; the samples to be evaluated have to be set
through a CLI argument.
Variants from the `--call-vcf` file are filtered to have a Genotype Quality (GQ)
of at least 30 by default. This default can be overridden with `--min-qual` (e.g. `--min-qual 0`).
The optional flag `--min-depth` can be used to set the minimum read coverage.
#### Usage
......@@ -155,7 +159,9 @@ Options:
-ps, --positive-samples TEXT Sample(s) in positive-vcf to consider. May be
called multiple times [required]
-s, --stats PATH Path to output stats json file
-dc, --discordant PATH Path to output discordant VCF file
-dc, --discordant PATH Path to output gzipped discordant vcf file
-mq, --min-qual FLOAT Minimum quality of variants to consider
-md, --min-depth INTEGER Minimum depth of variants to consider
--help Show this message and exit.
This diff is collapsed.
envlist = py36
deps =
commands = pytest
......@@ -6,9 +6,11 @@ vtools.cli
:copyright: (c) Leiden University Medical Center
:license: MIT
import os
import json
import click
from cyvcf2 import VCF, Writer
import gzip
from .evaluate import site_concordancy
from .filter import FilterParams, FilterClass, Filterer
......@@ -32,25 +34,44 @@ from .gcoverage import RefRecord, region_coverages
"May be called multiple times",
@click.option("-s", "--stats", type=click.Path(writable=True),
help="Path to output stats json file", default='-')
@click.option("-dc", "--discordant", type=click.Path(writable=True),
help="Path to output discordant VCF file",
help="Path to output stats json file")
@click.option("-dvcf", "--discordant-vcf", type=click.Path(writable=True),
help="Path to output the discordant vcf file",
def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples, stats,
@click.option("-mq", "--min-qual", type=float,
help="Minimum quality of variants to consider", default=30)
@click.option("-md", "--min-depth", type=int,
help="Minimum depth of variants to consider", default=0)
def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples,
min_qual, min_depth, stats, discordant_vcf):
c_vcf = VCF(call_vcf, gts012=True)
p_vcf = VCF(positive_vcf, gts012=True)
st, disc = site_concordancy(c_vcf, p_vcf, call_samples,
positive_samples, min_qual, min_depth)
# Write the stats json file
with click.open_file(stats, 'w') as fout:
print(json.dumps(st), file=fout)
# If specified, write the discordant variants
if discordant:
with click.open_file(discordant, 'w') as fout:
if stats is None:
with click.open_file(stats, 'w') as fout:
# If there were discordant records and a discordant VCF should be written
if len(disc) > 0 and discordant_vcf:
# make sure the parent folder exists
parent_folder = os.path.dirname(discordant_vcf)
os.makedirs(parent_folder, exist_ok=True)
with click.open_file(discordant_vcf, 'w') as fout:
# First, we write the vcf header
with gzip.open(call_vcf, 'rt') as fin:
for line in fin:
if line.startswith('#'):
# Then we write the vcf records that were discordant
for record in disc:
print(record, file=fout, end='')
This diff is collapsed.