...
 
Commits (38)
......@@ -4,4 +4,5 @@ build:
- docker
script:
- pip install --upgrade pip setuptools wheel
- pip install '.'
- pip install cython numpy
- tox
include vtools/*.pyx
\ No newline at end of file
......@@ -135,11 +135,15 @@ technologies. E.g, when comparing a WES VCF file vs a SNP array, this
tool can be quite useful.
Output is a simple JSON file listing counts of concordant and discordant
alleles.
alleles and some other metrics. It is also possible to output the discordant
VCF records.
Multisample VCF files are allowed; the samples to be evaluated have to be set
through a CLI argument.
Variants from the `--call-vcf` are filtered to have a Genotype Quality (GQ) of
at least 30 by default. This can be overridden by specifying `--min-qual 0`.
The optional flag `--min-depth` can be used to set the minimum read coverage.
#### Usage
......@@ -155,7 +159,9 @@ Options:
-ps, --positive-samples TEXT Sample(s) in positive-vcf to consider. May be
called multiple times [required]
-s, --stats PATH Path to output stats json file
-dc, --discordant PATH Path to output discordant VCF file
-dvcf, --discordant-vcf PATH Path to output the discordant vcf file
-mq, --min-qual FLOAT Minimum quality of variants to consider
-md, --min-depth INTEGER Minimum depth of variants to consider
--help Show this message and exit.
```
......
[metadata]
license_file=LICENSE
......@@ -7,69 +7,35 @@ setup.py
:license: MIT
"""
from os.path import abspath, dirname, join
import sys
import pkg_resources
import subprocess
from setuptools import setup, find_packages
# Temporarily install dependencies required by setup.py before trying to
# import them. From https://bitbucket.org/dholth/setup-requires
sys.path[0:0] = ['setup-requires']
pkg_resources.working_set.add_entry('setup-requires')
def missing_requirements(specifiers):
    """Lazily yield the requirement specifiers that are not installed.

    Each specifier is handed to ``pkg_resources.require``; those for which it
    raises ``DistributionNotFound`` are yielded, in input order. Any other
    exception raised by ``pkg_resources.require`` propagates to the caller.

    :param specifiers: iterable of requirement strings
    :returns: generator over the specifiers that could not be found
    """
    def _not_found(requirement):
        # True only when pkg_resources reports the distribution as absent.
        try:
            pkg_resources.require(requirement)
        except pkg_resources.DistributionNotFound:
            return True
        return False

    yield from filter(_not_found, specifiers)
def install_requirements(specifiers):
    """Install the given requirement specifiers into ``setup-requires``.

    Runs ``pip install -t setup-requires`` with the current interpreter for
    all entries of *specifiers* in a single invocation. Nothing is executed
    when *specifiers* is empty.

    :param specifiers: iterable of requirement strings; it is consumed once
    :raises subprocess.CalledProcessError: if pip exits with a non-zero
        status
    """
    to_install = list(specifiers)
    if not to_install:
        return
    cmd = [sys.executable, "-m", "pip", "install", "-t", "setup-requires"]
    # check_call instead of call: a failed install should stop here with
    # pip's exit status rather than be silently ignored.
    subprocess.check_call(cmd + to_install)
requires = ['cython', 'numpy']
install_requirements(missing_requirements(requires))
from Cython.Build import cythonize
import numpy as np
from setuptools import setup, find_packages, Extension
readme_file = join(abspath(dirname(__file__)), "README.md")
with open(readme_file) as desc_handle:
long_desc = desc_handle.read()
# create extensions and add numpy includes to all of them.
cython_extensions = cythonize("vtools/*.pyx")
for ext in cython_extensions:
ext.include_dirs.append(np.get_include())
setup(
name="v-tools",
version="1.0.0",
version="1.0.0-dev",
description="Various tools operating over VCF files",
long_description=long_desc,
author="Sander Bollen",
author_email="a.h.b.bollen@lumc.nl",
url="https://git.lumc.nl/klinische-genetica/capture-lumc/vtools",
license="MIT",
packages=find_packages(),
package_dir={"": "src"},
packages=find_packages("src"),
package_data={
'vtools': ['vtools/*.pyx']
},
python_requires=">=3.6",
zip_safe=False,
include_package_data=True,
setup_requires=["cython"],
install_requires=[
"click",
"cyvcf2",
"numpy",
"cython",
"tqdm"
],
entry_points={
......@@ -87,5 +53,5 @@ setup(
"Programming Language :: Python :: 3.7",
"Topic :: Scientific/Engineering :: Bio-Informatics"
],
ext_modules=cython_extensions
ext_modules=[Extension("vtools.optimized", ["src/vtools/optimized.pyx"])]
)
......@@ -6,9 +6,11 @@ vtools.cli
:copyright: (c) Leiden University Medical Center
:license: MIT
"""
import os
import json
import click
from cyvcf2 import VCF, Writer
import gzip
from .evaluate import site_concordancy
from .filter import FilterParams, FilterClass, Filterer
......@@ -32,25 +34,44 @@ from .gcoverage import RefRecord, region_coverages
"May be called multiple times",
required=True)
@click.option("-s", "--stats", type=click.Path(writable=True),
help="Path to output stats json file", default='-')
@click.option("-dc", "--discordant", type=click.Path(writable=True),
help="Path to output discordant VCF file",
help="Path to output stats json file")
@click.option("-dvcf", "--discordant-vcf", type=click.Path(writable=True),
help="Path to output the discordant vcf file",
required=False)
def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples, stats,
discordant):
@click.option("-mq", "--min-qual", type=float,
help="Minimum quality of variants to consider", default=30)
@click.option("-md", "--min-depth", type=int,
help="Minimum depth of variants to consider", default=0)
def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples,
min_qual, min_depth, stats, discordant_vcf):
c_vcf = VCF(call_vcf, gts012=True)
p_vcf = VCF(positive_vcf, gts012=True)
st, disc = site_concordancy(c_vcf, p_vcf, call_samples,
positive_samples)
positive_samples, min_qual, min_depth)
# Write the stats json file
with click.open_file(stats, 'w') as fout:
print(json.dumps(st), file=fout)
# If specified, write the discordant variants
if discordant:
with click.open_file(discordant, 'w') as fout:
if stats is None:
print(json.dumps(st))
else:
with click.open_file(stats, 'w') as fout:
fout.write(json.dumps(st))
# If there were discordant records, and a discordant-vcf should be written
if len(disc) > 0 and discordant_vcf:
# make sure the parent folder exists
parent_folder = os.path.dirname(discordant_vcf)
os.makedirs(parent_folder, exist_ok=True)
with click.open_file(discordant_vcf, 'w') as fout:
# First, we write the vcf header
with gzip.open(call_vcf, 'rt') as fin:
for line in fin:
if line.startswith('#'):
fout.write(line)
else:
break
# Then we write the vcf records that were discordant
for record in disc:
print(record, file=fout, end='')
fout.write(str(record))
@click.command()
......
This diff is collapsed.
[tox]
envlist = py36
[testenv]
deps =
pytest
commands = pytest
"""
vtools
~~~~~~
:copyright: (c) 2018 Sander Bollen
:copyright: (c) 2018 Leiden University Medical Center
:license: MIT
"""