Commit 03894b0d authored by Sander Bollen's avatar Sander Bollen

cythonize stats

parent 28ea8217
......@@ -6,9 +6,10 @@ vtools.optimized
:copyright: (c) 2018 Leiden University Medical Center
:license: MIT
"""
from collections import Counter
import numpy
cimport numpy as np
cpdef int amount_atleast(np.int64_t[::1] values, int atleast):
"""
Return amount of values at least `atleast`
......@@ -24,3 +25,97 @@ cpdef int amount_atleast(np.int64_t[::1] values, int atleast):
if val >= atleast:
passed += 1
return passed
cdef class Sample(object):
cdef unicode name
cdef size_t idx
cdef int transversions
cdef int transitions
cdef int hom_ref
cdef int het
cdef int hom_alt
cdef int deletions
cdef int insertions
cdef int snps
cdef int gq_counter[100]
def __init__(self, name, size_t idx):
self.name = name
self.idx = idx
self.transversions = 0
self.transitions = 0
self.hom_ref = 0
self.het = 0
self.hom_alt = 0
self.deletions = 0
self.insertions = 0
self.snps = 0
@property
def ti_tv(self):
cdef float titv
if self.transversions > 0:
titv = float(self.transitions)/self.transversions
else:
titv = numpy.nan
return titv
cpdef void add_variant(self, var):
"""assuming gts012=True"""
cdef int typ = var.gt_types[self.idx]
if typ == 3:
return
cdef int gq = var.gt_quals[self.idx]
self.gq_counter[gq] += 1
if typ == 0:
self.hom_ref += 1
return
if typ == 1:
self.het += 1
if typ == 2:
self.hom_alt += 1
if var.is_snp and var.is_transition:
self.transitions += 1
self.snps += 1
elif var.is_snp:
self.transversions += 1
self.snps += 1
elif var.is_indel and var.is_deletion:
self.deletions += 1
elif var.is_indel:
self.insertions += 1
return
@property
def gq_distr(self):
return [self.gq_counter[x] for x in range(100)]
@property
def total_variants(self):
return self.hom_alt + self.het
@property
def as_dict(self):
return {
"name": self.name,
"total_variants": self.total_variants,
"variant_types": {
"snps": self.snps,
"deletions": self.deletions,
"insertions": self.insertions,
},
"genotypes": {
"hom_ref": self.hom_ref,
"het": self.het,
"hom_alt": self.hom_alt
},
"transitions": self.transitions,
"transversions": self.transversions,
"ti_tv_ratio": self.ti_tv,
"gq_distribution": self.gq_distr
}
......@@ -15,92 +15,14 @@ from tqdm import tqdm
from collections import Counter
from .optimized import Sample
def gen_chrom_counter(vcf_reader):
"""Generate chromosome counter from VCF reader"""
return Counter({n: 0 for n in vcf_reader.seqnames})
class Sample(object):
def __init__(self, name, idx):
self.name = name
self.idx = idx
self.transversions = 0
self.transitions = 0
self.hom_ref = 0
self.het = 0
self.hom_alt = 0
self.deletions = 0
self.insertions = 0
self.snps = 0
self.__gq_counter = Counter({i: 0 for i in range(100)})
@property
def ti_tv(self):
if self.transversions > 0:
return float(self.transitions)/self.transversions
return numpy.nan
def add_variant(self, var):
"""assuming gts012=True"""
typ = var.gt_types[self.idx]
if typ == 3:
return None
gq = var.gt_quals[self.idx]
self.__gq_counter[gq] += 1
if typ == 0:
self.hom_ref += 1
return None
if typ == 1:
self.het += 1
if typ == 2:
self.hom_alt += 1
if var.is_snp and var.is_transition:
self.transitions += 1
self.snps += 1
elif var.is_snp:
self.transversions += 1
self.snps += 1
elif var.is_indel and var.is_deletion:
self.deletions += 1
elif var.is_indel:
self.insertions += 1
@property
def gq_distr(self):
return [self.__gq_counter[x] for x in range(100)]
@property
def total_variants(self):
return self.hom_alt + self.het
@property
def as_dict(self):
return {
"name": self.name,
"total_variants": self.total_variants,
"variant_types": {
"snps": self.snps,
"deletions": self.deletions,
"insertions": self.insertions,
},
"genotypes": {
"hom_ref": self.hom_ref,
"het": self.het,
"hom_alt": self.hom_alt
},
"transitions": self.transitions,
"transversions": self.transversions,
"ti_tv_ratio": self.ti_tv,
"gq_distribution": self.gq_distr
}
class Stats(object):
def __init__(self, vcf_path):
self.path = vcf_path
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment