diff --git a/vtools/cli.py b/vtools/cli.py
index 1f3dd496731f0d4809fdab930d527018ff0f7638..88f68f45ce563b5d92ae3e9e4e4ef3af1d8263d6 100644
--- a/vtools/cli.py
+++ b/vtools/cli.py
@@ -108,7 +108,7 @@ def gcoverage_cli(input_gvcf, refflat_file, per_exon):
     header = None
     with open(refflat_file) as handle:
         for line in handle:
-            r = RefRecord(line)
+            r = RefRecord.from_line(line)
             if not per_exon:
                 regions = [x[1] for x in r.cds_exons]
                 cov = region_coverages(reader, regions)
diff --git a/vtools/gcoverage.py b/vtools/gcoverage.py
index cce0e0803e6c988017478c886b8879d223564e5e..dacc0394ac2420a8e8b914a9416fcf8213bd0809 100644
--- a/vtools/gcoverage.py
+++ b/vtools/gcoverage.py
@@ -13,7 +13,7 @@ import numpy as np
 
 from collections import namedtuple
 from itertools import chain
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, NamedTuple
 
 from .optimized import amount_atleast
 
@@ -132,37 +132,51 @@ class CovStats(object):
         return s
 
 
-class RefRecord(object):
-    def __init__(self, line: str):
-        self.line = line  # type: str
-        self.gene = None  # type: Optional[str]
-        self.transcript = None  # type: Optional[str]
-        self.contig = None  # type: Optional[str]
-        self.start = None  # type: Optional[int]
-        self.end = None  # type: Optional[int]
-        self.cds_start = None  # type: Optional[int]
-        self.cds_end = None  # type: Optional[int]
-        self.exon_starts = []  # type: List[int]
-        self.exon_ends = []  # type: List[int]
-        self.forward = True
-
-        self.parse()
-
-    def parse(self):
-        contents = self.line.strip().split("\t")
+class RefRecord(NamedTuple):
+    """One record (line) of a refFlat file."""
+    gene: str
+    transcript: str
+    contig: str
+    start: int
+    end: int
+    cds_start: int
+    cds_end: int
+    exon_starts: List[int]
+    exon_ends: List[int]
+    forward: bool
+
+    @classmethod
+    def from_line(cls, line: str) -> "RefRecord":
+        """Parse one tab-separated refFlat line into a RefRecord.
+
+        The strand (field 4) is treated as reverse when it contains
+        "-"; every other value yields a forward record.
+
+        :param line: a single line of a refFlat file
+        :return: the populated record
+        :raises ValueError: if the line has fewer than 11 fields or a
+            coordinate field is not a valid integer
+        """
+        contents = line.strip().split("\t")
         if len(contents) < 11:
             raise ValueError("refFlat line must have at least 11 fields")
-        self.gene = contents[0]
-        self.transcript = contents[1]
-        self.contig = contents[2]
+        gene = contents[0]
+        transcript = contents[1]
+        contig = contents[2]
         if "-" in contents[3].strip():
-            self.forward = False
-        self.start = int(contents[4])
-        self.end = int(contents[5])
-        self.cds_start = int(contents[6])
-        self.cds_end = int(contents[7])
-        self.exon_starts = [int(x) for x in contents[9].split(",")[:-1]]
-        self.exon_ends = [int(x) for x in contents[10].split(",")[:-1]]
+            forward = False
+        else:
+            forward = True
+        start = int(contents[4])
+        end = int(contents[5])
+        cds_start = int(contents[6])
+        cds_end = int(contents[7])
+        # Skip empty items (e.g. after a trailing comma) instead of
+        # always dropping the last element, which could be a real exon.
+        exon_starts = [int(x) for x in contents[9].split(",") if x]
+        exon_ends = [int(x) for x in contents[10].split(",") if x]
+        return cls(gene, transcript, contig, start, end, cds_start,
+                   cds_end, exon_starts, exon_ends, forward)
 
     @property
     def exons(self) -> List[Region]: