diff --git a/README.md b/README.md index 3f781609dedcbe5647ab0a29541fd26af9a77841..a998cd631eedea639fccee0fd7e9176904b29cce 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,8 @@ Options: called multiple times [required] -ps, --positive-samples TEXT Sample(s) in positive-vcf to consider. May be called multiple times [required] + -s, --stats PATH Path to output stats json file + -dc, --discordant PATH Path to output discordant VCF file --help Show this message and exit. ``` diff --git a/vtools/cli.py b/vtools/cli.py index 88f68f45ce563b5d92ae3e9e4e4ef3af1d8263d6..77d54a728fff48aa2fad7cf46b5da7947c78195d 100644 --- a/vtools/cli.py +++ b/vtools/cli.py @@ -31,11 +31,26 @@ from .gcoverage import RefRecord, region_coverages help="Sample(s) in positive-vcf to consider. " "May be called multiple times", required=True) -def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples): +@click.option("-s", "--stats", type=click.Path(writable=True), + help="Path to output stats json file", default='-') +@click.option("-dc", "--discordant", type=click.Path(writable=True), + help="Path to output discordant VCF file", + required=False) +def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples, stats, + discordant): c_vcf = VCF(call_vcf, gts012=True) p_vcf = VCF(positive_vcf, gts012=True) - evaluated = site_concordancy(c_vcf, p_vcf, call_samples, positive_samples) - print(json.dumps(evaluated)) + st, disc = site_concordancy(c_vcf, p_vcf, call_samples, + positive_samples) + # Write the stats json file + with click.open_file(stats, 'w') as fout: + print(json.dumps(st), file=fout) + + # If specified, write the discordant variants + if discordant: + with click.open_file(discordant, 'w') as fout: + for record in disc: + print(record, file=fout, end='') @click.command() diff --git a/vtools/evaluate.py b/vtools/evaluate.py index 1e21a3a49cefc63075579572bc437d985743f5e0..089e6a9de569979640c5ae165194e71a7c87280c 100644 --- a/vtools/evaluate.py +++ b/vtools/evaluate.py @@ -60,6 +60,8 @@ def site_concordancy(call_vcf: VCF, "alleles_no_call": 0, "alleles_low_qual": 0 } + discordant_count = 0 + discordant_records = list() for pos_record in positive_vcf: d['total_sites'] += 1 query_str = "{0}:{1}-{2}".format( @@ -130,4 +132,9 @@ def site_concordancy(call_vcf: VCF, # the same has no call in one or more of the VCF files d['alleles_no_call'] += 2 - return d + # The current variant is discordant + if d['alleles_discordant'] > discordant_count: + discordant_count = d['alleles_discordant'] + discordant_records.append(call_record) + + return d, discordant_records