diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ea2f40c45968ed52baff0153146e607fa7781a05..ed7d19d3aad6ca8af55cdb6aec807a796acc55e1 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -4,4 +4,5 @@ build:
     - docker
   script:
     - pip install --upgrade pip setuptools wheel
-    - pip install '.'
+    - pip install cython numpy
+    - tox
diff --git a/README.md b/README.md
index 947346e65f5acdc2cb89815c30797b824832e4c8..0d707d83911d518694455e15fbd1212aff609beb 100644
--- a/README.md
+++ b/README.md
@@ -135,11 +135,15 @@ technologies. E.g, when comparing a WES VCF file vs a SNP array, this
 tool can be quite useful.
 
 Output is a simple JSON file listing counts of concordant and discordant
-alleles. 
+alleles and some other metrics. It is also possible to output the discordant
+VCF records.
 
 Multisample VCF files are allowed; the samples to be evaluated have to be set 
 through a CLI argument.
 
+Variants from the `--call-vcf` are filtered to have a Genotype Quality (GQ) of
+at least 30 by default. This can be overruled by specifying `--min-qual 0`.
+The optional flag `--min-depth` can be used to set the minimum read coverage.
 
 #### Usage
 
@@ -154,10 +158,12 @@ Options:
                                 called multiple times  [required]
   -ps, --positive-samples TEXT  Sample(s) in positive-vcf to consider. May be
                                 called multiple times  [required]
-  -s, --stats PATH              Path to output stats json file  [required]
-  -dc, --discordant PATH        Path to output discordant VCF file
-  --help                        Show this message and exit
-  ```
+  -s, --stats PATH              Path to output stats json file
+  -dc, --discordant PATH        Path to output gzipped discordant vcf file
+  -mq, --min-qual FLOAT         Minimum quality of variants to consider
+  -md, --min-depth INTEGER      Minimum depth of variants to consider
+  --help                        Show this message and exit.
+```
 
 ## License
 
diff --git a/tests/cases/dummy_decomposed.vcf.gz b/tests/cases/dummy_decomposed.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..09642f104fb1e3a673add5c28804af5297c7468d
Binary files /dev/null and b/tests/cases/dummy_decomposed.vcf.gz differ
diff --git a/tests/cases/dummy_decomposed.vcf.gz.tbi b/tests/cases/dummy_decomposed.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..b61781774ec3e28d970e66d942d351f4637d2019
Binary files /dev/null and b/tests/cases/dummy_decomposed.vcf.gz.tbi differ
diff --git a/tests/cases/dummy_haploid.vcf.gz b/tests/cases/dummy_haploid.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..e0f25567c4e32ae177347b1243e8ed439852a78e
Binary files /dev/null and b/tests/cases/dummy_haploid.vcf.gz differ
diff --git a/tests/cases/dummy_haploid.vcf.gz.tbi b/tests/cases/dummy_haploid.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..a45d08c4d0fe3703b41aa5274203d985e7e934d9
Binary files /dev/null and b/tests/cases/dummy_haploid.vcf.gz.tbi differ
diff --git a/tests/cases/dummy_phased_blank.vcf.gz b/tests/cases/dummy_phased_blank.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..cee7e4b957acc3391221c4fb4b616a677692aca7
Binary files /dev/null and b/tests/cases/dummy_phased_blank.vcf.gz differ
diff --git a/tests/cases/dummy_phased_blank.vcf.gz.tbi b/tests/cases/dummy_phased_blank.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..271fa29b09e858f39223dd295d18a885b34b04cf
Binary files /dev/null and b/tests/cases/dummy_phased_blank.vcf.gz.tbi differ
diff --git a/tests/cases/gatk.vcf.gz b/tests/cases/gatk.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..df6722dd81a3e3c9425a9fadf40e965ff288fd60
Binary files /dev/null and b/tests/cases/gatk.vcf.gz differ
diff --git a/tests/cases/gatk.vcf.gz.tbi b/tests/cases/gatk.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..26f3289b70db11b8b83bdb80424688968852fba5
Binary files /dev/null and b/tests/cases/gatk.vcf.gz.tbi differ
diff --git a/tests/cases/gatk_RGQ.vcf.gz b/tests/cases/gatk_RGQ.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..46a7b8df09b91362d2bc28bf63ca99c40dbcdd75
Binary files /dev/null and b/tests/cases/gatk_RGQ.vcf.gz differ
diff --git a/tests/cases/gatk_RGQ.vcf.gz.tbi b/tests/cases/gatk_RGQ.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..792beb715455fdd4b892f3b7ae538f352a37c207
Binary files /dev/null and b/tests/cases/gatk_RGQ.vcf.gz.tbi differ
diff --git a/tests/cases/gatk_no_alt.vcf.gz b/tests/cases/gatk_no_alt.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2eadfc08e074500ef0aaa88fa1f923f157847836
Binary files /dev/null and b/tests/cases/gatk_no_alt.vcf.gz differ
diff --git a/tests/cases/gatk_no_alt.vcf.gz.tbi b/tests/cases/gatk_no_alt.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..4a964e0351e4325914edef48315c6a4fdfc84165
Binary files /dev/null and b/tests/cases/gatk_no_alt.vcf.gz.tbi differ
diff --git a/tests/cases/gatk_partial_call.vcf.gz b/tests/cases/gatk_partial_call.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..3ba8f8a666e0037892e28ade7172780a28adf74a
Binary files /dev/null and b/tests/cases/gatk_partial_call.vcf.gz differ
diff --git a/tests/cases/gatk_partial_call.vcf.gz.tbi b/tests/cases/gatk_partial_call.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..3205497d3fb444c435910397efb54e0fc53295d6
Binary files /dev/null and b/tests/cases/gatk_partial_call.vcf.gz.tbi differ
diff --git a/tests/cases/gatk_ref_alt_changed.vcf.gz b/tests/cases/gatk_ref_alt_changed.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d6e7abc4e9c71e290cd03a48acabdc66a3c8e3f0
Binary files /dev/null and b/tests/cases/gatk_ref_alt_changed.vcf.gz differ
diff --git a/tests/cases/gatk_ref_alt_changed.vcf.gz.tbi b/tests/cases/gatk_ref_alt_changed.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..16b661a64744cf268cc76e6ef270345a55668af2
Binary files /dev/null and b/tests/cases/gatk_ref_alt_changed.vcf.gz.tbi differ
diff --git a/tests/cases/gatk_truncated.vcf.gz b/tests/cases/gatk_truncated.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f07ffaed0445f1c1e63206f517df0dda21f53bca
Binary files /dev/null and b/tests/cases/gatk_truncated.vcf.gz differ
diff --git a/tests/cases/gatk_truncated.vcf.gz.tbi b/tests/cases/gatk_truncated.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..38bcbf2d56e47126019c1119e3af7644b4e7a82e
Binary files /dev/null and b/tests/cases/gatk_truncated.vcf.gz.tbi differ
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc167975361de7a390e43bfcc5eb5e62e29ed076
--- /dev/null
+++ b/tests/test_evaluate.py
@@ -0,0 +1,443 @@
+import pytest
+from collections import defaultdict
+
+from vtools.evaluate import site_concordancy
+from vtools.evaluate import parse_variants
+
+from cyvcf2 import VCF
+
+
+@pytest.fixture(scope='module')
+def known_concordant():
+    filename = 'tests/cases/gatk.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['NA12878'],
+                               positive_samples=['NA12878'],
+                               min_gq=0, min_dp=0)
+    return d
+
+
+def test_total_sites(known_concordant):
+    assert known_concordant['total_sites'] == 37
+
+
+def test_sites_considered(known_concordant):
+    assert known_concordant['sites_considered'] == 37
+
+
+def test_alleles_considered(known_concordant):
+    assert known_concordant['alleles_considered'] == 74
+
+
+def test_alleles_het_concordant(known_concordant):
+    assert known_concordant['alleles_het_concordant'] == 42
+
+
+def test_alleles_hom_alt_concordant(known_concordant):
+    assert known_concordant['alleles_hom_alt_concordant'] == 18
+
+
+def test_alleles_hom_ref_concordant(known_concordant):
+    assert known_concordant['alleles_hom_ref_concordant'] == 14
+
+
+def test_alleles_concordant(known_concordant):
+    assert known_concordant['alleles_concordant'] == 74
+
+
+def test_alleles_discordant(known_concordant):
+    assert known_concordant['alleles_discordant'] == 0
+
+
+def test_alleles_no_call(known_concordant):
+    assert known_concordant['alleles_no_call'] == 0
+
+
+def test_alleles_low_qual(known_concordant):
+    assert known_concordant['alleles_low_qual'] == 0
+
+
+def test_alleles_low_depth(known_concordant):
+    assert known_concordant['alleles_low_depth'] == 0
+
+
+@pytest.fixture(scope='module')
+def BLANK_NA12878():
+    filename = 'tests/cases/gatk.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
+                               positive_samples=['NA12878'],
+                               min_gq=30, min_dp=20)
+    return d
+
+
+def test_low_qual_30(BLANK_NA12878):
+    assert BLANK_NA12878['alleles_low_qual'] == 34
+
+
+def test_low_depth_20(BLANK_NA12878):
+    assert BLANK_NA12878['alleles_low_depth'] == 36
+
+
+def test_no_call(BLANK_NA12878):
+    assert BLANK_NA12878['alleles_no_call'] == 8
+
+
+def test_truncated_call_file():
+    """ When the call set is truncated, i.e. is missing variants which are
+    present in the positive vcf file """
+
+    filename = 'tests/cases/gatk.vcf.gz'
+    truncated = 'tests/cases/gatk_truncated.vcf.gz'
+    call = VCF(truncated, gts012=True)
+    positive = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['NA12878'],
+                               positive_samples=['BLANK'],
+                               min_gq=30, min_dp=20)
+    assert d['alleles_no_call'] == 12
+
+
+def test_truncated_positive_file():
+    """ When the positive set is truncated, i.e. the called vcf file contains
+    variants which are absent from the positive vcf file """
+    filename = 'tests/cases/gatk.vcf.gz'
+    truncated = 'tests/cases/gatk_truncated.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(truncated, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['NA12878'],
+                               positive_samples=['BLANK'],
+                               min_gq=30, min_dp=20)
+    assert d['alleles_no_call'] == 0
+
+
+def test_phased_positive_file():
+    """ Test error message when the positive vcf contains phased variants """
+    filename = 'tests/cases/gatk.vcf.gz'
+    phased = 'tests/cases/dummy_phased_blank.vcf.gz'
+    call = VCF(filename, gts012=True)
+    phased = VCF(phased, gts012=True)
+    with pytest.raises(NotImplementedError,
+                       match='Phased variants are not supported'):
+        site_concordancy(call, phased, call_samples=['NA12878'],
+                         positive_samples=['BLANK'])
+
+
+def test_phased_call_file():
+    """ Test error message when the call vcf contains phased variants """
+    filename = 'tests/cases/gatk.vcf.gz'
+    phased = 'tests/cases/dummy_phased_blank.vcf.gz'
+    call = VCF(phased, gts012=True)
+    positive = VCF(filename, gts012=True)
+    with pytest.raises(NotImplementedError,
+                       match='Phased variants are not supported'):
+        site_concordancy(call, positive, call_samples=['BLANK'],
+                         positive_samples=['NA12878'])
+
+
+def test_decomposed_positive_file():
+    """ Test error message when the positive vcf contains decomposed variants
+    """
+    filename = 'tests/cases/gatk.vcf.gz'
+    decomposed = 'tests/cases/dummy_decomposed.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(decomposed, gts012=True)
+    with pytest.raises(NotImplementedError,
+                       match='Decomposed variants are not supported'):
+        site_concordancy(call, positive, call_samples=['BLANK'],
+                         positive_samples=['NA12878'])
+
+
+def test_decomposed_call_file():
+    """ Test error message when the positive vcf contains decomposed variants
+    """
+    filename = 'tests/cases/gatk.vcf.gz'
+    decomposed = 'tests/cases/dummy_decomposed.vcf.gz'
+    call = VCF(decomposed, gts012=True)
+    positive = VCF(filename, gts012=True)
+    with pytest.raises(NotImplementedError,
+                       match='Decomposed variants are not supported'):
+        site_concordancy(call, positive, call_samples=['BLANK'],
+                         positive_samples=['NA12878'])
+
+
+def test_haploid_positive_file():
+    """ Test error message when the positive vcf contains non-diploid variants
+    """
+    filename = 'tests/cases/gatk.vcf.gz'
+    haploid = 'tests/cases/dummy_haploid.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(haploid, gts012=True)
+    with pytest.raises(NotImplementedError,
+                       match='Non-diploid variants are not supported'):
+        site_concordancy(call, positive, call_samples=['NA12878'],
+                         positive_samples=['BLANK'])
+
+
+def test_haploid_call_file():
+    """ Test error message when the call vcf contains non-diploid variants
+    """
+    filename = 'tests/cases/gatk.vcf.gz'
+    haploid = 'tests/cases/dummy_haploid.vcf.gz'
+    call = VCF(haploid, gts012=True)
+    positive = VCF(filename, gts012=True)
+    with pytest.raises(NotImplementedError,
+                       match='Non-diploid variants are not supported'):
+        site_concordancy(call, positive, call_samples=['BLANK'],
+                         positive_samples=['NA12878'])
+
+
+@pytest.fixture(scope='module')
+def partial_call_file():
+    """ Test statistics when the call vcf contains partial variants """
+    filename = 'tests/cases/gatk.vcf.gz'
+    partial = 'tests/cases/gatk_partial_call.vcf.gz'
+    call = VCF(partial, gts012=True)
+    positive = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
+                               positive_samples=['BLANK'], min_dp=0, min_gq=0)
+    return d
+
+
+def test_partial_call_total_sites(partial_call_file):
+    assert partial_call_file['total_sites'] == 37
+
+
+def test_partial_call_alleles_hom_ref_concordant(partial_call_file):
+    assert partial_call_file['alleles_hom_ref_concordant'] == 0
+
+
+def test_partial_call_alleles_het_concordant(partial_call_file):
+    assert partial_call_file['alleles_het_concordant'] == 0
+
+
+def test_partial_call_alleles_hom_alt_concordant(partial_call_file):
+    assert partial_call_file['alleles_hom_alt_concordant'] == 0
+
+
+def test_partial_call_alleles_concordant(partial_call_file):
+    assert partial_call_file['alleles_concordant'] == 6
+
+
+def test_partial_call_alleles_discordant(partial_call_file):
+    assert partial_call_file['alleles_discordant'] == 0
+
+
+def test_partial_call_alleles_no_call(partial_call_file):
+    assert partial_call_file['alleles_no_call'] == 68
+
+
+@pytest.fixture(scope='module')
+def partial_positive_file():
+    """ Test statistics when the call vcf contains partial variants """
+    filename = 'tests/cases/gatk.vcf.gz'
+    partial = 'tests/cases/gatk_partial_call.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(partial, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
+                               positive_samples=['BLANK'], min_dp=0, min_gq=0)
+    return d
+
+
+def test_partial_positive_total_sites(partial_positive_file):
+    assert partial_positive_file['total_sites'] == 6
+
+
+def test_partial_positive_hom_ref_concordant(partial_positive_file):
+    assert partial_positive_file['alleles_hom_ref_concordant'] == 0
+
+
+def test_partial_positive_het_concordant(partial_positive_file):
+    assert partial_positive_file['alleles_het_concordant'] == 0
+
+
+def test_partial_positive_hom_alt_concordant(partial_positive_file):
+    assert partial_positive_file['alleles_hom_alt_concordant'] == 0
+
+
+def test_partial_positive_concordant(partial_positive_file):
+    assert partial_positive_file['alleles_concordant'] == 6
+
+
+def test_partial_positive_no_call(partial_positive_file):
+    assert partial_positive_file['alleles_no_call'] == 0
+
+
+@pytest.fixture(scope='module')
+def ref_alt_changed_positive():
+    """ Test statistics when the ref and alt have been changed.
+
+    Only the REF and ALT have been changed, and the gt calls for BLANK have
+    been updated to keep the actual genotype the same
+    """
+    filename = 'tests/cases/gatk.vcf.gz'
+    mixed = 'tests/cases/gatk_ref_alt_changed.vcf.gz'
+    positive = VCF(mixed, gts012=True)
+    call = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
+                               positive_samples=['BLANK'], min_dp=0, min_gq=0)
+    return d
+
+
+def test_ref_alt_changed_positive_total(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['total_sites'] == 10
+
+
+def test_ref_alt_changed_positive_hom_ref_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_hom_ref_concordant'] == 2
+
+
+def test_ref_alt_changed_positive_het_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_het_concordant'] == 12
+
+
+def test_ref_alt_changed_positive_hom_alt_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_hom_alt_concordant'] == 6
+
+
+def test_ref_alt_changed_positive_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_concordant'] == 20
+
+
+def test_ref_alt_changed_positive_no_call(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_no_call'] == 0
+
+
+@pytest.fixture(scope='module')
+def ref_alt_changed_call():
+    """ Test statistics when the ref and alt have been changed.
+
+    Only the REF and ALT have been changed, and the gt calls for BLANK have
+    been updated to keep the actual genotype the same
+    """
+    filename = 'tests/cases/gatk.vcf.gz'
+    mixed = 'tests/cases/gatk_ref_alt_changed.vcf.gz'
+    positive = VCF(filename, gts012=True)
+    call = VCF(mixed, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
+                               positive_samples=['BLANK'], min_dp=0, min_gq=0)
+    return d
+
+
+def test_ref_alt_changed_call_total(ref_alt_changed_call):
+    assert ref_alt_changed_call['total_sites'] == 37
+
+
+def test_ref_alt_changed_call_hom_ref_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_hom_ref_concordant'] == 8
+
+
+def test_ref_alt_changed_call_het_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_het_concordant'] == 12
+
+
+def test_ref_alt_changed_call_hom_alt_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_hom_alt_concordant'] == 0
+
+
+def test_ref_alt_changed_call_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_concordant'] == 20
+
+
+def test_ref_alt_changed_call_no_call(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_no_call'] == 54
+
+
+def test_parse_variants_no_call():
+    """ This should be counted as a single no call """
+    results = defaultdict(int)
+    call = ['.', 'A']
+    pos = ['A', 'G']
+
+    parse_variants('A', call, pos, results)
+    assert results['alleles_no_call'] == 1
+
+
+def test_parse_variants_concordant():
+    """ This should be counted as a single concordant allele """
+    results = defaultdict(int)
+    call = ['.', 'A']
+    pos = ['A', 'G']
+
+    parse_variants('A', call, pos, results)
+    assert results['alleles_concordant'] == 1
+
+
+def test_known_concordant_RGQ():
+    """ 0/0 calls from GATK have RGQ annotation instead of GQ
+        These should be treated the same, so when RGQ is present it should be
+        used instead of the QC when filtering variants.
+    """
+    filename = 'tests/cases/gatk_RGQ.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['NA12878'],
+                               positive_samples=['NA12878'],
+                               min_gq=0, min_dp=0)
+    assert d['alleles_hom_ref_concordant'] == 14
+
+
+def test_known_concordant_RGQ_min_qc_100():
+    """ 0/0 calls from GATK have RGQ annotation instead of GQ
+        These should be treated the same, so when RGQ is present it should be
+        used instead of the QC when filtering variants.
+
+        The max value of both GQ and RGQ is 99, so filtering for GQ or RGQ >=
+        100 should give us 0 hom ref concordant variants.
+    """
+    filename = 'tests/cases/gatk_RGQ.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['NA12878'],
+                               positive_samples=['NA12878'],
+                               min_gq=100, min_dp=0)
+    assert d['alleles_hom_ref_concordant'] == 0
+
+
+@pytest.fixture(scope='module')
+def gatk_no_alt_in_call():
+    """ Test statistics when the ALT allele is missing from the called vcf
+
+    The ALT allele has been set to '.' for each variant, and the corresponding
+    GT has been set to 0/0 to generate valid variants.
+    """
+    filename = 'tests/cases/gatk.vcf.gz'
+    no_alt = 'tests/cases/gatk_no_alt.vcf.gz'
+    positive = VCF(filename, gts012=True)
+    call = VCF(no_alt, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
+                               positive_samples=['BLANK'], min_dp=0, min_gq=0)
+    return d
+
+
+def test_no_alt_call_total_sites(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['total_sites'] == 37
+
+
+def test_no_alt_call_sites_considered(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['sites_considered'] == 37
+
+
+def test_no_alt_call_het_concordant(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['alleles_het_concordant'] == 0
+
+
+def test_no_alt_call_hom_alt_concordant(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['alleles_hom_alt_concordant'] == 0
+
+
+def test_no_alt_call_hom_ref_concordant(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['alleles_hom_ref_concordant'] == 32
+
+
+def test_no_alt_call_alleles_concordant(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['alleles_concordant'] == 46
+
+
+def test_no_alt_call_alleles_discordant(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['alleles_discordant'] == 20
+
+
+def test_no_alt_call_alleles_no_call(gatk_no_alt_in_call):
+    assert gatk_no_alt_in_call['alleles_no_call'] == 8
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000000000000000000000000000000000000..8b42584536c20bded1f0c00a9910c2c156f3c89c
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,8 @@
+[tox]
+envlist = py36
+
+[testenv]
+deps =
+    pytest
+
+commands = pytest
diff --git a/vtools/cli.py b/vtools/cli.py
index 30cd698f6ea2a5811d578601798de07122a93d19..97ee6df2ed2c7a840d0248450ebfef4a84197751 100644
--- a/vtools/cli.py
+++ b/vtools/cli.py
@@ -9,6 +9,7 @@ vtools.cli
 import json
 import click
 from cyvcf2 import VCF, Writer
+import gzip
 
 from .evaluate import site_concordancy
 from .filter import FilterParams, FilterClass, Filterer
@@ -32,25 +33,40 @@ from .gcoverage import RefRecord, region_coverages
                    "May be called multiple times",
               required=True)
 @click.option("-s", "--stats", type=click.Path(writable=True),
-              help="Path to output stats json file", required=True)
-@click.option("-dc", "--discordant", type=click.Path(writable=True),
-              help="Path to output discordant VCF file",
+              help="Path to output stats json file")
+@click.option("-dvcf", "--discordant-vcf", type=click.Path(writable=True),
+              help="Path to output gzipped discordant vcf file",
               required=False)
-def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples, stats,
-                 discordant):
+@click.option("-mq", "--min-qual", type=float,
+              help="Minimum quality of variants to consider", default=30)
+@click.option("-md", "--min-depth", type=int,
+              help="Minimum depth of variants to consider", default=0)
+def evaluate_cli(call_vcf, positive_vcf, call_samples, positive_samples,
+                 min_qual, min_depth, stats, discordant_vcf):
     c_vcf = VCF(call_vcf, gts012=True)
     p_vcf = VCF(positive_vcf, gts012=True)
     st, disc = site_concordancy(c_vcf, p_vcf, call_samples,
-                                positive_samples)
+                                positive_samples, min_qual, min_depth)
     # Write the stats json file
-    with open(stats, 'w') as fout:
-        print(json.dumps(st), file=fout)
-
-    # If specified, write the discordant variants
-    if discordant:
-        with open(discordant, 'w') as fout:
+    if stats is None:
+        print(json.dumps(st))
+    else:
+        with click.open_file(stats, 'w') as fout:
+            fout.write(json.dumps(st))
+
+    # If there were discordand records, and a discordant-vcf should be written
+    if len(disc) > 0 and discordant_vcf:
+        with click.open_file(discordant_vcf, 'w') as fout:
+            # First, we write the vcf header
+            with gzip.open(call_vcf, 'rt') as fin:
+                for line in fin:
+                    if line.startswith('#'):
+                        fout.write(line)
+                    else:
+                        break
+            # Then we write the vcf records that were discordant
             for record in disc:
-                print(record, file=fout, end='')
+                fout.write(str(record))
 
 
 @click.command()
diff --git a/vtools/evaluate.py b/vtools/evaluate.py
index 089e6a9de569979640c5ae165194e71a7c87280c..396d68940eee87037a3bff3de5b43e39effd4382 100644
--- a/vtools/evaluate.py
+++ b/vtools/evaluate.py
@@ -7,15 +7,62 @@ vtools.evaluate
 :license: MIT
 """
 from typing import List, Dict
+from types import SimpleNamespace
 
 from cyvcf2 import VCF
 
 
+def parse_variants(ref: str, call: List[str], pos: List[str],
+                   results: Dict[str, int]):
+    """ Parse the variants and add to results """
+
+    call_variant = set(call)
+    pos_variant = set(pos)
+
+    # The types of concordant calls are counted separately
+    if call_variant == pos_variant:
+        # These variants are homozygous reference
+        if len(call_variant) == 1 and next(iter(call_variant)) == ref:
+            results['alleles_hom_ref_concordant'] += 2
+        # These variants are heterozygous, since they have different calls
+        elif len(call_variant) > 1:
+            results['alleles_het_concordant'] += 2
+        # If they are not homozygous reference, and not heterozygous, they must
+        # be homozygous alternative, for whichever alt allele they have
+        else:
+            results['alleles_hom_alt_concordant'] += 2
+
+    # Here we count all alleles independently, also for the concordant calls
+    for allele in call:
+        if allele == '.':
+            results['alleles_no_call'] += 1
+        elif allele in pos:
+            results['alleles_concordant'] += 1
+            # We cannot match an A/A call with A/G, so we need to remove the
+            # A call from the positive set once we have 'used' it
+            pos.remove(allele)
+        else:
+            results['alleles_discordant'] += 1
+
+
+def RGQ_header_defined(vcf):
+    """ Determine whether the RGQ annotation is defined in the FORMAT header of
+    the vcf.
+    """
+    try:
+        vcf.get_header_type('RGQ')
+    except KeyError:
+        return False
+    else:
+        return True
+
+
 def site_concordancy(call_vcf: VCF,
                      positive_vcf: VCF,
                      call_samples: List[str],
                      positive_samples: List[str],
-                     min_gq: float = 30) -> Dict[str, float]:
+                     min_gq: float = 30,
+                     min_dp: float = 0) -> Dict[str, float]:
     """
     Calculate concordance between sites of two call sets,
     of which one contains known true positives.
@@ -32,9 +79,11 @@ def site_concordancy(call_vcf: VCF,
     :param call_vcf: The VCF to evaluate. Must have gts012=True
     :param positive_vcf: The VCF file containing true positive sites.
         Must have gts012=True
-    :param call_samples: List of samples in `call_vcf` to consider
+    :param call_samples: List of samples in `call_vcf` to consider.
     :param positive_samples: List of samples in `positive_vcf` to consider.
     Must have same length than `call_samples`
+    :param min_qg: Minimum quality of variants to consider.
+    :param min_dp: Minimum depth of variants to consider.
 
     :raises: ValueError if sample lists are not of same size.
 
@@ -58,79 +107,130 @@ def site_concordancy(call_vcf: VCF,
         "alleles_concordant": 0,
         "alleles_discordant": 0,
         "alleles_no_call": 0,
-        "alleles_low_qual": 0
+        "alleles_low_qual": 0,
+        "alleles_low_depth": 0
     }
+
+    # Determine if the 'RQC' annotation is present in the FORMAT header
+    # This is needed for avoid getting an KeyError when we want to see if the
+    # RQC annotation is set for a given variant
+    RGQ_present = RGQ_header_defined(call_vcf)
+
+    # Keep track of the discordant sites
     discordant_count = 0
     discordant_records = list()
+
+    # Keep track of the previous record, to make sure the positive vcf file
+    # does not contain decomposed records
+    prev_record = SimpleNamespace()
+    prev_record.POS = -1
+    prev_record.CHROM = -1
+
     for pos_record in positive_vcf:
+        # Check to make sure the POS and CHROM are different from the previous
+        # variant
+        if (prev_record.POS == pos_record.POS and
+                prev_record.CHROM == pos_record.CHROM):
+            raise NotImplementedError('Decomposed variants are not supported')
+        else:
+            prev_record.POS = pos_record.POS
+            prev_record.CHROM = pos_record.CHROM
+
         d['total_sites'] += 1
         query_str = "{0}:{1}-{2}".format(
             pos_record.CHROM,
-            pos_record.start,
+            pos_record.start+1,
             pos_record.end
         )
 
-        it = call_vcf(query_str)
-        same = []
-        for it_record in it:
-            if (it_record.CHROM == pos_record.CHROM
-                    and it_record.POS == pos_record.POS
-                    and it_record.REF == pos_record.REF
-                    and it_record.ALT == pos_record.ALT):
-
-                same.append(it_record)
+        call_records = list(call_vcf(query_str))
 
-        if len(same) != 1:
+        # If the variant is not present in the call vcf
+        if len(call_records) == 0:
+            d['alleles_no_call'] += 2
             continue
 
-        call_record = same[0]
+        # If there are multiple variants with the same CHROM and POS, the
+        # variant in the call vcf file must have been decomposted
+        if len(call_records) > 1:
+            raise NotImplementedError('Decomposed variants are not supported')
+
+        # Now we know there is only one call record
+        call_record = call_records[0]
+
         d['sites_considered'] += 1
-        d['alleles_considered'] += (2*len(positive_samples))
+        d['alleles_considered'] += (2 * len(positive_samples))
 
         for p_s, c_s in zip(pos_sample_idx, call_sample_idx):
             p_gt = pos_record.gt_types[p_s]
             c_gt = call_record.gt_types[c_s]
 
-            c_gq = call_record.gt_quals[c_s]
+            # Is one of the sites no call. This is only for fully no call
+            # sites. Partial no call site are handled in parse_variants
+            if p_gt == 3 or c_gt == 3:
+                d['alleles_no_call'] += 2
+                continue
+
+            # Get the quality requirements for the call site
+            c_dp = call_record.gt_depths[c_s]
+            # If the 'RGQ' is defined, which is the case for homref calls from
+            # GATK, use this value instead of 'GQ'
+            # See
+            # https://gatkforums.broadinstitute.org/gatk/discussion/9907/genotypegvcfs-no-records-in-vcf  # noqa
+            if RGQ_present and call_record.format('RGQ') is not None:
+                c_gq = call_record.format('RGQ')[0][0]
+            else:
+                c_gq = call_record.gt_quals[c_s]
+
+            # Did we fail any of the quality control checks?
+            # We use this variable since we want to count all quality checks
+            # independenly, and a single sample can fail multiple checks
+            failed_quality_check = False
+
+            # Is the QG of the call site below the threshold?
             if c_gq < min_gq:
                 d['alleles_low_qual'] += 2
-            elif p_gt == 0 and c_gt == 0:
-                # both homozygous reference
-                d['alleles_concordant'] += 2
-                d['alleles_hom_ref_concordant'] += 2
-            elif p_gt == 0 and c_gt == 1:
-                # heterozygous when homozygous reference
-                d['alleles_concordant'] += 1
-                d['alleles_discordant'] += 1
-            elif p_gt == 0 and c_gt == 2:
-                # homozygous alt when homozygous ref
-                d['alleles_discordant'] += 2
-            elif p_gt == 1 and c_gt == 0:
-                # homozygous ref when heterozygous
-                d['alleles_concordant'] += 1
-                d['alleles_discordant'] += 1
-            elif p_gt == 1 and c_gt == 1:
-                # both heterozygous
-                d['alleles_concordant'] += 2
-                d['alleles_het_concordant'] += 2
-            elif p_gt == 1 and c_gt == 2:
-                # homozygous alt when heterozygous
-                d['alleles_concordant'] += 1
-                d['alleles_discordant'] += 1
-            elif p_gt == 2 and c_gt == 0:
-                # homozygous ref when homozygous alt
-                d['alleles_discordant'] += 2
-            elif p_gt == 2 and c_gt == 1:
-                # heterozygous when homozygous alt
-                d['alleles_concordant'] += 1
-                d['alleles_discordant'] += 1
-            elif p_gt == 2 and c_gt == 2:
-                # both homozygous alt
-                d['alleles_concordant'] += 2
-                d['alleles_hom_alt_concordant'] += 2
-            else:
-                # the same has no call in one or more of the VCF files
-                d['alleles_no_call'] += 2
+                failed_quality_check = True
+
+            # Is the DP of the call site below the threshold?
+            if c_dp < min_dp:
+                d['alleles_low_depth'] += 2
+                failed_quality_check = True
+
+            # Go to the next variant if any of the quality checks failed
+            if failed_quality_check:
+                continue
+
+            # Get the genotypes
+            pos = pos_record.genotypes[p_s]
+            cal = call_record.genotypes[c_s]
+
+            # If the genotypes are not diploid
+            if len(pos) != 3 or len(cal) != 3:
+                raise NotImplementedError('Non-diploid variants are not '
+                                          'supported')
+            # If the genotypes are phased
+            if pos[2] or cal[2]:
+                raise NotImplementedError('Phased variants are not supported')
+
+            # Get the genotyped bases. There is a deprecationWarning when using
+            # gt_bases directly so we go via the alleles and the gt call
+            pos_alleles = [pos_record.REF] + pos_record.ALT
+            cal_alleles = [call_record.REF] + call_record.ALT
+
+            # The third item in pos is a boolean indicating phasing, so we
+            # don't use that
+            # No call is indicated by -1 in pyvcf2, so here we convert negative
+            # values to '.', which is what is used in the vcf to indicate no
+            # call
+            pos_gt = [pos_alleles[x] if x >= 0 else '.' for x in pos[:2]]
+            cal_gt = [cal_alleles[x] if x >= 0 else '.' for x in cal[:2]]
+
+            # Parse the genotypes and add the results into d
+            # We also need to know the reference call to determine if variants
+            # are hom ref. For this, we take the positive vcf REF call to be
+            # the truth
+            parse_variants(pos_record.REF, cal_gt, pos_gt, d)
 
             # The current variant is discordant
             if d['alleles_discordant'] > discordant_count: