Commit 097bfea5 authored by Sander Bollen's avatar Sander Bollen
Browse files

rewrite interface to use settings json in stead of flat arguments when designing primers

parent 195f6d48
from builtins import (map, open, str)
__author__ = 'ahbbollen'
from pathlib import Path
from typing import Optional
from tempfile import NamedTemporaryFile
from subprocess import check_call, call
import os
import warnings
from Bio.Seq import Seq
......@@ -11,13 +12,10 @@ from pyfaidx import Fasta
from pysam import AlignmentFile
from .models import Primer, Region
from .primer3 import Primer3, parse_primer3_output
from .primer3 import Primer3, parse_primer3_output, parse_settings
from .utils import (NoPrimersException, calc_gc, NEW_VCF,
generate_fastq_from_primers, is_at_least_version_samtools)
PRIMER3_SCRIPT = os.path.join(os.path.join(os.path.dirname(__file__),
"static"), 'getprimers.sh')
def get_sequence_fasta(region, reference=None, padding=True):
ref = Fasta(reference)
......@@ -34,9 +32,8 @@ def get_sequence_fasta(region, reference=None, padding=True):
return ref[chrom][region.start_w_padding:region.stop_w_padding].seq
def run_primer3(sequence, region, padding=True,
primer3_script=PRIMER3_SCRIPT, product_size="200-450",
n_primers=4, prim3_exe=None, **kwargs):
def run_primer3(sequence, region, primer3_exe: str, settings_dict: dict,
padding=True):
"""Run primer 3. All other kwargs will be passed on to primer3"""
if padding:
target_start = region.padding_left
......@@ -47,7 +44,7 @@ def run_primer3(sequence, region, padding=True,
target = ",".join(map(str, [target_start, target_len]))
p3 = Primer3(prim3_exe, sequence, target, target, **kwargs)
p3 = Primer3(primer3_exe, sequence, target, target, settings_dict)
p3_out = p3.run()
primers = parse_primer3_output(p3_out)
return primers
......@@ -373,13 +370,16 @@ def chop_region(region, size):
return regions
def get_primer_from_region(region, reference, product_size, n_prims,
bwa_exe, samtools_exe, primer3_exe,
def get_primer_from_region(region, reference, bwa_exe,
samtools_exe, primer3_exe,
output_bam=None, dbsnp=None, field=None,
max_freq=None, strict=False, min_margin=10,
**prim_args):
settings_json: Optional[Path] = None):
"""**prim_args will be passed on to primer3"""
settings_dict = parse_settings(settings_json)
product_size = settings_dict['primer_product_size_range']
min_length, max_length = list(map(int, product_size.split("-")))
regions = chop_region(region, min_length)
if any([x.size(True) > max_length for x in regions]):
......@@ -390,10 +390,8 @@ def get_primer_from_region(region, reference, product_size, n_prims,
for reg in regions:
sequence = get_sequence_fasta(reg, reference=reference)
raw_primers = run_primer3(sequence, reg, padding=True,
product_size=product_size,
n_primers=n_prims, prim3_exe=primer3_exe,
**prim_args)
raw_primers = run_primer3(sequence, reg, primer3_exe,
settings_dict, padding=True)
bam = aln_primers(raw_primers, bwa_exe=bwa_exe,
samtools_exe=samtools_exe,
ref=reference, output_bam=output_bam)
......
......@@ -91,19 +91,12 @@ class Primer3(object):
def __init__(self, primer3_exe: str, template: str,
target: str, excluded_region: str,
settings_json: Optional[Path] = None):
settings_dict: dict):
self.primer3_exe = primer3_exe
self.template = template
self.target = target
self.excluded_region = excluded_region
self.settings_json = settings_json
self.__settings_dict = None
@property
def settings_dict(self) -> dict:
if self.__settings_dict is None:
self.__settings_dict = parse_settings(self.settings_json)
return self.__settings_dict
self.settings_dict = settings_dict
def run(self):
cfg = NamedTemporaryFile(delete=False, mode="w")
......
......@@ -3,6 +3,8 @@ This script generates primer pairs from either LOVD input or BED files for
regions.
Output in Miracle XML or TSV
"""
from pathlib import Path
from typing import Optional
import argparse
......@@ -16,11 +18,11 @@ from prinia.utils import generate_fastq_from_primers, NoPrimersException
__author__ = 'ahbbollen'
def primers_from_lovd(lovd_file, padding, product_size, n_prims, reference,
bwa_exe, samtools_exe, primer3_exe, output_bam, dbsnp,
field, max_freq, m13=False, m13_f="", m13_r="",
strict=False, min_margin=10, ignore_errors=False,
**prim_args):
def primers_from_lovd(lovd_file, padding, reference, bwa_exe, samtools_exe,
primer3_exe, output_bam, dbsnp, field, max_freq,
m13=False, m13_f="", m13_r="", strict=False,
min_margin=10, ignore_errors=False,
settings_json: Optional[Path] = None):
"""**prim_args will be passed on to primer3"""
variants = var_from_lovd(lovd_file)
......@@ -28,14 +30,15 @@ def primers_from_lovd(lovd_file, padding, product_size, n_prims, reference,
for var in variants:
region = Region.from_variant(var, padding_l=padding, padding_r=padding)
try:
_, prims = get_primer_from_region(region, reference, product_size,
n_prims, bwa_exe, samtools_exe,
primer3_exe,
_, prims = get_primer_from_region(region, reference,
bwa_exe=bwa_exe,
samtools_exe=samtools_exe,
primer3_exe=primer3_exe,
output_bam=output_bam,
dbsnp=dbsnp, field=field,
max_freq=max_freq, strict=strict,
min_margin=min_margin,
**prim_args)
settings_json=settings_json)
primers.append(prims[0])
except NoPrimersException:
if ignore_errors:
......@@ -48,11 +51,11 @@ def primers_from_lovd(lovd_file, padding, product_size, n_prims, reference,
return variants, primers
def primers_from_region(bed_path, padding, product_size, n_prims, reference,
bwa_exe, samtools_exe, primer3_exe, output_bam,
dbsnp, field, max_freq, m13=False, m13_f="",
m13_r="", strict=False, min_margin=10,
**prim_args):
def primers_from_region(bed_path, padding, reference, bwa_exe, samtools_exe,
primer3_exe, output_bam, dbsnp, field, max_freq,
m13=False, m13_f="", m13_r="", strict=False,
min_margin=10, ignore_errors=False,
settings_json: Optional[Path] = None):
"""**prim_args will be passed on to primer3"""
regions = []
primers = []
......@@ -62,19 +65,24 @@ def primers_from_region(bed_path, padding, product_size, n_prims, reference,
continue
region = Region.from_bed(line, reference, padding_l=padding,
padding_r=padding)
regs, prims = get_primer_from_region(region, reference,
product_size, n_prims,
bwa_exe, samtools_exe,
primer3_exe,
output_bam=output_bam,
dbsnp=dbsnp, field=field,
max_freq=max_freq,
strict=strict,
min_margin=min_margin,
**prim_args)
regions += regs
primers += prims
try:
regs, prims = get_primer_from_region(region, reference,
bwa_exe=bwa_exe,
samtools_exe=samtools_exe,
primer3_exe=primer3_exe,
output_bam=output_bam,
dbsnp=dbsnp, field=field,
max_freq=max_freq,
strict=strict,
min_margin=min_margin,
settings_json=settings_json) # noqa
regions += regs
primers += prims
except NoPrimersException:
if ignore_errors:
continue
else:
raise NoPrimersException
if m13:
primers = m13_primers(primers, m13_f, m13_r)
......@@ -147,12 +155,6 @@ def main():
parser.add_argument('-s', '--sample', help="Same ID for regions")
parser.add_argument('--product_size',
help="Size range of desired product. "
"Defaults to 200-450. "
"This will be taken as a minimum product size in "
"the case of regions",
default="200-450")
parser.add_argument("--min-margin", type=int, default=10,
help="Minimum distance from region or variant. "
"Default = 10")
......@@ -161,9 +163,6 @@ def main():
"max product size will "
"NOT be returned",
action="store_true")
parser.add_argument('--n_raw_primers',
help="Legacy option. Will be ignored",
default=4, type=int)
parser.add_argument('--m13', action="store_true",
help="Output primers with m13 tails")
......@@ -200,25 +199,13 @@ def main():
parser.add_argument('--bwa', help="Path to BWA exe", default="bwa")
parser.add_argument('--samtools', help="Path to samtools exe",
default="samtools")
parser.add_argument('--settings-json', type=Path, default=None,
required=False,
help="Optional path to primer3 settings json file.")
parser.add_argument("--ignore-errors", help="Ignore errors",
action="store_true")
parser.add_argument("--opt-primer-length",
help="Optimum primer length (default = 25)",
type=int, default=25)
parser.add_argument("--opt-gc-perc",
help="Optimum primer GC percentage (default = 50)",
type=int, default=50)
parser.add_argument("--min-melting-temperature",
help="Minimum primer melting temperature "
"(default = 58)",
type=int, default=58)
parser.add_argument("--max-melting-temperature",
help="Maximum primer melting temperature "
"(default = 62)",
type=int, default=62)
args = parser.parse_args()
if not args.lovd and not args.region:
......@@ -233,64 +220,40 @@ def main():
if args.field and not args.allele_freq:
raise ValueError("Must set an allele frequency")
primers = []
common_args = {
"padding": args.padding,
"reference": args.reference,
"bwa_exe": args.bwa,
"samtools_exe": args.samtools,
"primer3_exe": args.primer3,
"output_bam": args.bam,
"dbsnp": args.dbsnp,
"field": args.field,
"max_freq": args.allele_freq,
"m13": args.m13,
"m13_f": args.m13_forward,
"m13_r": args.m13_reverse,
"strict": args.strict,
"min_margin": args.min_margin,
"ignore_errors": args.ignore_errors,
"settings_json": args.settings_json
}
if args.lovd and args.xml:
variants, primers = primers_from_lovd(args.lovd, args.padding,
args.product_size,
args.n_raw_primers,
args.reference, args.bwa,
args.samtools, args.primer3,
args.bam, args.dbsnp,
args.field, args.allele_freq,
args.m13, args.m13_forward,
args.m13_reverse,
args.strict,
args.min_margin,
args.ignore_errors)
variants, primers = primers_from_lovd(args.lovd, **common_args)
primers_to_xml(variants, primers, args.xml, type='variants')
elif args.region and args.xml:
regions, primers = primers_from_region(args.region, args.padding,
args.product_size,
args.n_raw_primers,
args.reference, args.bwa,
args.samtools, args.primer3,
args.bam, args.dbsnp,
args.field, args.allele_freq,
args.m13, args.m13_forward,
args.m13_reverse,
args.strict,
args.min_margin)
regions, primers = primers_from_region(args.region, **common_args)
primers_to_xml(regions, primers, args.xml, type='regions',
sample=args.sample)
elif args.xml and args.tsv:
variants, primers = primers_from_lovd(args.lovd, args.padding,
args.product_size,
args.n_raw_primers,
args.reference, args.bwa,
args.samtools, args.primer3,
args.bam, args.dbsnp,
args.field, args.allele_freq,
args.m13, args.m13_forward,
args.m13_reverse,
args.strict,
args.min_margin,
args.ignore_errors)
variants, primers = primers_from_lovd(args.lovd, **common_args)
primers_to_tsv(variants, primers, args.tsv, type='variants')
elif args.region and args.tsv:
regions, primers = primers_from_region(args.region, args.padding,
args.product_size,
args.n_raw_primers,
args.reference, args.bwa,
args.samtools, args.primer3,
args.bam, args.dbsnp,
args.field, args.allele_freq,
args.m13, args.m13_forward,
args.m13_reverse,
args.strict,
args.min_margin)
regions, primers = primers_from_region(args.region, **common_args)
primers_to_tsv(regions, primers, args.tsv, type='regions',
sample=args.sample)
else:
......
......@@ -123,6 +123,8 @@ def generate_fastq_from_primers(primers, forward_path, reverse_path):
reverse_handle = open(reverse_path, "w")
seqs = [primer_to_seq_record(x) for x in primers]
if len(seqs) < 1:
raise NoPrimersException
forwards = [s[0] for s in seqs]
reverses = [s[1] for s in seqs]
......
......@@ -143,6 +143,7 @@ def rCRS():
@pytest.mark.parametrize("settings_json, ignored", configuration_data)
def test_primer3_run_non_failing(settings_json, ignored, rCRS):
settings_dict = parse_settings(settings_json)
# assumes primer3_core is on the PATH
p = Primer3("primer3_core", rCRS, "3300-3400", "3200-3500", settings_json)
p = Primer3("primer3_core", rCRS, "3300-3400", "3200-3500", settings_dict)
p.run()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment