Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Klinische Genetica
prinia
Commits
2ce69ed4
Commit
2ce69ed4
authored
Sep 10, 2018
by
Sander Bollen
Browse files
flake8 fixes
parent
d860edaf
Changes
9
Hide whitespace changes
Inline
Side-by-side
prinia/__init__.py
View file @
2ce69ed4
from
builtins
import
(
ascii
,
bytes
,
chr
,
dict
,
filter
,
hex
,
input
,
int
,
map
,
next
,
oct
,
open
,
pow
,
range
,
round
,
str
,
super
,
zip
)
__author__
=
'ahbbollen'
prinia/design.py
View file @
2ce69ed4
from
builtins
import
(
ascii
,
bytes
,
chr
,
dict
,
filter
,
hex
,
input
,
map
,
next
,
oct
,
open
,
pow
,
range
,
round
,
str
,
super
,
zip
)
from
builtins
import
(
map
,
open
,
str
)
__author__
=
'ahbbollen'
from
tempfile
import
NamedTemporaryFile
...
...
@@ -17,7 +15,8 @@ from .primer3 import Primer3, parse_primer3_output
from
.utils
import
(
NoPrimersException
,
calc_gc
,
NEW_VCF
,
generate_fastq_from_primers
,
is_at_least_version_samtools
)
PRIMER3_SCRIPT
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
)
,
"static"
),
'getprimers.sh'
)
PRIMER3_SCRIPT
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"static"
),
'getprimers.sh'
)
def
get_sequence_fasta
(
region
,
reference
=
None
,
padding
=
True
):
...
...
@@ -76,16 +75,17 @@ def samtools_version_check(samtools_exe):
return
True
def
aln_primers
(
primers
,
bwa_exe
=
None
,
samtools_exe
=
None
,
ref
=
None
,
output_bam
=
None
):
def
aln_primers
(
primers
,
bwa_exe
=
None
,
samtools_exe
=
None
,
ref
=
None
,
output_bam
=
None
):
"""
Align primers with BWA.
This only works with BWA-ALN due to the short read length.
Align primers with BWA.
This only works with BWA-ALN due to the short read length.
:param primers: List of primers
:param bwa_exe: Path to BWA
:param samtools_exe: Path to samtools
:param ref: Path to reference fasta
:param output_bam: Path to final bam file
:return: instance of pysam.AlignmentFile
:return: instance of pysam.AlignmentFile
"""
fq1
=
NamedTemporaryFile
()
fq2
=
NamedTemporaryFile
()
...
...
@@ -104,7 +104,8 @@ def aln_primers(primers, bwa_exe=None, samtools_exe=None, ref=None, output_bam=N
raise
ValueError
(
"bwa aln crashed with error code {0}"
.
format
(
r
))
sam
=
NamedTemporaryFile
()
sam_args
=
[
bwa_exe
,
'sampe'
,
ref
,
sai1
.
name
,
sai2
.
name
,
fq1
.
name
,
fq2
.
name
]
sam_args
=
[
bwa_exe
,
'sampe'
,
ref
,
sai1
.
name
,
sai2
.
name
,
fq1
.
name
,
fq2
.
name
]
r
=
check_call
(
sam_args
,
stdout
=
sam
)
if
r
!=
0
:
raise
ValueError
(
"bwa sampe crashed with error code {0}"
.
format
(
r
))
...
...
@@ -113,20 +114,27 @@ def aln_primers(primers, bwa_exe=None, samtools_exe=None, ref=None, output_bam=N
bam_args
=
[
samtools_exe
,
'view'
,
'-Shb'
,
sam
.
name
]
r
=
check_call
(
bam_args
,
stdout
=
bam
)
if
r
!=
0
:
raise
ValueError
(
"samtools view crashed with error code {0}"
.
format
(
r
))
raise
ValueError
(
"samtools view crashed with error code {0}"
.
format
(
r
)
)
if
samtools_version_check
(
samtools_exe
):
final_args
=
[
samtools_exe
,
'sort'
,
'-O'
,
'bam'
,
'-o'
,
output_bam
,
bam
.
name
]
final_args
=
[
samtools_exe
,
'sort'
,
'-O'
,
'bam'
,
'-o'
,
output_bam
,
bam
.
name
]
else
:
final_args
=
[
samtools_exe
,
'sort'
,
"-f"
,
bam
.
name
,
output_bam
]
r
=
check_call
(
final_args
)
if
r
!=
0
:
raise
ValueError
(
"samtools sort crashed with error code {0}"
.
format
(
r
))
raise
ValueError
(
"samtools sort crashed with error code {0}"
.
format
(
r
)
)
index_args
=
[
samtools_exe
,
"index"
,
output_bam
]
r
=
check_call
(
index_args
)
if
r
!=
0
:
raise
ValueError
(
"samtools index crashed with error code {0}"
.
format
(
r
))
raise
ValueError
(
"samtools index crashed with error code {0}"
.
format
(
r
)
)
# cleanup
for
i
in
[
fq1
,
fq2
,
sai1
,
sai2
,
sam
,
bam
]:
...
...
@@ -159,7 +167,8 @@ def _has_alternative_alignments(aligned_segment):
return
"XA"
in
[
x
[
0
]
for
x
in
tags
]
def
create_primer_from_pair
(
read1
,
read2
,
position
=
0
,
reverse_as_complement
=
True
):
def
create_primer_from_pair
(
read1
,
read2
,
position
=
0
,
reverse_as_complement
=
True
):
"""Create primer from read pair"""
if
reverse_as_complement
:
seq
=
Seq
(
read2
.
query_sequence
)
...
...
@@ -182,11 +191,11 @@ def create_primer_from_pair(read1, read2, position=0, reverse_as_complement=True
def
create_primers_bwa
(
bam_handle
,
variant
=
None
,
region
=
None
):
"""
Find correct pairs in a bam file containing primers for a variant or region
Find correct pairs in a bam file containing primers for a variant or region
This requires to hold the bam file in memory. Use only with small bam files
:param bam_handle: pysam.AlignmentFile instance
:param variant: variant
:param region: region
:param region: region
:return: generator of primers
"""
if
variant
is
not
None
and
region
is
not
None
:
...
...
@@ -236,13 +245,16 @@ def create_primers_bwa(bam_handle, variant=None, region=None):
continue
if
variant
is
not
None
:
yield
create_primer_from_pair
(
pair
[
'r1'
],
pair
[
'r2'
],
variant
.
position_g_start
)
yield
create_primer_from_pair
(
pair
[
'r1'
],
pair
[
'r2'
],
variant
.
position_g_start
)
else
:
position
=
int
(
region
.
start
)
+
int
((
float
((
int
(
region
.
stop
)
-
int
(
region
.
start
)))
/
2
))
position
=
int
(
region
.
start
)
+
int
((
float
(
(
int
(
region
.
stop
)
-
int
(
region
.
start
)))
/
2
))
yield
create_primer_from_pair
(
pair
[
'r1'
],
pair
[
'r2'
],
position
)
def
find_best_bwa
(
bam
,
variant
=
None
,
region
=
None
,
accept_snp
=
False
,
field
=
None
,
max_freq
=
None
,
dbsnp
=
None
):
def
find_best_bwa
(
bam
,
variant
=
None
,
region
=
None
,
accept_snp
=
False
,
field
=
None
,
max_freq
=
None
,
dbsnp
=
None
):
primers
=
[]
for
primer
in
create_primers_bwa
(
bam
,
variant
=
variant
,
region
=
region
):
if
not
accept_snp
and
max_freq
is
not
None
and
dbsnp
is
not
None
:
...
...
@@ -280,7 +292,7 @@ def _freq_in_query(query, field):
def
find_snps
(
primer
,
db_snp
=
None
,
field
=
"AF"
):
try
:
import
vcf
except
:
except
ImportError
:
return
None
if
db_snp
is
None
:
...
...
@@ -293,10 +305,12 @@ def find_snps(primer, db_snp=None, field="AF"):
contigs
=
list
(
reader
.
contigs
.
keys
())
if
len
(
contigs
)
==
0
:
raise
ValueError
(
"The VCF file does not contain contigs in the header."
)
if
"chr"
in
contigs
[
0
]
and
not
"chr"
in
primer
.
chromosome
:
raise
ValueError
(
"The VCF file does not contain contigs in the header."
)
if
"chr"
in
contigs
[
0
]
and
"chr"
not
in
primer
.
chromosome
:
chrom
=
"chr"
+
primer
.
chromosome
elif
"chr"
in
primer
.
chromosome
and
not
"chr"
in
contigs
[
0
]:
elif
"chr"
in
primer
.
chromosome
and
"chr"
not
in
contigs
[
0
]:
chrom
=
primer
.
chromosome
.
split
(
"chr"
)[
1
]
else
:
chrom
=
primer
.
chromosome
...
...
@@ -308,7 +322,8 @@ def find_snps(primer, db_snp=None, field="AF"):
left_n
,
left_freqs
=
_freq_in_query
(
query
,
field
)
# can't have two pysam queries open at the same time, so have to do this this way
# can't have two pysam queries open at the same time,
# so have to do this this way
if
NEW_VCF
:
right_query
=
reader
.
fetch
(
chrom
,
int
(
primer
.
right_pos
),
right_end
)
else
:
...
...
@@ -336,8 +351,10 @@ def chop_region(region, size):
"""
if
len
(
region
)
<=
size
:
return
[
region
]
first
=
Region
(
chromosome
=
region
.
chr
,
start
=
region
.
start
,
stop
=
region
.
start
+
size
,
acc_nr
=
region
.
acc_nr
,
padding_left
=
region
.
padding_left
,
padding_right
=
region
.
padding_right
,
first
=
Region
(
chromosome
=
region
.
chr
,
start
=
region
.
start
,
stop
=
region
.
start
+
size
,
acc_nr
=
region
.
acc_nr
,
padding_left
=
region
.
padding_left
,
padding_right
=
region
.
padding_right
,
other
=
region
.
other_information
)
regions
=
[
first
]
while
sum
([
len
(
x
)
for
x
in
regions
])
<
len
(
region
):
...
...
@@ -347,8 +364,10 @@ def chop_region(region, size):
else
:
stop
=
last
.
stop
+
size
nex
=
Region
(
chromosome
=
region
.
chr
,
start
=
last
.
stop
,
stop
=
stop
,
acc_nr
=
region
.
acc_nr
,
padding_left
=
region
.
padding_left
,
padding_right
=
region
.
padding_right
,
nex
=
Region
(
chromosome
=
region
.
chr
,
start
=
last
.
stop
,
stop
=
stop
,
acc_nr
=
region
.
acc_nr
,
padding_left
=
region
.
padding_left
,
padding_right
=
region
.
padding_right
,
other
=
region
.
other_information
)
regions
.
append
(
nex
)
return
regions
...
...
@@ -375,23 +394,30 @@ def get_primer_from_region(region, reference, product_size, n_prims,
product_size
=
product_size
,
n_primers
=
n_prims
,
prim3_exe
=
primer3_exe
,
**
prim_args
)
bam
=
aln_primers
(
raw_primers
,
bwa_exe
=
bwa_exe
,
samtools_exe
=
samtools_exe
,
bam
=
aln_primers
(
raw_primers
,
bwa_exe
=
bwa_exe
,
samtools_exe
=
samtools_exe
,
ref
=
reference
,
output_bam
=
output_bam
)
prims
=
find_best_bwa
(
bam
,
region
=
reg
,
dbsnp
=
dbsnp
,
field
=
field
,
max_freq
=
max_freq
)
prims
=
find_best_bwa
(
bam
,
region
=
reg
,
dbsnp
=
dbsnp
,
field
=
field
,
max_freq
=
max_freq
)
tmp_reg
=
[]
tmp_prim
=
[]
# filter out primers too close to the variant
for
primer
in
prims
:
if
not
(
int
(
primer
.
left_pos
)
+
int
(
primer
.
left_len
)
+
min_margin
)
<
int
(
reg
.
start
):
if
not
(
int
(
primer
.
left_pos
)
+
int
(
primer
.
left_len
)
+
min_margin
)
<
int
(
reg
.
start
):
continue
if
not
(
int
(
primer
.
right_pos
)
-
min_margin
)
>
int
(
reg
.
stop
):
continue
n_region
=
Region
(
start
=
int
(
primer
.
left_pos
),
stop
=
int
(
primer
.
right_pos
)
+
len
(
primer
.
right
),
chromosome
=
primer
.
chromosome
,
padding_left
=
0
,
padding_right
=
0
,
n_region
=
Region
(
start
=
int
(
primer
.
left_pos
),
stop
=
int
(
primer
.
right_pos
)
+
len
(
primer
.
right
),
chromosome
=
primer
.
chromosome
,
padding_left
=
0
,
padding_right
=
0
,
acc_nr
=
"NA"
,
other
=
"NA"
)
primer
.
fragment_sequence
=
get_sequence_fasta
(
n_region
,
reference
=
reference
,
padding
=
False
)
primer
.
fragment_sequence
=
get_sequence_fasta
(
n_region
,
reference
=
reference
,
padding
=
False
)
if
strict
and
len
(
primer
.
fragment_sequence
)
>
max_length
:
continue
else
:
...
...
prinia/lovd.py
View file @
2ce69ed4
from
builtins
import
(
ascii
,
bytes
,
chr
,
dict
,
filter
,
hex
,
input
,
map
,
next
,
oct
,
open
,
pow
,
range
,
round
,
str
,
super
,
zip
)
from
builtins
import
(
map
,
open
,
str
,
zip
)
__author__
=
'ahbbollen'
...
...
@@ -39,7 +37,9 @@ def var_from_lovd(path):
elif
seen_header
:
data_lines
+=
1
contents
=
line
.
split
(
"
\t
"
)
assert
len
(
contents
)
==
len
(
d
.
keys
()),
"Different amount of values in data line?"
if
len
(
contents
)
!=
len
(
d
.
keys
()):
raise
ValueError
(
"Number of data fields does not "
"match number of headers."
)
for
header
,
field
in
zip
(
d
.
keys
(),
contents
):
if
field
.
strip
(
'""'
):
d
[
header
].
append
(
field
.
strip
(
'""'
))
...
...
@@ -52,7 +52,3 @@ def var_from_lovd(path):
comments
[
"n_line"
]
=
data_lines
return
Variant
.
from_dict
(
d
,
comments
)
prinia/miracle_xml.py
View file @
2ce69ed4
from
builtins
import
(
ascii
,
bytes
,
chr
,
dict
,
filter
,
hex
,
input
,
map
,
next
,
oct
,
open
,
pow
,
range
,
round
,
str
,
super
,
zip
)
"""
Parsing of Miracle XML files
"""
...
...
@@ -12,12 +8,14 @@ from io import StringIO
from
datetime
import
datetime
from
lxml
import
etree
DEFAULT_XSD
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"static"
),
'variant.xsd'
)
REGION_XSD
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"static"
),
'region.xsd'
)
from
.models
import
*
from
.models
import
Variant
,
Primer
from
.utils
import
calc_gc
,
datehash
DEFAULT_XSD
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"static"
),
'variant.xsd'
)
REGION_XSD
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"static"
),
'region.xsd'
)
try
:
from
itertools
import
zip_longest
as
longzip
except
ImportError
:
...
...
@@ -95,7 +93,8 @@ def miracle_to_primer_and_var(xml, xsd=DEFAULT_XSD):
var
.
allele
=
'Heterozygous'
elif
item
.
find
(
'UITSLAG_CODE'
).
text
==
'HOM'
:
var
.
allele
=
'Homozygous'
# FIXME: should include cases of maternally hemizygous and paternally hemizygous
# FIXME: should include cases of maternally hemizygous and
# paternally hemizygous
else
:
var
.
allele
=
'NA'
...
...
@@ -182,15 +181,19 @@ def primer_to_xml(root, prim, var):
return
root
def
vars_and_primers_to_xml
(
variants
,
primers
,
xml_path
=
None
,
xsd
=
DEFAULT_XSD
,
old
=
True
):
def
vars_and_primers_to_xml
(
variants
,
primers
,
xml_path
=
None
,
xsd
=
DEFAULT_XSD
,
old
=
True
):
panel
=
int
(
variants
[
0
].
in_gene_panel
)
==
1
if
old
:
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'variant.xsd'
)
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'variant.xsd'
)
else
:
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'variant_new.xsd'
)
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'variant_new.xsd'
)
document
,
results
=
common_xml
(
variants
[
0
].
miracle_id
,
variants
[
0
].
datum
,
panel
,
old
=
old
)
document
,
results
=
common_xml
(
variants
[
0
].
miracle_id
,
variants
[
0
].
datum
,
panel
,
old
=
old
)
i
=
1
for
var
,
prim
in
longzip
(
variants
,
primers
,
fillvalue
=
None
):
...
...
@@ -218,7 +221,7 @@ def vars_and_primers_to_xml(variants, primers, xml_path=None, xsd=DEFAULT_XSD, o
name
=
etree
.
SubElement
(
gene
,
"NAAM"
)
name
.
text
=
var
.
gene_name
loc
=
etree
.
SubElement
(
gene
,
"LOCATIE"
)
loc
=
etree
.
SubElement
(
gene
,
"LOCATIE"
)
# noqa
build
=
etree
.
SubElement
(
variant
,
"BUILD"
)
build
.
text
=
var
.
refseq_build
...
...
@@ -244,9 +247,11 @@ def vars_and_primers_to_xml(variants, primers, xml_path=None, xsd=DEFAULT_XSD, o
if
prim
is
not
None
:
frag_len
=
etree
.
SubElement
(
prims
,
"FRAGMENT_LENGTH"
)
# fragment is the region between the end of the forward primer, and the start of the reverse primer
# fragment is the region between the end of the forward primer,
# and the start of the reverse primer
frag_len
.
text
=
str
(
int
(
prim
.
right_pos
)
-
(
int
(
prim
.
left_pos
)
+
len
(
prim
.
left
)))
frag_len
.
text
=
str
(
int
(
prim
.
right_pos
)
-
(
int
(
prim
.
left_pos
)
+
len
(
prim
.
left
)))
gc_perc
=
etree
.
SubElement
(
prims
,
"GC_PERC"
)
...
...
@@ -254,7 +259,7 @@ def vars_and_primers_to_xml(variants, primers, xml_path=None, xsd=DEFAULT_XSD, o
gc_perc
.
text
=
str
(
int
(
calc_gc
(
prim
.
fragment_sequence
)))
except
ValueError
:
gc_perc
.
text
=
'0'
_
=
primer_to_xml
(
prims
,
prim
,
var
)
_
=
primer_to_xml
(
prims
,
prim
,
var
)
# noqa
else
:
comment
=
etree
.
SubElement
(
uitslag
,
"OPMERKING"
)
comment
.
text
=
"NO PRIMERS FOUND"
...
...
@@ -308,11 +313,15 @@ def vars_and_primers_to_xml(variants, primers, xml_path=None, xsd=DEFAULT_XSD, o
return
xml
def
regions_and_primers_to_xml
(
variants
,
primers
,
xml_path
=
None
,
xsd
=
REGION_XSD
,
sample
=
''
,
build
=
"GRCh37"
,
old
=
True
):
def
regions_and_primers_to_xml
(
variants
,
primers
,
xml_path
=
None
,
xsd
=
REGION_XSD
,
sample
=
''
,
build
=
"GRCh37"
,
old
=
True
):
if
old
:
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'region.xsd'
)
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'region.xsd'
)
else
:
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'region_new.xsd'
)
xsd
=
os
.
path
.
join
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'static'
),
'region_new.xsd'
)
document
,
results
=
common_xml
(
sample
,
''
,
old
=
old
)
i
=
1
...
...
@@ -333,8 +342,10 @@ def regions_and_primers_to_xml(variants, primers, xml_path=None, xsd=REGION_XSD,
code
=
etree
.
SubElement
(
gene
,
"CODE"
)
if
region
.
other_information
!=
"NA"
:
try
:
code
.
text
=
region
.
other_information
.
split
(
","
)[
0
].
split
(
"|"
)[
0
]
except
:
code
.
text
=
region
.
other_information
.
split
(
","
)[
0
].
split
(
"|"
)[
0
]
except
IndexError
:
code
.
text
=
""
else
:
code
.
text
=
""
...
...
@@ -357,7 +368,8 @@ def regions_and_primers_to_xml(variants, primers, xml_path=None, xsd=REGION_XSD,
if
"NA"
in
region
.
other_information
:
f_code
.
text
=
prim
.
left_name
+
"."
+
datehash
()
else
:
f_code
.
text
=
region
.
other_information
.
split
(
"|"
)[
0
]
+
"_F."
+
datehash
()
f_code
.
text
=
(
region
.
other_information
.
split
(
"|"
)[
0
]
+
"_F."
+
datehash
())
f_seq
=
etree
.
SubElement
(
primer_f
,
"SEQUENTIE"
)
f_seq
.
text
=
prim
.
left
f_coord
=
etree
.
SubElement
(
primer_f
,
"COORDINATE"
)
...
...
@@ -368,7 +380,8 @@ def regions_and_primers_to_xml(variants, primers, xml_path=None, xsd=REGION_XSD,
if
"NA"
in
region
.
other_information
:
r_code
.
text
=
prim
.
right_name
+
"."
+
datehash
()
else
:
r_code
.
text
=
region
.
other_information
.
split
(
"|"
)[
0
]
+
"_R."
+
datehash
()
r_code
.
text
=
(
region
.
other_information
.
split
(
"|"
)[
0
]
+
"_R."
+
datehash
())
r_seq
=
etree
.
SubElement
(
primer_r
,
"SEQUENTIE"
)
r_seq
.
text
=
prim
.
right
r_coord
=
etree
.
SubElement
(
primer_r
,
"COORDINATE"
)
...
...
prinia/models.py
View file @
2ce69ed4
from
builtins
import
(
ascii
,
bytes
,
chr
,
dict
,
filter
,
hex
,
input
,
map
,
next
,
oct
,
open
,
pow
,
range
,
round
,
str
,
super
,
zip
)
from
builtins
import
(
range
,
str
)
__author__
=
'ahbbollen'
...
...
@@ -48,14 +46,22 @@ class Variant(object):
for
k
in
dictionary
.
keys
():
try
:
setattr
(
x
,
k
,
dictionary
[
k
][
n
])
except
:
except
:
# noqa
pass
x
.
variant_on_genome
=
dictionary
[
"VariantOnGenome/DNA"
][
n
]
x
.
variant_on_transcript_dna
=
dictionary
[
"VariantOnTranscript/DNA"
][
n
]
x
.
variant_on_transcript_rna
=
dictionary
[
"VariantOnTranscript/RNA"
][
n
]
x
.
variant_on_transcript_protein
=
dictionary
[
"VariantOnTranscript/Protein"
][
n
]
x
.
variant_on_genome_origin
=
dictionary
[
"VariantOnGenome/Genetic_origin"
][
n
]
x
.
variant_on_transcript_dna
=
dictionary
[
"VariantOnTranscript/DNA"
][
n
]
x
.
variant_on_transcript_rna
=
dictionary
[
"VariantOnTranscript/RNA"
][
n
]
x
.
variant_on_transcript_protein
=
dictionary
[
"VariantOnTranscript/Protein"
][
n
]
x
.
variant_on_genome_origin
=
dictionary
[
"VariantOnGenome/Genetic_origin"
][
n
]
if
x
.
confirm_in_lab
==
"0"
:
x
.
confirm_in_lab
=
False
...
...
@@ -96,9 +102,10 @@ class Primer(object):
setattr
(
self
,
kw
,
kv
)
@
classmethod
def
from_p3
(
cls
,
forward
,
reverse
,
fragment
=
None
,
chromosome
=
None
,
position
=
None
):
def
from_p3
(
cls
,
forward
,
reverse
,
fragment
=
None
,
chromosome
=
None
,
position
=
None
):
# line looks like this:
#
0 CAGCACTGCTTGAGGGGAA
0 19
0 57.89 59.926
0.00
0.00 36.40
1.074
# 0 CAGCACTGCTTGAGGGGAA 0 19 0 57.89 59.926 0.00 0.00 36.40 1.074
f_contents
=
[
x
for
x
in
forward
.
strip
().
split
(
" "
)
if
x
]
r_contents
=
[
x
for
x
in
reverse
.
strip
().
split
(
" "
)
if
x
]
primer
=
cls
()
...
...
@@ -161,8 +168,10 @@ class Region(object):
@
classmethod
def
from_variant
(
cls
,
variant
,
padding_l
=
0
,
padding_r
=
0
):
return
cls
(
chromosome
=
variant
.
chromosome
,
acc_nr
=
variant
.
genomic_id_ncbi
,
start
=
variant
.
position_g_start
,
stop
=
variant
.
position_g_end
,
return
cls
(
chromosome
=
variant
.
chromosome
,
acc_nr
=
variant
.
genomic_id_ncbi
,
start
=
variant
.
position_g_start
,
stop
=
variant
.
position_g_end
,
padding_left
=
padding_l
,
padding_right
=
padding_r
)
...
...
@@ -175,9 +184,14 @@ class Region(object):
else
:
other
=
"NA"
return
cls
(
chromosome
=
contents
[
0
],
acc_nr
=
'NA'
,
start
=
int
(
contents
[
1
]),
stop
=
int
(
contents
[
2
]),
padding_left
=
padding_l
,
padding_right
=
padding_r
,
ref
=
reference
,
other
=
other
)
return
cls
(
chromosome
=
contents
[
0
],
acc_nr
=
'NA'
,
start
=
int
(
contents
[
1
]),
stop
=
int
(
contents
[
2
]),
padding_left
=
padding_l
,
padding_right
=
padding_r
,
ref
=
reference
,
other
=
other
)
@
classmethod
def
cut
(
cls
,
region
,
max_size
,
padded
=
False
):
...
...
prinia/primer3.py
View file @
2ce69ed4
...
...
@@ -69,19 +69,17 @@ class Primer3(object):
"PRIMER_MIN_TM={it}
\n
"
\
"PRIMER_MAX_TM={at}
\n
"
\
"PRIMER_NUM_RETURN=200
\n
"
\
"="
.
format
(
seq
=
self
.
template
,
tar
=
self
.
target
,
exc
=
self
.
excluded_region
,
gc
=
self
.
opt_gc_perc
,
size
=
self
.
opt_prim_length
,
isize
=
self
.
opt_prim_length
-
5
,
asize
=
self
.
opt_prim_length
+
5
,
range
=
self
.
range
,
osize
=
self
.
opt_size
,
it
=
self
.
min_melting_t
,
at
=
self
.
max_melting_t
)
"="
.
format
(
seq
=
self
.
template
,
tar
=
self
.
target
,
exc
=
self
.
excluded_region
,
gc
=
self
.
opt_gc_perc
,
size
=
self
.
opt_prim_length
,
isize
=
self
.
opt_prim_length
-
5
,
asize
=
self
.
opt_prim_length
+
5
,
range
=
self
.
range
,
osize
=
self
.
opt_size
,
it
=
self
.
min_melting_t
,
at
=
self
.
max_melting_t
)
handle
.
write
(
cfg_str
)
...
...
@@ -93,7 +91,7 @@ class Primer3(object):
cfg
.
close
()
args
=
[
self
.
primer3_exe
,
"-output"
,
out
.
name
,
cfg
.
name
]
_
=
check_call
(
args
)
_
=
check_call
(
args
)
# noqa
retval
=
[]
with
open
(
out
.
name
)
as
handle
:
...
...
prinia/primerdesign.py
View file @
2ce69ed4
from
builtins
import
(
ascii
,
bytes
,
chr
,
dict
,
filter
,
hex
,
input
,
map
,
next
,
oct
,
open
,
pow
,
range
,
round
,
str
,
super
,
zip
)
"""
This script generates primer pairs from either LOVD input or BED files for regions.
This script generates primer pairs from either LOVD input or BED files for
regions.
Output in Miracle XML or TSV
"""
import
argparse
from
prinia.lovd
import
*
from
prinia.design
import
*
from
prinia.miracle_xml
import
vars_and_primers_to_xml
,
regions_and_primers_to_xml
from
prinia.lovd
import
var_from_lovd
from
prinia.design
import
get_primer_from_region
from
prinia.models
import
Region
from
prinia.miracle_xml
import
(
vars_and_primers_to_xml
,
regions_and_primers_to_xml
)
from
prinia.utils
import
generate_fastq_from_primers
,
NoPrimersException
__author__
=
'ahbbollen'
...
...
@@ -65,7 +65,8 @@ def primers_from_region(bed_path, padding, product_size, n_prims, reference,
regs
,
prims
=
get_primer_from_region
(
region
,
reference
,
product_size
,
n_prims
,
bwa_exe
,
samtools_exe
,
primer3_exe
,
output_bam
=
output_bam
,
primer3_exe
,
output_bam
=
output_bam
,
dbsnp
=
dbsnp
,
field
=
field
,
max_freq
=
max_freq
,
strict
=
strict
,
...
...
@@ -84,7 +85,8 @@ def primers_to_xml(object, primers, xml, type='variants', sample=None):
if
type
==
'variants'
:
xml_out
=
vars_and_primers_to_xml
(
object
,
primers
,
xml
)
elif
type
==
'regions'
:
xml_out
=
regions_and_primers_to_xml
(
object
,
primers
,
xml
,
sample
=
sample
)
xml_out
=
regions_and_primers_to_xml
(
object
,
primers
,
xml
,
sample
=
sample
)
else
:
raise
ValueError
(
"Wrong type specified"
)
...
...
@@ -95,21 +97,25 @@ def primers_to_xml(object, primers, xml, type='variants', sample=None):
def
primers_to_tsv
(
object
,
primers
,
tsv
,
type
=
'variants'
,
sample
=
'NA'
):
with
open
(
tsv
,
"wb"
)
as
handle
:
if
type
==
'variants'
:
header
=
[
'Sample'
,
'Chromosome'
,
'Hgvs (genomic)'
,
'transcript'
,
'hgvs (transcript)'
,
'fragment'
,
'forward'
,
'reverse'
]
header
=
[
'Sample'
,
'Chromosome'
,
'Hgvs (genomic)'
,
'transcript'
,
'hgvs (transcript)'
,
'fragment'
,
'forward'
,
'reverse'
]
else
:
header
=
[
'Sample'
,
'Chr'
,
'Start'
,
'Stop'
,
"size"
,
"search_size"
,
'other_information'
,
'fragment'
,
'forward'
,
'reverse'
]
header
=
[
'Sample'
,
'Chr'
,
'Start'
,
'Stop'
,
"size"
,
"search_size"
,
'other_information'
,
'fragment'
,
'forward'
,
'reverse'
]
headerline
=
"
\t
"
.
join
(
header
)
+
"
\n
"
handle
.
write
(
headerline
.
encode
())
for
o
,
prim
in
zip
(
object
,
primers
):
line
=
[]
if
type
==
'variants'
:
line
+=
[
o
.
miracle_id
,
o
.
chromosome
,
o
.
variant_on_genome
,
o
.
transcript_id_ncbi
,
line
+=
[
o
.
miracle_id
,
o
.
chromosome
,
o
.
variant_on_genome
,
o
.
transcript_id_ncbi
,
o
.
variant_on_transcript_dna
]
else
:
line
+=
[
sample
,
o
.
chr
,
o
.
start
,
o
.
stop
,
len
(
o
),
o
.
size
(
padded
=
True
),
o
.
other_information
]
line
+=
[
sample
,
o
.
chr
,
o
.
start
,
o
.
stop
,
len
(
o
),
o
.
size
(
padded
=
True
),
o
.
other_information
]
line
+=
[
prim
.
fragment_sequence
,
prim
.
left
,
prim
.
right
]
linest
=
"
\t
"
.
join
(
map
(
str
,
line
))
+
"
\n
"
linest
=
"
\t
"
.
join
(
map
(
str
,
line
))
+
"
\n
"
handle
.
write
(
linest
.
encode
())
...
...
@@ -126,49 +132,77 @@ def main():
group
=
parser
.
add_mutually_exclusive_group
(
required
=
True
)
group
.
add_argument
(
"-l"
,
"--lovd"
,
help
=
"Input LOVD file"
)
group
.
add_argument
(
'--region'
,
help
=
"Input region file"
)
parser
.
add_argument
(
'-p'
,
'--padding'
,
help
=
"Padding around regions or variants in bases. Defaults to 100"
,
parser
.
add_argument
(
'-p'
,
'--padding'
,
help
=
"Padding around regions or variants in bases. "
"Defaults to 100"
,
type
=
int
,
default
=
100
)
output_group
=
parser
.
add_mutually_exclusive_group
(
required
=
True
)
output_group
.
add_argument
(
'-x'
,
'--xml'
,
help
=
"Output Miracle XML file"
)
output_group
.
add_argument
(
'-t'
,
'--tsv'
,
help
=
"Output TSV file"
)
parser
.
add_argument
(
'-b'
,
'--bam'
,
help
=
"Path to output BAM file containing primers"
,
required
=
True
)
parser
.
add_argument
(
'-b'
,
'--bam'
,
help
=
"Path to output BAM file containing primers"
,
required
=
True
)
parser
.
add_argument
(
'-s'
,
'--sample'
,
help
=
"Same ID for regions"
)