Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
LightMotif
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Martin Larralde
LightMotif
Commits
5c3f73c8
Commit
5c3f73c8
authored
1 year ago
by
Martin Larralde
Browse files
Options
Downloads
Patches
Plain Diff
Add support for parsing more metadata from TRANSFAC files
parent
4183f7d1
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
lightmotif-transfac/src/lib.rs
+417
-71
417 additions, 71 deletions
lightmotif-transfac/src/lib.rs
with
417 additions
and
71 deletions
lightmotif-transfac/src/lib.rs
+
417
−
71
View file @
5c3f73c8
...
...
@@ -11,12 +11,14 @@ use nom::bytes::complete::take_until;
use
nom
::
bytes
::
complete
::
take_while
;
use
nom
::
bytes
::
complete
::
take_while1
;
use
nom
::
character
::
complete
::
anychar
;
use
nom
::
character
::
complete
::
char
;
use
nom
::
character
::
complete
::
line_ending
;
use
nom
::
character
::
complete
::
not_line_ending
;
use
nom
::
character
::
complete
::
space0
;
use
nom
::
character
::
streaming
::
space1
;
use
nom
::
combinator
::
eof
;
use
nom
::
combinator
::
map_res
;
use
nom
::
combinator
::
opt
;
use
nom
::
error
::
Error
;
use
nom
::
error
::
ErrorKind
;
use
nom
::
multi
::
count
;
...
...
@@ -33,14 +35,67 @@ use lightmotif::CountMatrix;
use
lightmotif
::
DenseMatrix
;
use
lightmotif
::
Symbol
;
pub
struct
TransfacMatrix
<
A
:
Alphabet
,
const
K
:
usize
>
{
#[derive(Clone,
Debug)]
pub
struct
Matrix
<
A
:
Alphabet
,
const
K
:
usize
>
{
id
:
Option
<
String
>
,
accession
:
Option
<
String
>
,
name
:
Option
<
String
>
,
counts
:
CountMatrix
<
A
,
K
>
,
dates
:
Vec
<
Date
>
,
references
:
Vec
<
Reference
>
,
sites
:
Vec
<
String
>
,
}
#[derive(Debug,
Clone,
Copy,
PartialEq,
Eq)]
pub
enum
DateKind
{
Created
,
Updated
,
}
#[derive(Debug,
Clone)]
pub
struct
Date
{
kind
:
DateKind
,
author
:
String
,
day
:
u8
,
month
:
u8
,
year
:
u16
,
}
#[derive(Clone,
Debug)]
pub
struct
ReferenceNumber
{
local
:
u32
,
xref
:
Option
<
String
>
,
}
impl
ReferenceNumber
{
pub
fn
new
(
local
:
u32
)
->
Self
{
Self
{
local
,
xref
:
None
}
}
pub
fn
with_xref
<
X
>
(
local
:
u32
,
xref
:
X
)
->
Self
where
X
:
Into
<
Option
<
String
>>
,
{
Self
{
local
,
xref
:
xref
.into
(),
}
}
}
#[derive(Clone,
Debug)]
pub
struct
Reference
{
number
:
ReferenceNumber
,
// authors: String,
title
:
Option
<
String
>
,
link
:
Option
<
String
>
,
pmid
:
Option
<
String
>
,
}
fn
parse_line
(
input
:
&
str
)
->
IResult
<&
str
,
&
str
>
{
match
memchr
::
memchr
(
b'\n'
,
input
.as_bytes
())
{
None
=>
Err
(
nom
::
Err
::
Error
(
Error
::
new
(
input
,
ErrorKind
::
Verify
))),
Some
(
i
)
if
i
==
input
.len
()
=>
Ok
((
""
,
input
)),
None
=>
Err
(
nom
::
Err
::
Error
(
Error
::
new
(
input
,
ErrorKind
::
Char
))),
Some
(
i
)
if
i
==
input
.len
()
-
1
=>
Ok
((
""
,
input
)),
Some
(
i
)
=>
{
let
(
line
,
rest
)
=
input
.split_at
(
i
+
1
);
Ok
((
rest
,
line
))
...
...
@@ -48,21 +103,6 @@ fn parse_line(input: &str) -> IResult<&str, &str> {
}
}
fn
parse_ac
(
input
:
&
str
)
->
IResult
<&
str
,
&
str
>
{
let
(
input
,
line
)
=
preceded
(
tag
(
"AC"
),
parse_line
)(
input
)
?
;
Ok
((
input
,
line
.trim
()))
}
fn
parse_id
(
input
:
&
str
)
->
IResult
<&
str
,
&
str
>
{
let
(
input
,
line
)
=
preceded
(
tag
(
"ID"
),
parse_line
)(
input
)
?
;
Ok
((
input
,
line
.trim
()))
}
fn
parse_bf
(
input
:
&
str
)
->
IResult
<&
str
,
&
str
>
{
let
(
input
,
line
)
=
preceded
(
tag
(
"BF"
),
parse_line
)(
input
)
?
;
Ok
((
input
,
line
.trim
()))
}
fn
parse_alphabet
<
S
:
Symbol
>
(
input
:
&
str
)
->
IResult
<&
str
,
Vec
<
S
>>
{
delimited
(
alt
((
tag
(
"PO"
),
tag
(
"P0"
))),
...
...
@@ -83,37 +123,180 @@ fn parse_row(input: &str, k: usize) -> IResult<&str, Vec<u32>> {
}
fn
parse_tag
(
input
:
&
str
)
->
IResult
<&
str
,
&
str
>
{
nom
::
branch
::
alt
((
tag
(
"BF"
),
tag
(
"ID"
),
tag
(
"XX"
),
tag
(
"P0"
),
tag
(
"PO"
),
tag
(
"//"
),
))(
input
)
}
pub
fn
parse_matrix
<
A
:
Alphabet
,
const
K
:
usize
>
(
mut
input
:
&
str
,
)
->
IResult
<&
str
,
CountMatrix
<
A
,
K
>>
{
let
(
rest
,
tag
)
=
nom
::
bytes
::
complete
::
take
(
2usize
)(
input
)
?
;
match
tag
{
"AC"
|
"BA"
|
"BS"
|
"BF"
|
"CC"
|
"CO"
|
"DT"
|
"ID"
|
"NA"
|
"P0"
|
"PO"
|
"RN"
|
"XX"
|
"//"
=>
Ok
((
rest
,
tag
)),
_
=>
Err
(
nom
::
Err
::
Error
(
Error
::
new
(
input
,
ErrorKind
::
Alt
))),
}
}
fn
parse_reference_number
(
input
:
&
str
)
->
IResult
<&
str
,
ReferenceNumber
>
{
let
(
rest
,
number
)
=
preceded
(
terminated
(
tag
(
"RN"
),
space0
),
delimited
(
char
(
'['
),
nom
::
character
::
complete
::
u32
,
char
(
']'
)),
)(
input
)
?
;
match
opt
(
anychar
)(
rest
)
?
.1
{
Some
(
';'
)
=>
{
let
(
rest
,
xref
)
=
delimited
(
char
(
';'
),
take_till
(|
c
|
c
==
'.'
),
char
(
'.'
))(
rest
)
?
;
let
(
rest
,
_
)
=
parse_line
(
rest
)
?
;
Ok
((
rest
,
ReferenceNumber
::
with_xref
(
number
,
xref
.trim
()
.to_string
()),
))
}
_
=>
{
let
(
rest
,
_
)
=
parse_line
(
input
)
?
;
Ok
((
rest
,
ReferenceNumber
::
new
(
number
)))
}
}
}
fn
parse_datekind
(
input
:
&
str
)
->
IResult
<&
str
,
DateKind
>
{
match
alt
((
tag
(
"created"
),
tag
(
"updated"
)))(
input
)
?
{
(
rest
,
"created"
)
=>
Ok
((
rest
,
DateKind
::
Created
)),
(
rest
,
"updated"
)
=>
Ok
((
rest
,
DateKind
::
Updated
)),
_
=>
unreachable!
(),
}
}
fn
parse_date
(
input
:
&
str
)
->
IResult
<&
str
,
Date
>
{
let
(
rest
,
_
)
=
terminated
(
tag
(
"DT"
),
space0
)(
input
)
?
;
let
(
rest
,
day
)
=
terminated
(
nom
::
character
::
complete
::
u8
,
char
(
'.'
))(
rest
)
?
;
let
(
rest
,
month
)
=
terminated
(
nom
::
character
::
complete
::
u8
,
char
(
'.'
))(
rest
)
?
;
let
(
rest
,
year
)
=
nom
::
character
::
complete
::
u16
(
rest
)
?
;
let
(
rest
,
_
)
=
space0
(
rest
)
?
;
let
(
rest
,
kind
)
=
delimited
(
char
(
'('
),
parse_datekind
,
char
(
')'
))(
rest
)
?
;
let
(
rest
,
author
)
=
delimited
(
char
(
';'
),
preceded
(
space0
,
take_till
(|
c
|
c
==
'.'
)),
char
(
'.'
),
)(
rest
)
?
;
let
(
rest
,
_
)
=
parse_line
(
rest
)
?
;
Ok
((
rest
,
Date
{
author
:
author
.to_string
(),
kind
,
year
,
month
,
day
,
},
))
}
fn
parse_reference
(
mut
input
:
&
str
)
->
IResult
<&
str
,
Reference
>
{
let
mut
pmid
=
None
;
let
mut
link
=
None
;
let
mut
title
=
None
;
let
(
rest
,
number
)
=
parse_reference_number
(
input
)
?
;
input
=
rest
;
loop
{
match
nom
::
bytes
::
complete
::
take
(
2usize
)(
input
)
?
.1
{
"RX"
=>
{
let
(
rest
,
line
)
=
preceded
(
preceded
(
terminated
(
tag
(
"RX"
),
space0
),
terminated
(
tag
(
"PUBMED:"
),
space0
),
),
terminated
(
take_till
(|
c
|
c
==
'.'
),
char
(
'.'
)),
)(
input
)
?
;
let
(
rest
,
_
)
=
parse_line
(
rest
)
?
;
pmid
=
Some
(
line
.to_string
());
input
=
rest
;
}
"RA"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"RA"
),
parse_line
)(
input
)
?
;
// ra = Some(line.trim());
input
=
rest
;
}
"RL"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"RL"
),
parse_line
)(
input
)
?
;
link
=
Some
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"RT"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"RT"
),
parse_line
)(
input
)
?
;
title
=
Some
(
line
.trim
()
.to_string
());
input
=
rest
;
}
_
=>
break
,
}
}
Ok
((
input
,
Reference
{
number
,
pmid
,
link
,
title
,
},
))
}
pub
fn
parse_matrix
<
A
:
Alphabet
,
const
K
:
usize
>
(
mut
input
:
&
str
)
->
IResult
<&
str
,
Matrix
<
A
,
K
>>
{
let
mut
accession
=
None
;
let
mut
ba
=
None
;
let
mut
name
=
None
;
let
mut
id
=
None
;
let
mut
bf
=
None
;
let
mut
copyright
=
None
;
let
mut
dates
=
Vec
::
new
();
let
mut
references
=
Vec
::
new
();
let
mut
comments
=
Vec
::
new
();
let
mut
sites
=
Vec
::
new
();
let
mut
factors
=
Vec
::
new
();
let
mut
countmatrix
=
None
;
loop
{
match
parse_tag
(
input
)
?
.1
{
"XX"
=>
{
let
(
rest
,
_
)
=
parse_line
(
input
)
?
;
"AC"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"AC"
),
parse_line
)(
input
)
?
;
accession
=
Some
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"ID"
=>
{
let
(
rest
,
line
)
=
parse_id
(
input
)
?
;
id
=
Some
(
line
.trim
());
"BA"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"BA"
),
parse_line
)(
input
)
?
;
ba
=
Some
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"BS"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"BS"
),
parse_line
)(
input
)
?
;
sites
.push
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"BF"
=>
{
let
(
rest
,
line
)
=
parse_bf
(
input
)
?
;
bf
=
Some
(
line
.trim
());
let
(
rest
,
line
)
=
preceded
(
tag
(
"BF"
),
parse_line
)(
input
)
?
;
factors
.push
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"CC"
=>
{
let
(
rest
,
lines
)
=
many1
(
preceded
(
tag
(
"CC"
),
parse_line
))(
input
)
?
;
comments
.push
(
lines
.join
(
" "
));
input
=
rest
;
}
"CO"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"CO"
),
parse_line
)(
input
)
?
;
copyright
=
Some
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"DT"
=>
{
let
(
rest
,
date
)
=
parse_date
(
input
)
?
;
dates
.push
(
date
);
input
=
rest
;
}
"ID"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"ID"
),
parse_line
)(
input
)
?
;
id
=
Some
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"NA"
=>
{
let
(
rest
,
line
)
=
preceded
(
tag
(
"NA"
),
parse_line
)(
input
)
?
;
name
=
Some
(
line
.trim
()
.to_string
());
input
=
rest
;
}
"P0"
|
"PO"
=>
{
...
...
@@ -121,7 +304,7 @@ pub fn parse_matrix<A: Alphabet, const K: usize>(
let
(
rest
,
symbols
)
=
parse_alphabet
::
<
A
::
Symbol
>
(
input
)
?
;
let
(
rest
,
counts
)
=
many1
(|
l
|
parse_row
(
l
,
symbols
.len
()))(
rest
)
?
;
input
=
rest
;
//
parse
//
read counts into a dense matrix
let
mut
data
=
DenseMatrix
::
<
u32
,
K
>
::
new
(
counts
.len
());
for
(
i
,
count
)
in
counts
.iter
()
.enumerate
()
{
for
(
s
,
&
c
)
in
symbols
.iter
()
.zip
(
count
.iter
())
{
...
...
@@ -130,21 +313,36 @@ pub fn parse_matrix<A: Alphabet, const K: usize>(
}
countmatrix
=
Some
(
data
);
}
"RN"
=>
{
let
(
rest
,
reference
)
=
parse_reference
(
input
)
?
;
references
.push
(
reference
);
input
=
rest
;
}
"//"
=>
{
input
=
preceded
(
tag
(
"//"
),
parse_line
)(
input
)
?
.0
;
break
;
}
"XX"
=>
input
=
parse_line
(
input
)
?
.0
,
_
=>
unreachable!
(),
}
}
let
matrix
=
CountMatrix
::
new
(
countmatrix
.unwrap
())
.unwrap
();
let
counts
=
CountMatrix
::
new
(
countmatrix
.unwrap
())
.unwrap
();
let
matrix
=
Matrix
{
accession
,
id
,
name
,
counts
,
dates
,
references
,
sites
,
};
Ok
((
input
,
matrix
))
}
pub
fn
parse_matrices
<
A
:
Alphabet
,
const
K
:
usize
>
(
input
:
&
str
,
)
->
IResult
<&
str
,
Vec
<
Count
Matrix
<
A
,
K
>>>
{
)
->
IResult
<&
str
,
Vec
<
Matrix
<
A
,
K
>>>
{
let
(
input
,
(
matrices
,
_
))
=
many_till
(
parse_matrix
,
eof
)(
input
)
?
;
Ok
((
input
,
matrices
))
}
...
...
@@ -157,22 +355,6 @@ mod test {
use
lightmotif
::
Nucleotide
;
use
lightmotif
::
Symbol
;
#[test]
fn
test_parse_id
()
{
let
line
=
"ID prodoric_MX000001
\n
"
;
let
res
=
super
::
parse_id
(
line
)
.unwrap
();
assert_eq!
(
res
.0
,
""
);
assert_eq!
(
res
.1
,
"prodoric_MX000001"
)
}
#[test]
fn
test_parse_bf
()
{
let
line
=
"BF Pseudomonas aeruginosa
\n
"
;
let
res
=
super
::
parse_bf
(
line
)
.unwrap
();
assert_eq!
(
res
.0
,
""
);
assert_eq!
(
res
.1
,
"Pseudomonas aeruginosa"
);
}
#[test]
fn
test_parse_alphabet
()
{
let
line
=
"P0 A T G C
\n
"
;
...
...
@@ -202,7 +384,7 @@ mod test {
}
#[test]
fn
test_parse_
mat
ri
x
()
{
fn
test_parse_
prodo
ri
c
()
{
let
text
=
concat!
(
"ID prodoric_MX000001
\n
"
,
"BF Pseudomonas aeruginosa
\n
"
,
...
...
@@ -221,17 +403,181 @@ mod test {
assert_eq!
(
res
.0
,
""
);
let
matrix
=
res
.1
;
// assert_eq!(matrix.name, "prodoric_MX000001");
assert_eq!
(
matrix
.counts
()
.rows
(),
7
);
assert_eq!
(
matrix
.counts
()[
0
][
Nucleotide
::
A
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
()[
0
][
Nucleotide
::
T
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
()[
0
][
Nucleotide
::
G
.as_index
()],
2
);
assert_eq!
(
matrix
.counts
()[
0
][
Nucleotide
::
C
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
()[
0
][
Nucleotide
::
N
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
()[
5
][
Nucleotide
::
A
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
()[
5
][
Nucleotide
::
T
.as_index
()],
1
);
assert_eq!
(
matrix
.counts
()[
5
][
Nucleotide
::
G
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
()[
5
][
Nucleotide
::
C
.as_index
()],
1
);
assert_eq!
(
matrix
.counts
()[
5
][
Nucleotide
::
N
.as_index
()],
0
);
assert_eq!
(
matrix
.id
,
Some
(
String
::
from
(
"prodoric_MX000001"
)));
assert_eq!
(
matrix
.counts
.counts
()
.rows
(),
7
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
A
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
T
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
G
.as_index
()],
2
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
C
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
N
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
A
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
T
.as_index
()],
1
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
G
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
C
.as_index
()],
1
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
N
.as_index
()],
0
);
}
#[test]
fn
test_parse_reference_number
()
{
let
res
=
super
::
parse_reference_number
(
"RN [1]
\n
"
)
.unwrap
()
.1
;
assert_eq!
(
res
.local
,
1
);
assert!
(
res
.xref
.is_none
());
let
res
=
super
::
parse_reference_number
(
"RN [2]; RE0000531.
\n
"
)
.unwrap
()
.1
;
assert_eq!
(
res
.local
,
2
);
assert_eq!
(
res
.xref
,
Some
(
String
::
from
(
"RE0000531"
)));
}
#[test]
fn
test_parse_reference
()
{
let
text
=
concat!
(
"RN [1]; RE0000231.
\n
"
,
"RX PUBMED: 1846322.
\n
"
,
"RA Sun X.-H., Baltimore D.
\n
"
,
"RT An inhibitory domain of E12 transcription factor prevents DNA binding in E12 homodimers but not in E12 heterodimers
\n
"
,
"RL Cell 64:459-470 (1991).
\n
"
,
"XX
\n
"
,
);
let
res
=
super
::
parse_reference
(
text
)
.unwrap
()
.1
;
assert_eq!
(
res
.number.local
,
1
);
assert_eq!
(
res
.number.xref
,
Some
(
String
::
from
(
"RE0000231"
)));
assert_eq!
(
res
.link
,
Some
(
String
::
from
(
"Cell 64:459-470 (1991)."
)));
assert_eq!
(
res
.pmid
,
Some
(
String
::
from
(
"1846322"
)));
let
text
=
concat!
(
"RN [1]
\n
"
,
"RA Biedenkapp H., Borgmeyer U., Sippel A., Klempnauer K.-H.;
\n
"
,
"RT Viral myb oncogene encodes a sequence-specific DNA-binding activity;
\n
"
,
"RL Nature 335:835-837 (1988).
\n
"
,
"XX
\n
"
,
);
let
res
=
super
::
parse_reference
(
text
)
.unwrap
()
.1
;
assert_eq!
(
res
.number.local
,
1
);
assert_eq!
(
res
.number.xref
,
None
);
assert_eq!
(
res
.pmid
,
None
);
assert_eq!
(
res
.title
,
Some
(
String
::
from
(
"Viral myb oncogene encodes a sequence-specific DNA-binding activity;"
))
);
assert_eq!
(
res
.link
,
Some
(
String
::
from
(
"Nature 335:835-837 (1988)."
)));
}
#[test]
fn
test_parse_transfac_v2
()
{
let
text
=
concat!
(
"AC M00001
\n
"
,
"XX
\n
"
,
"DT 19.10.1992 (created); EWI.
\n
"
,
"XX
\n
"
,
"PO A C G T
\n
"
,
"01 1 2 2 0
\n
"
,
"02 2 1 2 0
\n
"
,
"03 3 0 1 1
\n
"
,
"04 0 5 0 0
\n
"
,
"05 5 0 0 0
\n
"
,
"06 0 0 4 1
\n
"
,
"07 0 1 4 0
\n
"
,
"08 0 0 0 5
\n
"
,
"09 0 0 5 0
\n
"
,
"10 0 1 2 2
\n
"
,
"11 0 2 0 3
\n
"
,
"12 1 0 3 1
\n
"
,
"XX
\n
"
,
"BF MyoD
\n
"
,
"XX
\n
"
,
"BA 5 functional elements in 3 genes
\n
"
,
"XX
\n
"
,
"CC Test comment.
\n
"
,
"XX
\n
"
,
"//
\n
"
,
);
let
res
=
super
::
parse_matrix
::
<
Dna
,
{
Dna
::
K
}
>
(
text
)
.unwrap
();
assert_eq!
(
res
.0
,
""
);
let
matrix
=
res
.1
;
assert_eq!
(
matrix
.accession
,
Some
(
String
::
from
(
"M00001"
)));
assert_eq!
(
matrix
.counts
.counts
()
.rows
(),
12
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
A
.as_index
()],
1
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
T
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
G
.as_index
()],
2
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
C
.as_index
()],
2
);
assert_eq!
(
matrix
.counts
.counts
()[
0
][
Nucleotide
::
N
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
A
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
T
.as_index
()],
1
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
G
.as_index
()],
4
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
C
.as_index
()],
0
);
assert_eq!
(
matrix
.counts
.counts
()[
5
][
Nucleotide
::
N
.as_index
()],
0
);
}
#[test]
fn
test_parse_transfac_v9
()
{
let
text
=
concat!
(
"AC M00030
\n
"
,
"XX
\n
"
,
"ID F$MATA1_01
\n
"
,
"XX
\n
"
,
"DT 18.10.1994 (created); ewi.
\n
"
,
"DT 16.10.1995 (updated); ewi.
\n
"
,
"CO Copyright (C), Biobase GmbH.
\n
"
,
"XX
\n
"
,
"NA MATa1
\n
"
,
"XX
\n
"
,
// "DE mating factor a1\n",
"XX
\n
"
,
"BF T00488; MATa1; Species: yeast, Saccharomyces cerevisiae.
\n
"
,
"XX
\n
"
,
"P0 A C G T
\n
"
,
"01 0 1 1 12 T
\n
"
,
"02 0 0 14 0 G
\n
"
,
"03 14 0 0 0 A
\n
"
,
"04 0 0 0 14 T
\n
"
,
"05 0 0 14 0 G
\n
"
,
"06 1 2 0 11 T
\n
"
,
"07 10 0 3 1 A
\n
"
,
"08 6 2 4 2 N
\n
"
,
"09 5 4 1 4 N
\n
"
,
"10 2 1 1 10 T
\n
"
,
"XX
\n
"
,
"BA a1 half-sites of 14 hsg operators of 4 genes
\n
"
,
"XX
\n
"
,
"BS TGATGTACTT; R05553; 1; 10;; p.
\n
"
,
"BS TGATGTAATC; R05554; 1; 10;; p.
\n
"
,
"BS TGATGTGTAA; R05555; 1; 10;; p.
\n
"
,
"BS TGATGCAGAA; R05556; 1; 10;; p.
\n
"
,
"BS TGATGAAGCG; R05557; 1; 10;; p.
\n
"
,
"BS TGATGTTAAT; R05558; 1; 10;; p.
\n
"
,
"BS TGATGTAAAT; R05559; 1; 10;; p.
\n
"
,
"BS TGATGTAACT; R05560; 1; 10;; p.
\n
"
,
"BS TGATGCAGTT; R05561; 1; 10;; p.
\n
"
,
"BS TGATGTGAAT; R05562; 1; 10;; p.
\n
"
,
"BS CGATGTGCTT; R05563; 1; 10;; p.
\n
"
,
"BS TGATGTATCT; R05564; 1; 10;; p.
\n
"
,
"BS GGATGTAACT; R05565; 1; 10;; p.
\n
"
,
"BS TGATGTAGGT; R05566; 1; 10;; p.
\n
"
,
"XX
\n
"
,
"CC compiled sequences
\n
"
,
"XX
\n
"
,
"RN [1]; RE0000546.
\n
"
,
"RX PUBMED: 7907979.
\n
"
,
"RA Goutte C., Johnson A. D.
\n
"
,
"RT Recognition of a DNA operator by a dimer composed of two different homeodomain proteins
\n
"
,
"RL EMBO J. 13:1434-1442 (1994).
\n
"
,
"XX
\n
"
,
"//
\n
"
,
);
let
res
=
super
::
parse_matrix
::
<
Dna
,
{
Dna
::
K
}
>
(
text
)
.unwrap
();
let
matrix
=
res
.1
;
assert_eq!
(
res
.0
,
""
);
assert_eq!
(
matrix
.name
,
Some
(
String
::
from
(
"MATa1"
)));
assert_eq!
(
matrix
.dates
.len
(),
2
);
assert_eq!
(
matrix
.dates
[
0
]
.author
,
"ewi"
);
assert_eq!
(
matrix
.dates
[
1
]
.author
,
"ewi"
);
assert_eq!
(
matrix
.dates
[
1
]
.day
,
16
);
assert_eq!
(
matrix
.dates
[
1
]
.month
,
10
);
assert_eq!
(
matrix
.dates
[
1
]
.year
,
1995
);
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment