Skip to content
Snippets Groups Projects
Commit 692be1bc authored by Martin Larralde's avatar Martin Larralde
Browse files

Add experimental Python bindings using PyO3

parent 47cd1dce
No related branches found
No related tags found
No related merge requests found
[workspace]
members = ["lightmotif", "lightmotif-bench", "lightmotif-transfac"]
members = ["lightmotif", "lightmotif-bench", "lightmotif-transfac", "lightmotif-py"]
# Created by https://www.gitignore.io/api/rust,python
# Edit at https://www.gitignore.io/?templates=rust,python
callgrind.out.*
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don’t work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
### Rust ###
# Generated by Cargo
# will have compiled files and executables
/target/
.cargo
# These are backup files generated by rustfmt
**/*.rs.bk
# cargo vendor
.cargo/config.toml
crates/
# End of https://www.gitignore.io/api/rust,python
[package]
name = "lightmotif-py"
version = "0.1.0"
authors = ["Martin Larralde <martin.larralde@embl.de>"]
edition = "2021"
license = "MIT"
description = "PyO3 bindings and Python interface to the lightmotif crate."
repository = "https://github.com/althonos/lightmotif"
homepage = "https://github.com/althonos/lightmotif"
readme = "README.md"
keywords = []
build = "build.rs"
[lib]
crate-type = ["cdylib", "rlib"]
path = "lightmotif/lib/lib.rs"
[build-dependencies.built]
version = "0.6"
features = ["chrono"]
[build-dependencies]
project-root = "0.2.2"
[dependencies.lightmotif]
path = "../lightmotif"
version = "0.1.0"
[dependencies]
pyo3 = "0.18.3"
pyo3-built = "0.4.7"
[features]
default = []
extension-module = ["pyo3/extension-module"]
nightly = ["pyo3/nightly"]
\ No newline at end of file
# 🎼🧬 `lightmotif` [![Star me](https://img.shields.io/github/stars/althonos/lightmotif.svg?style=social&label=Star&maxAge=3600)](https://github.com/althonos/lightmotif/stargazers)
*A lightweight [platform-accelerated](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) library for [biological motif](https://en.wikipedia.org/wiki/Sequence_motif) scanning using [position weight matrices](https://en.wikipedia.org/wiki/Position_weight_matrix)*.
\ No newline at end of file
extern crate built;
extern crate project_root;
/// Build script entry point: writes build-time information to `$OUT_DIR/built.rs`.
///
/// The file is produced by the `built` crate with its `dependencies` and
/// `env` options switched on, using the project root reported by the
/// `project_root` crate.
///
/// # Panics
///
/// Panics if the project root cannot be determined, if the `OUT_DIR`
/// environment variable is not set, or if `built` fails to write the file.
fn main() {
    use std::path::{Path, PathBuf};

    // Resolve the input location first, then the output file inside OUT_DIR.
    let root = project_root::get_project_root().unwrap();
    let out_dir = std::env::var("OUT_DIR").unwrap();
    let output = PathBuf::from(out_dir).join("built.rs");

    // Configure which pieces of information `built` should record.
    let mut options = built::Options::default();
    options.set_dependencies(true);
    options.set_env(true);

    built::write_built_file_with_opts(&options, Path::new(&root), &output)
        .expect("Failed to acquire build-time information");
}
from .lib import *
extern crate pyo3;
#[macro_use]
extern crate pyo3_built;
extern crate lightmotif;
#[cfg(target_feature = "avx2")]
use std::arch::x86_64::{__m256, __m256i};
use lightmotif as lm;
use lightmotif::Alphabet;
use lightmotif::Symbol;
use pyo3::exceptions::PyBufferError;
use pyo3::exceptions::PyIndexError;
use pyo3::exceptions::PyTypeError;
use pyo3::exceptions::PyValueError;
use pyo3::ffi::Py_ssize_t;
use pyo3::prelude::*;
use pyo3::types::PyDict;
use pyo3::types::PyString;
use pyo3::AsPyPointer;
// Build-time information included from `$OUT_DIR/built.rs`; the module
// initializer passes this module to `pyo3_built!` to populate `__build__`.
// NOTE(review): `dead_code` is presumably allowed because the generated file
// declares more items than this crate reads — confirm.
#[allow(dead_code)]
mod build {
include!(concat!(env!("OUT_DIR"), "/built.rs"));
}
// --- Compile-time constants --------------------------------------------------
// `Vector` and `C` are selected at compile time from the `avx2` target
// feature. `Vector` parameterizes the scoring pipeline and score storage;
// `C` is the const parameter of the striped sequence and score types.
//
// With AVX2: `Vector` is `__m256` and `C` is the byte size of `__m256i`.
#[cfg(target_feature = "avx2")]
type Vector = __m256;
#[cfg(target_feature = "avx2")]
const C: usize = std::mem::size_of::<__m256i>();
// Without AVX2: `Vector` is a plain `f32` and `C` is the byte size of `f32`.
#[cfg(not(target_feature = "avx2"))]
type Vector = f32;
#[cfg(not(target_feature = "avx2"))]
const C: usize = std::mem::size_of::<f32>();
// --- Helpers -----------------------------------------------------------------
/// Build a per-symbol array of `f32` values from a Python dictionary.
///
/// Every key must be a Python string made of exactly one byte that the
/// alphabet `A` recognizes as a symbol; every value must be convertible to
/// `f32`. Symbols that do not appear in the dictionary keep the value `0.0`.
///
/// # Errors
///
/// Returns a `ValueError` when a key is not a single-byte string or is not a
/// symbol of `A`, and propagates the extraction error when a key is not a
/// string or a value is not a float.
fn dict_to_alphabet_array<'py, A: lm::Alphabet, const K: usize>(
    d: &'py PyDict,
) -> PyResult<[f32; K]> {
    let mut values = [0.0_f32; K];
    for (key, item) in d.iter() {
        let text = key.extract::<&PyString>()?.to_str()?;
        // `len` counts bytes, so only one-byte keys get past this guard.
        let letter = match text.len() {
            1 => text.chars().next().unwrap(),
            _ => {
                return Err(PyValueError::new_err((
                    "Invalid key for pseudocount:",
                    text.to_string(),
                )))
            }
        };
        let symbol = match <A as lm::Alphabet>::Symbol::from_char(letter) {
            Ok(symbol) => symbol,
            Err(_) => {
                return Err(PyValueError::new_err((
                    "Invalid key for pseudocount:",
                    letter,
                )))
            }
        };
        let number = item.extract::<f32>()?;
        values[symbol.as_index()] = number;
    }
    Ok(values)
}
// --- EncodedSequence ---------------------------------------------------------
/// A sequence encoded with the DNA alphabet.
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct EncodedSequence {
    data: lm::EncodedSequence<lm::Dna>,
}

#[pymethods]
impl EncodedSequence {
    /// Encode a text sequence with the DNA alphabet.
    ///
    /// Raises a `ValueError` naming the offending symbol when the text
    /// contains a symbol that cannot be encoded.
    #[new]
    pub fn __init__(sequence: &PyString) -> PyResult<PyClassInitializer<Self>> {
        let text = sequence.to_str()?;
        match lm::EncodedSequence::encode(&text) {
            Ok(data) => Ok(EncodedSequence { data }.into()),
            Err(lm::InvalidSymbol(x)) => Err(PyValueError::new_err(format!(
                "Invalid symbol in input: {}",
                x
            ))),
        }
    }

    /// Return a new object holding a clone of this sequence.
    pub fn copy(&self) -> EncodedSequence {
        EncodedSequence {
            data: self.data.clone(),
        }
    }

    /// Build the striped representation of this sequence.
    pub fn stripe(&self) -> StripedSequence {
        let data = self.data.to_striped();
        StripedSequence { data }
    }
}
// --- StripedSequence ---------------------------------------------------------
/// A DNA sequence in striped form.
///
/// Wraps an `lm::StripedSequence<lm::Dna, C>`; instances are produced by
/// `EncodedSequence.stripe` and consumed by `ScoringMatrix.calculate`.
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct StripedSequence {
data: lm::StripedSequence<lm::Dna, C>,
}
// --- CountMatrix -------------------------------------------------------------
/// A matrix of symbol counts over the DNA alphabet.
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct CountMatrix {
    data: lm::CountMatrix<lm::Dna, { lm::Dna::K }>,
}

#[pymethods]
impl CountMatrix {
    /// Convert this count matrix into a frequency matrix.
    ///
    /// `pseudocount` may be omitted (the library default is used), given as
    /// a float, or given as a dict whose keys are one-character symbol
    /// strings and whose values are floats. A float is tried before a dict.
    /// Any other type raises a `TypeError`.
    pub fn normalize(&self, pseudocount: Option<PyObject>) -> PyResult<FrequencyMatrix> {
        let pseudo = Python::with_gil(|py| match pseudocount {
            None => Ok(lm::Pseudocounts::default()),
            Some(obj) => {
                if let Ok(value) = obj.extract::<f32>(py) {
                    Ok(lm::Pseudocounts::from(value))
                } else if let Ok(mapping) = obj.extract::<&PyDict>(py) {
                    let counts = dict_to_alphabet_array::<lm::Dna, { lm::Dna::K }>(mapping)?;
                    Ok(lm::Pseudocounts::from(counts))
                } else {
                    Err(PyTypeError::new_err("Invalid type for pseudocount"))
                }
            }
        })?;
        Ok(FrequencyMatrix {
            data: self.data.to_freq(pseudo),
        })
    }
}
// --- FrequencyMatrix ---------------------------------------------------------
/// A matrix of symbol frequencies over the DNA alphabet.
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct FrequencyMatrix {
    data: lm::FrequencyMatrix<lm::Dna, { lm::Dna::K }>,
}

#[pymethods]
impl FrequencyMatrix {
    /// Convert this frequency matrix into a scoring matrix.
    ///
    /// `background` may be omitted, in which case a uniform background is
    /// used, or given as a dict whose keys are one-character symbol strings
    /// and whose values are floats. Any other type raises a `TypeError`.
    pub fn log_odds(&self, background: Option<PyObject>) -> PyResult<ScoringMatrix> {
        let bg = Python::with_gil(|py| {
            if let Some(obj) = background {
                if let Ok(d) = obj.extract::<&PyDict>(py) {
                    let p = dict_to_alphabet_array::<lm::Dna, { lm::Dna::K }>(d)?;
                    Ok(lm::Background::from(p))
                } else {
                    // Name the argument that was actually rejected: this
                    // method takes a background, not a pseudocount.
                    Err(PyTypeError::new_err("Invalid type for background"))
                }
            } else {
                Ok(lm::Background::uniform())
            }
        })?;
        let data = self.data.to_scoring(bg);
        Ok(ScoringMatrix { data })
    }
}
// --- ScoringMatrix -----------------------------------------------------------
/// A scoring matrix over the DNA alphabet, as produced by
/// `FrequencyMatrix.log_odds`.
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct ScoringMatrix {
    data: lm::ScoringMatrix<lm::Dna, { lm::Dna::K }>,
}

#[pymethods]
impl ScoringMatrix {
    /// Return the PSSM score for all positions of the given sequence.
    ///
    /// The sequence is taken mutably because it is first configured for
    /// this matrix (`configure`) before being scored.
    pub fn calculate(&self, sequence: &mut StripedSequence) -> PyResult<StripedScores> {
        let pipeline = lm::Pipeline::<lm::Dna, Vector>::new();
        let matrix = &self.data;
        sequence.data.configure(matrix);
        Ok(StripedScores::from(pipeline.score(&sequence.data, matrix)))
    }
}
// --- Scores ------------------------------------------------------------------
/// Scores computed for the positions of a sequence, kept in striped form.
///
/// Besides `len()` and integer indexing, instances export a read-only,
/// two-dimensional buffer of `f32` values through the buffer protocol.
#[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)]
pub struct StripedScores {
    scores: lm::StripedScores<Vector, C>,
    // The shape and strides live inside the object because the exported
    // `Py_buffer` stores raw pointers to them.
    shape: [Py_ssize_t; 2],
    strides: [Py_ssize_t; 2],
}

#[pymethods]
impl StripedScores {
    /// Return the `length` recorded by the underlying scores.
    fn __len__(&self) -> usize {
        self.scores.length
    }

    /// Return the score at `index`.
    ///
    /// Only indices in `0..len` are accepted; anything else (including
    /// negative indices) raises an `IndexError`.
    fn __getitem__(&self, index: isize) -> PyResult<f32> {
        if index < self.scores.length as isize && index >= 0 {
            Ok(self.scores[index as usize])
        } else {
            Err(PyIndexError::new_err("list index out of range"))
        }
    }

    /// Fill `view` with a read-only, two-dimensional `f32` view of the scores.
    ///
    /// Raises a `BufferError` if `view` is null or if a writable buffer is
    /// requested.
    ///
    /// # Safety
    ///
    /// `view` must either be null or point to a `Py_buffer` that this
    /// function is allowed to write to. The pointers stored in the view
    /// refer to memory owned by `slf`, which is kept alive through the new
    /// reference stored in `view.obj`.
    unsafe fn __getbuffer__(
        mut slf: PyRefMut<'_, Self>,
        view: *mut pyo3::ffi::Py_buffer,
        flags: std::os::raw::c_int,
    ) -> PyResult<()> {
        if view.is_null() {
            return Err(PyBufferError::new_err("View is null"));
        }
        if (flags & pyo3::ffi::PyBUF_WRITABLE) == pyo3::ffi::PyBUF_WRITABLE {
            // The buffer protocol requires `obj` to be NULL when the request fails.
            (*view).obj = std::ptr::null_mut();
            return Err(PyBufferError::new_err("Object is not writable"));
        }

        let itemsize = std::mem::size_of::<f32>() as Py_ssize_t;

        (*view).obj = pyo3::ffi::_Py_NewRef(slf.as_ptr());
        let data = slf.scores.data[0].as_ptr();
        (*view).buf = data as *mut std::os::raw::c_void;
        // `len` is a size in bytes: product(shape) * itemsize, i.e. the size
        // the logical array would have in a contiguous copy. It is not the
        // number of scored positions.
        (*view).len = slf.shape[0] * slf.shape[1] * itemsize;
        (*view).readonly = 1;
        (*view).itemsize = itemsize;

        // "f" is the struct-module format code for a C float.
        let msg = std::ffi::CStr::from_bytes_with_nul(b"f\0").unwrap();
        (*view).format = msg.as_ptr() as *mut _;

        (*view).ndim = 2;
        (*view).shape = slf.shape.as_mut_ptr();
        (*view).strides = slf.strides.as_mut_ptr();
        (*view).suboffsets = std::ptr::null_mut();
        (*view).internal = std::ptr::null_mut();
        Ok(())
    }
}
impl From<lm::StripedScores<Vector, C>> for StripedScores {
    /// Wrap library scores into the Python-facing type.
    ///
    /// The shape and byte strides needed by the buffer protocol are computed
    /// once here, and every matrix cell at a position index of
    /// `scores.length` or beyond is overwritten with negative infinity.
    fn from(mut scores: lm::StripedScores<Vector, C>) -> Self {
        let rows = scores.data.rows();
        let cols = scores.data.columns();
        let itemsize = std::mem::size_of::<f32>();

        // First axis walks the columns of a row (adjacent floats); second
        // axis jumps from one row to the next, using the column count
        // rounded up to a power of two as the row pitch.
        let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
        let strides = [
            itemsize as Py_ssize_t,
            (cols.next_power_of_two() * itemsize) as Py_ssize_t,
        ];

        // Position `i` is stored at row `i % rows`, column `i / rows`.
        for position in scores.length..rows * cols {
            scores.data[position % rows][position / rows] = f32::NEG_INFINITY;
        }

        Self {
            scores,
            shape,
            strides,
        }
    }
}
// --- Module ------------------------------------------------------------------
/// Build a count matrix from an iterable of sequence strings.
///
/// Each item is encoded in turn, with the GIL released during encoding.
/// Raises a `ValueError` if an item contains an invalid symbol or if the
/// count matrix cannot be built from the encoded sequences, and propagates
/// the error when `sequences` is not iterable or an item is not a string.
#[pyfunction]
fn create<'py>(sequences: &'py PyAny) -> PyResult<CountMatrix> {
    let py = sequences.py();
    let encoded = sequences
        .iter()?
        .map(|item| -> PyResult<_> {
            let text = item?.extract::<&PyString>()?.to_str()?;
            py.allow_threads(|| lm::EncodedSequence::encode(&text))
                .map_err(|_| PyValueError::new_err("Invalid symbol in sequence"))
        })
        .collect::<PyResult<Vec<_>>>()?;
    match lm::CountMatrix::from_sequences(encoded) {
        Ok(data) => Ok(CountMatrix { data }),
        Err(_) => Err(PyValueError::new_err("Inconsistent sequence length")),
    }
}
/// PyO3 bindings to ``lightmotif``, a library for fast PWM motif scanning.
///
/// The API is similar to the `Bio.motifs` module from Biopython on purpose.
#[pymodule]
#[pyo3(name = "lib")]
pub fn init(py: Python, m: &PyModule) -> PyResult<()> {
m.add("__package__", "pyskani")?;
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
m.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?;
m.add("__build__", pyo3_built!(py, build))?;
m.add_class::<EncodedSequence>()?;
m.add_class::<StripedSequence>()?;
m.add_class::<CountMatrix>()?;
m.add_class::<FrequencyMatrix>()?;
m.add_class::<ScoringMatrix>()?;
m.add_function(wrap_pyfunction!(create, m)?)?;
Ok(())
}
# https://gist.github.com/althonos/6914b896789d3f2078d1e6237642c35c
# --- Setuptools metadata ---------------------------------------------------
[metadata]
name = lightmotif
author = Martin Larralde
author_email = martin.larralde@embl.de
home_page = https://github.com/althonos/lightmotif
description = PyO3 bindings and Python interface to lightmotif, a library for platform-accelerated biological motif scanning using position weight matrices.
long_description = file: README.md
long_description_content_type = text/markdown
license = MIT
platform = any
keywords = bioinformatics, genomics, motif, pssm, matrix
classifiers =
Development Status :: 3 - Alpha
Intended Audience :: Developers
Intended Audience :: Science/Research
License :: OSI Approved :: MIT License
Operating System :: OS Independent
Programming Language :: Rust
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
Topic :: Scientific/Engineering :: Bio-Informatics
Topic :: Scientific/Engineering :: Medical Science Apps.
Topic :: Software Development :: Libraries :: Python Modules
# Typing :: Typed
project_urls =
Bug Tracker = https://github.com/althonos/lightmotif/issues
Changelog = https://github.com/althonos/lightmotif/blob/master/CHANGELOG.md
Coverage = https://codecov.io/gh/althonos/lightmotif/
Builds = https://github.com/althonos/lightmotif/actions/
PyPI = https://pypi.org/project/lightmotif
[options]
zip_safe = true
packages =
lightmotif
lightmotif._lib
lightmotif.tests
test_suite = lightmotif.tests
python_requires = >=3.7
setup_requires =
setuptools >=39.2
setuptools-rust >=1.0
[options.package_data]
lightmotif =
py.typed
*.pyi
lightmotif._lib =
*.rs
lightmotif.tests =
requirements.txt
[bdist_wheel]
universal = false
[build_rust]
release = true
[alias]
sdist = vendor sdist
# --- Python tools configuration --------------------------------------------
[coverage:report]
show_missing = true
exclude_lines =
pragma: no cover
if typing.TYPE_CHECKING:
@abc.abstractmethod
@abc.abstractproperty
raise NotImplementedError
return NotImplemented
raise UnexpectedError
raise AllocationError
[mypy]
disallow_any_decorated = true
disallow_any_generics = true
disallow_any_unimported = false
disallow_subclassing_any = false
disallow_untyped_calls = true
disallow_untyped_defs = true
ignore_missing_imports = true
warn_unused_ignores = true
warn_return_any = true
[mypy-lightmotif.tests.*]
ignore_errors = True
\ No newline at end of file
#!/usr/bin/env python3
import configparser
import os
import shutil
import subprocess
import sys
import urllib.request
from distutils.errors import DistutilsPlatformError
from distutils.log import INFO
import setuptools
import setuptools_rust as rust
from setuptools.command.sdist import sdist as _sdist
from setuptools_rust.build import build_rust as _build_rust
try:
from setuptools_rust.rustc_info import get_rust_version
except ImportError:
from setuptools_rust.utils import get_rust_version
class sdist(_sdist):
    """Source distribution command that first writes a ``pyproject.toml``."""

    def run(self):
        """Generate ``pyproject.toml`` in the working directory, then run the
        regular ``sdist`` command."""
        # the `[build-system]` table is derived from the setup requirements
        # declared on the distribution
        config = configparser.ConfigParser()
        config.read_dict({
            "build-system": {
                "requires": str(self.distribution.setup_requires),
                "build-backend": '"setuptools.build_meta"',
            }
        })
        with open("pyproject.toml", "w") as handle:
            config.write(handle)

        # hand over to the stock implementation for the actual packaging
        super().run()
class build_rust(_build_rust):
    """``build_rust`` command that can bootstrap its own Rust toolchain."""

    def run(self):
        """Detect the Rust compiler (installing one if missing), adjust the
        first extension accordingly, and run the regular build."""
        rustc = get_rust_version()
        if rustc is None:
            # no compiler found: install a temporary stable toolchain
            self.setup_temp_rustc_unix(toolchain="stable", profile="minimal")
            nightly = False
        else:
            nightly = "nightly" in rustc.prerelease

        extension = self.extensions[0]
        if self.inplace:
            # keep symbols for in-place builds
            extension.strip = rust.Strip.No
        if nightly:
            extension.features = (*extension.features, "nightly")

        _build_rust.run(self)

    def setup_temp_rustc_unix(self, toolchain, profile):
        """Download ``rustup`` and install ``toolchain`` with the given
        ``profile`` under the temporary build directory.

        ``CARGO_HOME``, ``RUSTUP_HOME`` and ``PATH`` are updated in
        ``os.environ`` so later steps use the local compiler.
        """
        rustup_sh = os.path.join(self.build_temp, "rustup.sh")
        cargo_home = os.path.join(self.build_temp, "cargo")
        rustup_home = os.path.join(self.build_temp, "rustup")

        os.environ["CARGO_HOME"] = cargo_home
        os.environ["RUSTUP_HOME"] = rustup_home
        self.mkpath(cargo_home)
        self.mkpath(rustup_home)

        self.announce("downloading rustup.sh install script", level=INFO)
        with urllib.request.urlopen("https://sh.rustup.rs") as res, open(rustup_sh, "wb") as dst:
            shutil.copyfileobj(res, dst)

        self.announce(f"installing Rust compiler to {self.build_temp}", level=INFO)
        command = [
            "sh",
            rustup_sh,
            "-y",
            "--default-toolchain",
            toolchain,
            "--profile",
            profile,
            "--no-modify-path",
        ]
        # raises `subprocess.CalledProcessError` on a non-zero exit status
        subprocess.run(command, check=True)

        self.announce("updating $PATH variable to use local Rust compiler", level=INFO)
        cargo_bin = os.path.abspath(os.path.join(os.environ["CARGO_HOME"], "bin"))
        os.environ["PATH"] = cargo_bin + ":" + os.environ["PATH"]

    def get_dylib_ext_path(self, ext, module_name):
        """Return where the compiled library for ``ext`` must be written.

        For in-place builds the file is placed next to this script, inside
        the folder matching the parent package of ``ext.name``.
        """
        ext_path = _build_rust.get_dylib_ext_path(self, ext, module_name)
        if not self.inplace:
            return ext_path
        here = os.path.dirname(os.path.realpath(__file__))
        package_dir = os.path.sep.join(ext.name.split(".")[:-1])
        return os.path.join(here, package_dir, os.path.basename(ext_path))
# HACK: `Cargo.toml` is close enough to the INI format for `configparser`
#       to read its `[package]` table, which lets the Python package reuse
#       the crate version instead of duplicating it.
manifest_path = os.path.join(os.path.dirname(__file__), "Cargo.toml")
parser = configparser.ConfigParser()
parser.read(manifest_path)
# TOML strings keep their double quotes when read this way; drop them
version = parser.get("package", "version").strip('"')

# the compiled extension is importable as `lightmotif.lib`
extension = rust.RustExtension(
    "lightmotif.lib",
    path="Cargo.toml",
    binding=rust.Binding.PyO3,
    strip=rust.Strip.Debug,
    features=["extension-module"],
)

setuptools.setup(
    version=version,
    setup_requires=["setuptools", "setuptools_rust"],
    cmdclass={"sdist": sdist, "build_rust": build_rust},
    rust_extensions=[extension],
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment