Skip to content
Snippets Groups Projects
Commit f57957fd authored by Martin Larralde's avatar Martin Larralde
Browse files

Improve documentation of Python objects in `lightmotif-py`

parent a82ecced
No related branches found
No related tags found
No related merge requests found
...@@ -67,6 +67,7 @@ fn dict_to_alphabet_array<'py, A: lightmotif::Alphabet>( ...@@ -67,6 +67,7 @@ fn dict_to_alphabet_array<'py, A: lightmotif::Alphabet>(
// --- EncodedSequence --------------------------------------------------------- // --- EncodedSequence ---------------------------------------------------------
/// A biological sequence encoded as digits.
#[pyclass(module = "lightmotif.lib")] #[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct EncodedSequence { pub struct EncodedSequence {
...@@ -102,6 +103,7 @@ impl EncodedSequence { ...@@ -102,6 +103,7 @@ impl EncodedSequence {
// --- StripedSequence --------------------------------------------------------- // --- StripedSequence ---------------------------------------------------------
/// An encoded biological sequence stored in a column-major matrix.
#[pyclass(module = "lightmotif.lib")] #[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct StripedSequence { pub struct StripedSequence {
...@@ -110,6 +112,7 @@ pub struct StripedSequence { ...@@ -110,6 +112,7 @@ pub struct StripedSequence {
// --- CountMatrix ------------------------------------------------------------- // --- CountMatrix -------------------------------------------------------------
/// A matrix storing the count of each motif letter at each position.
#[pyclass(module = "lightmotif.lib")] #[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct CountMatrix { pub struct CountMatrix {
...@@ -157,6 +160,23 @@ impl CountMatrix { ...@@ -157,6 +160,23 @@ impl CountMatrix {
} }
} }
/// Normalize this count matrix to obtain a position weight matrix.
///
/// This method converts the count matrix to a weight matrix. Each row
/// of the matrix is normalized so that it sums to ``1.0``. Each element
/// is then divided by a uniform background probability to obtain the
/// odds-ratio at every position of the motif. Pseudocounts can be given
/// to prevent zero elements, which may translate into -∞ scores in the
/// final position-specific scoring matrix.
///
/// Arguments:
/// pseudocount (`float`, `dict` or `None`): The pseudocounts to apply
/// before normalizing the count matrix. If a `float` is given,
/// then the same pseudocount is applied to every column of the
/// matrix (excluding the default symbol). Otherwise, a `dict`
/// may be given to map each symbol of the alphabet to a distinct
/// pseudocount. If `None` is given, no pseudocount is used.
///
pub fn normalize(&self, pseudocount: Option<PyObject>) -> PyResult<WeightMatrix> { pub fn normalize(&self, pseudocount: Option<PyObject>) -> PyResult<WeightMatrix> {
let pseudo = Python::with_gil(|py| { let pseudo = Python::with_gil(|py| {
if let Some(obj) = pseudocount { if let Some(obj) = pseudocount {
...@@ -183,8 +203,9 @@ impl From<lightmotif::CountMatrix<lightmotif::Dna>> for CountMatrix { ...@@ -183,8 +203,9 @@ impl From<lightmotif::CountMatrix<lightmotif::Dna>> for CountMatrix {
} }
} }
// --- FrequencyMatrix --------------------------------------------------------- // --- WeightMatrix ------------------------------------------------------------
/// A matrix storing position-specific odds-ratio for a motif.
#[pyclass(module = "lightmotif.lib")] #[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct WeightMatrix { pub struct WeightMatrix {
...@@ -202,6 +223,13 @@ impl WeightMatrix { ...@@ -202,6 +223,13 @@ impl WeightMatrix {
} }
} }
/// Log-scale this weight matrix to obtain a position-specific scoring matrix.
///
/// Arguments:
/// background (`dict` or `None`): The background frequencies to use for
/// rescaling the weight matrix before computing the log-odds-ratio. If
/// `None` is given, uniform background frequencies will be used.
///
pub fn log_odds(&self, background: Option<PyObject>) -> PyResult<ScoringMatrix> { pub fn log_odds(&self, background: Option<PyObject>) -> PyResult<ScoringMatrix> {
// extract the background from the method argument // extract the background from the method argument
let bg = Python::with_gil(|py| { let bg = Python::with_gil(|py| {
...@@ -234,6 +262,7 @@ impl From<lightmotif::WeightMatrix<lightmotif::Dna>> for WeightMatrix { ...@@ -234,6 +262,7 @@ impl From<lightmotif::WeightMatrix<lightmotif::Dna>> for WeightMatrix {
// --- ScoringMatrix ----------------------------------------------------------- // --- ScoringMatrix -----------------------------------------------------------
/// A matrix storing position-specific log-odds-ratio scores for a motif.
#[pyclass(module = "lightmotif.lib")] #[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct ScoringMatrix { pub struct ScoringMatrix {
...@@ -251,7 +280,13 @@ impl ScoringMatrix { ...@@ -251,7 +280,13 @@ impl ScoringMatrix {
} }
} }
/// Return the PSSM score for all positions of the given sequence. /// Compute the PSSM score for all positions of the given sequence.
///
/// Returns:
/// `~lightmotif.StripedScores`: The PSSM scores for every position
/// of the input sequence, stored in a striped matrix for fast
/// vectorized operations.
///
pub fn calculate( pub fn calculate(
slf: PyRef<'_, Self>, slf: PyRef<'_, Self>,
sequence: &mut StripedSequence, sequence: &mut StripedSequence,
...@@ -288,6 +323,7 @@ impl From<lightmotif::ScoringMatrix<lightmotif::Dna>> for ScoringMatrix { ...@@ -288,6 +323,7 @@ impl From<lightmotif::ScoringMatrix<lightmotif::Dna>> for ScoringMatrix {
// --- Scores ------------------------------------------------------------------ // --- Scores ------------------------------------------------------------------
/// A striped matrix storing scores obtained with a scoring matrix.
#[pyclass(module = "lightmotif.lib")] #[pyclass(module = "lightmotif.lib")]
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct StripedScores { pub struct StripedScores {
...@@ -389,6 +425,17 @@ pub struct Motif { ...@@ -389,6 +425,17 @@ pub struct Motif {
// --- Module ------------------------------------------------------------------ // --- Module ------------------------------------------------------------------
/// Create a new motif from an iterable of sequences. /// Create a new motif from an iterable of sequences.
///
/// All sequences must have the same length, and must contain only valid DNA
/// symbols (*A*, *T*, *G*, *C*, or *N* as a wildcard).
///
/// Example:
/// >>> sequences = ["TATAAT", "TATAAA", "TATATT", "TATAAT"]
/// >>> motif = lightmotif.create(sequences)
///
/// Returns:
/// `~lightmotif.Motif`: The motif corresponding to the given sequences.
///
#[pyfunction] #[pyfunction]
pub fn create<'py>(sequences: &'py PyAny) -> PyResult<Motif> { pub fn create<'py>(sequences: &'py PyAny) -> PyResult<Motif> {
let py = sequences.py(); let py = sequences.py();
...@@ -446,6 +493,10 @@ pub fn init(_py: Python, m: &PyModule) -> PyResult<()> { ...@@ -446,6 +493,10 @@ pub fn init(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<WeightMatrix>()?; m.add_class::<WeightMatrix>()?;
m.add_class::<ScoringMatrix>()?; m.add_class::<ScoringMatrix>()?;
m.add_class::<StripedScores>()?;
m.add_class::<Motif>()?;
m.add_function(wrap_pyfunction!(create, m)?)?; m.add_function(wrap_pyfunction!(create, m)?)?;
m.add_function(wrap_pyfunction!(stripe, m)?)?; m.add_function(wrap_pyfunction!(stripe, m)?)?;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment