Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/foapy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from foapy.core import alphabet # noqa: F401
from foapy.core import binding # noqa: F401
from foapy.core import intervals # noqa: F401
from foapy.core import intervals_chain # noqa: F401
from foapy.core import intervals_distribution # noqa: F401
from foapy.core import intervals_tuple # noqa: F401
from foapy.core import mode # noqa: F401
from foapy.core import order # noqa: F401

Expand All @@ -41,6 +44,8 @@
__all__ = list(
__foapy_submodules__
| {"order", "intervals", "alphabet", "binding", "mode"}
| {"intervals_chain", "intervals_tuple"}
| {"intervals_distribution"}
| {"__version__", "__array_namespace_info__"}
)

Expand Down Expand Up @@ -74,6 +79,9 @@ def __dir__():
"exceptions" "ma",
"order",
"intervals",
"intervals_chain",
"intervals_tuple",
"intervals_distribution",
"alphabet",
"binding",
"mode",
Expand Down
16 changes: 15 additions & 1 deletion src/foapy/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,26 @@
from ._alphabet import alphabet # noqa: F401
from ._binding import binding # noqa: F401
from ._mode import mode # noqa: F401
from ._intervals_chain import intervals_chain # noqa: F401
from ._intervals_tuple import intervals_tuple # noqa: F401
from ._intervals_distribution import intervals_distribution # noqa: F401
from ._intervals import intervals # noqa: F401
from ._order import order # noqa: F401

# isort: on

__all__ = list({"binding", "mode", "intervals", "order", "alphabet"})
__all__ = list(
{
"binding",
"mode",
"intervals",
"order",
"alphabet",
"intervals_chain",
"intervals_tuple",
"intervals_distribution",
}
)

def __dir__():
    """Return the public names of this module, as listed in ``__all__``."""
    return __all__
38 changes: 4 additions & 34 deletions src/foapy/core/_intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from numpy import ndarray

from foapy.core import binding as constants_binding
from foapy.core import intervals_chain, intervals_tuple
from foapy.core import mode as constants_mode


Expand Down Expand Up @@ -135,40 +136,9 @@ def intervals(X, binding: int, mode: int) -> ndarray:
if binding == constants_binding.end:
ar = ar[::-1]

perm = ar.argsort(kind="mergesort")

mask_shape = ar.shape
mask = np.empty(mask_shape[0] + 1, dtype=bool)
mask[:1] = True
mask[1:-1] = ar[perm[1:]] != ar[perm[:-1]]
mask[-1:] = True # or mask[-1] = True

first_mask = mask[:-1]
last_mask = mask[1:]

intervals = np.empty(ar.shape, dtype=np.intp)
intervals[1:] = perm[1:] - perm[:-1]

delta = len(ar) - perm[last_mask] if mode == constants_mode.cycle else 1
intervals[first_mask] = perm[first_mask] + delta

inverse_perm = np.empty(ar.shape, dtype=np.intp)
inverse_perm[perm] = np.arange(ar.shape[0])

if mode == constants_mode.lossy:
intervals[first_mask] = 0
intervals = intervals[inverse_perm]
result = intervals[intervals != 0]
elif mode == constants_mode.normal:
result = intervals[inverse_perm]
elif mode == constants_mode.cycle:
result = intervals[inverse_perm]
elif mode == constants_mode.redundant:
result = intervals[inverse_perm]
redundant_intervals = len(ar) - perm[last_mask]
if binding == constants_binding.end:
redundant_intervals = redundant_intervals[::-1]
result = np.concatenate((result, redundant_intervals))
result = intervals_chain(ar, mode)
result = intervals_tuple(ar, result, mode, binding)

if binding == constants_binding.end:
result = result[::-1]

Expand Down
99 changes: 99 additions & 0 deletions src/foapy/core/_intervals_chain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import numpy as np
from numpy import ndarray

from foapy.core import mode as constants_mode


def intervals_chain(ar: ndarray, mode: int) -> ndarray:
    """
    Compute the raw intervals chain of a 1-D array.

    Every element receives the distance back to the previous occurrence of
    an equal element. The first occurrence of each distinct value has no
    predecessor, so it receives a *boundary* interval whose value depends
    on ``mode``:

    * ``mode.normal`` and ``mode.redundant`` - distance from the virtual
      position just before the array, i.e. ``index + 1``. The trailing
      boundary intervals of ``mode.redundant`` are not produced here.
    * ``mode.cycle`` - leading distance plus the distance from the last
      occurrence of the same value to the end of the array, as if the
      array were circular.
    * ``mode.lossy`` - ``0``, a marker the caller is expected to filter
      out.

    Parameters
    ----------
    ar : ndarray
        1-D array to process. For ``binding.end`` the caller reverses
        ``ar`` before the call and reverses the result afterwards.
    mode : int
        One of ``mode.lossy``, ``mode.normal``, ``mode.cycle``,
        ``mode.redundant``. No validation is performed; any value other
        than ``mode.lossy`` and ``mode.cycle`` is handled like
        ``mode.normal``.

    Returns
    -------
    chain : ndarray
        Array of ``np.intp`` with the same shape as ``ar``, aligned with
        the original element order. In ``mode.lossy`` the zero markers
        are still present.

    Examples
    --------
    >>> import numpy as np
    >>> from foapy.core import intervals_chain
    >>> from foapy.core import mode
    >>> ar = np.asarray(['b', 'a', 'b', 'c', 'b'])

    Bounded (normal) chain:

    >>> intervals_chain(ar, mode.normal)
    array([1, 2, 2, 4, 2])

    Cyclic chain - each first occurrence wraps around the array end:

    >>> intervals_chain(ar, mode.cycle)
    array([1, 5, 2, 5, 2])

    Lossy chain - first occurrences are marked with zero:

    >>> intervals_chain(ar, mode.lossy)
    array([0, 0, 2, 0, 2])
    """
    size = ar.shape[0]

    # A stable sort groups equal elements into runs while keeping the
    # members of every run in their original left-to-right order.
    order = ar.argsort(kind="mergesort")

    # boundaries[i] is True when a new run starts at sorted position i;
    # the extra last slot closes the final run.
    boundaries = np.empty(size + 1, dtype=bool)
    boundaries[:1] = True
    boundaries[-1:] = True
    boundaries[1:-1] = ar[order[1:]] != ar[order[:-1]]

    run_starts = boundaries[:-1]
    run_ends = boundaries[1:]

    # Inside a run, consecutive sorted positions hold consecutive
    # occurrences of one value, so their index difference is the interval.
    gaps = np.empty(ar.shape, dtype=np.intp)
    gaps[1:] = np.diff(order)

    # The first slot of every run is a boundary interval (this also
    # overwrites the meaningless differences taken across two runs).
    if mode == constants_mode.lossy:
        gaps[run_starts] = 0
    elif mode == constants_mode.cycle:
        gaps[run_starts] = order[run_starts] + (size - order[run_ends])
    else:
        gaps[run_starts] = order[run_starts] + 1

    # Scatter the per-sorted-position values back to the original order.
    chain = np.empty_like(gaps)
    chain[order] = gaps
    return chain
80 changes: 80 additions & 0 deletions src/foapy/core/_intervals_distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import numpy as np
from numpy import ndarray

from foapy.core import intervals_tuple as _intervals_tuple # noqa: F401


def intervals_distribution(tuple_result: ndarray) -> ndarray:
    """
    Calculate the intervals distribution of an intervals tuple.

    An intervals distribution is an n-tuple of natural numbers where the
    index represents the interval length and the value is the number of
    times that length appears in the intervals tuple.

    The caller is responsible for preparing ``tuple_result``:

    - ``mode.lossy``: boundary intervals already removed by
      ``intervals_tuple``;
    - ``mode.redundant``: trailing intervals already appended by
      ``intervals_tuple``;
    - ``mode.normal``, ``mode.cycle``: pass the tuple as-is.

    Parameters
    ----------
    tuple_result : ndarray
        1-D intervals tuple produced by ``intervals_tuple``. Values are
        converted to ``np.intp`` before counting.

    Returns
    -------
    result : ndarray
        ``np.intp`` array of length ``max(tuple_result)`` where
        ``result[i]`` is the number of intervals equal to ``i + 1``.
        An empty input yields an empty array.

    Raises
    ------
    ValueError
        Raised by ``np.bincount`` when ``tuple_result`` contains negative
        values or is not 1-dimensional.

    Notes
    -----
    Intervals are natural numbers, so a value of ``0`` has no slot in the
    distribution and is not counted.

    Examples
    --------
    >>> import numpy as np
    >>> from foapy import binding, mode
    >>> from foapy.core import intervals_chain
    >>> from foapy.core import intervals_tuple
    >>> from foapy.core import intervals_distribution
    >>> ar = np.asarray([2, 4, 2, 2, 4])

    Distribution of the tuple ``[1, 2, 3, 2, 4, 6]``:

    >>> intervals_distribution(np.array([1, 2, 3, 2, 4, 6]))
    array([1, 2, 1, 1, 0, 1])

    Normal mode:

    >>> chain = intervals_chain(ar, mode.normal)
    >>> tpl = intervals_tuple(ar, chain, mode.normal, binding.start)
    >>> intervals_distribution(tpl)
    array([2, 2, 1])

    Lossy mode - boundary intervals already removed:

    >>> chain = intervals_chain(ar, mode.lossy)
    >>> tpl = intervals_tuple(ar, chain, mode.lossy, binding.start)
    >>> intervals_distribution(tpl)
    array([1, 1, 1])

    Redundant mode - trailing intervals already appended:

    >>> chain = intervals_chain(ar, mode.redundant)
    >>> tpl = intervals_tuple(ar, chain, mode.redundant, binding.start)
    >>> intervals_distribution(tpl)
    array([3, 3, 1])

    Empty tuple:

    >>> intervals_distribution(np.array([])).size
    0
    """
    values = np.asarray(tuple_result)
    if values.size == 0:
        return np.array([], dtype=np.intp)

    # bincount tallies every value 0..max in one vectorised pass; slot 0
    # is dropped because the distribution starts at interval length 1.
    counts = np.bincount(values.astype(np.intp, copy=False))
    return counts[1:].astype(np.intp, copy=False)
75 changes: 75 additions & 0 deletions src/foapy/core/_intervals_tuple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import numpy as np
from numpy import ndarray

from foapy.core import binding as constants_binding
from foapy.core import mode as constants_mode


def intervals_tuple(ar: ndarray, chain: ndarray, mode: int, binding: int) -> ndarray:
    """
    Convert a raw intervals chain into the final intervals tuple.

    Applies the mode-specific post-processing to the chain produced by
    ``intervals_chain``:

    * ``mode.lossy`` - the zero markers left for boundary intervals are
      removed;
    * ``mode.redundant`` - one trailing boundary interval per distinct
      element (distance from its last occurrence to the end of ``ar``)
      is appended after the chain;
    * any other mode - the chain is returned unchanged.

    Parameters
    ----------
    ar : ndarray
        1-D array, already oriented for the binding direction. Only used
        to compute the trailing boundary intervals of ``mode.redundant``.
    chain : ndarray
        Raw intervals chain produced by ``intervals_chain`` for ``ar``.
    mode : int
        One of ``mode.lossy``, ``mode.normal``, ``mode.cycle``,
        ``mode.redundant``.
    binding : int
        One of ``binding.start``, ``binding.end``. Only affects the order
        of the trailing intervals in ``mode.redundant``: they follow the
        sorted order of the distinct elements, reversed for
        ``binding.end``.

    Returns
    -------
    result : ndarray
        Final intervals tuple.

    Examples
    --------
    >>> import numpy as np
    >>> from foapy.core import mode, binding
    >>> from foapy.core import intervals_chain
    >>> from foapy.core import intervals_tuple
    >>> ar = np.asarray(['b', 'a', 'b', 'c', 'b'])

    Normal mode - the chain is returned as-is:

    >>> chain = intervals_chain(ar, mode.normal)
    >>> intervals_tuple(ar, chain, mode.normal, binding.start)
    array([1, 2, 2, 4, 2])

    Lossy mode - boundary (zero) intervals are removed:

    >>> chain = intervals_chain(ar, mode.lossy)
    >>> intervals_tuple(ar, chain, mode.lossy, binding.start)
    array([2, 2])

    Redundant mode - trailing boundary intervals are appended:

    >>> chain = intervals_chain(ar, mode.redundant)
    >>> intervals_tuple(ar, chain, mode.redundant, binding.start)
    array([1, 2, 2, 4, 2, 4, 1, 2])
    """
    if mode == constants_mode.lossy:
        # intervals_chain marks boundary intervals with 0 in lossy mode.
        return chain[chain != 0]

    if mode != constants_mode.redundant:
        return chain

    # Only the elements of `ar` are sorted here, purely to locate the last
    # occurrence of every distinct value. `chain` itself is never
    # reordered, so it stays aligned with the original element order.
    perm = ar.argsort(kind="mergesort")

    # last_mask[i] is True when sorted position i is the final member of
    # its run of equal elements.
    last_mask = np.empty(ar.shape[0], dtype=bool)
    last_mask[:-1] = ar[perm[1:]] != ar[perm[:-1]]
    last_mask[-1:] = True

    # Distance from each last occurrence to the virtual end of the array.
    trailing = len(ar) - perm[last_mask]
    if binding == constants_binding.end:
        trailing = trailing[::-1]
    return np.concatenate((chain, trailing))
Loading