From 13e478bcafe4a4cef618606e1a99d0d7ecec27c8 Mon Sep 17 00:00:00 2001 From: Maximus2012 Date: Fri, 13 Mar 2026 18:47:18 +0600 Subject: [PATCH 1/2] feat: extract intervals_chain, intervals_tuple, intervals_distribution from intervals.py (#83 #84 #85) --- src/foapy/__init__.py | 8 ++ src/foapy/core/__init__.py | 6 +- src/foapy/core/_intervals.py | 38 +-------- src/foapy/core/_intervals_chain.py | 99 +++++++++++++++++++++++ src/foapy/core/_intervals_distribution.py | 80 ++++++++++++++++++ src/foapy/core/_intervals_tuple.py | 75 +++++++++++++++++ 6 files changed, 271 insertions(+), 35 deletions(-) create mode 100644 src/foapy/core/_intervals_chain.py create mode 100644 src/foapy/core/_intervals_distribution.py create mode 100644 src/foapy/core/_intervals_tuple.py diff --git a/src/foapy/__init__.py b/src/foapy/__init__.py index 7747f277..dad48c9b 100644 --- a/src/foapy/__init__.py +++ b/src/foapy/__init__.py @@ -29,6 +29,9 @@ from foapy.core import alphabet # noqa: F401 from foapy.core import binding # noqa: F401 from foapy.core import intervals # noqa: F401 + from foapy.core import intervals_chain # noqa: F401 + from foapy.core import intervals_distribution # noqa: F401 + from foapy.core import intervals_tuple # noqa: F401 from foapy.core import mode # noqa: F401 from foapy.core import order # noqa: F401 @@ -41,6 +44,8 @@ __all__ = list( __foapy_submodules__ | {"order", "intervals", "alphabet", "binding", "mode"} + | {"intervals_chain", "intervals_tuple"} + | {"intervals_distribution"} | {"__version__", "__array_namespace_info__"} ) @@ -74,6 +79,9 @@ def __dir__(): "exceptions" "ma", "order", "intervals", + "intervals_chain", + "intervals_tuple", + "intervals_distribution", "alphabet", "binding", "mode", diff --git a/src/foapy/core/__init__.py b/src/foapy/core/__init__.py index 693700fb..ce85d271 100644 --- a/src/foapy/core/__init__.py +++ b/src/foapy/core/__init__.py @@ -14,12 +14,16 @@ from ._alphabet import alphabet # noqa: F401 from ._binding import binding # 
noqa: F401 from ._mode import mode # noqa: F401 + from ._intervals_chain import intervals_chain # noqa: F401 + from ._intervals_tuple import intervals_tuple # noqa: F401 + from ._intervals_distribution import intervals_distribution # noqa: F401 from ._intervals import intervals # noqa: F401 from ._order import order # noqa: F401 # isort: on - __all__ = list({"binding", "mode", "intervals", "order", "alphabet"}) + __all__ = list({"binding", "mode", "intervals", "order", "alphabet", + "intervals_chain", "intervals_tuple", "intervals_distribution"}) def __dir__(): return __all__ diff --git a/src/foapy/core/_intervals.py b/src/foapy/core/_intervals.py index 158838ae..208fbfaf 100644 --- a/src/foapy/core/_intervals.py +++ b/src/foapy/core/_intervals.py @@ -2,6 +2,7 @@ from numpy import ndarray from foapy.core import binding as constants_binding +from foapy.core import intervals_chain, intervals_tuple from foapy.core import mode as constants_mode @@ -135,40 +136,9 @@ def intervals(X, binding: int, mode: int) -> ndarray: if binding == constants_binding.end: ar = ar[::-1] - perm = ar.argsort(kind="mergesort") - - mask_shape = ar.shape - mask = np.empty(mask_shape[0] + 1, dtype=bool) - mask[:1] = True - mask[1:-1] = ar[perm[1:]] != ar[perm[:-1]] - mask[-1:] = True # or mask[-1] = True - - first_mask = mask[:-1] - last_mask = mask[1:] - - intervals = np.empty(ar.shape, dtype=np.intp) - intervals[1:] = perm[1:] - perm[:-1] - - delta = len(ar) - perm[last_mask] if mode == constants_mode.cycle else 1 - intervals[first_mask] = perm[first_mask] + delta - - inverse_perm = np.empty(ar.shape, dtype=np.intp) - inverse_perm[perm] = np.arange(ar.shape[0]) - - if mode == constants_mode.lossy: - intervals[first_mask] = 0 - intervals = intervals[inverse_perm] - result = intervals[intervals != 0] - elif mode == constants_mode.normal: - result = intervals[inverse_perm] - elif mode == constants_mode.cycle: - result = intervals[inverse_perm] - elif mode == constants_mode.redundant: - result = 
intervals[inverse_perm] - redundant_intervals = len(ar) - perm[last_mask] - if binding == constants_binding.end: - redundant_intervals = redundant_intervals[::-1] - result = np.concatenate((result, redundant_intervals)) + result = intervals_chain(ar, mode) + result = intervals_tuple(ar, result, mode, binding) + if binding == constants_binding.end: result = result[::-1] diff --git a/src/foapy/core/_intervals_chain.py b/src/foapy/core/_intervals_chain.py new file mode 100644 index 00000000..b6914184 --- /dev/null +++ b/src/foapy/core/_intervals_chain.py @@ -0,0 +1,99 @@ +import numpy as np +from numpy import ndarray + +from foapy.core import mode as constants_mode + + +def intervals_chain(ar: ndarray, mode: int) -> ndarray: + """ + Build an intervals chain from a 1-D array. + + An intervals chain is an n-tuple of natural numbers representing the distance + between equal elements in a sequence. This function encapsulates the core + chain-building logic: given a 1-D array [ar] (already oriented for the chosen + binding direction) and a [mode], it computes the raw intervals array before + any binding-specific reversal is applied. + + The function supports four behavioural strategies at sequence boundaries: + + * **normal / bounded** ([mode.normal]) – the leading boundary interval + (distance from the virtual start to the first occurrence) is included; + the trailing boundary interval is not added. + * **cyclic** ([mode.cycle]) – the leading and trailing boundary intervals + are summed into a single interval placed at the position of the first + occurrence, as if the sequence were circular. + * **lossy** ([mode.lossy]) – boundary (first-occurrence) intervals are set + to [0] so the caller can filter them out. + * **redundant** ([mode.redundant]) – same as [mode.normal] for the chain itself; + the caller is responsible for appending the trailing boundary intervals. + + Parameters + ---------- + ar : ndarray + 1-D array whose intervals chain is to be built. 
For [binding.end] + the caller must reverse [ar] **before** passing it here, and reverse + the result afterwards. + mode : int + One of [mode.lossy], [mode.normal], [mode.cycle], + [mode.redundant]. Controls how boundary intervals are handled. + + Returns + ------- + chain : ndarray + Raw intervals array of the same length as [ar], in the original + element order. For [mode.lossy] the boundary zeros are still + present; filtering is left to the caller. + + Notes + ----- + This function is the low-level building block used by + [foapy.intervals]. It does not validate [mode] or the shape + of [ar] – validation is the responsibility of the caller. + + Examples + -------- + Build a bounded (normal) intervals chain: + + >>> import numpy as np + >>> from foapy.core import intervals_chain + >>> from foapy.core import mode + >>> ar = np.asarray(['b', 'a', 'b', 'c', 'b']) + >>> intervals_chain(ar, mode.normal) + array([1, 2, 2, 4, 2]) + + Build a cyclic intervals chain (leading boundary becomes wrap-around sum): + + >>> intervals_chain(ar, mode.cycle) + array([1, 5, 2, 5, 2]) + + For lossy mode the first-occurrence intervals are zeroed out so the + caller can filter them with [result][result != 0]: + + >>> intervals_chain(ar, mode.lossy) + array([0, 0, 2, 0, 2]) + """ + + perm = ar.argsort(kind="mergesort") + + mask = np.empty(ar.shape[0] + 1, dtype=bool) + mask[:1] = True + mask[1:-1] = ar[perm[1:]] != ar[perm[:-1]] + mask[-1:] = True # or mask[-1:] = True + + first_mask = mask[:-1] + last_mask = mask[1:] + + chain = np.empty(ar.shape, dtype=np.intp) + chain[1:] = perm[1:] - perm[:-1] + + delta = len(ar) - perm[last_mask] if mode == constants_mode.cycle else 1 + chain[first_mask] = perm[first_mask] + delta + + if mode == constants_mode.lossy: + chain[first_mask] = 0 + + inverse_perm = np.empty(ar.shape, dtype=np.intp) + inverse_perm[perm] = np.arange(ar.shape[0]) + chain = chain[inverse_perm] + + return chain diff --git a/src/foapy/core/_intervals_distribution.py 
b/src/foapy/core/_intervals_distribution.py new file mode 100644 index 00000000..45f18bb6 --- /dev/null +++ b/src/foapy/core/_intervals_distribution.py @@ -0,0 +1,80 @@ +import numpy as np +from numpy import ndarray + +from foapy.core import intervals_tuple as _intervals_tuple # noqa: F401 + + +def intervals_distribution(tuple_result: ndarray) -> ndarray: + """ + Calculate intervals distribution from an intervals tuple. + + An intervals distribution is an n-tuple of natural numbers where the + index represents the interval length and the value is the count of + its appearances in the intervals tuple. + + Caller is responsible for preparing the tuple_result correctly: + - For mode.lossy: boundary intervals already removed by intervals_tuple + - For mode.redundant: trailing intervals already appended by intervals_tuple + - For mode.normal, mode.cycle: pass tuple_result as-is + + Parameters + ---------- + tuple_result : ndarray + Intervals tuple produced by intervals_tuple. + + Returns + ------- + result : ndarray + Array of length max(tuple_result) where result[i] is the count + of interval value i+1 in the tuple. 
+ + Examples + -------- + >>> import numpy as np + >>> from foapy import binding, mode + >>> from foapy.core import intervals_chain + >>> from foapy.core import intervals_tuple + >>> from foapy.core import intervals_distribution + >>> ar = np.asarray([2, 4, 2, 2, 4]) + + From documentation example - chain [1, 2, 3, 2, 4, 6]: + + >>> intervals_distribution(np.array([1, 2, 3, 2, 4, 6])) + array([1, 2, 1, 1, 0, 1]) + + Normal mode: + + >>> chain = intervals_chain(ar, mode.normal) + >>> tpl = intervals_tuple(ar, chain, mode.normal, binding.start) + >>> intervals_distribution(tpl) + array([2, 2, 1]) + + Lossy mode — boundary intervals already removed: + + >>> chain = intervals_chain(ar, mode.lossy) + >>> tpl = intervals_tuple(ar, chain, mode.lossy, binding.start) + >>> intervals_distribution(tpl) + array([1, 1, 1]) + + Redundant mode — trailing intervals already appended: + + >>> chain = intervals_chain(ar, mode.redundant) + >>> tpl = intervals_tuple(ar, chain, mode.redundant, binding.start) + >>> intervals_distribution(tpl) + array([3, 3, 1]) + + Empty tuple: + + >>> intervals_distribution(np.array([])) + array([], dtype=int64) + """ + + if len(tuple_result) == 0: + return np.array([], dtype=np.intp) + + max_interval = int(tuple_result.max()) + distribution = np.zeros(max_interval, dtype=np.intp) + for interval in tuple_result: + distribution[int(interval) - 1] += 1 + + return distribution diff --git a/src/foapy/core/_intervals_tuple.py b/src/foapy/core/_intervals_tuple.py new file mode 100644 index 00000000..f0e27249 --- /dev/null +++ b/src/foapy/core/_intervals_tuple.py @@ -0,0 +1,75 @@ +import numpy as np +from numpy import ndarray + +from foapy.core import binding as constants_binding +from foapy.core import mode as constants_mode + + +def intervals_tuple(ar: ndarray, chain: ndarray, mode: int, binding: int) -> ndarray: + """ + Convert an intervals chain into a final intervals tuple.
+ + Applies unchaining strategies to the raw intervals chain produced by + [intervals_chain], handling boundary intervals according to the given mode. + + Parameters + ---------- + ar : ndarray + 1-D array (already oriented for binding direction) used to compute + trailing boundary intervals for [mode.redundant]. + chain : ndarray + Raw intervals chain produced by [intervals_chain]. + mode : int + One of [mode.lossy], [mode.normal], [mode.cycle], [mode.redundant]. + binding : int + One of [binding.start], [binding.end]. Used to correctly order + trailing boundary intervals for [mode.redundant]. + + Returns + ------- + result : ndarray + Final intervals tuple. + + Examples + -------- + >>> import numpy as np + >>> from foapy.core import mode, binding + >>> from foapy.core import intervals_chain + >>> from foapy.core import intervals_tuple + >>> ar = np.asarray(['b', 'a', 'b', 'c', 'b']) + + Normal mode — chain is returned as-is: + + >>> chain = intervals_chain(ar, mode.normal) + >>> intervals_tuple(ar, chain, mode.normal, binding.start) + array([1, 2, 2, 4, 2]) + + Lossy mode — boundary (zero) intervals are removed: + + >>> chain = intervals_chain(ar, mode.lossy) + >>> intervals_tuple(ar, chain, mode.lossy, binding.start) + array([2, 2]) + + Redundant mode — trailing boundary intervals are appended: + + >>> chain = intervals_chain(ar, mode.redundant) + >>> intervals_tuple(ar, chain, mode.redundant, binding.start) + array([1, 2, 2, 4, 2, 4, 1, 2]) + """ + + if mode == constants_mode.lossy: + return chain[chain != 0] + + if mode == constants_mode.redundant: + perm = ar.argsort(kind="mergesort") + mask = np.empty(ar.shape[0] + 1, dtype=bool) + mask[:1] = True + mask[1:-1] = ar[perm[1:]] != ar[perm[:-1]] + mask[-1:] = True + last_mask = mask[1:] + trailing = len(ar) - perm[last_mask] + if binding == constants_binding.end: + trailing = trailing[::-1] + return np.concatenate((chain, trailing)) + + return chain From 3b26e12ef2c621e4836c4b294e250d922eb6b268 Mon Sep 17 00:00:00 2001 From: Maximus2012 Date: Fri, 13 Mar
2026 19:02:46 +0600 Subject: [PATCH 2/2] style: reformat __init__.py --- src/foapy/core/__init__.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/foapy/core/__init__.py b/src/foapy/core/__init__.py index ce85d271..44ead92d 100644 --- a/src/foapy/core/__init__.py +++ b/src/foapy/core/__init__.py @@ -22,8 +22,18 @@ # isort: on - __all__ = list({"binding", "mode", "intervals", "order", "alphabet", - "intervals_chain", "intervals_tuple", "intervals_distribution"}) + __all__ = list( + { + "binding", + "mode", + "intervals", + "order", + "alphabet", + "intervals_chain", + "intervals_tuple", + "intervals_distribution", + } + ) def __dir__(): return __all__