intervals-mining-lab · Maximus2012 · Mar 13, 2026 · Mar 13, 2026 · goruha · Mar 28, 2026
diff --git a/src/foapy/__init__.py b/src/foapy/__init__.py
@@ -29,6 +29,9 @@
     from foapy.core import alphabet  # noqa: F401
     from foapy.core import binding  # noqa: F401
     from foapy.core import intervals  # noqa: F401
+    from foapy.core import intervals_chain  # noqa: F401
+    from foapy.core import intervals_distribution  # noqa: F401
+    from foapy.core import intervals_tuple  # noqa: F401
     from foapy.core import mode  # noqa: F401
     from foapy.core import order  # noqa: F401
 
@@ -41,6 +44,8 @@
     __all__ = list(
         __foapy_submodules__
         | {"order", "intervals", "alphabet", "binding", "mode"}
+        | {"intervals_chain", "intervals_tuple"}
+        | {"intervals_distribution"}
         | {"__version__", "__array_namespace_info__"}
     )
 
@@ -74,6 +79,9 @@ def __dir__():
             "exceptions" "ma",
             "order",
             "intervals",
+            "intervals_chain",
+            "intervals_tuple",
+            "intervals_distribution",
             "alphabet",
             "binding",
             "mode",

diff --git a/src/foapy/core/__init__.py b/src/foapy/core/__init__.py
@@ -14,12 +14,26 @@
     from ._alphabet import alphabet  # noqa: F401
     from ._binding import binding  # noqa: F401
     from ._mode import mode  # noqa: F401
+    from ._intervals_chain import intervals_chain  # noqa: F401
+    from ._intervals_tuple import intervals_tuple  # noqa: F401
+    from ._intervals_distribution import intervals_distribution  # noqa: F401
     from ._intervals import intervals  # noqa: F401
     from ._order import order  # noqa: F401
 
     # isort: on
 
-    __all__ = list({"binding", "mode", "intervals", "order", "alphabet"})
+    __all__ = list(
+        {
+            "binding",
+            "mode",
+            "intervals",
+            "order",
+            "alphabet",
+            "intervals_chain",
+            "intervals_tuple",
+            "intervals_distribution",
+        }
+    )
 
     def __dir__():
         return __all__
diff --git a/src/foapy/core/_intervals.py b/src/foapy/core/_intervals.py
@@ -2,6 +2,7 @@
 from numpy import ndarray
 
 from foapy.core import binding as constants_binding
+from foapy.core import intervals_chain, intervals_tuple
 from foapy.core import mode as constants_mode
 
 
@@ -135,40 +136,9 @@ def intervals(X, binding: int, mode: int) -> ndarray:
     if binding == constants_binding.end:
         ar = ar[::-1]
 
-    perm = ar.argsort(kind="mergesort")
-
-    mask_shape = ar.shape
-    mask = np.empty(mask_shape[0] + 1, dtype=bool)
-    mask[:1] = True
-    mask[1:-1] = ar[perm[1:]] != ar[perm[:-1]]
-    mask[-1:] = True  # or  mask[-1] = True
-
-    first_mask = mask[:-1]
-    last_mask = mask[1:]
-
-    intervals = np.empty(ar.shape, dtype=np.intp)
-    intervals[1:] = perm[1:] - perm[:-1]
-
-    delta = len(ar) - perm[last_mask] if mode == constants_mode.cycle else 1
-    intervals[first_mask] = perm[first_mask] + delta
-
-    inverse_perm = np.empty(ar.shape, dtype=np.intp)
-    inverse_perm[perm] = np.arange(ar.shape[0])
-
-    if mode == constants_mode.lossy:
-        intervals[first_mask] = 0
-        intervals = intervals[inverse_perm]
-        result = intervals[intervals != 0]
-    elif mode == constants_mode.normal:
-        result = intervals[inverse_perm]
-    elif mode == constants_mode.cycle:
-        result = intervals[inverse_perm]
-    elif mode == constants_mode.redundant:
-        result = intervals[inverse_perm]
-        redundant_intervals = len(ar) - perm[last_mask]
-        if binding == constants_binding.end:
-            redundant_intervals = redundant_intervals[::-1]
-        result = np.concatenate((result, redundant_intervals))
+    result = intervals_chain(ar, mode)
+    result = intervals_tuple(ar, result, mode, binding)
+
     if binding == constants_binding.end:
         result = result[::-1]
 

diff --git a/src/foapy/core/_intervals_chain.py b/src/foapy/core/_intervals_chain.py
@@ -0,0 +1,99 @@
+import numpy as np
+from numpy import ndarray
+
+from foapy.core import mode as constants_mode
+
+
+def intervals_chain(ar: ndarray, mode: int) -> ndarray:
+    """
+    Compute the raw intervals chain of a 1-D array.
+
+    Each position of the result holds the distance from that element back to
+    the previous occurrence of the same value.  A first occurrence has no
+    predecessor, so a boundary rule selected by [mode] is applied instead.
+    The array is processed as given: for [binding.end] the caller reverses
+    [ar] beforehand and reverses the result afterwards.
+
+    Boundary handling per mode:
+
+    * [mode.normal] – a first occurrence gets its 1-based position, i.e. the
+      distance from a virtual element placed just before the sequence.
+    * [mode.cycle] – a first occurrence gets its 0-based position plus the
+      distance from the last occurrence of the same value to the end of the
+      sequence, as if the sequence were closed into a ring.
+    * [mode.lossy] – a first occurrence gets [0]; the zeros stay in the
+      result as placeholders for the caller to drop.
+    * [mode.redundant] – identical to [mode.normal]; trailing boundary
+      intervals are not part of the chain and are appended by the caller.
+
+    Parameters
+    ----------
+    ar : ndarray
+        1-D array of sortable values, already oriented for the binding
+        direction.
+    mode : int
+        One of [mode.lossy], [mode.normal], [mode.cycle],
+        [mode.redundant].
+
+    Returns
+    -------
+    chain : ndarray
+        Array of [numpy.intp] with the same length as [ar], aligned with the
+        element order of [ar].
+
+    Notes
+    -----
+    Neither [mode] nor the shape of [ar] is validated here.  Any [mode]
+    other than [mode.cycle] and [mode.lossy] is handled like
+    [mode.normal].
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from foapy.core import intervals_chain
+    >>> from foapy.core import mode
+    >>> ar = np.asarray(['b', 'a', 'b', 'c', 'b'])
+
+    First occurrences measured from the start of the sequence:
+
+    >>> intervals_chain(ar, mode.normal)
+    array([1, 2, 2, 4, 2])
+
+    First occurrences measured around the ring:
+
+    >>> intervals_chain(ar, mode.cycle)
+    array([1, 5, 2, 5, 2])
+
+    First occurrences replaced by zero placeholders:
+
+    >>> intervals_chain(ar, mode.lossy)
+    array([0, 0, 2, 0, 2])
+    """
+
+    size = ar.shape[0]
+    # Stable sort: equal values become adjacent and keep their original order.
+    order = ar.argsort(kind="mergesort")
+
+    # boundaries[i] is True where sorted position i starts a run of equal
+    # values; the extra trailing True closes the last run.
+    boundaries = np.empty(size + 1, dtype=bool)
+    boundaries[:1] = True
+    boundaries[1:-1] = ar[order[1:]] != ar[order[:-1]]
+    boundaries[-1:] = True
+    run_starts = boundaries[:-1]
+    run_ends = boundaries[1:]
+
+    gaps = np.empty(ar.shape, dtype=np.intp)
+    # Inside a run, neighbouring original indices differ by the interval.
+    gaps[1:] = order[1:] - order[:-1]
+    if mode == constants_mode.lossy:
+        gaps[run_starts] = 0
+    elif mode == constants_mode.cycle:
+        gaps[run_starts] = order[run_starts] + (size - order[run_ends])
+    else:
+        gaps[run_starts] = order[run_starts] + 1
+
+    # Scatter from sorted order back to the original element order.
+    chain = np.empty(ar.shape, dtype=np.intp)
+    chain[order] = gaps
+    return chain
diff --git a/src/foapy/core/_intervals_distribution.py b/src/foapy/core/_intervals_distribution.py
@@ -0,0 +1,80 @@
+import numpy as np
+from numpy import ndarray
+
+from foapy.core import intervals_tuple as _intervals_tuple  # noqa: F401
+
+
+def intervals_distribution(tuple_result: ndarray) -> ndarray:
+    """
+    Calculate intervals distribution from an intervals tuple.
+
+    An intervals distribution is an n-tuple of natural numbers where the
+    index represents the interval length and the value is the count of
+    its appearances in the intervals tuple.
+
+    Caller is responsible for preparing the tuple_result correctly:
+    - For mode.lossy: boundary intervals already removed by intervals_tuple
+    - For mode.redundant: trailing intervals already appended by intervals_tuple
+    - For mode.normal, mode.cycle: pass tuple_result as-is
+
+    Zero entries (the placeholders of a raw lossy chain) are skipped rather
+    than counted; negative entries raise ValueError.
+
+    Parameters
+    ----------
+    tuple_result : ndarray
+        1-D intervals tuple produced by intervals_tuple.
+
+    Returns
+    -------
+    result : ndarray
+        Array of length max(tuple_result) where result[i] is the count
+        of interval value i+1 in the tuple; empty for an empty tuple.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from foapy import binding, mode
+    >>> from foapy.core import intervals_chain
+    >>> from foapy.core import intervals_tuple
+    >>> from foapy.core import intervals_distribution
+    >>> ar = np.asarray([2, 4, 2, 2, 4])
+
+    From documentation example - chain [1, 2, 3, 2, 4, 6]:
+
+    >>> intervals_distribution(np.array([1, 2, 3, 2, 4, 6]))
+    array([1, 2, 1, 1, 0, 1])
+
+    Normal mode:
+
+    >>> chain = intervals_chain(ar, mode.normal)
+    >>> tpl = intervals_tuple(ar, chain, mode.normal, binding.start)
+    >>> intervals_distribution(tpl)
+    array([2, 2, 1])
+
+    Lossy mode — boundary intervals already removed:
+
+    >>> chain = intervals_chain(ar, mode.lossy)
+    >>> tpl = intervals_tuple(ar, chain, mode.lossy, binding.start)
+    >>> intervals_distribution(tpl)
+    array([1, 1, 1])
+
+    Redundant mode — trailing intervals already appended:
+
+    >>> chain = intervals_chain(ar, mode.redundant)
+    >>> tpl = intervals_tuple(ar, chain, mode.redundant, binding.start)
+    >>> intervals_distribution(tpl)
+    array([3, 3, 1])
+
+    Empty tuple:
+
+    >>> intervals_distribution(np.array([])).size
+    0
+    """
+
+    values = np.asarray(tuple_result)
+    if values.size == 0:
+        return np.array([], dtype=np.intp)
+
+    # Vectorized tally of 0..max; slot 0 is dropped so result[i] counts i + 1.
+    return np.bincount(values.astype(np.intp))[1:]
diff --git a/src/foapy/core/_intervals_tuple.py b/src/foapy/core/_intervals_tuple.py
@@ -0,0 +1,75 @@
+import numpy as np
+from numpy import ndarray
+
+from foapy.core import binding as constants_binding
+from foapy.core import mode as constants_mode
+
+
+def intervals_tuple(ar: ndarray, chain: ndarray, mode: int, binding: int) -> ndarray:
+    """
+    Convert an intervals chain into a final intervals tuple.
+
+    Applies unchaining strategies to the raw intervals chain produced by
+    [intervals_chain], handling boundary intervals according to the given mode.
+
+    Parameters
+    ----------
+    ar : ndarray
+        1-D array (already oriented for binding direction) used to compute
+        trailing boundary intervals for [mode.redundant].
+    chain : ndarray
+        Raw intervals chain produced by [intervals_chain].
+    mode : int
+        One of [mode.lossy], [mode.normal], [mode.cycle], [mode.redundant].
+    binding : int
+        One of [binding.start], [binding.end]. Required; only consulted to
+        order the trailing boundary intervals for [mode.redundant].
+
+    Returns
+    -------
+    result : ndarray
+        Final intervals tuple; [chain] itself for [mode.normal] / [mode.cycle].
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from foapy.core import mode, binding
+    >>> from foapy.core import intervals_chain
+    >>> from foapy.core import intervals_tuple
+    >>> ar = np.asarray(['b', 'a', 'b', 'c', 'b'])
+
+    Normal mode — chain is returned as-is:
+
+    >>> chain = intervals_chain(ar, mode.normal)
+    >>> intervals_tuple(ar, chain, mode.normal, binding.start)
+    array([1, 2, 2, 4, 2])
+
+    Lossy mode — boundary (zero) intervals are removed:
+
+    >>> chain = intervals_chain(ar, mode.lossy)
+    >>> intervals_tuple(ar, chain, mode.lossy, binding.start)
+    array([2, 2])
+
+    Redundant mode — trailing boundary intervals are appended:
+
+    >>> chain = intervals_chain(ar, mode.redundant)
+    >>> intervals_tuple(ar, chain, mode.redundant, binding.start)
+    array([1, 2, 2, 4, 2, 4, 1, 2])
+    """
+
+    if mode == constants_mode.lossy:
+        # Zeros are the placeholders left for first occurrences.
+        return chain[chain != 0]
+
+    if mode == constants_mode.redundant:
+        # Stable sort groups equal values; the last entry of each group is
+        # the last occurrence of that value in [ar].
+        perm = ar.argsort(kind="mergesort")
+        last_mask = np.ones(ar.shape[0], dtype=bool)
+        last_mask[:-1] = ar[perm[1:]] != ar[perm[:-1]]
+        trailing = len(ar) - perm[last_mask]
+        if binding == constants_binding.end:
+            trailing = trailing[::-1]
+        return np.concatenate((chain, trailing))
+
+    return chain