Merged
34 changes: 34 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,40 @@

# CHANGELOG

## v0.2.0 (2025-10-04)

### Build

- Replaced opencv-python with opencv-python-headless
([`5f6b51c`](https://github.com/Doleus/doleus/commit/5f6b51c))

### Features

- Support complex slicing conditions with AND/OR logic
([`4e881f5`](https://github.com/Doleus/doleus/commit/4e881f5))

- Added slicing operators: in, not_in, between
([`75fa504`](https://github.com/Doleus/doleus/commit/75fa504))

- Added not_between operator
([`a4d3442`](https://github.com/Doleus/doleus/commit/a4d3442))

### Documentation

- Added docs for new slicing methods
([`50f77d0`](https://github.com/Doleus/doleus/commit/50f77d0))

### Testing

- Added tests for complex slicing conditions (AND/OR operators)
([`4e881f5`](https://github.com/Doleus/doleus/commit/4e881f5))

- Added tests for automatically created metadata
([`65c10c3`](https://github.com/Doleus/doleus/commit/65c10c3))

- Test for not_between operator
([`7cec696`](https://github.com/Doleus/doleus/commit/7cec696))

## v0.1.1 (2025-03-10)

### Bug Fixes
86 changes: 80 additions & 6 deletions README.md
@@ -347,8 +347,84 @@ Subsets of your data filtered by metadata:
- `slice_by_value("weather_condition", "==", "fog")` → Only foggy conditions
- `slice_by_groundtruth_class(class_names=["pedestrian", "cyclist"])` → Specific object classes

> [!NOTE]
> **Slicing Method**: Use `slice_by_value("metadata_key", "==", "value")` for categorical filtering. All standard comparison operators are supported: `>`, `<`, `>=`, `<=`, `==`, `!=`.

#### **Available Operators**

Doleus supports a comprehensive set of operators for flexible data slicing:

**Comparison operators:**

- `>`, `<`, `>=`, `<=`, `==`, `!=` - Standard comparisons

**Membership operators:**

- `in` - Check if value is in a list: `slice_by_value("source", "in", ["camera_a", "camera_b"])`
- `not_in` - Check if value is not in a list: `slice_by_value("batch_id", "not_in", [1, 2])`

**Range operators:**

- `between` - Check if value falls within range (inclusive): `slice_by_value("confidence", "between", [0.8, 0.95])`
- `not_between` - Check if value falls outside range: `slice_by_value("temperature", "not_between", [20, 30])`
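These operators follow ordinary Python semantics. As a standalone sketch (illustrative only, not the Doleus source; the `operators` dict name is hypothetical), the membership and range checks reduce to:

```python
# Illustrative stand-ins for the membership/range operators.
operators = {
    "in": lambda x, y: x in y,
    "not_in": lambda x, y: x not in y,
    "between": lambda x, y: y[0] <= x <= y[1],            # inclusive on both ends
    "not_between": lambda x, y: not (y[0] <= x <= y[1]),
}

print(operators["between"](0.9, [0.8, 0.95]))   # True: 0.9 lies inside [0.8, 0.95]
print(operators["not_in"](3, [1, 2]))           # True: 3 is excluded from the list
```

Note that `between` is inclusive, so boundary values such as `0.8` and `0.95` themselves pass the filter.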

#### **Combining Multiple Conditions**

For more complex filtering, use `slice_by_conditions()` to combine multiple criteria with logical operators:

```python
# AND: All conditions must be true
conditions = [
("validated", "==", True),
("confidence_score", ">=", 0.9),
("source", "in", ["camera_a", "camera_b"])
]
high_quality = doleus_dataset.slice_by_conditions(
conditions,
logical_operator="AND",
slice_name="high_quality_validated"
)

# OR: Any condition must be true
conditions = [
("weather", "==", "fog"),
("weather", "==", "rain"),
("visibility_meters", "<", 50)
]
challenging_weather = doleus_dataset.slice_by_conditions(
conditions,
logical_operator="OR",
slice_name="challenging_conditions"
)
```

**Practical Example - Manufacturing Quality Control:**

```python
# Find defects that are either very small OR on reflective surfaces with low confidence
defect_conditions = [
("defect_area_mm2", "<=", 1.0), # Small defects
("surface_reflectivity", ">=", 0.8), # Highly reflective
("detection_confidence", "between", [0.5, 0.7]) # Medium confidence
]

# OR logic: catches small defects OR reflective surfaces with medium confidence
at_risk_detections = doleus_dataset.slice_by_conditions(
defect_conditions,
logical_operator="OR",
slice_name="at_risk_detections"
)

# AND logic: only small defects on reflective surfaces with medium confidence
high_risk_combination = doleus_dataset.slice_by_conditions(
defect_conditions,
logical_operator="AND",
slice_name="high_risk_cases"
)
```

> [!TIP]
> **When to Use What**:
>
> - Use `slice_by_value()` for single-condition filters
> - Use `slice_by_conditions()` with `AND` when all conditions must be met (narrow down)
> - Use `slice_by_conditions()` with `OR` when any condition suffices (cast wider net)

### **Checks**

@@ -359,11 +435,9 @@ Tests that compute metrics on slices:

Checks become tests when you add pass/fail conditions (operator and value). Without these conditions, checks simply evaluate and report metric values.

> [!NOTE]
> **Prediction Format**: Doleus uses [torchmetrics](https://torchmetrics.readthedocs.io/) to compute metrics and expects the same prediction formats that torchmetrics functions require.

> [!IMPORTANT]
> **Macro Averaging Default**: Doleus uses **macro averaging** as the default for classification metrics (Accuracy, Precision, Recall, F1) to avoid known bugs in torchmetrics' micro averaging implementation (see [GitHub issue #2280](https://github.com/Lightning-AI/torchmetrics/issues/2280)).
>
> You can override this by setting `metric_parameters={"average": "micro"}` in your checks if needed.

45 changes: 45 additions & 0 deletions doleus/datasets/base.py
@@ -391,3 +391,48 @@ def slice_by_groundtruth_class(
slice_name = create_filename(self.name, "class", "==", class_str)

return self._create_new_instance(self.dataset, filtered_indices, slice_name)

    def slice_by_conditions(
        self,
        conditions: List[tuple],
        logical_operator: str = "AND",
        slice_name: Optional[str] = None,
    ):
        """Create a slice based on multiple conditions.

        Parameters
        ----------
        conditions : List[tuple]
            List of conditions in format (metadata_key, operator_str, value).
        logical_operator : str, optional
            How to combine conditions: "AND" or "OR", by default "AND".
        slice_name : str, optional
            Name for the slice. If None, a name will be generated, by default None.

        Returns
        -------
        Slice
            A new slice containing datapoints that meet the criteria.
        """
        logical_operator = logical_operator.upper()
        if logical_operator not in ("AND", "OR"):
            raise ValueError("logical_operator must be 'AND' or 'OR'")

        # all() requires every condition to hold; any() requires at least one.
        combine = all if logical_operator == "AND" else any

        indices = [
            i
            for i in range(len(self.dataset))
            if combine(
                OPERATOR_DICT[op](self.metadata_store.get_metadata(i, key), val)
                for key, op, val in conditions
            )
        ]

        if slice_name is None:
            slice_name = f"{self.name}_filtered_{len(conditions)}conditions_{logical_operator.lower()}"

        return self._create_new_instance(self.dataset, indices, slice_name)
8 changes: 8 additions & 0 deletions doleus/utils/data.py
@@ -12,6 +12,14 @@
"==": op.eq,
"=": op.eq,
"!=": op.ne,
"in": lambda x, y: x in y,
"not_in": lambda x, y: x not in y,
"between": lambda x, y: (
y[0] <= x <= y[1] if isinstance(y, (list, tuple)) and len(y) == 2 else False
),
"not_between": lambda x, y: (
not (y[0] <= x <= y[1]) if isinstance(y, (list, tuple)) and len(y) == 2 else False
),
}
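The `isinstance`/length guards in the `between` and `not_between` entries mean a malformed bounds argument silently evaluates to `False` rather than raising. A minimal sketch mirroring that guard (the standalone `between` function is illustrative, not the library API):

```python
def between(x, y):
    # Mirrors the guarded "between" entry above: only a two-element
    # list or tuple is treated as a valid inclusive range.
    return y[0] <= x <= y[1] if isinstance(y, (list, tuple)) and len(y) == 2 else False

print(between(25, [20, 30]))   # True: 25 is within the inclusive range
print(between(25, [20]))       # False: malformed bounds are guarded, no IndexError
```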


6 changes: 3 additions & 3 deletions pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "doleus"
version = "0.1.0"
version = "0.2.0"
description = ""
authors = [
{name = "Hendrik Schulze Bröring"},
@@ -14,9 +14,9 @@ dependencies = [
"torchvision (>=0.22.1,<0.23.0)",
"tqdm (>=4.67.1,<5.0.0)",
"torchmetrics (>=1.7.3,<2.0.0)",
"opencv-python (>=4.11.0.86,<5.0.0.0)",
"opencv-python-headless (>=4.11.0.86,<5.0.0.0)",
"pytz (>=2025.2,<2026.0)",
"numpy (>=2.3.1,<3.0.0)",
"numpy (>=2.0.0,<3.0.0)",
"pillow (>=11.2.1,<12.0.0)"
]

78 changes: 78 additions & 0 deletions tests/datasets/test_datasets.py
@@ -4,6 +4,7 @@
import numpy as np
import pandas as pd
import torch

from doleus.utils import Task, TaskType


@@ -210,3 +211,80 @@ def test_chained_slicing(
high_conf_validated.metadata_store.get_metadata(i, "confidence_score")
>= 0.9
)


def test_slice_and_operator(
    doleus_binary_classification_dataset, basic_metadata, numeric_metadata
):
    dataset = doleus_binary_classification_dataset
    dataset.add_metadata_from_list(basic_metadata)
    dataset.add_metadata_from_list(numeric_metadata)

    conditions = [("validated", "==", True), ("confidence_score", ">=", 0.9)]

    filtered_slice = dataset.slice_by_conditions(
        conditions, logical_operator="AND", slice_name="validated_high_conf"
    )

    assert len(filtered_slice) == 3
    assert filtered_slice.name == "validated_high_conf"

    for i in range(len(filtered_slice)):
        assert filtered_slice.metadata_store.get_metadata(i, "validated") == True
        assert filtered_slice.metadata_store.get_metadata(i, "confidence_score") >= 0.9

    camera_conditions = [
        ("source", "in", ["camera_a"]),
        ("confidence_score", "between", [0.85, 1.0]),
    ]

    camera_slice = dataset.slice_by_conditions(
        camera_conditions, logical_operator="AND", slice_name="camera_a_high_conf"
    )

    assert len(camera_slice) == 4
    for i in range(len(camera_slice)):
        assert camera_slice.metadata_store.get_metadata(i, "source") == "camera_a"
        score = camera_slice.metadata_store.get_metadata(i, "confidence_score")
        assert 0.85 <= score <= 1.0


def test_slice_or_operator(
    doleus_binary_classification_dataset, basic_metadata, numeric_metadata
):
    dataset = doleus_binary_classification_dataset
    dataset.add_metadata_from_list(basic_metadata)
    dataset.add_metadata_from_list(numeric_metadata)

    conditions = [("batch_id", "==", 1), ("batch_id", "==", 2)]

    filtered_slice = dataset.slice_by_conditions(
        conditions, logical_operator="OR", slice_name="batch_1_or_2"
    )

    assert len(filtered_slice) == 6
    assert filtered_slice.name == "batch_1_or_2"

    for i in range(len(filtered_slice)):
        batch_id = filtered_slice.metadata_store.get_metadata(i, "batch_id")
        assert batch_id in [1, 2]

    source_slice = dataset.slice_by_value(
        "source", "in", ["camera_a", "camera_b"], "all_sources"
    )
    assert len(source_slice) == 10

    excluded_batch = dataset.slice_by_value(
        "batch_id", "not_in", [1], "excluded_batch_1"
    )
    assert len(excluded_batch) == 7

    confidence_range = dataset.slice_by_value(
        "confidence_score", "between", [0.8, 0.95], "mid_confidence"
    )
    assert len(confidence_range) == 6

    confidence_range_not = dataset.slice_by_value(
        "confidence_score", "not_between", [0.8, 0.95], "not_mid_confidence"
    )
    assert len(confidence_range_not) == 4
3 changes: 3 additions & 0 deletions tests/utils/__init__.py
@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2025 Doleus contributors
# SPDX-License-Identifier: Apache-2.0
