From 2179d3a3ffaff8c9f1ed115ebae398c4594701a0 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 26 Jan 2026 18:04:52 +0100 Subject: [PATCH 01/19] :sparkles: add support for split --- mindee/client_v2.py | 90 ++++++++++++++++--- mindee/input/__init__.py | 20 +++-- mindee/input/base_parameters.py | 21 +++++ mindee/input/inference_parameters.py | 16 +--- mindee/input/utility_parameters.py | 10 +++ mindee/mindee_http/mindee_api_v2.py | 35 ++++---- mindee/parsing/v2/inference.py | 26 ++---- mindee/parsing/v2/inference_response.py | 12 ++- mindee/v2/__init__.py | 15 ++++ mindee/v2/parsing/__init__.py | 15 ++++ mindee/v2/parsing/inference/__init__.py | 15 ++++ mindee/v2/parsing/inference/base_inference.py | 32 +++++++ .../inference/base_inference_response.py | 28 ++++++ mindee/v2/parsing/inference/split/__init__.py | 0 .../inference/split/split_inference.py | 20 +++++ .../parsing/inference/split/split_response.py | 20 +++++ tests/data | 2 +- 17 files changed, 303 insertions(+), 74 deletions(-) create mode 100644 mindee/input/base_parameters.py create mode 100644 mindee/input/utility_parameters.py create mode 100644 mindee/v2/__init__.py create mode 100644 mindee/v2/parsing/__init__.py create mode 100644 mindee/v2/parsing/inference/__init__.py create mode 100644 mindee/v2/parsing/inference/base_inference.py create mode 100644 mindee/v2/parsing/inference/base_inference_response.py create mode 100644 mindee/v2/parsing/inference/split/__init__.py create mode 100644 mindee/v2/parsing/inference/split/split_inference.py create mode 100644 mindee/v2/parsing/inference/split/split_response.py diff --git a/mindee/client_v2.py b/mindee/client_v2.py index 6819b2cf..e7efb6b5 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -1,10 +1,10 @@ from time import sleep -from typing import Optional, Union +from typing import Optional, Union, Type from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeError from mindee.error.mindee_http_error_v2 import handle_error_v2 -from mindee.input import UrlInputSource +from mindee.input import UrlInputSource, UtilityParameters from mindee.input.inference_parameters import InferenceParameters from mindee.input.polling_options import PollingOptions from mindee.input.sources.local_input_source import LocalInputSource @@ -15,6 +15,7 @@ is_valid_post_response, ) from mindee.parsing.v2.common_response import CommonStatus +from mindee.v2 import BaseInferenceResponse from mindee.parsing.v2.inference_response import InferenceResponse from mindee.parsing.v2.job_response import JobResponse @@ -41,20 +42,21 @@ def __init__(self, api_key: Optional[str] = None) -> None: def enqueue_inference( self, input_source: Union[LocalInputSource, UrlInputSource], - params: InferenceParameters, + params: Union[InferenceParameters, UtilityParameters], + slug: Optional[str] = None, ) -> JobResponse: """ Enqueues a document to a given model. :param input_source: The document/source file to use. Can be local or remote. - :param params: Parameters to set when sending a file. + :param slug: Slug for the endpoint. + :return: A valid inference response. """ logger.debug("Enqueuing inference using model: %s", params.model_id) - response = self.mindee_api.req_post_inference_enqueue( - input_source=input_source, params=params + input_source=input_source, params=params, slug=slug ) dict_response = response.json() @@ -79,13 +81,18 @@ def get_job(self, job_id: str) -> JobResponse: dict_response = response.json() return JobResponse(dict_response) - def get_inference(self, inference_id: str) -> InferenceResponse: + def get_inference( + self, + inference_id: str, + inference_response_type: Type[InferenceResponse] = InferenceResponse, + ) -> BaseInferenceResponse: """ Get the result of an inference that was previously enqueued. The inference will only be available after it has finished processing. :param inference_id: UUID of the inference to retrieve. + :param inference_response_type: Class of the product to instantiate. :return: An inference response. """ logger.debug("Fetching inference: %s", inference_id) @@ -94,19 +101,20 @@ def get_inference(self, inference_id: str) -> InferenceResponse: if not is_valid_get_response(response): handle_error_v2(response.json()) dict_response = response.json() - return InferenceResponse(dict_response) + return inference_response_type(dict_response) - def enqueue_and_get_inference( + def _enqueue_and_get( self, input_source: Union[LocalInputSource, UrlInputSource], - params: InferenceParameters, + params: Union[InferenceParameters, UtilityParameters], + inference_response_type: Optional[Type[InferenceResponse]] = InferenceResponse, ) -> InferenceResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. :param input_source: The document/source file to use. Can be local or remote. - :param params: Parameters to set when sending a file. + :param inference_response_type: The product class to use for the response object. :return: A valid inference response. """ @@ -117,9 +125,14 @@ def enqueue_and_get_inference( params.polling_options.delay_sec, params.polling_options.max_retries, ) - enqueue_response = self.enqueue_inference(input_source, params) + slug = ( + inference_response_type.inference.get_slug() + if inference_response_type + else None + ) + enqueue_response = self.enqueue_inference(input_source, params, slug) logger.debug( - "Successfully enqueued inference with job id: %s", enqueue_response.job.id + "Successfully enqueued document with job id: %s", enqueue_response.job.id ) sleep(params.polling_options.initial_delay_sec) try_counter = 0 @@ -134,8 +147,57 @@ def enqueue_and_get_inference( f"Parsing failed for job {job_response.job.id}: {detail}" ) if job_response.job.status == CommonStatus.PROCESSED.value: - return self.get_inference(job_response.job.id) + result = self.get_inference( + job_response.job.id, inference_response_type or InferenceResponse + ) + assert isinstance(result, InferenceResponse), ( + f'Invalid response type "{type(result)}"' + ) + return result try_counter += 1 sleep(params.polling_options.delay_sec) raise MindeeError(f"Couldn't retrieve document after {try_counter + 1} tries.") + + def enqueue_and_get_inference( + self, + input_source: Union[LocalInputSource, UrlInputSource], + params: InferenceParameters, + ) -> InferenceResponse: + """ + Enqueues to an asynchronous endpoint and automatically polls for a response. + + :param input_source: The document/source file to use. Can be local or remote. + + :param params: Parameters to set when sending a file. + + :return: A valid inference response. + """ + response = self._enqueue_and_get(input_source, params) + assert isinstance(response, InferenceResponse), ( + f'Invalid response type "{type(response)}"' + ) + return response + + def enqueue_and_get_utility( + self, + inference_response_type: Type[InferenceResponse], + input_source: Union[LocalInputSource, UrlInputSource], + params: UtilityParameters, + ) -> InferenceResponse: + """ + Enqueues to an asynchronous endpoint and automatically polls for a response. + + :param input_source: The document/source file to use. Can be local or remote. + + :param params: Parameters to set when sending a file. + + :param inference_response_type: The product class to use for the response object. + + :return: A valid inference response. + """ + response = self._enqueue_and_get(input_source, params, inference_response_type) + assert isinstance(response, inference_response_type), ( + f'Invalid response type "{type(response)}"' + ) + return response diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index 9ed79985..818efc4c 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -1,4 +1,7 @@ from mindee.input.local_response import LocalResponse +from mindee.input.base_parameters import BaseParameters +from mindee.input.inference_parameters import InferenceParameters +from mindee.input.utility_parameters import UtilityParameters from mindee.input.page_options import PageOptions from mindee.input.polling_options import PollingOptions from mindee.input.sources.base_64_input import Base64Input @@ -11,15 +14,18 @@ from mindee.input.workflow_options import WorkflowOptions __all__ = [ + "Base64Input", + "BaseParameters", + "BytesInput", + "FileInput", "InputType", + "InferenceParameters", "LocalInputSource", - "UrlInputSource", + "LocalResponse", + "PageOptions", "PathInput", - "FileInput", - "Base64Input", - "BytesInput", - "WorkflowOptions", "PollingOptions", - "PageOptions", - "LocalResponse", + "UrlInputSource", + "UtilityParameters", + "WorkflowOptions", ] diff --git a/mindee/input/base_parameters.py b/mindee/input/base_parameters.py new file mode 100644 index 00000000..f9319444 --- /dev/null +++ b/mindee/input/base_parameters.py @@ -0,0 +1,21 @@ +from abc import ABC +from dataclasses import dataclass +from typing import Optional, List + +from mindee.input.polling_options import PollingOptions + + +@dataclass +class BaseParameters(ABC): + """Base class for parameters accepted by all V2 endpoints.""" + + model_id: str + """ID of the model, required.""" + alias: Optional[str] = None + """Use an alias to link the file to your own DB. If empty, no alias will be used.""" + webhook_ids: Optional[List[str]] = None + """IDs of webhooks to propagate the API response to.""" + polling_options: Optional[PollingOptions] = None + """Options for polling. Set only if having timeout issues.""" + close_file: bool = True + """Whether to close the file after parsing.""" diff --git a/mindee/input/inference_parameters.py b/mindee/input/inference_parameters.py index 6d4e01fa..92a1080e 100644 --- a/mindee/input/inference_parameters.py +++ b/mindee/input/inference_parameters.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, asdict from typing import List, Optional, Union -from mindee.input.polling_options import PollingOptions +from mindee.input.base_parameters import BaseParameters @dataclass @@ -44,7 +44,7 @@ class DataSchemaField(StringDataClass): guidelines: Optional[str] = None """Optional extraction guidelines.""" nested_fields: Optional[dict] = None - """Subfields when type is `nested_object`. Leave empty for other types""" + """Subfields when type is `nested_object`. Leave empty for other types.""" @dataclass @@ -78,11 +78,9 @@ def __post_init__(self) -> None: @dataclass -class InferenceParameters: +class InferenceParameters(BaseParameters): """Inference parameters to set when sending a file.""" - model_id: str - """ID of the model, required.""" rag: Optional[bool] = None """Enhance extraction accuracy with Retrieval-Augmented Generation.""" raw_text: Optional[bool] = None @@ -94,14 +92,6 @@ class InferenceParameters: Boost the precision and accuracy of all extractions. Calculate confidence scores for all fields, and fill their ``confidence`` attribute. """ - alias: Optional[str] = None - """Use an alias to link the file to your own DB. If empty, no alias will be used.""" - webhook_ids: Optional[List[str]] = None - """IDs of webhooks to propagate the API response to.""" - polling_options: Optional[PollingOptions] = None - """Options for polling. Set only if having timeout issues.""" - close_file: bool = True - """Whether to close the file after parsing.""" text_context: Optional[str] = None """ Additional text context used by the model during inference. diff --git a/mindee/input/utility_parameters.py b/mindee/input/utility_parameters.py new file mode 100644 index 00000000..d05a6ad8 --- /dev/null +++ b/mindee/input/utility_parameters.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass + +from mindee.input.base_parameters import BaseParameters + + +@dataclass +class UtilityParameters(BaseParameters): + """ + Parameters accepted by any of the asynchronous **inference** utility v2 endpoints. + """ diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index 9990330c..ea7cdec4 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -4,7 +4,7 @@ import requests from mindee.error.mindee_error import MindeeApiV2Error -from mindee.input import LocalInputSource, UrlInputSource +from mindee.input import LocalInputSource, UrlInputSource, UtilityParameters from mindee.input.inference_parameters import InferenceParameters from mindee.logger import logger from mindee.mindee_http.base_settings import USER_AGENT @@ -74,34 +74,37 @@ def set_from_env(self) -> None: def req_post_inference_enqueue( self, input_source: Union[LocalInputSource, UrlInputSource], - params: InferenceParameters, + params: Union[InferenceParameters, UtilityParameters], + slug: Optional[str] = None, ) -> requests.Response: """ Make an asynchronous request to POST a document for prediction on the V2 API. :param input_source: Input object. :param params: Options for the enqueueing of the document. + :param slug: Slug to use for the enqueueing, defaults to 'inferences'. :return: requests response. """ + slug = slug if slug else "inferences" data: Dict[str, Union[str, list]] = {"model_id": params.model_id} - url = f"{self.url_root}/inferences/enqueue" - - if params.rag is not None: - data["rag"] = str(params.rag).lower() - if params.raw_text is not None: - data["raw_text"] = str(params.raw_text).lower() - if params.confidence is not None: - data["confidence"] = str(params.confidence).lower() - if params.polygon is not None: - data["polygon"] = str(params.polygon).lower() + url = f"{self.url_root}/{slug}/enqueue" + if isinstance(params, InferenceParameters): + if params.rag is not None: + data["rag"] = str(params.rag).lower() + if params.raw_text is not None: + data["raw_text"] = str(params.raw_text).lower() + if params.confidence is not None: + data["confidence"] = str(params.confidence).lower() + if params.polygon is not None: + data["polygon"] = str(params.polygon).lower() + if params.text_context and len(params.text_context): + data["text_context"] = params.text_context + if params.data_schema is not None: + data["data_schema"] = str(params.data_schema) if params.webhook_ids and len(params.webhook_ids) > 0: data["webhook_ids"] = params.webhook_ids if params.alias and len(params.alias): data["alias"] = params.alias - if params.text_context and len(params.text_context): - data["text_context"] = params.text_context - if params.data_schema is not None: - data["data_schema"] = str(params.data_schema) if isinstance(input_source, LocalInputSource): files = {"file": input_source.read_contents(params.close_file)} diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py index 86c076c9..3b54ce64 100644 --- a/mindee/parsing/v2/inference.py +++ b/mindee/parsing/v2/inference.py @@ -1,36 +1,20 @@ from mindee.parsing.common.string_dict import StringDict +from mindee.v2.parsing.inference import BaseInference from mindee.parsing.v2.inference_active_options import InferenceActiveOptions -from mindee.parsing.v2.inference_file import InferenceFile -from mindee.parsing.v2.inference_model import InferenceModel from mindee.parsing.v2.inference_result import InferenceResult -class Inference: +class Inference(BaseInference): """Inference object for a V2 API return.""" - id: str - """ID of the inference.""" - model: InferenceModel - """Model info for the inference.""" - file: InferenceFile - """File info for the inference.""" result: InferenceResult """Result of the inference.""" active_options: InferenceActiveOptions """Active options for the inference.""" + _slug: str = "inferences" + """Slug of the inference.""" def __init__(self, raw_response: StringDict): - self.id = raw_response["id"] - self.model = InferenceModel(raw_response["model"]) - self.file = InferenceFile(raw_response["file"]) + super().__init__(raw_response) self.result = InferenceResult(raw_response["result"]) self.active_options = InferenceActiveOptions(raw_response["active_options"]) - - def __str__(self) -> str: - return ( - f"Inference\n#########" - f"\n{self.model}" - f"\n\n{self.file}" - f"\n\n{self.active_options}" - f"\n\n{self.result}\n" - ) diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index f1bb71c2..d06f9323 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -1,9 +1,13 @@ +from typing_extensions import override + from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.common_response import CommonResponse from mindee.parsing.v2.inference import Inference +from mindee.v2.parsing.inference.base_inference_response import ( + BaseInferenceResponse, +) -class InferenceResponse(CommonResponse): +class InferenceResponse(BaseInferenceResponse[Inference]): """Represent an inference response from Mindee V2 API.""" inference: Inference @@ -15,3 +19,7 @@ def __init__(self, raw_response: StringDict) -> None: def __str__(self) -> str: return str(self.inference) + + @override + def _set_inference_type(self, inference_response: StringDict): + return Inference diff --git a/mindee/v2/__init__.py b/mindee/v2/__init__.py new file mode 100644 index 00000000..1aa33bf9 --- /dev/null +++ b/mindee/v2/__init__.py @@ -0,0 +1,15 @@ +from mindee.v2.parsing.inference.base_inference import BaseInference +from mindee.v2.parsing.inference.base_inference_response import ( + BaseInferenceResponse, + TypeInferenceResponse, +) +from mindee.v2.parsing.inference.split.split_inference import SplitInference +from mindee.v2.parsing.inference.split.split_response import SplitResponse + +__all__ = [ + "BaseInference", + "BaseInferenceResponse", + "TypeInferenceResponse", + "SplitInference", + "SplitResponse", +] diff --git a/mindee/v2/parsing/__init__.py b/mindee/v2/parsing/__init__.py new file mode 100644 index 00000000..1aa33bf9 --- /dev/null +++ b/mindee/v2/parsing/__init__.py @@ -0,0 +1,15 @@ +from mindee.v2.parsing.inference.base_inference import BaseInference +from mindee.v2.parsing.inference.base_inference_response import ( + BaseInferenceResponse, + TypeInferenceResponse, +) +from mindee.v2.parsing.inference.split.split_inference import SplitInference +from mindee.v2.parsing.inference.split.split_response import SplitResponse + +__all__ = [ + "BaseInference", + "BaseInferenceResponse", + "TypeInferenceResponse", + "SplitInference", + "SplitResponse", +] diff --git a/mindee/v2/parsing/inference/__init__.py b/mindee/v2/parsing/inference/__init__.py new file mode 100644 index 00000000..1aa33bf9 --- /dev/null +++ b/mindee/v2/parsing/inference/__init__.py @@ -0,0 +1,15 @@ +from mindee.v2.parsing.inference.base_inference import BaseInference +from mindee.v2.parsing.inference.base_inference_response import ( + BaseInferenceResponse, + TypeInferenceResponse, +) +from mindee.v2.parsing.inference.split.split_inference import SplitInference +from mindee.v2.parsing.inference.split.split_response import SplitResponse + +__all__ = [ + "BaseInference", + "BaseInferenceResponse", + "TypeInferenceResponse", + "SplitInference", + "SplitResponse", +] diff --git a/mindee/v2/parsing/inference/base_inference.py b/mindee/v2/parsing/inference/base_inference.py new file mode 100644 index 00000000..1264d127 --- /dev/null +++ b/mindee/v2/parsing/inference/base_inference.py @@ -0,0 +1,32 @@ +from abc import ABC +from typing import TypeVar + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.inference_file import InferenceFile +from mindee.parsing.v2.inference_model import InferenceModel + + +class BaseInference(ABC): + """Base class for V2 inference objects.""" + + model: InferenceModel + """Model info for the inference.""" + file: InferenceFile + """File info for the inference.""" + id: str + """ID of the inference.""" + _slug: str + """Slug of the inference.""" + + def __init__(self, raw_response: StringDict): + self.id = raw_response["id"] + self.model = InferenceModel(raw_response["model"]) + self.file = InferenceFile(raw_response["file"]) + + @classmethod + def get_slug(cls) -> str: + """Getter for the inference slug.""" + return cls._slug + + +TypeBaseInference = TypeVar("TypeBaseInference", bound=BaseInference) diff --git a/mindee/v2/parsing/inference/base_inference_response.py b/mindee/v2/parsing/inference/base_inference_response.py new file mode 100644 index 00000000..3610c4d8 --- /dev/null +++ b/mindee/v2/parsing/inference/base_inference_response.py @@ -0,0 +1,28 @@ +from typing import TypeVar, Generic + +from mindee.parsing.common.string_dict import StringDict +from mindee.v2.parsing.inference.base_inference import TypeBaseInference + +from mindee.parsing.v2.common_response import CommonResponse + + +class BaseInferenceResponse(CommonResponse, Generic[TypeBaseInference]): + """Base class for V2 inference responses.""" + + inference: TypeBaseInference + """The inference result for a split utility request""" + + def __init__(self, raw_response: StringDict) -> None: + super().__init__(raw_response) + self.inference = self._set_inference_type(raw_response["inference"]) + + def _set_inference_type(self, inference_response: StringDict): + """ + Sets the inference type. + + :param inference_response: Server response. + """ + raise NotImplementedError() + + +TypeInferenceResponse = TypeVar("TypeInferenceResponse", bound=BaseInferenceResponse) diff --git a/mindee/v2/parsing/inference/split/__init__.py b/mindee/v2/parsing/inference/split/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mindee/v2/parsing/inference/split/split_inference.py b/mindee/v2/parsing/inference/split/split_inference.py new file mode 100644 index 00000000..dfab6b9f --- /dev/null +++ b/mindee/v2/parsing/inference/split/split_inference.py @@ -0,0 +1,20 @@ +from typing import Any + +from mindee.parsing.common.string_dict import StringDict +from mindee.v2.parsing.inference.base_inference import BaseInference + + +class SplitInference(BaseInference): + """Split inference result.""" + + result: Any + """Result of a split inference.""" + _slug: str = "split" + """Slug of the endpoint.""" + + def __init__(self, raw_response: StringDict) -> None: + super().__init__(raw_response) + self.result = raw_response["result"] + + def __str__(self) -> str: + return f"Inference\n#########\n{self.model}\n{self.file}\n{self.result}\n" diff --git a/mindee/v2/parsing/inference/split/split_response.py b/mindee/v2/parsing/inference/split/split_response.py new file mode 100644 index 00000000..bf0687cd --- /dev/null +++ b/mindee/v2/parsing/inference/split/split_response.py @@ -0,0 +1,20 @@ +from typing import override + +from mindee.parsing.common.string_dict import StringDict +from mindee.v2.parsing.inference.base_inference_response import ( + BaseInferenceResponse, +) +from mindee.v2.parsing.inference.split.split_inference import SplitInference + + +class SplitResponse(BaseInferenceResponse[SplitInference]): + """Represent a split inference response from Mindee V2 API.""" + + @override + def _set_inference_type(self, inference_response: StringDict): + """ + Sets the inference type. + + :param inference_response: Server response. + """ + return SplitInference(inference_response) diff --git a/tests/data b/tests/data index 0c51e1d3..7bed8f9b 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 0c51e1d3e2258404c44280f25f4951ba6fe27324 +Subproject commit 7bed8f9b059f6ba3debc31e71375ea8437b6fbe2 From eeecd825f4a3d49bc0d0be46d2bb41c1e217d033 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 Jan 2026 10:27:58 +0100 Subject: [PATCH 02/19] fix typo in import --- mindee/parsing/v2/inference_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index d06f9323..836924d9 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -1,4 +1,4 @@ -from typing_extensions import override +from typing import override from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.inference import Inference From f95a2003d4731154d531fc897bb00038b513b557 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 Jan 2026 10:38:16 +0100 Subject: [PATCH 03/19] fix again --- mindee/parsing/v2/inference_response.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index 836924d9..1918d6ff 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -1,5 +1,3 @@ -from typing import override - from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.inference import Inference from mindee.v2.parsing.inference.base_inference_response import ( @@ -20,6 +18,5 @@ def __init__(self, raw_response: StringDict) -> None: def __str__(self) -> str: return str(self.inference) - @override def _set_inference_type(self, inference_response: StringDict): return Inference From 272c66b843301f10cad3c3676934c708ce1976ac Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 Jan 2026 16:11:05 +0100 Subject: [PATCH 04/19] fix issues --- mindee/client_v2.py | 34 +++++++------ mindee/mindee_http/mindee_api_v2.py | 50 +++++++++++++------ mindee/parsing/v2/inference_response.py | 1 + mindee/v2/parsing/inference/base_inference.py | 4 +- .../inference/base_inference_response.py | 13 +++-- .../parsing/inference/split/split_response.py | 9 ++-- 6 files changed, 71 insertions(+), 40 deletions(-) diff --git a/mindee/client_v2.py b/mindee/client_v2.py index e7efb6b5..d1fb2229 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -1,5 +1,5 @@ from time import sleep -from typing import Optional, Union, Type +from typing import Optional, Union, Type, TypeVar from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeError @@ -19,6 +19,10 @@ from mindee.parsing.v2.inference_response import InferenceResponse from mindee.parsing.v2.job_response import JobResponse +TypeBaseInferenceResponse = TypeVar( + "TypeBaseInferenceResponse", bound=BaseInferenceResponse +) + class ClientV2(ClientMixin): """ @@ -84,7 +88,7 @@ def get_job(self, job_id: str) -> JobResponse: def get_inference( self, inference_id: str, - inference_response_type: Type[InferenceResponse] = InferenceResponse, + inference_response_type: Type[BaseInferenceResponse] = InferenceResponse, ) -> BaseInferenceResponse: """ Get the result of an inference that was previously enqueued. @@ -96,8 +100,11 @@ def get_inference( :return: An inference response. """ logger.debug("Fetching inference: %s", inference_id) + slug = None + if inference_response_type and inference_response_type is not InferenceResponse: + slug = "utilities/" + inference_response_type.get_inference_slug() - response = self.mindee_api.req_get_inference(inference_id) + response = self.mindee_api.req_get_inference(inference_id, slug) if not is_valid_get_response(response): handle_error_v2(response.json()) dict_response = response.json() @@ -107,8 +114,10 @@ def _enqueue_and_get( self, input_source: Union[LocalInputSource, UrlInputSource], params: Union[InferenceParameters, UtilityParameters], - inference_response_type: Optional[Type[InferenceResponse]] = InferenceResponse, - ) -> InferenceResponse: + inference_response_type: Optional[ + Type[BaseInferenceResponse] + ] = InferenceResponse, + ) -> BaseInferenceResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. @@ -125,11 +134,9 @@ def _enqueue_and_get( params.polling_options.delay_sec, params.polling_options.max_retries, ) - slug = ( - inference_response_type.inference.get_slug() - if inference_response_type - else None - ) + slug = None + if inference_response_type and inference_response_type is not InferenceResponse: + slug = "utilities/" + inference_response_type.get_inference_slug() enqueue_response = self.enqueue_inference(input_source, params, slug) logger.debug( "Successfully enqueued document with job id: %s", enqueue_response.job.id @@ -150,9 +157,6 @@ def _enqueue_and_get( result = self.get_inference( job_response.job.id, inference_response_type or InferenceResponse ) - assert isinstance(result, InferenceResponse), ( - f'Invalid response type "{type(result)}"' - ) return result try_counter += 1 sleep(params.polling_options.delay_sec) @@ -181,10 +185,10 @@ def enqueue_and_get_inference( def enqueue_and_get_utility( self, - inference_response_type: Type[InferenceResponse], + inference_response_type: Type[TypeBaseInferenceResponse], input_source: Union[LocalInputSource, UrlInputSource], params: UtilityParameters, - ) -> InferenceResponse: + ) -> TypeBaseInferenceResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index ea7cdec4..b874d4ab 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -85,22 +85,12 @@ def req_post_inference_enqueue( :param slug: Slug to use for the enqueueing, defaults to 'inferences'. :return: requests response. """ - slug = slug if slug else "inferences" + if not slug: + slug = "inferences" data: Dict[str, Union[str, list]] = {"model_id": params.model_id} url = f"{self.url_root}/{slug}/enqueue" if isinstance(params, InferenceParameters): - if params.rag is not None: - data["rag"] = str(params.rag).lower() - if params.raw_text is not None: - data["raw_text"] = str(params.raw_text).lower() - if params.confidence is not None: - data["confidence"] = str(params.confidence).lower() - if params.polygon is not None: - data["polygon"] = str(params.polygon).lower() - if params.text_context and len(params.text_context): - data["text_context"] = params.text_context - if params.data_schema is not None: - data["data_schema"] = str(params.data_schema) + self._set_inference_params(data, params) if params.webhook_ids and len(params.webhook_ids) > 0: data["webhook_ids"] = params.webhook_ids if params.alias and len(params.alias): @@ -127,6 +117,28 @@ def req_post_inference_enqueue( raise MindeeApiV2Error("Invalid input source.") return response + def _set_inference_params( + self, data: dict[str, Union[str, list]], params: InferenceParameters + ) -> None: + """ + Sets the inference-specific parameters. + + :param data: Data dict to fill. + :param params: Parameters to add. + """ + if params.rag is not None: + data["rag"] = str(params.rag).lower() + if params.raw_text is not None: + data["raw_text"] = str(params.raw_text).lower() + if params.confidence is not None: + data["confidence"] = str(params.confidence).lower() + if params.polygon is not None: + data["polygon"] = str(params.polygon).lower() + if params.text_context and len(params.text_context): + data["text_context"] = params.text_context + if params.data_schema is not None: + data["data_schema"] = str(params.data_schema) + def req_get_job(self, job_id: str) -> requests.Response: """ Sends a request matching a given queue_id. Returns either a Job or a Document. @@ -140,14 +152,22 @@ def req_get_job(self, job_id: str) -> requests.Response: allow_redirects=False, ) - def req_get_inference(self, inference_id: str) -> requests.Response: + def req_get_inference( + self, inference_id: str, slug: Optional[str] + ) -> requests.Response: """ Sends a request matching a given queue_id. Returns either a Job or a Document. :param inference_id: Inference ID, returned by the job request. + :param slug: Slug of the inference, defaults to nothing. """ + + if not slug: + url = f"{self.url_root}/inferences/{inference_id}" + else: + url = f"{self.url_root}/{slug}/{inference_id}" return requests.get( - f"{self.url_root}/inferences/{inference_id}", + url, headers=self.base_headers, timeout=self.request_timeout, allow_redirects=False, diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index 1918d6ff..71e7c62c 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -10,6 +10,7 @@ class InferenceResponse(BaseInferenceResponse[Inference]): inference: Inference """Inference result.""" + inference_type = Inference def __init__(self, raw_response: StringDict) -> None: super().__init__(raw_response) diff --git a/mindee/v2/parsing/inference/base_inference.py b/mindee/v2/parsing/inference/base_inference.py index 1264d127..950ceab7 100644 --- a/mindee/v2/parsing/inference/base_inference.py +++ b/mindee/v2/parsing/inference/base_inference.py @@ -9,14 +9,14 @@ class BaseInference(ABC): """Base class for V2 inference objects.""" + _slug: str + """Slug of the inference.""" model: InferenceModel """Model info for the inference.""" file: InferenceFile """File info for the inference.""" id: str """ID of the inference.""" - _slug: str - """Slug of the inference.""" def __init__(self, raw_response: StringDict): self.id = raw_response["id"] diff --git a/mindee/v2/parsing/inference/base_inference_response.py b/mindee/v2/parsing/inference/base_inference_response.py index 3610c4d8..cac01e6e 100644 --- a/mindee/v2/parsing/inference/base_inference_response.py +++ b/mindee/v2/parsing/inference/base_inference_response.py @@ -1,7 +1,7 @@ -from typing import TypeVar, Generic +from typing import ClassVar, Type, TypeVar, Generic from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.inference.base_inference import TypeBaseInference +from mindee.v2.parsing.inference.base_inference import BaseInference, TypeBaseInference from mindee.parsing.v2.common_response import CommonResponse @@ -9,8 +9,10 @@ class BaseInferenceResponse(CommonResponse, Generic[TypeBaseInference]): """Base class for V2 inference responses.""" - inference: TypeBaseInference + inference: BaseInference """The inference result for a split utility request""" + inference_type: ClassVar[Type[BaseInference]] + """Inference class used for slug derivation.""" def __init__(self, raw_response: StringDict) -> None: super().__init__(raw_response) @@ -24,5 +26,10 @@ def _set_inference_type(self, inference_response: StringDict): """ raise NotImplementedError() + @classmethod + def get_inference_slug(cls) -> str: + """Getter for the inference slug.""" + return cls.inference_type.get_slug() + TypeInferenceResponse = TypeVar("TypeInferenceResponse", bound=BaseInferenceResponse) diff --git a/mindee/v2/parsing/inference/split/split_response.py b/mindee/v2/parsing/inference/split/split_response.py index bf0687cd..819cb555 100644 --- a/mindee/v2/parsing/inference/split/split_response.py +++ b/mindee/v2/parsing/inference/split/split_response.py @@ -1,6 +1,3 @@ -from typing import override - -from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference_response import ( BaseInferenceResponse, ) @@ -10,8 +7,10 @@ class SplitResponse(BaseInferenceResponse[SplitInference]): """Represent a split inference response from Mindee V2 API.""" - @override - def _set_inference_type(self, inference_response: StringDict): + inference: SplitInference + inference_type = SplitInference + + def _set_inference_type(self, inference_response): """ Sets the inference type. From c4340768d32ec887e79b89b0c847e821de7e37cf Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:21:29 +0100 Subject: [PATCH 05/19] fix tests --- mindee/parsing/v2/inference.py | 9 +++++++++ mindee/v2/parsing/inference/base_inference_response.py | 3 +++ tests/data | 2 +- tests/v2/input/test_local_response.py | 1 + 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py index 3b54ce64..0fdbcd3a 100644 --- a/mindee/parsing/v2/inference.py +++ b/mindee/parsing/v2/inference.py @@ -18,3 +18,12 @@ def __init__(self, raw_response: StringDict): super().__init__(raw_response) self.result = InferenceResult(raw_response["result"]) self.active_options = InferenceActiveOptions(raw_response["active_options"]) + + def __str__(self) -> str: + return ( + f"Inference\n#########" + f"\n{self.model}" + f"\n\n{self.file}" + f"\n\n{self.active_options}" + f"\n\n{self.result}\n" + ) diff --git a/mindee/v2/parsing/inference/base_inference_response.py b/mindee/v2/parsing/inference/base_inference_response.py index cac01e6e..279cb043 100644 --- a/mindee/v2/parsing/inference/base_inference_response.py +++ b/mindee/v2/parsing/inference/base_inference_response.py @@ -31,5 +31,8 @@ def get_inference_slug(cls) -> str: """Getter for the inference slug.""" return cls.inference_type.get_slug() + def __str__(self) -> str: + return str(self.inference) + TypeInferenceResponse = TypeVar("TypeInferenceResponse", bound=BaseInferenceResponse) diff --git a/tests/data b/tests/data index 7bed8f9b..c30c33b5 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 7bed8f9b059f6ba3debc31e71375ea8437b6fbe2 +Subproject commit c30c33b5217613223398a4b814e9cd96e8255789 diff --git a/tests/v2/input/test_local_response.py b/tests/v2/input/test_local_response.py index 5ce07fe1..e12279ae 100644 --- a/tests/v2/input/test_local_response.py +++ b/tests/v2/input/test_local_response.py @@ -20,6 +20,7 @@ def _assert_local_response(local_response): assert not local_response.is_valid_hmac_signature( fake_hmac_signing, "invalid signature" ) + print(local_response.get_hmac_signature(fake_hmac_signing)) assert signature == local_response.get_hmac_signature(fake_hmac_signing) assert local_response.is_valid_hmac_signature(fake_hmac_signing, signature) reponse: InferenceResponse = local_response.deserialize_response(InferenceResponse) From 56b60c27c3aade45ed4124d51cc0a85bbafed6f7 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:40:01 +0100 Subject: [PATCH 06/19] add unit test --- tests/utils.py | 1 + tests/v2/parsing/test_split_response.py | 54 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tests/v2/parsing/test_split_response.py diff --git a/tests/utils.py b/tests/utils.py index 252a699c..058e3595 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -17,6 +17,7 @@ V2_DATA_DIR = ROOT_DATA_DIR / "v2" V2_PRODUCT_DATA_DIR = V2_DATA_DIR / "products" +V2_UTILITIES_DATA_DIR = V2_DATA_DIR / "utilities" def clear_envvars(monkeypatch) -> None: diff --git a/tests/v2/parsing/test_split_response.py b/tests/v2/parsing/test_split_response.py new file mode 100644 index 00000000..08823ac4 --- /dev/null +++ b/tests/v2/parsing/test_split_response.py @@ -0,0 +1,54 @@ +import pytest + +from mindee import LocalResponse +from mindee.v2.parsing.inference.split.split_inference import SplitInference +from mindee.v2.parsing.inference.split.split_response import SplitResponse +from tests.utils import V2_UTILITIES_DATA_DIR + + +@pytest.mark.v2 +def test_split_single(): + input_inference = LocalResponse(V2_UTILITIES_DATA_DIR / "split_single.json") + split_response = input_inference.deserialize_response(SplitResponse) + assert isinstance(split_response.inference, SplitInference) + assert split_response.inference.result.get("split") + assert len(split_response.inference.result.get("split")[0].get("page_range")) == 2 + assert split_response.inference.result.get("split")[0].get("page_range")[0] == 0 + assert split_response.inference.result.get("split")[0].get("page_range")[1] == 0 + assert ( + split_response.inference.result.get("split")[0].get("document_type") + == "receipt" + ) + + +@pytest.mark.v2 +def test_split_multiple(): + input_inference = LocalResponse(V2_UTILITIES_DATA_DIR / "split_multiple.json") + split_response = input_inference.deserialize_response(SplitResponse) + assert isinstance(split_response.inference, SplitInference) + assert split_response.inference.result.get("split") + assert len(split_response.inference.result.get("split")) == 3 + + assert len(split_response.inference.result.get("split")[0].get("page_range")) == 2 + assert split_response.inference.result.get("split")[0].get("page_range")[0] == 0 + assert split_response.inference.result.get("split")[0].get("page_range")[1] == 0 + assert ( + split_response.inference.result.get("split")[0].get("document_type") + == "invoice" + ) + + assert len(split_response.inference.result.get("split")[1].get("page_range")) == 2 + assert split_response.inference.result.get("split")[1].get("page_range")[0] == 1 + assert split_response.inference.result.get("split")[1].get("page_range")[1] == 3 + assert ( + split_response.inference.result.get("split")[1].get("document_type") + == "invoice" + ) + + assert len(split_response.inference.result.get("split")[2].get("page_range")) == 2 + assert split_response.inference.result.get("split")[2].get("page_range")[0] == 4 + assert split_response.inference.result.get("split")[2].get("page_range")[1] == 4 + assert ( + split_response.inference.result.get("split")[2].get("document_type") + == "invoice" + ) From 498a5afd81a36ae702676bd668305af18ebeafee Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:42:00 +0100 Subject: [PATCH 07/19] fix forgor :skull: --- tests/v2/input/test_local_response.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/v2/input/test_local_response.py b/tests/v2/input/test_local_response.py index e12279ae..5db8be78 100644 --- a/tests/v2/input/test_local_response.py +++ b/tests/v2/input/test_local_response.py @@ -14,13 +14,12 @@ def file_path() -> Path: def _assert_local_response(local_response): fake_hmac_signing = "ogNjY44MhvKPGTtVsI8zG82JqWQa68woYQH" - signature = "1df388c992d87897fe61dfc56c444c58fc3c7369c31e2b5fd20d867695e93e85" + signature = "f390d9f7f57ac04f47b6309d8a40236b0182610804fc20e91b1f6028aaca07a7" assert local_response._file is not None assert not local_response.is_valid_hmac_signature( fake_hmac_signing, "invalid signature" ) - print(local_response.get_hmac_signature(fake_hmac_signing)) assert signature == local_response.get_hmac_signature(fake_hmac_signing) assert local_response.is_valid_hmac_signature(fake_hmac_signing, signature) reponse: InferenceResponse = local_response.deserialize_response(InferenceResponse) From a0d6cbfdf7d8b003f59b509d70971c1c05fb2007 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:58:26 +0100 Subject: [PATCH 08/19] fix broken syntax for 3.8 --- mindee/error/mindee_http_error_v2.py | 4 +-- mindee/mindee_http/mindee_api_v2.py | 6 ++-- tests/v2/parsing/test_split_integration.py | 37 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 tests/v2/parsing/test_split_integration.py diff --git a/mindee/error/mindee_http_error_v2.py b/mindee/error/mindee_http_error_v2.py index 99ba40da..a6be90f3 100644 --- a/mindee/error/mindee_http_error_v2.py +++ b/mindee/error/mindee_http_error_v2.py @@ -1,5 +1,5 @@ import json -from typing import Optional +from typing import List, Optional from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2 import ErrorItem, ErrorResponse @@ -18,7 +18,7 @@ def __init__(self, response: ErrorResponse) -> None: self.title = response.title self.code = response.code self.detail = response.detail - self.errors: list[ErrorItem] = response.errors + self.errors: List[ErrorItem] = response.errors super().__init__( f"HTTP {self.status} - {self.title} :: {self.code} - {self.detail}" ) diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index b874d4ab..e064b6ad 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -1,5 +1,5 @@ import os -from typing import Dict, Optional, Union +from typing import Dict, List, Optional, Union import requests @@ -87,7 +87,7 @@ def req_post_inference_enqueue( """ if not slug: slug = "inferences" - data: Dict[str, Union[str, list]] = {"model_id": params.model_id} + data: Dict[str, Union[str, List[str]]] = {"model_id": params.model_id} url = f"{self.url_root}/{slug}/enqueue" if isinstance(params, InferenceParameters): self._set_inference_params(data, params) @@ -118,7 +118,7 @@ def req_post_inference_enqueue( return response def _set_inference_params( - self, data: dict[str, Union[str, list]], params: InferenceParameters + self, data: Dict[str, Union[str, List[str]]], params: InferenceParameters ) -> None: """ Sets the inference-specific parameters. diff --git a/tests/v2/parsing/test_split_integration.py b/tests/v2/parsing/test_split_integration.py new file mode 100644 index 00000000..271d6515 --- /dev/null +++ b/tests/v2/parsing/test_split_integration.py @@ -0,0 +1,37 @@ +import os + +import pytest + +from mindee import ClientV2, PathInput +from mindee.input import UtilityParameters +from mindee.v2 import SplitResponse +from tests.utils import FILE_TYPES_DIR + + +@pytest.fixture(scope="session") +def split_model_id() -> str: + """Identifier of the Financial Document model, supplied through an env var.""" + return os.getenv("MINDEE_V2_SPLIT_UTILITY_MODEL_ID") + + +@pytest.fixture(scope="session") +def v2_client() -> ClientV2: + """ + Real V2 client configured with the user-supplied API key + (or skipped when the key is absent). + """ + api_key = os.getenv("MINDEE_V2_API_KEY") + return ClientV2(api_key) + + +@pytest.mark.integration +@pytest.mark.v2 +def test_split_blank(v2_client: ClientV2, split_model_id: str): + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + response = v2_client.enqueue_and_get_utility( + SplitResponse, input_source, UtilityParameters(split_model_id) + ) + assert response.inference is not None + assert response.inference.file.name == "blank_1.pdf" + assert response.inference.result.get("split") + assert len(response.inference.result.get("split")) == 1 From 195760410dba85a14c44899353cf09a302626400 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:17:06 +0100 Subject: [PATCH 09/19] fix test --- mindee/client_v2.py | 8 ++++---- mindee/input/__init__.py | 4 ++-- mindee/input/split_parameters.py | 7 +++++++ mindee/input/utility_parameters.py | 10 ---------- mindee/mindee_http/mindee_api_v2.py | 4 ++-- tests/v2/parsing/test_split_integration.py | 18 ++++++++++-------- 6 files changed, 25 insertions(+), 26 deletions(-) create mode 100644 mindee/input/split_parameters.py delete mode 100644 mindee/input/utility_parameters.py diff --git a/mindee/client_v2.py b/mindee/client_v2.py index d1fb2229..fab4463a 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -4,7 +4,7 @@ from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeError from mindee.error.mindee_http_error_v2 import handle_error_v2 -from mindee.input import UrlInputSource, UtilityParameters +from mindee.input import UrlInputSource, SplitParameters from mindee.input.inference_parameters import InferenceParameters from mindee.input.polling_options import PollingOptions from mindee.input.sources.local_input_source import LocalInputSource @@ -46,7 +46,7 @@ def __init__(self, api_key: Optional[str] = None) -> None: def enqueue_inference( self, input_source: Union[LocalInputSource, UrlInputSource], - params: Union[InferenceParameters, UtilityParameters], + params: Union[InferenceParameters, SplitParameters], slug: Optional[str] = None, ) -> JobResponse: """ @@ -113,7 +113,7 @@ def get_inference( def _enqueue_and_get( self, input_source: Union[LocalInputSource, UrlInputSource], - params: Union[InferenceParameters, UtilityParameters], + params: Union[InferenceParameters, SplitParameters], inference_response_type: Optional[ Type[BaseInferenceResponse] ] = InferenceResponse, @@ -187,7 +187,7 @@ def enqueue_and_get_utility( self, inference_response_type: Type[TypeBaseInferenceResponse], input_source: Union[LocalInputSource, UrlInputSource], - params: UtilityParameters, + params: SplitParameters, ) -> TypeBaseInferenceResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index 818efc4c..85df3ae1 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -1,7 +1,7 @@ from mindee.input.local_response import LocalResponse from mindee.input.base_parameters import BaseParameters from mindee.input.inference_parameters import InferenceParameters -from mindee.input.utility_parameters import UtilityParameters +from mindee.input.split_parameters import SplitParameters from mindee.input.page_options import PageOptions from mindee.input.polling_options import PollingOptions from mindee.input.sources.base_64_input import Base64Input @@ -26,6 +26,6 @@ "PathInput", "PollingOptions", "UrlInputSource", - "UtilityParameters", + "SplitParameters", "WorkflowOptions", ] diff --git a/mindee/input/split_parameters.py b/mindee/input/split_parameters.py new file mode 100644 index 00000000..d5957d8b --- /dev/null +++ b/mindee/input/split_parameters.py @@ -0,0 +1,7 @@ +from mindee.input.base_parameters import BaseParameters + + +class SplitParameters(BaseParameters): + """ + Parameters accepted by the split utility v2 endpoint. + """ diff --git a/mindee/input/utility_parameters.py b/mindee/input/utility_parameters.py deleted file mode 100644 index d05a6ad8..00000000 --- a/mindee/input/utility_parameters.py +++ /dev/null @@ -1,10 +0,0 @@ -from dataclasses import dataclass - -from mindee.input.base_parameters import BaseParameters - - -@dataclass -class UtilityParameters(BaseParameters): - """ - Parameters accepted by any of the asynchronous **inference** utility v2 endpoints. - """ diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index e064b6ad..8e70dd56 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -4,7 +4,7 @@ import requests from mindee.error.mindee_error import MindeeApiV2Error -from mindee.input import LocalInputSource, UrlInputSource, UtilityParameters +from mindee.input import LocalInputSource, UrlInputSource, SplitParameters from mindee.input.inference_parameters import InferenceParameters from mindee.logger import logger from mindee.mindee_http.base_settings import USER_AGENT @@ -74,7 +74,7 @@ def set_from_env(self) -> None: def req_post_inference_enqueue( self, input_source: Union[LocalInputSource, UrlInputSource], - params: Union[InferenceParameters, UtilityParameters], + params: Union[InferenceParameters, SplitParameters], slug: Optional[str] = None, ) -> requests.Response: """ diff --git a/tests/v2/parsing/test_split_integration.py b/tests/v2/parsing/test_split_integration.py index 271d6515..15316e98 100644 --- a/tests/v2/parsing/test_split_integration.py +++ b/tests/v2/parsing/test_split_integration.py @@ -3,15 +3,15 @@ import pytest from mindee import ClientV2, PathInput -from mindee.input import UtilityParameters +from mindee.input import SplitParameters from mindee.v2 import SplitResponse -from tests.utils import FILE_TYPES_DIR +from tests.utils import V1_PRODUCT_DATA_DIR @pytest.fixture(scope="session") def split_model_id() -> str: """Identifier of the Financial Document model, supplied through an env var.""" - return os.getenv("MINDEE_V2_SPLIT_UTILITY_MODEL_ID") + return os.getenv("MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID") @pytest.fixture(scope="session") @@ -27,11 +27,13 @@ def v2_client() -> ClientV2: @pytest.mark.integration @pytest.mark.v2 def test_split_blank(v2_client: ClientV2, split_model_id: str): - input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") - response = v2_client.enqueue_and_get_utility( - SplitResponse, input_source, UtilityParameters(split_model_id) + input_source = PathInput( + V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" ) + response = v2_client.enqueue_and_get_utility( + SplitResponse, input_source, SplitParameters(split_model_id) + ) # Note: do not use blank_1.pdf for this. assert response.inference is not None - assert response.inference.file.name == "blank_1.pdf" + assert response.inference.file.name == "default_sample.pdf" assert response.inference.result.get("split") - assert len(response.inference.result.get("split")) == 1 + assert len(response.inference.result.get("split")) == 2 From a63ea0a494d8b79ed9557b0da21d036787774ca7 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:05:19 +0100 Subject: [PATCH 10/19] rename client methods & deprecate old ones --- mindee/client_v2.py | 89 ++++++++++++++-------- mindee/mindee_http/mindee_api_v2.py | 4 +- tests/v2/parsing/test_split_integration.py | 2 +- 3 files changed, 61 insertions(+), 34 deletions(-) diff --git a/mindee/client_v2.py b/mindee/client_v2.py index fab4463a..89fd5fe0 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -1,10 +1,11 @@ +import warnings from time import sleep from typing import Optional, Union, Type, TypeVar from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeError from mindee.error.mindee_http_error_v2 import handle_error_v2 -from mindee.input import UrlInputSource, SplitParameters +from mindee.input import UrlInputSource, BaseParameters from mindee.input.inference_parameters import InferenceParameters from mindee.input.polling_options import PollingOptions from mindee.input.sources.local_input_source import LocalInputSource @@ -46,7 +47,23 @@ def __init__(self, api_key: Optional[str] = None) -> None: def enqueue_inference( self, input_source: Union[LocalInputSource, UrlInputSource], - params: Union[InferenceParameters, SplitParameters], + params: BaseParameters, + slug: Optional[str] = None, + disable_redundant_warnings: bool = False, + ) -> JobResponse: + """[Deprecated] Use `enqueue` instead.""" + if not disable_redundant_warnings: + warnings.warn( + "enqueue_inference is deprecated; use enqueue instead", + DeprecationWarning, + stacklevel=2, + ) + return self.enqueue(input_source, params, slug) + + def enqueue( + self, + input_source: Union[LocalInputSource, UrlInputSource], + params: BaseParameters, slug: Optional[str] = None, ) -> JobResponse: """ @@ -88,7 +105,22 @@ def get_job(self, job_id: str) -> JobResponse: def get_inference( self, inference_id: str, - inference_response_type: Type[BaseInferenceResponse] = InferenceResponse, + response_type: Type[BaseInferenceResponse] = InferenceResponse, + disable_redundant_warnings: bool = False, + ) -> BaseInferenceResponse: + """[Deprecated] Use `get_result` instead.""" + if not disable_redundant_warnings: + warnings.warn( + "get_inference is deprecated; use get_result instead", + DeprecationWarning, + stacklevel=2, + ) + return self.get_result(inference_id, response_type) + + def get_result( + self, + inference_id: str, + response_type: Type[BaseInferenceResponse] = InferenceResponse, ) -> BaseInferenceResponse: """ Get the result of an inference that was previously enqueued. @@ -96,34 +128,32 @@ def get_inference( The inference will only be available after it has finished processing. :param inference_id: UUID of the inference to retrieve. - :param inference_response_type: Class of the product to instantiate. + :param response_type: Class of the product to instantiate. :return: An inference response. """ logger.debug("Fetching inference: %s", inference_id) slug = None - if inference_response_type and inference_response_type is not InferenceResponse: - slug = "utilities/" + inference_response_type.get_inference_slug() + if response_type and response_type is not InferenceResponse: + slug = "utilities/" + response_type.get_inference_slug() response = self.mindee_api.req_get_inference(inference_id, slug) if not is_valid_get_response(response): handle_error_v2(response.json()) dict_response = response.json() - return inference_response_type(dict_response) + return response_type(dict_response) def _enqueue_and_get( self, input_source: Union[LocalInputSource, UrlInputSource], - params: Union[InferenceParameters, SplitParameters], - inference_response_type: Optional[ - Type[BaseInferenceResponse] - ] = InferenceResponse, + params: BaseParameters, + response_type: Optional[Type[BaseInferenceResponse]] = InferenceResponse, ) -> BaseInferenceResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. :param input_source: The document/source file to use. Can be local or remote. :param params: Parameters to set when sending a file. - :param inference_response_type: The product class to use for the response object. + :param response_type: The product class to use for the response object. :return: A valid inference response. """ @@ -135,9 +165,9 @@ def _enqueue_and_get( params.polling_options.max_retries, ) slug = None - if inference_response_type and inference_response_type is not InferenceResponse: - slug = "utilities/" + inference_response_type.get_inference_slug() - enqueue_response = self.enqueue_inference(input_source, params, slug) + if response_type and response_type is not InferenceResponse: + slug = "utilities/" + response_type.get_inference_slug() + enqueue_response = self.enqueue_inference(input_source, params, slug, True) logger.debug( "Successfully enqueued document with job id: %s", enqueue_response.job.id ) @@ -155,7 +185,7 @@ def _enqueue_and_get( ) if job_response.job.status == CommonStatus.PROCESSED.value: result = self.get_inference( - job_response.job.id, inference_response_type or InferenceResponse + job_response.job.id, response_type or InferenceResponse, True ) return result try_counter += 1 @@ -168,26 +198,23 @@ def enqueue_and_get_inference( input_source: Union[LocalInputSource, UrlInputSource], params: InferenceParameters, ) -> InferenceResponse: - """ - Enqueues to an asynchronous endpoint and automatically polls for a response. - - :param input_source: The document/source file to use. Can be local or remote. - - :param params: Parameters to set when sending a file. - - :return: A valid inference response. - """ + """[Deprecated] Use `enqueue_and_get_result` instead.""" + warnings.warn( + "enqueue_and_get_inference is deprecated; use enqueue_and_get_result", + DeprecationWarning, + stacklevel=2, + ) response = self._enqueue_and_get(input_source, params) assert isinstance(response, InferenceResponse), ( f'Invalid response type "{type(response)}"' ) return response - def enqueue_and_get_utility( + def enqueue_and_get_result( self, - inference_response_type: Type[TypeBaseInferenceResponse], + response_type: Type[TypeBaseInferenceResponse], input_source: Union[LocalInputSource, UrlInputSource], - params: SplitParameters, + params: BaseParameters, ) -> TypeBaseInferenceResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. @@ -196,12 +223,12 @@ def enqueue_and_get_utility( :param params: Parameters to set when sending a file. - :param inference_response_type: The product class to use for the response object. + :param response_type: The product class to use for the response object. :return: A valid inference response. """ - response = self._enqueue_and_get(input_source, params, inference_response_type) - assert isinstance(response, inference_response_type), ( + response = self._enqueue_and_get(input_source, params, response_type) + assert isinstance(response, response_type), ( f'Invalid response type "{type(response)}"' ) return response diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index 8e70dd56..9638c509 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -4,7 +4,7 @@ import requests from mindee.error.mindee_error import MindeeApiV2Error -from mindee.input import LocalInputSource, UrlInputSource, SplitParameters +from mindee.input import LocalInputSource, UrlInputSource, BaseParameters from mindee.input.inference_parameters import InferenceParameters from mindee.logger import logger from mindee.mindee_http.base_settings import USER_AGENT @@ -74,7 +74,7 @@ def set_from_env(self) -> None: def req_post_inference_enqueue( self, input_source: Union[LocalInputSource, UrlInputSource], - params: Union[InferenceParameters, SplitParameters], + params: BaseParameters, slug: Optional[str] = None, ) -> requests.Response: """ diff --git a/tests/v2/parsing/test_split_integration.py b/tests/v2/parsing/test_split_integration.py index 15316e98..9274a3ec 100644 --- a/tests/v2/parsing/test_split_integration.py +++ b/tests/v2/parsing/test_split_integration.py @@ -30,7 +30,7 @@ def test_split_blank(v2_client: ClientV2, split_model_id: str): input_source = PathInput( V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" ) - response = v2_client.enqueue_and_get_utility( + response = v2_client.enqueue_and_get_result( SplitResponse, input_source, SplitParameters(split_model_id) ) # Note: do not use blank_1.pdf for this. assert response.inference is not None From 7c23503c81a123d5439195ff3495d4afa0e4efde Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:41:47 +0100 Subject: [PATCH 11/19] move model param logic to their own classes --- mindee/input/__init__.py | 2 +- mindee/input/base_parameters.py | 17 +++++++++- mindee/input/inference_parameters.py | 23 ++++++++++++- mindee/mindee_http/mindee_api_v2.py | 33 ++----------------- .../inference/split}/split_parameters.py | 0 5 files changed, 41 insertions(+), 34 deletions(-) rename mindee/{input => v2/parsing/inference/split}/split_parameters.py (100%) diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index 85df3ae1..b80e2ce8 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -1,7 +1,7 @@ from mindee.input.local_response import LocalResponse from mindee.input.base_parameters import BaseParameters from mindee.input.inference_parameters import InferenceParameters -from mindee.input.split_parameters import SplitParameters +from mindee.v2.parsing.inference.split.split_parameters import SplitParameters from mindee.input.page_options import PageOptions from mindee.input.polling_options import PollingOptions from mindee.input.sources.base_64_input import Base64Input diff --git a/mindee/input/base_parameters.py b/mindee/input/base_parameters.py index f9319444..5de8ca58 100644 --- a/mindee/input/base_parameters.py +++ b/mindee/input/base_parameters.py @@ -1,6 +1,6 @@ from abc import ABC from dataclasses import dataclass -from typing import Optional, List +from typing import Dict, Optional, List, Union from mindee.input.polling_options import PollingOptions @@ -19,3 +19,18 @@ class BaseParameters(ABC): """Options for polling. Set only if having timeout issues.""" close_file: bool = True """Whether to close the file after parsing.""" + + def get_config(self) -> Dict[str, Union[str, List[str]]]: + """ + Return the parameters as a config dictionary. + + :return: A dict of parameters. + """ + data: Dict[str, Union[str, List[str]]] = { + "model_id": self.model_id, + } + if self.alias is not None: + data["alias"] = self.alias + if self.webhook_ids and len(self.webhook_ids) > 0: + data["webhook_ids"] = self.webhook_ids + return data diff --git a/mindee/input/inference_parameters.py b/mindee/input/inference_parameters.py index 92a1080e..5e6e3622 100644 --- a/mindee/input/inference_parameters.py +++ b/mindee/input/inference_parameters.py @@ -1,6 +1,6 @@ import json from dataclasses import dataclass, asdict -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union from mindee.input.base_parameters import BaseParameters @@ -108,3 +108,24 @@ def __post_init__(self): self.data_schema = DataSchema(**json.loads(self.data_schema)) elif isinstance(self.data_schema, dict): self.data_schema = DataSchema(**self.data_schema) + + def get_config(self) -> Dict[str, Union[str, List[str]]]: + """ + Return the parameters as a config dictionary. + + :return: A dict of parameters. + """ + data = super().get_config() + if self.data_schema is not None: + data["data_schema"] = str(self.data_schema) + if self.rag is not None: + data["rag"] = data["rag"] = str(self.rag).lower() + if self.raw_text is not None: + data["raw_text"] = data["raw_text"] = str(self.raw_text).lower() + if self.polygon is not None: + data["polygon"] = data["polygon"] = str(self.polygon).lower() + if self.confidence is not None: + data["confidence"] = data["confidence"] = str(self.confidence).lower() + if self.text_context is not None: + data["text_context"] = self.text_context + return data diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index 9638c509..bfad0bc7 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -1,11 +1,10 @@ import os -from typing import Dict, List, Optional, Union +from typing import Dict, Optional, Union import requests from mindee.error.mindee_error import MindeeApiV2Error from mindee.input import LocalInputSource, UrlInputSource, BaseParameters -from mindee.input.inference_parameters import InferenceParameters from mindee.logger import logger from mindee.mindee_http.base_settings import USER_AGENT from mindee.mindee_http.settings_mixin import SettingsMixin @@ -87,14 +86,8 @@ def req_post_inference_enqueue( """ if not slug: slug = "inferences" - data: Dict[str, Union[str, List[str]]] = {"model_id": params.model_id} + data = params.get_config() url = f"{self.url_root}/{slug}/enqueue" - if isinstance(params, InferenceParameters): - self._set_inference_params(data, params) - if params.webhook_ids and len(params.webhook_ids) > 0: - data["webhook_ids"] = params.webhook_ids - if params.alias and len(params.alias): - data["alias"] = params.alias if isinstance(input_source, LocalInputSource): files = {"file": input_source.read_contents(params.close_file)} @@ -117,28 +110,6 @@ def req_post_inference_enqueue( raise MindeeApiV2Error("Invalid input source.") return response - def _set_inference_params( - self, data: Dict[str, Union[str, List[str]]], params: InferenceParameters - ) -> None: - """ - Sets the inference-specific parameters. - - :param data: Data dict to fill. - :param params: Parameters to add. - """ - if params.rag is not None: - data["rag"] = str(params.rag).lower() - if params.raw_text is not None: - data["raw_text"] = str(params.raw_text).lower() - if params.confidence is not None: - data["confidence"] = str(params.confidence).lower() - if params.polygon is not None: - data["polygon"] = str(params.polygon).lower() - if params.text_context and len(params.text_context): - data["text_context"] = params.text_context - if params.data_schema is not None: - data["data_schema"] = str(params.data_schema) - def req_get_job(self, job_id: str) -> requests.Response: """ Sends a request matching a given queue_id. Returns either a Job or a Document. diff --git a/mindee/input/split_parameters.py b/mindee/v2/parsing/inference/split/split_parameters.py similarity index 100% rename from mindee/input/split_parameters.py rename to mindee/v2/parsing/inference/split/split_parameters.py From 30a6a7536d4808192461e97a319fa67c94868820 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:05:22 +0100 Subject: [PATCH 12/19] add full support for split --- mindee/v2/__init__.py | 4 +- mindee/v2/parsing/__init__.py | 4 +- mindee/v2/parsing/inference/__init__.py | 6 ++ mindee/v2/parsing/inference/split/split.py | 20 +++++++ .../inference/split/split_inference.py | 7 +-- .../parsing/inference/split/split_result.py | 17 ++++++ tests/v2/parsing/test_split_integration.py | 4 +- tests/v2/parsing/test_split_response.py | 59 ++++++++----------- 8 files changed, 77 insertions(+), 44 deletions(-) create mode 100644 mindee/v2/parsing/inference/split/split.py create mode 100644 mindee/v2/parsing/inference/split/split_result.py diff --git a/mindee/v2/__init__.py b/mindee/v2/__init__.py index 1aa33bf9..728ad060 100644 --- a/mindee/v2/__init__.py +++ b/mindee/v2/__init__.py @@ -1,15 +1,15 @@ +from mindee.v2.parsing import SplitParameters from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.parsing.inference.base_inference_response import ( BaseInferenceResponse, TypeInferenceResponse, ) -from mindee.v2.parsing.inference.split.split_inference import SplitInference from mindee.v2.parsing.inference.split.split_response import SplitResponse __all__ = [ "BaseInference", "BaseInferenceResponse", "TypeInferenceResponse", - "SplitInference", "SplitResponse", + "SplitParameters", ] diff --git a/mindee/v2/parsing/__init__.py b/mindee/v2/parsing/__init__.py index 1aa33bf9..4dd422c9 100644 --- a/mindee/v2/parsing/__init__.py +++ b/mindee/v2/parsing/__init__.py @@ -1,15 +1,15 @@ +from mindee.v2.parsing.inference import SplitParameters from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.parsing.inference.base_inference_response import ( BaseInferenceResponse, TypeInferenceResponse, ) -from mindee.v2.parsing.inference.split.split_inference import SplitInference from mindee.v2.parsing.inference.split.split_response import SplitResponse __all__ = [ "BaseInference", "BaseInferenceResponse", "TypeInferenceResponse", - "SplitInference", "SplitResponse", + "SplitParameters", ] diff --git a/mindee/v2/parsing/inference/__init__.py b/mindee/v2/parsing/inference/__init__.py index 1aa33bf9..c1a924d1 100644 --- a/mindee/v2/parsing/inference/__init__.py +++ b/mindee/v2/parsing/inference/__init__.py @@ -3,13 +3,19 @@ BaseInferenceResponse, TypeInferenceResponse, ) +from mindee.v2.parsing.inference.split.split import Split from mindee.v2.parsing.inference.split.split_inference import SplitInference +from mindee.v2.parsing.inference.split.split_parameters import SplitParameters from mindee.v2.parsing.inference.split.split_response import SplitResponse +from mindee.v2.parsing.inference.split.split_result import SplitResult __all__ = [ "BaseInference", "BaseInferenceResponse", "TypeInferenceResponse", + "Split", "SplitInference", + "SplitParameters", "SplitResponse", + "SplitResult", ] diff --git a/mindee/v2/parsing/inference/split/split.py b/mindee/v2/parsing/inference/split/split.py new file mode 100644 index 00000000..141fb578 --- /dev/null +++ b/mindee/v2/parsing/inference/split/split.py @@ -0,0 +1,20 @@ +from typing import List + +from mindee.parsing.common.string_dict import StringDict + + +class Split: + """Split inference result.""" + + page_range: List[int] + """Page range of the split inference.""" + document_type: str + """Document type of the split inference.""" + + def __init__(self, server_response: StringDict): + self.page_range = server_response["page_range"] + self.document_type = server_response["document_type"] + + def __str__(self) -> str: + page_range = ",".join([str(page_index) for page_index in self.page_range]) + return f":Page Range: {page_range}\n:Document Type: {self.document_type}" diff --git a/mindee/v2/parsing/inference/split/split_inference.py b/mindee/v2/parsing/inference/split/split_inference.py index dfab6b9f..c3271419 100644 --- a/mindee/v2/parsing/inference/split/split_inference.py +++ b/mindee/v2/parsing/inference/split/split_inference.py @@ -1,20 +1,19 @@ -from typing import Any - from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference +from mindee.v2.parsing.inference.split.split_result import SplitResult class SplitInference(BaseInference): """Split inference result.""" - result: Any + result: SplitResult """Result of a split inference.""" _slug: str = "split" """Slug of the endpoint.""" def __init__(self, raw_response: StringDict) -> None: super().__init__(raw_response) - self.result = raw_response["result"] + self.result = SplitResult(raw_response["result"]) def __str__(self) -> str: return f"Inference\n#########\n{self.model}\n{self.file}\n{self.result}\n" diff --git a/mindee/v2/parsing/inference/split/split_result.py b/mindee/v2/parsing/inference/split/split_result.py new file mode 100644 index 00000000..59eb342d --- /dev/null +++ b/mindee/v2/parsing/inference/split/split_result.py @@ -0,0 +1,17 @@ +from typing import List + +from mindee.parsing.common.string_dict import StringDict +from mindee.v2.parsing.inference.split.split import Split + + +class SplitResult: + """Split result info.""" + + split: List[Split] + + def __init__(self, raw_response: StringDict) -> None: + self.split = [Split(split) for split in raw_response["split"]] + + def __str__(self) -> str: + out_str = f"Splits\n======{self.split}" + return out_str diff --git a/tests/v2/parsing/test_split_integration.py b/tests/v2/parsing/test_split_integration.py index 9274a3ec..fd544b84 100644 --- a/tests/v2/parsing/test_split_integration.py +++ b/tests/v2/parsing/test_split_integration.py @@ -35,5 +35,5 @@ def test_split_blank(v2_client: ClientV2, split_model_id: str): ) # Note: do not use blank_1.pdf for this. assert response.inference is not None assert response.inference.file.name == "default_sample.pdf" - assert response.inference.result.get("split") - assert len(response.inference.result.get("split")) == 2 + assert response.inference.result.split + assert len(response.inference.result.split) == 2 diff --git a/tests/v2/parsing/test_split_response.py b/tests/v2/parsing/test_split_response.py index 08823ac4..9e684354 100644 --- a/tests/v2/parsing/test_split_response.py +++ b/tests/v2/parsing/test_split_response.py @@ -1,8 +1,10 @@ import pytest from mindee import LocalResponse +from mindee.v2.parsing.inference.split.split import Split from mindee.v2.parsing.inference.split.split_inference import SplitInference from mindee.v2.parsing.inference.split.split_response import SplitResponse +from mindee.v2.parsing.inference.split.split_result import SplitResult from tests.utils import V2_UTILITIES_DATA_DIR @@ -11,14 +13,11 @@ def test_split_single(): input_inference = LocalResponse(V2_UTILITIES_DATA_DIR / "split_single.json") split_response = input_inference.deserialize_response(SplitResponse) assert isinstance(split_response.inference, SplitInference) - assert split_response.inference.result.get("split") - assert len(split_response.inference.result.get("split")[0].get("page_range")) == 2 - assert split_response.inference.result.get("split")[0].get("page_range")[0] == 0 - assert split_response.inference.result.get("split")[0].get("page_range")[1] == 0 - assert ( - split_response.inference.result.get("split")[0].get("document_type") - == "receipt" - ) + assert split_response.inference.result.split + assert len(split_response.inference.result.split[0].page_range) == 2 + assert split_response.inference.result.split[0].page_range[0] == 0 + assert split_response.inference.result.split[0].page_range[1] == 0 + assert split_response.inference.result.split[0].document_type == "receipt" @pytest.mark.v2 @@ -26,29 +25,21 @@ def test_split_multiple(): input_inference = LocalResponse(V2_UTILITIES_DATA_DIR / "split_multiple.json") split_response = input_inference.deserialize_response(SplitResponse) assert isinstance(split_response.inference, SplitInference) - assert split_response.inference.result.get("split") - assert len(split_response.inference.result.get("split")) == 3 - - assert len(split_response.inference.result.get("split")[0].get("page_range")) == 2 - assert split_response.inference.result.get("split")[0].get("page_range")[0] == 0 - assert split_response.inference.result.get("split")[0].get("page_range")[1] == 0 - assert ( - split_response.inference.result.get("split")[0].get("document_type") - == "invoice" - ) - - assert len(split_response.inference.result.get("split")[1].get("page_range")) == 2 - assert split_response.inference.result.get("split")[1].get("page_range")[0] == 1 - assert split_response.inference.result.get("split")[1].get("page_range")[1] == 3 - assert ( - split_response.inference.result.get("split")[1].get("document_type") - == "invoice" - ) - - assert len(split_response.inference.result.get("split")[2].get("page_range")) == 2 - assert split_response.inference.result.get("split")[2].get("page_range")[0] == 4 - assert split_response.inference.result.get("split")[2].get("page_range")[1] == 4 - assert ( - split_response.inference.result.get("split")[2].get("document_type") - == "invoice" - ) + assert isinstance(split_response.inference.result, SplitResult) + assert isinstance(split_response.inference.result.split[0], Split) + assert len(split_response.inference.result.split) == 3 + + assert len(split_response.inference.result.split[0].page_range) == 2 + assert split_response.inference.result.split[0].page_range[0] == 0 + assert split_response.inference.result.split[0].page_range[1] == 0 + assert split_response.inference.result.split[0].document_type == "invoice" + + assert len(split_response.inference.result.split[1].page_range) == 2 + assert split_response.inference.result.split[1].page_range[0] == 1 + assert split_response.inference.result.split[1].page_range[1] == 3 + assert split_response.inference.result.split[1].document_type == "invoice" + + assert len(split_response.inference.result.split[2].page_range) == 2 + assert split_response.inference.result.split[2].page_range[0] == 4 + assert split_response.inference.result.split[2].page_range[1] == 4 + assert split_response.inference.result.split[2].document_type == "invoice" From 6c97e7aaa501a6859771d30ebb2f412a1aaf84c2 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 17:08:53 +0100 Subject: [PATCH 13/19] revise entire endpoint slug generation system & refactor input parameters --- mindee/client_v2.py | 36 +++++++----------- mindee/input/__init__.py | 2 +- mindee/input/base_parameters.py | 10 ++++- mindee/input/inference_parameters.py | 5 ++- mindee/mindee_http/mindee_api_v2.py | 13 ++----- mindee/parsing/v2/inference.py | 2 - mindee/parsing/v2/inference_response.py | 15 +++++--- mindee/v2/__init__.py | 11 +----- mindee/v2/parsing/__init__.py | 10 +---- mindee/v2/parsing/inference/__init__.py | 20 +++++----- mindee/v2/parsing/inference/base_inference.py | 7 ---- .../inference/base_inference_response.py | 38 ------------------- mindee/v2/parsing/inference/base_response.py | 22 +++++++++++ mindee/v2/parsing/inference/split/__init__.py | 0 .../inference/split/split_parameters.py | 7 ---- .../parsing/inference/split/split_response.py | 19 ---------- .../parsing/inference/utilities/__init__.py | 15 ++++++++ .../inference/utilities/split/__init__.py | 13 +++++++ .../{ => utilities}/split/split_inference.py | 2 +- .../utilities/split/split_parameters.py | 9 +++++ .../utilities/split/split_response.py | 17 +++++++++ .../{ => utilities}/split/split_result.py | 6 +-- .../split/split_split.py} | 2 +- .../inference/utilities/utility_parameters.py | 9 +++++ .../inference/utilities/utility_response.py | 9 +++++ tests/v2/parsing/test_split_response.py | 10 ++--- 26 files changed, 156 insertions(+), 153 deletions(-) delete mode 100644 mindee/v2/parsing/inference/base_inference_response.py create mode 100644 mindee/v2/parsing/inference/base_response.py delete mode 100644 mindee/v2/parsing/inference/split/__init__.py delete mode 100644 mindee/v2/parsing/inference/split/split_parameters.py delete mode 100644 mindee/v2/parsing/inference/split/split_response.py create mode 100644 mindee/v2/parsing/inference/utilities/__init__.py create mode 100644 mindee/v2/parsing/inference/utilities/split/__init__.py rename mindee/v2/parsing/inference/{ => utilities}/split/split_inference.py (87%) create mode 100644 mindee/v2/parsing/inference/utilities/split/split_parameters.py create mode 100644 mindee/v2/parsing/inference/utilities/split/split_response.py rename mindee/v2/parsing/inference/{ => utilities}/split/split_result.py (61%) rename mindee/v2/parsing/inference/{split/split.py => utilities/split/split_split.py} (97%) create mode 100644 mindee/v2/parsing/inference/utilities/utility_parameters.py create mode 100644 mindee/v2/parsing/inference/utilities/utility_response.py diff --git a/mindee/client_v2.py b/mindee/client_v2.py index 89fd5fe0..3a7ad300 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -16,13 +16,11 @@ is_valid_post_response, ) from mindee.parsing.v2.common_response import CommonStatus -from mindee.v2 import BaseInferenceResponse +from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.parsing.v2.inference_response import InferenceResponse from mindee.parsing.v2.job_response import JobResponse -TypeBaseInferenceResponse = TypeVar( - "TypeBaseInferenceResponse", bound=BaseInferenceResponse -) +TypeBaseInferenceResponse = TypeVar("TypeBaseInferenceResponse", bound=BaseResponse) class ClientV2(ClientMixin): @@ -48,7 +46,6 @@ def enqueue_inference( self, input_source: Union[LocalInputSource, UrlInputSource], params: BaseParameters, - slug: Optional[str] = None, disable_redundant_warnings: bool = False, ) -> JobResponse: """[Deprecated] Use `enqueue` instead.""" @@ -58,13 +55,12 @@ def enqueue_inference( DeprecationWarning, stacklevel=2, ) - return self.enqueue(input_source, params, slug) + return self.enqueue(input_source, params) def enqueue( self, input_source: Union[LocalInputSource, UrlInputSource], params: BaseParameters, - slug: Optional[str] = None, ) -> JobResponse: """ Enqueues a document to a given model. @@ -77,7 +73,7 @@ def enqueue( """ logger.debug("Enqueuing inference using model: %s", params.model_id) response = self.mindee_api.req_post_inference_enqueue( - input_source=input_source, params=params, slug=slug + input_source=input_source, params=params, slug=params.get_enqueue_slug() ) dict_response = response.json() @@ -105,9 +101,9 @@ def get_job(self, job_id: str) -> JobResponse: def get_inference( self, inference_id: str, - response_type: Type[BaseInferenceResponse] = InferenceResponse, + response_type: Type[BaseResponse] = InferenceResponse, disable_redundant_warnings: bool = False, - ) -> BaseInferenceResponse: + ) -> BaseResponse: """[Deprecated] Use `get_result` instead.""" if not disable_redundant_warnings: warnings.warn( @@ -120,8 +116,8 @@ def get_inference( def get_result( self, inference_id: str, - response_type: Type[BaseInferenceResponse] = InferenceResponse, - ) -> BaseInferenceResponse: + response_type: Type[BaseResponse] = InferenceResponse, + ) -> BaseResponse: """ Get the result of an inference that was previously enqueued. @@ -132,11 +128,10 @@ def get_result( :return: An inference response. """ logger.debug("Fetching inference: %s", inference_id) - slug = None - if response_type and response_type is not InferenceResponse: - slug = "utilities/" + response_type.get_inference_slug() - response = self.mindee_api.req_get_inference(inference_id, slug) + response = self.mindee_api.req_get_inference( + inference_id, response_type.get_result_slug() + ) if not is_valid_get_response(response): handle_error_v2(response.json()) dict_response = response.json() @@ -146,8 +141,8 @@ def _enqueue_and_get( self, input_source: Union[LocalInputSource, UrlInputSource], params: BaseParameters, - response_type: Optional[Type[BaseInferenceResponse]] = InferenceResponse, - ) -> BaseInferenceResponse: + response_type: Optional[Type[BaseResponse]] = InferenceResponse, + ) -> BaseResponse: """ Enqueues to an asynchronous endpoint and automatically polls for a response. @@ -164,10 +159,7 @@ def _enqueue_and_get( params.polling_options.delay_sec, params.polling_options.max_retries, ) - slug = None - if response_type and response_type is not InferenceResponse: - slug = "utilities/" + response_type.get_inference_slug() - enqueue_response = self.enqueue_inference(input_source, params, slug, True) + enqueue_response = self.enqueue_inference(input_source, params, True) logger.debug( "Successfully enqueued document with job id: %s", enqueue_response.job.id ) diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index b80e2ce8..c711672c 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -1,7 +1,7 @@ from mindee.input.local_response import LocalResponse from mindee.input.base_parameters import BaseParameters from mindee.input.inference_parameters import InferenceParameters -from mindee.v2.parsing.inference.split.split_parameters import SplitParameters +from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters from mindee.input.page_options import PageOptions from mindee.input.polling_options import PollingOptions from mindee.input.sources.base_64_input import Base64Input diff --git a/mindee/input/base_parameters.py b/mindee/input/base_parameters.py index 5de8ca58..159da4c4 100644 --- a/mindee/input/base_parameters.py +++ b/mindee/input/base_parameters.py @@ -1,5 +1,5 @@ from abc import ABC -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict, Optional, List, Union from mindee.input.polling_options import PollingOptions @@ -9,6 +9,9 @@ class BaseParameters(ABC): """Base class for parameters accepted by all V2 endpoints.""" + _slug: str = field(init=False) + """Slug of the endpoint.""" + model_id: str """ID of the model, required.""" alias: Optional[str] = None @@ -34,3 +37,8 @@ def get_config(self) -> Dict[str, Union[str, List[str]]]: if self.webhook_ids and len(self.webhook_ids) > 0: data["webhook_ids"] = self.webhook_ids return data + + @classmethod + def get_enqueue_slug(cls) -> str: + """Getter for the enqueue slug.""" + return cls._slug diff --git a/mindee/input/inference_parameters.py b/mindee/input/inference_parameters.py index 5e6e3622..1554da31 100644 --- a/mindee/input/inference_parameters.py +++ b/mindee/input/inference_parameters.py @@ -1,5 +1,5 @@ import json -from dataclasses import dataclass, asdict +from dataclasses import dataclass, asdict, field from typing import Dict, List, Optional, Union from mindee.input.base_parameters import BaseParameters @@ -81,6 +81,9 @@ def __post_init__(self) -> None: class InferenceParameters(BaseParameters): """Inference parameters to set when sending a file.""" + _slug: str = field(init=False, default="inferences") + """Slug of the endpoint.""" + rag: Optional[bool] = None """Enhance extraction accuracy with Retrieval-Augmented Generation.""" raw_text: Optional[bool] = None diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index bfad0bc7..1cb7a8b2 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -74,7 +74,7 @@ def req_post_inference_enqueue( self, input_source: Union[LocalInputSource, UrlInputSource], params: BaseParameters, - slug: Optional[str] = None, + slug: str, ) -> requests.Response: """ Make an asynchronous request to POST a document for prediction on the V2 API. @@ -84,8 +84,6 @@ def req_post_inference_enqueue( :param slug: Slug to use for the enqueueing, defaults to 'inferences'. :return: requests response. """ - if not slug: - slug = "inferences" data = params.get_config() url = f"{self.url_root}/{slug}/enqueue" @@ -123,9 +121,7 @@ def req_get_job(self, job_id: str) -> requests.Response: allow_redirects=False, ) - def req_get_inference( - self, inference_id: str, slug: Optional[str] - ) -> requests.Response: + def req_get_inference(self, inference_id: str, slug: str) -> requests.Response: """ Sends a request matching a given queue_id. Returns either a Job or a Document. @@ -133,10 +129,7 @@ def req_get_inference( :param slug: Slug of the inference, defaults to nothing. """ - if not slug: - url = f"{self.url_root}/inferences/{inference_id}" - else: - url = f"{self.url_root}/{slug}/{inference_id}" + url = f"{self.url_root}/{slug}/{inference_id}" return requests.get( url, headers=self.base_headers, diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py index 0fdbcd3a..477cc41c 100644 --- a/mindee/parsing/v2/inference.py +++ b/mindee/parsing/v2/inference.py @@ -11,8 +11,6 @@ class Inference(BaseInference): """Result of the inference.""" active_options: InferenceActiveOptions """Active options for the inference.""" - _slug: str = "inferences" - """Slug of the inference.""" def __init__(self, raw_response: StringDict): super().__init__(raw_response) diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index 71e7c62c..ff056d36 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -1,16 +1,17 @@ from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.inference import Inference -from mindee.v2.parsing.inference.base_inference_response import ( - BaseInferenceResponse, +from mindee.v2.parsing.inference.base_response import ( + BaseResponse, ) -class InferenceResponse(BaseInferenceResponse[Inference]): +class InferenceResponse(BaseResponse): """Represent an inference response from Mindee V2 API.""" inference: Inference """Inference result.""" - inference_type = Inference + _slug: str = "inferences" + """Slug of the inference.""" def __init__(self, raw_response: StringDict) -> None: super().__init__(raw_response) @@ -19,5 +20,7 @@ def __init__(self, raw_response: StringDict) -> None: def __str__(self) -> str: return str(self.inference) - def _set_inference_type(self, inference_response: StringDict): - return Inference + @classmethod + def get_result_slug(cls) -> str: + """Getter for the inference slug.""" + return cls._slug diff --git a/mindee/v2/__init__.py b/mindee/v2/__init__.py index 728ad060..ccfdb7f9 100644 --- a/mindee/v2/__init__.py +++ b/mindee/v2/__init__.py @@ -1,15 +1,8 @@ from mindee.v2.parsing import SplitParameters -from mindee.v2.parsing.inference.base_inference import BaseInference -from mindee.v2.parsing.inference.base_inference_response import ( - BaseInferenceResponse, - TypeInferenceResponse, -) -from mindee.v2.parsing.inference.split.split_response import SplitResponse + +from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse __all__ = [ - "BaseInference", - "BaseInferenceResponse", - "TypeInferenceResponse", "SplitResponse", "SplitParameters", ] diff --git a/mindee/v2/parsing/__init__.py b/mindee/v2/parsing/__init__.py index 4dd422c9..b060cb17 100644 --- a/mindee/v2/parsing/__init__.py +++ b/mindee/v2/parsing/__init__.py @@ -1,15 +1,7 @@ from mindee.v2.parsing.inference import SplitParameters -from mindee.v2.parsing.inference.base_inference import BaseInference -from mindee.v2.parsing.inference.base_inference_response import ( - BaseInferenceResponse, - TypeInferenceResponse, -) -from mindee.v2.parsing.inference.split.split_response import SplitResponse +from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse __all__ = [ - "BaseInference", - "BaseInferenceResponse", - "TypeInferenceResponse", "SplitResponse", "SplitParameters", ] diff --git a/mindee/v2/parsing/inference/__init__.py b/mindee/v2/parsing/inference/__init__.py index c1a924d1..03089a56 100644 --- a/mindee/v2/parsing/inference/__init__.py +++ b/mindee/v2/parsing/inference/__init__.py @@ -1,19 +1,17 @@ from mindee.v2.parsing.inference.base_inference import BaseInference -from mindee.v2.parsing.inference.base_inference_response import ( - BaseInferenceResponse, - TypeInferenceResponse, +from mindee.v2.parsing.inference.base_response import ( + BaseResponse, ) -from mindee.v2.parsing.inference.split.split import Split -from mindee.v2.parsing.inference.split.split_inference import SplitInference -from mindee.v2.parsing.inference.split.split_parameters import SplitParameters -from mindee.v2.parsing.inference.split.split_response import SplitResponse -from mindee.v2.parsing.inference.split.split_result import SplitResult +from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit +from mindee.v2.parsing.inference.utilities.split import SplitInference +from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters +from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse +from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult __all__ = [ "BaseInference", - "BaseInferenceResponse", - "TypeInferenceResponse", - "Split", + "BaseResponse", + "SplitSplit", "SplitInference", "SplitParameters", "SplitResponse", diff --git a/mindee/v2/parsing/inference/base_inference.py b/mindee/v2/parsing/inference/base_inference.py index 950ceab7..78462f0f 100644 --- a/mindee/v2/parsing/inference/base_inference.py +++ b/mindee/v2/parsing/inference/base_inference.py @@ -9,8 +9,6 @@ class BaseInference(ABC): """Base class for V2 inference objects.""" - _slug: str - """Slug of the inference.""" model: InferenceModel """Model info for the inference.""" file: InferenceFile @@ -23,10 +21,5 @@ def __init__(self, raw_response: StringDict): self.model = InferenceModel(raw_response["model"]) self.file = InferenceFile(raw_response["file"]) - @classmethod - def get_slug(cls) -> str: - """Getter for the inference slug.""" - return cls._slug - TypeBaseInference = TypeVar("TypeBaseInference", bound=BaseInference) diff --git a/mindee/v2/parsing/inference/base_inference_response.py b/mindee/v2/parsing/inference/base_inference_response.py deleted file mode 100644 index 279cb043..00000000 --- a/mindee/v2/parsing/inference/base_inference_response.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import ClassVar, Type, TypeVar, Generic - -from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.inference.base_inference import BaseInference, TypeBaseInference - -from mindee.parsing.v2.common_response import CommonResponse - - -class BaseInferenceResponse(CommonResponse, Generic[TypeBaseInference]): - """Base class for V2 inference responses.""" - - inference: BaseInference - """The inference result for a split utility request""" - inference_type: ClassVar[Type[BaseInference]] - """Inference class used for slug derivation.""" - - def __init__(self, raw_response: StringDict) -> None: - super().__init__(raw_response) - self.inference = self._set_inference_type(raw_response["inference"]) - - def _set_inference_type(self, inference_response: StringDict): - """ - Sets the inference type. - - :param inference_response: Server response. - """ - raise NotImplementedError() - - @classmethod - def get_inference_slug(cls) -> str: - """Getter for the inference slug.""" - return cls.inference_type.get_slug() - - def __str__(self) -> str: - return str(self.inference) - - -TypeInferenceResponse = TypeVar("TypeInferenceResponse", bound=BaseInferenceResponse) diff --git a/mindee/v2/parsing/inference/base_response.py b/mindee/v2/parsing/inference/base_response.py new file mode 100644 index 00000000..feab839a --- /dev/null +++ b/mindee/v2/parsing/inference/base_response.py @@ -0,0 +1,22 @@ +from abc import ABC + +from mindee.v2.parsing.inference.base_inference import BaseInference + +from mindee.parsing.v2.common_response import CommonResponse + + +class BaseResponse(ABC, CommonResponse): + """Base class for V2 inference responses.""" + + inference: BaseInference + """The inference result for a split utility request""" + _slug: str + """Slug of the inference.""" + + def __str__(self) -> str: + return str(self.inference) + + @classmethod + def get_result_slug(cls) -> str: + """Getter for the inference slug.""" + raise NotImplementedError("Subclasses must implement get_result_slug method") diff --git a/mindee/v2/parsing/inference/split/__init__.py b/mindee/v2/parsing/inference/split/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mindee/v2/parsing/inference/split/split_parameters.py b/mindee/v2/parsing/inference/split/split_parameters.py deleted file mode 100644 index d5957d8b..00000000 --- a/mindee/v2/parsing/inference/split/split_parameters.py +++ /dev/null @@ -1,7 +0,0 @@ -from mindee.input.base_parameters import BaseParameters - - -class SplitParameters(BaseParameters): - """ - Parameters accepted by the split utility v2 endpoint. - """ diff --git a/mindee/v2/parsing/inference/split/split_response.py b/mindee/v2/parsing/inference/split/split_response.py deleted file mode 100644 index 819cb555..00000000 --- a/mindee/v2/parsing/inference/split/split_response.py +++ /dev/null @@ -1,19 +0,0 @@ -from mindee.v2.parsing.inference.base_inference_response import ( - BaseInferenceResponse, -) -from mindee.v2.parsing.inference.split.split_inference import SplitInference - - -class SplitResponse(BaseInferenceResponse[SplitInference]): - """Represent a split inference response from Mindee V2 API.""" - - inference: SplitInference - inference_type = SplitInference - - def _set_inference_type(self, inference_response): - """ - Sets the inference type. - - :param inference_response: Server response. - """ - return SplitInference(inference_response) diff --git a/mindee/v2/parsing/inference/utilities/__init__.py b/mindee/v2/parsing/inference/utilities/__init__.py new file mode 100644 index 00000000..b938a20e --- /dev/null +++ b/mindee/v2/parsing/inference/utilities/__init__.py @@ -0,0 +1,15 @@ +from mindee.v2.parsing.inference.utilities.split.split_inference import SplitInference +from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters +from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse +from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult +from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit +from mindee.v2.parsing.inference.utilities.utility_response import UtilityResponse + +__all__ = [ + "SplitInference", + "SplitParameters", + "SplitResponse", + "SplitResult", + "SplitSplit", + "UtilityResponse", +] diff --git a/mindee/v2/parsing/inference/utilities/split/__init__.py b/mindee/v2/parsing/inference/utilities/split/__init__.py new file mode 100644 index 00000000..203f2ad0 --- /dev/null +++ b/mindee/v2/parsing/inference/utilities/split/__init__.py @@ -0,0 +1,13 @@ +from mindee.v2.parsing.inference.utilities.split.split_inference import SplitInference +from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters +from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse +from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult +from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit + +__all__ = [ + "SplitInference", + "SplitParameters", + "SplitResponse", + "SplitResult", + "SplitSplit", +] diff --git a/mindee/v2/parsing/inference/split/split_inference.py b/mindee/v2/parsing/inference/utilities/split/split_inference.py similarity index 87% rename from mindee/v2/parsing/inference/split/split_inference.py rename to mindee/v2/parsing/inference/utilities/split/split_inference.py index c3271419..d3175da8 100644 --- a/mindee/v2/parsing/inference/split/split_inference.py +++ b/mindee/v2/parsing/inference/utilities/split/split_inference.py @@ -1,6 +1,6 @@ from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference -from mindee.v2.parsing.inference.split.split_result import SplitResult +from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult class SplitInference(BaseInference): diff --git a/mindee/v2/parsing/inference/utilities/split/split_parameters.py b/mindee/v2/parsing/inference/utilities/split/split_parameters.py new file mode 100644 index 00000000..1edae849 --- /dev/null +++ b/mindee/v2/parsing/inference/utilities/split/split_parameters.py @@ -0,0 +1,9 @@ +from mindee.v2.parsing.inference.utilities.utility_parameters import UtilityParameters + + +class SplitParameters(UtilityParameters): + """ + Parameters accepted by the split utility v2 endpoint. + """ + + _slug: str = "split" diff --git a/mindee/v2/parsing/inference/utilities/split/split_response.py b/mindee/v2/parsing/inference/utilities/split/split_response.py new file mode 100644 index 00000000..03386698 --- /dev/null +++ b/mindee/v2/parsing/inference/utilities/split/split_response.py @@ -0,0 +1,17 @@ +from mindee.parsing.common.string_dict import StringDict +from mindee.v2.parsing.inference.utilities.split.split_inference import SplitInference +from mindee.v2.parsing.inference.utilities.utility_response import UtilityResponse + + +class SplitResponse(UtilityResponse): + """Represent a split inference response from Mindee V2 API.""" + + inference: SplitInference + """Inference object for split inference.""" + + _slug: str = "split" + """Slug of the inference.""" + + def __init__(self, raw_response: StringDict) -> None: + super().__init__(raw_response) + self.inference = SplitInference(raw_response["inference"]) diff --git a/mindee/v2/parsing/inference/split/split_result.py b/mindee/v2/parsing/inference/utilities/split/split_result.py similarity index 61% rename from mindee/v2/parsing/inference/split/split_result.py rename to mindee/v2/parsing/inference/utilities/split/split_result.py index 59eb342d..5dafd3f6 100644 --- a/mindee/v2/parsing/inference/split/split_result.py +++ b/mindee/v2/parsing/inference/utilities/split/split_result.py @@ -1,16 +1,16 @@ from typing import List from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.inference.split.split import Split +from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit class SplitResult: """Split result info.""" - split: List[Split] + split: List[SplitSplit] def __init__(self, raw_response: StringDict) -> None: - self.split = [Split(split) for split in raw_response["split"]] + self.split = [SplitSplit(split) for split in raw_response["split"]] def __str__(self) -> str: out_str = f"Splits\n======{self.split}" diff --git a/mindee/v2/parsing/inference/split/split.py b/mindee/v2/parsing/inference/utilities/split/split_split.py similarity index 97% rename from mindee/v2/parsing/inference/split/split.py rename to mindee/v2/parsing/inference/utilities/split/split_split.py index 141fb578..f6f1df8d 100644 --- a/mindee/v2/parsing/inference/split/split.py +++ b/mindee/v2/parsing/inference/utilities/split/split_split.py @@ -3,7 +3,7 @@ from mindee.parsing.common.string_dict import StringDict -class Split: +class SplitSplit: """Split inference result.""" page_range: List[int] diff --git a/mindee/v2/parsing/inference/utilities/utility_parameters.py b/mindee/v2/parsing/inference/utilities/utility_parameters.py new file mode 100644 index 00000000..b643671e --- /dev/null +++ b/mindee/v2/parsing/inference/utilities/utility_parameters.py @@ -0,0 +1,9 @@ +from mindee.input.base_parameters import BaseParameters + + +class UtilityParameters(BaseParameters): + """Parameters accepted by the utility v2 endpoint.""" + + @classmethod + def get_enqueue_slug(cls) -> str: + return "utilities/" + cls._slug diff --git a/mindee/v2/parsing/inference/utilities/utility_response.py b/mindee/v2/parsing/inference/utilities/utility_response.py new file mode 100644 index 00000000..972ed5e3 --- /dev/null +++ b/mindee/v2/parsing/inference/utilities/utility_response.py @@ -0,0 +1,9 @@ +from mindee.v2.parsing.inference.base_response import BaseResponse + + +class UtilityResponse(BaseResponse): + """Base class for utility responses.""" + + @classmethod + def get_result_slug(cls) -> str: + return "utilities/" + cls._slug diff --git a/tests/v2/parsing/test_split_response.py b/tests/v2/parsing/test_split_response.py index 9e684354..5fc8259d 100644 --- a/tests/v2/parsing/test_split_response.py +++ b/tests/v2/parsing/test_split_response.py @@ -1,10 +1,10 @@ import pytest from mindee import LocalResponse -from mindee.v2.parsing.inference.split.split import Split -from mindee.v2.parsing.inference.split.split_inference import SplitInference -from mindee.v2.parsing.inference.split.split_response import SplitResponse -from mindee.v2.parsing.inference.split.split_result import SplitResult +from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit +from mindee.v2.parsing.inference.utilities.split import SplitInference +from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse +from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult from tests.utils import V2_UTILITIES_DATA_DIR @@ -26,7 +26,7 @@ def test_split_multiple(): split_response = input_inference.deserialize_response(SplitResponse) assert isinstance(split_response.inference, SplitInference) assert isinstance(split_response.inference.result, SplitResult) - assert isinstance(split_response.inference.result.split[0], Split) + assert isinstance(split_response.inference.result.split[0], SplitSplit) assert len(split_response.inference.result.split) == 3 assert len(split_response.inference.result.split[0].page_range) == 2 From 45af404247677a2c6c769711eb5d558b1046ac24 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 17:50:47 +0100 Subject: [PATCH 14/19] fix display & test --- mindee/v2/parsing/inference/utilities/split/split_result.py | 5 ++++- mindee/v2/parsing/inference/utilities/split/split_split.py | 2 +- tests/v2/parsing/test_split_integration.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mindee/v2/parsing/inference/utilities/split/split_result.py b/mindee/v2/parsing/inference/utilities/split/split_result.py index 5dafd3f6..d4179db5 100644 --- a/mindee/v2/parsing/inference/utilities/split/split_result.py +++ b/mindee/v2/parsing/inference/utilities/split/split_result.py @@ -13,5 +13,8 @@ def __init__(self, raw_response: StringDict) -> None: self.split = [SplitSplit(split) for split in raw_response["split"]] def __str__(self) -> str: - out_str = f"Splits\n======{self.split}" + splits = "\n" + if len(self.split) > 0: + splits += "\n\n".join([str(split) for split in self.split]) + out_str = f"Splits\n======{splits}" return out_str diff --git a/mindee/v2/parsing/inference/utilities/split/split_split.py b/mindee/v2/parsing/inference/utilities/split/split_split.py index f6f1df8d..c00c4679 100644 --- a/mindee/v2/parsing/inference/utilities/split/split_split.py +++ b/mindee/v2/parsing/inference/utilities/split/split_split.py @@ -17,4 +17,4 @@ def __init__(self, server_response: StringDict): def __str__(self) -> str: page_range = ",".join([str(page_index) for page_index in self.page_range]) - return f":Page Range: {page_range}\n:Document Type: {self.document_type}" + return f"* :Page Range: {page_range}\n :Document Type: {self.document_type}" diff --git a/tests/v2/parsing/test_split_integration.py b/tests/v2/parsing/test_split_integration.py index fd544b84..aa214f9f 100644 --- a/tests/v2/parsing/test_split_integration.py +++ b/tests/v2/parsing/test_split_integration.py @@ -11,7 +11,7 @@ @pytest.fixture(scope="session") def split_model_id() -> str: """Identifier of the Financial Document model, supplied through an env var.""" - return os.getenv("MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID") + return os.getenv("MINDEE_V2_SPLIT_UTILITY_MODEL_ID") @pytest.fixture(scope="session") From 52a5dd60873898440b3d52af798939dad60a0515 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:03:06 +0100 Subject: [PATCH 15/19] fix name --- tests/v2/parsing/test_split_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v2/parsing/test_split_integration.py b/tests/v2/parsing/test_split_integration.py index aa214f9f..fd544b84 100644 --- a/tests/v2/parsing/test_split_integration.py +++ b/tests/v2/parsing/test_split_integration.py @@ -11,7 +11,7 @@ @pytest.fixture(scope="session") def split_model_id() -> str: """Identifier of the Financial Document model, supplied through an env var.""" - return os.getenv("MINDEE_V2_SPLIT_UTILITY_MODEL_ID") + return os.getenv("MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID") @pytest.fixture(scope="session") From 1d4632306b51e9a633e5328b33827ea025908c8d Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:13:35 +0100 Subject: [PATCH 16/19] add missing name --- .github/workflows/_test-integrations.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index 8b8bfa3c..70b7b0e9 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -49,6 +49,7 @@ jobs: MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} run: | pytest --cov mindee -m integration From b704ca626570b7d3ce05a05f1866358f6a15c688 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 30 Jan 2026 10:44:05 +0100 Subject: [PATCH 17/19] refacto into new syntax --- mindee/client.py | 8 ++++---- mindee/commands/cli_parser.py | 4 ++-- mindee/error/mindee_error.py | 2 +- mindee/input/__init__.py | 2 +- mindee/input/base_parameters.py | 2 +- mindee/mindee_http/endpoint.py | 8 ++++---- mindee/mindee_http/response_validation.py | 2 +- mindee/parsing/common/async_predict_response.py | 2 +- mindee/v2/__init__.py | 5 ++--- mindee/v2/parsing/__init__.py | 8 ++++---- mindee/v2/parsing/inference/__init__.py | 10 ---------- mindee/v2/parsing/inference/base_response.py | 2 +- mindee/v2/parsing/inference/utilities/__init__.py | 15 --------------- .../parsing/inference/utilities/split/__init__.py | 13 ------------- .../inference/utilities/split/split_parameters.py | 9 --------- .../inference/utilities/utility_parameters.py | 9 --------- .../inference/utilities/utility_response.py | 9 --------- mindee/v2/product/__init__.py | 7 +++++++ mindee/v2/product/split/__init__.py | 13 +++++++++++++ .../split/split_inference.py | 2 +- mindee/v2/product/split/split_parameters.py | 9 +++++++++ .../utilities => product}/split/split_response.py | 8 ++++---- .../utilities => product}/split/split_result.py | 2 +- .../utilities => product}/split/split_split.py | 0 tests/v1/test_client.py | 2 +- tests/v2/parsing/test_split_response.py | 8 ++++---- 26 files changed, 62 insertions(+), 99 deletions(-) delete mode 100644 mindee/v2/parsing/inference/utilities/__init__.py delete mode 100644 mindee/v2/parsing/inference/utilities/split/__init__.py delete mode 100644 mindee/v2/parsing/inference/utilities/split/split_parameters.py delete mode 100644 mindee/v2/parsing/inference/utilities/utility_parameters.py delete mode 100644 mindee/v2/parsing/inference/utilities/utility_response.py create mode 100644 mindee/v2/product/__init__.py create mode 100644 mindee/v2/product/split/__init__.py rename mindee/v2/{parsing/inference/utilities => product}/split/split_inference.py (87%) create mode 100644 mindee/v2/product/split/split_parameters.py rename mindee/v2/{parsing/inference/utilities => product}/split/split_response.py (63%) rename mindee/v2/{parsing/inference/utilities => product}/split/split_result.py (86%) rename mindee/v2/{parsing/inference/utilities => product}/split/split_split.py (100%) diff --git a/mindee/client.py b/mindee/client.py index 6b8d3ba1..c19f9f43 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -92,7 +92,7 @@ def parse( This performs a full OCR operation on the server and will increase response time. Only available on financial document APIs. - :param close_file: Whether to ``close()`` the file after parsing it. + :param close_file: Whether to ``close()`` the file after product it. Set to ``False`` if you need to access the file after this operation. :param page_options: If set, remove pages from the document as specified. @@ -154,7 +154,7 @@ def enqueue( :param include_words: Whether to include the full text for each page. This performs a full OCR operation on the server and will increase response time. - :param close_file: Whether to ``close()`` the file after parsing it. + :param close_file: Whether to ``close()`` the file after product it. Set to ``False`` if you need to access the file after this operation. :param page_options: If set, remove pages from the document as specified. @@ -299,7 +299,7 @@ def enqueue_and_parse( # pylint: disable=too-many-locals :param include_words: Whether to include the full text for each page. This performs a full OCR operation on the server and will increase response time. - :param close_file: Whether to ``close()`` the file after parsing it. + :param close_file: Whether to ``close()`` the file after product it. Set to ``False`` if you need to access the file after this operation. :param page_options: If set, remove pages from the document as specified. @@ -353,7 +353,7 @@ def enqueue_and_parse( # pylint: disable=too-many-locals if poll_results.job.status == "failed": raise MindeeError("Parsing failed for job {poll_results.job.id}") logger.debug( - "Polling server for parsing result with job id: %s", queue_result.job.id + "Polling server for product result with job id: %s", queue_result.job.id ) retry_counter += 1 sleep(delay_sec) diff --git a/mindee/commands/cli_parser.py b/mindee/commands/cli_parser.py index 0c7f9859..a2caa86b 100644 --- a/mindee/commands/cli_parser.py +++ b/mindee/commands/cli_parser.py @@ -104,7 +104,7 @@ class MindeeParser: parser: MindeeArgumentParser """Parser options.""" parsed_args: Namespace - """Stores attributes relating to parsing.""" + """Stores attributes relating to product.""" client: Client """Mindee client""" document_info: CommandConfig @@ -159,7 +159,7 @@ def call_parse(self) -> None: print(response.raw_http) else: if response.document is None: - raise MindeeClientError("Something went wrong during async parsing.") + raise MindeeClientError("Something went wrong during async product.") # print the OCR if self.parsed_args.include_words: print("#############\nDocument Text\n#############\n::\n") diff --git a/mindee/error/mindee_error.py b/mindee/error/mindee_error.py index 5565a1a1..fd03af87 100644 --- a/mindee/error/mindee_error.py +++ b/mindee/error/mindee_error.py @@ -4,7 +4,7 @@ class MindeeError(RuntimeError): class MindeeClientError(MindeeError): """ - An exception relating to document parsing errors. + An exception relating to document product errors. Not to be confused with `MindeeHTTPClientError`. """ diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index c711672c..31973802 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -1,7 +1,7 @@ from mindee.input.local_response import LocalResponse from mindee.input.base_parameters import BaseParameters from mindee.input.inference_parameters import InferenceParameters -from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters +from mindee.v2.product.split.split_parameters import SplitParameters from mindee.input.page_options import PageOptions from mindee.input.polling_options import PollingOptions from mindee.input.sources.base_64_input import Base64Input diff --git a/mindee/input/base_parameters.py b/mindee/input/base_parameters.py index 159da4c4..af863052 100644 --- a/mindee/input/base_parameters.py +++ b/mindee/input/base_parameters.py @@ -21,7 +21,7 @@ class BaseParameters(ABC): polling_options: Optional[PollingOptions] = None """Options for polling. Set only if having timeout issues.""" close_file: bool = True - """Whether to close the file after parsing.""" + """Whether to close the file after product.""" def get_config(self) -> Dict[str, Union[str, List[str]]]: """ diff --git a/mindee/mindee_http/endpoint.py b/mindee/mindee_http/endpoint.py index 0275328d..82727d99 100644 --- a/mindee/mindee_http/endpoint.py +++ b/mindee/mindee_http/endpoint.py @@ -44,7 +44,7 @@ def predict_req_post( :param input_source: Input object :param include_words: Include raw OCR words in the response - :param close_file: Whether to `close()` the file after parsing it. + :param close_file: Whether to `close()` the file after product it. :param cropper: Including Mindee cropping results. :param full_text: Whether to include the full OCR text response in compatible APIs. :return: requests response @@ -68,7 +68,7 @@ def predict_async_req_post( :param input_source: Input object :param include_words: Include raw OCR words in the response - :param close_file: Whether to `close()` the file after parsing it. + :param close_file: Whether to `close()` the file after product it. :param cropper: Including Mindee cropping results. :param full_text: Whether to include the full OCR text response in compatible APIs. :param workflow_id: Workflow ID. @@ -184,7 +184,7 @@ def training_req_post( :param input_source: Input object :return: requests response - :param close_file: Whether to `close()` the file after parsing it. + :param close_file: Whether to `close()` the file after product it. """ files = {"document": input_source.read_contents(close_file)} params = {"training": True, "with_candidates": True} @@ -206,7 +206,7 @@ def training_async_req_post( :param input_source: Input object :return: requests response - :param close_file: Whether to `close()` the file after parsing it. + :param close_file: Whether to `close()` the file after product it. """ files = {"document": input_source.read_contents(close_file)} params = {"training": True, "async": True} diff --git a/mindee/mindee_http/response_validation.py b/mindee/mindee_http/response_validation.py index e261df91..abb07e55 100644 --- a/mindee/mindee_http/response_validation.py +++ b/mindee/mindee_http/response_validation.py @@ -56,7 +56,7 @@ def clean_request_json(response: requests.Response) -> StringDict: Checks and correct the response error format depending on the two possible kind of returns. :param response: Raw request response. - :return: Returns the job error if the error is due to parsing, returns the http error otherwise. + :return: Returns the job error if the error is due to product, returns the http error otherwise. """ response_json = response.json() if response.status_code < 200 or response.status_code > 302: diff --git a/mindee/parsing/common/async_predict_response.py b/mindee/parsing/common/async_predict_response.py index e3101633..5d657532 100644 --- a/mindee/parsing/common/async_predict_response.py +++ b/mindee/parsing/common/async_predict_response.py @@ -23,7 +23,7 @@ def __init__( """ Container wrapper for a raw API response. - Inherits and instantiates a normal PredictResponse if the parsing of + Inherits and instantiates a normal PredictResponse if the product of the current queue is both requested and done. :param inference_type: Type of the inference. diff --git a/mindee/v2/__init__.py b/mindee/v2/__init__.py index ccfdb7f9..136bbc42 100644 --- a/mindee/v2/__init__.py +++ b/mindee/v2/__init__.py @@ -1,6 +1,5 @@ -from mindee.v2.parsing import SplitParameters - -from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse +from mindee.v2.product.split.split_parameters import SplitParameters +from mindee.v2.product.split.split_response import SplitResponse __all__ = [ "SplitResponse", diff --git a/mindee/v2/parsing/__init__.py b/mindee/v2/parsing/__init__.py index b060cb17..3ab40372 100644 --- a/mindee/v2/parsing/__init__.py +++ b/mindee/v2/parsing/__init__.py @@ -1,7 +1,7 @@ -from mindee.v2.parsing.inference import SplitParameters -from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse +from mindee.v2.parsing.inference.base_inference import BaseInference +from mindee.v2.parsing.inference.base_response import BaseResponse __all__ = [ - "SplitResponse", - "SplitParameters", + "BaseInference", + "BaseResponse", ] diff --git a/mindee/v2/parsing/inference/__init__.py b/mindee/v2/parsing/inference/__init__.py index 03089a56..e59b67ae 100644 --- a/mindee/v2/parsing/inference/__init__.py +++ b/mindee/v2/parsing/inference/__init__.py @@ -2,18 +2,8 @@ from mindee.v2.parsing.inference.base_response import ( BaseResponse, ) -from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit -from mindee.v2.parsing.inference.utilities.split import SplitInference -from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters -from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse -from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult __all__ = [ "BaseInference", "BaseResponse", - "SplitSplit", - "SplitInference", - "SplitParameters", - "SplitResponse", - "SplitResult", ] diff --git a/mindee/v2/parsing/inference/base_response.py b/mindee/v2/parsing/inference/base_response.py index feab839a..55b6deb6 100644 --- a/mindee/v2/parsing/inference/base_response.py +++ b/mindee/v2/parsing/inference/base_response.py @@ -19,4 +19,4 @@ def __str__(self) -> str: @classmethod def get_result_slug(cls) -> str: """Getter for the inference slug.""" - raise NotImplementedError("Subclasses must implement get_result_slug method") + return cls._slug diff --git a/mindee/v2/parsing/inference/utilities/__init__.py b/mindee/v2/parsing/inference/utilities/__init__.py deleted file mode 100644 index b938a20e..00000000 --- a/mindee/v2/parsing/inference/utilities/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from mindee.v2.parsing.inference.utilities.split.split_inference import SplitInference -from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters -from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse -from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult -from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit -from mindee.v2.parsing.inference.utilities.utility_response import UtilityResponse - -__all__ = [ - "SplitInference", - "SplitParameters", - "SplitResponse", - "SplitResult", - "SplitSplit", - "UtilityResponse", -] diff --git a/mindee/v2/parsing/inference/utilities/split/__init__.py b/mindee/v2/parsing/inference/utilities/split/__init__.py deleted file mode 100644 index 203f2ad0..00000000 --- a/mindee/v2/parsing/inference/utilities/split/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from mindee.v2.parsing.inference.utilities.split.split_inference import SplitInference -from mindee.v2.parsing.inference.utilities.split.split_parameters import SplitParameters -from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse -from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult -from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit - -__all__ = [ - "SplitInference", - "SplitParameters", - "SplitResponse", - "SplitResult", - "SplitSplit", -] diff --git a/mindee/v2/parsing/inference/utilities/split/split_parameters.py b/mindee/v2/parsing/inference/utilities/split/split_parameters.py deleted file mode 100644 index 1edae849..00000000 --- a/mindee/v2/parsing/inference/utilities/split/split_parameters.py +++ /dev/null @@ -1,9 +0,0 @@ -from mindee.v2.parsing.inference.utilities.utility_parameters import UtilityParameters - - -class SplitParameters(UtilityParameters): - """ - Parameters accepted by the split utility v2 endpoint. - """ - - _slug: str = "split" diff --git a/mindee/v2/parsing/inference/utilities/utility_parameters.py b/mindee/v2/parsing/inference/utilities/utility_parameters.py deleted file mode 100644 index b643671e..00000000 --- a/mindee/v2/parsing/inference/utilities/utility_parameters.py +++ /dev/null @@ -1,9 +0,0 @@ -from mindee.input.base_parameters import BaseParameters - - -class UtilityParameters(BaseParameters): - """Parameters accepted by the utility v2 endpoint.""" - - @classmethod - def get_enqueue_slug(cls) -> str: - return "utilities/" + cls._slug diff --git a/mindee/v2/parsing/inference/utilities/utility_response.py b/mindee/v2/parsing/inference/utilities/utility_response.py deleted file mode 100644 index 972ed5e3..00000000 --- a/mindee/v2/parsing/inference/utilities/utility_response.py +++ /dev/null @@ -1,9 +0,0 @@ -from mindee.v2.parsing.inference.base_response import BaseResponse - - -class UtilityResponse(BaseResponse): - """Base class for utility responses.""" - - @classmethod - def get_result_slug(cls) -> str: - return "utilities/" + cls._slug diff --git a/mindee/v2/product/__init__.py b/mindee/v2/product/__init__.py new file mode 100644 index 00000000..136bbc42 --- /dev/null +++ b/mindee/v2/product/__init__.py @@ -0,0 +1,7 @@ +from mindee.v2.product.split.split_parameters import SplitParameters +from mindee.v2.product.split.split_response import SplitResponse + +__all__ = [ + "SplitResponse", + "SplitParameters", +] diff --git a/mindee/v2/product/split/__init__.py b/mindee/v2/product/split/__init__.py new file mode 100644 index 00000000..502cae4e --- /dev/null +++ b/mindee/v2/product/split/__init__.py @@ -0,0 +1,13 @@ +from mindee.v2.product.split.split_inference import SplitInference +from mindee.v2.product.split.split_parameters import SplitParameters +from mindee.v2.product.split.split_response import SplitResponse +from mindee.v2.product.split.split_result import SplitResult +from mindee.v2.product.split.split_split import SplitSplit + +__all__ = [ + "SplitInference", + "SplitParameters", + "SplitResponse", + "SplitResult", + "SplitSplit", +] diff --git a/mindee/v2/parsing/inference/utilities/split/split_inference.py b/mindee/v2/product/split/split_inference.py similarity index 87% rename from mindee/v2/parsing/inference/utilities/split/split_inference.py rename to mindee/v2/product/split/split_inference.py index d3175da8..37aa6edb 100644 --- a/mindee/v2/parsing/inference/utilities/split/split_inference.py +++ b/mindee/v2/product/split/split_inference.py @@ -1,6 +1,6 @@ from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference -from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult +from mindee.v2.product.split.split_result import SplitResult class SplitInference(BaseInference): diff --git a/mindee/v2/product/split/split_parameters.py b/mindee/v2/product/split/split_parameters.py new file mode 100644 index 00000000..191070f6 --- /dev/null +++ b/mindee/v2/product/split/split_parameters.py @@ -0,0 +1,9 @@ +from mindee.input.base_parameters import BaseParameters + + +class SplitParameters(BaseParameters): + """ + Parameters accepted by the split utility v2 endpoint. + """ + + _slug: str = "utilities/split" diff --git a/mindee/v2/parsing/inference/utilities/split/split_response.py b/mindee/v2/product/split/split_response.py similarity index 63% rename from mindee/v2/parsing/inference/utilities/split/split_response.py rename to mindee/v2/product/split/split_response.py index 03386698..dfb3c6d5 100644 --- a/mindee/v2/parsing/inference/utilities/split/split_response.py +++ b/mindee/v2/product/split/split_response.py @@ -1,15 +1,15 @@ from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.inference.utilities.split.split_inference import SplitInference -from mindee.v2.parsing.inference.utilities.utility_response import UtilityResponse +from mindee.v2.parsing.inference import BaseResponse +from mindee.v2.product.split.split_inference import SplitInference -class SplitResponse(UtilityResponse): +class SplitResponse(BaseResponse): """Represent a split inference response from Mindee V2 API.""" inference: SplitInference """Inference object for split inference.""" - _slug: str = "split" + _slug: str = "utilities/split" """Slug of the inference.""" def __init__(self, raw_response: StringDict) -> None: diff --git a/mindee/v2/parsing/inference/utilities/split/split_result.py b/mindee/v2/product/split/split_result.py similarity index 86% rename from mindee/v2/parsing/inference/utilities/split/split_result.py rename to mindee/v2/product/split/split_result.py index d4179db5..f2e45e49 100644 --- a/mindee/v2/parsing/inference/utilities/split/split_result.py +++ b/mindee/v2/product/split/split_result.py @@ -1,7 +1,7 @@ from typing import List from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit +from mindee.v2.product.split.split_split import SplitSplit class SplitResult: diff --git a/mindee/v2/parsing/inference/utilities/split/split_split.py b/mindee/v2/product/split/split_split.py similarity index 100% rename from mindee/v2/parsing/inference/utilities/split/split_split.py rename to mindee/v2/product/split/split_split.py diff --git a/tests/v1/test_client.py b/tests/v1/test_client.py index 923f6416..cccaa6de 100644 --- a/tests/v1/test_client.py +++ b/tests/v1/test_client.py @@ -97,7 +97,7 @@ def test_cut_options(dummy_client: Client): f"{FILE_TYPES_DIR}/pdf/multipage.pdf" ) try: - # need to keep file open to count the pages after parsing + # need to keep file open to count the pages after product dummy_client.parse( ReceiptV5, input_doc, diff --git a/tests/v2/parsing/test_split_response.py b/tests/v2/parsing/test_split_response.py index 5fc8259d..12cd7247 100644 --- a/tests/v2/parsing/test_split_response.py +++ b/tests/v2/parsing/test_split_response.py @@ -1,10 +1,10 @@ import pytest from mindee import LocalResponse -from mindee.v2.parsing.inference.utilities.split.split_split import SplitSplit -from mindee.v2.parsing.inference.utilities.split import SplitInference -from mindee.v2.parsing.inference.utilities.split.split_response import SplitResponse -from mindee.v2.parsing.inference.utilities.split.split_result import SplitResult +from mindee.v2.product.split.split_split import SplitSplit +from mindee.v2.product.split import SplitInference +from mindee.v2.product.split.split_response import SplitResponse +from mindee.v2.product.split.split_result import SplitResult from tests.utils import V2_UTILITIES_DATA_DIR From e6cf80669cea33fda51787691543c7c3ee79a17c Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 30 Jan 2026 13:32:11 +0100 Subject: [PATCH 18/19] fix stupid pycharm messing with random strings --- mindee/client.py | 6 +++--- mindee/commands/cli_parser.py | 4 ++-- mindee/error/mindee_error.py | 2 +- mindee/mindee_http/endpoint.py | 8 ++++---- mindee/mindee_http/response_validation.py | 2 +- tests/v1/test_client.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mindee/client.py b/mindee/client.py index c19f9f43..5377314c 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -92,7 +92,7 @@ def parse( This performs a full OCR operation on the server and will increase response time. Only available on financial document APIs. - :param close_file: Whether to ``close()`` the file after product it. + :param close_file: Whether to ``close()`` the file after parsing it. Set to ``False`` if you need to access the file after this operation. :param page_options: If set, remove pages from the document as specified. @@ -154,7 +154,7 @@ def enqueue( :param include_words: Whether to include the full text for each page. This performs a full OCR operation on the server and will increase response time. - :param close_file: Whether to ``close()`` the file after product it. + :param close_file: Whether to ``close()`` the file after parsing it. Set to ``False`` if you need to access the file after this operation. :param page_options: If set, remove pages from the document as specified. @@ -299,7 +299,7 @@ def enqueue_and_parse( # pylint: disable=too-many-locals :param include_words: Whether to include the full text for each page. This performs a full OCR operation on the server and will increase response time. - :param close_file: Whether to ``close()`` the file after product it. + :param close_file: Whether to ``close()`` the file after parsing it. Set to ``False`` if you need to access the file after this operation. :param page_options: If set, remove pages from the document as specified. diff --git a/mindee/commands/cli_parser.py b/mindee/commands/cli_parser.py index a2caa86b..0c7f9859 100644 --- a/mindee/commands/cli_parser.py +++ b/mindee/commands/cli_parser.py @@ -104,7 +104,7 @@ class MindeeParser: parser: MindeeArgumentParser """Parser options.""" parsed_args: Namespace - """Stores attributes relating to product.""" + """Stores attributes relating to parsing.""" client: Client """Mindee client""" document_info: CommandConfig @@ -159,7 +159,7 @@ def call_parse(self) -> None: print(response.raw_http) else: if response.document is None: - raise MindeeClientError("Something went wrong during async product.") + raise MindeeClientError("Something went wrong during async parsing.") # print the OCR if self.parsed_args.include_words: print("#############\nDocument Text\n#############\n::\n") diff --git a/mindee/error/mindee_error.py b/mindee/error/mindee_error.py index fd03af87..5565a1a1 100644 --- a/mindee/error/mindee_error.py +++ b/mindee/error/mindee_error.py @@ -4,7 +4,7 @@ class MindeeError(RuntimeError): class MindeeClientError(MindeeError): """ - An exception relating to document product errors. + An exception relating to document parsing errors. Not to be confused with `MindeeHTTPClientError`. """ diff --git a/mindee/mindee_http/endpoint.py b/mindee/mindee_http/endpoint.py index 82727d99..0275328d 100644 --- a/mindee/mindee_http/endpoint.py +++ b/mindee/mindee_http/endpoint.py @@ -44,7 +44,7 @@ def predict_req_post( :param input_source: Input object :param include_words: Include raw OCR words in the response - :param close_file: Whether to `close()` the file after product it. + :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. :param full_text: Whether to include the full OCR text response in compatible APIs. :return: requests response @@ -68,7 +68,7 @@ def predict_async_req_post( :param input_source: Input object :param include_words: Include raw OCR words in the response - :param close_file: Whether to `close()` the file after product it. + :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. :param full_text: Whether to include the full OCR text response in compatible APIs. :param workflow_id: Workflow ID. @@ -184,7 +184,7 @@ def training_req_post( :param input_source: Input object :return: requests response - :param close_file: Whether to `close()` the file after product it. + :param close_file: Whether to `close()` the file after parsing it. """ files = {"document": input_source.read_contents(close_file)} params = {"training": True, "with_candidates": True} @@ -206,7 +206,7 @@ def training_async_req_post( :param input_source: Input object :return: requests response - :param close_file: Whether to `close()` the file after product it. + :param close_file: Whether to `close()` the file after parsing it. """ files = {"document": input_source.read_contents(close_file)} params = {"training": True, "async": True} diff --git a/mindee/mindee_http/response_validation.py b/mindee/mindee_http/response_validation.py index abb07e55..e261df91 100644 --- a/mindee/mindee_http/response_validation.py +++ b/mindee/mindee_http/response_validation.py @@ -56,7 +56,7 @@ def clean_request_json(response: requests.Response) -> StringDict: Checks and correct the response error format depending on the two possible kind of returns. :param response: Raw request response. - :return: Returns the job error if the error is due to product, returns the http error otherwise. + :return: Returns the job error if the error is due to parsing, returns the http error otherwise. """ response_json = response.json() if response.status_code < 200 or response.status_code > 302: diff --git a/tests/v1/test_client.py b/tests/v1/test_client.py index cccaa6de..923f6416 100644 --- a/tests/v1/test_client.py +++ b/tests/v1/test_client.py @@ -97,7 +97,7 @@ def test_cut_options(dummy_client: Client): f"{FILE_TYPES_DIR}/pdf/multipage.pdf" ) try: - # need to keep file open to count the pages after product + # need to keep file open to count the pages after parsing dummy_client.parse( ReceiptV5, input_doc, From cc128a57102aeaecfebef36dfd058b6182f70fa6 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 30 Jan 2026 13:33:38 +0100 Subject: [PATCH 19/19] preemptively upgrade split syntax to account for future server evolutions --- mindee/v2/product/split/__init__.py | 4 +- .../split/{split_split.py => split_range.py} | 2 +- mindee/v2/product/split/split_result.py | 10 ++-- tests/v2/parsing/test_split_integration.py | 4 +- tests/v2/parsing/test_split_response.py | 46 +++++++++---------- 5 files changed, 33 insertions(+), 33 deletions(-) rename mindee/v2/product/split/{split_split.py => split_range.py} (97%) diff --git a/mindee/v2/product/split/__init__.py b/mindee/v2/product/split/__init__.py index 502cae4e..9284c63e 100644 --- a/mindee/v2/product/split/__init__.py +++ b/mindee/v2/product/split/__init__.py @@ -2,12 +2,12 @@ from mindee.v2.product.split.split_parameters import SplitParameters from mindee.v2.product.split.split_response import SplitResponse from mindee.v2.product.split.split_result import SplitResult -from mindee.v2.product.split.split_split import SplitSplit +from mindee.v2.product.split.split_range import SplitRange __all__ = [ "SplitInference", "SplitParameters", "SplitResponse", "SplitResult", - "SplitSplit", + "SplitRange", ] diff --git a/mindee/v2/product/split/split_split.py b/mindee/v2/product/split/split_range.py similarity index 97% rename from mindee/v2/product/split/split_split.py rename to mindee/v2/product/split/split_range.py index c00c4679..21a85405 100644 --- a/mindee/v2/product/split/split_split.py +++ b/mindee/v2/product/split/split_range.py @@ -3,7 +3,7 @@ from mindee.parsing.common.string_dict import StringDict -class SplitSplit: +class SplitRange: """Split inference result.""" page_range: List[int] diff --git a/mindee/v2/product/split/split_result.py b/mindee/v2/product/split/split_result.py index f2e45e49..99c57845 100644 --- a/mindee/v2/product/split/split_result.py +++ b/mindee/v2/product/split/split_result.py @@ -1,20 +1,20 @@ from typing import List from mindee.parsing.common.string_dict import StringDict -from mindee.v2.product.split.split_split import SplitSplit +from mindee.v2.product.split.split_range import SplitRange class SplitResult: """Split result info.""" - split: List[SplitSplit] + splits: List[SplitRange] def __init__(self, raw_response: StringDict) -> None: - self.split = [SplitSplit(split) for split in raw_response["split"]] + self.splits = [SplitRange(split) for split in raw_response["split"]] def __str__(self) -> str: splits = "\n" - if len(self.split) > 0: - splits += "\n\n".join([str(split) for split in self.split]) + if len(self.splits) > 0: + splits += "\n\n".join([str(split) for split in self.splits]) out_str = f"Splits\n======{splits}" return out_str diff --git a/tests/v2/parsing/test_split_integration.py b/tests/v2/parsing/test_split_integration.py index fd544b84..efca8b07 100644 --- a/tests/v2/parsing/test_split_integration.py +++ b/tests/v2/parsing/test_split_integration.py @@ -35,5 +35,5 @@ def test_split_blank(v2_client: ClientV2, split_model_id: str): ) # Note: do not use blank_1.pdf for this. assert response.inference is not None assert response.inference.file.name == "default_sample.pdf" - assert response.inference.result.split - assert len(response.inference.result.split) == 2 + assert response.inference.result.splits + assert len(response.inference.result.splits) == 2 diff --git a/tests/v2/parsing/test_split_response.py b/tests/v2/parsing/test_split_response.py index 12cd7247..0ce0d707 100644 --- a/tests/v2/parsing/test_split_response.py +++ b/tests/v2/parsing/test_split_response.py @@ -1,7 +1,7 @@ import pytest from mindee import LocalResponse -from mindee.v2.product.split.split_split import SplitSplit +from mindee.v2.product.split.split_range import SplitRange from mindee.v2.product.split import SplitInference from mindee.v2.product.split.split_response import SplitResponse from mindee.v2.product.split.split_result import SplitResult @@ -13,11 +13,11 @@ def test_split_single(): input_inference = LocalResponse(V2_UTILITIES_DATA_DIR / "split_single.json") split_response = input_inference.deserialize_response(SplitResponse) assert isinstance(split_response.inference, SplitInference) - assert split_response.inference.result.split - assert len(split_response.inference.result.split[0].page_range) == 2 - assert split_response.inference.result.split[0].page_range[0] == 0 - assert split_response.inference.result.split[0].page_range[1] == 0 - assert split_response.inference.result.split[0].document_type == "receipt" + assert split_response.inference.result.splits + assert len(split_response.inference.result.splits[0].page_range) == 2 + assert split_response.inference.result.splits[0].page_range[0] == 0 + assert split_response.inference.result.splits[0].page_range[1] == 0 + assert split_response.inference.result.splits[0].document_type == "receipt" @pytest.mark.v2 @@ -26,20 +26,20 @@ def test_split_multiple(): split_response = input_inference.deserialize_response(SplitResponse) assert isinstance(split_response.inference, SplitInference) assert isinstance(split_response.inference.result, SplitResult) - assert isinstance(split_response.inference.result.split[0], SplitSplit) - assert len(split_response.inference.result.split) == 3 - - assert len(split_response.inference.result.split[0].page_range) == 2 - assert split_response.inference.result.split[0].page_range[0] == 0 - assert split_response.inference.result.split[0].page_range[1] == 0 - assert split_response.inference.result.split[0].document_type == "invoice" - - assert len(split_response.inference.result.split[1].page_range) == 2 - assert split_response.inference.result.split[1].page_range[0] == 1 - assert split_response.inference.result.split[1].page_range[1] == 3 - assert split_response.inference.result.split[1].document_type == "invoice" - - assert len(split_response.inference.result.split[2].page_range) == 2 - assert split_response.inference.result.split[2].page_range[0] == 4 - assert split_response.inference.result.split[2].page_range[1] == 4 - assert split_response.inference.result.split[2].document_type == "invoice" + assert isinstance(split_response.inference.result.splits[0], SplitRange) + assert len(split_response.inference.result.splits) == 3 + + assert len(split_response.inference.result.splits[0].page_range) == 2 + assert split_response.inference.result.splits[0].page_range[0] == 0 + assert split_response.inference.result.splits[0].page_range[1] == 0 + assert split_response.inference.result.splits[0].document_type == "invoice" + + assert len(split_response.inference.result.splits[1].page_range) == 2 + assert split_response.inference.result.splits[1].page_range[0] == 1 + assert split_response.inference.result.splits[1].page_range[1] == 3 + assert split_response.inference.result.splits[1].document_type == "invoice" + + assert len(split_response.inference.result.splits[2].page_range) == 2 + assert split_response.inference.result.splits[2].page_range[0] == 4 + assert split_response.inference.result.splits[2].page_range[1] == 4 + assert split_response.inference.result.splits[2].document_type == "invoice"