diff --git a/docs/tutorial/estimations.md b/docs/tutorial/estimations.md new file mode 100644 index 00000000..7161d327 --- /dev/null +++ b/docs/tutorial/estimations.md @@ -0,0 +1,84 @@ +# Manual Estimations + +EcoLogits can estimate impacts without patching a provider client. This is useful when you already have usage data from another system, such as an agent report, an API gateway, or a calculator. + +```python +from ecologits.estimations import estimate_llm_impacts + +output_tokens = 12_782 + +estimation = estimate_llm_impacts( + provider="openai", + model_name="gpt-5-mini", + output_token_count=output_tokens, + tps=50, +) + +print(estimation.energy.value) +print(estimation.gwp.value) +``` + +The returned object has the same impact fields as traced responses: `energy`, `gwp`, `adpe`, `pe`, `wcf`, `usage`, `embodied`, `warnings`, and `errors`. + +!!! note "Generated tokens" + + The current methodology models generated tokens. If your tool only reports aggregate token totals, you can pass the aggregate value as `output_token_count` as a proxy, but EcoLogits does not yet distinguish prompt tokens from generated tokens in manual estimations. + +## Latency and Throughput + +If you know the request latency, pass it directly: + +```python +estimation = estimate_llm_impacts( + provider="openai", + model_name="gpt-5-mini", + output_token_count=12_782, + request_latency=255.64, +) +``` + +If you do not know the latency, you can provide an average token throughput with `tps`. EcoLogits will use it to estimate generation latency. + +```python +estimation = estimate_llm_impacts( + provider="openai", + model_name="gpt-5-mini", + output_token_count=12_782, + tps=50, + ttft=0.5, +) +``` + +When `tps` or `ttft` is omitted, EcoLogits falls back to deployment metadata from the model repository when available, then to the methodology defaults. + +## Electricity Mix + +The `electricity_mix_zone` parameter represents the datacenter electricity mix, not the user's location. 
When it is omitted, EcoLogits uses the provider default datacenter zone when known, then falls back to the world average `WOR`. + +```python +estimation = estimate_llm_impacts( + provider="mistralai", + model_name="mistral-large-latest", + output_token_count=1_000, + electricity_mix_zone="SWE", +) +``` + +## Intermediate Details + +Set `include_details=True` to expose intermediate methodology values for explainability tools. + +```python +estimation = estimate_llm_impacts( + provider="cohere", + model_name="c4ai-aya-expanse-8b", + output_token_count=1_000, + include_details=True, +) + +print(estimation.details.generation_latency) +print(estimation.details.request_energy) +print(estimation.details.gpu_required_count) +``` + +For models represented by an interval, final impacts remain interval-aware. Intermediate details use representative mean parameter values to provide stable explanatory numbers. diff --git a/ecologits/estimations/__init__.py b/ecologits/estimations/__init__.py new file mode 100644 index 00000000..26982106 --- /dev/null +++ b/ecologits/estimations/__init__.py @@ -0,0 +1,10 @@ +from .llm import PROVIDER_CONFIG_MAP, ProviderConfig, estimate_llm_impacts +from .modeling import LLMEstimationDetails, LLMEstimationResult + +__all__ = [ + "PROVIDER_CONFIG_MAP", + "LLMEstimationDetails", + "LLMEstimationResult", + "ProviderConfig", + "estimate_llm_impacts", +] diff --git a/ecologits/estimations/llm.py b/ecologits/estimations/llm.py new file mode 100644 index 00000000..6602283b --- /dev/null +++ b/ecologits/estimations/llm.py @@ -0,0 +1,228 @@ +from __future__ import annotations + +import math +from dataclasses import dataclass + +from ecologits.electricity_mix_repository import electricity_mixes +from ecologits.impacts.llm import compute_llm_impacts, compute_llm_impacts_dag +from ecologits.log import logger +from ecologits.model_repository import ParametersMoE, models +from ecologits.status_messages import ModelNotRegisteredError, ZoneNotRegisteredError 
+from ecologits.utils.range_value import RangeValue, ValueOrRange + +from .modeling import LLMEstimationDetails, LLMEstimationResult + + +@dataclass +class ProviderConfig: + """ + Default datacenter configuration for a provider. + + Attributes: + datacenter_location: ISO 3166-1 alpha-3 code of the datacenter electricity mix zone. + datacenter_pue: Power Usage Effectiveness of the datacenter. + datacenter_wue: Water Usage Effectiveness of the datacenter. + """ + datacenter_location: str | None + datacenter_pue: float | RangeValue + datacenter_wue: float | RangeValue + + +PROVIDER_CONFIG_MAP = { + "anthropic": ProviderConfig( + datacenter_location="USA", + datacenter_pue=RangeValue(min=1.09, max=1.14), + datacenter_wue=RangeValue(min=0.13, max=0.999), + ), + "cohere": ProviderConfig( + datacenter_location="USA", + datacenter_pue=1.09, + datacenter_wue=0.999, + ), + "google_genai": ProviderConfig( + datacenter_location="USA", + datacenter_pue=1.09, + datacenter_wue=0.999, + ), + "huggingface_hub": ProviderConfig( + datacenter_location="USA", + datacenter_pue=RangeValue(min=1.09, max=1.14), + datacenter_wue=RangeValue(min=0.13, max=0.99), + ), + "mistralai": ProviderConfig( + datacenter_location="SWE", + datacenter_pue=1.16, + datacenter_wue=0.09, + ), + "openai": ProviderConfig( + datacenter_location="USA", + datacenter_pue=1.20, + datacenter_wue=0.569, + ), +} + + +def estimate_llm_impacts( + provider: str, + model_name: str, + output_token_count: int, + request_latency: float | None = None, + electricity_mix_zone: str | None = None, + tps: float | None = None, + ttft: float | None = None, + include_details: bool = False, +) -> LLMEstimationResult: + """ + Estimate the impacts of an LLM generation request without provider tracing. + + Args: + provider: Name of the provider. + model_name: Name of the LLM used. + output_token_count: Number of generated tokens. + request_latency: Measured request latency in seconds. 
+ electricity_mix_zone: ISO 3166-1 alpha-3 code of the datacenter electricity mix zone. + tps: Number of generated tokens per second. + ttft: Time-to-first-token latency in seconds. + include_details: Include intermediate methodology values in the result. + + Returns: + The estimated impacts of an LLM generation request. + """ + model = models.find_model(provider=provider, model_name=model_name) + if model is None: + error = ModelNotRegisteredError(message=f"Could not find model `{model_name}` for {provider} provider.") + logger.warning_once(str(error)) + return LLMEstimationResult(errors=[error]) + + if isinstance(model.architecture.parameters, ParametersMoE): + model_total_params = model.architecture.parameters.total + model_active_params = model.architecture.parameters.active + else: + model_total_params = model.architecture.parameters + model_active_params = model.architecture.parameters + + provider_config = PROVIDER_CONFIG_MAP[provider] + resolved_electricity_mix_zone = electricity_mix_zone or provider_config.datacenter_location or "WOR" + if_electricity_mix = electricity_mixes.find_electricity_mix(zone=resolved_electricity_mix_zone) + if if_electricity_mix is None: + error = ZoneNotRegisteredError( + message=f"Could not find electricity mix for `{resolved_electricity_mix_zone}` zone." 
+ ) + logger.warning_once(str(error)) + return LLMEstimationResult(errors=[error]) + + resolved_tps = _resolve_optional_float(tps, model.deployment.tps if model.deployment else None) + resolved_ttft = _resolve_optional_float(ttft, model.deployment.ttft if model.deployment else None) + + impacts = compute_llm_impacts( + model_active_parameter_count=model_active_params, + model_total_parameter_count=model_total_params, + output_token_count=output_token_count, + request_latency=request_latency, + if_electricity_mix_adpe=if_electricity_mix.adpe, + if_electricity_mix_pe=if_electricity_mix.pe, + if_electricity_mix_gwp=if_electricity_mix.gwp, + if_electricity_mix_wue=if_electricity_mix.wue, + datacenter_pue=provider_config.datacenter_pue, + datacenter_wue=provider_config.datacenter_wue, + tps=resolved_tps, + ttft=resolved_ttft, + ) + result = LLMEstimationResult.model_validate(impacts.model_dump()) + + if include_details: + result.details = _estimate_llm_details( + provider=provider, + model_name=model_name, + model_active_parameter_count=model_active_params, + model_total_parameter_count=model_total_params, + output_token_count=output_token_count, + request_latency=request_latency, + electricity_mix_zone=resolved_electricity_mix_zone, + datacenter_location=provider_config.datacenter_location, + datacenter_pue=provider_config.datacenter_pue, + datacenter_wue=provider_config.datacenter_wue, + if_electricity_mix_adpe=if_electricity_mix.adpe, + if_electricity_mix_pe=if_electricity_mix.pe, + if_electricity_mix_gwp=if_electricity_mix.gwp, + if_electricity_mix_wue=if_electricity_mix.wue, + tps=resolved_tps, + ttft=resolved_ttft, + ) + + if model.has_warnings: + for warning in model.warnings: + logger.warning_once(str(warning)) + result.add_warning(warning) + + return result + + +def _estimate_llm_details( + provider: str, + model_name: str, + model_active_parameter_count: ValueOrRange, + model_total_parameter_count: ValueOrRange, + output_token_count: int, + request_latency: 
float | None, + electricity_mix_zone: str, + datacenter_location: str | None, + datacenter_pue: ValueOrRange, + datacenter_wue: ValueOrRange, + if_electricity_mix_adpe: float, + if_electricity_mix_pe: float, + if_electricity_mix_gwp: float, + if_electricity_mix_wue: float, + tps: float | None, + ttft: float | None, +) -> LLMEstimationDetails: + dag_results = compute_llm_impacts_dag( + model_active_parameter_count=_mean_value(model_active_parameter_count), + model_total_parameter_count=_mean_value(model_total_parameter_count), + output_token_count=output_token_count, + request_latency=request_latency if request_latency is not None else math.inf, + if_electricity_mix_adpe=if_electricity_mix_adpe, + if_electricity_mix_pe=if_electricity_mix_pe, + if_electricity_mix_gwp=if_electricity_mix_gwp, + if_electricity_mix_wue=if_electricity_mix_wue, + datacenter_pue=datacenter_pue, + datacenter_wue=datacenter_wue, + tps=tps, + ttft=ttft, + ) + return LLMEstimationDetails( + provider=provider, + model_name=model_name, + model_active_parameter_count=model_active_parameter_count, + model_total_parameter_count=model_total_parameter_count, + output_token_count=output_token_count, + request_latency=request_latency, + tps=tps, + ttft=ttft, + electricity_mix_zone=electricity_mix_zone, + datacenter_location=datacenter_location, + datacenter_pue=datacenter_pue, + datacenter_wue=datacenter_wue, + generation_latency=dag_results["generation_latency"], + gpu_required_count=dag_results["gpu_required_count"], + request_energy=dag_results["request_energy"], + request_usage_gwp=dag_results["request_usage_gwp"], + request_usage_adpe=dag_results["request_usage_adpe"], + request_usage_pe=dag_results["request_usage_pe"], + request_usage_wcf=dag_results["request_usage_wcf"], + request_embodied_gwp=dag_results["request_embodied_gwp"], + request_embodied_adpe=dag_results["request_embodied_adpe"], + request_embodied_pe=dag_results["request_embodied_pe"], + ) + + +def _mean_value(value: ValueOrRange) -> 
float: + if isinstance(value, RangeValue): + return value.mean + return float(value) + + +def _resolve_optional_float(value: float | None, fallback: float | None) -> float | None: + if value is not None: + return value + return fallback diff --git a/ecologits/estimations/modeling.py b/ecologits/estimations/modeling.py new file mode 100644 index 00000000..ec6f62bd --- /dev/null +++ b/ecologits/estimations/modeling.py @@ -0,0 +1,127 @@ +from pydantic import BaseModel + +from ecologits.impacts.modeling import GWP, PE, WCF, ADPe, Embodied, Energy, Usage +from ecologits.status_messages import ErrorMessage, WarningMessage +from ecologits.utils.range_value import ValueOrRange + + +class LLMEstimationDetails(BaseModel): + """ + Intermediate values used to estimate LLM inference impacts. + + Attributes: + provider: Name of the provider. + model_name: Name of the LLM. + model_active_parameter_count: Number of active parameters of the model (in billion). + model_total_parameter_count: Number of total parameters of the model (in billion). + output_token_count: Number of generated tokens. + request_latency: Measured request latency in seconds, when provided. + tps: Number of generated tokens per second used for the estimate, when provided. + ttft: Time-to-first-token latency used for the estimate, when provided. + electricity_mix_zone: ISO 3166-1 alpha-3 code of the electricity mix zone. + datacenter_location: ISO 3166-1 alpha-3 code of the provider datacenter location. + datacenter_pue: Power Usage Effectiveness of the datacenter. + datacenter_wue: Water Usage Effectiveness of the datacenter. + generation_latency: Token generation latency in seconds. + gpu_required_count: Number of GPUs required to load the model. + request_energy: Energy consumption of the request in kWh. + request_usage_gwp: Usage Global Warming Potential in kgCO2eq. + request_usage_adpe: Usage Abiotic Depletion Potential in kgSbeq. + request_usage_pe: Usage Primary Energy in MJ. 
+ request_usage_wcf: Usage Water Consumption Footprint in L. + request_embodied_gwp: Embodied Global Warming Potential in kgCO2eq. + request_embodied_adpe: Embodied Abiotic Depletion Potential in kgSbeq. + request_embodied_pe: Embodied Primary Energy in MJ. + """ + provider: str + model_name: str + model_active_parameter_count: ValueOrRange + model_total_parameter_count: ValueOrRange + output_token_count: int + request_latency: float | None = None + tps: float | None = None + ttft: float | None = None + electricity_mix_zone: str + datacenter_location: str | None = None + datacenter_pue: ValueOrRange + datacenter_wue: ValueOrRange + generation_latency: ValueOrRange + gpu_required_count: int + request_energy: ValueOrRange + request_usage_gwp: ValueOrRange + request_usage_adpe: ValueOrRange + request_usage_pe: ValueOrRange + request_usage_wcf: ValueOrRange + request_embodied_gwp: ValueOrRange + request_embodied_adpe: ValueOrRange + request_embodied_pe: ValueOrRange + + +class LLMEstimationResult(BaseModel): + """ + LLM impacts estimation result. + + Attributes: + energy: Total energy consumption. + gwp: Total Global Warming Potential (GWP) impact. + adpe: Total Abiotic Depletion Potential for Elements (ADPe) impact. + pe: Total Primary Energy (PE) impact. + wcf: Usage-only Water Consumption Footprint (WCF) impact. + usage: Impacts for the usage phase. + embodied: Impacts for the embodied phase. + warnings: List of warnings. + errors: List of errors. + details: Intermediate estimation values. + """ + energy: Energy | None = None + gwp: GWP | None = None + adpe: ADPe | None = None + pe: PE | None = None + wcf: WCF | None = None + usage: Usage | None = None + embodied: Embodied | None = None + warnings: list[WarningMessage] | None = None + errors: list[ErrorMessage] | None = None + details: LLMEstimationDetails | None = None + + @property + def has_warnings(self) -> bool: + """ + Check whether the estimation result contains warnings. 
+ + Returns: + Whether warnings are present. + """ + return isinstance(self.warnings, list) and len(self.warnings) > 0 + + @property + def has_errors(self) -> bool: + """ + Check whether the estimation result contains errors. + + Returns: + Whether errors are present. + """ + return isinstance(self.errors, list) and len(self.errors) > 0 + + def add_warning(self, warning: WarningMessage) -> None: + """ + Add a warning to the estimation result. + + Args: + warning: Warning to add. + """ + if self.warnings is None: + self.warnings = [] + self.warnings.append(warning) + + def add_errors(self, error: ErrorMessage) -> None: + """ + Add an error to the estimation result. + + Args: + error: Error to add. + """ + if self.errors is None: + self.errors = [] + self.errors.append(error) diff --git a/ecologits/tracers/utils.py b/ecologits/tracers/utils.py index 63583759..c6139927 100644 --- a/ecologits/tracers/utils.py +++ b/ecologits/tracers/utils.py @@ -1,60 +1,41 @@ from __future__ import annotations -from dataclasses import dataclass +from typing import Any -from pydantic import BaseModel +from ecologits.estimations import ( + PROVIDER_CONFIG_MAP, + LLMEstimationResult, + estimate_llm_impacts, +) -from ecologits.electricity_mix_repository import electricity_mixes -from ecologits.impacts.llm import compute_llm_impacts -from ecologits.impacts.modeling import GWP, PE, WCF, ADPe, Embodied, Energy, Usage -from ecologits.log import logger -from ecologits.model_repository import ParametersMoE, models -from ecologits.status_messages import ErrorMessage, ModelNotRegisteredError, WarningMessage, ZoneNotRegisteredError -from ecologits.utils.range_value import RangeValue - -class ImpactsOutput(BaseModel): +class ImpactsOutput(LLMEstimationResult): """ - Impacts output data model. + Environmental impacts of an LLM generation request. 
Attributes: - energy: Total energy consumption - gwp: Total Global Warming Potential (GWP) impact - adpe: Total Abiotic Depletion Potential for Elements (ADPe) impact - pe: Total Primary Energy (PE) impact - wcf: Usage-only Water Consumption Footprint (WCF) impact - usage: Impacts for the usage phase - embodied: Impacts for the embodied phase - warnings: List of warnings - errors: List of errors + energy: Total energy consumption. + gwp: Total Global Warming Potential (GWP) impact. + adpe: Total Abiotic Depletion Potential for Elements (ADPe) impact. + pe: Total Primary Energy (PE) impact. + wcf: Usage-only Water Consumption Footprint (WCF) impact. + usage: Impacts for the usage phase. + embodied: Impacts for the embodied phase. + warnings: List of warnings. + errors: List of errors. + details: Intermediate estimation values. """ - energy: Energy | None = None - gwp: GWP | None = None - adpe: ADPe | None = None - pe: PE | None = None - wcf: WCF | None = None - usage: Usage | None = None - embodied: Embodied | None = None - warnings: list[WarningMessage] | None = None - errors: list[ErrorMessage] | None = None - - @property - def has_warnings(self) -> bool: - return isinstance(self.warnings, list) and len(self.warnings) > 0 - @property - def has_errors(self) -> bool: - return isinstance(self.errors, list) and len(self.errors) > 0 + def __eq__(self, other: Any) -> bool: + if isinstance(other, LLMEstimationResult): + return self.model_dump() == other.model_dump() + return super().__eq__(other) - def add_warning(self, warning: WarningMessage) -> None: - if self.warnings is None: - self.warnings = [] - self.warnings.append(warning) - - def add_errors(self, error: ErrorMessage) -> None: - if self.errors is None: - self.errors = [] - self.errors.append(error) +__all__ = [ + "PROVIDER_CONFIG_MAP", + "ImpactsOutput", + "llm_impacts", +] def llm_impacts( @@ -77,94 +58,14 @@ def llm_impacts( Returns: The impacts of an LLM generation request. 
""" - - model = models.find_model(provider=provider, model_name=model_name) - if model is None: - error = ModelNotRegisteredError(message=f"Could not find model `{model_name}` for {provider} provider.") - logger.warning_once(str(error)) - return ImpactsOutput(errors=[error]) - - if isinstance(model.architecture.parameters, ParametersMoE): - model_total_params = model.architecture.parameters.total - model_active_params = model.architecture.parameters.active - else: - model_total_params = model.architecture.parameters - model_active_params = model.architecture.parameters - - datacenter_location = PROVIDER_CONFIG_MAP[provider].datacenter_location - datacenter_pue = PROVIDER_CONFIG_MAP[provider].datacenter_pue - datacenter_wue = PROVIDER_CONFIG_MAP[provider].datacenter_wue - - if electricity_mix_zone is None: - electricity_mix_zone = datacenter_location - if electricity_mix_zone is None: - electricity_mix_zone = "WOR" - if_electricity_mix = electricity_mixes.find_electricity_mix(zone=electricity_mix_zone) - if if_electricity_mix is None: - error = ZoneNotRegisteredError(message=f"Could not find electricity mix for `{electricity_mix_zone}` zone.") - logger.warning_once(str(error)) - return ImpactsOutput(errors=[error]) - - impacts = compute_llm_impacts( - model_active_parameter_count=model_active_params, - model_total_parameter_count=model_total_params, + estimation = estimate_llm_impacts( + provider=provider, + model_name=model_name, output_token_count=output_token_count, request_latency=request_latency, - if_electricity_mix_adpe=if_electricity_mix.adpe, - if_electricity_mix_pe=if_electricity_mix.pe, - if_electricity_mix_gwp=if_electricity_mix.gwp, - if_electricity_mix_wue=if_electricity_mix.wue, - datacenter_pue=datacenter_pue, - datacenter_wue=datacenter_wue, - tps=model.deployment.tps if model.deployment else None, - ttft=model.deployment.ttft if model.deployment else None, + electricity_mix_zone=electricity_mix_zone, ) - impacts = 
ImpactsOutput.model_validate(impacts.model_dump()) - - if model.has_warnings: - for w in model.warnings: - logger.warning_once(str(w)) - impacts.add_warning(w) - - return impacts - - -@dataclass -class _ProviderConfig: - datacenter_location: str - datacenter_pue: float | RangeValue - datacenter_wue: float | RangeValue - - -PROVIDER_CONFIG_MAP = { - "anthropic": _ProviderConfig( - datacenter_location="USA", - datacenter_pue=RangeValue(min=1.09, max=1.14), - datacenter_wue=RangeValue(min=0.13, max=0.999), - ), - "cohere": _ProviderConfig( - datacenter_location="USA", - datacenter_pue=1.09, - datacenter_wue=0.999, - ), - "google_genai": _ProviderConfig( - datacenter_location="USA", - datacenter_pue=1.09, - datacenter_wue=0.999, - ), - "huggingface_hub": _ProviderConfig( - datacenter_location="USA", - datacenter_pue=RangeValue(min=1.09, max=1.14), - datacenter_wue=RangeValue(min=0.13, max=0.99), - ), - "mistralai": _ProviderConfig( - datacenter_location="SWE", - datacenter_pue=1.16, - datacenter_wue=0.09, - ), - "openai": _ProviderConfig( - datacenter_location="USA", - datacenter_pue=1.20, - datacenter_wue=0.569, + return ImpactsOutput.model_construct( + _fields_set=estimation.model_fields_set, + **estimation.__dict__, ) -} diff --git a/mkdocs.yml b/mkdocs.yml index b9415fea..131868fb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -14,6 +14,7 @@ nav: - 'Tutorial': - 'Introduction': tutorial/index.md - 'Environmental Impacts': tutorial/impacts.md + - 'Manual Estimations': tutorial/estimations.md - 'Warnings and Errors': tutorial/warnings_and_errors.md - 'Supported providers': tutorial/providers.md - 'OpenTelemetry': tutorial/opentelemetry.md diff --git a/tests/test_estimations.py b/tests/test_estimations.py new file mode 100644 index 00000000..398dca97 --- /dev/null +++ b/tests/test_estimations.py @@ -0,0 +1,118 @@ +from ecologits.estimations import LLMEstimationResult, estimate_llm_impacts +from ecologits.status_messages import ( + ModelArchMultimodalWarning, + 
ModelArchNotReleasedWarning, + ModelNotRegisteredError, + ZoneNotRegisteredError, +) +from ecologits.tracers.utils import ImpactsOutput, llm_impacts + + +def test_estimate_llm_impacts() -> None: + estimation = estimate_llm_impacts( + provider="cohere", + model_name="c4ai-aya-expanse-8b", + output_token_count=10, + request_latency=10, + ) + + assert isinstance(estimation, LLMEstimationResult) + assert estimation.energy.value > 0 + assert estimation.gwp.value > 0 + assert estimation.adpe.value > 0 + assert estimation.pe.value > 0 + assert estimation.wcf.value > 0 + assert estimation.details is None + + +def test_estimate_llm_impacts_with_details() -> None: + estimation = estimate_llm_impacts( + provider="cohere", + model_name="c4ai-aya-expanse-8b", + output_token_count=10, + request_latency=10, + include_details=True, + ) + + assert estimation.details is not None + assert estimation.details.provider == "cohere" + assert estimation.details.model_name == "c4ai-aya-expanse-8b" + assert estimation.details.electricity_mix_zone == "USA" + assert estimation.details.generation_latency > 0 + assert estimation.details.gpu_required_count > 0 + assert estimation.details.request_energy > 0 + assert estimation.details.request_usage_gwp > 0 + assert estimation.details.request_embodied_gwp > 0 + + +def test_estimate_llm_impacts_uses_explicit_tps() -> None: + default_estimation = estimate_llm_impacts( + provider="cohere", + model_name="c4ai-aya-expanse-8b", + output_token_count=100, + ) + fast_estimation = estimate_llm_impacts( + provider="cohere", + model_name="c4ai-aya-expanse-8b", + output_token_count=100, + tps=1000, + ttft=0.1, + ) + + assert fast_estimation.energy.value < default_estimation.energy.value + + +def test_estimate_llm_impacts_model_error() -> None: + estimation = estimate_llm_impacts( + provider="openai", + model_name="unknown-model", + output_token_count=10, + ) + + assert estimation.energy is None + assert estimation.has_errors + assert 
isinstance(estimation.errors[0], ModelNotRegisteredError) + + +def test_estimate_llm_impacts_zone_error() -> None: + estimation = estimate_llm_impacts( + provider="openai", + model_name="gpt-4o-mini", + output_token_count=10, + electricity_mix_zone="UNKNOWN-ZONE", + ) + + assert estimation.energy is None + assert estimation.has_errors + assert isinstance(estimation.errors[0], ZoneNotRegisteredError) + + +def test_estimate_llm_impacts_warnings() -> None: + estimation = estimate_llm_impacts( + provider="openai", + model_name="gpt-4o-mini", + output_token_count=10, + ) + + assert estimation.energy.value > 0 + assert estimation.has_warnings + assert isinstance(estimation.warnings[0], (ModelArchNotReleasedWarning, ModelArchMultimodalWarning)) + assert isinstance(estimation.warnings[1], (ModelArchNotReleasedWarning, ModelArchMultimodalWarning)) + + +def test_llm_impacts_wrapper_matches_estimation() -> None: + estimation = estimate_llm_impacts( + provider="cohere", + model_name="c4ai-aya-expanse-8b", + output_token_count=10, + request_latency=10, + ) + tracer_impacts = llm_impacts( + provider="cohere", + model_name="c4ai-aya-expanse-8b", + output_token_count=10, + request_latency=10, + ) + + assert isinstance(tracer_impacts, ImpactsOutput) + assert tracer_impacts == estimation