Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions docs/tutorial/estimations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Manual Estimations

EcoLogits can estimate impacts without patching a provider client. This is useful when you already have usage data from another system, such as an agent report, an API gateway, or a calculator.

```python
from ecologits.estimations import estimate_llm_impacts

output_tokens = 12_782

estimation = estimate_llm_impacts(
provider="openai",
model_name="gpt-5-mini",
output_token_count=output_tokens,
tps=50,
)

print(estimation.energy.value)
print(estimation.gwp.value)
```

The returned object has the same impact fields as traced responses: `energy`, `gwp`, `adpe`, `pe`, `wcf`, `usage`, `embodied`, `warnings`, and `errors`.

!!! note "Generated tokens"

    The current methodology models generated tokens. If your tool only reports an aggregate token total, you can pass that total via `output_token_count` as a proxy, but EcoLogits does not yet distinguish prompt tokens from generated tokens in manual estimations.

## Latency and Throughput

If you know the request latency, pass it directly:

```python
estimation = estimate_llm_impacts(
provider="openai",
model_name="gpt-5-mini",
output_token_count=12_782,
request_latency=255.64,
)
```

If you do not know the latency, you can provide an average token throughput with `tps`. EcoLogits will use it to estimate generation latency.

```python
estimation = estimate_llm_impacts(
provider="openai",
model_name="gpt-5-mini",
output_token_count=12_782,
tps=50,
ttft=0.5,
)
```

When `tps` or `ttft` are omitted, EcoLogits falls back to deployment metadata from the model repository when available, then to the methodology defaults.

## Electricity Mix

The `electricity_mix_zone` parameter represents the datacenter electricity mix, not the user's location. When it is omitted, EcoLogits uses the provider default datacenter zone when known, then falls back to the world average `WOR`.

```python
estimation = estimate_llm_impacts(
provider="mistralai",
model_name="mistral-large-latest",
output_token_count=1_000,
electricity_mix_zone="SWE",
)
```

## Intermediate Details

Set `include_details=True` to expose intermediate methodology values for explainability tools.

```python
estimation = estimate_llm_impacts(
provider="cohere",
model_name="c4ai-aya-expanse-8b",
output_token_count=1_000,
include_details=True,
)

print(estimation.details.generation_latency)
print(estimation.details.request_energy)
print(estimation.details.gpu_required_count)
```

For models represented by an interval, final impacts remain interval-aware. Intermediate details use representative mean parameter values to provide stable explanatory numbers.
10 changes: 10 additions & 0 deletions ecologits/estimations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .llm import PROVIDER_CONFIG_MAP, ProviderConfig, estimate_llm_impacts
from .modeling import LLMEstimationDetails, LLMEstimationResult

# Names re-exported at the package level, i.e. what
# `from ecologits.estimations import ...` is expected to provide.
__all__ = [
    "PROVIDER_CONFIG_MAP",
    "LLMEstimationDetails",
    "LLMEstimationResult",
    "ProviderConfig",
    "estimate_llm_impacts",
]
228 changes: 228 additions & 0 deletions ecologits/estimations/llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
from __future__ import annotations

import math
from dataclasses import dataclass

from ecologits.electricity_mix_repository import electricity_mixes
from ecologits.impacts.llm import compute_llm_impacts, compute_llm_impacts_dag
from ecologits.log import logger
from ecologits.model_repository import ParametersMoE, models
from ecologits.status_messages import ModelNotRegisteredError, ZoneNotRegisteredError
from ecologits.utils.range_value import RangeValue, ValueOrRange

from .modeling import LLMEstimationDetails, LLMEstimationResult


@dataclass
class ProviderConfig:
    """
    Default datacenter configuration for a provider.

    PUE and WUE may be given either as a single scalar or as a `RangeValue`
    interval when only an uncertainty range is known for the provider.

    Attributes:
        datacenter_location: ISO 3166-1 alpha-3 code of the datacenter electricity mix zone.
            `None` means the zone is unknown; callers fall back to the world average (`WOR`).
        datacenter_pue: Power Usage Effectiveness of the datacenter (dimensionless ratio).
        datacenter_wue: Water Usage Effectiveness of the datacenter.
            NOTE(review): presumably expressed in L/kWh — confirm against the methodology docs.
    """
    datacenter_location: str | None
    datacenter_pue: float | RangeValue
    datacenter_wue: float | RangeValue


# Curated per-provider datacenter defaults, keyed by provider name as used by
# the model repository. Scalars are used where a single figure is known;
# RangeValue intervals encode uncertainty.
# NOTE(review): anthropic's WUE upper bound is 0.999 while huggingface_hub's is
# 0.99 — confirm this difference is intentional and not a typo.
PROVIDER_CONFIG_MAP = {
    "anthropic": ProviderConfig(
        datacenter_location="USA",
        datacenter_pue=RangeValue(min=1.09, max=1.14),
        datacenter_wue=RangeValue(min=0.13, max=0.999),
    ),
    "cohere": ProviderConfig(
        datacenter_location="USA",
        datacenter_pue=1.09,
        datacenter_wue=0.999,
    ),
    "google_genai": ProviderConfig(
        datacenter_location="USA",
        datacenter_pue=1.09,
        datacenter_wue=0.999,
    ),
    "huggingface_hub": ProviderConfig(
        datacenter_location="USA",
        datacenter_pue=RangeValue(min=1.09, max=1.14),
        datacenter_wue=RangeValue(min=0.13, max=0.99),
    ),
    "mistralai": ProviderConfig(
        datacenter_location="SWE",
        datacenter_pue=1.16,
        datacenter_wue=0.09,
    ),
    "openai": ProviderConfig(
        datacenter_location="USA",
        datacenter_pue=1.20,
        datacenter_wue=0.569,
    ),
}


def estimate_llm_impacts(
    provider: str,
    model_name: str,
    output_token_count: int,
    request_latency: float | None = None,
    electricity_mix_zone: str | None = None,
    tps: float | None = None,
    ttft: float | None = None,
    include_details: bool = False,
) -> LLMEstimationResult:
    """
    Estimate the impacts of an LLM generation request without provider tracing.

    Args:
        provider: Name of the provider.
        model_name: Name of the LLM used.
        output_token_count: Number of generated tokens.
        request_latency: Measured request latency in seconds.
        electricity_mix_zone: ISO 3166-1 alpha-3 code of the datacenter electricity mix zone.
            Defaults to the provider's known datacenter zone, then to the world average `WOR`.
        tps: Number of generated tokens per second. Defaults to the model's deployment
            metadata when available.
        ttft: Time-to-first-token latency in seconds. Defaults to the model's deployment
            metadata when available.
        include_details: Include intermediate methodology values in the result.

    Returns:
        The estimated impacts of an LLM generation request. Lookup failures (unknown
        model or electricity mix zone) are reported through the result's `errors`
        field instead of raising.
    """
    model = models.find_model(provider=provider, model_name=model_name)
    if model is None:
        error = ModelNotRegisteredError(message=f"Could not find model `{model_name}` for {provider} provider.")
        logger.warning_once(str(error))
        return LLMEstimationResult(errors=[error])

    # MoE models expose distinct total/active parameter counts; dense models
    # use the same value for both.
    if isinstance(model.architecture.parameters, ParametersMoE):
        model_total_params = model.architecture.parameters.total
        model_active_params = model.architecture.parameters.active
    else:
        model_total_params = model.architecture.parameters
        model_active_params = model.architecture.parameters

    # Providers known to the model repository but absent from the curated map
    # previously raised an unhandled KeyError here. Fall back to a neutral
    # configuration instead: unknown zone (resolved to `WOR` below) and generic
    # hyperscale PUE/WUE assumptions in line with the curated entries.
    provider_config = PROVIDER_CONFIG_MAP.get(provider)
    if provider_config is None:
        provider_config = ProviderConfig(
            datacenter_location=None,
            datacenter_pue=1.2,
            datacenter_wue=0.999,
        )

    # Explicit argument wins, then the provider default zone, then world average.
    resolved_electricity_mix_zone = electricity_mix_zone or provider_config.datacenter_location or "WOR"
    if_electricity_mix = electricity_mixes.find_electricity_mix(zone=resolved_electricity_mix_zone)
    if if_electricity_mix is None:
        error = ZoneNotRegisteredError(
            message=f"Could not find electricity mix for `{resolved_electricity_mix_zone}` zone."
        )
        logger.warning_once(str(error))
        return LLMEstimationResult(errors=[error])

    # User-supplied throughput figures take precedence over repository metadata.
    resolved_tps = _resolve_optional_float(tps, model.deployment.tps if model.deployment else None)
    resolved_ttft = _resolve_optional_float(ttft, model.deployment.ttft if model.deployment else None)

    impacts = compute_llm_impacts(
        model_active_parameter_count=model_active_params,
        model_total_parameter_count=model_total_params,
        output_token_count=output_token_count,
        request_latency=request_latency,
        if_electricity_mix_adpe=if_electricity_mix.adpe,
        if_electricity_mix_pe=if_electricity_mix.pe,
        if_electricity_mix_gwp=if_electricity_mix.gwp,
        if_electricity_mix_wue=if_electricity_mix.wue,
        datacenter_pue=provider_config.datacenter_pue,
        datacenter_wue=provider_config.datacenter_wue,
        tps=resolved_tps,
        ttft=resolved_ttft,
    )
    # Re-validate into the estimation result type so `details`/`warnings` can be
    # attached without mutating the shared Impacts model.
    result = LLMEstimationResult.model_validate(impacts.model_dump())

    if include_details:
        result.details = _estimate_llm_details(
            provider=provider,
            model_name=model_name,
            model_active_parameter_count=model_active_params,
            model_total_parameter_count=model_total_params,
            output_token_count=output_token_count,
            request_latency=request_latency,
            electricity_mix_zone=resolved_electricity_mix_zone,
            datacenter_location=provider_config.datacenter_location,
            datacenter_pue=provider_config.datacenter_pue,
            datacenter_wue=provider_config.datacenter_wue,
            if_electricity_mix_adpe=if_electricity_mix.adpe,
            if_electricity_mix_pe=if_electricity_mix.pe,
            if_electricity_mix_gwp=if_electricity_mix.gwp,
            if_electricity_mix_wue=if_electricity_mix.wue,
            tps=resolved_tps,
            ttft=resolved_ttft,
        )

    # Propagate model-level warnings (e.g. uncertain parameter counts) onto the
    # result so callers see them alongside the impact values.
    if model.has_warnings:
        for warning in model.warnings:
            logger.warning_once(str(warning))
            result.add_warning(warning)

    return result


def _estimate_llm_details(
    provider: str,
    model_name: str,
    model_active_parameter_count: ValueOrRange,
    model_total_parameter_count: ValueOrRange,
    output_token_count: int,
    request_latency: float | None,
    electricity_mix_zone: str,
    datacenter_location: str | None,
    datacenter_pue: ValueOrRange,
    datacenter_wue: ValueOrRange,
    if_electricity_mix_adpe: float,
    if_electricity_mix_pe: float,
    if_electricity_mix_gwp: float,
    if_electricity_mix_wue: float,
    tps: float | None,
    ttft: float | None,
) -> LLMEstimationDetails:
    """Evaluate the methodology DAG on representative (mean) parameter values
    and package its intermediate node values for explainability."""
    # The DAG works on scalars, so parameter intervals are collapsed to their
    # mean and an unknown latency is encoded as +infinity.
    effective_latency = math.inf if request_latency is None else request_latency
    node_values = compute_llm_impacts_dag(
        model_active_parameter_count=_mean_value(model_active_parameter_count),
        model_total_parameter_count=_mean_value(model_total_parameter_count),
        output_token_count=output_token_count,
        request_latency=effective_latency,
        if_electricity_mix_adpe=if_electricity_mix_adpe,
        if_electricity_mix_pe=if_electricity_mix_pe,
        if_electricity_mix_gwp=if_electricity_mix_gwp,
        if_electricity_mix_wue=if_electricity_mix_wue,
        datacenter_pue=datacenter_pue,
        datacenter_wue=datacenter_wue,
        tps=tps,
        ttft=ttft,
    )
    # Echo the inputs back alongside the DAG outputs so a single object carries
    # the full explanation of the estimate.
    return LLMEstimationDetails(
        provider=provider,
        model_name=model_name,
        model_active_parameter_count=model_active_parameter_count,
        model_total_parameter_count=model_total_parameter_count,
        output_token_count=output_token_count,
        request_latency=request_latency,
        tps=tps,
        ttft=ttft,
        electricity_mix_zone=electricity_mix_zone,
        datacenter_location=datacenter_location,
        datacenter_pue=datacenter_pue,
        datacenter_wue=datacenter_wue,
        generation_latency=node_values["generation_latency"],
        gpu_required_count=node_values["gpu_required_count"],
        request_energy=node_values["request_energy"],
        request_usage_gwp=node_values["request_usage_gwp"],
        request_usage_adpe=node_values["request_usage_adpe"],
        request_usage_pe=node_values["request_usage_pe"],
        request_usage_wcf=node_values["request_usage_wcf"],
        request_embodied_gwp=node_values["request_embodied_gwp"],
        request_embodied_adpe=node_values["request_embodied_adpe"],
        request_embodied_pe=node_values["request_embodied_pe"],
    )


def _mean_value(value: ValueOrRange) -> float:
    """Collapse a scalar-or-range value to one representative float (range mean)."""
    if not isinstance(value, RangeValue):
        return float(value)
    return value.mean


def _resolve_optional_float(value: float | None, fallback: float | None) -> float | None:
if value is not None:
return value
return fallback
Loading