Skip to content

Commit f172def

Browse files
authored
Add Open Data Adapters for climate risk assessment
1 parent 2341096 commit f172def

1 file changed

Lines changed: 390 additions & 0 deletions

File tree

  • data-engine/python/data-engine/python/adapters
Lines changed: 390 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,390 @@
1+
"""
2+
Free/Open Data Adapters for Meteorium
3+
======================================
4+
These are the sources you can ACTUALLY use right now without $25k licenses.
5+
All free, all real, all used by actual climate risk platforms.
6+
7+
Sources:
8+
- Open-Meteo: weather forecasts, no API key needed
9+
- NASA FIRMS: wildfire/thermal anomaly detection
10+
- ECMWF Open Data: global weather model output
11+
- Carbon Monitor: real-time CO2 by country
12+
- Copernicus Climate Data Store: CMIP6 projections (free, needs registration)
13+
"""
14+
15+
import httpx
16+
import asyncio
17+
import logging
18+
from datetime import datetime, date, timedelta
19+
from dataclasses import dataclass
20+
from typing import Optional
21+
22+
log = logging.getLogger(__name__)
23+
24+
# ─── Open-Meteo (Free, No API Key) ────────────────────────────────────────────
25+
# Used by: small-medium climate fintechs, ESG startups
26+
# Covers: global weather forecasts + historical climate reanalysis (ERA5)
27+
# Resolution: 1km (some models), 7-day forecast
28+
29+
OPEN_METEO_BASE = "https://api.open-meteo.com/v1"
30+
OPEN_METEO_HIST = "https://archive-api.open-meteo.com/v1"
31+
32+
33+
@dataclass
34+
class WeatherRisk:
35+
lat: float
36+
lon: float
37+
date: str
38+
max_temp_c: float
39+
precip_mm: float
40+
wind_speed_ms: float
41+
flood_risk_score: float # derived: normalized precipitation anomaly
42+
heat_risk_score: float # derived: temp above historical baseline
43+
44+
45+
class OpenMeteoAdapter:
46+
"""
47+
Open-Meteo: real free weather API. No key. No rate limit (fair use).
48+
Used to derive physical climate risk scores for asset locations.
49+
"""
50+
51+
async def get_forecast(self, lat: float, lon: float, days: int = 7) -> list[WeatherRisk]:
52+
params = {
53+
"latitude": lat,
54+
"longitude": lon,
55+
"daily": [
56+
"temperature_2m_max",
57+
"precipitation_sum",
58+
"windspeed_10m_max",
59+
"precipitation_probability_max",
60+
],
61+
"forecast_days": days,
62+
"timezone": "UTC",
63+
}
64+
65+
async with httpx.AsyncClient(timeout=15.0) as client:
66+
r = await client.get(f"{OPEN_METEO_BASE}/forecast", params=params)
67+
r.raise_for_status()
68+
data = r.json()
69+
70+
daily = data["daily"]
71+
results = []
72+
for i, dt in enumerate(daily["time"]):
73+
temp = daily["temperature_2m_max"][i] or 0
74+
precip = daily["precipitation_sum"][i] or 0
75+
wind = daily["windspeed_10m_max"][i] or 0
76+
77+
# Derive risk scores (crude but real heuristics)
78+
heat_risk = min((max(temp - 35.0, 0) / 20.0), 1.0) # 35°C threshold
79+
flood_risk = min(precip / 150.0, 1.0) # 150mm/day = extreme
80+
81+
results.append(WeatherRisk(
82+
lat=lat, lon=lon, date=dt,
83+
max_temp_c=temp, precip_mm=precip, wind_speed_ms=wind,
84+
flood_risk_score=flood_risk,
85+
heat_risk_score=heat_risk,
86+
))
87+
88+
return results
89+
90+
async def get_historical_baseline(
91+
self, lat: float, lon: float,
92+
years_back: int = 10,
93+
) -> dict:
94+
"""
95+
Pull ERA5 historical reanalysis data to compute baseline climate normals.
96+
Used to detect anomalies — the core of physical risk scoring.
97+
"""
98+
end = date.today()
99+
start = end - timedelta(days=365 * years_back)
100+
101+
params = {
102+
"latitude": lat,
103+
"longitude": lon,
104+
"start_date": str(start),
105+
"end_date": str(end),
106+
"daily": [
107+
"temperature_2m_max",
108+
"precipitation_sum",
109+
],
110+
"timezone": "UTC",
111+
}
112+
113+
async with httpx.AsyncClient(timeout=60.0) as client:
114+
r = await client.get(f"{OPEN_METEO_HIST}/era5", params=params)
115+
r.raise_for_status()
116+
data = r.json()
117+
118+
temps = [t for t in data["daily"]["temperature_2m_max"] if t is not None]
119+
precips = [p for p in data["daily"]["precipitation_sum"] if p is not None]
120+
121+
return {
122+
"mean_temp_c": sum(temps) / len(temps) if temps else 0,
123+
"mean_precip_mm": sum(precips) / len(precips) if precips else 0,
124+
"p95_temp_c": sorted(temps)[int(len(temps) * 0.95)] if temps else 0,
125+
"p95_precip_mm": sorted(precips)[int(len(precips) * 0.95)] if precips else 0,
126+
}
127+
128+
129+
# ─── NASA FIRMS (Wildfire / Thermal Anomaly) ──────────────────────────────────
130+
# Free. Registration at firms.modaps.eosdis.nasa.gov for API key.
131+
# Real satellite fire detection — MODIS + VIIRS sensors, 375m resolution
132+
# Used by: insurance companies, forest asset managers, infrastructure risk firms
133+
134+
NASA_FIRMS_BASE = "https://firms.modaps.eosdis.nasa.gov/api"
135+
136+
137+
class NASAFirmsAdapter:
138+
"""
139+
NASA FIRMS: actual satellite wildfire/thermal anomaly data.
140+
This is real — the same data used by CAL FIRE, FEMA, insurance companies.
141+
"""
142+
143+
def __init__(self, api_key: str):
144+
# Get free key at: https://firms.modaps.eosdis.nasa.gov/api/area/
145+
self.api_key = api_key
146+
147+
async def get_fire_risk(
148+
self,
149+
lat: float, lon: float,
150+
radius_km: float = 50.0,
151+
days_back: int = 7,
152+
) -> dict:
153+
"""
154+
Query satellite fire detections around an asset location.
155+
Returns count + FRP (Fire Radiative Power) as risk proxy.
156+
"""
157+
# Build bounding box from center + radius
158+
deg_per_km = 1 / 111.0
159+
d = radius_km * deg_per_km
160+
bbox = f"{lon-d},{lat-d},{lon+d},{lat+d}"
161+
162+
# VIIRS I-Band 375m — highest resolution NASA fire product
163+
url = f"{NASA_FIRMS_BASE}/area/csv/{self.api_key}/VIIRS_SNPP_NRT/{bbox}/{days_back}"
164+
165+
async with httpx.AsyncClient(timeout=20.0) as client:
166+
r = await client.get(url)
167+
if r.status_code == 404:
168+
return {"fire_count": 0, "max_frp": 0.0, "fire_risk_score": 0.0}
169+
r.raise_for_status()
170+
csv_text = r.text
171+
172+
# Parse CSV — each row is a fire detection
173+
lines = csv_text.strip().split("\n")
174+
if len(lines) < 2:
175+
return {"fire_count": 0, "max_frp": 0.0, "fire_risk_score": 0.0}
176+
177+
header = lines[0].split(",")
178+
frp_idx = header.index("frp") if "frp" in header else -1
179+
180+
fire_count = len(lines) - 1
181+
frps = []
182+
for line in lines[1:]:
183+
cols = line.split(",")
184+
if frp_idx >= 0 and frp_idx < len(cols):
185+
try:
186+
frps.append(float(cols[frp_idx]))
187+
except ValueError:
188+
pass
189+
190+
max_frp = max(frps) if frps else 0.0
191+
# FRP > 1000 MW = extreme fire. Normalize to [0,1]
192+
fire_risk_score = min(fire_count * 0.02 + max_frp / 1000.0, 1.0)
193+
194+
return {
195+
"fire_count": fire_count,
196+
"max_frp_mw": max_frp,
197+
"fire_risk_score": fire_risk_score,
198+
"radius_km": radius_km,
199+
"days_back": days_back,
200+
}
201+
202+
203+
# ─── Carbon Monitor (Real-time CO2 by Country) ────────────────────────────────
204+
# Free REST API. No key. Updated daily from power plant + industry sensors.
205+
# Used by: ESG scoring firms, carbon accounting platforms
206+
207+
CARBON_MONITOR_BASE = "https://carbonmonitor-ds.adeel.cloud"
208+
209+
210+
class CarbonMonitorAdapter:
211+
"""
212+
Real CO2 emission data by country/sector. Updated near-daily.
213+
Used as transition risk input — high-emission countries = higher policy risk.
214+
"""
215+
216+
async def get_country_emissions(self, country_code: str) -> dict:
217+
"""
218+
Get recent daily CO2 emissions for a country.
219+
Returns value in MtCO2/day and normalized transition risk score.
220+
"""
221+
async with httpx.AsyncClient(timeout=15.0) as client:
222+
r = await client.get(
223+
f"{CARBON_MONITOR_BASE}/api/data",
224+
params={
225+
"countries": country_code,
226+
"sectors": "Total",
227+
"date_from": str(date.today() - timedelta(days=30)),
228+
"date_to": str(date.today()),
229+
}
230+
)
231+
if r.status_code != 200:
232+
return {"emissions_mtco2_day": 0.0, "transition_risk_factor": 0.5}
233+
data = r.json()
234+
235+
values = [d["value"] for d in data.get("data", []) if d.get("value")]
236+
if not values:
237+
return {"emissions_mtco2_day": 0.0, "transition_risk_factor": 0.5}
238+
239+
avg = sum(values) / len(values)
240+
241+
# China ~30 MtCO2/day, USA ~15, EU ~10, India ~8
242+
# Normalize relative to global high emitters
243+
GLOBAL_MAX = 35.0
244+
transition_risk = min(avg / GLOBAL_MAX, 1.0)
245+
246+
return {
247+
"country": country_code,
248+
"avg_emissions_30d": round(avg, 2),
249+
"unit": "MtCO2/day",
250+
"transition_risk_factor": round(transition_risk, 3),
251+
}
252+
253+
254+
# ─── ECMWF Open Data ──────────────────────────────────────────────────────────
255+
# Free. The same ECMWF model used by meteorologists worldwide.
256+
# Data in GRIB2 format — binary, needs eccodes library to parse
257+
# Better to use Open-Meteo which wraps ECMWF in clean JSON API
258+
259+
# For direct ECMWF integration via their Python client:
260+
# pip install ecmwf-opendata
261+
262+
class ECMWFOpenDataAdapter:
263+
"""
264+
Direct ECMWF Open Data access.
265+
Downloads GRIB2 forecast files and parses meteorological fields.
266+
"""
267+
268+
async def get_ensemble_forecast(
269+
self, lat: float, lon: float, step_hours: int = 240
270+
) -> dict:
271+
"""
272+
Pull ENS (51-member ensemble) forecast for a location.
273+
Ensemble spread = uncertainty = risk factor.
274+
275+
Requires: pip install ecmwf-opendata cfgrib xarray
276+
"""
277+
try:
278+
from ecmwf.opendata import Client
279+
import xarray as xr
280+
281+
client = Client(source="ecmwf")
282+
client.retrieve(
283+
step=list(range(0, step_hours + 6, 6)),
284+
type="ef", # ENS forecast
285+
param=["2t", "tp", "10u", "10v"], # temp, precip, wind
286+
target="/tmp/ecmwf_ens.grib2",
287+
)
288+
289+
ds = xr.open_dataset(
290+
"/tmp/ecmwf_ens.grib2",
291+
engine="cfgrib",
292+
filter_by_keys={"typeOfLevel": "heightAboveGround"},
293+
)
294+
295+
# Select nearest gridpoint to asset location
296+
point = ds.sel(latitude=lat, longitude=lon, method="nearest")
297+
298+
return {
299+
"temp_mean_k": float(point["t2m"].mean()),
300+
"temp_spread_k": float(point["t2m"].std()),
301+
"precip_mean_m": float(point["tp"].mean()),
302+
"precip_spread_m": float(point["tp"].std()),
303+
"source": "ECMWF ENS 51-member",
304+
"step_hours": step_hours,
305+
}
306+
307+
except ImportError:
308+
log.warning("ecmwf-opendata not installed. Falling back to Open-Meteo.")
309+
adapter = OpenMeteoAdapter()
310+
fc = await adapter.get_forecast(lat, lon)
311+
return {"source": "open-meteo-fallback", "forecast": fc}
312+
313+
314+
# ─── Master Physical Risk Scorer ──────────────────────────────────────────────
315+
316+
class PhysicalRiskScorer:
317+
"""
318+
Combines multiple free data sources into a single physical risk score
319+
for a Prexus asset. This is the real pipeline.
320+
"""
321+
322+
def __init__(self, nasa_firms_key: str = ""):
323+
self.weather = OpenMeteoAdapter()
324+
self.fire = NASAFirmsAdapter(nasa_firms_key) if nasa_firms_key else None
325+
self.carbon = CarbonMonitorAdapter()
326+
327+
async def score_asset(
328+
self,
329+
lat: float,
330+
lon: float,
331+
country_code: str,
332+
horizon_days: int = 30,
333+
) -> dict:
334+
"""
335+
Full pipeline: pull all sources, compute composite physical risk score.
336+
This replaces the fake static data in Meteorium.
337+
"""
338+
tasks = [
339+
self.weather.get_forecast(lat, lon, min(horizon_days, 16)),
340+
self.weather.get_historical_baseline(lat, lon),
341+
self.carbon.get_country_emissions(country_code),
342+
]
343+
344+
if self.fire:
345+
tasks.append(self.fire.get_fire_risk(lat, lon))
346+
347+
results = await asyncio.gather(*tasks, return_exceptions=True)
348+
349+
forecast, baseline, carbon_data = results[0], results[1], results[2]
350+
fire_data = results[3] if self.fire and len(results) > 3 else None
351+
352+
# Handle errors gracefully — some sources may be unavailable
353+
if isinstance(forecast, Exception):
354+
log.warning(f"Weather forecast failed: {forecast}")
355+
forecast = []
356+
if isinstance(baseline, Exception):
357+
baseline = {}
358+
if isinstance(carbon_data, Exception):
359+
carbon_data = {}
360+
361+
# Compute peak risk over forecast period
362+
heat_risk = max((w.heat_risk_score for w in forecast), default=0.0)
363+
flood_risk = max((w.flood_risk_score for w in forecast), default=0.0)
364+
fire_risk = (fire_data or {}).get("fire_risk_score", 0.0)
365+
if isinstance(fire_risk, Exception):
366+
fire_risk = 0.0
367+
368+
# Composite physical risk: weighted combination
369+
physical_risk = (
370+
heat_risk * 0.30 +
371+
flood_risk * 0.40 +
372+
fire_risk * 0.30
373+
)
374+
375+
return {
376+
"physical_risk": round(physical_risk, 3),
377+
"heat_risk": round(heat_risk, 3),
378+
"flood_risk": round(flood_risk, 3),
379+
"fire_risk": round(fire_risk, 3),
380+
"transition_risk": round(carbon_data.get("transition_risk_factor", 0.5), 3),
381+
"composite_risk": round((physical_risk * 0.6 + carbon_data.get("transition_risk_factor", 0.5) * 0.4), 3),
382+
"sources": {
383+
"weather": "Open-Meteo (ECMWF-based)",
384+
"fire": "NASA FIRMS VIIRS 375m" if self.fire else "disabled",
385+
"carbon": "Carbon Monitor",
386+
"baseline": "ERA5 Historical Reanalysis",
387+
},
388+
"as_of": datetime.utcnow().isoformat() + "Z",
389+
}
390+

0 commit comments

Comments
 (0)