From 90ea6b31c39e8863ac0188004d7101b035ac54c6 Mon Sep 17 00:00:00 2001 From: blalterman Date: Fri, 23 Jan 2026 03:14:11 -0500 Subject: [PATCH 1/3] feat(solar_activity): add ICMECAT class for HELIO4CAST ICME catalog access Add new solarwindpy.solar_activity.icme module providing class-based access to the HELIO4CAST Interplanetary Coronal Mass Ejection Catalog. Features: - ICMECAT class with properties: data, intervals, strict_intervals, spacecraft - Methods: filter(), contains(), summary(), get_events_in_range() - Case-insensitive spacecraft filtering (handles ULYSSES vs Ulysses) - Interval fallback logic: mo_end_time -> mo_start_time + 24h -> icme_start_time + 24h - Optional caching with 30-day staleness check - Proper Helio4cast Rules of the Road in docstrings (dated January 2026) Tests: - 43 unit tests (mocked, no network) - 17 smoke tests (imports, docstrings, structure) - 8 integration tests (live network) Co-Authored-By: Claude Opus 4.5 --- solarwindpy/solar_activity/icme/__init__.py | 34 ++ solarwindpy/solar_activity/icme/icmecat.py | 397 ++++++++++++++ tests/solar_activity/icme/__init__.py | 1 + tests/solar_activity/icme/conftest.py | 78 +++ tests/solar_activity/icme/test_icmecat.py | 503 ++++++++++++++++++ .../icme/test_icmecat_integration.py | 87 +++ .../solar_activity/icme/test_icmecat_smoke.py | 114 ++++ 7 files changed, 1214 insertions(+) create mode 100644 solarwindpy/solar_activity/icme/__init__.py create mode 100644 solarwindpy/solar_activity/icme/icmecat.py create mode 100644 tests/solar_activity/icme/__init__.py create mode 100644 tests/solar_activity/icme/conftest.py create mode 100644 tests/solar_activity/icme/test_icmecat.py create mode 100644 tests/solar_activity/icme/test_icmecat_integration.py create mode 100644 tests/solar_activity/icme/test_icmecat_smoke.py diff --git a/solarwindpy/solar_activity/icme/__init__.py b/solarwindpy/solar_activity/icme/__init__.py new file mode 100644 index 00000000..3444fd3a --- /dev/null +++ 
"""ICMECAT class for accessing the HELIO4CAST ICME catalog."""

import logging
import time
from pathlib import Path
from typing import Optional, Union

import numpy as np
import pandas as pd


ICMECAT_URL = "https://helioforecast.space/static/sync/icmecat/HELIO4CAST_ICMECAT_v23.csv"

SPACECRAFT_NAMES = frozenset([
    "Ulysses", "Wind", "STEREO-A", "STEREO-B", "ACE",
    "Solar Orbiter", "PSP", "BepiColombo", "Juno", "MESSENGER",
    "VEX", "MAVEN", "Cassini",
])

_DATETIME_COLUMNS = ["icme_start_time", "mo_start_time", "mo_end_time"]

# Name of the cached catalog inside ``cache_dir`` and the age (in days)
# after which the cache is considered stale and re-downloaded.
_CACHE_FILENAME = "icmecat.parquet"
_CACHE_MAX_AGE_DAYS = 30
_SECONDS_PER_DAY = 86400

# Errors that mean "the cache is unusable", not "the catalog is unusable":
# I/O failures, corrupt files, or a missing parquet engine.
_CACHE_ERRORS = (OSError, ValueError, ImportError)

# Length of the substitute interval used when an event has no
# magnetic-obstacle end time.
_FALLBACK_DURATION = pd.Timedelta(hours=24)


class ICMECAT:
    """Access the HELIO4CAST Interplanetary Coronal Mass Ejection Catalog.

    See https://helioforecast.space/icmecat for the most up-to-date rules of
    the road. As of January 2026, they are:

        If this catalog is used for results that are published in peer-reviewed
        international journals, please contact chris.moestl@outlook.com for
        possible co-authorship.

        Cite the catalog with: Möstl et al. (2020)
        DOI: 10.6084/m9.figshare.6356420

    Parameters
    ----------
    spacecraft : str, optional
        If provided, filter catalog to this spacecraft on load
        (case-insensitive). Valid names: Ulysses, Wind, ACE, STEREO-A,
        STEREO-B, etc.
    cache_dir : Path, optional
        Directory for caching downloaded data. If None, no caching.

    Attributes
    ----------
    data : pd.DataFrame
        Raw catalog data (filtered if spacecraft was specified).
    intervals : pd.DataFrame
        Prepared intervals with computed interval_end column.
    strict_intervals : pd.DataFrame
        Intervals with valid mo_end_time only (no fallbacks used).
    spacecraft : str or None
        Spacecraft filter applied (None if full catalog).

    Example
    -------
    >>> cat = ICMECAT(spacecraft="Ulysses")
    >>> print(f"Found {len(cat)} Ulysses ICMEs")
    >>> intervals = cat.intervals
    >>> print(intervals[["icme_start_time", "mo_end_time", "interval_end"]])
    >>>
    >>> # Check which observations fall within ICME intervals
    >>> in_icme = cat.contains(observations.index)
    """

    def __init__(
        self,
        spacecraft: Optional[str] = None,
        cache_dir: Optional[Path] = None,
    ):
        self._init_logger()
        self._spacecraft = spacecraft
        self._cache_dir = Path(cache_dir) if cache_dir else None
        self._data: Optional[pd.DataFrame] = None
        self._intervals: Optional[pd.DataFrame] = None

        self._load_data()

        if spacecraft is not None:
            self._filter_by_spacecraft(spacecraft)

        self._prepare_intervals()

    def _init_logger(self):
        """Initialize logger for this instance."""
        self._logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

    @property
    def logger(self) -> logging.Logger:
        """Logger instance."""
        return self._logger

    @property
    def spacecraft(self) -> Optional[str]:
        """Spacecraft filter applied, or None if full catalog."""
        return self._spacecraft

    @property
    def data(self) -> pd.DataFrame:
        """Raw ICMECAT data (filtered if spacecraft specified)."""
        return self._data

    @property
    def intervals(self) -> pd.DataFrame:
        """Prepared intervals with computed interval_end.

        The interval_end column uses fallbacks:
        1. mo_end_time if available
        2. mo_start_time + 24h if mo_end_time is NaT
        3. icme_start_time + 24h if both are NaT
        """
        return self._intervals

    @property
    def strict_intervals(self) -> pd.DataFrame:
        """Intervals with valid mo_end_time only (no fallbacks)."""
        return self._intervals[self._intervals["mo_end_time"].notna()].copy()

    def __len__(self) -> int:
        """Number of ICME events."""
        return len(self._data) if self._data is not None else 0

    def __repr__(self) -> str:
        sc_str = f"spacecraft={self._spacecraft!r}" if self._spacecraft else "all spacecraft"
        return f"ICMECAT({sc_str}, n_events={len(self)})"

    # -------------------------------------------------------------------------
    # Data Loading
    # -------------------------------------------------------------------------

    def _load_data(self) -> None:
        """Load ICMECAT data from a fresh cache if there is one, else download."""
        cached = self._try_load_cache()
        if cached is not None:
            self._data = cached
            self.logger.info("Loaded from cache: %d events", len(self._data))
            return

        self._download()

    def _try_load_cache(self) -> Optional[pd.DataFrame]:
        """Try to load from cache.

        Returns
        -------
        pd.DataFrame or None
            The cached catalog, or None when caching is disabled, the cache
            file is missing, older than 30 days, or cannot be read.
        """
        if self._cache_dir is None:
            return None

        cache_path = self._cache_dir / _CACHE_FILENAME
        if not cache_path.exists():
            return None

        # Check age - re-download if the file is too old
        age_days = (time.time() - cache_path.stat().st_mtime) / _SECONDS_PER_DAY
        if age_days > _CACHE_MAX_AGE_DAYS:
            self.logger.info("Cache stale (%.0f days), re-downloading", age_days)
            return None

        try:
            return pd.read_parquet(cache_path)
        except _CACHE_ERRORS as err:
            # A broken cache must not prevent use of the catalog; fall back
            # to a fresh download, which also rewrites the cache file.
            self.logger.warning(
                "Could not read cache %s (%s), re-downloading", cache_path, err
            )
            return None

    def _download(self) -> None:
        """Download ICMECAT from helioforecast.space and cache it if configured."""
        self.logger.info("Downloading ICMECAT from %s", ICMECAT_URL)

        self._data = pd.read_csv(ICMECAT_URL, parse_dates=_DATETIME_COLUMNS)

        self.logger.info("Downloaded %d ICME events", len(self._data))

        # Save to cache if configured
        if self._cache_dir is None:
            return

        cache_path = self._cache_dir / _CACHE_FILENAME
        try:
            self._cache_dir.mkdir(parents=True, exist_ok=True)
            self._data.to_parquet(cache_path, index=False)
        except _CACHE_ERRORS as err:
            # The download succeeded, so a failed cache write is only a
            # lost optimisation, not a reason to abort.
            self.logger.warning("Could not write cache %s (%s)", cache_path, err)
        else:
            self.logger.info("Cached to %s", cache_path)

    # -------------------------------------------------------------------------
    # Filtering
    # -------------------------------------------------------------------------

    def _resolve_spacecraft_name(self, spacecraft: str) -> str:
        """Map a user-supplied name to the catalog's spelling (case-insensitive).

        Parameters
        ----------
        spacecraft : str
            Name as typed by the user (e.g. "Ulysses" for catalog "ULYSSES").

        Returns
        -------
        str
            The matching ``sc_insitu`` value, or ``spacecraft`` unchanged when
            nothing matches (a warning is logged; filtering on it yields an
            empty catalog).
        """
        # Missing spacecraft labels cannot match any request; skip them so
        # ``.lower()`` is only called on real names.
        available = self._data["sc_insitu"].dropna().unique()
        name_map = {str(name).lower(): name for name in available}

        try:
            return name_map[spacecraft.lower()]
        except KeyError:
            self.logger.warning(
                "Spacecraft '%s' not found. Available: %s",
                spacecraft, sorted(str(name) for name in available)
            )
            return spacecraft

    def _filter_by_spacecraft(self, spacecraft: str) -> None:
        """Filter data in place to the specified spacecraft (case-insensitive)."""
        actual_name = self._resolve_spacecraft_name(spacecraft)

        self._data = self._data[self._data["sc_insitu"] == actual_name].copy()
        self._spacecraft = spacecraft  # Keep user's original spelling for display
        self.logger.info("Filtered to %s: %d events", actual_name, len(self._data))

    def filter(self, spacecraft: str) -> "ICMECAT":
        """Return new ICMECAT instance filtered to spacecraft.

        Parameters
        ----------
        spacecraft : str
            Spacecraft name (e.g., "Ulysses", "Wind"). Case-insensitive.

        Returns
        -------
        ICMECAT
            New instance with filtered data (does not re-download). Empty if
            the spacecraft is not in the catalog.
        """
        actual_name = self._resolve_spacecraft_name(spacecraft)

        # Bypass __init__ so no download happens; type(self) keeps subclasses
        # intact.
        new = object.__new__(type(self))
        new._init_logger()
        new._spacecraft = spacecraft  # Keep user's original spelling for display
        new._cache_dir = self._cache_dir
        new._data = self._data[self._data["sc_insitu"] == actual_name].copy()
        new._intervals = None
        new._prepare_intervals()
        return new

    # -------------------------------------------------------------------------
    # Interval Preparation
    # -------------------------------------------------------------------------

    def _prepare_intervals(self) -> None:
        """Build the intervals DataFrame, including the computed interval_end.

        ``interval_end`` is ``mo_end_time`` where present, otherwise
        ``mo_start_time + 24h``, otherwise ``icme_start_time + 24h``.
        """
        columns = [
            "icmecat_id",
            "icme_start_time",
            "mo_start_time",
            "mo_end_time",
            "mo_sc_heliodistance",
            "mo_sc_lat_heeq",
            "mo_sc_long_heeq",
        ]

        # Select available columns
        available = [c for c in columns if c in self._data.columns]
        result = self._data[available].copy()

        if len(result) == 0:
            result["interval_end"] = pd.Series(dtype="datetime64[ns]")
            self._intervals = result
            return

        # Each fallback only fills the rows that are still missing.
        interval_end = result["mo_end_time"].copy()
        for fallback_column in ("mo_start_time", "icme_start_time"):
            if fallback_column in result.columns:
                interval_end = interval_end.fillna(
                    result[fallback_column] + _FALLBACK_DURATION
                )

        result["interval_end"] = interval_end
        self._intervals = result

    # -------------------------------------------------------------------------
    # Query Methods
    # -------------------------------------------------------------------------

    def get_events_in_range(
        self,
        start: pd.Timestamp,
        end: pd.Timestamp,
    ) -> pd.DataFrame:
        """Get ICME events that overlap with a time range.

        Parameters
        ----------
        start, end : pd.Timestamp
            Time range to query.

        Returns
        -------
        pd.DataFrame
            Events where icme_start_time <= end AND interval_end >= start.
        """
        mask = (
            (self._intervals["icme_start_time"] <= end) &
            (self._intervals["interval_end"] >= start)
        )
        return self._intervals[mask].copy()

    def contains(self, times: Union[pd.DatetimeIndex, pd.Series]) -> pd.Series:
        """Check which timestamps fall within any ICME interval.

        Uses only strict intervals (events with valid mo_end_time) to ensure
        accurate containment checking. An interval runs from icme_start_time
        to mo_end_time, both ends inclusive.

        Parameters
        ----------
        times : pd.DatetimeIndex or pd.Series
            Timestamps to check.

        Returns
        -------
        pd.Series
            Boolean mask, True if timestamp is within any ICME interval.
            Index matches input times. NaT inputs are never contained.
        """
        if isinstance(times, pd.DatetimeIndex):
            times = times.to_series()

        nothing = pd.Series(False, index=times.index, dtype=bool)
        if len(times) == 0 or len(self._intervals) == 0:
            return nothing

        # Use strict intervals (valid mo_end_time only); an interval without
        # a start cannot contain anything either.
        intervals = self.strict_intervals
        intervals = intervals[intervals["icme_start_time"].notna()]
        if len(intervals) == 0:
            return nothing

        starts = intervals["icme_start_time"].values
        ends = intervals["mo_end_time"].values
        obs = times.values

        # Sort by start time. latest_end[k] is the latest end among the
        # k + 1 earliest-starting intervals, so "does any interval that has
        # already started still cover t" becomes a single lookup.
        order = np.argsort(starts, kind="stable")
        sorted_starts = starts[order]
        latest_end = np.maximum.accumulate(ends[order])

        # Number of intervals starting at or before each observation.
        n_started = np.searchsorted(sorted_starts, obs, side="right")
        has_candidate = n_started > 0

        mask = np.zeros(len(obs), dtype=bool)
        mask[has_candidate] = (
            latest_end[n_started[has_candidate] - 1] >= obs[has_candidate]
        )

        return pd.Series(mask, index=times.index)

    # -------------------------------------------------------------------------
    # Summary Statistics
    # -------------------------------------------------------------------------

    def summary(self) -> pd.DataFrame:
        """Summary statistics of ICME events.

        Returns
        -------
        pd.DataFrame
            Single-row DataFrame with statistics including:
            - n_events: Total number of events
            - n_strict: Events with valid mo_end_time
            - date_range_start/end: Temporal coverage
            - duration_*: Duration statistics in hours
            - spacecraft: If filtered (optional)
        """
        intervals = self._intervals

        if len(intervals) == 0:
            stats = {
                "n_events": 0,
                "n_strict": 0,
                "date_range_start": pd.NaT,
                "date_range_end": pd.NaT,
                "duration_median_hours": np.nan,
                "duration_mean_hours": np.nan,
                "duration_min_hours": np.nan,
                "duration_max_hours": np.nan,
            }
        else:
            # Duration statistics
            durations = intervals["interval_end"] - intervals["icme_start_time"]
            duration_hours = durations.dt.total_seconds() / 3600

            stats = {
                "n_events": len(intervals),
                "n_strict": len(self.strict_intervals),
                "date_range_start": intervals["icme_start_time"].min(),
                "date_range_end": intervals["interval_end"].max(),
                "duration_median_hours": duration_hours.median(),
                "duration_mean_hours": duration_hours.mean(),
                "duration_min_hours": duration_hours.min(),
                "duration_max_hours": duration_hours.max(),
            }

        if self._spacecraft:
            stats["spacecraft"] = self._spacecraft

        return pd.DataFrame([stats])
"""Shared fixtures for ICMECAT tests."""

import numpy as np
import pandas as pd
import pytest


_MOCK_SPACECRAFT = ["Ulysses", "Wind", "STEREO-A", "STEREO-B", "ACE"]


def build_mock_icmecat_frame(n_events: int = 50, seed: int = 42) -> pd.DataFrame:
    """Build a reproducible mock catalog with the ICMECAT column layout.

    Events are spaced 30 days apart starting 2000-01-01. Within each event
    icme_start_time < mo_start_time < mo_end_time, and roughly 10% of the
    mo_end_time values are NaT so the fallback logic gets exercised.

    A private ``numpy.random.Generator`` is used, so building the frame does
    not reseed or advance the global NumPy random state that other tests in
    the session may depend on.

    Parameters
    ----------
    n_events : int
        Number of rows to generate.
    seed : int
        Seed for the private random generator.

    Returns
    -------
    pd.DataFrame
        One row per mock event.
    """
    rng = np.random.default_rng(seed)
    base_date = pd.Timestamp("2000-01-01")
    event_day = np.arange(n_events) * 30

    def _dates(low: int, high: int) -> pd.Series:
        # Day offsets are drawn from [low, high) relative to each event's slot.
        offsets = rng.integers(low, high, n_events)
        return pd.Series(base_date + pd.to_timedelta(event_day + offsets, unit="D"))

    icme_start_time = _dates(0, 10)
    mo_start_time = _dates(10, 15)
    mo_end_time = _dates(15, 25)
    mo_end_time = mo_end_time.mask(rng.random(n_events) <= 0.1)  # ~10% missing

    return pd.DataFrame({
        "icmecat_id": [f"ICME_{i:04d}" for i in range(n_events)],
        "sc_insitu": rng.choice(_MOCK_SPACECRAFT, n_events),
        "icme_start_time": icme_start_time,
        "mo_start_time": mo_start_time,
        "mo_end_time": mo_end_time,
        "mo_sc_heliodistance": rng.uniform(0.7, 5.4, n_events),
        "mo_sc_lat_heeq": rng.uniform(-80, 80, n_events),
        "mo_sc_long_heeq": rng.uniform(0, 360, n_events),
    })


def build_simple_icme_intervals() -> pd.DataFrame:
    """Build three hand-written Ulysses events; the last has no mo_end_time."""
    return pd.DataFrame({
        "icmecat_id": ["TEST_001", "TEST_002", "TEST_003"],
        "sc_insitu": ["Ulysses", "Ulysses", "Ulysses"],
        "icme_start_time": [
            pd.Timestamp("2000-01-10"),
            pd.Timestamp("2000-02-15"),
            pd.Timestamp("2000-03-20"),
        ],
        "mo_start_time": [
            pd.Timestamp("2000-01-11"),
            pd.Timestamp("2000-02-16"),
            pd.Timestamp("2000-03-21"),
        ],
        "mo_end_time": [
            pd.Timestamp("2000-01-15"),
            pd.Timestamp("2000-02-20"),
            pd.NaT,  # Missing - will use fallback
        ],
        "mo_sc_heliodistance": [1.0, 2.0, 3.0],
        "mo_sc_lat_heeq": [10.0, 20.0, 30.0],
        "mo_sc_long_heeq": [100.0, 200.0, 300.0],
    })


@pytest.fixture
def mock_icmecat_csv_data():
    """Create mock ICMECAT CSV data matching real catalog structure.

    Returns DataFrame with realistic column names and data types
    matching HELIO4CAST ICMECAT v2.3 format.
    """
    return build_mock_icmecat_frame()


@pytest.fixture
def sample_observation_times():
    """Create sample observation timestamps for containment testing."""
    return pd.Series(
        pd.date_range("2000-01-01", "2005-12-31", freq="4min"),
        name="time"
    )


@pytest.fixture
def simple_icme_intervals():
    """Simple, predictable ICME intervals for testing containment."""
    return build_simple_icme_intervals()
"""Unit tests for ICMECAT class.

Tests cover:
- Initialization and data loading
- Spacecraft filtering
- Interval preparation with fallbacks
- Containment checking
- Summary statistics
- Property types, shapes, and dtypes

Every catalog is built from an in-memory DataFrame with ``pandas.read_csv``
patched, so no test touches the network.
"""

from unittest.mock import patch

import pandas as pd
import pytest

from solarwindpy.solar_activity.icme import ICMECAT


@pytest.fixture
def load_catalog():
    """Return a factory that builds an ICMECAT from a DataFrame (download mocked)."""

    def _load(frame, **kwargs):
        with patch("pandas.read_csv", return_value=frame):
            return ICMECAT(**kwargs)

    return _load


@pytest.fixture
def catalog(load_catalog, mock_icmecat_csv_data):
    """Unfiltered catalog built from the 50-event mock data."""
    return load_catalog(mock_icmecat_csv_data)


@pytest.fixture
def ulysses_catalog(load_catalog, mock_icmecat_csv_data):
    """Mock catalog filtered to Ulysses at construction time."""
    return load_catalog(mock_icmecat_csv_data, spacecraft="Ulysses")


@pytest.fixture
def simple_catalog(load_catalog, simple_icme_intervals):
    """Catalog built from the three hand-written events."""
    return load_catalog(simple_icme_intervals)


def _events(icme_start, mo_start, mo_end):
    """Build a minimal Ulysses catalog frame from parallel lists of timestamps."""
    n_events = len(icme_start)
    return pd.DataFrame({
        "icmecat_id": [f"TEST_{i}" for i in range(n_events)],
        "sc_insitu": ["Ulysses"] * n_events,
        "icme_start_time": icme_start,
        "mo_start_time": mo_start,
        "mo_end_time": mo_end,
    })


class TestICMECATInitialization:
    """Test ICMECAT class initialization."""

    def test_init_downloads_data(self, catalog):
        """ICMECAT() downloads data on initialization."""
        assert catalog.data is not None
        assert len(catalog) > 0

    def test_init_with_spacecraft_filters(self, ulysses_catalog):
        """ICMECAT(spacecraft='X') filters to that spacecraft."""
        assert ulysses_catalog.spacecraft == "Ulysses"
        assert (ulysses_catalog.data["sc_insitu"] == "Ulysses").all()

    def test_init_without_spacecraft_keeps_all(self, catalog):
        """ICMECAT() without spacecraft keeps all events."""
        assert catalog.spacecraft is None
        assert catalog.data["sc_insitu"].nunique() > 1


class TestICMECATDataProperty:
    """Test ICMECAT.data property."""

    def test_data_is_dataframe(self, catalog):
        """data property returns a DataFrame."""
        assert isinstance(catalog.data, pd.DataFrame)

    def test_data_has_required_columns(self, catalog):
        """data has all required columns."""
        required = ["icmecat_id", "sc_insitu", "icme_start_time", "mo_end_time"]
        for col in required:
            assert col in catalog.data.columns, f"Missing column: {col}"

    def test_data_datetime_dtypes(self, catalog):
        """Datetime columns have datetime64 dtype."""
        datetime_cols = ["icme_start_time", "mo_start_time", "mo_end_time"]
        for col in datetime_cols:
            assert pd.api.types.is_datetime64_any_dtype(catalog.data[col]), \
                f"{col} should be datetime64, got {catalog.data[col].dtype}"

    def test_data_shape_nonzero(self, catalog):
        """data has non-zero rows."""
        assert catalog.data.shape[0] > 0
        assert catalog.data.shape[1] >= 5


class TestICMECATIntervalsProperty:
    """Test ICMECAT.intervals property."""

    def test_intervals_is_dataframe(self, catalog):
        """intervals property returns a DataFrame."""
        assert isinstance(catalog.intervals, pd.DataFrame)

    def test_intervals_has_interval_end(self, catalog):
        """intervals has computed interval_end column."""
        assert "interval_end" in catalog.intervals.columns

    def test_interval_end_no_nulls(self, catalog):
        """interval_end has no NaN values (fallbacks applied)."""
        assert catalog.intervals["interval_end"].notna().all()

    def test_interval_end_dtype_datetime(self, catalog):
        """interval_end is datetime64."""
        assert pd.api.types.is_datetime64_any_dtype(
            catalog.intervals["interval_end"]
        )

    def test_interval_end_after_start(self, catalog):
        """interval_end >= icme_start_time for all events."""
        intervals = catalog.intervals
        assert (intervals["interval_end"] >= intervals["icme_start_time"]).all()


class TestICMECATIntervalFallbacks:
    """Test interval_end fallback logic."""

    def test_fallback_uses_mo_end_when_available(self, simple_catalog):
        """When mo_end_time exists, interval_end equals mo_end_time."""
        # First event has mo_end_time
        first = simple_catalog.intervals.iloc[0]
        assert first["interval_end"] == pd.Timestamp("2000-01-15")

    def test_fallback_mo_start_plus_24h(self, load_catalog):
        """Fallback: mo_end_time missing -> mo_start_time + 24h."""
        cat = load_catalog(_events(
            icme_start=[pd.Timestamp("2000-01-01")],
            mo_start=[pd.Timestamp("2000-01-02")],
            mo_end=[pd.NaT],
        ))

        expected = pd.Timestamp("2000-01-03")  # mo_start + 24h
        assert cat.intervals.iloc[0]["interval_end"] == expected

    def test_fallback_icme_start_plus_24h(self, load_catalog):
        """Fallback: both missing -> icme_start_time + 24h."""
        cat = load_catalog(_events(
            icme_start=[pd.Timestamp("2000-01-01")],
            mo_start=[pd.NaT],
            mo_end=[pd.NaT],
        ))

        expected = pd.Timestamp("2000-01-02")  # icme_start + 24h
        assert cat.intervals.iloc[0]["interval_end"] == expected


class TestICMECATStrictIntervals:
    """Test ICMECAT.strict_intervals property."""

    def test_strict_intervals_excludes_nat(self, catalog):
        """strict_intervals only includes events with valid mo_end_time."""
        assert catalog.strict_intervals["mo_end_time"].notna().all()

    def test_strict_intervals_is_subset(self, catalog):
        """strict_intervals is subset of intervals."""
        assert len(catalog.strict_intervals) <= len(catalog.intervals)

    def test_strict_intervals_returns_copy(self, catalog):
        """strict_intervals returns a copy, not a view."""
        snapshot = catalog.intervals.copy()

        strict = catalog.strict_intervals
        strict["icmecat_id"] = "MODIFIED"

        # Mutating the returned frame must leave the catalog untouched.
        pd.testing.assert_frame_equal(catalog.intervals, snapshot)


class TestICMECATFilter:
    """Test ICMECAT.filter() method."""

    def test_filter_returns_new_instance(self, catalog):
        """filter() returns a new ICMECAT instance."""
        filtered = catalog.filter("Ulysses")

        assert isinstance(filtered, ICMECAT)
        assert filtered is not catalog

    def test_filter_sets_spacecraft(self, catalog):
        """filter() sets spacecraft property on new instance."""
        filtered = catalog.filter("Ulysses")

        assert filtered.spacecraft == "Ulysses"
        assert catalog.spacecraft is None  # Original unchanged

    def test_filter_only_includes_spacecraft(self, catalog):
        """filter() only includes events from specified spacecraft."""
        filtered = catalog.filter("Ulysses")

        assert (filtered.data["sc_insitu"] == "Ulysses").all()

    def test_filter_unknown_spacecraft_empty(self, catalog):
        """filter() with unknown spacecraft returns empty catalog."""
        assert len(catalog.filter("NONEXISTENT")) == 0


class TestICMECATContains:
    """Test ICMECAT.contains() method."""

    def test_contains_returns_series(self, simple_catalog):
        """contains() returns a boolean Series."""
        result = simple_catalog.contains(pd.Series([pd.Timestamp("2000-01-12")]))

        assert isinstance(result, pd.Series)
        assert result.dtype == bool

    def test_contains_preserves_index(self, simple_catalog):
        """contains() preserves input index."""
        times = pd.Series(
            [pd.Timestamp("2000-01-12")],
            index=["custom_index"]
        )
        result = simple_catalog.contains(times)

        assert result.index.tolist() == ["custom_index"]

    def test_contains_true_inside_interval(self, simple_catalog):
        """contains() returns True for times inside an interval."""
        # 2000-01-12 is inside first interval (01-10 to 01-15)
        result = simple_catalog.contains(pd.Series([pd.Timestamp("2000-01-12")]))

        assert result.iloc[0]

    def test_contains_false_outside_interval(self, simple_catalog):
        """contains() returns False for times outside all intervals."""
        # 2000-01-05 is before first interval
        result = simple_catalog.contains(pd.Series([pd.Timestamp("2000-01-05")]))

        assert not result.iloc[0]

    def test_contains_boundary_start_inclusive(self, simple_catalog):
        """contains() includes interval start time."""
        # Exactly at start of first interval
        result = simple_catalog.contains(pd.Series([pd.Timestamp("2000-01-10")]))

        assert result.iloc[0]

    def test_contains_boundary_end_inclusive(self, simple_catalog):
        """contains() includes interval end time."""
        # Exactly at end of first interval
        result = simple_catalog.contains(pd.Series([pd.Timestamp("2000-01-15")]))

        assert result.iloc[0]

    def test_contains_accepts_datetimeindex(self, simple_catalog):
        """contains() accepts DatetimeIndex input."""
        times = pd.DatetimeIndex(["2000-01-12", "2000-01-05"])
        result = simple_catalog.contains(times)

        assert isinstance(result, pd.Series)
        assert len(result) == 2

    def test_contains_empty_input(self, simple_catalog):
        """contains() handles empty input gracefully."""
        result = simple_catalog.contains(pd.Series([], dtype="datetime64[ns]"))

        assert len(result) == 0
        assert result.dtype == bool

    def test_contains_nested_intervals(self, load_catalog):
        """A long event still covers times after a shorter, later-starting one ends."""
        cat = load_catalog(_events(
            icme_start=[pd.Timestamp("2001-01-01"), pd.Timestamp("2001-01-05")],
            mo_start=[pd.Timestamp("2001-01-02"), pd.Timestamp("2001-01-05")],
            mo_end=[pd.Timestamp("2001-01-31"), pd.Timestamp("2001-01-06")],
        ))

        times = pd.DatetimeIndex(["2001-01-20", "2001-02-01"])
        assert cat.contains(times).tolist() == [True, False]


class TestICMECATSummary:
    """Test ICMECAT.summary() method."""

    def test_summary_returns_dataframe(self, catalog):
        """summary() returns a DataFrame."""
        assert isinstance(catalog.summary(), pd.DataFrame)

    def test_summary_has_event_count(self, catalog):
        """summary() includes event count."""
        result = catalog.summary()

        assert "n_events" in result.columns
        assert result["n_events"].iloc[0] == len(catalog)

    def test_summary_has_strict_count(self, catalog):
        """summary() includes strict event count."""
        result = catalog.summary()

        assert "n_strict" in result.columns
        assert result["n_strict"].iloc[0] == len(catalog.strict_intervals)

    def test_summary_has_duration_stats(self, catalog):
        """summary() includes duration statistics."""
        result = catalog.summary()

        duration_cols = ["duration_median_hours", "duration_mean_hours"]
        for col in duration_cols:
            assert col in result.columns

    def test_summary_includes_spacecraft_when_filtered(self, ulysses_catalog):
        """summary() includes spacecraft when filtered."""
        result = ulysses_catalog.summary()

        assert "spacecraft" in result.columns
        assert result["spacecraft"].iloc[0] == "Ulysses"


class TestICMECATDunderMethods:
    """Test ICMECAT special methods (__len__, __repr__)."""

    def test_len_returns_event_count(self, catalog):
        """len(ICMECAT) returns number of events."""
        assert len(catalog) == len(catalog.data)

    def test_repr_includes_class_name(self, catalog):
        """repr includes class name."""
        assert "ICMECAT" in repr(catalog)

    def test_repr_includes_event_count(self, catalog):
        """repr includes event count."""
        assert str(len(catalog)) in repr(catalog)

    def test_repr_includes_spacecraft_when_filtered(self, ulysses_catalog):
        """repr includes spacecraft when filtered."""
        assert "Ulysses" in repr(ulysses_catalog)


class TestICMECATEdgeCases:
    """Test edge cases and error handling."""

    def test_empty_catalog_after_filter(self, load_catalog, mock_icmecat_csv_data):
        """Handles filtering to zero events gracefully."""
        cat = load_catalog(mock_icmecat_csv_data, spacecraft="NONEXISTENT")

        assert len(cat) == 0
        assert len(cat.intervals) == 0
        assert len(cat.strict_intervals) == 0

    def test_all_mo_end_time_missing(self, load_catalog):
        """Handles case where all mo_end_time are NaT."""
        cat = load_catalog(_events(
            icme_start=[pd.Timestamp("2000-01-01"), pd.Timestamp("2000-02-01")],
            mo_start=[pd.Timestamp("2000-01-02"), pd.NaT],
            mo_end=[pd.NaT, pd.NaT],
        ))

        assert cat.intervals["interval_end"].notna().all()
        assert len(cat.strict_intervals) == 0

    def test_contains_with_no_strict_intervals(self, load_catalog):
        """contains() returns False when no strict intervals exist."""
        cat = load_catalog(_events(
            icme_start=[pd.Timestamp("2000-01-01")],
            mo_start=[pd.Timestamp("2000-01-02")],
            mo_end=[pd.NaT],
        ))

        result = cat.contains(pd.Series([pd.Timestamp("2000-01-05")]))

        assert not result.iloc[0]
b/tests/solar_activity/icme/test_icmecat_integration.py @@ -0,0 +1,87 @@ +"""Integration tests for ICMECAT class. + +These tests require network access and download real data. +Mark with pytest.mark.integration to skip in CI without network. +""" + +import pytest +import pandas as pd + + +@pytest.mark.integration +@pytest.mark.slow +class TestLiveDownload: + """Integration tests that download real ICMECAT data.""" + + def test_instantiate_downloads_data(self): + """ICMECAT() downloads real data.""" + from solarwindpy.solar_activity.icme import ICMECAT + cat = ICMECAT() + + assert len(cat) > 100, "Should have >100 ICME events" + + def test_ulysses_events_exist(self): + """Real catalog contains Ulysses events.""" + from solarwindpy.solar_activity.icme import ICMECAT + cat = ICMECAT(spacecraft="Ulysses") + + assert len(cat) > 0, "Should have Ulysses events" + # Ulysses mission: 1990-2009 + min_year = cat.data["icme_start_time"].min().year + max_year = cat.data["icme_start_time"].max().year + assert min_year >= 1990 + assert max_year <= 2010 + + def test_data_types_correct(self): + """Real data has correct dtypes.""" + from solarwindpy.solar_activity.icme import ICMECAT + cat = ICMECAT() + + assert pd.api.types.is_datetime64_any_dtype(cat.data["icme_start_time"]) + assert pd.api.types.is_datetime64_any_dtype(cat.data["mo_end_time"]) + assert cat.data["icmecat_id"].dtype == object + + def test_filter_then_contains(self): + """End-to-end: filter to Ulysses, check containment.""" + from solarwindpy.solar_activity.icme import ICMECAT + cat = ICMECAT(spacecraft="Ulysses") + + # Get first strict interval + strict = cat.strict_intervals + if len(strict) > 0: + first = strict.iloc[0] + mid_time = first["icme_start_time"] + ( + first["mo_end_time"] - first["icme_start_time"] + ) / 2 + + times = pd.Series([mid_time]) + result = cat.contains(times) + + assert result.iloc[0] == True, "Mid-point should be in interval" + + def test_summary_on_real_data(self): + """summary() works on 
real data.""" + from solarwindpy.solar_activity.icme import ICMECAT + cat = ICMECAT(spacecraft="Ulysses") + + result = cat.summary() + + assert result["n_events"].iloc[0] > 0 + assert result["duration_median_hours"].iloc[0] > 0 + + +@pytest.mark.integration +class TestMultipleSpacecraft: + """Test filtering to different spacecraft.""" + + @pytest.mark.parametrize("spacecraft", ["Ulysses", "Wind", "ACE", "STEREO-A"]) + def test_filter_to_spacecraft(self, spacecraft): + """Can filter to various spacecraft.""" + from solarwindpy.solar_activity.icme import ICMECAT + cat = ICMECAT() + filtered = cat.filter(spacecraft) + + # Some spacecraft may have no events, that's OK + if len(filtered) > 0: + # Case-insensitive comparison (catalog uses ULYSSES, user may pass Ulysses) + assert all(filtered.data["sc_insitu"].str.lower() == spacecraft.lower()) diff --git a/tests/solar_activity/icme/test_icmecat_smoke.py b/tests/solar_activity/icme/test_icmecat_smoke.py new file mode 100644 index 00000000..1c4f6a41 --- /dev/null +++ b/tests/solar_activity/icme/test_icmecat_smoke.py @@ -0,0 +1,114 @@ +"""Smoke tests for ICMECAT class. + +Quick validation tests that can run without network access. +Verify module imports, docstrings, and basic instantiation. 
+""" + +import pytest + + +class TestModuleImports: + """Verify module can be imported and has expected attributes.""" + + def test_import_module(self): + """Module can be imported without errors.""" + from solarwindpy.solar_activity import icme + assert icme is not None + + def test_icmecat_class_exists(self): + """ICMECAT class is importable.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert ICMECAT is not None + + def test_url_constant_defined(self): + """ICMECAT_URL constant is defined.""" + from solarwindpy.solar_activity.icme import ICMECAT_URL + assert isinstance(ICMECAT_URL, str) + assert ICMECAT_URL.startswith("https://") + assert "helioforecast" in ICMECAT_URL + + def test_spacecraft_names_defined(self): + """SPACECRAFT_NAMES constant is defined.""" + from solarwindpy.solar_activity.icme import SPACECRAFT_NAMES + assert "Ulysses" in SPACECRAFT_NAMES + assert "Wind" in SPACECRAFT_NAMES + + +class TestDocstrings: + """Verify docstrings are present and contain required information.""" + + def test_module_docstring_exists(self): + """Module has a docstring.""" + from solarwindpy.solar_activity import icme + assert icme.__doc__ is not None + assert len(icme.__doc__) > 100 + + def test_module_docstring_has_url(self): + """Module docstring references helioforecast.space.""" + from solarwindpy.solar_activity import icme + assert "helioforecast.space/icmecat" in icme.__doc__ + + def test_module_docstring_has_rules_of_road(self): + """Module docstring includes rules of the road.""" + from solarwindpy.solar_activity import icme + assert "rules of the road" in icme.__doc__.lower() + assert "co-authorship" in icme.__doc__.lower() + + def test_module_docstring_has_citation(self): + """Module docstring includes citation info.""" + from solarwindpy.solar_activity import icme + assert "Möstl" in icme.__doc__ + assert "10.6084/m9.figshare.6356420" in icme.__doc__ + + def test_icmecat_class_docstring(self): + """ICMECAT class has a docstring.""" + from 
solarwindpy.solar_activity.icme import ICMECAT + assert ICMECAT.__doc__ is not None + + def test_icmecat_methods_have_docstrings(self): + """ICMECAT public methods have docstrings.""" + from solarwindpy.solar_activity.icme import ICMECAT + + methods = ["filter", "contains", "summary", "get_events_in_range"] + for method_name in methods: + method = getattr(ICMECAT, method_name) + assert method.__doc__ is not None, f"{method_name} missing docstring" + + +class TestClassStructure: + """Verify class has expected properties and methods.""" + + def test_icmecat_has_data_property(self): + """ICMECAT has data property.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert hasattr(ICMECAT, "data") + + def test_icmecat_has_intervals_property(self): + """ICMECAT has intervals property.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert hasattr(ICMECAT, "intervals") + + def test_icmecat_has_strict_intervals_property(self): + """ICMECAT has strict_intervals property.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert hasattr(ICMECAT, "strict_intervals") + + def test_icmecat_has_spacecraft_property(self): + """ICMECAT has spacecraft property.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert hasattr(ICMECAT, "spacecraft") + + def test_icmecat_has_filter_method(self): + """ICMECAT has filter method.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert callable(getattr(ICMECAT, "filter", None)) + + def test_icmecat_has_contains_method(self): + """ICMECAT has contains method.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert callable(getattr(ICMECAT, "contains", None)) + + def test_icmecat_has_summary_method(self): + """ICMECAT has summary method.""" + from solarwindpy.solar_activity.icme import ICMECAT + assert callable(getattr(ICMECAT, "summary", None)) From 04f63958143f7996e465be0854b85847031a6317 Mon Sep 17 00:00:00 2001 From: blalterman Date: Fri, 23 Jan 2026 19:11:49 -0500 Subject: [PATCH 
2/3] fix(solar_activity): export ICMECAT module and add doctest skip directives - Export icme module from solar_activity package for discoverability (now available as: from solarwindpy.solar_activity import icme) - Add doctest +SKIP directives to examples that require network access since ICMECAT downloads live data from helioforecast.space Co-Authored-By: Claude Opus 4.5 --- solarwindpy/solar_activity/__init__.py | 3 ++- solarwindpy/solar_activity/icme/__init__.py | 8 ++++---- solarwindpy/solar_activity/icme/icmecat.py | 10 +++++----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/solarwindpy/solar_activity/__init__.py b/solarwindpy/solar_activity/__init__.py index f4991a23..06ba8c08 100644 --- a/solarwindpy/solar_activity/__init__.py +++ b/solarwindpy/solar_activity/__init__.py @@ -5,13 +5,14 @@ :mod:`solarwindpy` and exposes convenience utilities for working with them. """ -__all__ = ["sunspot_number", "ssn", "lisird", "plots"] +__all__ = ["sunspot_number", "ssn", "lisird", "plots", "icme"] import pandas as pd from . import sunspot_number # noqa: F401 from . import lisird # noqa: F401 from . import plots # noqa: F401 +from . 
import icme # noqa: F401 ssn = sunspot_number diff --git a/solarwindpy/solar_activity/icme/__init__.py b/solarwindpy/solar_activity/icme/__init__.py index 3444fd3a..a5e3aa5b 100644 --- a/solarwindpy/solar_activity/icme/__init__.py +++ b/solarwindpy/solar_activity/icme/__init__.py @@ -15,10 +15,10 @@ Example ------- ->>> from solarwindpy.solar_activity.icme import ICMECAT ->>> cat = ICMECAT(spacecraft="Ulysses") ->>> print(f"Found {len(cat)} Ulysses ICMEs") ->>> in_icme = cat.contains(observations.index) +>>> from solarwindpy.solar_activity.icme import ICMECAT # doctest: +SKIP +>>> cat = ICMECAT(spacecraft="Ulysses") # doctest: +SKIP +>>> print(f"Found {len(cat)} Ulysses ICMEs") # doctest: +SKIP +>>> in_icme = cat.contains(observations.index) # doctest: +SKIP """ from .icmecat import ( diff --git a/solarwindpy/solar_activity/icme/icmecat.py b/solarwindpy/solar_activity/icme/icmecat.py index 99f52637..54a06b6b 100644 --- a/solarwindpy/solar_activity/icme/icmecat.py +++ b/solarwindpy/solar_activity/icme/icmecat.py @@ -52,13 +52,13 @@ class ICMECAT: Example ------- - >>> cat = ICMECAT(spacecraft="Ulysses") - >>> print(f"Found {len(cat)} Ulysses ICMEs") - >>> intervals = cat.intervals - >>> print(intervals[["icme_start_time", "mo_end_time", "interval_end"]]) + >>> cat = ICMECAT(spacecraft="Ulysses") # doctest: +SKIP + >>> print(f"Found {len(cat)} Ulysses ICMEs") # doctest: +SKIP + >>> intervals = cat.intervals # doctest: +SKIP + >>> print(intervals[["icme_start_time", "mo_end_time", "interval_end"]]) # doctest: +SKIP >>> >>> # Check which observations fall within ICME intervals - >>> in_icme = cat.contains(observations.index) + >>> in_icme = cat.contains(observations.index) # doctest: +SKIP """ def __init__( From ccae9dc8f11a35fdaaaf1188a9f67d2aa5e8589f Mon Sep 17 00:00:00 2001 From: blalterman Date: Sat, 24 Jan 2026 03:17:12 -0500 Subject: [PATCH 3/3] style: apply black formatting to docstrings Co-Authored-By: Claude Opus 4.5 --- 
solarwindpy/solar_activity/icme/icmecat.py | 49 +++++++++++++++------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/solarwindpy/solar_activity/icme/icmecat.py b/solarwindpy/solar_activity/icme/icmecat.py index 54a06b6b..c9422fa7 100644 --- a/solarwindpy/solar_activity/icme/icmecat.py +++ b/solarwindpy/solar_activity/icme/icmecat.py @@ -7,13 +7,27 @@ from typing import Optional -ICMECAT_URL = "https://helioforecast.space/static/sync/icmecat/HELIO4CAST_ICMECAT_v23.csv" - -SPACECRAFT_NAMES = frozenset([ - "Ulysses", "Wind", "STEREO-A", "STEREO-B", "ACE", - "Solar Orbiter", "PSP", "BepiColombo", "Juno", "MESSENGER", - "VEX", "MAVEN", "Cassini", -]) +ICMECAT_URL = ( + "https://helioforecast.space/static/sync/icmecat/HELIO4CAST_ICMECAT_v23.csv" +) + +SPACECRAFT_NAMES = frozenset( + [ + "Ulysses", + "Wind", + "STEREO-A", + "STEREO-B", + "ACE", + "Solar Orbiter", + "PSP", + "BepiColombo", + "Juno", + "MESSENGER", + "VEX", + "MAVEN", + "Cassini", + ] +) _DATETIME_COLUMNS = ["icme_start_time", "mo_start_time", "mo_end_time"] @@ -119,7 +133,9 @@ def __len__(self) -> int: return len(self._data) if self._data is not None else 0 def __repr__(self) -> str: - sc_str = f"spacecraft={self._spacecraft!r}" if self._spacecraft else "all spacecraft" + sc_str = ( + f"spacecraft={self._spacecraft!r}" if self._spacecraft else "all spacecraft" + ) return f"ICMECAT({sc_str}, n_events={len(self)})" # ------------------------------------------------------------------------- @@ -147,6 +163,7 @@ def _try_load_cache(self) -> Optional[pd.DataFrame]: # Check age - re-download if > 30 days old import time + age_days = (time.time() - cache_path.stat().st_mtime) / 86400 if age_days > 30: self.logger.info("Cache stale (%.0f days), re-downloading", age_days) @@ -186,7 +203,8 @@ def _filter_by_spacecraft(self, spacecraft: str) -> None: else: self.logger.warning( "Spacecraft '%s' not found. 
Available: %s", - spacecraft, sorted(available) + spacecraft, + sorted(available), ) actual_name = spacecraft # Will result in empty filter @@ -256,12 +274,16 @@ def _prepare_intervals(self) -> None: # Fallback 1: mo_start_time + 24h mask_missing = interval_end.isna() if "mo_start_time" in result.columns: - fallback = result.loc[mask_missing, "mo_start_time"] + pd.Timedelta(hours=24) + fallback = result.loc[mask_missing, "mo_start_time"] + pd.Timedelta( + hours=24 + ) interval_end.loc[mask_missing] = fallback # Fallback 2: icme_start_time + 24h mask_still_missing = interval_end.isna() - fallback = result.loc[mask_still_missing, "icme_start_time"] + pd.Timedelta(hours=24) + fallback = result.loc[mask_still_missing, "icme_start_time"] + pd.Timedelta( + hours=24 + ) interval_end.loc[mask_still_missing] = fallback result["interval_end"] = interval_end @@ -288,9 +310,8 @@ def get_events_in_range( pd.DataFrame Events where icme_start_time <= end AND interval_end >= start. """ - mask = ( - (self._intervals["icme_start_time"] <= end) & - (self._intervals["interval_end"] >= start) + mask = (self._intervals["icme_start_time"] <= end) & ( + self._intervals["interval_end"] >= start ) return self._intervals[mask].copy()