ReEDS-Model · wesleyjcole · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026 · May 5, 2026
diff --git a/aeo_updates/AEO2025_bldgs_pv_gen_ref_high_low_economic_growth_2025-04-17.xlsx b/aeo_updates/AEO2025_bldgs_pv_gen_ref_high_low_economic_growth_2025-04-17.xlsx
diff --git a/aeo_updates/AEO2026_bldgs_pv_gen_cb_high_low_economic_growth_2026-04-27.xlsx b/aeo_updates/AEO2026_bldgs_pv_gen_cb_high_low_economic_growth_2026-04-27.xlsx
diff --git a/aeo_updates/AEO_Load_Projections.py b/aeo_updates/AEO_Load_Projections.py
@@ -1,11 +1,14 @@
-# This script creates the demand projection files for AEO scenarios
-# It uses historical retail sales and behind-the-meter PV generation
-# data from EIA to calibrate historical state-level demand, and then
-# carries that forward using demand ratios calculated from the
-# AEO scenarios.
-
-# The original capability was created by Anna Schleifer in 2023, and
-# then was adapted by Wesley Cole in 2025.
+# This script creates state-level demand multiplier files for AEO scenarios.
+#
+# For historical years (2010-lastyear), state-level demand is derived from
+# EIA retail electricity sales and behind-the-meter PV generation (via EIA API).
+#
+# For projected years (lastyear+1 through 2050), demand growth is computed from:
+#   - AEO electricity consumption CSVs (outputs/ folder, by census division, in quads)
+#   - Regional rooftop PV generation from an EIA-provided Excel file (residential +
+#     commercial, by census division, in trillion Btu)
+# These two are added to reconstruct gross electricity demand, then normalized to the
+# first AEO projected year so the historical and projected series join cleanly.
 
 import os
 import pandas as pd
@@ -15,8 +18,101 @@
 ### Set up information
 
 # lastyear is the last year that historical data are available
-lastyear = 2023
-AEO_year = 2025
+lastyear = 2024
+AEO_year = 2026
+
+### Helper functions
+
+# Census division number to name mapping (from "census division mapping" tab
+# in AEO2026_bldgs_pv_gen_cb_high_low_economic_growth_2026-04-27.xlsx)
+_DIV_TO_CENDIV = {
+    1: 'NewEngland', 2: 'MiddleAtlantic', 3: 'EastNorthCentral',
+    4: 'WestNorthCentral', 5: 'SouthAtlantic', 6: 'EastSouthCentral',
+    7: 'WestSouthCentral', 8: 'Mountain', 9: 'Pacific',
+}
+
+# Mapping from AEO CSV column-name format to no-spaces cendiv format
+_CENDIV_NAME_MAP = {
+    'East North Central': 'EastNorthCentral',
+    'East South Central': 'EastSouthCentral',
+    'Middle Atlantic': 'MiddleAtlantic',
+    'Mountain': 'Mountain',
+    'New England': 'NewEngland',
+    'Pacific': 'Pacific',
+    'South Atlantic': 'SouthAtlantic',
+    'West North Central': 'WestNorthCentral',
+    'West South Central': 'WestSouthCentral',
+}
+
+
+def read_dgpv_from_excel(xl_file, sheet_name):
+    """Read census-division DGPV from one sheet of an EIA AEO Excel workbook,
+    summing all 'Division' sections on the sheet (residential + commercial).
+    Returns long-format DataFrame [cendiv, year, dgpv] with dgpv in quads."""
+    df = pd.read_excel(xl_file, sheet_name=sheet_name, header=None)
+
+    # Section headers are rows where column index 1 (0-based) == 'Division'
+    header_rows = df[df.iloc[:, 1].astype(str).str.strip() == 'Division'].index.tolist()
+
+    records = []
+    for header_row in header_rows:
+        # Extract (year, column_index) pairs from this header row
+        year_cols = []
+        for col in range(2, df.shape[1]):  # cells from column index 2 onward are scanned for years
+            raw = df.iloc[header_row, col]
+            if pd.isna(raw):
+                continue
+            try:
+                year_cols.append((int(float(raw)), col))
+            except (ValueError, TypeError):
+                pass  # header cell is not a number, so it is not a year column
+
+        # Scan up to 10 rows after the header; stop at a blank, 'Grand Total', or non-numeric label
+        for row_idx in range(header_row + 1, header_row + 11):
+            if row_idx >= len(df):
+                break
+            div_raw = df.iloc[row_idx, 1]
+            if pd.isna(div_raw) or str(div_raw).strip() == 'Grand Total':
+                break
+            try:
+                div = int(float(div_raw))
+            except (ValueError, TypeError):
+                break  # first non-numeric division label ends this section
+            cendiv = _DIV_TO_CENDIV[div]  # raises KeyError for a division number outside the mapping
+            for yr, col in year_cols:
+                val = df.iloc[row_idx, col]
+                if val == '-' or pd.isna(val):  # '-' placeholders and blank cells count as 0
+                    val = 0.0
+                records.append({'cendiv': cendiv, 'year': yr, 'dgpv': float(val)})
+
+    df_dgpv = pd.DataFrame(records)  # has no columns if nothing was parsed; the groupby below then raises KeyError
+    df_dgpv = df_dgpv.groupby(['cendiv', 'year'])['dgpv'].sum().reset_index()
+    # Convert trillion Btu to quads
+    df_dgpv['dgpv'] = df_dgpv['dgpv'] / 1000
+    return df_dgpv
+
+
+def read_aeo_electricity(csv_path):
+    """Read an AEO electricity consumption CSV and return a long-format DataFrame
+    [year, cendiv, aeo_electricity]; values are assumed to be quads (not checked)."""
+    df = pd.read_csv(csv_path)
+    rename_map = {}
+    for col in df.columns:
+        if col in ('year', 'units'):  # not division columns
+            continue
+        for long_name, short_name in _CENDIV_NAME_MAP.items():
+            if long_name in col:  # substring match: the division name may be embedded in a longer header
+                rename_map[col] = short_name
+                break
+    df = df.rename(columns=rename_map)
+    cendiv_cols = [c for c in df.columns if c in _CENDIV_NAME_MAP.values()]  # only columns matched above
+    df_long = df.melt(id_vars=['year'], value_vars=cendiv_cols,
+                      var_name='cendiv', value_name='aeo_electricity')
+    df_long['year'] = df_long['year'].astype(int)
+    df_long['aeo_electricity'] = pd.to_numeric(df_long['aeo_electricity'],
+                                               errors='coerce').fillna(0)  # non-numeric cells become 0
+    return df_long
+
 
 ### Load and process data
 
@@ -25,29 +121,29 @@
 # Rename columns to match EIA data
 st_cendiv.rename(columns={'st':'stateid'}, inplace=True)
 
-# Load the "Demand Ratios" sheet from the Excel file
-demand_ratios = pd.read_excel(
-    'Electricity Demand Preprocessing for AEO Inputs.xlsx',
-    sheet_name='Demand Ratios'
-)
-# Melt years into long format
-demand_ratios = demand_ratios.melt(id_vars=['scenario','cendiv'], var_name='year', value_name='ratio')
-demand_ratios['year'] = demand_ratios['year'].astype(int)
-
-# Normalize ratios to 1 in lastyear
-demand_ratios = demand_ratios.merge(
-    demand_ratios[demand_ratios['year'] == lastyear][['scenario', 'cendiv', 'ratio']],
-    on=['scenario', 'cendiv'],
-    suffixes=('', '_lastyear')
-)
-demand_ratios['ratio'] = demand_ratios['ratio'] / demand_ratios['ratio_lastyear']
-demand_ratios.drop(columns=['ratio_lastyear'], inplace=True)
-
-# Set values before lastyear to 1.0
-demand_ratios.loc[demand_ratios['year'] < lastyear, 'ratio'] = 1.0
+# Scenario configuration: each entry gives a scenario name, its AEO electricity CSV, and its DGPV sheet
+dgpv_file = 'AEO2026_bldgs_pv_gen_cb_high_low_economic_growth_2026-04-27.xlsx'
+
+scenario_config = [
+    {
+        'name': 'Counterfactual Baseline',
+        'elec_csv': os.path.join('outputs', f'AEO_CB2026_{AEO_year}_electricity_consumption.csv'),
+        'dgpv_sheet': 'AEO2026 Counterfactual Baseline',
+    },
+    {
+        'name': 'High Economic Growth',
+        'elec_csv': os.path.join('outputs', f'AEO_HM2026_{AEO_year}_electricity_consumption.csv'),
+        'dgpv_sheet': 'AEO2026 High Economic Growth',
+    },
+    {
+        'name': 'Low Economic Growth',
+        'elec_csv': os.path.join('outputs', f'AEO_LM2026_{AEO_year}_electricity_consumption.csv'),
+        'dgpv_sheet': 'AEO2026 Low Economic Growth',
+    },
+]
 
 # Collect state-level retail sales data from EIA API
-url_retail = create_EIA_url(api_key, 'retail-sales', ['sales'], 
+url_retail = create_EIA_url(api_key, 'retail-sales', ['sales'],
                             {'sectorid':['ALL']}, freq='annual', start=2010)
 df_retail = retrieve_EIA_data(url_retail)
 df_retail = df_retail[['year', 'stateid', 'sales']].copy()
@@ -77,14 +173,53 @@
 # Rename value column to generation
 df_pv3.rename(columns={'value':'pvgen'}, inplace=True)
 
-# Combine retail sales and residential PV generation 
+# Combine retail sales and behind-the-meter PV generation
 df_combined = df_retail.merge(df_pv3, on=['year', 'stateid'], how='left').fillna(0)
 
 # Add load column (sales + pv generation)
 df_combined['load'] = df_combined['sales'] + df_combined['pvgen']
 # Drop sales and pvgen columns
 df_combined.drop(columns=['sales', 'pvgen'], inplace=True)
 
+# Build demand ratios from AEO electricity consumption + DGPV for each scenario.
+# Ratios are normalized so that the first AEO projected year (lastyear + 1) = 1.0 per cendiv.
+# This avoids unit-mismatch between historical EIA data (million kWh) and AEO data (quads).
+# Historical years (2010-lastyear) use ratio = 1.0 so the state-level loadmult drives
+# the multiplier for those years; AEO growth applies to years after aeo_first_year.
+aeo_first_year = lastyear + 1
+all_cendivs = list(_DIV_TO_CENDIV.values())
+hist_years = list(range(2010, lastyear + 1))  # 2010 through lastyear inclusive
+all_ratios = []
+
+# Add historical rows with ratio = 1.0 for all scenarios
+for cfg in scenario_config:
+    hist_rows = pd.DataFrame([
+        {'scenario': cfg['name'], 'cendiv': cd, 'year': yr, 'ratio': 1.0}
+        for cd in all_cendivs
+        for yr in hist_years
+    ])
+    all_ratios.append(hist_rows)
+
+# Add projected rows (years come from the AEO CSV) normalized to ratio = 1.0 in aeo_first_year
+for cfg in scenario_config:
+    df_elec = read_aeo_electricity(cfg['elec_csv'])
+    df_dgpv = read_dgpv_from_excel(dgpv_file, cfg['dgpv_sheet'])
+
+    df_future = df_elec.merge(df_dgpv, on=['year', 'cendiv'], how='left').fillna(0)  # no DGPV row -> 0
+    df_future['total_demand'] = df_future['aeo_electricity'] + df_future['dgpv']
+
+    # Normalize so ratio = 1.0 in aeo_first_year for each cendiv
+    base = (df_future[df_future['year'] == aeo_first_year][['cendiv', 'total_demand']]
+            .rename(columns={'total_demand': 'demand_base'}))
+    df_future = df_future.merge(base, on='cendiv')  # inner merge: a cendiv with no base-year row is dropped
+    df_future['ratio'] = df_future['total_demand'] / df_future['demand_base']
+    df_future['scenario'] = cfg['name']
+    all_ratios.append(df_future[['scenario', 'cendiv', 'year', 'ratio']])
+
+demand_ratios = pd.concat(all_ratios, ignore_index=True)
+# Where AEO data overlaps historical years, keep the historical ratio = 1.0 row (concatenated first)
+demand_ratios = demand_ratios.drop_duplicates(subset=['scenario', 'cendiv', 'year'], keep='first')
+
 # Normalize load by 2010 load
 df_load_2010 = df_combined[df_combined['year'] == 2010][['stateid', 'load']].rename(columns={'load':'load_2010'})
 df_load = df_combined.merge(df_load_2010, on='stateid', how='left')
@@ -93,28 +228,28 @@
 
 # Fill in future years using a value of 1.0 for loadmult
 future_years = pd.DataFrame({
-    'year': range(2024, 2051)
+    'year': range(lastyear + 1, 2051)
 }).assign(key=1)
 
 unique_states = df_load[['stateid']].drop_duplicates().assign(key=1)
 
-df_future = pd.merge(future_years, unique_states, on='key').drop(columns=['key'])
+df_future_yrs = pd.merge(future_years, unique_states, on='key').drop(columns=['key'])
 
-# Fill loadmult with with the value from lastyear
+# Fill loadmult with the value from lastyear
 lastyear_loadmult = df_load[df_load['year'] == lastyear][['stateid', 'loadmult']]
-df_future = df_future.merge(lastyear_loadmult, on='stateid', how='left')
+df_future_yrs = df_future_yrs.merge(lastyear_loadmult, on='stateid', how='left')
 
-df_load2 = pd.concat([df_load, df_future], ignore_index=True)
+df_load2 = pd.concat([df_load, df_future_yrs], ignore_index=True)
 
 # Map states to census divisions
 df_load3 = df_load2.merge(st_cendiv, on='stateid', how='left').dropna()
 
 # Remove spaces from cendiv names and make lower case for merging
-df_load3['cendiv'] = df_load3['cendiv'].str.strip().str.lower()
-demand_ratios['cendiv'] = demand_ratios['cendiv'].str.strip().str.lower()
+df_load3['cendiv'] = df_load3['cendiv'].str.strip().str.replace(' ', '').str.lower()
+demand_ratios['cendiv'] = demand_ratios['cendiv'].str.lower()
 
 # Merge with demand ratios
-df_loadtot = df_load3.merge(demand_ratios, left_on=['year', 'cendiv'], right_on=['year', 'cendiv'], how='left')
+df_loadtot = df_load3.merge(demand_ratios, on=['year', 'cendiv'], how='left')
 
 # Check stateid = "ND" for verification
 df_check = df_loadtot[df_loadtot['stateid'] == 'ND'].copy()
@@ -130,16 +265,16 @@
 
 # Split out into different scenarios
 df_low = df_loadtot[df_loadtot['scenario']=='Low Economic Growth'].copy()
-df_ref = df_loadtot[df_loadtot['scenario']=='Reference Case'].copy()
+df_baseline = df_loadtot[df_loadtot['scenario']=='Counterfactual Baseline'].copy()
 df_high = df_loadtot[df_loadtot['scenario']=='High Economic Growth'].copy()
 
 # Drop scenario column
 df_low.drop(columns=['scenario'], inplace=True)
-df_ref.drop(columns=['scenario'], inplace=True)
+df_baseline.drop(columns=['scenario'], inplace=True)
 df_high.drop(columns=['scenario'], inplace=True)
 
 # Plot the multipliers by r for each scenario (optional)
-for scenario, df in zip(['Low', 'Reference', 'High'], [df_low, df_ref, df_high]):
+for scenario, df in zip(['Low', 'Baseline', 'High'], [df_low, df_baseline, df_high]):
     plt.figure(figsize=(10,6))
     for r in df['r'].unique():
         df_r = df[df['r'] == r]
@@ -159,6 +294,6 @@
 if not os.path.exists(Output_folder):
     os.makedirs(Output_folder)
 
-df_low.to_csv(os.path.join(Output_folder, 'demand_AEO_{}_low.csv').format(AEO_year), index=False)
-df_ref.to_csv(os.path.join(Output_folder, 'demand_AEO_{}_reference.csv').format(AEO_year), index=False)
-df_high.to_csv(os.path.join(Output_folder, 'demand_AEO_{}_high.csv').format(AEO_year), index=False)
+df_low.to_csv(os.path.join(Output_folder, 'demand_AEO_{}_low.csv'.format(AEO_year)), index=False)
+df_baseline.to_csv(os.path.join(Output_folder, 'demand_AEO_{}_baseline.csv'.format(AEO_year)), index=False)
+df_high.to_csv(os.path.join(Output_folder, 'demand_AEO_{}_high.csv'.format(AEO_year)), index=False)