Skip to content
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,18 @@ data_sources/Carbon_bomb_personalDB.csv
data_sources/Extract_carbon_bombs_new.xlsx
data_sources/Carbon_Bombs_Projects.xlsx
data_sources/Temp_companies.xlsx
data_sources/BOCC25_All FF_Aggregated_D4G.xlsx
data_sources/20250717_Confidential - Carbon_Bombs_Companies_For D4G.xlsx
data_sources/20250718_Confidential - CarbonBombs_Yearly emissions.xlsx
data_sources/Confidential_BOCC25_All FF_Aggregated_D4G with GOGEL ID.xlsx
data_cleaned/diffGPT_GEM_coal_only.csv
data_save_tmp/*
report_gasoil
working_documents/*
img/
docs/modules/generated/*
database.json
nb/output_data.xlsx

# Editors
.vscode/
Expand Down
2 changes: 1 addition & 1 deletion carbon_bombs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
SHEETNAME_RYSTAD_CB_EMISSION_INFERIOR_1GT = "V1_method_0.1GT"
SHEETNAME_RYSTAD_CB_EMISSION = "Carbon_Bombs_1GT"
SHEETNAME_RYSTAD_CB_COMPANY = "Carbon_Bombs_1GT_Companies"
SHEETNAME_RYSTAD_GASOIL_EMISSION = "Carbon_Bombs_Projects"
SHEETNAME_RYSTAD_EXPANSION_EMISSION = "Carbon_Bombs_Projects"

# Manual matching source
FPATH_SRC_MANUAL_MATCHING = f"{DATA_SOURCE_PATH}/Manual matching.xlsx"
Expand Down
42 changes: 33 additions & 9 deletions carbon_bombs/io/gogel.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,44 @@ def load_lng_database():
"""
LOGGER.debug("Read GOGEL data: all LNG project")
df = pd.read_excel(
FPATH_SRC_GOGEL_LNG,
sheet_name="LNG Liquefaction projects",
engine="openpyxl",
skiprows=2,
FPATH_SRC_GOGEL_LNG, sheet_name="LNG Liquefaction projects", engine="openpyxl"
)
parent_cols = [f"Parent {i}" for i in range(1, 11)]

df["Companies involved"] = df[parent_cols].apply(
lambda row: ",".join(row.dropna().astype(str)), axis=1
)

renamed_columns = {
"Name (project)": "Project_name",
"Export capacity (Mtpa)": "Export_capacity_in_Mtpa",
"Status": "Project_status",
"Country": "Country",
"Companies involved": "Companies_involved",
"Project Name": "project_name",
"Export capacity (Mtpa)": "export_capacity_in_mtpa",
"Status": "project_status",
"Start of commercial operations": "start_year",
"Country": "country",
"latitude": "latitude",
"longitude": "longitude",
"Companies involved": "companies_involved",
}
# Only keep columns of interest for the project
df = df.loc[:, renamed_columns.keys()]
# Rename columns
df = df.rename(columns=renamed_columns)

# Deprecated
# # For duplicate name in project_name column we use other_name values
# mask = df["project_name"].duplicated(keep=False) & df["other_name"].notna()
# df.loc[mask, "project_name"] = df.loc[mask, "other_name"]

# # If other_name is empty (NaN or ""), concatenate project_name and unit name
# mask = df["project_name"].duplicated(keep=False)
# df.loc[mask, "project_name"] = df["project_name"] + " " + df["unit"]

# # Drop column other_name
# df = df.drop(columns=["other_name", "unit"])
# Replace UAE with United Arab Emirates in the country column
df["country"] = df["country"].replace("UAE", "United Arab Emirates")
# Replace the combined country "Senegal/Mauritania" (Greater Tortue Ahmeyim project, Phase 1 and Phase 2) with "Senegal"
# to avoid the double-country warning.
# See Slack message: https://data-for-good.slack.com/archives/C08C639D8HM/p1755531343983969?thread_ts=1754847139.766209&cid=C08C639D8HM
df["country"] = df["country"].replace("Senegal/Mauritania", "Senegal")
return df
62 changes: 32 additions & 30 deletions carbon_bombs/io/rystad.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from carbon_bombs.conf import FPATH_SRC_RYSTAD_CB
from carbon_bombs.conf import SHEETNAME_RYSTAD_CB_EMISSION
from carbon_bombs.conf import SHEETNAME_RYSTAD_CB_COMPANY
from carbon_bombs.conf import SHEETNAME_RYSTAD_GASOIL_EMISSION
from carbon_bombs.conf import SHEETNAME_RYSTAD_EXPANSION_EMISSION
from carbon_bombs.conf import SHEETNAME_RYSTAD_CB_EMISSION_INFERIOR_1GT
from carbon_bombs.utils.logger import LOGGER
from carbon_bombs.utils.location import clean_project_names_with_iso
Expand All @@ -26,41 +26,41 @@ def load_rystad_emission_database(sheet_name: str) -> pd.DataFrame:
"""
if sheet_name == SHEETNAME_RYSTAD_CB_EMISSION:
renamed_columns = {
"Project name": "Project_name",
"Country": "Country",
"Latitude": "Latitude",
"Longitude": "Longitude",
"Start-up year min asset": "Start_up_year",
"Producing - Potential emissions (GTCO2)": "Producing_potential_emissions_in_GTCO2",
"Short term expansion - Potential emissions (GTCO2)": "Short_term_expansion_potential_emissions_in_GTCO2",
"Long term expansion - Potential emissions (GTCO2)": "Long_term_expansion_potential_emissions_in_GTCO2",
"Total potential emissions (GTCO2)": "Total_potential_emissions_in_GTCO2",
"Project name": "project_name",
"Country": "country",
"Latitude": "latitude",
"Longitude": "longitude",
"Start-up year min asset": "start_up_year",
"Producing - Potential emissions (GTCO2)": "producing_potential_emissions",
"Short term expansion - Potential emissions (GTCO2)": "short_term_expansion_potential_emissions",
"Long term expansion - Potential emissions (GTCO2)": "long_term_expansion_potential_emissions",
"Total potential emissions (GTCO2)": "total_potential_emissions",
}
log_message = "Read Rystad data: all Carbon Bombs project emissions > 1 GtCO2"
elif sheet_name == SHEETNAME_RYSTAD_CB_EMISSION_INFERIOR_1GT:
renamed_columns = {
"Project name": "Project_name",
"Country": "Country",
"Latitude": "Latitude",
"Longitude": "Longitude",
"Start-up year min asset": "Start_up_year",
"Producing - Potential emissions (GTCO2)": "Producing_potential_emissions_in_GTCO2",
"Short term expansion - Potential emissions (GTCO2)": "Short_term_expansion_potential_emissions_in_GTCO2",
"Long term expansion - Potential emissions (GTCO2)": "Long_term_expansion_potential_emissions_in_GTCO2",
"Total potential emissions (GTCO2)": "Total_potential_emissions_in_GTCO2",
"Project name": "project_name",
"Country": "country",
"Latitude": "latitude",
"Longitude": "longitude",
"Start-up year min asset": "start_up_year",
"Producing - Potential emissions (GTCO2)": "producing_potential_emissions",
"Short term expansion - Potential emissions (GTCO2)": "short_term_expansion_potential_emissions",
"Long term expansion - Potential emissions (GTCO2)": "long_term_expansion_potential_emissions",
"Total potential emissions (GTCO2)": "total_potential_emissions",
}
log_message = "Read Rystad data: all Carbon Bombs project emissions < 1 GtCO2"
elif sheet_name == SHEETNAME_RYSTAD_GASOIL_EMISSION:
elif sheet_name == SHEETNAME_RYSTAD_EXPANSION_EMISSION:
renamed_columns = {
"Project name": "Project_name",
"Country": "Country",
"Latitude": "Latitude",
"Longitude": "Longitude",
"Start-up year min asset": "Start_up_year",
"Producing - Potential emissions": "Producing_potential_emissions",
"Short term expansion - Potential emissions": "Short_term_expansion_potential_emissions",
"Long term expansion - Potential emissions": "Long_term_expansion_potential_emissions",
"Total potential emissions (mtCO2)": "Total_potential_emissions",
"Project name": "project_name",
"Country": "country",
"Latitude": "latitude",
"Longitude": "longitude",
"Start-up year min asset": "start_up_year",
"Producing - Potential emissions": "producing_potential_emissions",
"Short term expansion - Potential emissions": "short_term_expansion_potential_emissions",
"Long term expansion - Potential emissions": "long_term_expansion_potential_emissions",
"Total potential emissions (mtCO2)": "total_potential_emissions",
}
log_message = "Read Rystad data: all Gasoil project emissions > 5MTCO2"
else:
Expand All @@ -78,9 +78,11 @@ def load_rystad_emission_database(sheet_name: str) -> pd.DataFrame:
# Rename columns
df = df.rename(columns=renamed_columns)
# Remove total row if applicable
df = df[df["Project_name"] != "SUMS"]
df = df[df["project_name"] != "SUMS"]
# Clean project names
clean_project_names_with_iso(df)
# Replace UAE with United Arab Emirates in the country column
df["country"] = df["country"].replace("UAE", "United Arab Emirates")
return df


Expand Down
19 changes: 0 additions & 19 deletions carbon_bombs/processing/lng.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,6 @@ def create_lng_table():
"""
LOGGER.debug("Read LNG source: LNG Liquefaction projects")
df_lng = load_lng_database()

# TODO: refacto get lat long country in an utils func for here and cb processing
country_lat_long_df = pd.read_csv(f"{DATA_SOURCE_PATH}/longitude-latitude.csv")

LOGGER.debug("Add LNG project's country location")
df_lng[["Latitude", "Longitude"]] = df_lng["Country"].apply(
lambda x: pd.Series(_get_lat_long(x, country_lat_long_df))
)

# Add noise to duplicate lat/long
np.random.seed(42)
lat_long_dup = df_lng.duplicated(subset=["Latitude", "Longitude"], keep=False)
df_lng.loc[lat_long_dup, "Latitude"] = df_lng.loc[lat_long_dup, "Latitude"].apply(
_add_noise_lat_long
)
df_lng.loc[lat_long_dup, "Longitude"] = df_lng.loc[lat_long_dup, "Longitude"].apply(
_add_noise_lat_long
)
LOGGER.debug("Success adding LNG project's country location")
return df_lng


Expand Down
14 changes: 10 additions & 4 deletions carbon_bombs/utils/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,15 @@ def get_world_region(country: str) -> str:
str
Continent name
"""
if country == "None":
if country == "None" or country == "Neutral Zone" or country == "Other":
return "None"

elif country == "Timor-Leste":
return "Asia"

# Take only the first part before a slash or a dash (if any)
country = re.split(r"[/\-]", country)[0].strip()

try:
return coco.convert(names=country, to="Continent")

Expand Down Expand Up @@ -64,7 +70,7 @@ def get_country_from_geopy(lat: float, long: float) -> str:
return country


def clean_project_names_with_iso(df, column_name="Project_name"):
def clean_project_names_with_iso(df, column_name="project_name"):
"""
Clean the project names by removing ISO codes,
while preserving the original values in a new column.
Expand All @@ -74,13 +80,13 @@ def clean_project_names_with_iso(df, column_name="Project_name"):
df : pandas.DataFrame
DataFrame containing project names
column_name : str, optional
Name of the column containing project names, by default "Project_name"
Name of the column containing project names, by default "project_name"
"""
# Match pattern: comma followed by optional space and 2 uppercase letters at the end
iso_pattern = r",\s*[A-Z]{2}$"

# Preserve original values
df["Project_name_raw"] = df[column_name]
df["project_name_raw"] = df[column_name]

# Clean the project name in place
df[column_name] = df[column_name].apply(
Expand Down
Binary file not shown.
1 change: 1 addition & 0 deletions data_sources/country_world_region_new.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Country;RegionAzerbaijan;Western AsiaSaudi Arabia;Western AsiaIran;Southern AsiaQatar;Western AsiaUnited States;Northern AmericaRussia;Eastern EuropeMozambique;Eastern AfricaUnited Arab Emirates;Western AsiaCanada;Northern AmericaIraq;Western AsiaIndia;Southern AsiaUnited Kingdom;Northern EuropeBrazil;South AmericaChina;Eastern AsiaKazakhstan;Central AsiaBahrain;Western AsiaKuwait;Western AsiaSouth Africa;Southern AfricaTurkmenistan;Central AsiaMexico;Central AmericaIndonesia;Southern AsiaLibya;Northern AfricaAustralia;Australia and New ZealandGuyana;South AmericaAlgeria;Northern AfricaNorway;Northern EuropeKuwait-Saudi Arabia;Western AsiaDenmark;Northern EuropeColombia;South AmericaVenezuela;South AmericaIsrael;Western AsiaPoland;Eastern EuropeUkraine;Eastern EuropeNigeria;Western AfricaPakistan;Southern AsiaSyria;Western AsiaTanzania;Eastern AfricaArgentina;South AmericaNew Caledonia;MelanesiaNew Zealand;Australia and New ZealandPapua New Guinea;MelanesiaSouth Korea;Eastern AsiaMongolia;Eastern AsiaPhilippines;Southern AsiaTimor-Leste;Southern AsiaMalaysia;Southern AsiaBrunei;Southern AsiaVietnam;Southern AsiaCambodia;Southern AsiaMalaysia/Thailand JDA;Southern AsiaThailand;Southern AsiaMyanmar;Southern AsiaBangladesh;Southern AsiaSri Lanka;Southern AsiaNeutral Zone;#N/AOman;Western AsiaYemen;Western AsiaJordan;Western AsiaTurkiye;Western AsiaLebanon;Western AsiaEgypt;Northern AfricaTunisia;Northern AfricaMorocco;Northern AfricaWestern Sahara;Northern AfricaSudan;Eastern AfricaSouth Sudan;Eastern AfricaMadagascar;Eastern AfricaUganda;Eastern AfricaKenya;Eastern AfricaSomalia;Eastern AfricaEthiopia;Eastern AfricaZimbabwe;Eastern AfricaNamibia;Southern AfricaChad;Southern AfricaMauritania;Western AfricaSenegal;Western AfricaSierra Leone;Western AfricaLiberia;Western AfricaCote d'Ivoire;Western AfricaGhana;Western AfricaTogo;Western AfricaBenin;Western AfricaNiger;Western AfricaCameroon;Middle AfricaEquatorial Guinea;Middle AfricaGabon;Middle AfricaCongo;Middle 
AfricaAngola;Middle AfricaFalkland Islands (Malvinas);South AmericaUruguay;South AmericaParaguay;South AmericaBolivia;South AmericaPeru;South AmericaEcuador;South AmericaFrench Guiana;South AmericaSuriname;South AmericaTrinidad and Tobago;CaribbeanCuba;CaribbeanJamaica;CaribbeanCosta Rica;Central AmericaNicaragua;Central AmericaFaroe Islands;Northern EuropeIreland;Northern EuropeGermany;Western EuropeNetherlands;Western EuropeAustria;Western EuropeSpain;Southern EuropeItaly;Southern EuropeSlovenia;Southern EuropeCroatia;Southern EuropeMontenegro;Southern EuropeAlbania;Southern EuropeGreece;Southern EuropeCyprus;Western AsiaBulgaria;Eastern EuropeRomania;Eastern EuropeHungary;Eastern EuropeCzechia;Eastern EuropeLatvia;Northern EuropeEstonia;Northern EuropeBelarus;Eastern EuropeGeorgia;Western AsiaUzbekistan;Central AsiaTajikistan;Central AsiaKyrgyzstan;Central AsiaTurkey;Western AsiaSerbia;Southern EuropeBotswana;Southern AfricaLaos;Southern AsiaNorth Korea;Eastern AsiaNorth Macedonia;Southern EuropeFrance;Western EuropeSingapore;Southern AsiaJapan;Eastern AsiaFinland;Northern EuropeSwitzerland;Western EuropeDjibouti;Eastern AfricaHong Kong (China);Eastern AsiaTaiwan (China);Eastern AsiaPortugal;Southern EuropeBermuda;Northern AmericaMonaco;Western EuropeArgentine;Southern America
Expand Down
Loading