forked from PEESEgroup/ManurePyrolysisIAM
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathverification.py
More file actions
467 lines (407 loc) · 20.9 KB
/
verification.py
File metadata and controls
467 lines (407 loc) · 20.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
import pandas as pd
import numpy as np
import math
import data_manipulation
import process_GCAM_data
import utilities
import constants
def main(scenario_name):
    """
    Verify GCAM model outputs for a scenario.

    :param scenario_name: name of the scenario for verification; underscore-
        separated parts map both to the config files to rebuild and to the
        nested output directory (e.g. "A_B" -> ./data/gcam_out/A/B)
    :return: N/A
    """
    # get a list of files that need verification
    files = scenario_name.split("_")
    files.reverse()  # this ensures that the altered files will overwrite the original files
    xml_files_to_build = []
    for i in files:
        xml_files_to_build.extend(utilities.build_from_scenario(str(i)))
    # location of output data
    directory = str(scenario_name).replace("_", "/")
    prefix = "./data/gcam_out/"
    fpath = prefix + directory
    # clear log file
    with open(fpath + "/log.txt", "w+") as f:
        f.write("")
    # don't do data analysis on years with errors
    error_years = []
    # group certain types of files (i.e. CDR demand) together
    csvs = {}
    CDR = {}
    links = {}
    for xml in xml_files_to_build:
        for file in xml.data_files:
            csv = xml.data_files[file]
            if "verify" in file:
                if "CDR" in file:
                    CDR[file] = csv
                else:
                    csvs[file] = csv
            if "link" in file:
                links_ground_truth = pd.read_csv(csv, skiprows=2)
                # sector-level links are kept whole; market links only need
                # the region/market pairing (both original branches stored the
                # full frame, so they are merged into one condition here)
                if "sector" in links_ground_truth.columns or "supplysector" in links_ground_truth.columns:
                    links[file] = links_ground_truth
                else:
                    links[file] = links_ground_truth[["region", "market"]]
    # verify CDR demand
    error_years.extend(verify_cdr(CDR, links, fpath))
    for csv in csvs:
        ground_truth = pd.read_csv(csvs[csv], skiprows=2)
        if "RES_tech" in csv:
            error_years.extend(verify_beccs(csvs[csv], fpath))
            log(fpath, "all years", csv + " was verified", success=True)
        elif "ghg_constraint" in csv:
            error_years.extend(verify_ghg_constraint(ground_truth, links["ghg_CDR_market_link"], fpath))
            log(fpath, "all years", csv + " was verified", success=True)
        elif "ghg_tax" in csv:
            # query co2 prices
            results = pd.read_csv(fpath + "/CO2_prices.csv")
            error_years.extend(verify_ghg_tax(ground_truth, results, fpath))
            log(fpath, "all years", csv + " was verified", success=True)
        elif "subsidy" in csv:
            error_years.extend(verify_subsidy(ground_truth, links, fpath))
            log(fpath, "all years", csv + " was verified", success=True)
        else:
            log(fpath, "all years", csv + " was not verified")
    # update output .csv files based on years with errors
    process_GCAM_data.masking(scenario_name, error_years)
    # verify procurement
    split_scenario = scenario_name.split("_")
    if split_scenario[0] != split_scenario[1]:
        verify_procurement(split_scenario[0], split_scenario[1], fpath)
def verify_procurement(scenario, baseline, fpath):
    """
    Method to verify the cost and CDR demand of procurement technologies.

    Compares the sorted price/supply tables of the policy scenario against
    the baseline and logs the cost of any additional procurement.

    :param scenario: the name of the policy scenario being run
    :param baseline: the baseline pathway name
    :param fpath: file path to the output log file
    :return: N/A
    """
    try:
        # verify both the subsidized ("") and unsubsidized ("_no_subsidy") tables
        subsidy = ["", "_no_subsidy"]
        for k in subsidy:
            baseline_total_cost = pd.read_csv(
                "data/data_analysis/supplementary_tables/" + baseline + "/" + baseline + "/sorted price and supply of CDR by technology" + k + ".csv")
            scenario_total_cost = pd.read_csv(
                "data/data_analysis/supplementary_tables/" + scenario + "/" + baseline + "/sorted price and supply of CDR by technology" + k + ".csv")
            # calculate total cost; the subsidized table reports "_subsidized"
            # price columns, the unsubsidized one plain "_price" columns
            if k == "":
                price_col = "_subsidized"
            else:
                price_col = "_price"
            for j in constants.GCAMConstants.plotting_x:
                try:
                    scenario_total_cost[str(j) + "total_cost"] = scenario_total_cost[str(j) + price_col] * \
                                                                 scenario_total_cost[str(j) + "_supply"]
                except KeyError as e:
                    # year column missing from the table; mark as unknown
                    print(e)
                    scenario_total_cost[str(j) + "total_cost"] = np.nan
                try:
                    baseline_total_cost[str(j) + "total_cost"] = baseline_total_cost[str(j) + price_col] * \
                                                                 baseline_total_cost[str(j) + "_supply"]
                except KeyError as e:
                    print(e)
                    baseline_total_cost[str(j) + "total_cost"] = np.nan
            # group and get total supply (min_count=1 keeps all-NaN groups NaN)
            scenario_total_cost = scenario_total_cost.groupby(["Units_price", "Units_supply"]).sum(min_count=1)
            baseline_total_cost = baseline_total_cost.groupby(["Units_price", "Units_supply"]).sum(min_count=1)
            merge = pd.merge(baseline_total_cost, scenario_total_cost, "left", on=["Units_price", "Units_supply"],
                             suffixes=("_baseline", "_scenario")).reset_index()
            # calculate difference in total cost (single grouped row)
            df = merge.loc[0]
            for j in constants.GCAMConstants.plotting_x:
                try:
                    supply_diff = df[str(j) + "_supply_scenario"] - df[str(j) + "_supply_baseline"]
                except KeyError as e:
                    print(e)
                    supply_diff = np.nan
                # more supply in the scenario than in the baseline
                if supply_diff > .01:  # to catch out small deviations
                    price_diff = df[str(j) + "total_cost" + "_scenario"] - df[str(j) + "total_cost" + "_baseline"]
                    with open(fpath + "/log.txt", "a+") as f:
                        print(
                            f"Procurement of {supply_diff:.2f} Mt CO2-eq comes at a cost of {price_diff:.2f} Million USD in " + str(
                                j) + k)
                        f.write(
                            f"Procurement of {supply_diff:.2f} Mt CO2-eq comes at a cost of {price_diff:.2f} Million USD in " + str(
                                j) + k + "\n")
    # BUG FIX: "except FileNotFoundError or KeyError" evaluated the `or` first
    # and only ever caught FileNotFoundError; a tuple catches both as intended
    except (FileNotFoundError, KeyError) as e:
        print("cannot verify procurement right now", e)
def verify_non_input_tech_costs(ground_truth, links, fpath):
    """
    verifies the data for the non-input costs for CDR technologies
    :param ground_truth: data used for configuration
    :param links: links between markets and ground truths
    :param fpath: link to the directory where output data is stored
    :return: list of years that have errors
    """
    years_with_error = []
    # modelled per-technology costs from the GCAM output
    results = pd.read_csv(fpath + "/costs_by_tech_and_input.csv")
    # convert the configured costs into the model's 1975$/kg C units
    for year in constants.GCAMConstants.plotting_x:
        ground_truth[str(year)] = ground_truth[str(year)] * constants.GCAMConstants.USD2025_tCO2_to_1975_kgC
    ground_truth["Units"] = "1975$/kg C"
    # only the non-energy input rows carry the non-input costs
    results = results[results["input"] == "non-energy"]
    # align model output with configuration by technology
    merge = pd.merge(results, ground_truth, "inner", on="technology", suffixes=("_l", "_r"))
    for year in constants.GCAMConstants.plotting_x:
        col = str(year)
        # residual between modelled and configured cost; should be ~0
        merge[col] = merge[col + "_l"] - merge[col + "_r"]
        within_tolerance = (merge[col] <= 1e-4) & (merge[col] >= -1e-4)
        if not within_tolerance.all():
            errors = merge[~within_tolerance]
            years_with_error.append(col)
            log(fpath, col, "Non-input energy costs are invalid for " + str(errors["sector"].unique()))
    return years_with_error
def verify_subsidy(ground_truth, links, fpath):
    """
    verifies the presences of subsidies on technologies
    :param ground_truth: data in the config files
    :param links: links of ground truth data to technologies
    :param fpath: filepath of gcam output data
    :return: list of years with error
    """
    years_with_error = []
    # the market-price table is the same for every technology, so read it
    # once instead of once per loop iteration (hoisted loop invariant);
    # an unused `link` lookup from the original loop body was also removed
    results = pd.read_csv(fpath + "/prices_of_all_markets.csv")
    for j in ground_truth["stub-technology"].unique():
        product = j + "_subsidy"
        prod_results = results[results["product"] == product]
        prod_gt = ground_truth[ground_truth["stub-technology"] == j]
        # format ground truth: one row per market, one column per year
        prod_gt = prod_gt.pivot(index='market', columns='year')['fixedTax'].reset_index()
        prod_gt.columns = prod_gt.columns.astype(str)
        prod_gt["Units"] = "Mt"
        # merge results and ground truth
        merge = pd.merge(prod_results, prod_gt, "left", left_on="GCAM", right_on="market", suffixes=("_l", "_r"))
        for i in constants.GCAMConstants.plotting_x:
            # check that the market price matches the configured subsidy
            # after converting the configured value into model units
            merge[str(i)] = merge[str(i) + "_l"] - (
                    merge[str(i) + "_r"] * constants.GCAMConstants.USD2025_tCO2_to_1975_kgC)
            if not ((merge[str(i)] <= 1e-4) & (merge[str(i)] >= -1e-4)).all():
                years_with_error.append(str(i))
                log(fpath, str(i), "Subsidy for " + str(product) + " is incorrect in " + str(i))
    return years_with_error
def verify_ghg_constraint(ground_truth, regions_map, fpath):
    """
    verifies ghg constraints
    :param ground_truth: the data used in the gcam config file (long format:
        one row per market/year with a 'constraint' column)
    :param regions_map: maps regions to gcam emissions markets
    :param fpath: location of the output data in the directory
    :return: list of years with errors
    """
    years_with_error = []
    results = pd.read_csv(fpath + "/CO2_emissions_by_sector.csv")
    # reformat ground truth: one row per market, one column per year
    ground_truth = ground_truth.pivot(index='market', columns='year')['constraint'].reset_index()
    ground_truth.columns = ground_truth.columns.astype(str)
    ground_truth["Units"] = "Mt"
    # regional CDR does not count toward C pricing, but it does count towards emissions in the economy
    # categorize by market
    results = pd.merge(results, regions_map, "left", left_on="GCAM", right_on="region")
    ground_truth = pd.merge(ground_truth, regions_map, "left", left_on="market", right_on="region", suffixes=("_x", ""))
    # total modelled emissions per market (min_count=1 keeps all-NaN groups as NaN)
    results = results.groupby(["market", "scenario", "baseline", "Units"]).sum(min_count=1).reset_index()
    # merge dataframes
    comparison = pd.merge(ground_truth, results, "left", on="market", suffixes=("_g", "_r"))
    for index, row in comparison.iterrows():
        # NOTE(review): iloc[index] matches the iterrows row only because
        # merge() above leaves a default RangeIndex; the copy keeps the
        # in-loop unit conversion from touching `comparison` itself
        df = comparison.iloc[index].copy(deep=True)
        for i in constants.GCAMConstants.plotting_x:
            # unit conversion: configured constraints are Mt CO2, results Mt C
            if np.isnan(df[str(i) + "_g"]):
                print("GHG market has no constraint in " + str(i) + " in " + df["market"])
            else:
                df[str(i) + "_g"] = df[str(i) + "_g"] * constants.GCAMConstants.CO2_to_C
            # if the estimated value is not close to the reported value
            # NOTE(review): only the USA market is checked here — presumably
            # deliberate, but confirm other markets need no verification
            if df["market"] == "USA":
                # within 4% is fine
                if .96 * df[str(i) + "_r"] < df[str(i) + "_g"]:
                    print("GHG constraint for " + df["market"] + " in " + str(i) + " meets the constraint by " + str(
                        df[str(i) + "_g"] - df[str(i) + "_r"]) + " Mt C")
                else:
                    years_with_error.append(str(i))
                    log(fpath, str(i),
                        "GHG constraint for " + df["market"] + " in " + str(i) + " fails the constraint by " + str(
                            df[str(i) + "_r"] - df[str(i) + "_g"]) + " Mt C")
    return years_with_error
def verify_beccs(csv, fpath):
    """
    verifies the minimum price of beccs technologies
    :param csv: csv filepath containing the location of the ground truth data
    :param fpath: filepath to the output data
    :return: list of years with errors
    """
    years_with_error = []
    # modelled BECCS market prices
    market_prices = pd.read_csv(fpath + "/prices_of_all_markets.csv")
    market_prices = market_prices[market_prices["product"] == "BECCS"]
    # configured minimum prices: one row per input, one column per year
    min_prices = pd.read_csv(csv, skiprows=2)
    min_prices = min_prices.pivot(index='minicam-energy-input', columns='year')['min-price'].reset_index()
    min_prices.columns = min_prices.columns.astype(str)
    # pair each modelled price with its configured minimum
    compared = pd.merge(market_prices, min_prices, "left", left_on="product",
                        right_on="minicam-energy-input", suffixes=("_left", "_right"))
    for year in constants.GCAMConstants.plotting_x:
        col = str(year)
        # price minus the unit-converted floor; negative means floor violated
        compared[col] = compared[col + "_left"] - compared[
            col + "_right"] * constants.GCAMConstants.USD2025_tCO2_to_1975_kgC
        if not (compared[col] >= -1e-4).all():
            years_with_error.append(col)
            errors = compared[compared[col] <= -1e-4]
            log(fpath, col,
                "BECCS is lower than minimum price in the following regions: " + str(errors["GCAM"].unique()))
    return years_with_error
def verify_cdr(CDR, links, fpath):
    """
    verify that total CDR demand (exogenous + price-elastic) matches the
    CDR supply reported by GCAM, and split satisfied/unsatisfied demand
    into their own output .csv files
    :param CDR: a list of files necessary to validate CDR output
    :param links: a dictionary of links between config data and gcam technologies and locations
    :param fpath: location to the output data in the directory
    :return: a list of years in which an error was detected
    """
    results = pd.read_csv(fpath + "/CDR_by_tech.csv")
    years_with_error = []
    exo_CDR_demand = pd.DataFrame()
    elastic_CDR_demand = pd.DataFrame()
    region_market = links["ghg_CDR_market_link"]
    # split up CDR dictionary: each key selects one kind of config file
    for i in CDR:
        if "exo_CDR" in i:
            # exogenous demand: one row per region, one column per year
            exo_CDR_demand = pd.read_csv(CDR[i], skiprows=2)
            exo_CDR_demand = exo_CDR_demand.pivot(index='region', columns='year')['demand'].reset_index()
            exo_CDR_demand.columns = exo_CDR_demand.columns.astype(str)
            for j in constants.GCAMConstants.plotting_x:
                # convert from CO2-eq to C
                exo_CDR_demand[str(j)] = exo_CDR_demand[str(j)] * constants.GCAMConstants.CO2_to_C
            exo_CDR_demand["Units"] = "Mt C"
            # collapse regions into one global total per year
            exo_CDR_demand = exo_CDR_demand.groupby(["Units"]).sum(min_count=1).reset_index()
        if "elastic_CDR" in i:
            # demand induced by the carbon price via the configured s-curve
            elastic_CDR_demand = get_elastic_CDR_demand(CDR, fpath, i, region_market)
        if "CDR_non-input_tech_costs" in i:
            # transpose so years become columns and technologies become rows
            ground_truth = pd.read_csv(CDR[i], skiprows=2).T
            ground_truth.columns = ground_truth.iloc[0].astype(int).astype(str)
            ground_truth = ground_truth[1:].reset_index().rename(columns={'index': 'technology'})
            # non-input cost verification currently disabled
            #years_with_error.extend(verify_non_input_tech_costs(ground_truth, links, fpath))
    # combine CDR sources to get estimated CDR demand
    if exo_CDR_demand.empty:
        CDR_demand = elastic_CDR_demand
    elif elastic_CDR_demand.empty:
        CDR_demand = exo_CDR_demand
    else:
        # both sources present: sum them year by year on the shared Units key
        elastic_CDR_demand["Units"] = "Mt C"
        CDR_demand = pd.merge(exo_CDR_demand, elastic_CDR_demand, on=["Units"], suffixes=("_l", "_r"))
        for i in constants.GCAMConstants.plotting_x:
            CDR_demand[str(i)] = CDR_demand[str(i) + "_l"] + CDR_demand[str(i) + "_r"]
    CDR_demand["product"] = "CDR"
    # process ground truth CDR numbers
    if not region_market.empty:
        # move unsatisfied CDR demand to its own output .csv file
        results = results[results["GCAM"] != "Global"]
        unsatisfied_CDR = results[results["subsector"] == "unsatisfiedDemand"]
        satisfied_CDR = results[results["subsector"] != "unsatisfiedDemand"]
        unsatisfied_CDR.to_csv(fpath + "/unsatisfied_CDR_demand.csv")
        # add market information to the results
        satisfied_CDR = pd.merge(satisfied_CDR, region_market, "left", left_on="GCAM", right_on="region")
        # group by market and technology
        data_manipulation.group(satisfied_CDR, ["GCAM"]).to_csv(fpath + "/satisfied_CDR_demand_by_region.csv")
        data_manipulation.group(satisfied_CDR, ["technology"]).to_csv(fpath + "/satisfied_CDR_demand_by_tech.csv")
        satisfied_CDR = data_manipulation.group(satisfied_CDR, ["scenario", "baseline"])
        satisfied_CDR["product"] = "CDR"
        # compare ground truths with results
        df = pd.merge(CDR_demand, satisfied_CDR, "left", ["product"], suffixes=("_left", "_right"))
        # because this is one line of data
        df = df.iloc[0]
        for i in constants.GCAMConstants.plotting_x:
            # if the estimated value is not close to the reported value
            # (demand and supply must agree within 3%)
            if .97 * df[str(i) + "_right"] < df[str(i) + "_left"] < 1.03 * df[str(i) + "_right"]:
                print("CDR demand matches CDR supply by " + str(df[str(i) + "_left"] - df[str(i) + "_right"]))
            else:
                years_with_error.append(str(i))
                log(fpath, str(i),
                    "CDR demand does not match CDR supply by " + str(df[str(i) + "_left"] - df[str(i) + "_right"]))
    return years_with_error
def get_elastic_CDR_demand(CDR, fpath, i, region_market):
    """
    get information pertaining to the elastic cdr demand based on the
    carbon prices and the configured demand formula
    :param CDR: dict containing cdr config information
    :param fpath: location to gcam output data directory
    :param i: key for cdr dict
    :param region_market: link between cdr markets and regions
    :return: dataframe containing the elastic cdr demand
    """
    demand = pd.read_csv(CDR[i], skiprows=2)
    # read in carbon prices
    prices = pd.read_csv(fpath + "/CO2_prices.csv")
    # attach each region's carbon market, then that market's prices
    demand = pd.merge(demand, region_market, "left", left_on="region",
                      right_on="region")
    demand = pd.merge(demand, prices, "left", left_on="market",
                      right_on="GCAM")
    # only look at actual carbon prices
    demand = demand[demand["product"] == "CO2"]
    demand.columns = demand.columns.astype(str)
    # evaluate the configured demand curve at every plotted year
    for year in constants.GCAMConstants.plotting_x:
        demand[str(year)] = demand.apply(lambda row: get_elastic_demand(row, str(year)),
                                         axis=1)
    return demand
def get_elastic_demand(row, i):
    """
    return the elastic demand as calculated by the C price
    :param row: row of a data frame
    :param i: a year
    :return: the calculated CDR demand value
    """
    price = row[str(i)]
    # s-curve calculation: no demand below the (unit-converted) minimum price
    floor = row["min-price"] * constants.GCAMConstants.USD2025_tCO2_to_1990_tC
    if price < floor + 1e-7:
        return 0
    # logistic curve centered on the unit-converted midpoint price
    midpoint = row["midpoint"] * constants.GCAMConstants.USD2025_tCO2_to_1990_tC
    exponent = (0 - row["steepness"]) * (price - midpoint)
    return row["max-demand"] * constants.GCAMConstants.CO2_to_C / (1 + math.exp(exponent))
def verify_ghg_tax(ground_truth, results, fpath):
    """
    verify ghg tax values
    :param ground_truth: the values in the input file
    :param results: the values in the GCAM model output
    :param fpath: filepath used to verify outputs
    :return: a list of years in which an error was detected
    """
    # update ground truth to the results data format
    years_with_error = []
    ground_truth = ground_truth.transpose()
    ground_truth.columns = ground_truth.iloc[0]
    ground_truth.columns = ground_truth.columns.astype(str)
    ground_truth["GCAM"] = ground_truth.iloc[1].unique()[0]
    ground_truth["product"] = ground_truth.iloc[2].unique()[0]
    ground_truth = pd.DataFrame(ground_truth.iloc[3]).transpose()
    # compare ground truths with results
    df = pd.merge(ground_truth, results, "left", ["GCAM", "product"], suffixes=("_left", "_right"))
    # reduce to the single merged row so the comparisons below are scalar
    # (matching verify_cdr; comparing whole Series would raise ValueError)
    df = df.iloc[0]
    for i in constants.GCAMConstants.plotting_x:
        # within 4% accuracy is fine; flag the year only when the tax falls
        # OUTSIDE that band (the original condition was inverted and flagged
        # matching years instead of mismatching ones)
        if not (.96 * df[str(i) + "_right"] < df[str(i) + "_left"] < 1.04 * df[str(i) + "_right"]):
            years_with_error.append(str(i))
            log(fpath, str(i), "ghg taxes do not match")
    return years_with_error
def log(fpath, year, reason, success=False):
    """
    log errors to the error log
    :param fpath: fpath to output data
    :param year: year in which an error occured
    :param reason: reason for the error occuring as calculated in the verification
    :param success: boolean - if the verification succeeds or not
    :return: N/A
    """
    # build the one-line message, echo it, then append it to the run log
    outcome = "succeeds" if success else "fails"
    message = "Verification " + outcome + " in " + year + " because " + reason
    print(message)
    with open(fpath + "/log.txt", "a+") as handle:
        handle.write(message + "\n")
if __name__ == '__main__':
    # scenarios to verify when this module is executed directly
    for scenario in ("CDRIA-2050_low",):
        main(scenario)