|
| 1 | +library(purrr) |
| 2 | +library(rsample) |
| 3 | +library(slurmR) |
| 4 | +library(EMODAnalyzeR) |
| 5 | + |
| 6 | +#run_report will output two csvs |
| 7 | +run_report = function(root_path, cost_art, life_expectancy, pop_scale_param_inst) { |
| 8 | + |
| 9 | + emodplot.incidence = |
| 10 | + function (data_baseline, intervention_df, date.start, date.end) |
| 11 | + { |
| 12 | + data.incidence <- EMODAnalyzeR::calculate.incidence(data_baseline) |
| 13 | + y.lim.max <- min(max((data.incidence %>% filter(Year > date.start, Year < date.end))$incidence * 1.2), 1) |
| 14 | + intervention.incidence <- EMODAnalyzeR::calculate.incidence(intervention_df) |
| 15 | + all_data_incidence = rbind(data.incidence, intervention.incidence) |
| 16 | + p <- emodplot.by_gender(all_data_incidence, date.start, date.end, |
| 17 | + "incidence") + |
| 18 | + scale_y_continuous(labels = scales::percent_format(accuracy = .1), |
| 19 | + breaks = seq(0, y.lim.max, 0.005), limits = c(0, y.lim.max)) + |
| 20 | + ylab("HIV Incidence (%)") |
| 21 | + return(p) |
| 22 | + } |
| 23 | + |
| 24 | + dalys_by_sim = function(data_) { |
| 25 | + data_ %>% |
| 26 | + group_by(sim.id) %>% |
| 27 | + group_map( |
| 28 | + function(data,group) { |
| 29 | + data %>% |
| 30 | + mutate(sim.id = group[1,1]) %>% |
| 31 | + EMODAnalyzeR::calculate.DALY(life_expectancy = life_expectancy) %>% |
| 32 | + mutate(sim.id = group$sim.id) |
| 33 | + } |
| 34 | + ) %>% bind_rows |
| 35 | + } |
| 36 | + |
| 37 | + get_infections_and_py = function (data) { |
| 38 | + infections = data %>% |
| 39 | + mutate(Year = floor(Year)) %>% |
| 40 | + group_by(Year, sim.id) %>% |
| 41 | + summarize(Infections=sum(Newly.Infected * pop_scaling_factor)) |
| 42 | + py_on_treatment = data %>% filter(HasIntervention.LEN==1) %>% |
| 43 | + group_by(Year, sim.id) %>% |
| 44 | + summarize(Population=sum(Population * pop_scaling_factor)) %>% |
| 45 | + mutate(Year = floor(Year)) %>% |
| 46 | + group_by(Year, sim.id) %>% |
| 47 | + summarize(py_on_treatment=mean(Population)) |
| 48 | + py_on_oral_prep = data %>% filter(HasIntervention.Oral_PrEP==1) %>% |
| 49 | + group_by(Year, sim.id) %>% |
| 50 | + summarize(Population=sum(Population * pop_scaling_factor)) %>% |
| 51 | + mutate(Year = floor(Year)) %>% |
| 52 | + group_by(Year, sim.id) %>% |
| 53 | + summarize(py_on_oral_prep=mean(Population)) |
| 54 | + py_total = data %>% |
| 55 | + group_by(Year, sim.id) %>% |
| 56 | + summarize(Population=sum(Population * pop_scaling_factor)) %>% |
| 57 | + mutate(Year = floor(Year)) %>% |
| 58 | + group_by(Year, sim.id) %>% |
| 59 | + summarize(py_total=mean(Population)) |
| 60 | + inner_join(infections, py_on_treatment, by=c("Year","sim.id")) %>% |
| 61 | + inner_join(py_total, by=c("Year","sim.id")) %>% |
| 62 | + inner_join(py_on_oral_prep, by=c("Year","sim.id")) |
| 63 | + } |
| 64 | + # todo - offset year just like daly calculation |
| 65 | + infections_averted_over_time = function(baseline, intervention) { |
| 66 | + baseline_data = get_infections_and_py(baseline) |
| 67 | + intervention_data = get_infections_and_py(intervention) |
| 68 | + intervention_data$infections.averted = baseline_data$Infections - intervention_data$Infections |
| 69 | + intervention_data$infections.baseline = baseline_data$Infections |
| 70 | + results = intervention_data %>% |
| 71 | + group_by(Year) %>% |
| 72 | + summarize(infections.averted = median(infections.averted), |
| 73 | + py_on_treatment = median(py_on_treatment), |
| 74 | + py_total = median(py_total), |
| 75 | + baseline_infections = median(infections.baseline)) |
| 76 | + results_time_on_treatment = results %>% filter(Year >= 2026, Year < 2036) |
| 77 | + results %>% ggplot() + |
| 78 | + geom_point(aes(x=Year, y=infections.averted)) |
| 79 | + data.frame(infections.averted=sum(results$infections.averted), |
| 80 | + py_on_treatment=sum(results$py_on_treatment), |
| 81 | + percent_coverage=sum(results_time_on_treatment$py_on_treatment)/sum(results_time_on_treatment$py_total), |
| 82 | + percent_infections_averted = sum(results_time_on_treatment$infections.averted)/sum(results_time_on_treatment$baseline_infections)) |
| 83 | + |
| 84 | + } |
| 85 | + |
| 86 | + infections_averted_by_simid = function(baseline, intervention) { |
| 87 | + baseline_data = get_infections_and_py(baseline) |
| 88 | + intervention_data = get_infections_and_py(intervention) |
| 89 | + intervention_data$infections.averted = baseline_data$Infections - intervention_data$Infections |
| 90 | + intervention_data$baseline_infections = baseline_data$Infections |
| 91 | + results = intervention_data %>% |
| 92 | + filter(Year >= 2026, Year < 2036) %>% |
| 93 | + group_by(sim.id) %>% |
| 94 | + summarize(infections.averted=sum(infections.averted), |
| 95 | + py_on_treatment=sum(py_on_treatment), |
| 96 | + percent_coverage=sum(py_on_treatment)/sum(py_total), |
| 97 | + percent_infections_averted = sum(infections.averted)/sum(baseline_infections)) |
| 98 | + results |
| 99 | + } |
| 100 | + |
| 101 | + calc_max_price = function(baseline, intervention, cost_art) { |
| 102 | + intervention = calculate.pop_scaling_factor(intervention, |
| 103 | + pop_scale_param_inst$year, |
| 104 | + reference_population = pop_scale_param_inst$population, |
| 105 | + age_max_inclusive = pop_scale_param_inst$age_max_inc, |
| 106 | + age_min_inclusive = pop_scale_param_inst$age_min_inc) |
| 107 | + py = get_infections_and_py(intervention) |
| 108 | + py_baseline = get_infections_and_py(baseline) |
| 109 | + py$oral_prep_averted = py_baseline$py_on_oral_prep - py$py_on_oral_prep |
| 110 | + dbs_intervention= dalys_by_sim(intervention) |
| 111 | + dbs_baseline = dalys_by_sim(baseline) |
| 112 | + dbs_intervention$dalys_averted = dbs_baseline$daly_future_discounted - dbs_intervention$daly_future_discounted |
| 113 | + dbs_intervention$on_art_averted = dbs_baseline$discount_factor * (dbs_baseline$on_art - dbs_intervention$on_art) |
| 114 | + py_and_dbs = inner_join(dbs_intervention %>% rename(Year = year), py, by=c("Year", "sim.id")) |
| 115 | + py_and_dbs = py_and_dbs %>% mutate(oral_prep_averted_discounted = oral_prep_averted * discount_factor) |
| 116 | + cost_delivery = (6 + 0.05 + 2.5) * 2 # cost of delivery + cost of syringe + COST OF test |
| 117 | + summary_fun <- function (.data) { |
| 118 | + .data %>% summarize(dalys_per_py = sum(dalys_averted) / sum(discounted_py), |
| 119 | + art_per_py = sum(on_art_averted) / sum(discounted_py), |
| 120 | + dalys_per_py_minus_prep = sum(dalys_averted) / sum(discounted_py - oral_prep_averted_discounted), |
| 121 | + art_per_py_minus_prep = sum(on_art_averted) / sum(discounted_py - oral_prep_averted_discounted), |
| 122 | + py_on_oral_averted_per_py_on_len = sum(oral_prep_averted_discounted) / sum(discounted_py)) %>% |
| 123 | + mutate(max_cost_without_art = 500*dalys_per_py) %>% |
| 124 | + mutate(max_cost_with_art = max_cost_without_art + cost_art*art_per_py) %>% |
| 125 | + mutate(demand_generation = .1 * max_cost_with_art) %>% |
| 126 | + mutate(max_cost_with_art_minus_dg = max_cost_with_art - demand_generation) %>% |
| 127 | + mutate(max_cost_with_art_minus_dg_delivery = max_cost_with_art_minus_dg - cost_delivery) %>% |
| 128 | + mutate(max_cost_per_dose = max_cost_with_art_minus_dg_delivery / 2) %>% |
| 129 | + mutate(wastage_per_dose = 0.05*max_cost_per_dose) %>% |
| 130 | + mutate(max_cost_per_dose_minus_wastage = max_cost_per_dose - wastage_per_dose) |
| 131 | + } |
| 132 | + bootstraps = |
| 133 | + py_and_dbs %>% |
| 134 | + mutate(discounted_py = py_on_treatment * discount_factor) %>% group_by(sim.id) %>% |
| 135 | + summarize (dalys_averted = sum(dalys_averted), |
| 136 | + discounted_py = sum(discounted_py), |
| 137 | + on_art_averted = sum(on_art_averted), |
| 138 | + undiscounted_py = sum(py_on_treatment), |
| 139 | + oral_prep_averted_discounted = sum(oral_prep_averted_discounted)) %>% |
| 140 | + bootstraps( times = 500 ) |
| 141 | + resamples <- map_dfr(bootstraps$splits, function(.data) {.data %>% analysis() %>% summary_fun()} ) |
| 142 | + summary <- rbind( resamples %>% summarize_all(function(data) {quantile(data, 0.5)}) %>% mutate(aggregation = "Median"), |
| 143 | + resamples %>% summarize_all(function(data) {quantile(data, 0.025)}) %>% mutate(aggregation = "Lower Bound (95% CI)"), |
| 144 | + resamples %>% summarize_all(function(data) {quantile(data, 0.975)}) %>% mutate(aggregation = "Upper Bound (95% CI)")) |
| 145 | + summary |
| 146 | + } |
| 147 | + |
| 148 | + dirs = list.dirs(root_path,recursive = FALSE) |
| 149 | + baseline_dir = Filter(function(.) {grepl("-baseline", .)}, dirs) |
| 150 | + experiment_dirs = Filter(function(.) {!grepl("-baseline", .)}, dirs) |
| 151 | + print(experiment_dirs) |
| 152 | + baseline = read.simulation.results(paste0(baseline_dir,"/ReportHIVByAgeAndGender"), "baseline", stratify_columns = c("Year","Gender","Age","HasIntervention.LEN","HasIntervention.Oral_PrEP"), |
| 153 | + summarize_columns = c("Newly.Infected","Newly.Tested.Positive", "Newly.Tested.Negative", "Population", |
| 154 | + "Infected", "On_ART", "Died", "Died_from_HIV", |
| 155 | + "Tested.Ever", "Diagnosed"), |
| 156 | + min_age_inclusive=15, max_age_inclusive=100) %>% filter(Year < 2060) |
| 157 | + baseline = calculate.pop_scaling_factor(baseline, |
| 158 | + pop_scale_param_inst$year, |
| 159 | + reference_population = pop_scale_param_inst$population, |
| 160 | + age_max_inclusive = pop_scale_param_inst$age_max_inc, |
| 161 | + age_min_inclusive = pop_scale_param_inst$age_min_inc) |
| 162 | + |
| 163 | + # ABOVE THIS LINE is functions that we will call |
| 164 | + # BELOW is the "main" script |
| 165 | + |
| 166 | + #EMODAnalyzeR::bigpurple.add_slurm_to_path() |
| 167 | + bigpurple_opts = list(partition = "a100_short", time = "12:00:00") |
| 168 | + inf_averted = lapply(as.list(experiment_dirs), |
| 169 | + inf_averted_fun) |
| 170 | + #slurmR::Slurm_lapply(as.list(experiment_dirs), |
| 171 | + # inf_averted_fun, |
| 172 | + #sbatch_opt=bigpurple_opts, |
| 173 | + #njobs = length(experiment_dirs), |
| 174 | + #export = c("dalys_by_sim", "infections_averted_over_time", "baseline","get_infections_and_py", "pop_scale_param_inst")) |
| 175 | + |
| 176 | + inf_averted %>% bind_rows %>% |
| 177 | + mutate(experiment = stringr::str_split(experiment, "/") %>% map(~ .[[9]]) %>% unlist()) %>% |
| 178 | + write.csv(file=paste0(root_path,"/infections_averted.csv")) |
| 179 | + #ggplot + geom_point(aes(x=py_on_treatment ,y=infections.averted)) + scale_x_continuous(expand = c(0, 0)) + |
| 180 | + #scale_y_continuous(expand = c(0, 0), limits = c(0, 4.5e6)) + geom_text(aes(x=py_on_treatment ,y=infections.averted, label=experiment, hjust=0)) |
| 181 | + |
| 182 | + max_price_fun <- function(path) { |
| 183 | + experiment = |
| 184 | + paste0(path,"/ReportHIVByAgeAndGender") %>% |
| 185 | + read.simulation.results("Intervention", stratify_columns = c("Year","Gender","Age","HasIntervention.LEN","HasIntervention.Oral_PrEP"), |
| 186 | + summarize_columns = c("Newly.Infected","Newly.Tested.Positive", "Newly.Tested.Negative", "Population", |
| 187 | + "Infected", "On_ART", "Died", "Died_from_HIV", |
| 188 | + "Tested.Ever", "Diagnosed"), |
| 189 | + min_age_inclusive=15, max_age_inclusive=100) %>% filter(Year < 2060) |
| 190 | + plt = emodplot.incidence(baseline, experiment, 2020,2059) |
| 191 | + ggplot2::ggsave(paste0(path,"/incidence.png"),plot=plt) |
| 192 | + calc_max_price(baseline, experiment, cost_art) %>% mutate(experiment = path) |
| 193 | + } |
| 194 | + |
| 195 | + |
| 196 | + max_prices = lapply(as.list(experiment_dirs), |
| 197 | + max_price_fun) |
| 198 | + #slurmR::Slurm_ #sbatch_opt=bigpurple_opts, |
| 199 | + #njobs = length(experiment_dirs), |
| 200 | + #export = c("dalys_by_sim", "calc_max_price", "baseline","get_infections_and_py", "emodplot.incidence","pop_scale_param_inst","cost_art","life_expectancy")) |
| 201 | + |
| 202 | + max_prices %>% bind_rows %>% |
| 203 | + mutate(experiment = stringr::str_split(experiment, "/") %>% map(~ .[[9]]) %>% unlist()) %>% |
| 204 | + write.csv(file=paste0(root_path,"/max_prices.csv")) |
| 205 | +} |
| 206 | +#test |
| 207 | +run_report( root_path = "/gpfs/data/bershteynlab/EMOD/kaftad01/202401_SA_LEN_realistic_coverage/uw_output", |
| 208 | + cost_art=187, |
| 209 | + life_expectancy = 66, |
| 210 | + pop_scale_param_inst = pop_scale_params(year=2009,population=33868111, age_min_inc=15, age_max_inc= 64)) |
0 commit comments