From 272fb289c5ac5a66362a7416e6dbdbeeead5a15e Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Wed, 28 Feb 2024 11:36:42 -0500
Subject: [PATCH 01/11] update gitignore

clean up and document lasso functions, add new predict and brier functions, add code to run and evaluate simulations

run simulations script
---
 .Rbuildignore               |   6 +-
 .gitignore                  |   1 +
 R/brier2.R                  | 158 +++++++++++++++++++++++++++++
 R/cureitlasso.R             |  65 +++++++++++-
 R/cv.cureitlasso.R          |  19 +++-
 R/predict.R                 | 183 ++++++++++++++++++++++++++++++++++
 R/simulasso.R               |  98 +++++++++++-------
 inst/evaluate-simulations.R | 192 ++++++++++++++++++++++++++++++++++++
 inst/run-simulations.R      | 168 +++++++++++++++++++++++++++++++
 9 files changed, 850 insertions(+), 40 deletions(-)
 create mode 100644 R/brier2.R
 create mode 100644 R/predict.R
 create mode 100644 inst/evaluate-simulations.R
 create mode 100644 inst/run-simulations.R

diff --git a/.Rbuildignore b/.Rbuildignore
index 241c55c..0317850 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -13,4 +13,8 @@
 ^.*[.]Rproj.*$
 ^\.Rproj\.user$
 ^.Rhistory$
-[~]
\ No newline at end of file
+[~]
+^R/cv.cureitlasso.R
+^R/simulasso.R
+^R/coxsplit.R
+^R/run-simulations.R
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3f26a3f..22b011a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 inst/doc
 inst/model-fits
 inst/cure-model-fits
+inst/cureit-simulation-results/*
 inst/archive
 docs/
 revdep/
diff --git a/R/brier2.R b/R/brier2.R
new file mode 100644
index 0000000..5063496
--- /dev/null
+++ b/R/brier2.R
@@ -0,0 +1,158 @@
+# we should use this: https://www.tidymodels.org/learn/statistics/survival-metrics/ 
+
+library(survival)
+library(tidyverse)
+
+# Prep Data ---------------------------------------------------------------
+
+xi = dat$x
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]
+
+fitcure <- final_fit$fitcure
+predcure <- predict(fitcure, newx = xi,
+                    s = min(fitcure$lambda), type = "response")
+
+fitcox <- fi nal_fit$fitcox
+predsurvexp <- predict(fitcox, newx = xi, s = min(fitcox$lambda), type = "response")
+
+
+haz <- final_fit$haz
+cumhaz <- final_fit$cumhaz
+
+predsurv <- exp(-cumhaz*predsurvexp)
+
+# Create Data frame to pass ----------------------------------------------
+
+data = data.frame(dat$x) %>%
+  mutate(times = dat$t, 
+         event = dat$d,
+         "truth_survival" = Surv(dat$t, dat$d))
+
+# Predict Function (glmnet cure) ---------------------------------------------
+
+# Trying to get predictions in similar format as tidymodels yardstick
+# Eventually will integratethis with existing predict method function
+
+predict_cure <- function(final_fit,
+                         data = NULL,
+                         eval_timepoints = NULL) {
+
+  # Get values from observed data
+  survival_times = data$times
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(data$times[data$event==1])
+  censoring_weights = survfit(Surv(data$times, 1 - data$event) ~ 1)$surv
+  
+  data_x <- data %>%
+    select(-c(times, event, truth_survival)) %>%
+    as.matrix()
+  
+  # Extract cumulative hazard from final fit (for event times)
+  haz <- final_fit$haz
+  cumhaz <- final_fit$cumhaz
+  
+  # default eval timepoints are observed event times if not supplied by user
+  if(is.null(eval_timepoints)) {
+    eval_timepoints = sorted_event_times
+  }
+
+
+  # Get Relative Cure and Cox Risk ------------------------------------------
+
+  # Cure probability risk prediction (independent of timepoint)
+  fitcure <- final_fit$fitcure
+  predcure <- predict(fitcure, newx = data_x,
+                      s = min(fitcure$lambda), type = "response")
+  
+  # Cox survival probability risk prediction (independent of timepoint)
+  fitcox <- final_fit$fitcox
+  predsurvexp <- predict(fitcox, 
+                         newx = data_x,
+                         s = min(fitcox$lambda),
+                         type = "response")
+
+
+  
+  # Get Predicted probabilities at select timepoints -------------------
+  
+  # vector for conditional survival prob for all patients at sorted_survival_times[l]
+  all_preds_all_tps <- list()
+  
+  # for each eval timepoint, get predicted survival values
+  for (l in 1:length(eval_timepoints)) {
+    all_preds <- data.frame("eval_timepoint" = NA, 
+                            "preds" = NA, 
+                            "pred_cure" = NA, 
+                            "preds_surv" = NA, 
+                            "weights" = NA)
+    
+    # vector for conditional survival prob for all patients at sorted_survival_times[l]
+    predsurv <- rep(NA,length(survival_times)) 
+    
+
+    if (eval_timepoints[l] >= min(sorted_event_times)){
+      
+      ids <- which(
+        sorted_event_times == 
+          max(sorted_event_times[sorted_event_times <= eval_timepoints[l]])
+        )
+      
+      # Conditional survival prob at given patient
+      predsurv <- exp(-cumhaz[ids]*predsurvexp)
+        
+      # inverse probability of the censoring weights
+      ipw <- as.numeric(survival_times > eval_timepoints[l])*censoring_weights[l] + 
+        as.numeric(survival_times <= eval_timepoints[l])*censoring_weights
+      
+      # if no events occurred before eval time 
+    } else if (eval_timepoints[l] < min(sorted_event_times)) {
+      
+      predsurv <- rep(1,length(survival_times))
+      ipw <- 1
+      
+    }
+    
+    preds <- 1 - predcure + predcure*predsurv
+    
+    all_preds <- tibble(
+      
+      "preds" = preds, 
+      "predcure" = predcure, 
+      "predsurv" = predsurv, 
+      "weights" = ipw) %>%
+      mutate(id_num = 1:nrow(.))
+    
+    names(all_preds) <- c(
+                          "preds",
+                          "predcure", "predsurv", "weights",
+                          "id_num")
+    
+    x <-  all_preds %>% nest(.preds = -c(id_num))
+    
+    all_preds_all_tps[[l]] <- all_preds
+                            
+  #  names(all_preds) = c("pred", "pred_cure","pred_surv")
+  }
+  # all_pred_df <- tibble(
+  #   times = data$times,
+  #   events = data$event,
+  #   .pred = all_preds_all_tps
+  # )
+  return(all_pred_df)
+}
+
+predict_df <- predict_cure(final_fit = final_fit, data = data)
+
+
+# Brier Calculation -------------------------------------------------------
+
+brier_cure <- function(predict_df, times, event) {
+  
+  eval_timepoints = unique(predict_df$eval_timepoint)
+  
+  for (l in 1:length(eval_timepoints)) {
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+
+  
+  }
+  
diff --git a/R/cureitlasso.R b/R/cureitlasso.R
index 00fd83f..bc42272 100644
--- a/R/cureitlasso.R
+++ b/R/cureitlasso.R
@@ -1,3 +1,47 @@
+
+# mu is penlanlity for cure
+# lamda is penality for cox
+# touch is posterior probabilty of being cured fr uncured (maybe don't need) (Entropy) 
+ # NOTE maybe change name of alphs 
+# calculated linear peredictor of that iteration 
+
+
+#' Lasso for Cure Models
+#'
+#' @param t Survival times
+#' @param d Event indicator
+#' @param x data n x p matrix or data.frame
+#' @param minmu.ratio minimum penalty value in the logistic model
+#' @param minlambda.ratio minimum penalty value in the cox model
+#' @param adaptive 
+#' @param length.grid 
+#' @param mus penalty for cure
+#' @param lambdas is penalty for cox
+#' @param tol posterior probability of being cured for uncured (maybe don't need) (Entropy) 
+#' @param maxiter 
+#' @param progress verbose iteration counter
+#'
+#' @return
+#' - `all_fits`
+#'  - `t`,
+#'  - `tj` - unique event times sorted,
+#'  - `d`- event indicator,
+#'  - `x`- data n x p matrix or data.frame,
+#'  - `alpha0` fitcure$a0[length(fitcure$lambda)],
+#'  - `alpha` =alpha,
+#'  - `beta` =beta,
+#'  - `haz`=haz,
+#'  - `cumhaz`=cumhaz,
+#'  - `tau`=as.vector(tau),
+#'  - `predsurv`=as.vector(predsurv_iter),
+#'  - `predcure`=as.vector(predcure_iter),
+#'  - `fitcure`=fitcure,
+#'  - `fitcox` =fitcox
+#' - `mus` - mus that were used as penalties for cure models in cross validation
+#' - `lambda` - lambdas that were used as penalities in cix model cross validation
+#' @export
+#'
+#' @examples
 cureitlasso <- function(t,
                         d,
                         x,
@@ -15,6 +59,7 @@ cureitlasso <- function(t,
   require(glmnet)
   require(survival)
   
+  # initialize initial weights 
   tau <- matrix(0.5,nrow=length(t),ncol=2) # First column: cured; second column: uncured
   tau[d==1,1] = 0
   tau[d==1,2] = 1
@@ -24,6 +69,7 @@ cureitlasso <- function(t,
   
   # CV: Be careful for fold split! Two replicates from the same subject should always be assigned to the same fold
   
+  #this is for later integreation with tuning
   penalty.factor.cure=rep(1,ncol(x))
   penalty.factor.cox=rep(1,ncol(x))
   
@@ -51,6 +97,8 @@ cureitlasso <- function(t,
   #   
   # }
   
+  # So this section is finding the set of optimal hyperparemters to test ----
+  # tau is uncure probability 
   fitcure0 <- glmnet(x=do.call("rbind", rep(list(x), 2)),
                      y=lab_class,
                      family="binomial",
@@ -72,8 +120,10 @@ cureitlasso <- function(t,
     lambdas <- fitcox0$lambda[idx_lambdas]
   }
   
+  
   musmax <- fitcure0$lambda[1]
   lambdasmax <- fitcox0$lambda[1]
+  # -----------------
   
   predcure <- predict(fitcure0,newx=x,s=mus,type="response") #Prob of uncured
   predsurvexp <- predict(fitcox0,newx=x,s=lambdas,type="response") # exp(betaX)
@@ -104,6 +154,7 @@ cureitlasso <- function(t,
     
     fit[[i]] <- list()
     
+    
     for (j in 1:length(lambdas)){
       
       tau <- d + (1-d) * (predcure[,i] * predsurv[,j])/( (1 - predcure[,i]) + predcure[,i] * predsurv[,j] )
@@ -309,14 +360,24 @@ cureitlasso <- function(t,
       # num_alpha[i,j] <- sum(alpha!=0)
       # num_beta[i,j] <- sum(beta!=0)
       
+    #  names(fit[i][j]) <- paste0("mu_fit_", i, "_lambda_fit_", "j")
+      
     }
     
+
+    names(fit[[i]]) <- paste0("lambda_fit_", 1:length(lambdas))
   }
   
-  return(list(fit=fit,
+  names(fit) <- paste0("mu_fit_", 1:length(mus))
+  
+  return(list(fit = fit,
               # num_alpha=num_alpha,
               # num_beta=num_beta,
               mus=mus,
               lambdas=lambdas))
   
-}
\ No newline at end of file
+}
+
+# tj - is unique event times sorted
+# haz 
+
diff --git a/R/cv.cureitlasso.R b/R/cv.cureitlasso.R
index 8865130..403c130 100644
--- a/R/cv.cureitlasso.R
+++ b/R/cv.cureitlasso.R
@@ -1,10 +1,14 @@
+# Returns:
+# fit - 
+# arg: length.grid - 
+
 cv.cureitlasso <- function(t,
                            d,
                            x,
                            minmu.ratio=0.05, # minimum penalty value in the logistic model
                            minlambda.ratio=0.05, # minimum penalty value in the cox model
                            adaptive=FALSE,
-                           length.grid=10,
+                           length.grid=10, # tells how many hyperparameters to test. default is 10 for each 
                            nfolds=5,
                            tol=1e-2,
                            maxiter=100,
@@ -22,6 +26,7 @@ cv.cureitlasso <- function(t,
   require(survcomp)
   require(pracma)
   
+  # order data by times 
   order_idx <- order(t)
   t <- t[order_idx]
   d <- d[order_idx]
@@ -29,6 +34,7 @@ cv.cureitlasso <- function(t,
   
   if (progress) print("Fitting by EM algorithm ...")
   
+  # main model with no cross validation
   fit <- cureitlasso(t,d,x,
                      minmu.ratio,
                      minlambda.ratio,
@@ -38,7 +44,7 @@ cv.cureitlasso <- function(t,
                      lambdas=NULL,
                      tol,
                      maxiter,
-                     progress)
+                     progress = TRUE)
   
   if (progress) print("Running cross validations ...")
   
@@ -46,6 +52,8 @@ cv.cureitlasso <- function(t,
   
   # Fold split
   foldid <- coxsplit(as.matrix(Surv(t,d)), nfolds)
+  
+  # grid/array of values 
   cv_brier <- array(NA,dim=c(length.grid,length.grid,nfolds))
   
   # Run CVs
@@ -57,6 +65,7 @@ cv.cureitlasso <- function(t,
       
       if (progress) print(i)
       
+      
       cv.fit[[i]] <- cureitlasso(t[foldid != i],
                                  d[foldid != i],
                                  x[foldid != i,],
@@ -114,6 +123,7 @@ cv.cureitlasso <- function(t,
             
           }
           
+          # calcualte trapazoidal area for brier score 
           cv_brier[j,k,i] <- trapz(tbrier,brier)
           
           
@@ -130,7 +140,7 @@ cv.cureitlasso <- function(t,
     
     cv.fit <- foreach(i = 1:nfolds) %dopar% {
       
-      source("~/Projects/Whiting-Qin-cureit/cureit/R/cureitlasso.R")
+      source(here::here("R/cureitlasso.R"))
       
       cureitlasso(t[foldid != i],
                   d[foldid != i],
@@ -206,6 +216,7 @@ cv.cureitlasso <- function(t,
     
   }
   
+  # summarize - take average across all folds 
   cv_brier_mean <- apply(cv_brier,c(1,2),function(x) mean(x))
   cv_brier_se <- apply(cv_brier,c(1,2),function(x) sd(x)/sqrt(nfolds))
   # pheatmap::pheatmap(cv_brier_mean,cluster_cols = F,cluster_rows = F)
@@ -235,4 +246,4 @@ cv.cureitlasso <- function(t,
   )
   )
   
-}
\ No newline at end of file
+}
diff --git a/R/predict.R b/R/predict.R
new file mode 100644
index 0000000..087eee0
--- /dev/null
+++ b/R/predict.R
@@ -0,0 +1,183 @@
+#' #' Predicted survival probability by cure model
+#' #'
+#' #' @param object A cureit object.
+#' #' @param times Numeric vector of times to obtain survival probability estimates at
+#' #' @param probs Numeric vector of quantiles to obtain estimates at
+#' #' @param newdata A `base::data.frame()` or `tibble::tibble()` containing all
+#' #' the original predictors used to create object. Defaults to `NULL`.
+#' #' @param type Output format of predicted values: "lp" (linear predictor) or "prob" (predicted probabilities).
+#' #' @param brier Boolean indicator of calculating the Brier scores at specified `times`.
+#' #' @param cox Boolean indicator of fitting the Cox model for the training data and calculating the Brier scores at specified `times` for `newdata`.
+#' #' @inheritParams cureit
+#' #' @return named list of prediction estimates
+#' #' @family cureit() functions
+#' #' @export
+#' #' @examples
+#' #' p <- cureit(surv_formula = Surv(ttdeath, death) ~ age, 
+#' #'    cure_formula = ~ age,
+#' #'    data = trial) 
+#' #'    
+#' #' pred <- predict(p, times = seq(5,24,0.5),
+#' #'  newdata = trial[complete.cases(trial), ], brier=TRUE,cox=TRUE)
+#' #' 
+#' #' plot(seq(5,24,0.5),pred$brier,type="S",pch=1)
+#' #' lines(seq(5,24,0.5),pred$brier_cox,type="S",col="red",pch=3)
+#' #' legend("topright",c("Cure model","Cox model"),
+#' #' col=c("black","red"),lty=1,pch=c(1,3))
+#' #' 
+#' #'   
+#' predict.cureit <- function(object, times = NULL, probs = NULL,
+#'                            newdata = NULL, type = c("prob", "lp"),
+#'                            brier = FALSE, cox = FALSE, ...) {
+#'   # checking inputs ------------------------------------------------------------
+#'   
+#'   type <- match.arg(type)
+#'   
+#'   if (is.null(times) + is.null(probs) != 1L) {
+#'     stop("Must specify one and only one of `times=` and `probs=`.", call. = FALSE)
+#'   }
+#'   if (!is.null(times) && any(times < 0)) {
+#'     stop("`times=` must be non-negative.", call. = FALSE)
+#'   }
+#'   if (!is.null(probs) && !all(dplyr::between(probs, 0, 1))) {
+#'     stop("`probs=` must be between 0 and 1.", call. = FALSE)
+#'   }
+#'   if (!(type %in% c("lp","prob"))) {
+#'     stop("`type=` must be one out of 'lp' or 'prob'.", call. = FALSE)
+#'   }
+#'   # if (type == "lp" & brier){
+#'   #   stop("`type=` must be 'prob' to calculate the Brier scores.")
+#'   # }
+#'   # if (brier & is.null(times)){
+#'   #   stop("Must specify one or more times to calculate the Brier scores.")
+#'   # }
+#'   # if (!brier & cox){
+#'   #   stop("Must let `brier=TRUE` to perform Cox regression and corresponding Brier scores.")
+#'   # }
+#'   
+#'   # getting predictions on the original model fit ------------------------------
+#'   processed <- cureit_mold(object$surv_formula, object$cure_formula, newdata %||% object$data,
+#'                            surv_blueprint = object$surv_blueprint, cure_blueprint = object$cure_blueprint)
+#'   
+#'   newX=processed$surv_processed$predictors
+#'   newZ=processed$cure_processed$predictors
+#'   if (is.vector(newZ)) 
+#'     newZ = as.matrix(newZ)
+#'   newZ = cbind(1, newZ)
+#'   if (is.vector(newX)) 
+#'     newX = as.matrix(newX)
+#'   
+#'   s0 = as.matrix(object$smcure$s[order(object$smcure$Time)], ncol = 1)
+#'   n = nrow(s0)
+#'   uncureprob = exp(object$smcure$b %*% t(newZ))/(1 + exp(object$smcure$b %*% t(newZ)))
+#'   
+#'   scure = array(0, dim = c(n, nrow(newX)))
+#'   t = array(0, dim = c(n, nrow(newX)))
+#'   spop = array(0, dim = c(n, nrow(newX)))
+#'   
+#'   #### Only support PH model for now
+#'   ebetaX = exp(object$smcure$beta %*% t(newX))
+#'   for (i in 1:nrow(newZ)) {
+#'     scure[, i] = s0^ebetaX[i]
+#'   }
+#'   
+#'   for (i in 1:n) {
+#'     for (j in 1:nrow(newX)) {
+#'       spop[i, j] = uncureprob[j] * scure[i, j] + (1 - 
+#'                                                     uncureprob[j])
+#'     }
+#'   }
+#'   
+#'   spop_prd = cbind(object$smcure$Time, spop)
+#'   scure_prd = cbind(object$smcure$Time, scure)
+#'   cure_prd = 1-uncureprob
+#'   
+#'   # Type = "Prob ---------
+#'   if (type == "prob"){
+#'     
+#'     if (!is.null(times)) {
+#'       return(list(cured = cure_prd,
+#'                     surv_uncured = probs_at_times(scure_prd,times),
+#'                     surv_marginal = probs_at_times(spop_prd,times)))
+#'     }
+#'     
+#'     if (!is.null(probs)) {
+#'       return(list(cured = cure_prd,
+#'                   surv_uncured = times_at_probs(scure_prd,probs),
+#'                   surv_marginal = times_at_probs(spop_prd,probs)))  
+#'     }
+#'   }
+#'   
+#'   # Type = "Prob ---------
+#'   if (type == "lp"){
+#'     
+#'     return(list(lp_cure_model = object$smcure$b %*% t(newZ),
+#'                 lp_surv_model = object$smcure$beta %*% t(newX)))
+#'     
+#'   }
+#'   
+#' }
+#' 
+#' times_at_probs <- function(matrix_pred, probs) {
+#'   matrix_zero <- matrix(c(0, 1), ncol = 2)
+#'   lst_time_risk <-
+#'     purrr::map(
+#'       probs,
+#'       ~ purrr::map_dbl(
+#'         seq_len(ncol(matrix_pred) - 1L),
+#'         function(i) {
+#'           # adding 1 to the matrix, keeping only the time column, and col of interest
+#'           m <- rbind(matrix_zero, matrix_pred[, c(1L, i + 1L)])
+#'           
+#'           # return NA if the quantiles are all missing OR prob is larger than observed
+#'           if (isTRUE(all(is.na(m[-1, 2])) || .x > max(m[, 2]))) {
+#'             return(NA)
+#'           }
+#'           if (isTRUE(.x %in% m[, 2])) {
+#'             return(m[m[, 2] %in% .x, 1])
+#'           }
+#'           return(m[which.min(m[, 2] >= .x), 1])
+#'         }
+#'       )
+#'     ) %>%
+#'     stats::setNames(paste0("prob ", probs * 100, "%"))
+#'   
+#'   # returning results ----------------------------------------------------------
+#'   lst_time_risk
+#' }
+#' 
+#' 
+#' 
+#' probs_at_times <- function(matrix_pred, times) {
+#'   # defining times for predictions ---------------------------------------------
+#'   # CHECK- here should this be warning?
+#'   all_times <- union(0, matrix_pred[, 1]) %>% sort()
+#'   if (isTRUE(max(times) > max(all_times))) {
+#'     stringr::str_glue("`times=` cannot be larger than {max(all_times)}") %>%
+#'       stop(call. = FALSE)
+#'   }
+#'   times_obs <-
+#'     purrr::map_dbl(
+#'       times,
+#'       function(.x) {
+#'         if (isTRUE(.x %in% all_times)) {
+#'           return(all_times[all_times %in% .x])
+#'         }
+#'         all_times[which.min(all_times <= .x) - 1L]
+#'       }
+#'     )
+#'   
+#'   # named list of the risks, the names are the times,
+#'   # the values are the estimates of risk at the covar levels
+#'   lst_risk_time <-
+#'     purrr::map(seq_len(length(all_times) - 1L), ~ matrix_pred[.x, -1]) %>%
+#'     stats::setNames(all_times[-1]) %>%
+#'     dplyr::bind_cols() %>%
+#'     mutate(`0` = 0, .before = 1) %>%
+#'     as.list() %>%
+#'     stats::setNames(all_times)
+#'   
+#'   # extracting risks at specified time -----------------------------------------
+#'   lst_risk_time[as.character(times_obs)] %>%
+#'     stats::setNames(paste("time", times))
+#' }
diff --git a/R/simulasso.R b/R/simulasso.R
index edc60c4..3d745f6 100644
--- a/R/simulasso.R
+++ b/R/simulasso.R
@@ -1,38 +1,70 @@
-simulasso <- function(n=100,
-                      p=200,
-                      id_cure=NULL,
-                      id_cox=NULL,
-                      coefs_cure=NULL,
-                      coefs_cox=NULL
-                      ){
-  
+#' Simulate Data 
+#'
+#' @param n Number of patients to simulate
+#' @param p Number of parameters to simulate
+#' @param id_cure Which parameters to include in cure simulation
+#' @param id_cox Which parameters to include in cox simulation
+#' @param coefs_cure Pre-specified coefficients of cure
+#' @param coefs_cox  Pre-specified Coefficients of cox
+#' @param n_true_p Number of "true" non-zero coefficients
+#'
+#' @return
+#' - `t`- simulated survival times
+#' - `d`- simulated event indicator
+#' - `x` simulated data n x p matrix or data.frame
+#' - `id_cure` - IDs of selected cure parameters
+#' - `id_cure` - IDs of selected cox parameters
+#' - `coefs_cure` - Coefficients of cure parameters
+#' - `coefs_cox` - Coefficients of cox parameters
+#' 
+#' @export
+#'
+#' @examples
+#' simulasso(n = 10, p = 3)
+simulasso <- function(n = 100,
+                      p = 200,
+                      id_cure = NULL,
+                      id_cox = NULL,
+                      coefs_cure = NULL,
+                      coefs_cox = NULL, 
+                      n_true_p = 10) {
   require(mvtnorm)
+
+  t <- rep(NA, length = n)
+  d <- rep(NA, length = n)
+
+  # number of predictors is size 
+  if (is.null(id_cure)) id_cure <- sample(p, size = n_true_p)
+  if (is.null(id_cox)) id_cox <- sample(p, size = n_true_p)
+
+  if (is.null(coefs_cure)) coefs_cure <- runif(length(id_cure) + 1, min = 2, max = 3)
+  if (is.null(coefs_cox)) coefs_cox <- runif(length(id_cox), min = 2, max = 3)
+
+  x <- mvtnorm::rmvnorm(n, mean = rep(0, p))
+
+  # add intercept and calculate linear predictor and probability of cure using the logistic function:
   
-  t <- rep(NA,length=n)
-  d <- rep(NA,length=n)
-  
-  if(is.null(id_cure)) id_cure = sample(p,size=3)
-  if(is.null(id_cox)) id_cox = sample(p,size=3)
-  
-  if(is.null(coefs_cure)) coefs_cure = runif(length(id_cure)+1,min=2,max=3)
-  if(is.null(coefs_cox)) coefs_cox = runif(length(id_cox),min=2,max=3)
-  
-  x <- mvtnorm::rmvnorm(n,mean=rep(0,p))
-  puncure <- exp(cbind(1,x[,id_cure]) %*% coefs_cure)/(1+exp(cbind(1,x[,id_cure]) %*% coefs_cure))
-  uncure <- rbinom(n,1,prob=puncure)
-  
-  lambda <- exp(x[,id_cox] %*% coefs_cox)
+  puncure <- exp(cbind(1, x[, id_cure]) %*% coefs_cure) / (1 + exp(cbind(1, x[, id_cure]) %*% coefs_cure))
   
+  # make into event index
+  uncure <- rbinom(n, 1, prob = puncure)
+
+  # linear predictor survival
+  lp_cox <- exp(x[, id_cox] %*% coefs_cox)
+
   ### Should censoring time for cured subjects be generated from a same distribution as those uncured?
-  
-  for(i in 1:n){
-    U <- runif(1,min=0,max=1)
-    t0 <- log(1-(log(1-U))/(0.1*lambda[i]))
-    c0 <- min(rexp(1,rate=0.1),runif(1,min=5,max=6))
-    t[i] <- ifelse(uncure[i]==1, min(t0,c0), c0)
-    d[i] <- ifelse(uncure[i]==1, as.numeric(I(t0 <= c0)), 0)
+
+  for (i in 1:n) {
+    U <- runif(1, min = 0, max = 1)
+    t0 <- log(1 - (log(1 - U)) / (0.1 * lp_cox[i]))
+    c0 <- min(rexp(1, rate = 0.1), runif(1, min = 5, max = 6))
+    t[i] <- ifelse(uncure[i] == 1, min(t0, c0), c0)
+    d[i] <- ifelse(uncure[i] == 1, as.numeric(I(t0 <= c0)), 0)
   }
-  
-  dat <- list(t=t,d=d,x=x,uncure=uncure,id_cure=id_cure,id_cox=id_cox,coefs_cure=coefs_cure,coefs_cox=coefs_cox)
-  
-}
\ No newline at end of file
+
+  dat <- list(t = t, d = d, x = x,
+              uncure = uncure,
+              id_cure = id_cure,
+              id_cox = id_cox, coefs_cure = coefs_cure, 
+              coefs_cox = coefs_cox)
+}
diff --git a/inst/evaluate-simulations.R b/inst/evaluate-simulations.R
new file mode 100644
index 0000000..83aa32e
--- /dev/null
+++ b/inst/evaluate-simulations.R
@@ -0,0 +1,192 @@
+source(here::here("R/cv.cureitlasso.R"))
+source(here::here("R/cureitlasso.R"))
+source(here::here("R/simulasso.R"))
+source(here::here("R/coxsplit.R"))
+
+# Calculate Performance Metrics on Simulations ----------------------------------
+
+# * Load all relevant data -------
+load(here::here("inst", "cv_fits_50.RData"))
+load(here::here("inst", "cv_fits_100.RData"))
+load(here::here("inst", "sim_data_list.RData"))
+load(here::here("inst", "sim_valid_data_list.RData"))
+
+# * Calculate Sensitivity of Variable Selection in Training Fits -------
+x <- for (i in 1:50) {
+  fit <- cv_fits_list[[i]]
+  #fit <- x$fit
+  dat <- sim_data_list[[i]]
+  
+  sen_cox_min <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  print(sen_cox_min)
+  
+}
+
+
+hist(x)
+
+# Sensitivity and specificity (for both min and 1se choices of parameters) ---
+
+sen_cox_min <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+
+
+spe_cure_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$alpha!=0))
+spe_cox_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+
+sen_cure_1se <- mean(dat$id_cure %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$alpha!=0))
+sen_cox_1se <- mean(dat$id_cox %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$beta!=0))
+spe_cure_1se <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$alpha!=0))
+spe_cox_1se <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$beta!=0))
+
+
+
+
+# * Get it to work with yardstick -----
+library(yardstick)
+
+vec <- c(TRUE, TRUE, FALSE)
+df <- data.frame(
+  pred = factor(as.integer(dat$id_cure %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$alpha!=0)))
+  # Presence = as.integer(vec)  # Convert logical values to integers (TRUE -> 1, FALSE -> 0)
+) %>%
+  mutate(obs = factor(rep(1, length = nrow(.)), levels = c("0", "1")))
+
+sens(df, obs, pred, event_level = "second")
+sen_cure_min <- mean(dat$id_cure %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$alpha!=0))
+
+
+
+# * Brier Score -------------------------------------------------------------
+#' - `ti`- simulated survival times
+#' - `d`- simulated event indicator
+#' - `x` simulated data n x p matrix or data.frame
+#' - `id_cure` - IDs of selected cure parameters
+#' - `id_cure` - IDs of selected cox parameters
+#' - `coefs_cure` - Coefficients of cure parameters
+#' - `coefs_cox` - Coefficients of cox parameters
+#' - `tj` - is unique event times sorted
+
+
+for (i in 1:length(sim_data_list)) {
+  
+  dat <- sim_data_list[[i]]
+  dat_valid = sim_valid_data_list[[i]]
+  fit <- cv_fits_list[[i]]
+  
+  # for training set
+  ti <- dat$t
+  di <- dat$d
+  xi <- dat$x
+  # unique event times sorted
+  tj <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$tj
+  
+  # for test set
+  ti_valid <- dat_valid$t
+  di_valid <- dat_valid$d
+  xi_valid <- dat_valid$x
+  tj_valid <- sort(ti_valid[di_valid==1])
+  
+  
+  # get final fit (hyperparam with min error) for cure model (fits used train data)
+  final_select_model = fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]
+  fitcure <- final_select_model$fitcure
+  
+  # use this model to make predictions on data (fitted relative-risk for "cox)
+  predcure <- predict(fitcure, newx=xi, s=min(fitcure$lambda), type="response")
+  predcure_valid <- predict(fitcure, newx = xi_valid, s=min(fitcure$lambda), type="response")
+  
+  # get final fit of min coxcure model
+  fitcox <- final_select_model$fitcox
+  
+  # Get Data from Selected "Final" Prediction Model -----
+  # use this model to make predictions on data
+  
+  # get linear predictors from cox
+  predsurvexp <- predict(fitcox,
+                         newx=xi,
+                         s = min(fitcox$lambda),
+                         type="response")
+  
+  predsurvexp_valid <- predict(fitcox,
+                               newx=xi_valid,
+                               s = min(fitcox$lambda),
+                               type="response")
+  
+  haz <- final_select_model$haz
+  cumhaz <- final_select_model$cumhaz
+  
+  # Get Kaplan Meier estimate of actual data
+  predcens <- survfit(Surv(ti,1-di)~1)$surv
+  tcens <- survfit(Surv(ti,1-di)~1)$time
+  # 
+  # predcens <- survfit(Surv(ti,1-di)~1)$surv
+  # tcens <- survfit(Surv(ti,1-di)~1)$time
+  
+  # times at which to get score
+  tbrier <- sort(ti)
+  brier <- rep(NA,length(tbrier))
+  
+  tbrier_valid <- sort(ti_valid)
+  brier_valid <- rep(NA,length(tbrier_valid))
+  
+  # Train Data----
+  for (l in 1:length(tbrier)){
+    
+    predsurv <- rep(NA,length(ti)) # Conditional survival prob for all patients at tbrier[l]
+    
+    if (tbrier[l] >= min(tj)){
+      
+      ids <- which(tj == max(tj[tj <= tbrier[l]]))
+      
+      # get predicted survival probability
+      predsurv <- exp(-cumhaz[ids]*predsurvexp)
+      
+      # weights
+      ipw <- as.numeric(ti > tbrier[l])*predcens[l] + as.numeric(ti <= tbrier[l])*predcens
+      
+    } else if (tbrier[l] < min(tj)){
+      
+      predsurv <- rep(1,length(ti))
+      ipw <- 1
+      
+    }
+    
+    preds <- 1 - predcure + predcure*predsurv
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+    
+  }
+  
+  # Test Data----
+  for (l in 1:length(tbrier)){
+    
+    predsurv_valid <- rep(NA,length(ti_valid)) # Conditional survival prob for all patients at tbrier[l]
+    
+    if (tbrier_valid[l] >= min(tj_valid)){
+      
+      ids_valid <- which(tj_valid == max(tj_valid[tj_valid <= tbrier_valid[l]]))
+      
+      # get predicted survival probability
+      predsurv_valid <- exp(-cumhaz[ids_valid]*predsurvexp_valid)
+      
+      # weights
+      ipw_valid <- as.numeric(ti_valid > tbrier_valid[l])*predcens[l] + as.numeric(ti_valid <= tbrier_valid[l])*predcens
+      
+    }else if (tbrier_valid[l] < min(tj_valid)){
+      
+      predsurv_valid <- rep(1,length(ti_valid))
+      ipw_valid <- 1
+      
+    }
+    
+    preds_valid <- 1 - predcure_valid + predcure_valid*predsurv_valid
+    brier_valid[l] <- mean(I(di_valid==0)*(1 - preds_valie)^2/(ipw_valid + 0.001) + I(di_valid==1)*(0 - preds_valid^2/(ipw_valid+0.001))
+                           
+                           
+                           
+                           all_brier = cbind.data.frame("tbrier" = tbrier, "brier" = brier, "brier_valid" = brier_valid)
+                           return(all_brier)
+                           # plot(tbrier,brier,type="S")
+                           
+                           
+  }
+}
diff --git a/inst/run-simulations.R b/inst/run-simulations.R
new file mode 100644
index 0000000..35275d5
--- /dev/null
+++ b/inst/run-simulations.R
@@ -0,0 +1,168 @@
+source(here::here("R/cv.cureitlasso.R"))
+source(here::here("R/cureitlasso.R"))
+source(here::here("R/simulasso.R"))
+source(here::here("R/coxsplit.R"))
+
+# Potentially change sample sizes and number of parameters
+# We could try simulating scenarios with overlap between cure and cox
+
+# Simulate Cure Data --------------------------------------------------------
+
+# Test- Try simulating one train and test set
+
+dat <- simulasso() # training data
+
+
+
+dat_valid <- simulasso(id_cure = dat$id_cure,
+          id_cox = dat$id_cox,
+          coefs_cure = dat$coefs_cure,
+          coefs_cox = dat$coefs_cox)
+
+
+setdiff(dat$id_cox, dat_valid$id_cox)
+
+
+# * Training- Simulate n Data Sets --------------------------------------------------------
+
+sim_data_list <- list()
+
+for (i in 1:100) {
+  sim_data_list[[i]] = simulasso()
+}
+
+sim_data_list[[1]]$id_cure
+
+# * Validation - Simulate n Validation Data ---------------------------------------------------
+
+sim_valid_data_list <- list()
+
+for (i in 1:100) {
+  dat <- sim_data_list[[i]]
+  sim_valid_data_list[[i]] = simulasso(id_cure = dat$id_cure,
+                                       id_cox = dat$id_cox,
+                                       coefs_cure = dat$coefs_cure,
+                                       coefs_cox = dat$coefs_cox)
+}
+
+sim_valid_data_list[[1]]$id_cure
+
+# save(sim_data_list, file = here::here("inst", "sim_data_list.RData"))
+# save(sim_valid_data_list, file = here::here("inst", "sim_valid_data_list.RData"))
+
+# Fit Lasso  ------------------------------------------------------------
+
+# test- A Single Lasso Fit 
+
+dat <- sim_data_list[[1]]
+test_single_run <- cureitlasso(
+            t = dat$t,
+            d = dat$d,
+            x = dat$x,
+            minmu.ratio=0.05, # minimum penalty value in the logistic model
+            minlambda.ratio=0.05, # minimum penalty value in the cox model
+            adaptive=FALSE,
+            length.grid=8,
+            tol=1e-2,
+            maxiter=100,
+            progress=FALSE)
+
+# Run Cross Validation ----------------------------------------------------
+
+# * Test a Single CV Fit ------
+# To get a sense of what is in the output
+
+dat_valid <- sim_data_list[[1]]
+
+test_single_cv <- cv.cureitlasso(t = dat_valid$t,
+                                 d = dat_valid$d,
+                                 x = dat_valid$x,
+                                 minmu.ratio=0.05, # minimum penalty value in the logistic model
+                                 minlambda.ratio=0.05, # minimum penalty value in the cox model
+                                 adaptive=FALSE,
+                                 length.grid=10,
+
+                                 tol=1e-2,
+                                 maxiter=100,
+
+                                 nfolds=5,
+                                 ncore=5,
+                                 seed=NULL,
+                                 progress=FALSE)
+
+test_list <- list()
+test_list[[1]] <- test_single_cv
+
+# * 100 Runs ----
+
+cv_fits_list <- list()
+start_time <- Sys.time()
+
+for (i in 1:100) {
+
+  dat_valid <- sim_data_list[[i]]
+  
+  cv_fits_list[[i]] <- cv.cureitlasso(t = dat_valid$t,
+                                    d = dat_valid$d,
+                                    x = dat_valid$x,
+                                    minmu.ratio=0.05, # minimum penalty value in the logistic model
+                                    minlambda.ratio=0.05, # minimum penalty value in the cox model
+                                    adaptive=FALSE,
+                                    length.grid=10,
+
+                                    tol=1e-2,
+                                    maxiter=100,
+
+                                    nfolds=5,
+                                    ncore=5,
+                                    seed=NULL,
+                                    progress=FALSE
+  )
+
+
+}
+
+#  8.670569 mins for 3 data sets 
+end_time <- Sys.time()
+end_time - start_time
+
+
+save(cv_fits_list, file = here::here("inst", "cv_fits_100.RData"))
+
+# * 50 Runs ----
+
+cv_fits_list2 <- list()
+
+start_time <- Sys.time()
+
+for (i in 1:50) {
+  
+  dat_valid <- sim_data_list[[i]]
+  
+  cv_fits_list2[[i]] <- cv.cureitlasso(t = dat_valid$t,
+                                      d = dat_valid$d,
+                                      x = dat_valid$x,
+                                      minmu.ratio=0.05, # minimum penalty value in the logistic model
+                                      minlambda.ratio=0.05, # minimum penalty value in the cox model
+                                      adaptive=FALSE,
+                                      length.grid=10,
+                                      
+                                      tol=1e-2,
+                                      maxiter=100,
+                                      
+                                      nfolds=5,
+                                      ncore=5,
+                                      seed=NULL,
+                                      progress=FALSE
+  )
+  
+  
+}
+
+# 2.369856 hours for 50 fits
+end_time <- Sys.time()
+end_time - start_time
+
+
+save(cv_fits_list2, file = here::here("inst", "cv_fits_50.RData"))
+

From ba5773eb43736cd3d26743ee8f253a16ab92d40b Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Wed, 28 Feb 2024 17:46:16 -0500
Subject: [PATCH 02/11] brier scores

---
 R/brier2.R                    | 158 ---------------------
 R/cureit_lasso_predict_eval.R | 260 ++++++++++++++++++++++++++++++++++
 2 files changed, 260 insertions(+), 158 deletions(-)
 delete mode 100644 R/brier2.R
 create mode 100644 R/cureit_lasso_predict_eval.R

diff --git a/R/brier2.R b/R/brier2.R
deleted file mode 100644
index 5063496..0000000
--- a/R/brier2.R
+++ /dev/null
@@ -1,158 +0,0 @@
-# we should use this: https://www.tidymodels.org/learn/statistics/survival-metrics/ 
-
-library(survival)
-library(tidyverse)
-
-# Prep Data ---------------------------------------------------------------
-
-xi = dat$x
-final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]
-
-fitcure <- final_fit$fitcure
-predcure <- predict(fitcure, newx = xi,
-                    s = min(fitcure$lambda), type = "response")
-
-fitcox <- fi nal_fit$fitcox
-predsurvexp <- predict(fitcox, newx = xi, s = min(fitcox$lambda), type = "response")
-
-
-haz <- final_fit$haz
-cumhaz <- final_fit$cumhaz
-
-predsurv <- exp(-cumhaz*predsurvexp)
-
-# Create Data frame to pass ----------------------------------------------
-
-data = data.frame(dat$x) %>%
-  mutate(times = dat$t, 
-         event = dat$d,
-         "truth_survival" = Surv(dat$t, dat$d))
-
-# Predict Function (glmnet cure) ---------------------------------------------
-
-# Trying to get predictions in similar format as tidymodels yardstick
-# Eventually will integratethis with existing predict method function
-
-predict_cure <- function(final_fit,
-                         data = NULL,
-                         eval_timepoints = NULL) {
-
-  # Get values from observed data
-  survival_times = data$times
-  sorted_survival_times <- sort(survival_times)
-  sorted_event_times = sort(data$times[data$event==1])
-  censoring_weights = survfit(Surv(data$times, 1 - data$event) ~ 1)$surv
-  
-  data_x <- data %>%
-    select(-c(times, event, truth_survival)) %>%
-    as.matrix()
-  
-  # Extract cumulative hazard from final fit (for event times)
-  haz <- final_fit$haz
-  cumhaz <- final_fit$cumhaz
-  
-  # default eval timepoints are observed event times if not supplied by user
-  if(is.null(eval_timepoints)) {
-    eval_timepoints = sorted_event_times
-  }
-
-
-  # Get Relative Cure and Cox Risk ------------------------------------------
-
-  # Cure probability risk prediction (independent of timepoint)
-  fitcure <- final_fit$fitcure
-  predcure <- predict(fitcure, newx = data_x,
-                      s = min(fitcure$lambda), type = "response")
-  
-  # Cox survival probability risk prediction (independent of timepoint)
-  fitcox <- final_fit$fitcox
-  predsurvexp <- predict(fitcox, 
-                         newx = data_x,
-                         s = min(fitcox$lambda),
-                         type = "response")
-
-
-  
-  # Get Predicted probabilities at select timepoints -------------------
-  
-  # vector for conditional survival prob for all patients at sorted_survival_times[l]
-  all_preds_all_tps <- list()
-  
-  # for each eval timepoint, get predicted survival values
-  for (l in 1:length(eval_timepoints)) {
-    all_preds <- data.frame("eval_timepoint" = NA, 
-                            "preds" = NA, 
-                            "pred_cure" = NA, 
-                            "preds_surv" = NA, 
-                            "weights" = NA)
-    
-    # vector for conditional survival prob for all patients at sorted_survival_times[l]
-    predsurv <- rep(NA,length(survival_times)) 
-    
-
-    if (eval_timepoints[l] >= min(sorted_event_times)){
-      
-      ids <- which(
-        sorted_event_times == 
-          max(sorted_event_times[sorted_event_times <= eval_timepoints[l]])
-        )
-      
-      # Conditional survival prob at given patient
-      predsurv <- exp(-cumhaz[ids]*predsurvexp)
-        
-      # inverse probability of the censoring weights
-      ipw <- as.numeric(survival_times > eval_timepoints[l])*censoring_weights[l] + 
-        as.numeric(survival_times <= eval_timepoints[l])*censoring_weights
-      
-      # if no events occurred before eval time 
-    } else if (eval_timepoints[l] < min(sorted_event_times)) {
-      
-      predsurv <- rep(1,length(survival_times))
-      ipw <- 1
-      
-    }
-    
-    preds <- 1 - predcure + predcure*predsurv
-    
-    all_preds <- tibble(
-      
-      "preds" = preds, 
-      "predcure" = predcure, 
-      "predsurv" = predsurv, 
-      "weights" = ipw) %>%
-      mutate(id_num = 1:nrow(.))
-    
-    names(all_preds) <- c(
-                          "preds",
-                          "predcure", "predsurv", "weights",
-                          "id_num")
-    
-    x <-  all_preds %>% nest(.preds = -c(id_num))
-    
-    all_preds_all_tps[[l]] <- all_preds
-                            
-  #  names(all_preds) = c("pred", "pred_cure","pred_surv")
-  }
-  # all_pred_df <- tibble(
-  #   times = data$times,
-  #   events = data$event,
-  #   .pred = all_preds_all_tps
-  # )
-  return(all_pred_df)
-}
-
-predict_df <- predict_cure(final_fit = final_fit, data = data)
-
-
-# Brier Calculation -------------------------------------------------------
-
-brier_cure <- function(predict_df, times, event) {
-  
-  eval_timepoints = unique(predict_df$eval_timepoint)
-  
-  for (l in 1:length(eval_timepoints)) {
-    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
-
-  
-  }
-  
diff --git a/R/cureit_lasso_predict_eval.R b/R/cureit_lasso_predict_eval.R
new file mode 100644
index 0000000..e2ff1f0
--- /dev/null
+++ b/R/cureit_lasso_predict_eval.R
@@ -0,0 +1,260 @@
+# we should use this: https://www.tidymodels.org/learn/statistics/survival-metrics/ 
+
+library(survival)
+library(tidyverse)
+library(glmnet)
+
+# Prep Data ---------------------------------------------------------------
+
+load(here::here("inst", "cureit-simulation-results", "sim_data_list.RData"))
+load(here::here("inst", "cureit-simulation-results", "sim_valid_data_list.RData"))
+dat <- sim_data_list[[1]]
+
+load(here::here("inst","cureit-simulation-results", "cv_fits_100.RData"))
+fit <- cv_fits_list[[1]]
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+# Create Data frame to pass 
+
+data = data.frame(dat$x) %>%
+  mutate(times = dat$t, 
+         event = dat$d,
+         "truth_survival" = Surv(dat$t, dat$d))
+
+# Predict Function (glmnet cure) ---------------------------------------------
+
+# Trying to get predictions in similar format as tidymodels yardstick
+# Eventually will integrate this with existing predict method function
+
+predict_cure <- function(final_fit,
+                         data = NULL,
+                         eval_timepoints = NULL) {
+
+  # Get values from observed data
+  survival_times = data$times
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(data$times[data$event==1])
+  censoring_weights = survfit(Surv(data$times, 1 - data$event) ~ 1)$surv
+  
+  data_x <- data %>%
+    select(-c(times, event, truth_survival)) %>%
+    as.matrix()
+  
+  
+  # Extract cumulative hazard from final fit (for event times)
+  haz <- final_fit$haz
+  cumhaz <- final_fit$cumhaz
+  
+  # default eval timepoints are observed event times if not supplied by user
+  if(is.null(eval_timepoints)) {
+    eval_timepoints = sorted_survival_times
+  }
+
+
+  # Get Relative Cure and Cox Risk (independent of timepoint) -------------------
+
+  # Cure probability risk prediction
+  fitcure <- final_fit$fitcure
+  predcure <- predict(fitcure, newx = data_x,
+                      s = min(fitcure$lambda), type = "response")
+  
+  # Cox survival probability risk prediction
+  fitcox <- final_fit$fitcox
+  predsurvexp <- predict(fitcox, 
+                         newx = data_x,
+                         s = min(fitcox$lambda),
+                         type = "response")
+
+
+  
+  # Get Predicted probabilities at Selected Timepoints -------------------
+  
+  all_preds_all_tps <- list()
+  
+  # for each eval timepoint, get predicted survival values
+  for (l in 1:length(eval_timepoints)) {
+    all_preds <- data.frame("eval_timepoint" = NA, 
+                            "preds" = NA, 
+                            "pred_cure" = NA, 
+                            "preds_surv" = NA, 
+                            "weights" = NA)
+    
+    # vector for conditional survival prob for all patients at sorted_survival_times[l]
+    #predsurv <- rep(NA,length(survival_times)) 
+    
+
+    if (eval_timepoints[l] >= min(sorted_event_times)){
+      
+      # maximum event time that is less than timepoint we are evaluating 
+      ids <- which(
+        sorted_event_times == 
+          max(sorted_event_times[sorted_event_times <= eval_timepoints[l]])
+        )
+      
+      # Conditional survival prob for all patients at given time
+      predsurv <- exp(-cumhaz[ids]*predsurvexp)
+        
+      # inverse probability of the censoring weights
+      ipw <- as.numeric(survival_times > eval_timepoints[l])*censoring_weights[l] + 
+        as.numeric(survival_times <= eval_timepoints[l])*censoring_weights
+      
+      # if no events occurred before eval time 
+    } else if (eval_timepoints[l] < min(sorted_event_times)) {
+      
+      predsurv <- rep(1,length(survival_times))
+      ipw <- 1
+      
+    }
+    
+    preds <- 1 - predcure + predcure*predsurv
+    
+    all_preds <- tibble(
+      
+      ".eval_time" = eval_timepoints[l],
+      ".pred_survival" = preds, 
+      ".predcure" = predcure, 
+      ".predsurv" = predsurv, 
+      ".weight_censored_raw" = censoring_weights[l],
+      ".weight_censored" = ipw) %>%
+      mutate(id = 1:nrow(.))
+    # 
+    # names(all_preds) <- c(
+    #                       "preds",
+    #                       "predcure", "predsurv", "weights",
+    #                       "id_num")
+    
+    all_preds_nest <-  all_preds %>% nest(.preds = -c(id))
+    
+    all_preds_all_tps[[l]] <- all_preds
+                            
+  #  names(all_preds) = c("pred", "pred_cure","pred_surv")
+  }
+  
+  
+  all_pred_df <- all_preds_all_tps %>% 
+    do.call("rbind", .)
+  
+  all_pred_df <- all_pred_df %>%
+    nest(.pred = -c(id)) %>% 
+    bind_cols(., "times" = data$times, 
+              "event" = data$event)
+
+  
+  return(all_pred_df)
+}
+
+predict_df <- predict_cure(final_fit = final_fit, data = data)
+
+# quick brier
+x <- c()
+for (i in 1:10) {
+  
+  time_1 <- predict_df_long$.eval_time[[i]]
+  
+  predict_df_long_1 <- predict_df_long %>%
+    filter(.eval_time == time_1)
+  
+  x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
+         I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
+}
+
+# Brier With Yardstick -------------------------------------------------------
+library(yardstick)
+
+brier_scores <-
+  predict_df %>% 
+  mutate(truth = Surv(times, event)) %>%
+  brier_survival(truth = truth, .pred)
+
+
+# Brier Function -------------------------------------------------------
+
+brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
+
+  
+  # Get values from observed data
+  survival_times = predict_df$times
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(data$times[data$event==1])
+  
+  
+  predict_df_long <- predict_df %>%
+    unnest(everything())
+  
+  predict_df_nest_time <- predict_df_long %>%
+    nest(data = -.eval_time)
+  
+
+  map(predict_df_nest_time$data, function(x) {
+    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
+    
+  })
+  for (l in 1:length(eval_timepoints)) {
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+  
+  }
+}
+  
+
+
+# Former Brier Calculation ---------------------------------------------------
+
+calc_brier_old <- function(dat, fit) {
+  # ti <- dat_valid$t
+  # di <- dat_valid$d
+  # xi <- dat_valid$x
+  ti <- dat$t
+  di <- dat$d
+  xi <- dat$x
+  tj <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$tj
+  final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]
+  
+  fitcure <- final_fit$fitcure
+  predcure <- predict(fitcure,newx=xi,s=min(fitcure$lambda),type="response")
+  fitcox <- final_fit$fitcox
+  predsurvexp <- predict(fitcox,newx=xi,s=min(fitcox$lambda),type="response")
+  haz <- final_fit$haz
+  cumhaz <- final_fit$cumhaz
+  
+  predcens <- survfit(Surv(ti,1-di)~1)$surv
+  tcens <- survfit(Surv(ti,1-di)~1)$time
+  
+  tbrier <- sort(ti)
+  brier <- rep(NA,length(tbrier))
+  ipw <- rep(NA,length(tbrier))
+  
+  for (l in 1:length(tbrier)){
+    
+    predsurv <- rep(NA,length(ti)) # Conditional survival prob for all patients at tbrier[l]
+    
+    if (tbrier[l] >= min(tj)){
+      
+      ids <- which(tj == max(tj[tj <= tbrier[l]]))
+      predsurv <- exp(-cumhaz[ids]*predsurvexp)
+      ipw <- as.numeric(ti > tbrier[l])*predcens[l] + as.numeric(ti <= tbrier[l])*predcens
+      
+    }else if (tbrier[l] < min(tj)){
+      
+      predsurv <- rep(1,length(ti))
+      ipw <- 1
+      
+    }
+    
+    preds <- 1 - predcure + predcure*predsurv
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+    #ipw[l] <- ipw
+   # print(ipw)
+  }
+  
+  preds <- cbind.data.frame(tbrier, brier)
+  return(preds)
+ # plot(tbrier,brier,type="S")
+  
+}
+
+x <- calc_brier_old(sim_data_list[[1]], cv_fits_list[[1]])
+x_val <- calc_brier_old(sim_valid_data_list[[1]], cv_fits_list[[1]])
+
+for (i in 1:length(cv_fits_list)) {
+  
+}

From a1218db4ec1823af94fc954891cc9cbb3a19ce83 Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Wed, 28 Feb 2024 18:01:28 -0500
Subject: [PATCH 03/11] updates

---
 R/cureit_lasso_predict_eval.R | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/R/cureit_lasso_predict_eval.R b/R/cureit_lasso_predict_eval.R
index e2ff1f0..4a7b2a0 100644
--- a/R/cureit_lasso_predict_eval.R
+++ b/R/cureit_lasso_predict_eval.R
@@ -8,9 +8,9 @@ library(glmnet)
 
 load(here::here("inst", "cureit-simulation-results", "sim_data_list.RData"))
 load(here::here("inst", "cureit-simulation-results", "sim_valid_data_list.RData"))
-dat <- sim_data_list[[1]]
-
 load(here::here("inst","cureit-simulation-results", "cv_fits_100.RData"))
+
+dat <- sim_data_list[[1]]
 fit <- cv_fits_list[[1]]
 final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
 
@@ -117,17 +117,11 @@ predict_cure <- function(final_fit,
       ".weight_censored_raw" = censoring_weights[l],
       ".weight_censored" = ipw) %>%
       mutate(id = 1:nrow(.))
-    # 
-    # names(all_preds) <- c(
-    #                       "preds",
-    #                       "predcure", "predsurv", "weights",
-    #                       "id_num")
-    
+
     all_preds_nest <-  all_preds %>% nest(.preds = -c(id))
     
     all_preds_all_tps[[l]] <- all_preds
                             
-  #  names(all_preds) = c("pred", "pred_cure","pred_surv")
   }
   
   
@@ -145,7 +139,7 @@ predict_cure <- function(final_fit,
 
 predict_df <- predict_cure(final_fit = final_fit, data = data)
 
-# quick brier
+# quick brier ---
 x <- c()
 for (i in 1:10) {
   
@@ -161,6 +155,7 @@ for (i in 1:10) {
 # Brier With Yardstick -------------------------------------------------------
 library(yardstick)
 
+# Doesn't match- weights are wrong
 brier_scores <-
   predict_df %>% 
   mutate(truth = Surv(times, event)) %>%
@@ -197,7 +192,7 @@ brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
   
 
 
-# Former Brier Calculation ---------------------------------------------------
+# Simple Former Brier Calculation ---------------------------------------------------
 
 calc_brier_old <- function(dat, fit) {
   # ti <- dat_valid$t
@@ -255,6 +250,10 @@ calc_brier_old <- function(dat, fit) {
 x <- calc_brier_old(sim_data_list[[1]], cv_fits_list[[1]])
 x_val <- calc_brier_old(sim_valid_data_list[[1]], cv_fits_list[[1]])
 
-for (i in 1:length(cv_fits_list)) {
-  
+all_brier <- list()
+
+for (i in 1:10) {
+  all_brier[[i]] <- calc_brier_old(sim_data_list[[i]], fit = cv_fits_list[[i]])
 }
+
+

From dcc6e13bc6e7e2561b18d675f7b4784d31eb1bdf Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Thu, 29 Feb 2024 16:41:06 -0500
Subject: [PATCH 04/11] small code changes

---
 R/cureit_lasso_predict_eval.R | 71 ++++++++++++++++++++++++++++-------
 R/cv.cureitlasso.R            |  3 +-
 inst/evaluate-simulations.R   | 13 +++++--
 inst/run-simulations.R        | 17 ++++++---
 inst/testcode.R               | 48 +++++++++++++++++++++++
 5 files changed, 129 insertions(+), 23 deletions(-)
 create mode 100644 inst/testcode.R

diff --git a/R/cureit_lasso_predict_eval.R b/R/cureit_lasso_predict_eval.R
index 4a7b2a0..764f703 100644
--- a/R/cureit_lasso_predict_eval.R
+++ b/R/cureit_lasso_predict_eval.R
@@ -21,22 +21,47 @@ data = data.frame(dat$x) %>%
          event = dat$d,
          "truth_survival" = Surv(dat$t, dat$d))
 
+# * Calculate Sensitivity of Variable Selection in Training Fits -------
+spe_cox_min <- c()
+sen_cox_min <- c()
+
+x <- for (i in 1:100) {
+  fit <- cv_fits_list[[i]]
+  #fit <- x$fit
+  dat <- sim_data_list[[i]]
+  
+  spe_cox_min[[i]] <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  sen_cox_min[[i]] <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  # spe_cox_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  
+  # print(sen_cox_min)
+  
+}
+
+
+hist(unlist(sen_cox_min))
+hist(unlist(spe_cox_min))
+
+# Check Variable Selection on Standard Lasso
+# will add foldid (see new version of cv.lassocure())
+cv.glmnet(x = dat$x, y = Surv(dat$t, dat$d), family = "cox")
+
 # Predict Function (glmnet cure) ---------------------------------------------
 
 # Trying to get predictions in similar format as tidymodels yardstick
 # Eventually will integrate this with existing predict method function
 
 predict_cure <- function(final_fit,
-                         data = NULL,
+                         new_data = NULL,
                          eval_timepoints = NULL) {
 
   # Get values from observed data
-  survival_times = data$times
+  survival_times = new_data$times
   sorted_survival_times <- sort(survival_times)
-  sorted_event_times = sort(data$times[data$event==1])
-  censoring_weights = survfit(Surv(data$times, 1 - data$event) ~ 1)$surv
+  sorted_event_times = sort(new_data$times[new_data$event==1])
+  censoring_weights = survfit(Surv(new_data$times, 1 - new_data$event) ~ 1)$surv
   
-  data_x <- data %>%
+  data_x <- new_data %>%
     select(-c(times, event, truth_survival)) %>%
     as.matrix()
   
@@ -93,7 +118,8 @@ predict_cure <- function(final_fit,
       
       # Conditional survival prob for all patients at given time
       predsurv <- exp(-cumhaz[ids]*predsurvexp)
-        
+      
+        # HERE- check this - tru predict of survfit 
       # inverse probability of the censoring weights
       ipw <- as.numeric(survival_times > eval_timepoints[l])*censoring_weights[l] + 
         as.numeric(survival_times <= eval_timepoints[l])*censoring_weights
@@ -130,14 +156,13 @@ predict_cure <- function(final_fit,
   
   all_pred_df <- all_pred_df %>%
     nest(.pred = -c(id)) %>% 
-    bind_cols(., "times" = data$times, 
-              "event" = data$event)
+    bind_cols(., "event" = new_data$event)
 
   
   return(all_pred_df)
 }
 
-predict_df <- predict_cure(final_fit = final_fit, data = data)
+predict_df <- predict_cure(final_fit = final_fit, new_data = data)
 
 # quick brier ---
 x <- c()
@@ -193,27 +218,36 @@ brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
 
 
 # Simple Former Brier Calculation ---------------------------------------------------
+library(glmnet)
+library(tidyverse)
+library(survival)
 
-calc_brier_old <- function(dat, fit) {
+calc_brier_old <- function(new_data, fit) {
   # ti <- dat_valid$t
   # di <- dat_valid$d
   # xi <- dat_valid$x
-  ti <- dat$t
-  di <- dat$d
-  xi <- dat$x
+  ti <- new_data$t
+  di <- new_data$d
+  xi <- new_data$x
+  
+  # tj from train data
   tj <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$tj
   final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]
   
   fitcure <- final_fit$fitcure
   predcure <- predict(fitcure,newx=xi,s=min(fitcure$lambda),type="response")
+  
   fitcox <- final_fit$fitcox
   predsurvexp <- predict(fitcox,newx=xi,s=min(fitcox$lambda),type="response")
+  
   haz <- final_fit$haz
   cumhaz <- final_fit$cumhaz
   
+  # censoring weights from test data
   predcens <- survfit(Surv(ti,1-di)~1)$surv
   tcens <- survfit(Surv(ti,1-di)~1)$time
   
+  # tbrier is evaluation timepoints (default to all timepoints in test data)
   tbrier <- sort(ti)
   brier <- rep(NA,length(tbrier))
   ipw <- rep(NA,length(tbrier))
@@ -250,10 +284,19 @@ calc_brier_old <- function(dat, fit) {
 x <- calc_brier_old(sim_data_list[[1]], cv_fits_list[[1]])
 x_val <- calc_brier_old(sim_valid_data_list[[1]], cv_fits_list[[1]])
 
+plot(x$tbrier,x$brier,type="S")
+plot(x_val$tbrier,x_val$brier,type="S")
+
+
 all_brier <- list()
 
 for (i in 1:10) {
-  all_brier[[i]] <- calc_brier_old(sim_data_list[[i]], fit = cv_fits_list[[i]])
+  all_brier[[i]] <- calc_brier_old( new_data = sim_data_list[[i]], fit = cv_fits_list[[i]])
 }
+all_brier2 <- all_brier %>%
+  do.call("rbind", .)
 
+x <- all_brier2 %>% group_by(round(tbrier, 1)) %>%
+  summarize(mean = mean(brier))
 
+plot(x$`round(tbrier, 1)`, x$mean,type="S")
diff --git a/R/cv.cureitlasso.R b/R/cv.cureitlasso.R
index 403c130..89cb1ac 100644
--- a/R/cv.cureitlasso.R
+++ b/R/cv.cureitlasso.R
@@ -242,7 +242,8 @@ cv.cureitlasso <- function(t,
               cv_brier_mean = cv_brier_mean,
               cv_brier_se = cv_brier_se,
               index = list(min=idxmin,
-                           `1se`= idx1se)
+                           `1se`= idx1se), 
+              foldid = foldid
   )
   )
   
diff --git a/inst/evaluate-simulations.R b/inst/evaluate-simulations.R
index 83aa32e..15abbdb 100644
--- a/inst/evaluate-simulations.R
+++ b/inst/evaluate-simulations.R
@@ -12,18 +12,25 @@ load(here::here("inst", "sim_data_list.RData"))
 load(here::here("inst", "sim_valid_data_list.RData"))
 
 # * Calculate Sensitivity of Variable Selection in Training Fits -------
+spe_cox_min <- c()
+sen_cox_min <- c()
+
 x <- for (i in 1:50) {
   fit <- cv_fits_list[[i]]
   #fit <- x$fit
   dat <- sim_data_list[[i]]
   
-  sen_cox_min <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
-  print(sen_cox_min)
+  spe_cox_min[[i]] <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  sen_cox_min[[i]] <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+ # spe_cox_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  
+ # print(sen_cox_min)
   
 }
 
 
-hist(x)
+hist(unlist(sen_cox_min))
+hist(unlist(sen_cox_min))
 
 # Sensitivity and specificity (for both min and 1se choices of parameters) ---
 
diff --git a/inst/run-simulations.R b/inst/run-simulations.R
index 35275d5..11f7ace 100644
--- a/inst/run-simulations.R
+++ b/inst/run-simulations.R
@@ -10,6 +10,7 @@ source(here::here("R/coxsplit.R"))
 
 # Test- Try simulating one train and test set
 
+
 dat <- simulasso() # training data
 
 
@@ -24,8 +25,8 @@ setdiff(dat$id_cox, dat_valid$id_cox)
 
 
 # * Training- Simulate n Data Sets --------------------------------------------------------
-
 sim_data_list <- list()
+set.seed(999)
 
 for (i in 1:100) {
   sim_data_list[[i]] = simulasso()
@@ -45,10 +46,16 @@ for (i in 1:100) {
                                        coefs_cox = dat$coefs_cox)
 }
 
+sim_data_list[[1]]$id_cure
 sim_valid_data_list[[1]]$id_cure
 
-# save(sim_data_list, file = here::here("inst", "sim_data_list.RData"))
-# save(sim_valid_data_list, file = here::here("inst", "sim_valid_data_list.RData"))
+save(sim_data_list, file = here::here("inst", 
+                                      "cureit-simulation-results",
+                                      "sim_data_list_v2.RData"))
+
+save(sim_valid_data_list, file = here::here("inst", 
+                                            "cureit-simulation-results",
+                                            "sim_valid_data_list_v2.RData"))
 
 # Fit Lasso  ------------------------------------------------------------
 
@@ -98,7 +105,7 @@ test_list[[1]] <- test_single_cv
 cv_fits_list <- list()
 start_time <- Sys.time()
 
-for (i in 1:100) {
+for (i in 1:50) {
 
   dat_valid <- sim_data_list[[i]]
   
@@ -127,7 +134,7 @@ end_time <- Sys.time()
 end_time - start_time
 
 
-save(cv_fits_list, file = here::here("inst", "cv_fits_100.RData"))
+save(cv_fits_list, file = here::here("inst", "cureit-simulation-results", "cv_fits_50_v2.RData"))
 
 # * 50 Runs ----
 
diff --git a/inst/testcode.R b/inst/testcode.R
new file mode 100644
index 0000000..4ecfc25
--- /dev/null
+++ b/inst/testcode.R
@@ -0,0 +1,48 @@
+#### Evaluate the calibration by Brier score for the validation data
+#### Line 79 - 123: Brier score for the cureit model
+
+# ti <- dat_valid$t
+# di <- dat_valid$d
+# xi <- dat_valid$x
+ti <- dat$t
+di <- dat$d
+xi <- dat$x
+tj <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$tj
+
+fitcure <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$fitcure
+predcure <- predict(fitcure,newx=xi,s=min(fitcure$lambda),type="response")
+fitcox <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$fitcox
+predsurvexp <- predict(fitcox,newx=xi,s=min(fitcox$lambda),type="response")
+haz <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$haz
+cumhaz <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$cumhaz
+
+predcens <- survfit(Surv(ti,1-di)~1)$surv
+tcens <- survfit(Surv(ti,1-di)~1)$time
+
+tbrier <- sort(ti)
+brier <- rep(NA,length(tbrier))
+ipw <- rep(NA,length(tbrier))
+for (l in 1:length(tbrier)){
+
+  predsurv <- rep(NA,length(ti)) # Conditional survival prob for all patients at tbrier[l]
+
+  if (tbrier[l] >= min(tj)){
+
+    ids <- which(tj == max(tj[tj <= tbrier[l]]))
+    predsurv <- exp(-cumhaz[ids]*predsurvexp)
+    ipw <- as.numeric(ti > tbrier[l])*predcens[l] + as.numeric(ti <= tbrier[l])*predcens
+
+  }else if (tbrier[l] < min(tj)){
+
+    predsurv <- rep(1,length(ti))
+    ipw <- 1
+
+  }
+  
+  ipw[l] <- ipw
+  preds <- 1 - predcure + predcure*predsurv
+  brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+
+}
+
+plot(tbrier,brier,type="S")
\ No newline at end of file

From 286b16999255c1e083f3e69139dcde7d69099ddb Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Fri, 15 Mar 2024 14:27:31 -0400
Subject: [PATCH 05/11] small updates to get survfit for censoring weights

---
 R/cureit_lasso_predict_eval.R | 58 +++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/R/cureit_lasso_predict_eval.R b/R/cureit_lasso_predict_eval.R
index 764f703..d954009 100644
--- a/R/cureit_lasso_predict_eval.R
+++ b/R/cureit_lasso_predict_eval.R
@@ -59,7 +59,11 @@ predict_cure <- function(final_fit,
   survival_times = new_data$times
   sorted_survival_times <- sort(survival_times)
   sorted_event_times = sort(new_data$times[new_data$event==1])
-  censoring_weights = survfit(Surv(new_data$times, 1 - new_data$event) ~ 1)$surv
+  
+  # This may need to be eval timepoints....
+  censoring_survfit = survfit(Surv(new_data$times, 1 - new_data$event) ~ 1)
+  censoring_weights = summary(censoring_survfit, times = survival_times)
+  censoring_weights = censoring_weights$surv
   
   data_x <- new_data %>%
     select(-c(times, event, truth_survival)) %>%
@@ -111,19 +115,24 @@ predict_cure <- function(final_fit,
     if (eval_timepoints[l] >= min(sorted_event_times)){
       
       # maximum event time that is less than timepoint we are evaluating 
+      
       ids <- which(
         sorted_event_times == 
           max(sorted_event_times[sorted_event_times <= eval_timepoints[l]])
         )
       
-      # Conditional survival prob for all patients at given time
+      # Conditional survival prob for all patients at given time-
+      # get cumhaz for time closest to eval timepoint
       predsurv <- exp(-cumhaz[ids]*predsurvexp)
       
         # HERE- check this - tru predict of survfit 
       # inverse probability of the censoring weights
+      
       ipw <- as.numeric(survival_times > eval_timepoints[l])*censoring_weights[l] + 
         as.numeric(survival_times <= eval_timepoints[l])*censoring_weights
       
+      ipw = 1/ipw
+      
       # if no events occurred before eval time 
     } else if (eval_timepoints[l] < min(sorted_event_times)) {
       
@@ -156,13 +165,16 @@ predict_cure <- function(final_fit,
   
   all_pred_df <- all_pred_df %>%
     nest(.pred = -c(id)) %>% 
-    bind_cols(., "event" = new_data$event)
+    bind_cols(., "event" = new_data$event, 
+              "times" = new_data$times)
 
   
   return(all_pred_df)
 }
 
 predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+predict_df_long <- predict_df %>%
+  unnest(everything())
 
 # quick brier ---
 x <- c()
@@ -177,6 +189,7 @@ for (i in 1:10) {
          I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
 }
 
+x
 # Brier With Yardstick -------------------------------------------------------
 library(yardstick)
 
@@ -244,8 +257,11 @@ calc_brier_old <- function(new_data, fit) {
   cumhaz <- final_fit$cumhaz
   
   # censoring weights from test data
-  predcens <- survfit(Surv(ti,1-di)~1)$surv
-  tcens <- survfit(Surv(ti,1-di)~1)$time
+  sf <- survfit(Surv(ti,1-di)~1)
+  sf_pred <- summary(sf, time = ti)
+  
+  predcens <- sf_pred$surv
+  tcens <- sf_pred$time
   
   # tbrier is evaluation timepoints (default to all timepoints in test data)
   tbrier <- sort(ti)
@@ -284,15 +300,42 @@ calc_brier_old <- function(new_data, fit) {
 x <- calc_brier_old(sim_data_list[[1]], cv_fits_list[[1]])
 x_val <- calc_brier_old(sim_valid_data_list[[1]], cv_fits_list[[1]])
 
+x
 plot(x$tbrier,x$brier,type="S")
 plot(x_val$tbrier,x_val$brier,type="S")
 
 
 all_brier <- list()
 
-for (i in 1:10) {
+for (i in 1:length(sim_data_list)) {
   all_brier[[i]] <- calc_brier_old( new_data = sim_data_list[[i]], fit = cv_fits_list[[i]])
 }
+
+all_brier_valid <- list()
+
+for (i in 1:length(sim_valid_data_list)) {
+  all_brier_valid[[i]] <- calc_brier_old( new_data = sim_valid_data_list[[i]],
+                                          fit = cv_fits_list[[i]])
+}
+
+x <- all_brier_valid[[1]]
+x_valid <- all_brier[[1]]
+
+ggplot(x, aes(x =  tbrier,y = brier)) + geom_line( aes(x =  tbrier,y = brier)) +
+  geom_line(data = x_valid, aes(x =tbrier, y = brier), color = "blue")
+
+plot(x$tbrier,x$brier,type="S")
+line(x_valid$tbrier,x_valid$brier,type="S")
+
+
+traps <- c()
+
+for (i in 1:length(all_brier)) {
+  traps[i] <- trapz(all_brier[[i]]$tbrier,all_brier[[i]]$brier)
+
+}
+
+
 all_brier2 <- all_brier %>%
   do.call("rbind", .)
 
@@ -300,3 +343,6 @@ x <- all_brier2 %>% group_by(round(tbrier, 1)) %>%
   summarize(mean = mean(brier))
 
 plot(x$`round(tbrier, 1)`, x$mean,type="S")
+
+
+

From d518c5aa1d29318c322faa0293cb2c5f5f5817a8 Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Thu, 28 Mar 2024 09:46:33 -0400
Subject: [PATCH 06/11] update predict function

---
 R/cureit_lasso_predict_eval.R  | 121 +----------------------------
 R/explore-brier.R              | 110 ++++++++++++++++++++++++++
 inst/predict_cure_2024-03-15.R | 137 +++++++++++++++++++++++++++++++++
 3 files changed, 249 insertions(+), 119 deletions(-)
 create mode 100644 R/explore-brier.R
 create mode 100644 inst/predict_cure_2024-03-15.R

diff --git a/R/cureit_lasso_predict_eval.R b/R/cureit_lasso_predict_eval.R
index d954009..61043cb 100644
--- a/R/cureit_lasso_predict_eval.R
+++ b/R/cureit_lasso_predict_eval.R
@@ -51,131 +51,14 @@ cv.glmnet(x = dat$x, y = Surv(dat$t, dat$d), family = "cox")
 # Trying to get predictions in similar format as tidymodels yardstick
 # Eventually will integrate this with existing predict method function
 
-predict_cure <- function(final_fit,
-                         new_data = NULL,
-                         eval_timepoints = NULL) {
-
-  # Get values from observed data
-  survival_times = new_data$times
-  sorted_survival_times <- sort(survival_times)
-  sorted_event_times = sort(new_data$times[new_data$event==1])
-  
-  # This may need to be eval timepoints....
-  censoring_survfit = survfit(Surv(new_data$times, 1 - new_data$event) ~ 1)
-  censoring_weights = summary(censoring_survfit, times = survival_times)
-  censoring_weights = censoring_weights$surv
-  
-  data_x <- new_data %>%
-    select(-c(times, event, truth_survival)) %>%
-    as.matrix()
-  
-  
-  # Extract cumulative hazard from final fit (for event times)
-  haz <- final_fit$haz
-  cumhaz <- final_fit$cumhaz
-  
-  # default eval timepoints are observed event times if not supplied by user
-  if(is.null(eval_timepoints)) {
-    eval_timepoints = sorted_survival_times
-  }
-
-
-  # Get Relative Cure and Cox Risk (independent of timepoint) -------------------
-
-  # Cure probability risk prediction
-  fitcure <- final_fit$fitcure
-  predcure <- predict(fitcure, newx = data_x,
-                      s = min(fitcure$lambda), type = "response")
-  
-  # Cox survival probability risk prediction
-  fitcox <- final_fit$fitcox
-  predsurvexp <- predict(fitcox, 
-                         newx = data_x,
-                         s = min(fitcox$lambda),
-                         type = "response")
 
 
-  
-  # Get Predicted probabilities at Selected Timepoints -------------------
-  
-  all_preds_all_tps <- list()
-  
-  # for each eval timepoint, get predicted survival values
-  for (l in 1:length(eval_timepoints)) {
-    all_preds <- data.frame("eval_timepoint" = NA, 
-                            "preds" = NA, 
-                            "pred_cure" = NA, 
-                            "preds_surv" = NA, 
-                            "weights" = NA)
-    
-    # vector for conditional survival prob for all patients at sorted_survival_times[l]
-    #predsurv <- rep(NA,length(survival_times)) 
-    
-
-    if (eval_timepoints[l] >= min(sorted_event_times)){
-      
-      # maximum event time that is less than timepoint we are evaluating 
-      
-      ids <- which(
-        sorted_event_times == 
-          max(sorted_event_times[sorted_event_times <= eval_timepoints[l]])
-        )
-      
-      # Conditional survival prob for all patients at given time-
-      # get cumhaz for time closest to eval timepoint
-      predsurv <- exp(-cumhaz[ids]*predsurvexp)
-      
-        # HERE- check this - tru predict of survfit 
-      # inverse probability of the censoring weights
-      
-      ipw <- as.numeric(survival_times > eval_timepoints[l])*censoring_weights[l] + 
-        as.numeric(survival_times <= eval_timepoints[l])*censoring_weights
-      
-      ipw = 1/ipw
-      
-      # if no events occurred before eval time 
-    } else if (eval_timepoints[l] < min(sorted_event_times)) {
-      
-      predsurv <- rep(1,length(survival_times))
-      ipw <- 1
-      
-    }
-    
-    preds <- 1 - predcure + predcure*predsurv
-    
-    all_preds <- tibble(
-      
-      ".eval_time" = eval_timepoints[l],
-      ".pred_survival" = preds, 
-      ".predcure" = predcure, 
-      ".predsurv" = predsurv, 
-      ".weight_censored_raw" = censoring_weights[l],
-      ".weight_censored" = ipw) %>%
-      mutate(id = 1:nrow(.))
-
-    all_preds_nest <-  all_preds %>% nest(.preds = -c(id))
-    
-    all_preds_all_tps[[l]] <- all_preds
-                            
-  }
-  
-  
-  all_pred_df <- all_preds_all_tps %>% 
-    do.call("rbind", .)
-  
-  all_pred_df <- all_pred_df %>%
-    nest(.pred = -c(id)) %>% 
-    bind_cols(., "event" = new_data$event, 
-              "times" = new_data$times)
-
-  
-  return(all_pred_df)
-}
-
 predict_df <- predict_cure(final_fit = final_fit, new_data = data)
 predict_df_long <- predict_df %>%
   unnest(everything())
 
+
+
 # quick brier ---
 x <- c()
 for (i in 1:10) {
diff --git a/R/explore-brier.R b/R/explore-brier.R
new file mode 100644
index 0000000..be3623b
--- /dev/null
+++ b/R/explore-brier.R
@@ -0,0 +1,110 @@
+library(tidymodels)
+library(censored)
+tidymodels_prefer()
+
+
+# Try Tidymodels Example --------------------------------------------------
+
+
+# Fit Lung Model  ---------------------------------------------------------
+
+data(cancer, package="survival")
+data(lung_surv)
+#lung <- lung %>% drop_na()
+
+lung <- lung %>%
+  mutate(status = case_when(
+    status == 1 ~ 0, 
+    status == 2 ~ 1
+  ))
+
+
+
+ph_spec <- 
+  proportional_hazards() %>%
+  set_engine("survival") %>% 
+  set_mode("censored regression") 
+
+ph_spec
+
+set.seed(1)
+ph_fit <- ph_spec %>% fit(Surv(time, status) ~ ., data = lung)
+ph_fit
+
+# * Predict Lung Model  -----------
+
+# Make predictions
+p <- predict(
+  ph_fit, 
+  lung, 
+  type = "survival",
+  eval_time = lung$time
+) %>%
+  mutate(surv_truth =  Surv(lung$time, lung$status))
+
+# add weights
+p_weights <- .censoring_weights_graf(ph_fit, predictions = p)
+
+# calc brier
+brier_scores_lung <-
+  p_weights %>% 
+  brier_survival(truth = surv_truth, .pred)
+
+
+# Try our code on their data ----------------------------------------------
+
+
+eval_time_test <- 300
+
+new_data = lung
+survival_times <- lung$time
+sorted_survival_times <- sort(survival_times)
+
+survival_status <- lung$status
+sorted_event_times = sort(survival_times[survival_status==1])
+
+censoring_survfit = survfit(Surv(survival_times, 1 - survival_status) ~ 1)
+censoring_weights = summary(censoring_survfit, times = survival_times)
+censoring_weights = censoring_weights$surv
+
+all_weights = bind_cols(censoring_weights_time, censoring_weights)
+names(all_weights) <- c("weights_time", "weights_surv")
+
+eval_timepoint = 300
+
+# * Inverse Probability Censoring Weights ---------
+
+# If no time points in new data less than lowest event time in model fit data (most cases):
+if (eval_timepoint >= min(sorted_event_times)) {
+  
+  # get maximum event time that is less than time point we are evaluating 
+  closest_event_time <- max(
+    sorted_event_times[sorted_event_times <= eval_timepoint])
+  
+  index_closest_event_time <- which(sorted_event_times == closest_event_time)
+  
+  # Get predicted survival by using cumulative hazard at closest previous event time
+  predsurv <- exp(-cumhaz[index_closest_event_time]*predsurvexp)
+  
+  # CHECK - Probability of the censoring at evaluation time point (or should it be at previous index closest event time??)
+  censoring_weight_at_eval_time <- censoring_weights[timepoint_index]
+  
+  ipw <- 
+    as.numeric(survival_times > eval_timepoint)*censoring_weight_at_eval_time + 
+    as.numeric(survival_times <= eval_timepoint)*censoring_weights
+  
+  ipw = 1/ipw
+  
+  # if no events occurred before eval time 
+} else if (eval_timepoint < min(sorted_event_times)) {
+  
+  predsurv <- rep(1,length(survival_times))
+  ipw <- 1
+  
+}
+
+cbind(survival_times, ipw) %>% View()
+# Compare ------
+lung_surv_long <- lung_surv %>% unnest(everything())
+
+lung_surv_long <- lung_surv_long %>% filter(.eval_time == eval_time_test)
diff --git a/inst/predict_cure_2024-03-15.R b/inst/predict_cure_2024-03-15.R
new file mode 100644
index 0000000..64d684b
--- /dev/null
+++ b/inst/predict_cure_2024-03-15.R
@@ -0,0 +1,137 @@
+
+predict_cure <- function(final_fit,
+                         new_data = NULL,
+                         eval_timepoints = NULL) {
+  
+  # Values from Observed Data & Model Fit ---------------------------------
+  
+  survival_times = new_data$times
+  events = new_data$event
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(new_data$times[new_data$event==1])
+  
+  # Extract cumulative hazard from final fit (for event times)
+  haz <- final_fit$haz
+  cumhaz <- final_fit$cumhaz
+  
+  # Set up Data 
+  data_x <- new_data %>%
+    select(-c(times, event, truth_survival)) %>%
+    as.matrix()
+  
+  # Evaluation Time Points --------------------------------------------------
+  # Default evaluation time points are observed event times if not supplied by user
+  
+  all_eval_timepoints <- eval_timepoints %||% sorted_survival_times
+  
+  # Censoring Weights --------------------------------------------------------
+  
+  censoring_survfit = survfit(Surv(survival_times, 1 - events) ~ 1)
+  
+  # CHECK- or censoring weights at all all respective survival times?
+  # censoring_weights = summary(censoring_survfit, times = all_eval_timepoints)
+  censoring_weights = summary(censoring_survfit, times = survival_times)$surv
+
+    
+  # Relative Risk for Cure and Cox Risk ------------------------------------
+  # (independent of time point) 
+  
+  #  * Cure probability risk prediction ------
+  fitcure <- final_fit$fitcure
+  predcure <- predict(fitcure, newx = data_x,
+                      s = min(fitcure$lambda), type = "response")
+  
+  # * Cox survival probability risk prediction ------
+  fitcox <- final_fit$fitcox
+  predsurvexp <- predict(fitcox, 
+                         newx = data_x,
+                         s = min(fitcox$lambda),
+                         type = "response")
+  
+  
+  
+  # Predicted Probabilities at Evaluated Time points -------------------------
+  # (dependent on time point- need IPCW)
+  
+  all_preds_all_tps <- list()
+  
+  # Evaluated in all patients, for each eval time point (iteration on time point)
+  for (timepoint_index in 1:length(all_eval_timepoints)) {
+    
+    # timepoint we are evaluating
+    eval_timepoint <- all_eval_timepoints[timepoint_index] 
+    
+
+    # * Inverse Probability Censoring Weights ---------
+    
+    # If no time points in new data less than lowest event time in model fit data (most cases):
+    if (eval_timepoint >= min(sorted_event_times)) {
+      
+      # get maximum event time that is less than time point we are evaluating 
+      closest_event_time <- max(
+        sorted_event_times[sorted_event_times <= eval_timepoint])
+      
+      index_closest_event_time <- which(sorted_event_times == closest_event_time)
+      
+      # Get predicted survival by using cumulative hazard at closest previous event time
+      predsurv <- exp(-cumhaz[index_closest_event_time]*predsurvexp)
+      
+      # CHECK - Probability of the censoring at evaluation time point (or should it be at previous index closest event time??)
+     #  censoring_weight_at_eval_time <- censoring_weights[timepoint_index]
+      censoring_weight_at_eval_time <- censoring_weights[index_closest_event_time]
+      
+      ipw <- 
+        # if observed time is greater than event time, use censoring weight at evaluation time (or time closest)
+        (as.numeric(survival_times > eval_timepoint)*censoring_weight_at_eval_time) + 
+        
+        # if observed time is less than or equal to eval time, use censoring weight at it's own observed time
+        as.numeric(survival_times <= eval_timepoint)*censoring_weights
+      
+      ipw = 1/ipw
+      
+      # if no events occurred before eval time 
+    } else if (eval_timepoint < min(sorted_event_times)) {
+      
+      predsurv <- rep(1,length(survival_times))
+      ipw <- 1
+      
+    }
+    
+    preds <- 1 - predcure + predcure*predsurv
+    
+    all_preds <- tibble(
+      
+      ".eval_time" = eval_timepoint,
+      ".pred_survival" = preds, 
+      ".pred_curerate" = predcure, 
+      ".pred_survportion" = predsurv, 
+      ".weight_censored_raw" = censoring_weights[timepoint_index],
+      ".weight_censored" = ipw) %>%
+      mutate(id = 1:nrow(.))
+    
+    all_preds_nest <-  all_preds %>% nest(.preds = -c(id))
+    
+    all_preds_all_tps[[timepoint_index]] <- all_preds
+    
+  }
+  
+  all_pred_df <- all_preds_all_tps %>% 
+    do.call("rbind", .)
+  
+  all_pred_df <- all_pred_df %>%
+    nest(.pred = -c(id)) %>% 
+    bind_cols(., "event" = new_data$event, 
+              "times" = new_data$times)
+  
+  return(all_pred_df)
+  
+}
+
+
+# Try Function ------------------------------------------------------------
+# 
+# predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+# predict_df_long <- predict_df %>%
+#   unnest(everything())
+
+

From a81cc085b833ac05f1619f321a5238800f1b473b Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Thu, 6 Jun 2024 09:09:58 -0400
Subject: [PATCH 07/11] update to predict

---
 R/cureit_lasso_predict_eval.R  | 153 +++++++++++++++++++++++++++++++--
 R/explore-brier.R              |  85 +++++++++++++++++-
 inst/predict_cure_2024-03-15.R |   2 +-
 inst/sim-briers-exploration.R  | 115 +++++++++++++++++++++++++
 4 files changed, 346 insertions(+), 9 deletions(-)
 create mode 100644 inst/sim-briers-exploration.R

diff --git a/R/cureit_lasso_predict_eval.R b/R/cureit_lasso_predict_eval.R
index 61043cb..334e006 100644
--- a/R/cureit_lasso_predict_eval.R
+++ b/R/cureit_lasso_predict_eval.R
@@ -59,30 +59,52 @@ predict_df_long <- predict_df %>%
 
 
 
-# quick brier ---
+# quick brier ---------
 x <- c()
-for (i in 1:10) {
+for (i in 1:100) {
   
   time_1 <- predict_df_long$.eval_time[[i]]
   
   predict_df_long_1 <- predict_df_long %>%
     filter(.eval_time == time_1)
   
-  x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
-         I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
+  x[i] <-  mean(I(predict_df_long_1$times > time_1)*(1 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored) + 
+                  I(predict_df_long_1$event == 1 & predict_df_long_1$times <= time_1)*
+                  (0 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored))
+  
+  # OLD -----
+  # x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
+  #                I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
+  # 
+  # OLD ----
+  # brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) +
+  #                    I(di==1)*(0 - preds)^2/(ipw+0.001))
+  # 
+  # NEW -----
+  # brier[l] <- mean(I(ti > tbrier[l])*(1 - preds)^2/(ipw+0.001) + 
+  #                    I(di==1 & ti <= tbrier[l])*(0 - preds)^2/(ipw+0.001))
+  
 }
 
-x
+times <- unique(predict_df_long$.eval_time)
+brier <- x
+
+brier_scores_manual <- bind_cols("times" = times, "brier" = brier)
+
 # Brier With Yardstick -------------------------------------------------------
 library(yardstick)
 
 # Doesn't match- weights are wrong
-brier_scores <-
+brier_scores_yardstick <-
   predict_df %>% 
   mutate(truth = Surv(times, event)) %>%
   brier_survival(truth = truth, .pred)
 
 
+all_brier <- brier_scores_yardstick %>%
+  select(times = .eval_time, brier_yardstick = .estimate) %>%
+  left_join(brier_scores_manual)
+
 # Brier Function -------------------------------------------------------
 
 brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
@@ -105,13 +127,132 @@ brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
     mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
     
   })
+  
+  for (l in 1:length(eval_timepoints)) {
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+  
+    
+  }
+}
+  
+
+brier_cure_update <- function(predict_df, truth = times, eval_timepoints = NULL) {
+  
+  
+  # Get values from observed data
+  survival_times = predict_df$times
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(data$times[data$event==1])
+  
+  
+  predict_df_long <- predict_df %>%
+    unnest(everything())
+  
+  predict_df_nest_time <- predict_df_long %>%
+    nest(data = -.eval_time)
+  
+  
+  map(predict_df_nest_time$data, function(x) {
+    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
+    
+  })
+  
   for (l in 1:length(eval_timepoints)) {
     brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+    
+    
+  }
+}
+
+predict_df_long <- predict_df %>%
+  unnest(everything())
+
+predict_df_nest_time <- predict_df_long %>%
+  nest(data = -.eval_time)
+
+predict_df_timepoint = predict_df_nest_time[10,] %>%
+  unnest(everything())
+
+brier_for_each_eval <- function(predict_df_timepoint) {
   
+  
+  x = mean(I(predict_df_timepoint$times > predict_df_timepoint$.eval_time )*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+                  I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+                  (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001))
+  x
   }
+  
+
+brier_for_each_eval <- function(predict_df_timepoint) {
+  
+  
+  x = mean(
+    I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+      
+             I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+             (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001)
+    )
+  x/100
+}
+
+brier_for_each_eval2 <- function(predict_df_timepoint) {
+  
+  category1 = I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )
+  category2 = I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)
+  
+  x = mean(
+    category1*(0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) +
+    category2 *(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) 
+     
+  )
+  x
+
 }
+
+brier_for_each_eval(predict_df_timepoint)
+
+# Compare to yardstick ----
+
+brier_survival_impl <- function(truth,
+                                estimate,
+                                censoring_weights,
+                                case_weights,
+                                eval_time) {
+  surv_time <- yardstick:::.extract_surv_time(truth)
+  surv_status <- yardstick:::.extract_surv_status(truth)
+  
+  if (!is.null(case_weights)) {
+    case_weights <- vec_cast(case_weights, to = double())
+    norm_const <- sum(case_weights)
+  } else {
+    case_weights <- rep(1, length(estimate))
+    norm_const <- sum(!survival::is.na.Surv(truth))
+  }
   
+  category_1 <- surv_time <= eval_time & surv_status == 1
+  category_2 <- surv_time > eval_time
+  
+  # (0 - estimate) ^ 2 == estimate ^ 2
+  res <- (category_1 * estimate^2 * censoring_weights) +
+    (category_2 * (1 - estimate)^2 * censoring_weights)
+  
+  res <- res * case_weights
+  res <- sum(res, na.rm = TRUE)
+  res / norm_const
+}
+predict_df_timepoint2 <- predict_df_timepoint %>%
+  mutate(surv_obj = Surv(predict_df_timepoint$times, predict_df_timepoint$event))
 
+library(yardstick)
+brier_survival_impl(
+                    truth = predict_df_timepoint2$surv_obj,
+                    
+                    estimate = predict_df_timepoint2$.pred_survival,
+                                censoring_weights = predict_df_timepoint2$.weight_censored,
+                                case_weights = NULL,
+                                eval_time = predict_df_timepoint2$.eval_time)
+
+brier_for_each_eval(predict_df_timepoint)
 
 # Simple Former Brier Calculation ---------------------------------------------------
 library(glmnet)
diff --git a/R/explore-brier.R b/R/explore-brier.R
index be3623b..ab42001 100644
--- a/R/explore-brier.R
+++ b/R/explore-brier.R
@@ -31,6 +31,15 @@ set.seed(1)
 ph_fit <- ph_spec %>% fit(Surv(time, status) ~ ., data = lung)
 ph_fit
 
+cox <- coxph(Surv(time, status) ~ ., data = lung)
+cox
+cumhaz_all<- survfit(cox)
+cumhaz <- bind_cols(time = cumhaz_all$time, surv = cumhaz_all$surv)
+
+#cumhaz$surv[which.min(abs(cumhaz$time - closest_event_time))]
+
+#cumhaz$surv[cumhaz$time[]
+
 # * Predict Lung Model  -----------
 
 # Make predictions
@@ -43,16 +52,61 @@ p <- predict(
   mutate(surv_truth =  Surv(lung$time, lung$status))
 
 # add weights
-p_weights <- .censoring_weights_graf(ph_fit, predictions = p)
+p_weights <- .censoring_weights_graf(ph_fit, predictions = p) %>%
+  mutate(pat_id = 1:nrow(.))
+
+p_weights_long <- p_weights %>%
+  unnest(cols = .pred)
 
 # calc brier
 brier_scores_lung <-
   p_weights %>% 
   brier_survival(truth = surv_truth, .pred)
 
-
+brier_scores_lung
 # Try our code on their data ----------------------------------------------
 
+# 
+# predict_df_long <- predict_df %>%
+#   unnest(everything())
+
+predict_df_nest_time <- p_weights_long %>%
+  nest(data = -.eval_time)
+
+predict_df_timepoint = predict_df_nest_time[10,] %>%
+  unnest(everything())
+
+brier_for_each_eval <- function(predict_df_timepoint) {
+  
+  
+  x = mean(I(predict_df_timepoint$times > predict_df_timepoint$.eval_time )*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+             I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+             (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001))
+  x
+}
+
+
+
+
+lung2 <- lung %>%
+  rename("times" = time, 
+         "event" = status)
+cox <- coxph(Surv(times, event) ~ ., data = lung2)
+cox
+cumhaz_all<- survfit(cox)
+cumhaz <- bind_cols(time = cumhaz_all$time, surv = cumhaz_all$surv)
+
+
+lung2 <- lung2 %>%
+  mutate(truth_survival = Surv(times, event))
+
+predict_cure(final_fit = cox, new_data = lung2, eval_timepoints = lung2$times)
+
+
+
+
+# Misc --------------------------------------------------------------------
+
 
 eval_time_test <- 300
 
@@ -104,7 +158,34 @@ if (eval_timepoint >= min(sorted_event_times)) {
 }
 
 cbind(survival_times, ipw) %>% View()
+
 # Compare ------
 lung_surv_long <- lung_surv %>% unnest(everything())
 
 lung_surv_long <- lung_surv_long %>% filter(.eval_time == eval_time_test)
+
+#lung %>% select(time, status) %>% distinct() %>% nrow()
+
+
+# Here- New ---------------------------------------------------------------------
+# ti & di is censored patient - If censored time is after eval time () ti > tbrier[l]
+# if event, and event time earlier than evaluation time - (0 - preds)^2/(ipw+0.001))
+
+# 1) If the observed time is a censoring time and that is before the evaluation time, the data point should make no contribution to the performance metric (their "category 3"). These values have a missing value for their probability estimate (and also for their weight column).
+# ^ not included, ensure they are NAs in predict df 
+
+# 2)  I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001)) 
+# Old version ----
+# brier[l] <- mean(I(ti > tbrier[l] & di==0)*
+#                    (1 - preds)^2/(ipw+0.001) + 
+#                    
+#                    I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001))
+
+# Updated Version ---
+
+
+brier[l] <- mean(I(ti > tbrier[l])*
+                   (1 - preds)^2/(ipw+0.001) + 
+                   
+                   I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001))
+
diff --git a/inst/predict_cure_2024-03-15.R b/inst/predict_cure_2024-03-15.R
index 64d684b..4502c57 100644
--- a/inst/predict_cure_2024-03-15.R
+++ b/inst/predict_cure_2024-03-15.R
@@ -87,7 +87,7 @@ predict_cure <- function(final_fit,
         # if observed time is less than or equal to eval time, use censoring weight at it's own observed time
         as.numeric(survival_times <= eval_timepoint)*censoring_weights
       
-      ipw = 1/ipw
+      ipw = 1/(ipw+ 0.001)
       
       # if no events occurred before eval time 
     } else if (eval_timepoint < min(sorted_event_times)) {
diff --git a/inst/sim-briers-exploration.R b/inst/sim-briers-exploration.R
new file mode 100644
index 0000000..bc51536
--- /dev/null
+++ b/inst/sim-briers-exploration.R
@@ -0,0 +1,115 @@
+# we should use this: https://www.tidymodels.org/learn/statistics/survival-metrics/ 
+
+library(survival)
+library(tidyverse)
+library(glmnet)
+
+# Prep Data ---------------------------------------------------------------
+
+load(here::here("inst", "cureit-simulation-results", "sim_data_list.RData"))
+load(here::here("inst", "cureit-simulation-results", "sim_valid_data_list.RData"))
+load(here::here("inst","cureit-simulation-results", "cv_fits_100.RData"))
+
+dat <- sim_data_list[[1]]
+fit <- cv_fits_list[[1]]
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+get_brier_for_each_data <- function(dat, fit) {
+    
+  final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+  
+  # Create Data frame to pass  -----
+  
+  data = data.frame(dat$x) %>%
+    mutate(times = dat$t, 
+           event = dat$d,
+           "truth_survival" = Surv(dat$t, dat$d))
+  
+  # Predict Function (glmnet cure) ---------------------------------------------
+  
+  predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+  predict_df_long <- predict_df %>%
+    unnest(everything())
+  
+  
+  brier_scores_yardstick <-
+    predict_df %>% 
+    mutate(truth = Surv(times, event)) %>%
+    brier_survival(truth = truth, .pred)
+  
+  return(brier_scores_yardstick)
+  
+}
+
+# estimates greater than 1  
+b <- get_brier_for_each_data(dat = sim_data_list[[3]], fit = cv_fits_list[[3]])
+
+
+res <- list()
+for (i in 1:10) {
+  
+  res[[i]] <- get_brier_for_each_data(sim_data_list[[i]], cv_fits_list[[i]])
+}
+
+ggplot(res[[3]], aes(x = .eval_time, .estimate)) + geom_point()
+
+
+
+
+# quick brier ---------
+dat <- sim_data_list[[3]]
+fit <- cv_fits_list[[3]]
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+# Create Data frame to pass  -----
+
+data = data.frame(dat$x) %>%
+  mutate(times = dat$t, 
+         event = dat$d,
+         "truth_survival" = Surv(dat$t, dat$d))
+
+predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+predict_df_long <- predict_df %>%
+  unnest(everything())
+
+
+x <- c()
+for (i in 1:100) {
+  
+  time_1 <- predict_df_long$.eval_time[[i]]
+  
+  predict_df_long_1 <- predict_df_long %>%
+    filter(.eval_time == time_1)
+  
+  x[i] <-  mean(I(predict_df_long_1$times > time_1)*(1 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored) + 
+                  I(predict_df_long_1$event == 1 & predict_df_long_1$times <= time_1)*
+                  (0 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored))
+  
+  # OLD -----
+  # x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
+  #                I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
+  # 
+  # OLD ----
+  # brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) +
+  #                    I(di==1)*(0 - preds)^2/(ipw+0.001))
+  # 
+  # NEW -----
+  # brier[l] <- mean(I(ti > tbrier[l])*(1 - preds)^2/(ipw+0.001) + 
+  #                    I(di==1 & ti <= tbrier[l])*(0 - preds)^2/(ipw+0.001))
+  
+}
+
+times <- unique(predict_df_long$.eval_time)
+brier <- x
+
+brier_scores_manual <- bind_cols("times" = times, "brier" = brier)
+
+brier_scores_yardstick <-
+  predict_df %>% 
+  mutate(truth = Surv(times, event)) %>%
+  brier_survival(truth = truth, .pred)
+
+# Brier With Yardstick -------------------------------------------------------
+library(yardstick)
+
+

From 990c7e3ec521c48cabf461eb762c8e731f8caaea Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Thu, 6 Jun 2024 09:49:36 -0400
Subject: [PATCH 08/11] organize scripts

---
 {R => inst}/cureit_lasso_predict_eval.R | 0
 {R => inst}/explore-brier.R             | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename {R => inst}/cureit_lasso_predict_eval.R (100%)
 rename {R => inst}/explore-brier.R (100%)

diff --git a/R/cureit_lasso_predict_eval.R b/inst/cureit_lasso_predict_eval.R
similarity index 100%
rename from R/cureit_lasso_predict_eval.R
rename to inst/cureit_lasso_predict_eval.R
diff --git a/R/explore-brier.R b/inst/explore-brier.R
similarity index 100%
rename from R/explore-brier.R
rename to inst/explore-brier.R

From 2c6b541e9f8f3ba7d9ea4ee4fec94c50cf5edf79 Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Thu, 6 Jun 2024 10:49:46 -0400
Subject: [PATCH 09/11] organize scripts

---
 R/cv.cureitlasso.R                            |   2 +-
 {inst => R}/predict_cure_2024-03-15.R         |  36 +++-
 ...compare-predict-brier-with-yardstick_P1.R} |   5 +-
 ...compare-predict-brier-with-yardstick_P2.R} | 171 ++++++++++++---
 ...mpare-predict-brier-with-yardstick_P3.R.R} |  57 -----
 inst/evaluate-simulations.R                   | 199 ------------------
 inst/inst-README.md                           |  15 ++
 inst/testcode.R                               |  48 -----
 8 files changed, 191 insertions(+), 342 deletions(-)
 rename {inst => R}/predict_cure_2024-03-15.R (67%)
 rename inst/{explore-brier.R => compare-predict-brier-with-yardstick_P1.R} (98%)
 rename inst/{sim-briers-exploration.R => compare-predict-brier-with-yardstick_P2.R} (53%)
 rename inst/{cureit_lasso_predict_eval.R => compare-predict-brier-with-yardstick_P3.R.R} (86%)
 delete mode 100644 inst/evaluate-simulations.R
 create mode 100644 inst/inst-README.md
 delete mode 100644 inst/testcode.R

diff --git a/R/cv.cureitlasso.R b/R/cv.cureitlasso.R
index 2bdd53c..3ef2447 100644
--- a/R/cv.cureitlasso.R
+++ b/R/cv.cureitlasso.R
@@ -246,6 +246,6 @@ cv.cureitlasso <- function(t,
                            `1se`= idx1se),
               foldid = foldid
   )
-  )
+  
   
 }
diff --git a/inst/predict_cure_2024-03-15.R b/R/predict_cure_2024-03-15.R
similarity index 67%
rename from inst/predict_cure_2024-03-15.R
rename to R/predict_cure_2024-03-15.R
index 4502c57..7d0c721 100644
--- a/inst/predict_cure_2024-03-15.R
+++ b/R/predict_cure_2024-03-15.R
@@ -1,4 +1,38 @@
-
+#' Predict Cure and Survival Probabilities
+#'
+#' This function predicts cure and survival probabilities for data based on a final fitted model.
+#' It calculates predicted survival probabilities at specified evaluation time points, accounting for censoring.
+#' Output is consistent with tidymodels predict guidelines and can be passed into yardstick functions like `brier()`
+#'
+#' @param final_fit A list containing the fitted cure and survival models. It should have elements `fitcure`, `fitcox`, `haz`, and `cumhaz`.
+#' @param new_data A data frame containing the new observations to predict on. It should include columns `times` (observed survival times), `event` (event indicator, 1 for event, 0 for censoring), and other covariates used for prediction.
+#' @param eval_timepoints Optional. A vector of time points at which to evaluate survival probabilities. If NULL, the function uses the observed event times from `new_data`.
+#' 
+#' @return A data frame containing the predicted survival probabilities, cure rates, and other related metrics for each observation at the specified evaluation time points. The returned data frame includes the following columns:
+#' \itemize{
+#'   \item \code{id}: Identifier for each observation.
+#'   \item \code{.eval_time}: Evaluation time point.
+#'   \item \code{.pred_survival}: Predicted survival probability at the evaluation time point.
+#'   \item \code{.pred_curerate}: Predicted cure rate for each observation.
+#'   \item \code{.pred_survportion}: Component of the survival prediction that excludes the cure component.
+#'   \item \code{.weight_censored_raw}: Raw censoring weight at each evaluation time point.
+#'   \item \code{.weight_censored}: Inverse probability weight for censoring.
+#'   \item \code{event}: Event indicator from the new data.
+#'   \item \code{times}: Observed survival times from the new data.
+#' }
+#'
+#' @examples
+#' \dontrun{
+#'   # Assuming `final_fit` is a fitted model list and `new_data` is a data frame with appropriate structure:
+#'   predict_df <- predict_cure(final_fit = final_fit, new_data = new_data)
+#'   print(predict_df)
+#' }
+#' 
+#' @importFrom dplyr select mutate bind_cols
+#' @importFrom tidyr nest unnest
+#' @importFrom tibble tibble
+#' 
+#' @export
 predict_cure <- function(final_fit,
                          new_data = NULL,
                          eval_timepoints = NULL) {
diff --git a/inst/explore-brier.R b/inst/compare-predict-brier-with-yardstick_P1.R
similarity index 98%
rename from inst/explore-brier.R
rename to inst/compare-predict-brier-with-yardstick_P1.R
index ab42001..db759a3 100644
--- a/inst/explore-brier.R
+++ b/inst/compare-predict-brier-with-yardstick_P1.R
@@ -6,10 +6,11 @@ tidymodels_prefer()
 # Try Tidymodels Example --------------------------------------------------
 
 
-# Fit Lung Model  ---------------------------------------------------------
+# * Fit Lung Model  --------
 
 data(cancer, package="survival")
 data(lung_surv)
+
 #lung <- lung %>% drop_na()
 
 lung <- lung %>%
@@ -19,7 +20,6 @@ lung <- lung %>%
   ))
 
 
-
 ph_spec <- 
   proportional_hazards() %>%
   set_engine("survival") %>% 
@@ -64,6 +64,7 @@ brier_scores_lung <-
   brier_survival(truth = surv_truth, .pred)
 
 brier_scores_lung
+
 # Try our code on their data ----------------------------------------------
 
 # 
diff --git a/inst/sim-briers-exploration.R b/inst/compare-predict-brier-with-yardstick_P2.R
similarity index 53%
rename from inst/sim-briers-exploration.R
rename to inst/compare-predict-brier-with-yardstick_P2.R
index bc51536..d281510 100644
--- a/inst/sim-briers-exploration.R
+++ b/inst/compare-predict-brier-with-yardstick_P2.R
@@ -3,29 +3,29 @@
 library(survival)
 library(tidyverse)
 library(glmnet)
+library(yardstick)
 
-# Prep Data ---------------------------------------------------------------
+# Load Simulation Data ---------------------------------------------------------------
 
 load(here::here("inst", "cureit-simulation-results", "sim_data_list.RData"))
 load(here::here("inst", "cureit-simulation-results", "sim_valid_data_list.RData"))
 load(here::here("inst","cureit-simulation-results", "cv_fits_100.RData"))
 
-dat <- sim_data_list[[1]]
-fit <- cv_fits_list[[1]]
-final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+# Function to Get Brier on Each Sim Data ----------------------------------
 
 get_brier_for_each_data <- function(dat, fit) {
-    
+  
   final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
   
-  # Create Data frame to pass  -----
+  # * Create Data frame to pass  -----
   
   data = data.frame(dat$x) %>%
     mutate(times = dat$t, 
            event = dat$d,
            "truth_survival" = Surv(dat$t, dat$d))
   
-  # Predict Function (glmnet cure) ---------------------------------------------
+  # * Run new Predict Function (glmnet cure) ------
   
   predict_df <- predict_cure(final_fit = final_fit, new_data = data)
   predict_df_long <- predict_df %>%
@@ -41,7 +41,125 @@ get_brier_for_each_data <- function(dat, fit) {
   
 }
 
-# estimates greater than 1  
+
+# Manual Brier Score Calc (From Teng) -------------------------------------
+
+# * Version 1 -----
+
+for (i in 1:100) {
+  
+  time_1 <- predict_df_long$.eval_time[[i]]
+  
+  predict_df_long_1 <- predict_df_long %>%
+    filter(.eval_time == time_1)
+  
+  x[i] <-  mean(I(predict_df_long_1$times > time_1)*(1 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored) + 
+                  I(predict_df_long_1$event == 1 & predict_df_long_1$times <= time_1)*
+                  (0 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored))
+  
+  # OLD -----
+  # x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
+  #                I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
+  # 
+  # OLD ----
+  # brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) +
+  #                    I(di==1)*(0 - preds)^2/(ipw+0.001))
+  # 
+  # NEW ?-----
+  # brier[l] <- mean(I(ti > tbrier[l])*(1 - preds)^2/(ipw+0.001) + 
+  #                    I(di==1 & ti <= tbrier[l])*(0 - preds)^2/(ipw+0.001))
+  
+}
+
+
+
+# * Version 2 -----
+
+brier[l] <- mean(I(ti > tbrier[l])*
+                   (1 - preds)^2/(ipw+0.001) + 
+                   
+                   I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001))
+
+
+# * Version 3 -----
+
+brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
+  
+  
+  # Get values from observed data
+  survival_times = predict_df$times
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(data$times[data$event==1])
+  
+  
+  predict_df_long <- predict_df %>%
+    unnest(everything())
+  
+  predict_df_nest_time <- predict_df_long %>%
+    nest(data = -.eval_time)
+  
+  
+  map(predict_df_nest_time$data, function(x) {
+    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
+    
+  })
+  
+  for (l in 1:length(eval_timepoints)) {
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+    
+    
+  }
+}
+
+# * Version 4 -----
+brier_cure_update <- function(predict_df, truth = times, eval_timepoints = NULL) {
+  
+  
+  # Get values from observed data
+  survival_times = predict_df$times
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(data$times[data$event==1])
+  
+  
+  predict_df_long <- predict_df %>%
+    unnest(everything())
+  
+  predict_df_nest_time <- predict_df_long %>%
+    nest(data = -.eval_time)
+  
+  
+  map(predict_df_nest_time$data, function(x) {
+    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
+    
+  })
+  
+  for (l in 1:length(eval_timepoints)) {
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+    
+    
+  }
+}
+
+
+# Example on Single Data Set -------------------------------------------------
+
+# Grab one data set as an example
+dat <- sim_data_list[[1]]
+fit <- cv_fits_list[[1]]
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+# Run predict functon
+predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+predict_df_long <- predict_df %>%
+  unnest(everything())
+
+# Get yardstick values
+brier_scores_yardstick <-
+  predict_df %>% 
+  mutate(truth = Surv(times, event)) %>%
+  brier_survival(truth = truth, .pred)
+
+
 b <- get_brier_for_each_data(dat = sim_data_list[[3]], fit = cv_fits_list[[3]])
 
 
@@ -56,6 +174,8 @@ ggplot(res[[3]], aes(x = .eval_time, .estimate)) + geom_point()
 
 
 
+# Compare to Our Manual Brier Calc ----------------------------------------
+
 # quick brier ---------
 dat <- sim_data_list[[3]]
 fit <- cv_fits_list[[3]]
@@ -73,31 +193,11 @@ predict_df_long <- predict_df %>%
   unnest(everything())
 
 
+
+# Brier With Yardstick -------------------------------------------------------
+library(yardstick)
 x <- c()
-for (i in 1:100) {
-  
-  time_1 <- predict_df_long$.eval_time[[i]]
-  
-  predict_df_long_1 <- predict_df_long %>%
-    filter(.eval_time == time_1)
-  
-  x[i] <-  mean(I(predict_df_long_1$times > time_1)*(1 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored) + 
-                  I(predict_df_long_1$event == 1 & predict_df_long_1$times <= time_1)*
-                  (0 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored))
-  
-  # OLD -----
-  # x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
-  #                I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
-  # 
-  # OLD ----
-  # brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) +
-  #                    I(di==1)*(0 - preds)^2/(ipw+0.001))
-  # 
-  # NEW -----
-  # brier[l] <- mean(I(ti > tbrier[l])*(1 - preds)^2/(ipw+0.001) + 
-  #                    I(di==1 & ti <= tbrier[l])*(0 - preds)^2/(ipw+0.001))
-  
-}
+
 
 times <- unique(predict_df_long$.eval_time)
 brier <- x
@@ -109,7 +209,10 @@ brier_scores_yardstick <-
   mutate(truth = Surv(times, event)) %>%
   brier_survival(truth = truth, .pred)
 
-# Brier With Yardstick -------------------------------------------------------
-library(yardstick)
 
 
+
+
+
+# Misc Code ---------------------------------------------------------------
+
diff --git a/inst/cureit_lasso_predict_eval.R b/inst/compare-predict-brier-with-yardstick_P3.R.R
similarity index 86%
rename from inst/cureit_lasso_predict_eval.R
rename to inst/compare-predict-brier-with-yardstick_P3.R.R
index 334e006..e3fede4 100644
--- a/inst/cureit_lasso_predict_eval.R
+++ b/inst/compare-predict-brier-with-yardstick_P3.R.R
@@ -107,63 +107,6 @@ all_brier <- brier_scores_yardstick %>%
 
 # Brier Function -------------------------------------------------------
 
-brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
-
-  
-  # Get values from observed data
-  survival_times = predict_df$times
-  sorted_survival_times <- sort(survival_times)
-  sorted_event_times = sort(data$times[data$event==1])
-  
-  
-  predict_df_long <- predict_df %>%
-    unnest(everything())
-  
-  predict_df_nest_time <- predict_df_long %>%
-    nest(data = -.eval_time)
-  
-
-  map(predict_df_nest_time$data, function(x) {
-    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
-    
-  })
-  
-  for (l in 1:length(eval_timepoints)) {
-    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
-  
-    
-  }
-}
-  
-
-brier_cure_update <- function(predict_df, truth = times, eval_timepoints = NULL) {
-  
-  
-  # Get values from observed data
-  survival_times = predict_df$times
-  sorted_survival_times <- sort(survival_times)
-  sorted_event_times = sort(data$times[data$event==1])
-  
-  
-  predict_df_long <- predict_df %>%
-    unnest(everything())
-  
-  predict_df_nest_time <- predict_df_long %>%
-    nest(data = -.eval_time)
-  
-  
-  map(predict_df_nest_time$data, function(x) {
-    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
-    
-  })
-  
-  for (l in 1:length(eval_timepoints)) {
-    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
-    
-    
-  }
-}
-
 predict_df_long <- predict_df %>%
   unnest(everything())
 
diff --git a/inst/evaluate-simulations.R b/inst/evaluate-simulations.R
deleted file mode 100644
index 15abbdb..0000000
--- a/inst/evaluate-simulations.R
+++ /dev/null
@@ -1,199 +0,0 @@
-source(here::here("R/cv.cureitlasso.R"))
-source(here::here("R/cureitlasso.R"))
-source(here::here("R/simulasso.R"))
-source(here::here("R/coxsplit.R"))
-
-# Calculate Performance Metrics on Simulations ----------------------------------
-
-# * Load all relevant data -------
-load(here::here("inst", "cv_fits_50.RData"))
-load(here::here("inst", "cv_fits_100.RData"))
-load(here::here("inst", "sim_data_list.RData"))
-load(here::here("inst", "sim_valid_data_list.RData"))
-
-# * Calculate Sensitivity of Variable Selection in Training Fits -------
-spe_cox_min <- c()
-sen_cox_min <- c()
-
-x <- for (i in 1:50) {
-  fit <- cv_fits_list[[i]]
-  #fit <- x$fit
-  dat <- sim_data_list[[i]]
-  
-  spe_cox_min[[i]] <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
-  sen_cox_min[[i]] <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
- # spe_cox_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
-  
- # print(sen_cox_min)
-  
-}
-
-
-hist(unlist(sen_cox_min))
-hist(unlist(sen_cox_min))
-
-# Sensitivity and specificity (for both min and 1se choices of parameters) ---
-
-sen_cox_min <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
-
-
-spe_cure_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$alpha!=0))
-spe_cox_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
-
-sen_cure_1se <- mean(dat$id_cure %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$alpha!=0))
-sen_cox_1se <- mean(dat$id_cox %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$beta!=0))
-spe_cure_1se <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$alpha!=0))
-spe_cox_1se <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$`1se`[1]]][[fit$index$`1se`[2]]]$beta!=0))
-
-
-
-
-# * Get it to work with yardstick -----
-library(yardstick)
-
-vec <- c(TRUE, TRUE, FALSE)
-df <- data.frame(
-  pred = factor(as.integer(dat$id_cure %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$alpha!=0)))
-  # Presence = as.integer(vec)  # Convert logical values to integers (TRUE -> 1, FALSE -> 0)
-) %>%
-  mutate(obs = factor(rep(1, length = nrow(.)), levels = c("0", "1")))
-
-sens(df, obs, pred, event_level = "second")
-sen_cure_min <- mean(dat$id_cure %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$alpha!=0))
-
-
-
-# * Brier Score -------------------------------------------------------------
-#' - `ti`- simulated survival times
-#' - `d`- simulated event indicator
-#' - `x` simulated data n x p matrix or data.frame
-#' - `id_cure` - IDs of selected cure parameters
-#' - `id_cure` - IDs of selected cox parameters
-#' - `coefs_cure` - Coefficients of cure parameters
-#' - `coefs_cox` - Coefficients of cox parameters
-#' - `tj` - is unique event times sorted
-
-
-for (i in 1:length(sim_data_list)) {
-  
-  dat <- sim_data_list[[i]]
-  dat_valid = sim_valid_data_list[[i]]
-  fit <- cv_fits_list[[i]]
-  
-  # for training set
-  ti <- dat$t
-  di <- dat$d
-  xi <- dat$x
-  # unique event times sorted
-  tj <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$tj
-  
-  # for test set
-  ti_valid <- dat_valid$t
-  di_valid <- dat_valid$d
-  xi_valid <- dat_valid$x
-  tj_valid <- sort(ti_valid[di_valid==1])
-  
-  
-  # get final fit (hyperparam with min error) for cure model (fits used train data)
-  final_select_model = fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]
-  fitcure <- final_select_model$fitcure
-  
-  # use this model to make predictions on data (fitted relative-risk for "cox)
-  predcure <- predict(fitcure, newx=xi, s=min(fitcure$lambda), type="response")
-  predcure_valid <- predict(fitcure, newx = xi_valid, s=min(fitcure$lambda), type="response")
-  
-  # get final fit of min coxcure model
-  fitcox <- final_select_model$fitcox
-  
-  # Get Data from Selected "Final" Prediction Model -----
-  # use this model to make predictions on data
-  
-  # get linear predictors from cox
-  predsurvexp <- predict(fitcox,
-                         newx=xi,
-                         s = min(fitcox$lambda),
-                         type="response")
-  
-  predsurvexp_valid <- predict(fitcox,
-                               newx=xi_valid,
-                               s = min(fitcox$lambda),
-                               type="response")
-  
-  haz <- final_select_model$haz
-  cumhaz <- final_select_model$cumhaz
-  
-  # Get Kaplan Meier estimate of actual data
-  predcens <- survfit(Surv(ti,1-di)~1)$surv
-  tcens <- survfit(Surv(ti,1-di)~1)$time
-  # 
-  # predcens <- survfit(Surv(ti,1-di)~1)$surv
-  # tcens <- survfit(Surv(ti,1-di)~1)$time
-  
-  # times at which to get score
-  tbrier <- sort(ti)
-  brier <- rep(NA,length(tbrier))
-  
-  tbrier_valid <- sort(ti_valid)
-  brier_valid <- rep(NA,length(tbrier_valid))
-  
-  # Train Data----
-  for (l in 1:length(tbrier)){
-    
-    predsurv <- rep(NA,length(ti)) # Conditional survival prob for all patients at tbrier[l]
-    
-    if (tbrier[l] >= min(tj)){
-      
-      ids <- which(tj == max(tj[tj <= tbrier[l]]))
-      
-      # get predicted survival probability
-      predsurv <- exp(-cumhaz[ids]*predsurvexp)
-      
-      # weights
-      ipw <- as.numeric(ti > tbrier[l])*predcens[l] + as.numeric(ti <= tbrier[l])*predcens
-      
-    } else if (tbrier[l] < min(tj)){
-      
-      predsurv <- rep(1,length(ti))
-      ipw <- 1
-      
-    }
-    
-    preds <- 1 - predcure + predcure*predsurv
-    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
-    
-  }
-  
-  # Test Data----
-  for (l in 1:length(tbrier)){
-    
-    predsurv_valid <- rep(NA,length(ti_valid)) # Conditional survival prob for all patients at tbrier[l]
-    
-    if (tbrier_valid[l] >= min(tj_valid)){
-      
-      ids_valid <- which(tj_valid == max(tj_valid[tj_valid <= tbrier_valid[l]]))
-      
-      # get predicted survival probability
-      predsurv_valid <- exp(-cumhaz[ids_valid]*predsurvexp_valid)
-      
-      # weights
-      ipw_valid <- as.numeric(ti_valid > tbrier_valid[l])*predcens[l] + as.numeric(ti_valid <= tbrier_valid[l])*predcens
-      
-    }else if (tbrier_valid[l] < min(tj_valid)){
-      
-      predsurv_valid <- rep(1,length(ti_valid))
-      ipw_valid <- 1
-      
-    }
-    
-    preds_valid <- 1 - predcure_valid + predcure_valid*predsurv_valid
-    brier_valid[l] <- mean(I(di_valid==0)*(1 - preds_valie)^2/(ipw_valid + 0.001) + I(di_valid==1)*(0 - preds_valid^2/(ipw_valid+0.001))
-                           
-                           
-                           
-                           all_brier = cbind.data.frame("tbrier" = tbrier, "brier" = brier, "brier_valid" = brier_valid)
-                           return(all_brier)
-                           # plot(tbrier,brier,type="S")
-                           
-                           
-  }
-}
diff --git a/inst/inst-README.md b/inst/inst-README.md
new file mode 100644
index 0000000..396daaf
--- /dev/null
+++ b/inst/inst-README.md
@@ -0,0 +1,15 @@
+# inst
+
+- 
+- `compare-predict-brier-with-yardstick_P2.R` - compare yardstick prediction and brier using our simulation data. Using new predict_cure() compare Teng's manual brier score calc to yardstick brier score calc
+
+
+- `compare-predict-brier-with-yardstick_P1.R` - compare yardstick prediction and brier using their example data
+
+- `compare-predict-brier-with-yardstick_P1.R` - misc comparison code (need to review this)
+
+- `run-simulations.R` - produce simulated data for lasso cure models
+
+- `archive`
+	- `evaluate-simulations.R` - old code to calculate brier score on simulations (modified from Teng)
+	- `testcode.R` - old code to calculate brier score
\ No newline at end of file
diff --git a/inst/testcode.R b/inst/testcode.R
deleted file mode 100644
index 4ecfc25..0000000
--- a/inst/testcode.R
+++ /dev/null
@@ -1,48 +0,0 @@
-#### Evaluate the calibration by Brier score for the validation data
-#### Line 79 - 123: Brier score for the cureit model
-
-# ti <- dat_valid$t
-# di <- dat_valid$d
-# xi <- dat_valid$x
-ti <- dat$t
-di <- dat$d
-xi <- dat$x
-tj <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$tj
-
-fitcure <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$fitcure
-predcure <- predict(fitcure,newx=xi,s=min(fitcure$lambda),type="response")
-fitcox <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$fitcox
-predsurvexp <- predict(fitcox,newx=xi,s=min(fitcox$lambda),type="response")
-haz <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$haz
-cumhaz <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$cumhaz
-
-predcens <- survfit(Surv(ti,1-di)~1)$surv
-tcens <- survfit(Surv(ti,1-di)~1)$time
-
-tbrier <- sort(ti)
-brier <- rep(NA,length(tbrier))
-ipw <- rep(NA,length(tbrier))
-for (l in 1:length(tbrier)){
-
-  predsurv <- rep(NA,length(ti)) # Conditional survival prob for all patients at tbrier[l]
-
-  if (tbrier[l] >= min(tj)){
-
-    ids <- which(tj == max(tj[tj <= tbrier[l]]))
-    predsurv <- exp(-cumhaz[ids]*predsurvexp)
-    ipw <- as.numeric(ti > tbrier[l])*predcens[l] + as.numeric(ti <= tbrier[l])*predcens
-
-  }else if (tbrier[l] < min(tj)){
-
-    predsurv <- rep(1,length(ti))
-    ipw <- 1
-
-  }
-  
-  ipw[l] <- ipw
-  preds <- 1 - predcure + predcure*predsurv
-  brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
-
-}
-
-plot(tbrier,brier,type="S")
\ No newline at end of file

From 232e13428e5fb09aff6553351e8878056ae72dab Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Thu, 6 Jun 2024 10:53:20 -0400
Subject: [PATCH 10/11] update info on scripts

---
 inst/inst-README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/inst/inst-README.md b/inst/inst-README.md
index 396daaf..7cf4a72 100644
--- a/inst/inst-README.md
+++ b/inst/inst-README.md
@@ -1,6 +1,6 @@
 # inst
 
-- 
+
 - `compare-predict-brier-with-yardstick_P2.R` - compare yardstick prediction and brier using our simulation data. Using new predict_cure() compare Teng's manual brier score calc to yardstick brier score calc
 
 
@@ -10,6 +10,9 @@
 
 - `run-simulations.R` - produce simulated data for lasso cure models
 
+- `nomogram-custom-edits.R` - custom edits to nomogram for cureit paper
+
 - `archive`
 	- `evaluate-simulations.R` - old code to calculate brier score on simulations (modified from Teng)
-	- `testcode.R` - old code to calculate brier score
\ No newline at end of file
+	- `testcode.R` - old code to calculate brier score
+

From e693f4ca871cbe08509581da18e26ca1e604c897 Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Thu, 6 Jun 2024 11:00:14 -0400
Subject: [PATCH 11/11] organize

---
 .../compare-predict-brier-with-yardstick_P2.R | 63 ++++++++++++-------
 ...compare-predict-brier-with-yardstick_P3.R} |  0
 2 files changed, 39 insertions(+), 24 deletions(-)
 rename inst/{compare-predict-brier-with-yardstick_P3.R.R => compare-predict-brier-with-yardstick_P3.R} (100%)

diff --git a/inst/compare-predict-brier-with-yardstick_P2.R b/inst/compare-predict-brier-with-yardstick_P2.R
index d281510..de5c1f3 100644
--- a/inst/compare-predict-brier-with-yardstick_P2.R
+++ b/inst/compare-predict-brier-with-yardstick_P2.R
@@ -46,6 +46,7 @@ get_brier_for_each_data <- function(dat, fit) {
 
 # * Version 1 -----
 
+# iterates over eval time 
 for (i in 1:100) {
   
   time_1 <- predict_df_long$.eval_time[[i]]
@@ -57,15 +58,15 @@ for (i in 1:100) {
                   I(predict_df_long_1$event == 1 & predict_df_long_1$times <= time_1)*
                   (0 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored))
   
-  # OLD -----
+  # OLD --
   # x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
   #                I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
   # 
-  # OLD ----
+  # OLD --
   # brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) +
   #                    I(di==1)*(0 - preds)^2/(ipw+0.001))
   # 
-  # NEW ?-----
+  # NEW ?--
   # brier[l] <- mean(I(ti > tbrier[l])*(1 - preds)^2/(ipw+0.001) + 
   #                    I(di==1 & ti <= tbrier[l])*(0 - preds)^2/(ipw+0.001))
   
@@ -75,6 +76,7 @@ for (i in 1:100) {
 
 # * Version 2 -----
 
+# simple version of above- I think I can delete
 brier[l] <- mean(I(ti > tbrier[l])*
                    (1 - preds)^2/(ipw+0.001) + 
                    
@@ -82,8 +84,7 @@ brier[l] <- mean(I(ti > tbrier[l])*
 
 
 # * Version 3 -----
-
-brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
+brier_cure_update <- function(predict_df, truth = times, eval_timepoints = NULL) {
   
   
   # Get values from observed data
@@ -111,36 +112,46 @@ brier_cure <- function(predict_df, truth = times, eval_timepoints = NULL) {
   }
 }
 
-# * Version 4 -----
-brier_cure_update <- function(predict_df, truth = times, eval_timepoints = NULL) {
+# * Version 4 ----
+
+brier_for_each_eval <- function(predict_df_timepoint) {
   
   
-  # Get values from observed data
-  survival_times = predict_df$times
-  sorted_survival_times <- sort(survival_times)
-  sorted_event_times = sort(data$times[data$event==1])
+  x = mean(I(predict_df_timepoint$times > predict_df_timepoint$.eval_time )*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+             I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+             (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001))
+  x
+}
+
+
+brier_for_each_eval <- function(predict_df_timepoint) {
   
   
-  predict_df_long <- predict_df %>%
-    unnest(everything())
-  
-  predict_df_nest_time <- predict_df_long %>%
-    nest(data = -.eval_time)
+  x = mean(
+    I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+      
+      I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+      (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001)
+  )
+  x/100
+}
+
+brier_for_each_eval2 <- function(predict_df_timepoint) {
   
+  category1 = I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )
+  category2 = I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)
   
-  map(predict_df_nest_time$data, function(x) {
-    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
+  x = mean(
+    category1*(0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) +
+      category2 *(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) 
     
-  })
+  )
+  x
   
-  for (l in 1:length(eval_timepoints)) {
-    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
-    
-    
-  }
 }
 
 
+
 # Example on Single Data Set -------------------------------------------------
 
 # Grab one data set as an example
@@ -150,6 +161,7 @@ final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
 
 # Run predict functon
 predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+
 predict_df_long <- predict_df %>%
   unnest(everything())
 
@@ -160,6 +172,9 @@ brier_scores_yardstick <-
   brier_survival(truth = truth, .pred)
 
 
+# Example on Multiple Data ------------------------------------------------
+
+ 
 b <- get_brier_for_each_data(dat = sim_data_list[[3]], fit = cv_fits_list[[3]])
 
 
diff --git a/inst/compare-predict-brier-with-yardstick_P3.R.R b/inst/compare-predict-brier-with-yardstick_P3.R
similarity index 100%
rename from inst/compare-predict-brier-with-yardstick_P3.R.R
rename to inst/compare-predict-brier-with-yardstick_P3.R