diff --git a/.Rbuildignore b/.Rbuildignore
index 241c55c..0317850 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -13,4 +13,8 @@
 ^.*[.]Rproj.*$
 ^\.Rproj\.user$
 ^.Rhistory$
-[~]
\ No newline at end of file
+[~]
+^R/cv.cureitlasso.R
+^R/simulasso.R
+^R/coxsplit.R
+^R/run-simulations.R
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3f26a3f..22b011a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 inst/doc
 inst/model-fits
 inst/cure-model-fits
+inst/cureit-simulation-results/*
 inst/archive
 docs/
 revdep/
diff --git a/R/cureitlasso.R b/R/cureitlasso.R
index ba82871..a4f391f 100644
--- a/R/cureitlasso.R
+++ b/R/cureitlasso.R
@@ -1,3 +1,47 @@
+
+# mu is penlanlity for cure
+# lamda is penality for cox
+# touch is posterior probabilty of being cured fr uncured (maybe don't need) (Entropy) 
+ # NOTE maybe change name of alphs 
+# calculated linear peredictor of that iteration 
+
+
+#' Lasso for Cure Models
+#'
+#' @param t Survival times
+#' @param d Event indicator
+#' @param x data n x p matrix or data.frame
+#' @param minmu.ratio minimum penalty value in the logistic model
+#' @param minlambda.ratio minimum penalty value in the cox model
+#' @param adaptive 
+#' @param length.grid 
+#' @param mus penalty for cure
+#' @param lambdas is penalty for cox
+#' @param tol posterior probability of being cured for uncured (maybe don't need) (Entropy) 
+#' @param maxiter 
+#' @param progress verbose iteration counter
+#'
+#' @return
+#' - `all_fits`
+#'  - `t`,
+#'  - `tj` - unique event times sorted,
+#'  - `d`- event indicator,
+#'  - `x`- data n x p matrix or data.frame,
+#'  - `alpha0` fitcure$a0[length(fitcure$lambda)],
+#'  - `alpha` =alpha,
+#'  - `beta` =beta,
+#'  - `haz`=haz,
+#'  - `cumhaz`=cumhaz,
+#'  - `tau`=as.vector(tau),
+#'  - `predsurv`=as.vector(predsurv_iter),
+#'  - `predcure`=as.vector(predcure_iter),
+#'  - `fitcure`=fitcure,
+#'  - `fitcox` =fitcox
+#' - `mus` - mus that were used as penalties for cure models in cross validation
+#' - `lambda` - lambdas that were used as penalities in cix model cross validation
+#' @export
+#'
+#' @examples
 cureitlasso <- function(t,
                         d,
                         x,
@@ -15,6 +59,7 @@ cureitlasso <- function(t,
   require(glmnet)
   require(survival)
   
+  # initialize initial weights 
   tau <- matrix(0.5,nrow=length(t),ncol=2) # First column: cured; second column: uncured
   tau[d==1,1] = 0
   tau[d==1,2] = 1
@@ -24,6 +69,7 @@ cureitlasso <- function(t,
   
   # CV: Be careful for fold split! Two replicates from the same subject should always be assigned to the same fold
   
+  #this is for later integreation with tuning
   penalty.factor.cure=rep(1,ncol(x))
   penalty.factor.cox=rep(1,ncol(x))
   
@@ -51,6 +97,8 @@ cureitlasso <- function(t,
   #   
   # }
   
+  # So this section is finding the set of optimal hyperparemters to test ----
+  # tau is uncure probability 
   fitcure0 <- glmnet(x=do.call("rbind", rep(list(x), 2)),
                      y=lab_class,
                      family="binomial",
@@ -72,8 +120,10 @@ cureitlasso <- function(t,
     lambdas <- fitcox0$lambda[idx_lambdas]
   }
   
+  
   musmax <- fitcure0$lambda[1]
   lambdasmax <- fitcox0$lambda[1]
+  # -----------------
   
   predcure <- predict(fitcure0,newx=x,s=mus,type="response") #Prob of uncured
   predsurvexp <- predict(fitcox0,newx=x,s=lambdas,type="response") # exp(betaX)
@@ -114,6 +164,7 @@ cureitlasso <- function(t,
     
     fit[[i]] <- list()
     
+    
     for (j in 1:length(lambdas)){
       
       tau <- d + (1-d) * (predcure[,i] * predsurv[,j])/( (1 - predcure[,i]) + predcure[,i] * predsurv[,j] )
@@ -319,14 +370,24 @@ cureitlasso <- function(t,
       # num_alpha[i,j] <- sum(alpha!=0)
       # num_beta[i,j] <- sum(beta!=0)
       
+    #  names(fit[i][j]) <- paste0("mu_fit_", i, "_lambda_fit_", "j")
+      
     }
     
+
+    names(fit[[i]]) <- paste0("lambda_fit_", 1:length(lambdas))
   }
   
-  return(list(fit=fit,
+  names(fit) <- paste0("mu_fit_", 1:length(mus))
+  
+  return(list(fit = fit,
               # num_alpha=num_alpha,
               # num_beta=num_beta,
               mus=mus,
               lambdas=lambdas))
   
-}
\ No newline at end of file
+}
+
+# tj - is unique event times sorted
+# haz 
+
diff --git a/R/cv.cureitlasso.R b/R/cv.cureitlasso.R
index 991f3ba..90e418f 100644
--- a/R/cv.cureitlasso.R
+++ b/R/cv.cureitlasso.R
@@ -1,10 +1,14 @@
+# Returns:
+# fit - 
+# arg: length.grid - 
+
 cv.cureitlasso <- function(t,
                            d,
                            x,
                            minmu.ratio=0.05, # minimum penalty value in the logistic model
                            minlambda.ratio=0.05, # minimum penalty value in the cox model
                            adaptive=FALSE,
-                           length.grid=10,
+                           length.grid=10, # tells how many hyperparameters to test. default is 10 for each 
                            nfolds=5,
                            tol=1e-2,
                            maxiter=100,
@@ -22,6 +26,7 @@ cv.cureitlasso <- function(t,
   require(survcomp)
   require(pracma)
   
+  # order data by times 
   order_idx <- order(t)
   t <- t[order_idx]
   d <- d[order_idx]
@@ -29,6 +34,7 @@ cv.cureitlasso <- function(t,
   
   if (progress) print("Fitting by EM algorithm ...")
   
+  # main model with no cross validation
   fit <- cureitlasso(t,d,x,
                      minmu.ratio,
                      minlambda.ratio,
@@ -38,7 +44,7 @@ cv.cureitlasso <- function(t,
                      lambdas=NULL,
                      tol,
                      maxiter,
-                     progress)
+                     progress = TRUE)
   
   if (progress) print("Running cross validations ...")
   
@@ -46,6 +52,8 @@ cv.cureitlasso <- function(t,
   
   # Fold split
   foldid <- coxsplit(as.matrix(Surv(t,d)), nfolds)
+  
+  # grid/array of values 
   cv_brier <- array(NA,dim=c(length.grid,length.grid,nfolds))
   
   # Run CVs
@@ -57,6 +65,7 @@ cv.cureitlasso <- function(t,
       
       if (progress) print(i)
       
+      
       cv.fit[[i]] <- cureitlasso(t[foldid != i],
                                  d[foldid != i],
                                  x[foldid != i,],
@@ -114,6 +123,7 @@ cv.cureitlasso <- function(t,
             
           }
           
+          # calcualte trapazoidal area for brier score 
           cv_brier[j,k,i] <- trapz(tbrier,brier)
           
           
@@ -130,7 +140,7 @@ cv.cureitlasso <- function(t,
     
     cv.fit <- foreach(i = 1:nfolds) %dopar% {
       
-      source("~/Projects/Whiting-Qin-cureit/cureit/R/cureitlasso.R")
+      source(here::here("R/cureitlasso.R"))
       
       cureitlasso(t[foldid != i],
                   d[foldid != i],
@@ -206,6 +216,7 @@ cv.cureitlasso <- function(t,
     
   }
   
+  # summarize - take average across all folds 
   cv_brier_mean <- apply(cv_brier,c(1,2),function(x) mean(x))
   cv_brier_se <- apply(cv_brier,c(1,2),function(x) sd(x)/sqrt(nfolds))
   # pheatmap::pheatmap(cv_brier_mean,cluster_cols = F,cluster_rows = F)
@@ -231,11 +242,12 @@ cv.cureitlasso <- function(t,
               cv_brier_mean = cv_brier_mean,
               cv_brier_se = cv_brier_se,
               index = list(min=idxmin,
+                           `1se`= idx1se), 
                            `1se`= idx1se),
               foldid = foldid,
               selected = list(min=selectedmin,
                               `1se`=selected1se)
   )
-  )
   
-}
\ No newline at end of file
+  
+}
diff --git a/R/predict.R b/R/predict.R
new file mode 100644
index 0000000..087eee0
--- /dev/null
+++ b/R/predict.R
@@ -0,0 +1,183 @@
+#' #' Predicted survival probability by cure model
+#' #'
+#' #' @param object A cureit object.
+#' #' @param times Numeric vector of times to obtain survival probability estimates at
+#' #' @param probs Numeric vector of quantiles to obtain estimates at
+#' #' @param newdata A `base::data.frame()` or `tibble::tibble()` containing all
+#' #' the original predictors used to create object. Defaults to `NULL`.
+#' #' @param type Output format of predicted values: "lp" (linear predictor) or "prob" (predicted probabilities).
+#' #' @param brier Boolean indicator of calculating the Brier scores at specified `times`.
+#' #' @param cox Boolean indicator of fitting the Cox model for the training data and calculating the Brier scores at specified `times` for `newdata`.
+#' #' @inheritParams cureit
+#' #' @return named list of prediction estimates
+#' #' @family cureit() functions
+#' #' @export
+#' #' @examples
+#' #' p <- cureit(surv_formula = Surv(ttdeath, death) ~ age, 
+#' #'    cure_formula = ~ age,
+#' #'    data = trial) 
+#' #'    
+#' #' pred <- predict(p, times = seq(5,24,0.5),
+#' #'  newdata = trial[complete.cases(trial), ], brier=TRUE,cox=TRUE)
+#' #' 
+#' #' plot(seq(5,24,0.5),pred$brier,type="S",pch=1)
+#' #' lines(seq(5,24,0.5),pred$brier_cox,type="S",col="red",pch=3)
+#' #' legend("topright",c("Cure model","Cox model"),
+#' #' col=c("black","red"),lty=1,pch=c(1,3))
+#' #' 
+#' #'   
+#' predict.cureit <- function(object, times = NULL, probs = NULL,
+#'                            newdata = NULL, type = c("prob", "lp"),
+#'                            brier = FALSE, cox = FALSE, ...) {
+#'   # checking inputs ------------------------------------------------------------
+#'   
+#'   type <- match.arg(type)
+#'   
+#'   if (is.null(times) + is.null(probs) != 1L) {
+#'     stop("Must specify one and only one of `times=` and `probs=`.", call. = FALSE)
+#'   }
+#'   if (!is.null(times) && any(times < 0)) {
+#'     stop("`times=` must be non-negative.", call. = FALSE)
+#'   }
+#'   if (!is.null(probs) && !all(dplyr::between(probs, 0, 1))) {
+#'     stop("`probs=` must be between 0 and 1.", call. = FALSE)
+#'   }
+#'   if (!(type %in% c("lp","prob"))) {
+#'     stop("`type=` must be one out of 'lp' or 'prob'.", call. = FALSE)
+#'   }
+#'   # if (type == "lp" & brier){
+#'   #   stop("`type=` must be 'prob' to calculate the Brier scores.")
+#'   # }
+#'   # if (brier & is.null(times)){
+#'   #   stop("Must specify one or more times to calculate the Brier scores.")
+#'   # }
+#'   # if (!brier & cox){
+#'   #   stop("Must let `brier=TRUE` to perform Cox regression and corresponding Brier scores.")
+#'   # }
+#'   
+#'   # getting predictions on the original model fit ------------------------------
+#'   processed <- cureit_mold(object$surv_formula, object$cure_formula, newdata %||% object$data,
+#'                            surv_blueprint = object$surv_blueprint, cure_blueprint = object$cure_blueprint)
+#'   
+#'   newX=processed$surv_processed$predictors
+#'   newZ=processed$cure_processed$predictors
+#'   if (is.vector(newZ)) 
+#'     newZ = as.matrix(newZ)
+#'   newZ = cbind(1, newZ)
+#'   if (is.vector(newX)) 
+#'     newX = as.matrix(newX)
+#'   
+#'   s0 = as.matrix(object$smcure$s[order(object$smcure$Time)], ncol = 1)
+#'   n = nrow(s0)
+#'   uncureprob = exp(object$smcure$b %*% t(newZ))/(1 + exp(object$smcure$b %*% t(newZ)))
+#'   
+#'   scure = array(0, dim = c(n, nrow(newX)))
+#'   t = array(0, dim = c(n, nrow(newX)))
+#'   spop = array(0, dim = c(n, nrow(newX)))
+#'   
+#'   #### Only support PH model for now
+#'   ebetaX = exp(object$smcure$beta %*% t(newX))
+#'   for (i in 1:nrow(newZ)) {
+#'     scure[, i] = s0^ebetaX[i]
+#'   }
+#'   
+#'   for (i in 1:n) {
+#'     for (j in 1:nrow(newX)) {
+#'       spop[i, j] = uncureprob[j] * scure[i, j] + (1 - 
+#'                                                     uncureprob[j])
+#'     }
+#'   }
+#'   
+#'   spop_prd = cbind(object$smcure$Time, spop)
+#'   scure_prd = cbind(object$smcure$Time, scure)
+#'   cure_prd = 1-uncureprob
+#'   
+#'   # Type = "Prob ---------
+#'   if (type == "prob"){
+#'     
+#'     if (!is.null(times)) {
+#'       return(list(cured = cure_prd,
+#'                     surv_uncured = probs_at_times(scure_prd,times),
+#'                     surv_marginal = probs_at_times(spop_prd,times)))
+#'     }
+#'     
+#'     if (!is.null(probs)) {
+#'       return(list(cured = cure_prd,
+#'                   surv_uncured = times_at_probs(scure_prd,probs),
+#'                   surv_marginal = times_at_probs(spop_prd,probs)))  
+#'     }
+#'   }
+#'   
+#'   # Type = "Prob ---------
+#'   if (type == "lp"){
+#'     
+#'     return(list(lp_cure_model = object$smcure$b %*% t(newZ),
+#'                 lp_surv_model = object$smcure$beta %*% t(newX)))
+#'     
+#'   }
+#'   
+#' }
+#' 
+#' times_at_probs <- function(matrix_pred, probs) {
+#'   matrix_zero <- matrix(c(0, 1), ncol = 2)
+#'   lst_time_risk <-
+#'     purrr::map(
+#'       probs,
+#'       ~ purrr::map_dbl(
+#'         seq_len(ncol(matrix_pred) - 1L),
+#'         function(i) {
+#'           # adding 1 to the matrix, keeping only the time column, and col of interest
+#'           m <- rbind(matrix_zero, matrix_pred[, c(1L, i + 1L)])
+#'           
+#'           # return NA if the quantiles are all missing OR prob is larger than observed
+#'           if (isTRUE(all(is.na(m[-1, 2])) || .x > max(m[, 2]))) {
+#'             return(NA)
+#'           }
+#'           if (isTRUE(.x %in% m[, 2])) {
+#'             return(m[m[, 2] %in% .x, 1])
+#'           }
+#'           return(m[which.min(m[, 2] >= .x), 1])
+#'         }
+#'       )
+#'     ) %>%
+#'     stats::setNames(paste0("prob ", probs * 100, "%"))
+#'   
+#'   # returning results ----------------------------------------------------------
+#'   lst_time_risk
+#' }
+#' 
+#' 
+#' 
+#' probs_at_times <- function(matrix_pred, times) {
+#'   # defining times for predictions ---------------------------------------------
+#'   # CHECK- here should this be warning?
+#'   all_times <- union(0, matrix_pred[, 1]) %>% sort()
+#'   if (isTRUE(max(times) > max(all_times))) {
+#'     stringr::str_glue("`times=` cannot be larger than {max(all_times)}") %>%
+#'       stop(call. = FALSE)
+#'   }
+#'   times_obs <-
+#'     purrr::map_dbl(
+#'       times,
+#'       function(.x) {
+#'         if (isTRUE(.x %in% all_times)) {
+#'           return(all_times[all_times %in% .x])
+#'         }
+#'         all_times[which.min(all_times <= .x) - 1L]
+#'       }
+#'     )
+#'   
+#'   # named list of the risks, the names are the times,
+#'   # the values are the estimates of risk at the covar levels
+#'   lst_risk_time <-
+#'     purrr::map(seq_len(length(all_times) - 1L), ~ matrix_pred[.x, -1]) %>%
+#'     stats::setNames(all_times[-1]) %>%
+#'     dplyr::bind_cols() %>%
+#'     mutate(`0` = 0, .before = 1) %>%
+#'     as.list() %>%
+#'     stats::setNames(all_times)
+#'   
+#'   # extracting risks at specified time -----------------------------------------
+#'   lst_risk_time[as.character(times_obs)] %>%
+#'     stats::setNames(paste("time", times))
+#' }
diff --git a/R/predict_cure_2024-03-15.R b/R/predict_cure_2024-03-15.R
new file mode 100644
index 0000000..7d0c721
--- /dev/null
+++ b/R/predict_cure_2024-03-15.R
@@ -0,0 +1,171 @@
+#' Predict Cure and Survival Probabilities
+#'
+#' This function predicts cure and survival probabilities for data based on a final fitted model.
+#' It calculates predicted survival probabilities at specified evaluation time points, accounting for censoring.
+#' Output is consistent with tidymodels predict guidelines and can be passed into yardstick functions like `brier()`
+#'
+#' @param final_fit A list containing the fitted cure and survival models. It should have elements `fitcure`, `fitcox`, `haz`, and `cumhaz`.
+#' @param new_data A data frame containing the new observations to predict on. It should include columns `times` (observed survival times), `event` (event indicator, 1 for event, 0 for censoring), and other covariates used for prediction.
+#' @param eval_timepoints Optional. A vector of time points at which to evaluate survival probabilities. If NULL, the function uses the observed event times from `new_data`.
+#' 
+#' @return A data frame containing the predicted survival probabilities, cure rates, and other related metrics for each observation at the specified evaluation time points. The returned data frame includes the following columns:
+#' \itemize{
+#'   \item \code{id}: Identifier for each observation.
+#'   \item \code{.eval_time}: Evaluation time point.
+#'   \item \code{.pred_survival}: Predicted survival probability at the evaluation time point.
+#'   \item \code{.pred_curerate}: Predicted cure rate for each observation.
+#'   \item \code{.pred_survportion}: Component of the survival prediction that excludes the cure component.
+#'   \item \code{.weight_censored_raw}: Raw censoring weight at each evaluation time point.
+#'   \item \code{.weight_censored}: Inverse probability weight for censoring.
+#'   \item \code{event}: Event indicator from the new data.
+#'   \item \code{times}: Observed survival times from the new data.
+#' }
+#'
+#' @examples
+#' \dontrun{
+#'   # Assuming `final_fit` is a fitted model list and `new_data` is a data frame with appropriate structure:
+#'   predict_df <- predict_cure(final_fit = final_fit, new_data = new_data)
+#'   print(predict_df)
+#' }
+#' 
+#' @importFrom dplyr select mutate bind_cols
+#' @importFrom tidyr nest unnest
+#' @importFrom tibble tibble
+#' 
+#' @export
+predict_cure <- function(final_fit,
+                         new_data = NULL,
+                         eval_timepoints = NULL) {
+  
+  # Values from Observed Data & Model Fit ---------------------------------
+  
+  survival_times = new_data$times
+  events = new_data$event
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(new_data$times[new_data$event==1])
+  
+  # Extract cumulative hazard from final fit (for event times)
+  haz <- final_fit$haz
+  cumhaz <- final_fit$cumhaz
+  
+  # Set up Data 
+  data_x <- new_data %>%
+    select(-c(times, event, truth_survival)) %>%
+    as.matrix()
+  
+  # Evaluation Time Points --------------------------------------------------
+  # Default evaluation time points are observed event times if not supplied by user
+  
+  all_eval_timepoints <- eval_timepoints %||% sorted_survival_times
+  
+  # Censoring Weights --------------------------------------------------------
+  
+  censoring_survfit = survfit(Surv(survival_times, 1 - events) ~ 1)
+  
+  # CHECK- or censoring weights at all all respective survival times?
+  # censoring_weights = summary(censoring_survfit, times = all_eval_timepoints)
+  censoring_weights = summary(censoring_survfit, times = survival_times)$surv
+
+    
+  # Relative Risk for Cure and Cox Risk ------------------------------------
+  # (independent of time point) 
+  
+  #  * Cure probability risk prediction ------
+  fitcure <- final_fit$fitcure
+  predcure <- predict(fitcure, newx = data_x,
+                      s = min(fitcure$lambda), type = "response")
+  
+  # * Cox survival probability risk prediction ------
+  fitcox <- final_fit$fitcox
+  predsurvexp <- predict(fitcox, 
+                         newx = data_x,
+                         s = min(fitcox$lambda),
+                         type = "response")
+  
+  
+  
+  # Predicted Probabilities at Evaluated Time points -------------------------
+  # (dependent on time point- need IPCW)
+  
+  all_preds_all_tps <- list()
+  
+  # Evaluated in all patients, for each eval time point (iteration on time point)
+  for (timepoint_index in 1:length(all_eval_timepoints)) {
+    
+    # timepoint we are evaluating
+    eval_timepoint <- all_eval_timepoints[timepoint_index] 
+    
+
+    # * Inverse Probability Censoring Weights ---------
+    
+    # If no time points in new data less than lowest event time in model fit data (most cases):
+    if (eval_timepoint >= min(sorted_event_times)) {
+      
+      # get maximum event time that is less than time point we are evaluating 
+      closest_event_time <- max(
+        sorted_event_times[sorted_event_times <= eval_timepoint])
+      
+      index_closest_event_time <- which(sorted_event_times == closest_event_time)
+      
+      # Get predicted survival by using cumulative hazard at closest previous event time
+      predsurv <- exp(-cumhaz[index_closest_event_time]*predsurvexp)
+      
+      # CHECK - Probability of the censoring at evaluation time point (or should it be at previous index closest event time??)
+     #  censoring_weight_at_eval_time <- censoring_weights[timepoint_index]
+      censoring_weight_at_eval_time <- censoring_weights[index_closest_event_time]
+      
+      ipw <- 
+        # if observed time is greater than event time, use censoring weight at evaluation time (or time closest)
+        (as.numeric(survival_times > eval_timepoint)*censoring_weight_at_eval_time) + 
+        
+        # if observed time is less than or equal to eval time, use censoring weight at it's own observed time
+        as.numeric(survival_times <= eval_timepoint)*censoring_weights
+      
+      ipw = 1/(ipw+ 0.001)
+      
+      # if no events occurred before eval time 
+    } else if (eval_timepoint < min(sorted_event_times)) {
+      
+      predsurv <- rep(1,length(survival_times))
+      ipw <- 1
+      
+    }
+    
+    preds <- 1 - predcure + predcure*predsurv
+    
+    all_preds <- tibble(
+      
+      ".eval_time" = eval_timepoint,
+      ".pred_survival" = preds, 
+      ".pred_curerate" = predcure, 
+      ".pred_survportion" = predsurv, 
+      ".weight_censored_raw" = censoring_weights[timepoint_index],
+      ".weight_censored" = ipw) %>%
+      mutate(id = 1:nrow(.))
+    
+    all_preds_nest <-  all_preds %>% nest(.preds = -c(id))
+    
+    all_preds_all_tps[[timepoint_index]] <- all_preds
+    
+  }
+  
+  all_pred_df <- all_preds_all_tps %>% 
+    do.call("rbind", .)
+  
+  all_pred_df <- all_pred_df %>%
+    nest(.pred = -c(id)) %>% 
+    bind_cols(., "event" = new_data$event, 
+              "times" = new_data$times)
+  
+  return(all_pred_df)
+  
+}
+
+
+# Try Function ------------------------------------------------------------
+# 
+# predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+# predict_df_long <- predict_df %>%
+#   unnest(everything())
+
+
diff --git a/R/simulasso.R b/R/simulasso.R
index 91d6998..53ae132 100644
--- a/R/simulasso.R
+++ b/R/simulasso.R
@@ -1,28 +1,57 @@
-simulasso <- function(n=100,
-                      p=200,
-                      id_cure=NULL,
-                      id_cox=NULL,
-                      coefs_cure=NULL,
-                      coefs_cox=NULL
-                      ){
-  
+#' Simulate Data 
+#'
+#' @param n Number of patients to simulate
+#' @param p Number of parameters to simulate
+#' @param id_cure Which parameters to include in cure simulation
+#' @param id_cox Which parameters to include in cox simulation
+#' @param coefs_cure Pre-specified coefficients of cure
+#' @param coefs_cox  Pre-specified Coefficients of cox
+#' @param n_true_p Number of "true" non-zero coefficients
+#'
+#' @return
+#' - `t`- simulated survival times
+#' - `d`- simulated event indicator
+#' - `x` simulated data n x p matrix or data.frame
+#' - `id_cure` - IDs of selected cure parameters
+#' - `id_cure` - IDs of selected cox parameters
+#' - `coefs_cure` - Coefficients of cure parameters
+#' - `coefs_cox` - Coefficients of cox parameters
+#' 
+#' @export
+#'
+#' @examples
+#' simulasso(n = 10, p = 3)
+simulasso <- function(n = 100,
+                      p = 200,
+                      id_cure = NULL,
+                      id_cox = NULL,
+                      coefs_cure = NULL,
+                      coefs_cox = NULL, 
+                      n_true_p = 10) {
   require(mvtnorm)
+
+  t <- rep(NA, length = n)
+  d <- rep(NA, length = n)
+
+  # number of predictors is size 
+  if (is.null(id_cure)) id_cure <- sample(p, size = n_true_p)
+  if (is.null(id_cox)) id_cox <- sample(p, size = n_true_p)
+
+  if (is.null(coefs_cure)) coefs_cure <- runif(length(id_cure) + 1, min = 2, max = 3)
+  if (is.null(coefs_cox)) coefs_cox <- runif(length(id_cox), min = 2, max = 3)
+
+  x <- mvtnorm::rmvnorm(n, mean = rep(0, p))
+
+  # add intercept and calculate linear predictor and probability of cure using the logistic function:
   
-  t <- rep(NA,length=n)
-  d <- rep(NA,length=n)
-  
-  if(is.null(id_cure)) id_cure = sample(p,size=3)
-  if(is.null(id_cox)) id_cox = sample(p,size=3)
-  
-  if(is.null(coefs_cure)) coefs_cure = runif(length(id_cure)+1,min=2,max=3)
-  if(is.null(coefs_cox)) coefs_cox = runif(length(id_cox),min=2,max=3)
-  
-  x <- mvtnorm::rmvnorm(n,mean=rep(0,p))
-  puncure <- exp(cbind(1,x[,id_cure]) %*% coefs_cure)/(1+exp(cbind(1,x[,id_cure]) %*% coefs_cure))
-  uncure <- rbinom(n,1,prob=puncure)
-  
-  lambda <- exp(x[,id_cox] %*% coefs_cox)
+  puncure <- exp(cbind(1, x[, id_cure]) %*% coefs_cure) / (1 + exp(cbind(1, x[, id_cure]) %*% coefs_cure))
   
+  # make into event index
+  uncure <- rbinom(n, 1, prob = puncure)
+
+  # linear predictor survival
+  lp_cox <- exp(x[, id_cox] %*% coefs_cox)
+
   ### Should censoring time for cured subjects be generated from a same distribution as those uncured?
   
   for(i in 1:n){
@@ -31,8 +60,12 @@ simulasso <- function(n=100,
     c0 <- min(rexp(1,rate=1/30),runif(1,min=20,max=40))
     t[i] <- ifelse(uncure[i]==1, min(t0,c0), c0)
     d[i] <- ifelse(uncure[i]==1, as.numeric(I(t0 <= c0)), 0)
+
   }
-  
-  dat <- list(t=t,d=d,x=x,uncure=uncure,id_cure=id_cure,id_cox=id_cox,coefs_cure=coefs_cure,coefs_cox=coefs_cox)
-  
-}
\ No newline at end of file
+
+  dat <- list(t = t, d = d, x = x,
+              uncure = uncure,
+              id_cure = id_cure,
+              id_cox = id_cox, coefs_cure = coefs_cure, 
+              coefs_cox = coefs_cox)
+}
diff --git a/inst/compare-predict-brier-with-yardstick_P1.R b/inst/compare-predict-brier-with-yardstick_P1.R
new file mode 100644
index 0000000..db759a3
--- /dev/null
+++ b/inst/compare-predict-brier-with-yardstick_P1.R
@@ -0,0 +1,192 @@
+library(tidymodels)
+library(censored)
+tidymodels_prefer()
+
+
+# Try Tidymodels Example --------------------------------------------------
+
+
+# * Fit Lung Model  --------
+
+data(cancer, package="survival")
+data(lung_surv)
+
+#lung <- lung %>% drop_na()
+
+lung <- lung %>%
+  mutate(status = case_when(
+    status == 1 ~ 0, 
+    status == 2 ~ 1
+  ))
+
+
+ph_spec <- 
+  proportional_hazards() %>%
+  set_engine("survival") %>% 
+  set_mode("censored regression") 
+
+ph_spec
+
+set.seed(1)
+ph_fit <- ph_spec %>% fit(Surv(time, status) ~ ., data = lung)
+ph_fit
+
+cox <- coxph(Surv(time, status) ~ ., data = lung)
+cox
+cumhaz_all<- survfit(cox)
+cumhaz <- bind_cols(time = cumhaz_all$time, surv = cumhaz_all$surv)
+
+#cumhaz$surv[which.min(abs(cumhaz$time - closest_event_time))]
+
+#cumhaz$surv[cumhaz$time[]
+
+# * Predict Lung Model  -----------
+
+# Make predictions
+p <- predict(
+  ph_fit, 
+  lung, 
+  type = "survival",
+  eval_time = lung$time
+) %>%
+  mutate(surv_truth =  Surv(lung$time, lung$status))
+
+# add weights
+p_weights <- .censoring_weights_graf(ph_fit, predictions = p) %>%
+  mutate(pat_id = 1:nrow(.))
+
+p_weights_long <- p_weights %>%
+  unnest(cols = .pred)
+
+# calc brier
+brier_scores_lung <-
+  p_weights %>% 
+  brier_survival(truth = surv_truth, .pred)
+
+brier_scores_lung
+
+# Try our code on their data ----------------------------------------------
+
+# 
+# predict_df_long <- predict_df %>%
+#   unnest(everything())
+
+predict_df_nest_time <- p_weights_long %>%
+  nest(data = -.eval_time)
+
+predict_df_timepoint = predict_df_nest_time[10,] %>%
+  unnest(everything())
+
+brier_for_each_eval <- function(predict_df_timepoint) {
+  
+  
+  x = mean(I(predict_df_timepoint$times > predict_df_timepoint$.eval_time )*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+             I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+             (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001))
+  x
+}
+
+
+
+
+lung2 <- lung %>%
+  rename("times" = time, 
+         "event" = status)
+cox <- coxph(Surv(times, event) ~ ., data = lung2)
+cox
+cumhaz_all<- survfit(cox)
+cumhaz <- bind_cols(time = cumhaz_all$time, surv = cumhaz_all$surv)
+
+
+lung2 <- lung2 %>%
+  mutate(truth_survival = Surv(times, event))
+
+predict_cure(final_fit = cox, new_data = lung2, eval_timepoints = lung2$times)
+
+
+
+
+# Misc --------------------------------------------------------------------
+
+
+eval_time_test <- 300
+
+new_data = lung
+survival_times <- lung$time
+sorted_survival_times <- sort(survival_times)
+
+survival_status <- lung$status
+sorted_event_times = sort(survival_times[survival_status==1])
+
+censoring_survfit = survfit(Surv(survival_times, 1 - survival_status) ~ 1)
+censoring_weights = summary(censoring_survfit, times = survival_times)
+censoring_weights = censoring_weights$surv
+
+all_weights = bind_cols(censoring_weights_time, censoring_weights)
+names(all_weights) <- c("weights_time", "weights_surv")
+
+eval_timepoint = 300
+
+# * Inverse Probability Censoring Weights ---------
+
+# If no time points in new data less than lowest event time in model fit data (most cases):
+if (eval_timepoint >= min(sorted_event_times)) {
+  
+  # get maximum event time that is less than time point we are evaluating 
+  closest_event_time <- max(
+    sorted_event_times[sorted_event_times <= eval_timepoint])
+  
+  index_closest_event_time <- which(sorted_event_times == closest_event_time)
+  
+  # Get predicted survival by using cumulative hazard at closest previous event time
+  predsurv <- exp(-cumhaz[index_closest_event_time]*predsurvexp)
+  
+  # CHECK - Probability of the censoring at evaluation time point (or should it be at previous index closest event time??)
+  censoring_weight_at_eval_time <- censoring_weights[timepoint_index]
+  
+  ipw <- 
+    as.numeric(survival_times > eval_timepoint)*censoring_weight_at_eval_time + 
+    as.numeric(survival_times <= eval_timepoint)*censoring_weights
+  
+  ipw = 1/ipw
+  
+  # if no events occurred before eval time 
+} else if (eval_timepoint < min(sorted_event_times)) {
+  
+  predsurv <- rep(1,length(survival_times))
+  ipw <- 1
+  
+}
+
+cbind(survival_times, ipw) %>% View()
+
+# Compare ------
+lung_surv_long <- lung_surv %>% unnest(everything())
+
+lung_surv_long <- lung_surv_long %>% filter(.eval_time == eval_time_test)
+
+#lung %>% select(time, status) %>% distinct() %>% nrow()
+
+
+# Here- New ---------------------------------------------------------------------
+# ti & di is censored patient - If censored time is after eval time () ti > tbrier[l]
+# if event, and event time earlier than evaluation time - (0 - preds)^2/(ipw+0.001))
+
+# 1) If the observed time is a censoring time and that is before the evaluation time, the data point should make no contribution to the performance metric (their "category 3"). These values have a missing value for their probability estimate (and also for their weight column).
+# ^ not included, ensure they are NAs in predict df 
+
+# 2)  I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001)) 
+# Old version ----
+# brier[l] <- mean(I(ti > tbrier[l] & di==0)*
+#                    (1 - preds)^2/(ipw+0.001) + 
+#                    
+#                    I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001))
+
+# Updated Version ---
+
+
+brier[l] <- mean(I(ti > tbrier[l])*
+                   (1 - preds)^2/(ipw+0.001) + 
+                   
+                   I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001))
+
diff --git a/inst/compare-predict-brier-with-yardstick_P2.R b/inst/compare-predict-brier-with-yardstick_P2.R
new file mode 100644
index 0000000..de5c1f3
--- /dev/null
+++ b/inst/compare-predict-brier-with-yardstick_P2.R
@@ -0,0 +1,233 @@
+# we should use this: https://www.tidymodels.org/learn/statistics/survival-metrics/ 
+
+library(survival)
+library(tidyverse)
+library(glmnet)
+library(yardstick)
+
+# Load Simulation Data ---------------------------------------------------------------
+
+load(here::here("inst", "cureit-simulation-results", "sim_data_list.RData"))
+load(here::here("inst", "cureit-simulation-results", "sim_valid_data_list.RData"))
+load(here::here("inst","cureit-simulation-results", "cv_fits_100.RData"))
+
+
+# Function to Get Brier on Each Sim Data ----------------------------------
+
+get_brier_for_each_data <- function(dat, fit) {
+  
+  final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+  
+  # * Create Data frame to pass  -----
+  
+  data = data.frame(dat$x) %>%
+    mutate(times = dat$t, 
+           event = dat$d,
+           "truth_survival" = Surv(dat$t, dat$d))
+  
+  # * Run new Predict Function (glmnet cure) ------
+  
+  predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+  predict_df_long <- predict_df %>%
+    unnest(everything())
+  
+  
+  brier_scores_yardstick <-
+    predict_df %>% 
+    mutate(truth = Surv(times, event)) %>%
+    brier_survival(truth = truth, .pred)
+  
+  return(brier_scores_yardstick)
+  
+}
+
+
+# Manual Brier Score Calc (From Teng) -------------------------------------
+
+# * Version 1 -----
+
+# iterates over eval time 
+for (i in 1:100) {
+  
+  time_1 <- predict_df_long$.eval_time[[i]]
+  
+  predict_df_long_1 <- predict_df_long %>%
+    filter(.eval_time == time_1)
+  
+  x[i] <-  mean(I(predict_df_long_1$times > time_1)*(1 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored) + 
+                  I(predict_df_long_1$event == 1 & predict_df_long_1$times <= time_1)*
+                  (0 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored))
+  
+  # OLD --
+  # x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
+  #                I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
+  # 
+  # OLD --
+  # brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) +
+  #                    I(di==1)*(0 - preds)^2/(ipw+0.001))
+  # 
+  # NEW ?--
+  # brier[l] <- mean(I(ti > tbrier[l])*(1 - preds)^2/(ipw+0.001) + 
+  #                    I(di==1 & ti <= tbrier[l])*(0 - preds)^2/(ipw+0.001))
+  
+}
+
+
+
+# * Version 2 -----
+
+# simple version of above- I think I can delete
+brier[l] <- mean(I(ti > tbrier[l])*
+                   (1 - preds)^2/(ipw+0.001) + 
+                   
+                   I(ti <= tbrier[l] & di == 1)*(0 - preds)^2/(ipw+0.001))
+
+
+# * Version 3 -----
+brier_cure_update <- function(predict_df, truth = times, eval_timepoints = NULL) {
+  
+  
+  # Get values from observed data
+  survival_times = predict_df$times
+  sorted_survival_times <- sort(survival_times)
+  sorted_event_times = sort(data$times[data$event==1])
+  
+  
+  predict_df_long <- predict_df %>%
+    unnest(everything())
+  
+  predict_df_nest_time <- predict_df_long %>%
+    nest(data = -.eval_time)
+  
+  
+  map(predict_df_nest_time$data, function(x) {
+    mean(I(x$event == 0)*(1 - x$.pred_survival)^2/(x$.weight_censored + 0.001) + I(x$event == 1)*(0 - x$.pred_survival)^2/(x$.weight_censored + 0.001))
+    
+  })
+  
+  for (l in 1:length(eval_timepoints)) {
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+    
+    
+  }
+}
+
+# * Version 4 ----
+
+brier_for_each_eval <- function(predict_df_timepoint) {
+  
+  
+  x = mean(I(predict_df_timepoint$times > predict_df_timepoint$.eval_time )*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+             I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+             (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001))
+  x
+}
+
+
+brier_for_each_eval <- function(predict_df_timepoint) {
+  
+  
+  x = mean(
+    I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+      
+      I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+      (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001)
+  )
+  x/100
+}
+
+brier_for_each_eval2 <- function(predict_df_timepoint) {
+  
+  category1 = I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )
+  category2 = I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)
+  
+  x = mean(
+    category1*(0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) +
+      category2 *(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) 
+    
+  )
+  x
+  
+}
+
+
+
+# Example on Single Data Set -------------------------------------------------
+
+# Grab one data set as an example
+dat <- sim_data_list[[1]]
+fit <- cv_fits_list[[1]]
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+# Run predict functon
+predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+
+predict_df_long <- predict_df %>%
+  unnest(everything())
+
+# Get yardstick values
+brier_scores_yardstick <-
+  predict_df %>% 
+  mutate(truth = Surv(times, event)) %>%
+  brier_survival(truth = truth, .pred)
+
+
+# Example on Multiple Data ------------------------------------------------
+
+ 
+b <- get_brier_for_each_data(dat = sim_data_list[[3]], fit = cv_fits_list[[3]])
+
+
+res <- list()
+for (i in 1:10) {
+  
+  res[[i]] <- get_brier_for_each_data(sim_data_list[[i]], cv_fits_list[[i]])
+}
+
+ggplot(res[[3]], aes(x = .eval_time, .estimate)) + geom_point()
+
+
+
+
+# Compare to Our Manual Brier Calc ----------------------------------------
+
+# quick brier ---------
+dat <- sim_data_list[[3]]
+fit <- cv_fits_list[[3]]
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+# Create Data frame to pass  -----
+
+data = data.frame(dat$x) %>%
+  mutate(times = dat$t, 
+         event = dat$d,
+         "truth_survival" = Surv(dat$t, dat$d))
+
+predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+predict_df_long <- predict_df %>%
+  unnest(everything())
+
+
+
+# Brier With Yardstick -------------------------------------------------------
+library(yardstick)
+x <- c()
+
+
+times <- unique(predict_df_long$.eval_time)
+brier <- x
+
+brier_scores_manual <- bind_cols("times" = times, "brier" = brier)
+
+brier_scores_yardstick <-
+  predict_df %>% 
+  mutate(truth = Surv(times, event)) %>%
+  brier_survival(truth = truth, .pred)
+
+
+
+
+
+
+# Misc Code ---------------------------------------------------------------
+
diff --git a/inst/compare-predict-brier-with-yardstick_P3.R b/inst/compare-predict-brier-with-yardstick_P3.R
new file mode 100644
index 0000000..e3fede4
--- /dev/null
+++ b/inst/compare-predict-brier-with-yardstick_P3.R
@@ -0,0 +1,315 @@
+# we should use this: https://www.tidymodels.org/learn/statistics/survival-metrics/ 
+
+library(survival)
+library(tidyverse)
+library(glmnet)
+
+# Prep Data ---------------------------------------------------------------
+
+load(here::here("inst", "cureit-simulation-results", "sim_data_list.RData"))
+load(here::here("inst", "cureit-simulation-results", "sim_valid_data_list.RData"))
+load(here::here("inst","cureit-simulation-results", "cv_fits_100.RData"))
+
+dat <- sim_data_list[[1]]
+fit <- cv_fits_list[[1]]
+final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$min[2]]]
+
+# Create Data frame to pass 
+
+data = data.frame(dat$x) %>%
+  mutate(times = dat$t, 
+         event = dat$d,
+         "truth_survival" = Surv(dat$t, dat$d))
+
+# * Calculate Sensitivity of Variable Selection in Training Fits -------
+spe_cox_min <- c()
+sen_cox_min <- c()
+
+x <- for (i in 1:100) {
+  fit <- cv_fits_list[[i]]
+  #fit <- x$fit
+  dat <- sim_data_list[[i]]
+  
+  spe_cox_min[[i]] <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  sen_cox_min[[i]] <- mean(dat$id_cox %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  # spe_cox_min <- 1-mean(setdiff(1:200,dat$id_cure) %in% which(fit$fit[[fit$index$min[1]]][[fit$index$min[2]]]$beta!=0))
+  
+  # print(sen_cox_min)
+  
+}
+
+
+hist(unlist(sen_cox_min))
+hist(unlist(spe_cox_min))
+
+# Check Variable Selection on Standard Lasso
+# will add foldid (see new version of cv.lassocure())
+cv.glmnet(x = dat$x, y = Surv(dat$t, dat$d), family = "cox")
+
+# Predict Function (glmnet cure) ---------------------------------------------
+
+# Trying to get predictions in similar format as tidymodels yardstick
+# Eventually will integrate this with existing predict method function
+
+
+
+predict_df <- predict_cure(final_fit = final_fit, new_data = data)
+predict_df_long <- predict_df %>%
+  unnest(everything())
+
+
+
+# quick brier ---------
+x <- c()
+for (i in 1:100) {
+  
+  time_1 <- predict_df_long$.eval_time[[i]]
+  
+  predict_df_long_1 <- predict_df_long %>%
+    filter(.eval_time == time_1)
+  
+  x[i] <-  mean(I(predict_df_long_1$times > time_1)*(1 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored) + 
+                  I(predict_df_long_1$event == 1 & predict_df_long_1$times <= time_1)*
+                  (0 - predict_df_long_1$.pred_survival)^2*(predict_df_long_1$.weight_censored))
+  
+  # OLD -----
+  # x[i] <- mean(I(predict_df_long_1$event == 0)*(1 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored + 0.001) + 
+  #                I(predict_df_long_1$event ==1)*(0 - predict_df_long_1$.pred_survival)^2/(predict_df_long_1$.weight_censored  + 0.001))
+  # 
+  # OLD ----
+  # brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) +
+  #                    I(di==1)*(0 - preds)^2/(ipw+0.001))
+  # 
+  # NEW -----
+  # brier[l] <- mean(I(ti > tbrier[l])*(1 - preds)^2/(ipw+0.001) + 
+  #                    I(di==1 & ti <= tbrier[l])*(0 - preds)^2/(ipw+0.001))
+  
+}
+
+times <- unique(predict_df_long$.eval_time)
+brier <- x
+
+brier_scores_manual <- bind_cols("times" = times, "brier" = brier)
+
+# Brier With Yardstick -------------------------------------------------------
+library(yardstick)
+
+# Doesn't match- weights are wrong
+brier_scores_yardstick <-
+  predict_df %>% 
+  mutate(truth = Surv(times, event)) %>%
+  brier_survival(truth = truth, .pred)
+
+
+all_brier <- brier_scores_yardstick %>%
+  select(times = .eval_time, brier_yardstick = .estimate) %>%
+  left_join(brier_scores_manual)
+
+# Brier Function -------------------------------------------------------
+
+predict_df_long <- predict_df %>%
+  unnest(everything())
+
+predict_df_nest_time <- predict_df_long %>%
+  nest(data = -.eval_time)
+
+predict_df_timepoint = predict_df_nest_time[10,] %>%
+  unnest(everything())
+
+brier_for_each_eval <- function(predict_df_timepoint) {
+  
+  
+  x = mean(I(predict_df_timepoint$times > predict_df_timepoint$.eval_time )*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+                  I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+                  (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001))
+  x
+  }
+  
+
+brier_for_each_eval <- function(predict_df_timepoint) {
+  
+  
+  x = mean(
+    I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)*(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001) + 
+      
+             I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )*
+             (0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored+0.001)
+    )
+  x/100
+}
+
+brier_for_each_eval2 <- function(predict_df_timepoint) {
+  
+  category1 = I(predict_df_timepoint$event == 1 & predict_df_timepoint$times <= predict_df_timepoint$.eval_time  )
+  category2 = I(predict_df_timepoint$times > predict_df_timepoint$.eval_time)
+  
+  x = mean(
+    category1*(0 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) +
+    category2 *(1 - predict_df_timepoint$.pred_survival)^2/(predict_df_timepoint$.weight_censored) 
+     
+  )
+  x
+
+}
+
+brier_for_each_eval(predict_df_timepoint)
+
+# Compare to yardstick ----
+
+brier_survival_impl <- function(truth,
+                                estimate,
+                                censoring_weights,
+                                case_weights,
+                                eval_time) {
+  surv_time <- yardstick:::.extract_surv_time(truth)
+  surv_status <- yardstick:::.extract_surv_status(truth)
+  
+  if (!is.null(case_weights)) {
+    case_weights <- vec_cast(case_weights, to = double())
+    norm_const <- sum(case_weights)
+  } else {
+    case_weights <- rep(1, length(estimate))
+    norm_const <- sum(!survival::is.na.Surv(truth))
+  }
+  
+  category_1 <- surv_time <= eval_time & surv_status == 1
+  category_2 <- surv_time > eval_time
+  
+  # (0 - estimate) ^ 2 == estimate ^ 2
+  res <- (category_1 * estimate^2 * censoring_weights) +
+    (category_2 * (1 - estimate)^2 * censoring_weights)
+  
+  res <- res * case_weights
+  res <- sum(res, na.rm = TRUE)
+  res / norm_const
+}
+predict_df_timepoint2 <- predict_df_timepoint %>%
+  mutate(surv_obj = Surv(predict_df_timepoint$times, predict_df_timepoint$event))
+
+library(yardstick)
+brier_survival_impl(
+                    truth = predict_df_timepoint2$surv_obj,
+                    
+                    estimate = predict_df_timepoint2$.pred_survival,
+                                censoring_weights = predict_df_timepoint2$.weight_censored,
+                                case_weights = NULL,
+                                eval_time = predict_df_timepoint2$.eval_time)
+
+brier_for_each_eval(predict_df_timepoint)
+
+# Simple Former Brier Calculation ---------------------------------------------------
+library(glmnet)
+library(tidyverse)
+library(survival)
+
+calc_brier_old <- function(new_data, fit) {
+  # ti <- dat_valid$t
+  # di <- dat_valid$d
+  # xi <- dat_valid$x
+  ti <- new_data$t
+  di <- new_data$d
+  xi <- new_data$x
+  
+  # tj from train data
+  tj <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]$tj
+  final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]
+  
+  fitcure <- final_fit$fitcure
+  predcure <- predict(fitcure,newx=xi,s=min(fitcure$lambda),type="response")
+  
+  fitcox <- final_fit$fitcox
+  predsurvexp <- predict(fitcox,newx=xi,s=min(fitcox$lambda),type="response")
+  
+  haz <- final_fit$haz
+  cumhaz <- final_fit$cumhaz
+  
+  # censoring weights from test data
+  sf <- survfit(Surv(ti,1-di)~1)
+  sf_pred <- summary(sf, time = ti)
+  
+  predcens <- sf_pred$surv
+  tcens <- sf_pred$time
+  
+  # tbrier is evaluation timepoints (default to all timepoints in test data)
+  tbrier <- sort(ti)
+  brier <- rep(NA,length(tbrier))
+  ipw <- rep(NA,length(tbrier))
+  
+  for (l in 1:length(tbrier)){
+    
+    predsurv <- rep(NA,length(ti)) # Conditional survival prob for all patients at tbrier[l]
+    
+    if (tbrier[l] >= min(tj)){
+      
+      ids <- which(tj == max(tj[tj <= tbrier[l]]))
+      predsurv <- exp(-cumhaz[ids]*predsurvexp)
+      ipw <- as.numeric(ti > tbrier[l])*predcens[l] + as.numeric(ti <= tbrier[l])*predcens
+      
+    }else if (tbrier[l] < min(tj)){
+      
+      predsurv <- rep(1,length(ti))
+      ipw <- 1
+      
+    }
+    
+    preds <- 1 - predcure + predcure*predsurv
+    brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))
+    #ipw[l] <- ipw
+   # print(ipw)
+  }
+  
+  preds <- cbind.data.frame(tbrier, brier)
+  return(preds)
+ # plot(tbrier,brier,type="S")
+  
+}
+
+x <- calc_brier_old(sim_data_list[[1]], cv_fits_list[[1]])
+x_val <- calc_brier_old(sim_valid_data_list[[1]], cv_fits_list[[1]])
+
+x
+plot(x$tbrier,x$brier,type="S")
+plot(x_val$tbrier,x_val$brier,type="S")
+
+
+all_brier <- list()
+
+for (i in 1:length(sim_data_list)) {
+  all_brier[[i]] <- calc_brier_old( new_data = sim_data_list[[i]], fit = cv_fits_list[[i]])
+}
+
+all_brier_valid <- list()
+
+for (i in 1:length(sim_valid_data_list)) {
+  all_brier_valid[[i]] <- calc_brier_old( new_data = sim_valid_data_list[[i]],
+                                          fit = cv_fits_list[[i]])
+}
+
+x <- all_brier_valid[[1]]
+x_valid <- all_brier[[1]]
+
+ggplot(x, aes(x =  tbrier,y = brier)) + geom_line( aes(x =  tbrier,y = brier)) +
+  geom_line(data = x_valid, aes(x =tbrier, y = brier), color = "blue")
+
+plot(x$tbrier,x$brier,type="S")
+line(x_valid$tbrier,x_valid$brier,type="S")
+
+
+traps <- c()
+
+for (i in 1:length(all_brier)) {
+  traps[i] <- trapz(all_brier[[i]]$tbrier,all_brier[[i]]$brier)
+
+}
+
+
+all_brier2 <- all_brier %>%
+  do.call("rbind", .)
+
+x <- all_brier2 %>% group_by(round(tbrier, 1)) %>%
+  summarize(mean = mean(brier))
+
+plot(x$`round(tbrier, 1)`, x$mean,type="S")
+
+
+
diff --git a/inst/inst-README.md b/inst/inst-README.md
new file mode 100644
index 0000000..7cf4a72
--- /dev/null
+++ b/inst/inst-README.md
@@ -0,0 +1,18 @@
+# inst
+
+
+- `compare-predict-brier-with-yardstick_P2.R` - compare yardstick prediction and brier using our simulation data. Using new predict_cure() compare Teng's manual brier score calc to yardstick brier score calc
+
+
+- `compare-predict-brier-with-yardstick_P1.R` - compare yardstick prediction and brier using their example data
+
+- `compare-predict-brier-with-yardstick_P1.R` - misc comparison code (need to review this)
+
+- `run-simulations.R` - produce simulated data for lasso cure models
+
+- `nomogram-custom-edits.R` - custom edits to nomogram for cureit paper
+
+- `archive`
+	- `evaluate-simulations.R` - old code to calculate brier score on simulations (modified from Teng)
+	- `testcode.R` - old code to calculate brier score
+
diff --git a/inst/run-simulations.R b/inst/run-simulations.R
new file mode 100644
index 0000000..11f7ace
--- /dev/null
+++ b/inst/run-simulations.R
@@ -0,0 +1,175 @@
+source(here::here("R/cv.cureitlasso.R"))
+source(here::here("R/cureitlasso.R"))
+source(here::here("R/simulasso.R"))
+source(here::here("R/coxsplit.R"))
+
+# Potentially change sample sizes and number of parameters
+# We could try simulating scenarios with overlap between cure and cox
+
+# Simulate Cure Data --------------------------------------------------------
+
+# Test- Try simulating one train and test set
+
+
+dat <- simulasso() # training data
+
+
+
+dat_valid <- simulasso(id_cure = dat$id_cure,
+          id_cox = dat$id_cox,
+          coefs_cure = dat$coefs_cure,
+          coefs_cox = dat$coefs_cox)
+
+
+setdiff(dat$id_cox, dat_valid$id_cox)
+
+
+# * Training- Simulate n Data Sets --------------------------------------------------------
+sim_data_list <- list()
+set.seed(999)
+
+for (i in 1:100) {
+  sim_data_list[[i]] = simulasso()
+}
+
+sim_data_list[[1]]$id_cure
+
+# * Validation - Simulate n Validation Data ---------------------------------------------------
+
+sim_valid_data_list <- list()
+
+for (i in 1:100) {
+  dat <- sim_data_list[[i]]
+  sim_valid_data_list[[i]] = simulasso(id_cure = dat$id_cure,
+                                       id_cox = dat$id_cox,
+                                       coefs_cure = dat$coefs_cure,
+                                       coefs_cox = dat$coefs_cox)
+}
+
+sim_data_list[[1]]$id_cure
+sim_valid_data_list[[1]]$id_cure
+
+save(sim_data_list, file = here::here("inst", 
+                                      "cureit-simulation-results",
+                                      "sim_data_list_v2.RData"))
+
+save(sim_valid_data_list, file = here::here("inst", 
+                                            "cureit-simulation-results",
+                                            "sim_valid_data_list_v2.RData"))
+
+# Fit Lasso  ------------------------------------------------------------
+
+# test- A Single Lasso Fit 
+
+dat <- sim_data_list[[1]]
+test_single_run <- cureitlasso(
+            t = dat$t,
+            d = dat$d,
+            x = dat$x,
+            minmu.ratio=0.05, # minimum penalty value in the logistic model
+            minlambda.ratio=0.05, # minimum penalty value in the cox model
+            adaptive=FALSE,
+            length.grid=8,
+            tol=1e-2,
+            maxiter=100,
+            progress=FALSE)
+
+# Run Cross Validation ----------------------------------------------------
+
+# * Test a Single CV Fit ------
+# To get a sense of what is in the output
+
+dat_valid <- sim_data_list[[1]]
+
+test_single_cv <- cv.cureitlasso(t = dat_valid$t,
+                                 d = dat_valid$d,
+                                 x = dat_valid$x,
+                                 minmu.ratio=0.05, # minimum penalty value in the logistic model
+                                 minlambda.ratio=0.05, # minimum penalty value in the cox model
+                                 adaptive=FALSE,
+                                 length.grid=10,
+
+                                 tol=1e-2,
+                                 maxiter=100,
+
+                                 nfolds=5,
+                                 ncore=5,
+                                 seed=NULL,
+                                 progress=FALSE)
+
+test_list <- list()
+test_list[[1]] <- test_single_cv
+
+# * 100 Runs ----
+
+cv_fits_list <- list()
+start_time <- Sys.time()
+
+for (i in 1:50) {
+
+  dat_valid <- sim_data_list[[i]]
+  
+  cv_fits_list[[i]] <- cv.cureitlasso(t = dat_valid$t,
+                                    d = dat_valid$d,
+                                    x = dat_valid$x,
+                                    minmu.ratio=0.05, # minimum penalty value in the logistic model
+                                    minlambda.ratio=0.05, # minimum penalty value in the cox model
+                                    adaptive=FALSE,
+                                    length.grid=10,
+
+                                    tol=1e-2,
+                                    maxiter=100,
+
+                                    nfolds=5,
+                                    ncore=5,
+                                    seed=NULL,
+                                    progress=FALSE
+  )
+
+
+}
+
+#  8.670569 mins for 3 data sets 
+end_time <- Sys.time()
+end_time - start_time
+
+
+save(cv_fits_list, file = here::here("inst", "cureit-simulation-results", "cv_fits_50_v2.RData"))
+
+# * 50 Runs ----
+
+cv_fits_list2 <- list()
+
+start_time <- Sys.time()
+
+for (i in 1:50) {
+  
+  dat_valid <- sim_data_list[[i]]
+  
+  cv_fits_list2[[i]] <- cv.cureitlasso(t = dat_valid$t,
+                                      d = dat_valid$d,
+                                      x = dat_valid$x,
+                                      minmu.ratio=0.05, # minimum penalty value in the logistic model
+                                      minlambda.ratio=0.05, # minimum penalty value in the cox model
+                                      adaptive=FALSE,
+                                      length.grid=10,
+                                      
+                                      tol=1e-2,
+                                      maxiter=100,
+                                      
+                                      nfolds=5,
+                                      ncore=5,
+                                      seed=NULL,
+                                      progress=FALSE
+  )
+  
+  
+}
+
+# 2.369856 hours for 50 fits
+end_time <- Sys.time()
+end_time - start_time
+
+
+save(cv_fits_list2, file = here::here("inst", "cv_fits_50.RData"))
+