-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsliding_window_cor.R
More file actions
217 lines (173 loc) · 8.83 KB
/
sliding_window_cor.R
File metadata and controls
217 lines (173 loc) · 8.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#'
#' @title sliding_window_cor.R || v2025.06.12
#'
#' @description This function calculates a time-resolved correlation between two signals using a sliding window with optional Gaussian weighting and parallelization.
#'
#' @param x (numeric vector) First time series to be correlated; must be the same length as `y`.
#' @param y (numeric vector) Second time series to be correlated; must be the same length as `x`.
#' @param window (integer) The width of the sliding window (in timepoints) over which the correlation is computed.
#' @param sigma (numeric) Standard deviation of the Gaussian kernel used to weight values within each window; controls temporal smoothing.
#' @param step (integer) Step size between the centers of successive windows; determines the resolution of the time-resolved correlation.
#' @param use (character) Argument passed to the `use` parameter in the `cor()` function; typically "complete.obs" or "pairwise.complete.obs" to handle missing data.
#' @param method (character) Correlation method to use; one of "pearson", "spearman", or "kendall".
#' @param zeroes_to_na (logical) If TRUE, correlation will return NA in windows where either `x` or `y` has zero variance; if FALSE, such cases will return 0 or the result from `cor()`.
#' @param n_cores (integer) Number of processor cores to use for parallel computation; defaults to half the available cores.
#' @param parallelize (logical) If TRUE, uses parallel computation with `foreach`; if FALSE (default), runs in serial.
#'
#' @return (numeric vector) A vector of correlation values, one for each sliding window, aligned to the center of each window segment.
#'
#' @export
sliding_window_cor <- function(x,
                               y,
                               window,
                               sigma = 3,
                               step = 1,
                               use = "complete.obs",
                               method = "pearson",
                               zeroes_to_na = FALSE,
                               n_cores = parallel::detectCores() / 2,
                               parallelize = FALSE) {
  # circle_shift() is an external helper used by compute_weighted_correlation();
  # it must live alongside this script. (stats is attached by default in R, so
  # no require()/library() call is needed for convolve/sd/cor.)
  source("circle_shift.R", local = FALSE)
  # ----- QUALITY CONTROL CHECKS -----
  # Confirm x and y are equal in length
  if (length(x) != length(y)) {
    stop("The length of x and y do not match. Please submit two arrays of equal length and try again")
  }
  # Confirm that x and y are numeric (scalar || since is.numeric() is length-1)
  if (!is.numeric(x) || !is.numeric(y)) {
    stop("x and y must be numeric arrays of equal length. Please consider coercing or reformatting your values and try again")
  }
  # Confirm that window size is numeric and > 1
  if (!is.numeric(window) || window < 2) {
    stop("Window size must be a numeric value greater than 1. Please correct this and try again")
  }
  # Confirm that sigma is numeric and > 0
  if (!is.numeric(sigma) || sigma <= 0) {
    stop("Sigma must be a non-zero numeric value. Please correct this and try again")
  }
  # Confirm that step size is numeric and >= 1
  if (!is.numeric(step) || step < 1) {
    stop("Step size must be a non-zero numeric value. Please correct this and try again")
  }
  # Confirm that zeroes_to_na is logical
  if (!is.logical(zeroes_to_na)) {
    stop("zeroes_to_na must be either TRUE or FALSE. Please correct this and try again")
  }
  # Confirm that parallelize is logical
  if (!is.logical(parallelize)) {
    stop("parallelize must be either TRUE or FALSE. Please correct this and try again")
  }
  # ----- PREP: DEFINE CORE VARIABLES -----
  # Determine how many timepoints we have
  nVols <- length(x)
  # Generating the median and series indices.
  # NOTE(review): for odd nVols the index runs 0:nVols (length nVols + 1);
  # generate_convolution_window() trims the result back to nVols either way,
  # so the asymmetry is preserved here -- confirm it is intentional.
  if ((nVols %% 2) != 0) {
    median <- ceiling(nVols / 2)
    series_index <- 0:nVols
  } else {
    median <- nVols / 2
    series_index <- 0:(nVols - 1)
  }
  # Define the radius of the window (half its size)
  window_radius <- round(window / 2)
  # ----- CREATE A GAUSSIAN WINDOW FOR WEIGHTING -----
  convol <- generate_convolution_window(nVols, window_radius, sigma, median, series_index)
  # ----- DEFINE THE WINDOW CENTERS AND ITERATION PLAN -----
  # Defining how many unique window that we will have, defined as the number of volumes, minus how large the window radius is and divided by how far apart different iterations of windows will be
  # Note that I'm using the window radius instead of the window; without doing that, the beginning and end timepoints are downweighted much more severely than any other timepoint on average. By using the radius, the beginning begins at a weight around 1 and end ends at a weight around 1. It might be easier to visualize with plot() if you're having trouble imagining what I'm saying
  nWindow <- floor((nVols - window_radius) / step)
  # Define the center indices of each window
  indices <- seq(1, by = step, length.out = nWindow) + (window_radius / 2)
  # ----- SLIDING WINDOW CORRELATION -----
  if (parallelize) {
    # Bind %dopar% locally so foreach does not need to be attached with library()
    `%dopar%` <- foreach::`%dopar%`
    # detectCores()/2 can be fractional (or 0 on a 1-core box); sanitize it
    n_cores <- max(1L, floor(n_cores))
    # Register parallel cluster and guarantee shutdown even if a worker errors
    cl <- parallel::makeCluster(n_cores)
    doParallel::registerDoParallel(cl)
    on.exit(parallel::stopCluster(cl), add = TRUE)
    # Compute correlations in parallel
    cor_sw <- foreach::foreach(i = seq_along(indices), .combine = c, .packages = "stats") %dopar% {
      WINDOW <- indices[i]
      shift_amt <- median - WINDOW
      compute_weighted_correlation(x, y, convol, shift_amt, WINDOW, window_radius, median,
                                   zeroes_to_na, method, use)
    }
  } else {
    # Compute correlations in serial (preallocated output vector)
    cor_sw <- numeric(length(indices))
    for (i in seq_along(indices)) {
      WINDOW <- indices[i]
      shift_amt <- median - WINDOW
      cor_sw[i] <- compute_weighted_correlation(x, y, convol, shift_amt, WINDOW, window_radius, median,
                                                zeroes_to_na, method, use)
    }
  }
  # ----- RETURN THE SLIDING WINDOW CORRELATION SERIES -----
  return(cor_sw)
}
#' @keywords internal
#' @description Helper that builds a Gaussian-weighted convolution window by
#'   smoothing a boxcar mask with a Gaussian kernel and trimming the result
#'   back to the series length.
#' @return (numeric vector) normalized Gaussian convolution window of length nVols
generate_convolution_window <- function(nVols, window_radius, sigma, median, series_index) {
  # Gaussian bump centered on the median index
  gauss_kernel <- exp(-((series_index - median)^2) / (2 * sigma^2))
  # Rectangular (boxcar) mask of width 2 * window_radius around the median
  boxcar <- rep(0, nVols)
  boxcar[(median - window_radius + 1):(median + window_radius)] <- 1
  # Full ("open") convolution of kernel and mask, rescaled so the peak is 1
  smoothed <- convolve(gauss_kernel, boxcar, type = "open")
  smoothed <- smoothed / max(smoothed)
  # Trim the zero-padded convolution back down to the original series length;
  # guard clause covers the degenerate case where the trim span is too short
  first <- median + 1
  last <- length(smoothed) - median + 1
  if (first > last || (last - first + 1) < nVols) {
    stop("Invalid convolution trimming: result would be shorter than nVols.")
  }
  smoothed[first:last][1:nVols]
}
#' @keywords internal
#' @description Helper that re-centers the convolution window on one window
#'   center, weights both signals by it, and computes their correlation.
#' @return (numeric) correlation value for a given window
compute_weighted_correlation <- function(x, y, convol, shift_amt, window_center,
                                         window_radius, median, zeroes_to_na,
                                         method, use) {
  n_timepoints <- length(x)
  # Re-center the convolution weights on this window via a circular shift
  weights <- circle_shift(convol, shift_amt)
  # Early windows can wrap around to the back of the series: zero the wrapped
  # tail and rescale so the total weight mass matches the unshifted window
  if (window_center < median && weights[n_timepoints] != 0) {
    weights[median:n_timepoints] <- 0
    weights <- weights * (sum(convol) / sum(weights))
  }
  # Late windows can wrap to the front: same treatment for the head
  if (window_center > median && weights[1] != 0) {
    weights[1:median] <- 0
    weights <- weights * (sum(convol) / sum(weights))
  }
  # Weight both series, then keep only the window's support region
  lo <- max(1, window_center - window_radius)
  hi <- min(n_timepoints, window_center + window_radius)
  weighted_x <- (x * weights)[lo:hi]
  weighted_y <- (y * weights)[lo:hi]
  # Correlate only timepoints where both values are observed and non-constant
  ok <- complete.cases(weighted_x, weighted_y)
  if (sum(ok) >= 2 && sd(weighted_x[ok]) != 0 && sd(weighted_y[ok]) != 0) {
    return(cor(weighted_x[ok], weighted_y[ok], method = method, use = use))
  }
  if (!zeroes_to_na) {
    # Caller opted out of NA conversion: attempt the correlation anyway
    # (at their own risk)
    return(cor(weighted_x, weighted_y, method = method, use = use))
  }
  # Too few points or zero variance: report NA
  NA
}