`slide_index()` is similar to `slide()`, but allows a secondary `.i`-ndex vector to be provided.
This is often useful in business calculations, when you want to compute a
rolling computation looking "3 months back", which is approximately, but not
equivalent to, 3 * 30 days. `slide_index()` allows for these irregular window
sizes.
slide_index(.x, .i, .f, ..., .before = 0L, .after = 0L, .complete = FALSE)

slide_index_vec(
  .x,
  .i,
  .f,
  ...,
  .before = 0L,
  .after = 0L,
  .complete = FALSE,
  .ptype = NULL
)

slide_index_dbl(.x, .i, .f, ..., .before = 0L, .after = 0L, .complete = FALSE)

slide_index_int(.x, .i, .f, ..., .before = 0L, .after = 0L, .complete = FALSE)

slide_index_lgl(.x, .i, .f, ..., .before = 0L, .after = 0L, .complete = FALSE)

slide_index_chr(.x, .i, .f, ..., .before = 0L, .after = 0L, .complete = FALSE)

slide_index_dfr(
  .x,
  .i,
  .f,
  ...,
  .before = 0L,
  .after = 0L,
  .complete = FALSE,
  .names_to = rlang::zap(),
  .name_repair = c("unique", "universal", "check_unique")
)

slide_index_dfc(
  .x,
  .i,
  .f,
  ...,
  .before = 0L,
  .after = 0L,
  .complete = FALSE,
  .size = NULL,
  .name_repair = c("unique", "universal", "check_unique", "minimal")
)
.x 
The vector to iterate over and apply `.f` to.

.i 
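The index vector that determines the window sizes. It is fairly common to supply a date vector as the index, but not required. There are 3 restrictions on the index:
- The size of the index must match the size of `.x`; they will not be recycled to their common size.
- The index must be an increasing vector, but duplicate values are allowed.
- The index cannot have missing values.
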
.f 
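If a function, it is used as is. If a formula, e.g. `~ .x + 2`, it is converted to a function. There are three ways to refer to the arguments:
- For a single argument function, use `.`
- For a two argument function, use `.x` and `.y`
- For more arguments, use `..1`, `..2`, `..3` etc.
This syntax allows you to create very compact anonymous functions.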
...  Additional arguments passed on to the mapped function. 
.before, .after 
The ranges that result from applying `.before` and `.after` have the same 3 restrictions as `.i` itself.
.complete 
Should the function be evaluated on complete windows only? If `FALSE`, the default, then partial computations will be allowed.
.ptype 
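A prototype corresponding to the type of the output. If `NULL`, the default, the output type is determined by computing the common type across the results of the calls to `.f`. If supplied, the result of each call to `.f` will be cast to that type, and the final output will have that type. If `getOption("vctrs.no_guessing")` is `TRUE`, the `.ptype` must be supplied. This is a way to make production code demand fixed types.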
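.names_to  This controls what to do with input names supplied in `...` of the underlying `vctrs::vec_rbind()` call (here, the names of the results of `.f`). By default, input names are zapped. If a string, it specifies a column where the input names will be copied. If `NULL`, the input names are used as row names.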

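.name_repair  One of `"unique"`, `"universal"`, or `"check_unique"` (`slide_index_dfc()` additionally accepts `"minimal"`). See `vctrs::vec_as_names()` for the meaning of these options. With `vctrs::vec_rbind()`, the repair function is applied to all inputs separately, because their columns must be aligned before the rows are bound. `vctrs::vec_cbind()` applies the repair function after all inputs have been combined, which is why it also allows the more permissive `"minimal"` repair.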
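.size  If `NULL`, the default, the number of rows in the `vctrs::vec_cbind()` output is determined using the standard recycling rules. Alternatively, specify the desired number of rows, and any inputs of length 1 will be recycled appropriately.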
A vector fulfilling the following invariants:
slide_index()
vec_size(slide_index(.x)) == vec_size(.x)
vec_ptype(slide_index(.x)) == list()
slide_index_vec() and slide_index_*() variants
vec_size(slide_index_vec(.x)) == vec_size(.x)
vec_size(slide_index_vec(.x)[[1]]) == 1L
vec_ptype(slide_index_vec(.x, .ptype = ptype)) == ptype
library(lubridate)

x <- 1:5

# In some cases, sliding over `x` with a strict window size of 2
# will fit your use case.
slide(x, ~.x, .before = 1)
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 1 2
#>
#> [[3]]
#> [1] 2 3
#>
#> [[4]]
#> [1] 3 4
#>
#> [[5]]
#> [1] 4 5
#>

# However, if this `i` is a date vector paired with `x`, when computing
# rolling calculations you might want to iterate over `x` while
# respecting the fact that `i` is an irregular sequence.
i <- as.Date("2019-08-15") + c(0:1, 4, 6, 7)

# For example, a "2 day" window should not pair `"2019-08-19"` and
# `"2019-08-21"` together, even though they are next to each other in `x`.
# `slide_index()` computes the lookback value from the current date in `.i`,
# meaning that if you are currently on `"2019-08-21"` and look back 1 day,
# it will correctly not include `"2019-08-19"`.
slide_index(i, i, ~.x, .before = 1)
#> [[1]]
#> [1] "2019-08-15"
#>
#> [[2]]
#> [1] "2019-08-15" "2019-08-16"
#>
#> [[3]]
#> [1] "2019-08-19"
#>
#> [[4]]
#> [1] "2019-08-21"
#>
#> [[5]]
#> [1] "2019-08-21" "2019-08-22"
#>

# We could have equivalently used a lubridate period object for this as well,
# since `i - lubridate::days(1)` is allowed
slide_index(i, i, ~.x, .before = lubridate::days(1))
#> [[1]]
#> [1] "2019-08-15"
#>
#> [[2]]
#> [1] "2019-08-15" "2019-08-16"
#>
#> [[3]]
#> [1] "2019-08-19"
#>
#> [[4]]
#> [1] "2019-08-21"
#>
#> [[5]]
#> [1] "2019-08-21" "2019-08-22"
#>

# ---------------------------------------------------------------------------
# Functions for `.before` and `.after`

# In some cases, it might not be appropriate to compute
# `.i - .before` or `.i + .after`, either because there isn't a `-` or `+`
# method defined, or because there is an alternative way to perform the
# arithmetic. For example, subtracting 1 month with `- months(1)` (using
# lubridate) can sometimes land you on an invalid date that doesn't exist.
i <- as.Date(c("2019-01-31", "2019-02-28", "2019-03-31"))

# 2019-03-31 - months(1) = 2019-02-31, which doesn't exist
i - months(1)
#> [1] "2018-12-31" "2019-01-28" NA

# These NAs create problems with `slide_index()`, which doesn't allow
# missing values in the computed endpoints
try(slide_index(i, i, identity, .before = months(1)))
#> Error : Endpoints generated by `.before` cannot be `NA`.
#> ℹ They are `NA` at locations: 3.

# In these cases, it is more appropriate to use `%m-%`,
# which will snap to the end of the month, at least giving you something
# to work with.
i %m-% months(1)
#> [1] "2018-12-31" "2019-01-28" "2019-02-28"

# To use this as your `.before` or `.after`, supply an anonymous function of
# 1 argument that performs the computation
slide_index(i, i, identity, .before = ~.x %m-% months(1))
#> [[1]]
#> [1] "2019-01-31"
#>
#> [[2]]
#> [1] "2019-01-31" "2019-02-28"
#>
#> [[3]]
#> [1] "2019-02-28" "2019-03-31"
#>

# Notice that in the `.after` case, `2019-02-28 %m+% months(1)` doesn't
# capture the end of March, so it isn't included in the 2nd result
slide_index(i, i, identity, .after = ~.x %m+% months(1))
#> [[1]]
#> [1] "2019-01-31" "2019-02-28"
#>
#> [[2]]
#> [1] "2019-02-28"
#>
#> [[3]]
#> [1] "2019-03-31"
#>

# ---------------------------------------------------------------------------

# When `.i` has repeated values, they are always grouped together.
i <- c(2017, 2017, 2018, 2019, 2020, 2020)

slide_index(i, i, ~.x)
#> [[1]]
#> [1] 2017 2017
#>
#> [[2]]
#> [1] 2017 2017
#>
#> [[3]]
#> [1] 2018
#>
#> [[4]]
#> [1] 2019
#>
#> [[5]]
#> [1] 2020 2020
#>
#> [[6]]
#> [1] 2020 2020
#>

slide_index(i, i, ~.x, .after = 1)
#> [[1]]
#> [1] 2017 2017 2018
#>
#> [[2]]
#> [1] 2017 2017 2018
#>
#> [[3]]
#> [1] 2018 2019
#>
#> [[4]]
#> [1] 2019 2020 2020
#>
#> [[5]]
#> [1] 2020 2020
#>
#> [[6]]
#> [1] 2020 2020
#>

# ---------------------------------------------------------------------------
# Rolling regressions

# Rolling regressions are easy with `slide_index()` because:
# - Data frame `.x` values are iterated over rowwise
# - The index is respected by using `.i`
set.seed(123)

df <- data.frame(
  y = rnorm(100),
  x = rnorm(100),
  i = as.Date("2019-08-15") + c(0, 2, 4, 6:102) # <- irregular
)

# 20 day rolling regression. Current day + 19 days back.
# Additionally, set `.complete = TRUE` to not compute partial results.
regr <- slide_index(df, df$i, ~lm(y ~ x, .x), .before = 19, .complete = TRUE)

regr[16:18]
#> [[1]]
#> NULL
#>
#> [[2]]
#>
#> Call:
#> lm(formula = y ~ x, data = .x)
#>
#> Coefficients:
#> (Intercept)            x
#>      0.3257       0.2067
#>
#>
#> [[3]]
#>
#> Call:
#> lm(formula = y ~ x, data = .x)
#>
#> Coefficients:
#> (Intercept)            x
#>      0.2574       0.2632
#>
#>

# The first 16 slots are NULL because there is no possible way to
# look back 19 days from the 16th index position and construct a full
# window. But on the 17th index position, `"2019-09-03"`, if we look
# back 19 days we get to `"2019-08-15"`, which is the same value as
# `i[1]` so a full window can be constructed.
df$i[16] - 19 >= df$i[1] # FALSE
#> [1] FALSE
df$i[17] - 19 >= df$i[1] # TRUE
#> [1] TRUE

# ---------------------------------------------------------------------------
# Accessing the current index value

# A very simplistic version of `purrr::map2()`
fake_map2 <- function(.x, .y, .f, ...) {
  Map(.f, .x, .y, ...)
}

# Occasionally you need to access the index value that you are currently on.
# This is generally not possible with a single call to `slide_index()`, but
# can be easily accomplished by following up a `slide_index()` call with a
# `purrr::map2()`. In this example, we want to use the distance from the
# current index value (in days) as a multiplier on `x`. Values further
# away from the current date get a higher multiplier.
set.seed(123)

# 25 random days past 2000-01-01
i <- sort(as.Date("2000-01-01") + sample(100, 25))

df <- data.frame(i = i, x = rnorm(25))

weight_by_distance <- function(df, i) {
  df$weight = abs(as.integer(df$i - i))
  df$x_weighted = df$x * df$weight
  df
}

# Use `slide_index()` to just generate the rolling data.
# Here we take the current date + 5 days before + 5 days after.
dfs <- slide_index(df, df$i, ~.x, .before = 5, .after = 5)

# Follow up with a `map2()` with `i` as the second input.
# This allows you to track the current `i` value and weight accordingly.
result <- fake_map2(dfs, df$i, weight_by_distance)

head(result)
#> [[1]]
#>            i         x weight x_weighted
#> 1 2000-01-08 0.2179749      0   0.000000
#> 2 2000-01-10 1.0260044      2   2.052009
#>
#> [[2]]
#>            i         x weight x_weighted
#> 1 2000-01-08 0.2179749      2  0.4359498
#> 2 2000-01-10 1.0260044      0  0.0000000
#> 3 2000-01-15 0.7288912      5  3.6444561
#>
#> [[3]]
#>            i         x weight x_weighted
#> 1 2000-01-10 1.0260044      5  5.1300222
#> 2 2000-01-15 0.7288912      0  0.0000000
#> 3 2000-01-16 0.6250393      1  0.6250393
#>
#> [[4]]
#>            i         x weight x_weighted
#> 1 2000-01-15 0.7288912      1  0.7288912
#> 2 2000-01-16 0.6250393      0  0.0000000
#>
#> [[5]]
#>            i        x weight x_weighted
#> 1 2000-01-26 1.686693      0   0.000000
#> 2 2000-01-27 0.837787      1   0.837787
#>
#> [[6]]
#>            i         x weight x_weighted
#> 1 2000-01-26 1.6866933      1  1.6866933
#> 2 2000-01-27 0.8377870      0  0.0000000
#> 3 2000-02-01 0.1533731      5  0.7668656
#>