The tsibble and feasts packages can be used to do this relatively easily.
First I'll create some data that is in the same format as yours.
library(tidyverse)
library(tsibble)
library(feasts)
df <- data.frame(
neighbourhood = 1:150,
year1 = 100 * runif(150),
year2 = 100 * runif(150),
year3 = 100 * runif(150),
year4 = 100 * runif(150),
year5 = 100 * runif(150),
year6 = 100 * runif(150),
year7 = 100 * runif(150),
year8 = 100 * runif(150)
)
Now we will turn it into a tsibble with year
as the index and neighbourhood
as a key.
df <- df %>%
pivot_longer(year1:year8, names_to = "year", values_to = "crime") %>%
mutate(
year = as.numeric(stringr::str_sub(year, 5, 5))
) %>%
as_tsibble(index = year, key = neighbourhood)
df
#> # A tsibble: 1,200 x 3 [1]
#> # Key: neighbourhood [150]
#> neighbourhood year crime
#> <int> <dbl> <dbl>
#> 1 1 1 32.7
#> 2 1 2 13.0
#> 3 1 3 70.6
#> 4 1 4 96.1
#> 5 1 5 28.4
#> 6 1 6 43.3
#> 7 1 7 73.2
#> 8 1 8 37.4
#> 9 2 1 95.3
#> 10 2 2 4.57
#> # … with 1,190 more rows
The autocorrelations for all 150 neighbourhoods are now very easy to compute using ACF()
.
# Lagged correlations within neighbourhoods
df %>% ACF(crime, lag_max = 3)
#> # A tsibble: 450 x 3 [1]
#> # Key: neighbourhood [150]
#> neighbourhood lag acf
#> <int> <lag> <dbl>
#> 1 1 1 -0.0842
#> 2 1 2 -0.592
#> 3 1 3 0.225
#> 4 2 1 -0.421
#> 5 2 2 0.0537
#> 6 2 3 -0.0429
#> 7 3 1 0.0206
#> 8 3 2 -0.626
#> 9 3 3 -0.108
#> 10 4 1 0.0885
#> # … with 440 more rows
Computing cross-correlations can be done using CCF()
but it requires two variables. So first we have to extend our tsibble to be keyed by all pairs of neighbourhoods.
# Create long tibble containing all pairs of neighbourhoods
pairs <- df %>%
mutate(n2 = neighbourhood) %>%
expand(year, neighbourhood, n2) %>%
left_join(df, by = c("year", "neighbourhood")) %>%
rename(
neighbourhoodA = neighbourhood,
crimeA = crime,
neighbourhood = n2
) %>%
left_join(df, by = c("year", "neighbourhood")) %>%
rename(
crimeB = crime,
neighbourhoodB = neighbourhood
) %>%
as_tsibble(index = year, key = c(neighbourhoodA, neighbourhoodB))
pairs
#> # A tsibble: 180,000 x 5 [1]
#> # Key: neighbourhoodA, neighbourhoodB [22,500]
#> year neighbourhoodA neighbourhoodB crimeA crimeB
#> <dbl> <int> <int> <dbl> <dbl>
#> 1 1 1 1 32.7 32.7
#> 2 2 1 1 13.0 13.0
#> 3 3 1 1 70.6 70.6
#> 4 4 1 1 96.1 96.1
#> 5 5 1 1 28.4 28.4
#> 6 6 1 1 43.3 43.3
#> 7 7 1 1 73.2 73.2
#> 8 8 1 1 37.4 37.4
#> 9 1 1 2 32.7 95.3
#> 10 2 1 2 13.0 4.57
#> # … with 179,990 more rows
Finally, we can compute the cross-correlations for all 22,500 pairs of neighbourhoods.
# Lagged cross-correlations between neighbourhoods
pairs %>% CCF(crimeA, crimeB, lag_max = 3)
#> # A tsibble: 157,500 x 4 [1]
#> # Key: neighbourhoodA, neighbourhoodB [22,500]
#> neighbourhoodA neighbourhoodB lag ccf
#> <int> <int> <lag> <dbl>
#> 1 1 1 -3 0.225
#> 2 1 1 -2 -0.592
#> 3 1 1 -1 -0.0842
#> 4 1 1 0 1
#> 5 1 1 1 -0.0842
#> 6 1 1 2 -0.592
#> 7 1 1 3 0.225
#> 8 1 2 -3 0.191
#> 9 1 2 -2 0.459
#> 10 1 2 -1 -0.237
#> # … with 157,490 more rows
Created on 2020-05-08 by the reprex package (v0.3.0)