the data provided was too sparse for me to make a plottable test, so I took the information in the tables provided and randomised data from them to make sufficient data to be 'plottable'
Unfortunately that means a great deal of the code at the beginning is an invention of df1 and df2, but then I have some useful code, about the join, pivot. calculating a linear transform of the second scale to the first scale and back so that the unemployed data will be plotted at a good height
df1_stub<-structure(list(Month = c("2019-10", "2019-07", "2020-07", "2019-02",
"2020-09", "2019-10", "2019-12", "2019-11", "2019-03", "2019-08"
), Falls.within = c("Metropolitan Police Service", "Metropolitan Police Service",
"Dorset Police", "West Midlands Police", "Sussex Police", "Suffolk Constabulary",
"Lincolnshire Police", "City of London Police", "West Mercia Police",
"Metropolitan Police Service"), Region = c("London", "London",
"South West", "West Midlands", "South East", "East of England",
"East Midlands", "London", "West Midlands", "London"), Crime = c("Theft",
"Anti-social behaviour", "Theft", "Theft", "Theft", "Violence and sexual offences",
"Violence and sexual offences", "Anti-social behaviour", "Anti-social behaviour",
"Theft")), class = "data.frame", row.names = c(NA, -10L))
df2_stub <- structure(list(Date = c("2019-10", "2019-07", "2020-07", "2019-02",
"2020-09", "2019-10", "2019-12", "2019-11", "2019-03", "2019-08"
), Region = structure(c(6L, 8L, 1L, 7L, 2L, 8L, 7L, 5L, 2L, 9L
), .Label = c("England", "South East", "South West", "London",
"East of England", "East Midlands", "West Midlands", "Yorkshire and The Humber",
"North East", "North West"), class = "factor"), Unemployment.rate = c(4.63665149464923,
4.28827168334814, 4.00444047276053, 4.31748261760943, 2.99042563020473,
3.75797094178592, 4.80322174913054, 4.2511880639675, 3.259676887651,
6.1811363458787)), class = "data.frame", row.names = c(NA, -10L
))
d1 <- summarise_all(tibble(df1_stub),~list(unique(.)))
d2 <- summarise_all(tibble(df2_stub) %>%
mutate_if(is.factor,as.character)
,~list(unique(.)))
set.seed(42)
num_rows_to_invent <-100000
df1 <- data.frame(Month=sample(seq.Date(from=as.Date("2019-02-01"),
to = as.Date("2020-07-01"),
by = "1 months"),size = num_rows_to_invent,replace = TRUE),
Region=sample(unlist(d1$Region),size = num_rows_to_invent,replace = TRUE),
Crime=sample(unlist(d1$Crime),size = num_rows_to_invent,replace = TRUE))
df2 <- data.frame(Date=sample(seq.Date(from=as.Date("2019-02-01"),
to = as.Date("2020-07-01"),
by = "1 months"),size = num_rows_to_invent,replace = TRUE),
Region=sample(unlist(d1$Region),size = num_rows_to_invent,replace = TRUE),
ur=sample(unlist(d2$Unemployment.rate),size = num_rows_to_invent,replace = TRUE)) %>%
group_by(Date,Region) %>% slice(1) %>% ungroup
library(tidyverse)
df3<- df1 %>%
count(Region, Month, Crime, name = 'Crime_occurrencies') %>%
full_join(df2, by=c("Region","Month"="Date")) %>% filter(Region != "England")
range(df3$Crime_occurrencies,na.rm = TRUE)
# [1] 249 623
range(df3$ur,na.rm = TRUE)
# [1] 2.990426 6.181136
(c(249,623) - 249)
1/374 * (6.181136 - 2.990426)
#0.00853131
#therefore
(c(249,623) - 249) * 0.00853131 + 2.990426
# and the opposite way
(c(2.990426, 6.181136 )- 2.990426) /0.00853131 +249
df4 <- pivot_longer(df3 %>% mutate(ur_falsescaled=(ur - 2.990426) /0.00853131 +249),
cols=c(Crime_occurrencies ,ur_falsescaled)) %>% mutate(
Crime=case_when(name=='ur_falsescaled' ~ "NA",
TRUE ~ Crime)
) %>% distinct()
p <- ggplot(df4 , aes(x=Month, y=value,colour=Crime,linetype=name)) + facet_wrap(~Region)+
geom_line()
# Create a simple secondary axis
p + scale_y_continuous(sec.axis = sec_axis(~ (.- 249) * 0.00853131 + 2.990426))