Add Spearman Correlation Coefficient and RMSE in a plot, and change the plot size

Dear all,

Thank you very much in advance for your help.

I've got the following dataset:

structure(list(Count = 1:14, GW = c(0.08, 0.04, 0.35, 0.54, 0.39, 
0.94, 0.51, 0.01, 0.44, 0.63, 0.14, 0.79, 0.43, 0.73), Pz1 = c(-2.459826614, 
-2.905007821, -2.241113224, -1.549264338, -1.761962438, -1.282612221, 
-0.428828702, 1.659042479, 2.63518648, 3.076022461, 1.886216859, 
0.124473561, -1.025720789, -1.969461882), Pz2 = c(-2.916090168, 
-3.262459435, -2.455396094, -1.488106654, -0.417171756, -1.781014095, 
-0.605012986, 1.037062685, 1.977265974, 2.587846362, 2.499228916, 
1.0852274, -0.503736287, -1.829562138), Pz3 = c(-2.507944967, 
-3.718722989, -2.812847708, -1.702389524, -0.356014073, -0.436223413, 
-1.10341486, 0.860878402, 1.355286181, 1.929925855, 2.011052817, 
1.698239458, 0.457017552, -1.307577636), Pz4 = c(-2.526729696, 
-3.310577787, -3.269111262, -2.059841138, -0.570296943, -0.375065729, 
0.241375822, 0.362476528, 1.179101897, 1.307946062, 1.35313231, 
1.210063358, 1.07002961, -0.346823797), Pz5 = c(-3.284551238, 
-3.329362517, -2.86096606, -2.516104692, -0.927748557, -0.5893486, 
0.302533506, 1.70726721, 0.680700023, 1.131761778, 0.731152517, 
0.552142851, 0.58185351, 0.266188261), Pz6 = c(-4.011896321, 
-4.087184059, -2.87975079, -2.107959491, -1.38401211, -0.946800213, 
0.088250636, 1.768424893, 2.025490705, 0.633359905, 0.554968233, 
-0.069836942, -0.076066996, -0.221987839), Pz7 = c(-4.769878994, 
-4.814529142, -3.637572331, -2.12674422, -0.975866909, -1.403063767, 
-0.269200978, 1.554142023, 2.086648388, 1.978150587, 0.05656636, 
-0.246021225, -0.698046789, -0.879908346)), class = "data.frame", row.names = c(NA, 
-14L))

And I am running this code to automatically save the plots in a folder:

library(tidyverse)
library(ggplot2)
#~~~ step 1: transform the data from 'wide' format to 'long' format ~~~

my_data %>%
    pivot_longer(names_to = 'key', values_to = 'value',-c(Count, GW)) %>%
    {. ->> my_data_long}


#~~~ step 2: write a function to plot the data and save the plot ~~~

saveplot_function <- function(i){
   
     my_plot <- my_data_long %>%  

      filter(  
        key == i
      ) %>%

     ggplot(aes(x=Count)) + 
       geom_line(aes(y = scale(GW)), color = "blue") + 
       geom_line(aes(y = scale(value)), color="red") +
          ylab(i)     
          ggsave(paste0('my_location/my_folder/', i, '.png'))
  }


#~~~ step 3: loop through all the values of 'key' (column names in the wide format data) ~~~

 for(i in unique(my_data_long$key)){
    
    saveplot_function(i)
    
  }

This code does the job, but I'd need to add a box (or just the text) showing the Spearman Correlation Coefficient and the RMSE for each plot (or better say for the two time series displayed within each plot).
Also, the plots are saved in a squared shape, and I would like them to be rectangular instead (See figure below. Something like this would be great!).

Could you kindly help me to rewrite the code accordingly? I am not an R expert at all and this code has been modified from an old code that someone kindly wrote for me ( and it took me ages to make it work for the purpose I am using it for :sweat_smile: ), so I would very much appreciate it if you could rewrite it (sorry!).

Additionally, as you can see from the code above, I used scale() . What I am trying to do is to standardise the plotted data using Z-Scores (i.e., Z = (x - mean(x)) / Standard_Deviation(x) ). Can you kindly confirm (or reject) that scale() standardises the time series using Z-Scores?

Thank you very much indeed for your help!

Simone

This is a template for making a line plot from the data in approximately the same layout as the image. Countless elaborations are possible to line width, color, aspect, labeling of axes, title, subtitle, caption, borders, background, boxes around the legend, font, etc. Resources for that granularity are available in the ggplot2 documentation plus The R Graphics Cookbook, The R Cookbook and Data Visualization.

# LIBRARIES

suppressPackageStartupMessages({
  library(ggplot2)
})

# FUNCTIONS

make_plot <- function(x) {
  base_plot + geom_line(aes(obs,GW)) +
    geom_line(aes(obs,dat[,x])) + 
    theme_minimal()
}

# DATA

dat <- data.frame(GW = c(
  0.08, 0.04, 0.35, 0.54, 0.39, 0.94, 0.51,
  0.01, 0.44, 0.63, 0.14, 0.79, 0.43, 0.73, NA, NA, NA, NA, NA,
  NA, NA, NA, NA
), Pz1 = c(
  -2.459826614, -2.905007821, -2.241113224,
  -1.549264338, -1.761962438, -1.282612221, -0.428828702, 1.659042479,
  2.63518648, 3.076022461, 1.886216859, 0.124473561, -1.025720789,
  -1.969461882, -2.216075605, -2.508858567, -2.495831241, -2.36513321,
  -2.295002655, -2.103511241, -1.009051105, -0.619616011, -0.941517493
), Pz2 = c(
  -2.916090168, -3.262459435, -2.455396094, -1.488106654,
  -0.417171756, -1.781014095, -0.605012986, 1.037062685, 1.977265974,
  2.587846362, 2.499228916, 1.0852274, -0.503736287, -1.829562138,
  -2.410220303, -2.135932209, -2.105646454, -2.79131286, -2.446274505,
  -2.412041743, -1.467408853, -1.207262381, -1.286558601
), Pz3 = c(
  -2.507944967,
  -3.718722989, -2.812847708, -1.702389524, -0.356014073, -0.436223413,
  -1.10341486, 0.860878402, 1.355286181, 1.929925855, 2.011052817,
  1.698239458, 0.457017552, -1.307577636, -2.270320559, -2.330076907,
  -1.732720095, -2.401128073, -2.872454155, -2.563313593, -1.775939355,
  -1.665620129, -1.874204971
), Pz4 = c(
  -2.526729696, -3.310577787,
  -3.269111262, -2.059841138, -0.570296943, -0.375065729, 0.241375822,
  0.362476528, 1.179101897, 1.307946062, 1.35313231, 1.210063358,
  1.07002961, -0.346823797, -1.748336058, -2.190177163, -1.926864794,
  -2.028201714, -2.482269368, -2.989493243, -1.927211205, -1.974150631,
  -2.332562719
), Pz5 = c(
  -3.284551238, -3.329362517, -2.86096606,
  -2.516104692, -0.927748557, -0.5893486, 0.302533506, 1.70726721,
  0.680700023, 1.131761778, 0.731152517, 0.552142851, 0.58185351,
  0.266188261, -0.787582218, -1.668192661, -1.78696505, -2.222346412,
  -2.109343009, -2.599308456, -2.353390855, -2.125422481, -2.641093221
), Pz6 = c(
  -4.011896321, -4.087184059, -2.87975079, -2.107959491,
  -1.38401211, -0.946800213, 0.088250636, 1.768424893, 2.025490705,
  0.633359905, 0.554968233, -0.069836942, -0.076066996, -0.221987839,
  -0.174570161, -0.707438822, -1.264980548, -2.082446669, -2.303487708,
  -2.226382097, -1.963206068, -2.551602131, -2.792365071
), Pz7 = c(
  -4.769878994,
  -4.814529142, -3.637572331, -2.12674422, -0.975866909, -1.403063767,
  -0.269200978, 1.554142023, 2.086648388, 1.978150587, 0.05656636,
  -0.246021225, -0.698046789, -0.879908346, -0.66274626, -0.094426764,
  -0.304226709, -1.560462167, -2.163587964, -2.420526796, -1.59027971,
  -2.161417344, -3.218544722
))

# PREPROCESSING
# create an index variable to use as the x-axis and rearrange the data
# frame so that it and the GW variable occur last

dat["obs"] <- 1:nrow(dat)
head(dat)
#>     GW       Pz1        Pz2        Pz3        Pz4        Pz5        Pz6
#> 1 0.08 -2.459827 -2.9160902 -2.5079450 -2.5267297 -3.2845512 -4.0118963
#> 2 0.04 -2.905008 -3.2624594 -3.7187230 -3.3105778 -3.3293625 -4.0871841
#> 3 0.35 -2.241113 -2.4553961 -2.8128477 -3.2691113 -2.8609661 -2.8797508
#> 4 0.54 -1.549264 -1.4881067 -1.7023895 -2.0598411 -2.5161047 -2.1079595
#> 5 0.39 -1.761962 -0.4171718 -0.3560141 -0.5702969 -0.9277486 -1.3840121
#> 6 0.94 -1.282612 -1.7810141 -0.4362234 -0.3750657 -0.5893486 -0.9468002
#>          Pz7 obs
#> 1 -4.7698790   1
#> 2 -4.8145291   2
#> 3 -3.6375723   3
#> 4 -2.1267442   4
#> 5 -0.9758669   5
#> 6 -1.4030638   6
dat <- dat[,c(2:ncol(dat),1)]
head(dat)
#>         Pz1        Pz2        Pz3        Pz4        Pz5        Pz6        Pz7
#> 1 -2.459827 -2.9160902 -2.5079450 -2.5267297 -3.2845512 -4.0118963 -4.7698790
#> 2 -2.905008 -3.2624594 -3.7187230 -3.3105778 -3.3293625 -4.0871841 -4.8145291
#> 3 -2.241113 -2.4553961 -2.8128477 -3.2691113 -2.8609661 -2.8797508 -3.6375723
#> 4 -1.549264 -1.4881067 -1.7023895 -2.0598411 -2.5161047 -2.1079595 -2.1267442
#> 5 -1.761962 -0.4171718 -0.3560141 -0.5702969 -0.9277486 -1.3840121 -0.9758669
#> 6 -1.282612 -1.7810141 -0.4362234 -0.3750657 -0.5893486 -0.9468002 -1.4030638
#>   obs   GW
#> 1   1 0.08
#> 2   2 0.04
#> 3   3 0.35
#> 4   4 0.54
#> 5   5 0.39
#> 6   6 0.94

# HELPER OBJECTS
# base plot

base_plot <- ggplot(dat,aes(obs,GW)) + geom_line()
base_plot
#> Warning: Removed 9 row(s) containing missing values (geom_path).

# indices
cols_to_plot <- 1:(ncol(dat)-2)
var_names <- names(dat[1:7])

# MAIN

# plot with GW with one additonal series

p <- make_plot(cols_to_plot[1])

# compress y-axis

p + coord_fixed()
#> Warning: Removed 9 row(s) containing missing values (geom_path).

#> Warning: Removed 9 row(s) containing missing values (geom_path).

# create statistics to plot

stat1 <- mean(dat[,1])
stat2 <- sd(dat[,1])

# add text annotation with plots

p + 
  annotate("text", x = 20, y = 3, label = paste("mean: ",stat1)) +
  annotate("text", x = 20, y = 2, label = paste("sd: ",stat2)) +
  coord_fixed()
#> Warning: Removed 9 row(s) containing missing values (geom_path).

#> Warning: Removed 9 row(s) containing missing values (geom_path).

Scaling in time series is generally done using log or power transformations. See Hyndman.

Hi @technocrat ,

Thanks for your help. This is great, however, I should start all again from scratches.
Is there any way to use my code (as "template"), and add a line(s) that computes (and displays) Spearman Correlation and RMSE between the 2 variables shown in each given plot?

Regards,

Simone

stat1, the mean and stat2 the standard deviation in the example were used as stand-ins for any pair of test statistics or other calculations that can be derived from pairs of the GW and any of the Pz variables.

# LIBRARIES

suppressPackageStartupMessages({
  library(ggplot2)
})

# FUNCTIONS

rmse <- function(x) sqrt(mean(x^2,na.rm = TRUE))

# DATA

dat <- data.frame(GW = c(
  0.08, 0.04, 0.35, 0.54, 0.39, 0.94, 0.51,
  0.01, 0.44, 0.63, 0.14, 0.79, 0.43, 0.73, NA, NA, NA, NA, NA,
  NA, NA, NA, NA
), Pz1 = c(
  -2.459826614, -2.905007821, -2.241113224,
  -1.549264338, -1.761962438, -1.282612221, -0.428828702, 1.659042479,
  2.63518648, 3.076022461, 1.886216859, 0.124473561, -1.025720789,
  -1.969461882, -2.216075605, -2.508858567, -2.495831241, -2.36513321,
  -2.295002655, -2.103511241, -1.009051105, -0.619616011, -0.941517493
), Pz2 = c(
  -2.916090168, -3.262459435, -2.455396094, -1.488106654,
  -0.417171756, -1.781014095, -0.605012986, 1.037062685, 1.977265974,
  2.587846362, 2.499228916, 1.0852274, -0.503736287, -1.829562138,
  -2.410220303, -2.135932209, -2.105646454, -2.79131286, -2.446274505,
  -2.412041743, -1.467408853, -1.207262381, -1.286558601
), Pz3 = c(
  -2.507944967,
  -3.718722989, -2.812847708, -1.702389524, -0.356014073, -0.436223413,
  -1.10341486, 0.860878402, 1.355286181, 1.929925855, 2.011052817,
  1.698239458, 0.457017552, -1.307577636, -2.270320559, -2.330076907,
  -1.732720095, -2.401128073, -2.872454155, -2.563313593, -1.775939355,
  -1.665620129, -1.874204971
), Pz4 = c(
  -2.526729696, -3.310577787,
  -3.269111262, -2.059841138, -0.570296943, -0.375065729, 0.241375822,
  0.362476528, 1.179101897, 1.307946062, 1.35313231, 1.210063358,
  1.07002961, -0.346823797, -1.748336058, -2.190177163, -1.926864794,
  -2.028201714, -2.482269368, -2.989493243, -1.927211205, -1.974150631,
  -2.332562719
), Pz5 = c(
  -3.284551238, -3.329362517, -2.86096606,
  -2.516104692, -0.927748557, -0.5893486, 0.302533506, 1.70726721,
  0.680700023, 1.131761778, 0.731152517, 0.552142851, 0.58185351,
  0.266188261, -0.787582218, -1.668192661, -1.78696505, -2.222346412,
  -2.109343009, -2.599308456, -2.353390855, -2.125422481, -2.641093221
), Pz6 = c(
  -4.011896321, -4.087184059, -2.87975079, -2.107959491,
  -1.38401211, -0.946800213, 0.088250636, 1.768424893, 2.025490705,
  0.633359905, 0.554968233, -0.069836942, -0.076066996, -0.221987839,
  -0.174570161, -0.707438822, -1.264980548, -2.082446669, -2.303487708,
  -2.226382097, -1.963206068, -2.551602131, -2.792365071
), Pz7 = c(
  -4.769878994,
  -4.814529142, -3.637572331, -2.12674422, -0.975866909, -1.403063767,
  -0.269200978, 1.554142023, 2.086648388, 1.978150587, 0.05656636,
  -0.246021225, -0.698046789, -0.879908346, -0.66274626, -0.094426764,
  -0.304226709, -1.560462167, -2.163587964, -2.420526796, -1.59027971,
  -2.161417344, -3.218544722
))

# MAIN

cor.test(dat$GW,dat$Pz1,method="spearman")
#> 
#>  Spearman's rank correlation rho
#> 
#> data:  dat$GW and dat$Pz1
#> S = 344, p-value = 0.3998
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#>      rho 
#> 0.243956

rmse(dat$GW - dat$Pz1)
#> [1] 2.122577
1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.