Help with ggplot2: Grouping, Factor Variables

Hi,
I am trying to create a ggplot animation with geom_point and geom_line.
The problem is that each group has only one observation per year, that is, only one observation for each group-year pair.
I want to have a line that traces the movement of the point for each group.
How can I achieve that by changing the aesthetics or the data frame?
Any help is appreciated.

# Read the cleaned master data set from a .dta file with haven::read_dta().
# The path points at a local G: drive, so this line only runs on a machine
# that has the file at that location.
library(haven)
master_data_clean <- read_dta("G:/replication/output/master-data-clean.dta")


library(tidyverse)
library(psych)
#> 
#> Attaching package: 'psych'
#> The following objects are masked from 'package:ggplot2':
#> 
#>     %+%, alpha
# Restrict the master data to the thirteen variables used below and hold the
# result as a plain data.frame.
data7 <- as.data.frame(
  select(
    master_data_clean,
    c("latrine", "mboy", "mgirl", "wealth", "hhsize", "agehead", "educhead", "boys", "girls", "sexratio", "haryana", "post","year")
  )
)

# Rows of data7 whose `haryana` and `post` columns equal the given values.
# Rows where the comparison evaluates to NA are dropped, as subset() does.
cell_rows <- function(haryana_value, post_value) {
  keep <- data7$haryana == haryana_value & data7$post == post_value
  data7[keep & !is.na(keep), , drop = FALSE]
}

# psych::describe() table for one cell, with the skew, range and IQR
# statistics switched off, returned as a plain data.frame.
cell_summary <- function(cell) {
  as.data.frame(describe(cell, skew = FALSE, ranges = FALSE, IQR = FALSE))
}

# haryana == 1, post == 0
data8 <- cell_rows(1, 0)
describe_data8 <- cell_summary(data8)

# haryana == 1, post == 1
data9 <- cell_rows(1, 1)
describe_data9 <- cell_summary(data9)

# haryana == 0, post == 0
data10 <- cell_rows(0, 0)
describe_data10 <- cell_summary(data10)

# haryana == 0, post == 1
data11 <- cell_rows(0, 1)
describe_data11 <- cell_summary(data11)

# One row per haryana/post cell, one column per variable. Each row is the
# third column of the matching describe() table (presumably the mean, given
# the skew/ranges/IQR options used above -- confirm against the psych version
# in use).
data_meta1 <- as.data.frame(
  rbind(
    "haryana2004"  = describe_data8[, 3],
    "harayana2008" = describe_data9[, 3],
    "control 2004" = describe_data10[, 3],
    "control2008"  = describe_data11[, 3]
  )
)
names(data_meta1) <- c("latrine", "mboy", "mgirl", "wealth", "hhsize", "agehead", "educhead",
                       "boys", "girls", "sexratio", "haryana", "post","year")

# Plotting helpers appended after the thirteen summary columns:
#   haryana2 - label used for colouring
#   year2    - value used as the animation frame
#   new      - id used as the line group (1 for the haryana rows, 2 for control)
data_meta1$haryana2 <- c("haryana", "haryana", "control", "control")
data_meta1$year2 <- c(2004, 2008, 2004, 2008)
data_meta1$new <- c(1, 1, 2, 2)

library(ggplot2)
library(gganimate)
# Use the black-and-white theme for every plot that follows.
theme_set(theme_bw())

# Points and a connecting line for the latrine value of each cell.
# `frame = year2` assigns every row to an animation frame; the line layer is
# grouped by `new`.
p2 <- ggplot(
  data = data_meta1,
  mapping = aes(x = year, y = latrine, color = as.factor(haryana2), frame = year2)
) +
  geom_point() +
  geom_line(mapping = aes(group = new))

# Render the frames of p2 as an animation.
gganimate(p2)
#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?

#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?

Created on 2018-04-10 by the reprex package (v0.2.0).

Pictures here:

The data set is available on the following link:
https://drive.google.com/file/d/1lEF3QfOPYZ6NtLizsSBhQecmva91kFaZ/view?usp=sharing 1

See this post for some different methods involving tweenr (though I'm sure there are other ways as well).

I'd also suggest you change the title to be more informative so it's easier for people to see what your question is at a glance.

1 Like

I tried it, but it doesn't solve the problem.

Can you please include a reprex of your code for that?

# Read the cleaned master data set from a .dta file with haven::read_dta().
# The path points at a local G: drive, so this line only runs on a machine
# that has the file at that location.
library(haven)
master_data_clean <- read_dta("G:/replication/output/master-data-clean.dta")


library(tidyverse)
library(psych)
#> 
#> Attaching package: 'psych'
#> The following objects are masked from 'package:ggplot2':
#> 
#>     %+%, alpha
# Restrict the master data to the thirteen variables used below and hold the
# result as a plain data.frame.
data7 <- as.data.frame(
  select(
    master_data_clean,
    c("latrine", "mboy", "mgirl", "wealth", "hhsize", "agehead", "educhead", "boys", "girls", "sexratio", "haryana", "post","year")
  )
)

# Rows of data7 whose `haryana` and `post` columns equal the given values.
# Rows where the comparison evaluates to NA are dropped, as subset() does.
cell_rows <- function(haryana_value, post_value) {
  keep <- data7$haryana == haryana_value & data7$post == post_value
  data7[keep & !is.na(keep), , drop = FALSE]
}

# psych::describe() table for one cell, with the skew, range and IQR
# statistics switched off, returned as a plain data.frame.
cell_summary <- function(cell) {
  as.data.frame(describe(cell, skew = FALSE, ranges = FALSE, IQR = FALSE))
}

# haryana == 1, post == 0
data8 <- cell_rows(1, 0)
describe_data8 <- cell_summary(data8)

# haryana == 1, post == 1
data9 <- cell_rows(1, 1)
describe_data9 <- cell_summary(data9)

# haryana == 0, post == 0
data10 <- cell_rows(0, 0)
describe_data10 <- cell_summary(data10)

# haryana == 0, post == 1
data11 <- cell_rows(0, 1)
describe_data11 <- cell_summary(data11)

# One row per haryana/post cell, one column per variable. Each row is the
# third column of the matching describe() table (presumably the mean, given
# the skew/ranges/IQR options used above -- confirm against the psych version
# in use).
data_meta1 <- as.data.frame(
  rbind(
    "haryana2004"  = describe_data8[, 3],
    "harayana2008" = describe_data9[, 3],
    "control 2004" = describe_data10[, 3],
    "control2008"  = describe_data11[, 3]
  )
)
names(data_meta1) <- c("latrine", "mboy", "mgirl", "wealth", "hhsize", "agehead", "educhead",
                       "boys", "girls", "sexratio", "haryana", "post","year")

# Plotting helpers appended after the thirteen summary columns, in this order:
#   haryana2_latrine  - colour label for the latrine layers
#   year2             - value used as the animation frame
#   new               - id used as the line group (1 = haryana rows, 2 = control)
#   haryana2_wealth   - colour label for the wealth layers
#   haryana2_educhead - colour label for the educhead layers
data_meta1$haryana2_latrine <- c("haryana", "haryana", "control", "control")
data_meta1$year2 <- c(2004, 2008, 2004, 2008)
data_meta1$new <- c(1, 1, 2, 2)
data_meta1$haryana2_wealth <- c("haryana_wealth", "haryana_wealth", "control_wealth", "control_wealth")
data_meta1$haryana2_educhead <- c("haryana_educhead", "haryana_educhead", "control_educhead", "control_educhead")

library(ggplot2)
library(gganimate)
# Use the black-and-white theme for every plot that follows.
theme_set(theme_bw())

# Three measures (latrine, wealth, educhead) share one panel. Each measure
# gets a point layer sized by its own value and a line layer grouped by `new`;
# colour comes from the measure-specific label column. `frame = year2`
# assigns every row to an animation frame.
p2 <- ggplot(
  data = data_meta1,
  mapping = aes(x = as.numeric(year), frame = year2)
) +
  # latrine
  geom_point(
    mapping = aes(y = latrine, color = as.factor(haryana2_latrine), size = latrine)
  ) +
  geom_line(
    mapping = aes(y = latrine, group = as.factor(new), color = as.factor(haryana2_latrine))
  ) +
  # wealth
  geom_point(
    mapping = aes(y = wealth, color = as.factor(haryana2_wealth), size = wealth)
  ) +
  geom_line(
    mapping = aes(y = wealth, group = as.factor(new), color = as.factor(haryana2_wealth))
  ) +
  # educhead
  geom_point(
    mapping = aes(y = educhead, color = as.factor(haryana2_educhead), size = educhead)
  ) +
  geom_line(
    mapping = aes(y = educhead, group = as.factor(new), color = as.factor(haryana2_educhead))
  )

# Render the frames of p2 as an animation.
gganimate(p2)
#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?
#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?
#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?

#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?
#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?
#> geom_path: Each group consists of only one observation. Do you need to
#> adjust the group aesthetic?


# Attempt to build interpolated frames with tweenr, passing "year" as the time
# column and "new" as the group column. As written, this call stops with the
# col2rgb error shown in the output below.
# NOTE(review): the error suggests a character column holding "haryana" is
# being treated as colour values -- confirm; converting the label columns to
# factors, or dropping them before tweening, may avoid it.
# NOTE(review): tweenr documents `ease` as the name of a column holding
# easing-function names rather than an easing name itself -- confirm whether
# "linear" needs to be stored in a column of data_meta1 first.
library(tweenr)
data1= tween_elements(data_meta1, time = "year", group = "new", ease = "linear")
#> Error in col2rgb(d): invalid color name 'haryana'
Created on 2018-04-10 by the reprex package (v0.2.0).

What does this mean??

It sounds like your question is about plotting syntax and not (necessarily?) about data munging, so it would probably make it even easier for other people to help you if you could include example data directly in your question. For instance, you might be able to include a sample of the data_meta1 table. Here's what I mean, based on the charts you attached:

# Hand-typed sample of data_meta1 with just the columns the plot needs.
# Every column header needs a leading `~`, and text values must be quoted;
# a bare 2002-02-01 would be evaluated as the subtraction 2002 - 2 - 1.
# `new` is the line-group id used in the question: 1 for the haryana rows,
# 2 for the control rows.
data_meta1 <- tribble(
  ~year,        ~latrine, ~haryana2, ~year2, ~new,
  "2002-02-01",     0.15, "control",   2004,    2,
  "2002-04-01",     0.29, "haryana",   2004,    1,
  "2008-01-01",     0.21, "control",   2008,    2,
  "2008-01-01",     0.46, "haryana",   2008,    1
)

# Store the x-axis values as real dates so ggplot2 uses a continuous date axis.
data_meta1$year <- as.Date(data_meta1$year)

BTW, here's a great tool for creating these from data you already have:

To your question, I wonder if you might be able to adapt the third example in

which uses the "cumulative" argument like this:

# One path per country, animated over `year`. With `cumulative = TRUE` the
# path layer keeps the observations from earlier frames, so each line grows
# as the animation advances instead of showing a single point per frame.
p3 <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, frame = year)
) +
  geom_path(mapping = aes(cumulative = TRUE, group = country)) +
  scale_x_log10() +
  facet_wrap(~ continent)

# Render the frames of p3 as an animation.
gganimate(p3)

For your example it might be something like

# Same plot as in the question, but the line is drawn with geom_path() and
# `cumulative = TRUE`, so each group's path accumulates across frames rather
# than being limited to the single observation in the current frame.
p2 <- ggplot(
  data = data_meta1,
  mapping = aes(x = year, y = latrine, color = as.factor(haryana2), frame = year2)
) +
  geom_point() +
  geom_path(mapping = aes(group = new, cumulative = TRUE))

Good luck!

Thanks a lot, it worked beautifully.
Next time I will include the data as well.
Best.

Just one more thing: how do I change the speed at which the frames transition, and also the pause time at each frame?

See bottom of: