How to give the bars different colors in a googleVis bar chart?


#1

Hi guys!

The following plot is what I want to get.

# Example data (dput() output): one row per data source, with the type of
# document it provides and the number of documents found.
da <- structure(list(data_source = structure(c(3L, 6L, 1L, 5L, 4L,
7L, 2L, 8L, 9L), .Label = c("arXiv", "engineering.com", "EPO",
"Foerderkatalog", "Fraunhofer Publica", "newatlas.com", "opcconnect.opcfoundation.org",
"phys.org", "theengineer.co.uk"), class = "factor"), data_type = structure(c(3L,
2L, 4L, 4L, 1L, 2L, 2L, 2L, 2L), .Label = c("FUNDING_PROJECT",
"NEWS", "PATENT", "SCIENT_PUBLICATION"), class = "factor"), count = c(1253L,
656L, 297L, 26L, 24L, 3L, 2L, 1L, 1L)), .Names = c("data_source",
"data_type", "count"), class = "data.frame", row.names = c(NA,
-9L))
plotData <- da

library(ggplot2)
library(dplyr)

# Order the bars by count: the levels of data_source follow the rows sorted
# by ascending count.
plotData[["data_source"]] <- factor(
  plotData[["data_source"]],
  levels = as.character(arrange(plotData, count)[["data_source"]])
)

# Fix the order of the data types; this is also the order of the legend.
plotData[["data_type"]] <- factor(
  plotData[["data_type"]],
  levels = c("FUNDING_PROJECT", "SCIENT_PUBLICATION", "PATENT", "NEWS")
)

# Text shown on each bar: the count, padded with spaces so the label does not
# touch the end of the bar.
plotData[["yLabel"]] <- paste0("  ", plotData[["count"]], "  ")

p <- ggplot(plotData, aes(x = data_source, y = count)) +
  # One bar per data source, filled according to its data type
  geom_col(aes(fill = data_type), position = position_dodge()) +
  # Values as labels on the bars. hjust is a fixed setting, not a mapping, so
  # it is passed outside aes().
  geom_text(
    aes(label = yLabel),
    hjust = "inward",
    size = (5 / 14) * 12 # 12 pt text; geom_text sizes are in mm (~5/14 mm/pt)
  ) +
  # Assign colours and legend labels to the data types
  scale_fill_manual(
    name = "",
    values = c(
      "FUNDING_PROJECT"    = "red",
      "SCIENT_PUBLICATION" = "blue",
      "PATENT"             = "green",
      "NEWS"               = "black"
    ),
    labels = c(
      "FUNDING_PROJECT"    = "Funded projects",
      "SCIENT_PUBLICATION" = "Scientific publications",
      "PATENT"             = "Patents",
      "NEWS"               = "News"
    )
  ) +
  # Flip coordinates so the bars run horizontally
  coord_flip()

plot(p)

I also tried to use googleVis to get the plot, but I do not know how to set the colors, and I think the legend will be hard to set up too.


# Example data, built column by column. The factor levels are spelled out so
# the result matches the dput() output echoed below.
da <- data.frame(
  data_source = factor(
    c("EPO", "newatlas.com", "arXiv", "Fraunhofer Publica", "Foerderkatalog",
      "opcconnect.opcfoundation.org", "engineering.com", "phys.org",
      "theengineer.co.uk"),
    levels = c("arXiv", "engineering.com", "EPO", "Foerderkatalog",
               "Fraunhofer Publica", "newatlas.com",
               "opcconnect.opcfoundation.org", "phys.org", "theengineer.co.uk")
  ),
  data_type = factor(
    c("PATENT", "NEWS", "SCIENT_PUBLICATION", "SCIENT_PUBLICATION",
      "FUNDING_PROJECT", "NEWS", "NEWS", "NEWS", "NEWS"),
    levels = c("FUNDING_PROJECT", "NEWS", "PATENT", "SCIENT_PUBLICATION")
  ),
  count = c(1253L, 656L, 297L, 26L, 24L, 3L, 2L, 1L, 1L)
)
#> structure(list(data_source = structure(c(3L, 6L, 1L, 5L, 4L, 
#> 7L, 2L, 8L, 9L), .Label = c("arXiv", "engineering.com", "EPO", 
#> "Foerderkatalog", "Fraunhofer Publica", "newatlas.com", "opcconnect.opcfoundation.org", 
#> "phys.org", "theengineer.co.uk"), class = "factor"), data_type = structure(c(3L, 
#> 2L, 4L, 4L, 1L, 2L, 2L, 2L, 2L), .Label = c("FUNDING_PROJECT", 
#> "NEWS", "PATENT", "SCIENT_PUBLICATION"), class = "factor"), count = c(1253L, 
#> 656L, 297L, 26L, 24L, 3L, 2L, 1L, 1L)), .Names = c("data_source", 
#> "data_type", "count"), class = "data.frame", row.names = c(NA, 
#> -9L))
library(googleVis)
#> Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
#> 
#> Welcome to googleVis version 0.6.2
#> 
#> Please read Google's Terms of Use
#> before you start using the package:
#> https://developers.google.com/terms/
#> 
#> Note, the plot method of googleVis will by default use
#> the standard browser to display its output.
#> 
#> See the googleVis package vignettes for more details,
#> or visit http://github.com/mages/googleVis.
#> 
#> To suppress this message use:
#> suppressPackageStartupMessages(library(googleVis))

# Plain bar chart: one bar per data source, bar length given by count.
# No colours are set here, so every bar gets the default series colour.
p <- gvisBarChart(
  da,
  xvar = "data_source",
  yvar = "count"
)
plot(p)
#> starting httpd help server ...
#>  done

Thank you.
Song


#2

It will be much easier to help you if you post a reproducible example (i.e. a reprex). This includes the code that you used to create both your ggplot2 and googleVis versions of the graphs (including any data wrangling you do prior to the graphs) and your data (you can use dput()). This is much easier than getting it from a picture or from a link, as people can just copy and paste it directly into their R session.


#3
# Example data. It was originally read with
#   read.csv("/Users/song/Downloads/da.csv", header = TRUE, sep = ";")
# and captured with dput(), so the example runs without the local file.
da <- structure(list(data_source = structure(c(3L, 6L, 1L, 5L, 4L,
7L, 2L, 8L, 9L), .Label = c("arXiv", "engineering.com", "EPO",
"Foerderkatalog", "Fraunhofer Publica", "newatlas.com", "opcconnect.opcfoundation.org",
"phys.org", "theengineer.co.uk"), class = "factor"), data_type = structure(c(3L,
2L, 4L, 4L, 1L, 2L, 2L, 2L, 2L), .Label = c("FUNDING_PROJECT",
"NEWS", "PATENT", "SCIENT_PUBLICATION"), class = "factor"), count = c(1253L,
656L, 297L, 26L, 24L, 3L, 2L, 1L, 1L)), .Names = c("data_source",
"data_type", "count"), class = "data.frame", row.names = c(NA,
-9L))
plotData <- da

library(ggplot2)

# Order the bars by count: the levels of data_source follow the rows sorted
# by ascending count. Base order() is used because only ggplot2 is attached
# in this snippet (dplyr::arrange() would not be found).
count_order <- order(plotData[["count"]])
plotData[["data_source"]] <- factor(
  plotData[["data_source"]],
  levels = as.character(plotData[["data_source"]][count_order])
)

# Fix the order of the data types; this is also the order of the legend.
plotData[["data_type"]] <- factor(
  plotData[["data_type"]],
  levels = c("FUNDING_PROJECT", "SCIENT_PUBLICATION", "PATENT", "NEWS")
)

# Text shown on each bar: the count, padded with spaces so the label does not
# touch the end of the bar.
plotData[["yLabel"]] <- paste0("  ", plotData[["count"]], "  ")

p <- ggplot(plotData, aes(x = data_source, y = count)) +
  # One bar per data source, filled according to its data type
  geom_col(aes(fill = data_type), position = position_dodge()) +
  # Values as labels on the bars. hjust is a fixed setting, not a mapping, so
  # it is passed outside aes().
  geom_text(
    aes(label = yLabel),
    hjust = "inward",
    size = (5 / 14) * 12 # 12 pt text; geom_text sizes are in mm (~5/14 mm/pt)
  ) +
  # Assign colours and legend labels to the data types
  scale_fill_manual(
    name = "",
    values = c(
      "FUNDING_PROJECT"    = "red",
      "SCIENT_PUBLICATION" = "blue",
      "PATENT"             = "green",
      "NEWS"               = "black"
    ),
    labels = c(
      "FUNDING_PROJECT"    = "Funded projects",
      "SCIENT_PUBLICATION" = "Scientific publications",
      "PATENT"             = "Patents",
      "NEWS"               = "News"
    )
  ) +
  # Flip coordinates so the bars run horizontally
  coord_flip()

plot(p)

Hi.

Thank you for the help. It is very helpful. In the past, I always found it hard to ask questions; that will not happen again.

I changed my question.

Best
Song


#4

So from your edited question, it seems you are OK with doing this in either ggplot or googleVis. Since I have never used googleVis, I cannot help you with that. As for ggplot, you are factoring the wrong variable to order the bars. You need to factor the data_source column and the data_type column (assuming you want the legend ordered in a specific way).

If you replace your plotData <- da with this:

library(dplyr)

# Sort the rows by ascending count, then freeze that row order into the
# levels of data_source so the bars are drawn in count order. The levels of
# data_type are set explicitly to control the order of the legend.
plotData <- da %>%
  arrange(count) %>%
  mutate(data_source = factor(data_source, levels = data_source)) %>%
  mutate(data_type = factor(
    data_type,
    levels = c("FUNDING_PROJECT", "SCIENT_PUBLICATION", "PATENT", "NEWS")
  ))

Then remove your factor statement (you could leave it but it is just redundant). Since you set the colors in your scale_fill_manual function I am assuming you are happy with those colors. If you want the exact colors shown in your example then you can use @daattali’s color picker to try and match them as closely as possible.

If you leave the colors as you have them, then adding the above code prior to your ggplot code will give you this:

The count for the News category is there but if you leave the fill color as black then the text does not show.


#5

Hi.

Thanks. I also found out why my code was wrong: I wrap the plot in a function and pass plotData into it, and plotData had always already been factored by data_source before that. I forgot this.

The reason I am trying googleVis is that I will need a reactive plot later; I would use ggplotly for that, but ggplotly is not happy with coord_flip(). googleVis seemed easy, but it actually is not.

The worst case will be that I use plotly to rewrite the snippet done in ggplot.

Best
Song


#6

Hi. I made some progress in googleVis. Just letting you know.

# Example data (dput() output): one row per data source, with the type of
# document it provides and the number of documents found.
da <- structure(list(data_source = structure(c(3L, 6L, 1L, 5L, 4L,
7L, 2L, 8L, 9L), .Label = c("arXiv", "engineering.com", "EPO",
"Foerderkatalog", "Fraunhofer Publica", "newatlas.com", "opcconnect.opcfoundation.org",
"phys.org", "theengineer.co.uk"), class = "factor"), data_type = structure(c(3L,
2L, 4L, 4L, 1L, 2L, 2L, 2L, 2L), .Label = c("FUNDING_PROJECT",
"NEWS", "PATENT", "SCIENT_PUBLICATION"), class = "factor"), count = c(1253L,
656L, 297L, 26L, 24L, 3L, 2L, 1L, 1L)), .Names = c("data_source",
"data_type", "count"), class = "data.frame", row.names = c(NA,
-9L))

library(googleVis)

# Colour for each data type. Looking the colour up from data_type, instead of
# typing one colour per row, keeps the bars correct if rows are added or
# reordered.
type_colours <- c(
  FUNDING_PROJECT    = "red",
  SCIENT_PUBLICATION = "blue",
  PATENT             = "green",
  NEWS               = "black"
)

# Per-bar colour, stored in a column named "<yvar>.style" and passed next to
# the value column in yvar.
# NOTE(review): the ".style" suffix is understood to be the googleVis naming
# convention for a style role column; confirm against the googleVis roles
# vignette.
da[["count.style"]] <- unname(type_colours[as.character(da[["data_type"]])])

p <- gvisBarChart(
  da,
  xvar = "data_source",
  yvar = c("count", "count.style")
)
plot(p)