How do I exclude a specific Column in my data frame when creating a bar chart AND use that column later to create an overlapping line graph

Hello.

I want to re-create a graph similar to the one below on the left.

What I have currently is

library(readxl)
library(dplyr)
library(plm)
library(reshape2)
library(lubridate)
library(knitr)
library(ggplot2)
library(ggthemes)
library(scales)
library(tidyr)
library(plyr)

df <- read_excel("Australia_BOP.xlsx")

new_df <- df[, c("year", "current_account", "balance_on_goods_and_services", "primary_income_net","secondary_income_net")]

# change to long form
new_df_long <- melt(new_df, id.vars = c("year"),
                    variable.name = "category",
                    value.name = "millions_of_us_dollars")

pl2 <- ggplot(data = new_df_long, aes(fill = category, x = year, y = millions_of_us_dollars))
pl2 <- pl2 + geom_bar(stat = "identity")
pl2

What I want is to still keep "current_account" in my dataframe, and create a bar chart with just "balance_on_goods_and_services", "primary_income_net","secondary_income_net". I need "current_account" so I can create a line chart with it.

Sorry I can't add more than 1 photo apparently.
Any help would be appreciated.

Thanks

The attached photos are

for the data set

My current output is

Hi, welcome to the forum.

The code is great but a screenshot of the data is not really useful.

Can you supply some sample data? A handy way to supply some sample data is the dput() function. In the case of a large dataset something like dput(head(mydata, 100)) should supply the data we need. Just do dput(mydata) where mydata is your data. Copy the output and paste it here.

Of course!

structure(list(year = c("Q1 2019", "Q2 2019", "Q3 2019", "Q4 2019",
"Q1 2020", "Q2 2020", "Q3 2020", "Q4 2020", "Q1 2021", "Q2 2021",
"Q3 2021", "Q4 2021"), current_account = c(-1109.02667393265,
3660.72528764784, 2139.92399300988, 1486.36342433968, 5515.29390026161,
12064.9430330826, 5022.83386227289, 9913.46475323625, 13303.9556437895,
20135.4601644229, 12518.021780825, 4878.9121836567), balance_on_goods_and_services = c(11801.8387671099,
13297.5303303175, 12799.1033701068, 9568.76352533526, 13843.5061283231,
15801.9999571294, 8970.41928486537, 11837.168974812, 20421.3013504932,
26861.9146673647, 26041.3654940305, 17482.4650165682), primary_income_credit = c(12546.1758732497,
11863.2151611759, 11189.7050563377, 12043.3023573049, 11084.5431930097,
9310.18322646373, 10899.1417022828, 10769.0933481838, 12155.2236224838,
12160.4561654009, 12758.3289971177, 14049.3723657532), primary_income_debit = c(24983.3722467487,
21420.8807438295, 21846.8281323522, 20217.9594984319, 19092.9710217002,
12476.0914260247, 14594.905800346, 12461.369016348, 18767.0035539964,
17569.0401021531, 25983.3096222044, 26400.0525283542), primary_income_net = c(-12437.196373499,
-9557.6655826536, -10657.1230760145, -8174.657141127, -8008.4278286905,
-3165.90819956097, -3695.7640980632, -1692.2756681642, -6611.7799315126,
-5408.5839367522, -13224.9806250867, -12350.680162601), secondary_income_credit = c(1867.60946631433,
1915.45507207133, 1878.08832186004, 1879.99346223377, 1742.36438175766,
1516.72445434643, 1644.70802583184, 1722.2080678168, 1786.48701294582,
1803.88829106719, 1792.38318207304, 1826.95038751716), secondary_income_debit = c(2341.27853385782,
1994.59453208744, 1880.14462294237, 1787.73642210234, 2062.1487811286,
2087.87317883227, 1896.52935036112, 1953.63662122838, 2292.05278813689,
3121.75885725675, 2090.7462701918, 2079.82305782767), secondary_income_net = c(-473.66906754349,
-79.13946001611, -2.05630108233004, 92.2570401314301, -319.78439937094,
-571.14872448584, -251.82132452928, -231.42855341158, -505.56577519107,
-1317.87056618956, -298.36308811876, -252.87267031051), nominal_gdp = c(348056.96609927,
348056.96609927, 348056.96609927, 348056.96609927, 331725.264780802,
331725.264780802, 331725.264780802, 331725.264780802, 388166.840809015,
388166.840809015, 388166.840809015, 388166.840809015)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -12L))

Is this good?

Looks good to me.

thanks.

1 Like

Does this help? Note, for my own convenience i renamed your tibbles to dat1 (df) and dat2 ( new_df_long).

pl1 <- ggplot(dat2, aes(fill = category, x = year, y = millions_of_us_dollars))
pl2 <- pl1 + geom_bar(stat = "identity", show.legend = TRUE) 

 pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
                      scale_linetype_manual('My line',values='solid')

There seems to be a compile error.

I originally thought for the last line

pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
                      scale_linetype_manual('My line',values='solid')

should be instead

pl2 <- pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
                      scale_linetype_manual('My line',values='solid')

pl2

but that also gives a compile error.

What should I do?

does new_df_long preserve 'current_account' as a column ? probably not... and that is probably good ?

No. The columns in "new_df_long" do not have "current_account" as a column.

Still, that doesn't bring me closer to creating a bar chart and line chart in the same frame, which is what I want.


new_df_long_flt <- filter(new_df_long,
                          category %in% c("current_account",
                                          "balance_on_goods_and_services",
                                          "primary_income_net",
                                          "secondary_income_net")) |> 
  mutate(category = as.character(category))
cur <- new_df_long_flt |> filter(category=="current_account")
notcur <- setdiff(new_df_long_flt,cur)

pl1 <- ggplot(notcur, aes(fill = category, 
                          x = year,
                          y = millions_of_us_dollars))
pl2 <- pl1 + geom_col(show.legend = TRUE) 

pl2 + geom_line(data=cur,
                aes(fill=NULL,
                    color="Current Account"),
                 group=1) +
  scale_color_manual(values=c("Current Account"="black"))+
  theme(legend.position = "top")
1 Like

Beats me. It was late when I was doing this and I may have messed up a copy & paste.

Here is my complete code and a crappy version of the plot. The plot settings need tweaking.

library(tidyverse)
library(plm)
library(reshape2)
library(lubridate)
library(knitr)
library(ggthemes)
library(scales)
library(plyr)

dat1 <- structure(list(year = c("Q1 2019", "Q2 2019", "Q3 2019", "Q4 2019",
    "Q1 2020", "Q2 2020", "Q3 2020", "Q4 2020", "Q1 2021", "Q2 2021",
    "Q3 2021", "Q4 2021"), current_account = c(-1109.02667393265,
                   3660.72528764784, 2139.92399300988, 1486.36342433968, 5515.29390026161,
                   12064.9430330826, 5022.83386227289, 9913.46475323625, 13303.9556437895,
                   20135.4601644229, 12518.021780825, 4878.9121836567), balance_on_goods_and_services = c(11801.8387671099,
                      13297.5303303175, 12799.1033701068, 9568.76352533526, 13843.5061283231,
                      15801.9999571294, 8970.41928486537, 11837.168974812, 20421.3013504932,
                      26861.9146673647, 26041.3654940305, 17482.4650165682), primary_income_credit = c(12546.1758732497,
                   11863.2151611759, 11189.7050563377, 12043.3023573049, 11084.5431930097,
                   9310.18322646373, 10899.1417022828, 10769.0933481838, 12155.2236224838,
                   12160.4561654009, 12758.3289971177, 14049.3723657532), primary_income_debit = c(24983.3722467487,
               21420.8807438295, 21846.8281323522, 20217.9594984319, 19092.9710217002,
               12476.0914260247, 14594.905800346, 12461.369016348, 18767.0035539964,
               17569.0401021531, 25983.3096222044, 26400.0525283542), primary_income_net = c(-12437.196373499,
         -9557.6655826536, -10657.1230760145, -8174.657141127, -8008.4278286905,
         -3165.90819956097, -3695.7640980632, -1692.2756681642, -6611.7799315126,
         -5408.5839367522, -13224.9806250867, -12350.680162601), secondary_income_credit = c(1867.60946631433,
         1915.45507207133, 1878.08832186004, 1879.99346223377, 1742.36438175766,
         1516.72445434643, 1644.70802583184, 1722.2080678168, 1786.48701294582,
         1803.88829106719, 1792.38318207304, 1826.95038751716), secondary_income_debit = c(2341.27853385782,
       1994.59453208744, 1880.14462294237, 1787.73642210234, 2062.1487811286,
       2087.87317883227, 1896.52935036112, 1953.63662122838, 2292.05278813689,
       3121.75885725675, 2090.7462701918, 2079.82305782767), secondary_income_net = c(-473.66906754349,
  -79.13946001611, -2.05630108233004, 92.2570401314301, -319.78439937094,
  -571.14872448584, -251.82132452928, -231.42855341158, -505.56577519107,
  -1317.87056618956, -298.36308811876, -252.87267031051), nominal_gdp = c(348056.96609927,
                  348056.96609927, 348056.96609927, 348056.96609927, 331725.264780802,
                  331725.264780802, 331725.264780802, 331725.264780802, 388166.840809015,
                  388166.840809015, 388166.840809015, 388166.840809015)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -12L))

dat2 <- dat1[, c("year", "current_account", "balance_on_goods_and_services", "primary_income_net","secondary_income_net")]

# change to long form
dat3 <- melt(dat2, id.vars = c("year", "current_account"),
                    variable.name = "category",
                    value.name = "millions_of_us_dollars")


pl1 <- ggplot(dat3, aes(fill = category, x = year, y = millions_of_us_dollars))
pl2 <- pl1 + geom_bar(stat = "identity", show.legend = TRUE) 

pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
  scale_linetype_manual('My line',values='solid')

1 Like

Oh wow! Thank you so much. Yea, I'll look at the code carefully and maybe I'll ask you if I have any questions about it.

Appreciate it!

Thanks you so much! This also works! I like how you did the formatting as well. I'll look carefully at the code.

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.