How do I exclude a specific Column in my data frame when creating a bar chart AND use that column later to create an overlapping line graph

Hello.

I want to re-create a graph similar to the one below on the left.

What I have currently is

library(readxl)
library(dplyr)
library(plm)
library(reshape2)
library(lubridate)
library(knitr)
library(ggplot2)
library(ggthemes)
library(scales)
library(tidyr)
library(plyr)

df <- read_excel("Australia_BOP.xlsx")

new_df <- df[, c("year", "current_account", "balance_on_goods_and_services", "primary_income_net","secondary_income_net")]

# change to long form
new_df_long <- melt(new_df, id.vars = c("year"),
                    variable.name = "category",
                    value.name = "millions_of_us_dollars")

pl2 <- ggplot(data = new_df_long, aes(fill = category, x = year, y = millions_of_us_dollars))
pl2 <- pl2 + geom_bar(stat = "identity")
pl2

What I want is to still keep "current_account" in my dataframe, and create a bar chart with just "balance_on_goods_and_services", "primary_income_net","secondary_income_net". I need "current_account" so I can create a line chart with it.

Sorry I can't add more than 1 photo apparently.
Any help would be appreciated.

Thanks

The attached photos are

for the data set

My current output is

Hi, welcome to the forum.

The code is great but a screenshot of the data is not really useful.

Can you supply some sample data? A handy way to supply some sample data is the dput() function. In the case of a large dataset something like dput(head(mydata, 100)) should supply the data we need. Just do dput(mydata) where mydata is your data. Copy the output and paste it here.

Of course!

structure(list(year = c("Q1 2019", "Q2 2019", "Q3 2019", "Q4 2019",
"Q1 2020", "Q2 2020", "Q3 2020", "Q4 2020", "Q1 2021", "Q2 2021",
"Q3 2021", "Q4 2021"), current_account = c(-1109.02667393265,
3660.72528764784, 2139.92399300988, 1486.36342433968, 5515.29390026161,
12064.9430330826, 5022.83386227289, 9913.46475323625, 13303.9556437895,
20135.4601644229, 12518.021780825, 4878.9121836567), balance_on_goods_and_services = c(11801.8387671099,
13297.5303303175, 12799.1033701068, 9568.76352533526, 13843.5061283231,
15801.9999571294, 8970.41928486537, 11837.168974812, 20421.3013504932,
26861.9146673647, 26041.3654940305, 17482.4650165682), primary_income_credit = c(12546.1758732497,
11863.2151611759, 11189.7050563377, 12043.3023573049, 11084.5431930097,
9310.18322646373, 10899.1417022828, 10769.0933481838, 12155.2236224838,
12160.4561654009, 12758.3289971177, 14049.3723657532), primary_income_debit = c(24983.3722467487,
21420.8807438295, 21846.8281323522, 20217.9594984319, 19092.9710217002,
12476.0914260247, 14594.905800346, 12461.369016348, 18767.0035539964,
17569.0401021531, 25983.3096222044, 26400.0525283542), primary_income_net = c(-12437.196373499,
-9557.6655826536, -10657.1230760145, -8174.657141127, -8008.4278286905,
-3165.90819956097, -3695.7640980632, -1692.2756681642, -6611.7799315126,
-5408.5839367522, -13224.9806250867, -12350.680162601), secondary_income_credit = c(1867.60946631433,
1915.45507207133, 1878.08832186004, 1879.99346223377, 1742.36438175766,
1516.72445434643, 1644.70802583184, 1722.2080678168, 1786.48701294582,
1803.88829106719, 1792.38318207304, 1826.95038751716), secondary_income_debit = c(2341.27853385782,
1994.59453208744, 1880.14462294237, 1787.73642210234, 2062.1487811286,
2087.87317883227, 1896.52935036112, 1953.63662122838, 2292.05278813689,
3121.75885725675, 2090.7462701918, 2079.82305782767), secondary_income_net = c(-473.66906754349,
-79.13946001611, -2.05630108233004, 92.2570401314301, -319.78439937094,
-571.14872448584, -251.82132452928, -231.42855341158, -505.56577519107,
-1317.87056618956, -298.36308811876, -252.87267031051), nominal_gdp = c(348056.96609927,
348056.96609927, 348056.96609927, 348056.96609927, 331725.264780802,
331725.264780802, 331725.264780802, 331725.264780802, 388166.840809015,
388166.840809015, 388166.840809015, 388166.840809015)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -12L))

Is this good?

Looks good to me.

thanks.

1 Like

Does this help? Note, for my own convenience i renamed your tibbles to dat1 (df) and dat2 ( new_df_long).

pl1 <- ggplot(dat2, aes(fill = category, x = year, y = millions_of_us_dollars))
pl2 <- pl1 + geom_bar(stat = "identity", show.legend = TRUE) 

 pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
                      scale_linetype_manual('My line',values='solid')

There seems to be a compile error.

I originally thought for the last line

pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
                      scale_linetype_manual('My line',values='solid')

should be instead

pl2 <- pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
                      scale_linetype_manual('My line',values='solid')

pl2

but that also gives a compile error.

What should I do?

does new_df_long preserve 'current_account' as a column ? probably not... and that is probably good ?

No. The columns in "new_df_long" do not have "current_account" as a column.

Still, that doesn't bring me closer to creating a bar chart and line chart in the same frame, which is what I want.


new_df_long_flt <- filter(new_df_long,
                          category %in% c("current_account",
                                          "balance_on_goods_and_services",
                                          "primary_income_net",
                                          "secondary_income_net")) |> 
  mutate(category = as.character(category))
cur <- new_df_long_flt |> filter(category=="current_account")
notcur <- setdiff(new_df_long_flt,cur)

pl1 <- ggplot(notcur, aes(fill = category, 
                          x = year,
                          y = millions_of_us_dollars))
pl2 <- pl1 + geom_col(show.legend = TRUE) 

pl2 + geom_line(data=cur,
                aes(fill=NULL,
                    color="Current Account"),
                 group=1) +
  scale_color_manual(values=c("Current Account"="black"))+
  theme(legend.position = "top")
1 Like

Beats me. It was late when I was doing this and I may have messed up a copy & paste.

Here is my complete code and a crappy version of the plot. The plot settings need tweaking.

library(tidyverse)
library(plm)
library(reshape2)
library(lubridate)
library(knitr)
library(ggthemes)
library(scales)
library(plyr)

dat1 <- structure(list(year = c("Q1 2019", "Q2 2019", "Q3 2019", "Q4 2019",
    "Q1 2020", "Q2 2020", "Q3 2020", "Q4 2020", "Q1 2021", "Q2 2021",
    "Q3 2021", "Q4 2021"), current_account = c(-1109.02667393265,
                   3660.72528764784, 2139.92399300988, 1486.36342433968, 5515.29390026161,
                   12064.9430330826, 5022.83386227289, 9913.46475323625, 13303.9556437895,
                   20135.4601644229, 12518.021780825, 4878.9121836567), balance_on_goods_and_services = c(11801.8387671099,
                      13297.5303303175, 12799.1033701068, 9568.76352533526, 13843.5061283231,
                      15801.9999571294, 8970.41928486537, 11837.168974812, 20421.3013504932,
                      26861.9146673647, 26041.3654940305, 17482.4650165682), primary_income_credit = c(12546.1758732497,
                   11863.2151611759, 11189.7050563377, 12043.3023573049, 11084.5431930097,
                   9310.18322646373, 10899.1417022828, 10769.0933481838, 12155.2236224838,
                   12160.4561654009, 12758.3289971177, 14049.3723657532), primary_income_debit = c(24983.3722467487,
               21420.8807438295, 21846.8281323522, 20217.9594984319, 19092.9710217002,
               12476.0914260247, 14594.905800346, 12461.369016348, 18767.0035539964,
               17569.0401021531, 25983.3096222044, 26400.0525283542), primary_income_net = c(-12437.196373499,
         -9557.6655826536, -10657.1230760145, -8174.657141127, -8008.4278286905,
         -3165.90819956097, -3695.7640980632, -1692.2756681642, -6611.7799315126,
         -5408.5839367522, -13224.9806250867, -12350.680162601), secondary_income_credit = c(1867.60946631433,
         1915.45507207133, 1878.08832186004, 1879.99346223377, 1742.36438175766,
         1516.72445434643, 1644.70802583184, 1722.2080678168, 1786.48701294582,
         1803.88829106719, 1792.38318207304, 1826.95038751716), secondary_income_debit = c(2341.27853385782,
       1994.59453208744, 1880.14462294237, 1787.73642210234, 2062.1487811286,
       2087.87317883227, 1896.52935036112, 1953.63662122838, 2292.05278813689,
       3121.75885725675, 2090.7462701918, 2079.82305782767), secondary_income_net = c(-473.66906754349,
  -79.13946001611, -2.05630108233004, 92.2570401314301, -319.78439937094,
  -571.14872448584, -251.82132452928, -231.42855341158, -505.56577519107,
  -1317.87056618956, -298.36308811876, -252.87267031051), nominal_gdp = c(348056.96609927,
                  348056.96609927, 348056.96609927, 348056.96609927, 331725.264780802,
                  331725.264780802, 331725.264780802, 331725.264780802, 388166.840809015,
                  388166.840809015, 388166.840809015, 388166.840809015)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -12L))

dat2 <- dat1[, c("year", "current_account", "balance_on_goods_and_services", "primary_income_net","secondary_income_net")]

# change to long form
dat3 <- melt(dat2, id.vars = c("year", "current_account"),
                    variable.name = "category",
                    value.name = "millions_of_us_dollars")


pl1 <- ggplot(dat3, aes(fill = category, x = year, y = millions_of_us_dollars))
pl2 <- pl1 + geom_bar(stat = "identity", show.legend = TRUE) 

pl2 + geom_line( aes(year, current_account,linetype='current_account', group=1), color="black") +
  scale_linetype_manual('My line',values='solid')

1 Like

Oh wow! Thank you so much. Yea, I'll look at the code carefully and maybe I'll ask you if I have any questions about it.

Appreciate it!

Thanks you so much! This also works! I like how you did the formatting as well. I'll look carefully at the code.