Thanks for the feedback. Here's the code:
Libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(ggmap)
Import the BP-Stats data into a data table called “BP_Stats_Data”
BP_Stats_Data<- read.table("bp-stats.csv", header=TRUE, sep = ",", dec= ".")
Check structure
str(BP_Stats_Data)
Display summary
summary(data.frame(BP_Stats_Data))
Remove missing data (data cleaning)
BP_Stats_Data <- na.omit(BP_Stats_Data)
Visualise levels of CO2 over the 27 years using a bar plot (data represention)
table(BP_Stats_Data$co2_mtco2,BP_Stats_Data$Year)
ggplot(BP_Stats_Data, aes(x=Year, co2_mtco2)) + geom_bar(stat="identity")+ ggtitle("Carbon dioxide levels 1990-2017")
Data subset selection of Renewables
Using subset function
Renewables <- c("biofuels_kboed", "biofuels_ktoe", "biogeo_mtoe", "biogeo_twh", "biomass_mtoe", "biomass_twh", "geothermal_mtoe", "geothermal_twh", "hydro_mtoe", "hydro_twh", "renewables_mtoe", "renewables_twh", "solar_mtoe", "solar_twh", "wind_mtoe", "wind_twh")
Renewables <- subset(BP_Stats_Data,biofuels_kboed > 0 | biofuels_ktoe > 0 | biogeo_mtoe > 0 | biogeo_twh > 0 | biomass_mtoe > 0 | biomass_twh > 0 | geothermal_mtoe > 0 | geothermal_twh > 0 | hydro_mtoe > 0 | hydro_twh > 0 | renewables_mtoe > 0 | renewables_twh > 0 | solar_mtoe > 0 | solar_twh > 0 | wind_mtoe > 0 | wind_twh > 0, select=c(Country, Year, biofuels_kboed, biofuels_ktoe, biogeo_mtoe, biogeo_twh, biomass_mtoe, biomass_twh, geothermal_mtoe, geothermal_twh, hydro_mtoe, hydro_twh, renewables_mtoe, renewables_twh, solar_mtoe, solar_twh, wind_mtoe, wind_twh))
Renewables <- Renewables %>% mutate (Combined_Renewables = rowSums(.[3:18]))
str(Renewables)
summary(Renewables)
Use ggplot2 to visualize Combined Renewables by Year
ggplot(data=Renewables, mapping = aes(x = Year,Combined_Renewables )) + geom_line(aes(col = Combined_Renewables)) + ggtitle("Combined Renewables by Year")
Data subset selection of Non-Renewables
Using subset function
Non_Renewables <- c("co2_mtco2","coalcons_mtoe","coalprod_mt","coalprod_mtoe","elect_twh","electbyfuel_coal","electbyfuel_gas","electbyfuel_oil","electbyfuel_other","gascons_bcfd","gascons_bcm","gascons_mtoe","gasprod_bcfd","gasprod_bcm","gasprod_mtoe","nuclear_mtoe","nuclear_twh","oilcons_kbd","oilcons_mt","oilcons_mtoe","oilprod_kbd","oilprod_mt","oilreserves_bbl","primary_mtoe")
Non_Renewables <- subset(BP_Stats_Data,co2_mtco2 > 0 | coalcons_mtoe > 0 | coalprod_mt > 0 | coalprod_mtoe > 0 | elect_twh > 0 | electbyfuel_coal > 0 | electbyfuel_gas > 0 | electbyfuel_oil > 0 | electbyfuel_other > 0 | gascons_bcfd > 0 | gascons_bcm > 0 | gascons_mtoe > 0 | gasprod_bcfd > 0 | gasprod_bcm > 0 | gasprod_mtoe > 0 | nuclear_mtoe > 0 | nuclear_twh > 0 | oilcons_kbd > 0 | oilcons_mt > 0 | oilcons_mtoe > 0 | oilprod_kbd > 0 | oilprod_mt > 0 | oilreserves_bbl > 0 | primary_mtoe> 0, select=c(Country, Year, co2_mtco2, coalcons_mtoe, coalprod_mt, coalprod_mtoe, elect_twh, electbyfuel_coal, electbyfuel_gas, electbyfuel_oil, electbyfuel_other, gascons_bcfd, gascons_bcm, gascons_mtoe, gasprod_bcfd, gasprod_bcm, gasprod_mtoe, nuclear_mtoe, nuclear_twh, oilcons_kbd, oilcons_mt, oilcons_mtoe, oilprod_kbd, oilprod_mt, oilreserves_bbl, primary_mtoe))
Non_Renewables <- Non_Renewables %>% mutate (Combined_Non_Renewables = rowSums(.[3:26]))
str(Non_Renewables)
summary(Non_Renewables)
Use ggplot2 to visualize Combined Non-Renewables by Year
ggplot(data=Non_Renewables, mapping = aes(x = Year,Combined_Non_Renewables )) + geom_line(aes(col = Combined_Non_Renewables)) + ggtitle("Combined Non-Renewables by Year")
Group based data subset - group by Country for 2017
Global_Energy <- subset.data.frame(BP_Stats_Data,Year == 2017, select = c(Country,OPEC,EU,OECD))
group_by(Country)
summary(Global_Energy)
str(Global_Energy)
dput(Global_Energy)