library(tidyverse)

# ---- Load data and compute the cases/deaths correlation ----

# Read the file with no header row: every line, including the first, is
# treated as data and the columns receive default names.
covidcases_active_and_death_correlations <- read_csv(
  "full_data.csv",
  col_names = FALSE
)

# Transpose so that each line of the file becomes a column. t() goes through a
# matrix, so mixed content ends up as character. stringsAsFactors = FALSE is
# spelled out because on R < 4.0 the default would create factor columns, and
# as.numeric() on a factor returns level codes rather than the values.
dfadc <- as.data.frame(
  t(covidcases_active_and_death_correlations),
  stringsAsFactors = FALSE
)

# Drop the first transposed row, i.e. the first column of the file
# (presumably a label column -- confirm against full_data.csv).
# drop = FALSE keeps a data frame even if only one column remains.
dfadc <- dfadc[-1, , drop = FALSE]

# The first two lines of the file are used as the two series below.
stopifnot(ncol(dfadc) >= 2)

names(dfadc)[1:2] <- c("new_cases", "new_deaths")

# Values that cannot be parsed as numbers become NA (with a warning).
dfadc$new_cases <- as.numeric(dfadc$new_cases)
dfadc$new_deaths <- as.numeric(dfadc$new_deaths)

# Pearson correlation over the rows where both series are present. print() is
# explicit so the value is also shown when the script is run via source().
print(cor(dfadc$new_deaths, dfadc$new_cases, use = "pairwise.complete.obs"))

# ---- Scatter plot and histogram, written to visualization.pdf ----

# Everything that can fail on bad data (model fit, summary statistics,
# binning) is computed before the PDF device is opened, so such a failure
# cannot leave a dangling graphics device behind.

# Least-squares line for new_cases as a function of new_deaths.
cases_vs_deaths_fit <- lm(new_cases ~ new_deaths, data = dfadc)

dt <- dfadc$new_cases

dtMin <- min(dt, na.rm = TRUE)
dtMax <- max(dt, na.rm = TRUE)
dtMean <- mean(dt, na.rm = TRUE)
dtSd <- sd(dt, na.rm = TRUE)

# Bin without drawing so the axis limits can be derived from the result
# instead of being hard-coded for one particular data set.
h <- hist(dt, breaks = 30, plot = FALSE)

# Normal curve with the sample mean and standard deviation, evaluated on a
# fixed-size grid so the number of points does not grow with the data range.
x <- seq(dtMin, dtMax, length.out = 512)
y1 <- dnorm(x, mean = dtMean, sd = dtSd)

# Rescale the density to expected counts per bin: bin width times the number
# of observations hist() actually binned (it discards non-finite values, so
# length(dt) would overstate the count whenever dt contains NA).
y1 <- y1 * diff(h$breaks[1:2]) * sum(is.finite(dt))

pdf("visualization.pdf")

tryCatch(
  {
    # Page 1: scatter plot; deaths are jittered to reduce overplotting.
    plot(
      jitter(dfadc$new_deaths, 1),
      dfadc$new_cases,
      xlab = "Deaths Cases",
      ylab = "Active Cases",
      main = "Active vs Death Covid Cases"
    )
    abline(cases_vs_deaths_fit)

    # Page 2: histogram of new_cases with the fitted normal curve on top.
    # Limits cover every bin and the full height of both bars and curve.
    plot(
      h,
      density = 15,
      col = "lightgray",
      ylab = "Number of Cases",
      xlab = "Total Active Cases",
      main = "Frequency Distribution of Active and Death Cases",
      xlim = range(h$breaks),
      ylim = c(0, max(h$counts, y1, na.rm = TRUE))
    )
    lines(x, y1, col = "blue")
  },
  # Close the device even if a drawing call fails, so the file is finalised.
  finally = dev.off()
)