Global Temperature Anomalies -- code

---
title: "Just the Facts - An Intuitive Approach to Understanding Climate Change"
author: "Kyle Brewster"
output: html_document
---
```{r, echo=FALSE, results='hide', warning=FALSE, error=FALSE, message=FALSE}
# getwd() -- [1] "C:/Users/Kyle/Desktop/FALL 21 - UO/Econometrics"
library(pacman)
pacman::p_load(tidyverse, readr, ggplot2)
```
```{r, echo=FALSE, results='hide', warning=FALSE, error=FALSE, message=FALSE}
# Importing out first data set.
library(readxl)
past_mill_temps <- read_excel("past1000tempsxl.xlsx")
```
```{r, echo=FALSE, results='hide', warning=FALSE, error=FALSE, message=FALSE, results='hide'}
library(ggplot2)
library(mgcv)
plot_full <- ggplot(past_mill_temps,aes(x=Year, y=T))+
  geom_point(size=.5)+
  geom_vline(xintercept = 1880, size=.75, color="red")+
  geom_smooth(method = 'gam')+
  ylab("Temperature Anomaly (K)")+
  theme(plot.title = element_text(face = "bold.italic", size=10, color = "dark blue"))+
  ggtitle("Figure 1.")+
  ylim(-1.5, .75)
```
```{r, echo=FALSE, results='hide', warning=FALSE, error=FALSE, message=FALSE}
reg1_temps <- gam(T~Year, data=past_mill_temps)
# GAM Regression line on pre-1880 filtered data
library(ggplot2)
library(dplyr)
pre1880_temps <- past_mill_temps %>%
  select(c(1,2)) %>%
  group_by(Year) %>%
  filter(Year<1880)
library(mgcv)
reg2_pre1880 <- gam(T~Year, data=pre1880_temps)
```
```{r, echo=FALSE, results='hide', warning=FALSE, error=FALSE  message=FALSE}
plot_pre <- ggplot(pre1880_temps,aes(x=Year, y=T))+
  geom_point(size=.5)+
  geom_smooth(method = "gam", color="red")+
  ylab("Temperatur Anomaly (K)")+
  xlim(0,1880)+
  ylim(-1.5,.75)+
  theme(plot.title = element_text(face = "bold.italic", size=10, color = "dark blue"))+
  ggtitle("Figure 2.")
library(ggpubr)
ggarrange(plot_full, plot_pre, align = "v")
```
``` {r, eval=FALSE, echo=FALSE, warning=FALSE, error=FALSE, message=FALSE}
# Saving this for personal future use. 
library(stargazer)
stargazer(reg1_temps, reg2_pre1880, type = "text", title = "Regression Results With/Without post-1880 Data")
```
```{r, echo=FALSE}
# created data frame with subsequent years to predict values from our model:
predicted_values <- data.frame(Year=c(1880:2000))
predicted_values$predictions <- predict(reg2_pre1880, newdata=predicted_values)
names(predicted_values)[names(predicted_values) == "predictions"] <- "T"
# ^ Changing name of column to keep consistent with other data
```
```{r, echo=FALSE, warning=FALSE, message=FALSE, error=FALSE}
ggplot(past_mill_temps,aes(x=Year, y=T))+
  geom_point(size=1)+
  geom_point(data=predicted_values, color="green")+
  geom_vline(xintercept = 1880, size=.75, color="red")+
  geom_smooth(method = 'gam', size=2)+
  ylab("Temperatur Anomaly (K)")+
  theme(axis.title = element_text(size=9),
        plot.title = element_text(face = "bold.italic", size=10, color="dark blue"))+
  ggtitle("Figure 3")
```
```{r, echo=FALSE}
# First, let's import the NASA data set on post-1880 temperature records
library(readxl)
NASA_GISTEMP_data_xl <- read_excel("NASA_GISTEMP_data_xl.xlsx")
```
```{r, echo=FALSE, results='hide', message=FALSE, error=FALSE, warning=FALSE}
# Now let's change the class of these two variables and update our data frame we can calculate the average of the two different methods of measurement to use in our analyses.
NASA_GISTEMP_data_xl$`J-D` <- as.numeric(NASA_GISTEMP_data_xl$`J-D`)
# Getting rid of annoying hyphen.
names(NASA_GISTEMP_data_xl)[names(NASA_GISTEMP_data_xl) == "J-D"] <- "JD"
```
```{r, echo=FALSE, error=FALSE, message=FALSE, warning=FALSE}
ggplot(NASA_GISTEMP_data_xl,aes(x=Year, y=JD))+
  geom_point()+
  geom_smooth(method = 'gam')+
  ylab("Temperature Anomaly in Degrees Celsius (C)")+
  theme(axis.title = element_text(size=9),
        plot.title = element_text(face = "bold.italic", size=10, color="dark blue"))+
  ggtitle("Figure 4")
```
```{r, echo=FALSE, results='hide', message=FALSE}
library(mgcv)
#to get rid of hyphen in name
names(NASA_GISTEMP_data_xl)[names(NASA_GISTEMP_data_xl) == "J-D"] <- "JD"
reg3_NASA <- gam(JD~Year, data=NASA_GISTEMP_data_xl)
```
```{r, echo=FALSE, error=FALSE, message=FALSE}
# Includes global data on forest coverage change by country since 1990s
deforestation_df <- read.csv("~/Deforestation.csv")
# Carbon emissions by year from 1750 onward (could consider using log data)
c02_emissions <- read.csv("~/c02_emissions.csv")
# Methane emissions by year
annual_methane_emissions <- read.csv("~/annual_methane_emissions.csv")
```
####    C A R B O N 
```{r, echo=FALSE, results='hide'}
names(c02_emissions)[names(c02_emissions) == "Annual.CO2.emissions"] <- "Emissions"
Global_co2_emissions <- c02_emissions %>%
  group_by(Entity) %>% 
  filter(Entity == "World")
# Emissions in Millions of tonnes
Global_co2_emissions$Emissions_mil_tonnes <- Global_co2_emissions$Emissions/1000000
# Emissions in Billions of tonnes
Global_co2_emissions$Emissions_bil_tonnes <- Global_co2_emissions$Emissions/1000000000
```
```{r, echo=FALSE}
plot_carbon <- ggplot(Global_co2_emissions,aes(x=Year, y=Emissions_bil_tonnes))+
  geom_point()+
  geom_smooth(method = 'loess', formula = y ~ x)+
  theme(axis.title = element_text(size=9),
        plot.title = element_text(face = "bold.italic", size=10, color="dark blue"))+
  ylab("Level of Emissions (billions of tonnes")+
  ggtitle("Figure 5")
```
```{r, echo=FALSE}
plot_log_carbon <- ggplot(Global_co2_emissions,aes(x=Year, y=log(Emissions)))+
  geom_point()+
  geom_smooth(method = 'loess', formula = y ~ x)+
  theme(axis.title = element_text(size=9),
        plot.title = element_text(face = "bold.italic", size=10, color="dark blue"))+
  ylab("Log Level of Emissions")+
  ggtitle("Figure 6")
library(ggpubr)
ggarrange(plot_carbon, plot_log_carbon)
```
####    D E F O R E S T A T I O N
```{r, echo=FALSE, results='hide', message=FALSE, error=FALSE}
# First to clean up the data a bit for easier use.
Global_deforestation <- deforestation_df %>%
  group_by(Entity) %>% 
  filter(Entity == "World")
# Making the class a numeric value just in case.
Global_deforestation$Forest.cover <- as.numeric(Global_deforestation$Forest.cover)
```
```{r, echo=FALSE, warning=FALSE, results='hide', message=FALSE, error=FALSE}
plot_forest_lim <- ggplot(Global_deforestation,aes(x=Year, y=Forest.cover))+
  geom_point()+
  ylab("Forest Coverage (Percentage)")+
  ylim(31.25,32.6)+
  theme(axis.title = element_text(size=9),
        plot.title = element_text(face = "bold.italic", size=10, color="dark blue"))+
  geom_smooth(method = lm, size=.75)+
  ggtitle("Figure 7")
plot_forest <- ggplot(Global_deforestation,aes(x=Year, y=Forest.cover))+
  geom_point()+
  ylab("Forest Coverage (Percentage)")+
  ylim(0,33)+
  theme(axis.title = element_text(size=9),
        plot.title = element_text(face = "bold.italic", size=10, color="dark blue"))+
  geom_smooth(method = lm, size=.25)+
  ggtitle("Figure 8")
```
```{r, echo=FALSE, warning=FALSE, results='hide', message=FALSE, error=FALSE}
library(ggpubr)
ggarrange(plot_forest_lim, plot_forest)
```
####    M E T H A N E 
```{r, echo=FALSE, results='hide'}
# ...not gonna have that column name...
names(annual_methane_emissions)[names(annual_methane_emissions) == "Total.including.LUCF..CH4.emissions..CAIT."] <- "total_emissions"
Global_methane <- annual_methane_emissions %>%
  group_by(Entity) %>% 
  filter(Entity == "World")
# Emissions in Millions of tonnes
Global_methane$Emissions_mil_tonnes <- Global_methane$total_emissions/1000000
# Emissions in Billions of tonnes
Global_methane$Emissions_bil_tonnes <- Global_methane$total_emissions/1000000000
```
```{r, echo=FALSE}
ggplot(Global_methane,aes(x=Year, y=Emissions_bil_tonnes))+
  geom_point()+
  geom_smooth(method = "loess", formula = y ~ x)+
  ylab("Global Methane (CH4) Emissions (billions of tonnes)")+
  ylim(6.5,9)+
  theme(axis.title = element_text(size=9),
        plot.title = element_text(face = "bold.italic", size=10, color="dark blue"))+
  ggtitle("Figure 9")
```
```{r, echo=FALSE, results='hide', message=FALSE, warning=FALSE, error=FALSE}
## Cleaning up the data to make it more usable in the ranges we wish to analyze.

# Cleaning NASA GISTEMP data
clean_NASA_GISTEMP <- NASA_GISTEMP_data_xl %>%
  select(c("Year", "JD")) %>%
  group_by(Year) %>%
  filter(Year >= 1990 & Year <2021)
names(clean_NASA_GISTEMP)[names(clean_NASA_GISTEMP) == "JD"] <- "temp_anomaly"
# Cleaning Methane data
clean_methane <- Global_methane %>% select(c(3,4,5,6))
names(clean_methane)[names(clean_methane) == "total_emissions"] <- "CH4_total_emissions"
names(clean_methane)[names(clean_methane) == "Emissions_mil_tonnes"] <- "CH4_mil_ton"
names(clean_methane)[names(clean_methane) == "Emissions_bil_tonnes"] <- "CH4_bil_ton"
# Cleaning CO2 data
clean_co2 <- Global_co2_emissions %>%
  select(c(3,4,5,6)) %>%
  group_by(Year) %>%
  filter(Year >= 1990)
names(clean_co2)[names(clean_co2) == "Emissions"] <- "co2_total_emissions"
names(clean_co2)[names(clean_co2) == "Emissions_mil_tonnes"] <- "co2_mil_ton"
names(clean_co2)[names(clean_co2) == "Emissions_bil_tonnes"] <- "co2_bil_ton"
# Cleaning deforestation data
clean_deforestation <- Global_deforestation %>%
  select(c(3,4))
```
```{r, echo=FALSE, results='hide'}
# Adding NULL values for missing data points in methane data. 
clean_methane_org <- clean_methane
clean_methane <-  clean_methane %>% 
  ungroup() %>%
  add_row(Entity="World", Year=2017) %>%
  add_row(Entity="World", Year=2018) %>%
  add_row(Entity="World", Year=2019) %>%
  add_row(Entity="World", Year=2020)
```
```{r, echo=FALSE, results='hide'}
# Merging data sets
merged_data_1 <- full_join(clean_co2, clean_deforestation, by = "Year")
merged_data_2 <- full_join(clean_methane, clean_NASA_GISTEMP, by = "Year")
joint_df <- full_join(merged_data_1, merged_data_2, by = "Year")
# Then cleaning up new data frame to remove irrelevant columns. 
joint_df <- joint_df %>% select(-c(1, 6, 8))
names(joint_df)
```
```{r, echo=FALSE, message=FALSE, error=FALSE, warning=FALSE}
# Each regression line uses Loess method
library(ggplot2)
ggp_temp <- ggplot(joint_df,aes(x=Year, y=temp_anomaly))+
  geom_point()+
  geom_smooth()+
  ylab("Temperature Anomaly (Celsius)")+
  theme(axis.title = element_text(size=8.5),
        plot.title = element_text(face = "bold.italic", size=10, color = "dark blue"))+
  ggtitle("Figure A")
ggp_forest <- ggplot(joint_df,aes(x=Year, y=Forest.cover))+
  geom_point()+
  geom_smooth(size=.5)+
  ylab("Forest Coverage (Percentage)")+
  theme(axis.title = element_text(size=8.5),
        plot.title = element_text(face = "bold.italic", size=10, color = "dark blue"))+
  ggtitle("Figure B")
ggp_co2 <- ggplot(joint_df,aes(x=Year, y=co2_bil_ton))+
  geom_point()+
  geom_smooth()+
  ylab("CO2 Emissions (billions of tonnes)")+
  theme(axis.title = element_text(size=8.5),
        plot.title = element_text(face = "bold.italic", size=10, color = "dark blue"))+
  ggtitle("Figure C")
ggp_ch4 <- ggplot(joint_df,aes(x=Year, y=CH4_bil_ton))+
  geom_point()+
  geom_smooth()+
  ylab("CH4 Emissions (billions of tonnes")+
  theme(axis.title = element_text(size=8.5),
        plot.title = element_text(face = "bold.italic", size=10, color = "dark blue"))+
  ggtitle("Figure D")
```
```{r, echo=FALSE, message=FALSE, error=FALSE, warning=FALSE}
library(ggpubr)
ggarrange(ggp_temp, ggp_forest, ggp_co2, ggp_ch4)
```
```{r, eval=FALSE, echo=FALSE, message=FALSE, error=FALSE, warning=FALSE}
# Had difficulties getting table to work properly, so not including this section in blog post, but saving for future use and referecne
joint_reg1 <- lm(data=joint_df, formula = temp_anomaly ~ Year)
joint_reg2 <- lm(data=joint_df, formula = temp_anomaly ~ Year + co2_bil_ton)
joint_reg3 <- lm(data=joint_df, formula = temp_anomaly ~ Year + co2_bil_ton + CH4_bil_ton)
joint_reg4 <- lm(data=joint_df, formula = temp_anomaly ~ Year + co2_bil_ton + CH4_bil_ton + Forest.cover)
# The coding below is not included in the finailized version as shown before in this blog, but is the beginning of the coding I was doing regarding interpretation of our resutls and additional inferences we can conclude.
library(stargazer)
stargazer(joint_reg1, joint_reg2, joint_reg3, joint_reg4, type="html", title = "Temperature Anomaly Regression")
```
```{r, echo=FALSE, eval=FALSE}
joint_regA <- lm(data=joint_df, formula = temp_anomaly ~ co2_bil_ton)
joint_regB <- lm(data=joint_df, formula = temp_anomaly ~ co2_bil_ton + CH4_bil_ton)
joint_regC <- lm(data=joint_df, formula = temp_anomaly ~ co2_bil_ton + CH4_bil_ton + Forest.cover)
library(stargazer)
stargazer(joint_regA, joint_regB, joint_regC, type="html", title = "Temperature Anomaly Regression")
```