Big Query
Kyle Brewster
2022-06-14
pacman::p_load(sf,
tidyverse,
lubridate,
DBI,
dbplyr,
bigrquery,
sandwich,
here)
pacman::p_install(rms,force=FALSE)
## Package is already on your system.
sf::sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
0. Setup
dsn_dir = here("data/shapefiles/kiribati_eez/")
## Read in Kiribati shapefile
kiribati_eez =
read_sf(dsn = dsn_dir, layer = "kiribati_eez") %>%
select(GeoName, geometry)
## Get the projection string for later
proj_str = st_crs(kiribati_eez)
## Subset Line Islands
line =
kiribati_eez %>%
subset(GeoName %in% c("Kiribati Exclusive Economic Zone (Line Islands)")) %>%
rename(region = GeoName) %>%
mutate(region = "Line Group")
## Subset Gilbert Islands
gilbert =
kiribati_eez %>%
subset(GeoName %in%
c("Kiribati Exclusive Economic Zone (Gilbert Islands)")) %>%
rename(region = GeoName) %>%
mutate(region = "Gilbert Islands")
dsn_dir = here("data/shapefiles/pipa_shapefile/")
## Read in PIPA shapefile
pipa =
read_sf(dsn = dsn_dir, layer = "worldheritagemarineprogramme") %>%
st_transform(proj_str) %>%
select(region = full_name, geometry) %>%
mutate(region = "PIPA")
## Merge all regions of interest
all_regions = rbind(line, gilbert, pipa)
## Get bounding box of Kiribati
bbox =
kiribati_eez %>%
st_bbox()
## Extent the bounding box 1 degree in every direction.
## We will use these limits to query the GFW database
min_lon = bbox[["xmin"]] - 1
max_lon = bbox[["xmax"]] + 1
min_lat = bbox[["ymin"]] - 1
max_lat = bbox[["ymax"]] + 1
if(min_lon <= -180) {
min_lon =
kiribati_eez %>%
st_crop(c(xmin=180, xmax=0, ymin=min_lat, ymax=max_lat)) %>%
st_bbox() %>%
pluck("xmin") - 1
}
if(max_lon >= 180) {
max_lon =
kiribati_eez %>%
st_crop(c(xmin=0, xmax=-180, ymin=min_lat, ymax=max_lat)) %>%
st_bbox() %>%
pluck("xmax") + 1
}
## Define mapping resoluion in degrees
resolution = 0.5
## Read in regional area sizes
region_areas = read_csv(here("data/region_areas.csv"))
1. Establishing Database Connection
bq_auth(path = Sys.getenv("~/kb_gce_key.json"))
## ! Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to
## the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The bigrquery package is using a cached token for 'brewster.kyle@gmail.com'.
billing_id = Sys.getenv("GCE_DEFAULT_PROJECT_ID")
cony <- dbConnect(
bigrquery::bigquery(),
project = "global-fishing-watch",
dataset = "global_footprint_of_fisheries",
billing = billing_id)
cony
## <BigQueryConnection>
## Dataset: global-fishing-watch.global_footprint_of_fisheries
## Billing: 117287121109189583528
2. Testing and Authenticating Connection
dbListTables(cony)
## [1] "fishing_effort" "fishing_effort_byvessel"
## [3] "fishing_vessels" "vessels"
3. Preview of Data
effort = tbl(cony, "fishing_effort")
effort
4. Query the database
effort_query = effort %>%
mutate(lon_bin = lon_bin/100, # STEP 1: Filtering values
lat_bin = lat_bin/100,
lat_cent_var=(((lat_bin/resolution)*resolution)+(0.5*resolution)),
lon_cent_var=(((lon_bin/resolution)*resolution)+(0.5*resolution))) %>%
filter(fishing_hours>0, # STEP 2: Creating variables for binning (above)
min_lat<lat_bin | lat_bin<max_lat,
min_lon<lon_bin | lon_bin<max_lon,
date < '2016-12-31')
5. Collect the Data
effort_binned = effort_query %>%
group_by(lat_cent_var,lon_cent_var,date) %>%
summarise(avg_hrs_year = mean(fishing_hours, na.rm=TRUE))
6. Disconnect
dbDisconnect(cony)
7. Convert to sf object
trans_effort = tbl(cony, effort_binned)
effort %>% select(1:8) %>% head(10) %>% collect()
8. Finalizing the Data
Part 1: Cropping
Part 2: Replacing NAs and Tallies
Part 3: Splitting and Normalizing
gfw =
gfw %>%
left_join(region_areas) %>%
mutate(region = ifelse(region=="PIPA", "PIPA", "Control")) %>%
group_by(date, region) %>%
mutate(area_km2 = sum(area_km2)) %>%
mutate(fishing_hours_norm = fishing_hours/area_km2*1000) %>%
summarise(fishing_hours_norm = sum(fishing_hours_norm)) %>%
ungroup
gfw
Plotting the Figure
Work in progress, more updates to come soon!