Rbootcamp Project: California Environmental Screen

Keren Xu

2018/08/20

https://xkcd.com/1410/

# Read the CalEnviroScreen CSV with readr and preview the first rows
library(readr)
caldata <- read_csv(
  file = "/Users/XUKEREN/Desktop/rbootcamp/projects/12-california-env/CalEnviroScreen_2.0_2014.csv"
)
head(caldata, n = 6L)
## # A tibble: 6 x 52
##   `Census Tract` `Total Populatio… `California Count… `Click for interact…
##            <dbl>             <int> <chr>              <chr>               
## 1     6019001100              3174 Fresno             <NA>                
## 2     6019000300              3609 Fresno             <NA>                
## 3     6019000200              3167 Fresno             <NA>                
## 4     6019001500              2206 Fresno             <NA>                
## 5     6019000600              6161 Fresno             <NA>                
## 6     6071001600              6133 San Bernardino     <NA>                
## # ... with 48 more variables: `CES 2.0 Score` <dbl>, `CES 2.0 Percentile
## #   Range` <chr>, Hyperlink <chr>, Ozone <dbl>, `Ozone Pctl` <dbl>,
## #   PM2.5 <dbl>, `PM2.5 Pctl` <dbl>, `Diesel PM` <dbl>, `Diesel PM
## #   Pctl` <dbl>, `Drinking Water` <dbl>, `Drinking Water Pctl` <dbl>,
## #   Pesticides <dbl>, `Pesticides Pctl` <dbl>, `Tox. Release` <dbl>, `Tox.
## #   Release Pctl` <dbl>, Traffic <dbl>, `Traffic Pctl` <dbl>, `Cleanup
## #   Sites` <dbl>, `Cleanup Sites Pctl` <dbl>, `Groundwater Threats` <dbl>,
## #   `Groundwater Threats Pctl` <dbl>, `Haz. Waste` <dbl>, `Haz. Waste
## #   Pctl` <dbl>, `Imp. Water Bodies` <int>, `Imp. Water Bodies
## #   Pctl` <dbl>, `Solid Waste` <dbl>, `Solid Waste Pctl` <dbl>, `Pollution
## #   Burden` <dbl>, `Pollution Burden Score` <dbl>, `Pollution Burden
## #   Pctl` <dbl>, Age <dbl>, `Age Pctl` <dbl>, Asthma <dbl>, `Asthma
## #   Pctl` <dbl>, `Low Birth Weight` <dbl>, `Low Birth Weight Pctl` <dbl>,
## #   Education <dbl>, `Education Pctl` <dbl>, `Linguistic Isolation` <dbl>,
## #   `Linguistic Isolation Pctl` <dbl>, Poverty <dbl>, `Poverty
## #   Pctl` <dbl>, Unemployment <dbl>, `Unemployment Pctl` <dbl>, `Pop.
## #   Char.` <dbl>, `Pop. Char. Score` <dbl>, `Pop. Char. Pctl` <dbl>,
## #   `Location 1` <chr>
# Read the California county boundaries shapefile through rgdal
library(rgdal)
calshape <- readOGR(
  dsn = "/Users/XUKEREN/Desktop/rbootcamp/projects/12-california-env/CA_Counties/CA_Counties_TIGER2016.shp"
)
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/XUKEREN/Desktop/rbootcamp/projects/12-california-env/CA_Counties/CA_Counties_TIGER2016.shp", layer: "CA_Counties_TIGER2016"
## with 58 features
## It has 17 fields
## Integer64 fields read as strings:  ALAND AWATER
# Preview the attribute table stored in the shapefile object's data slot
head(calshape@data, n = 6L)
##   STATEFP COUNTYFP COUNTYNS GEOID          NAME             NAMELSAD LSAD
## 0      06      091 00277310 06091        Sierra        Sierra County   06
## 1      06      067 00277298 06067    Sacramento    Sacramento County   06
## 2      06      083 00277306 06083 Santa Barbara Santa Barbara County   06
## 3      06      009 01675885 06009     Calaveras     Calaveras County   06
## 4      06      111 00277320 06111       Ventura       Ventura County   06
## 5      06      037 00277283 06037   Los Angeles   Los Angeles County   06
##   CLASSFP MTFCC CSAFP CBSAFP METDIVFP FUNCSTAT       ALAND     AWATER
## 0      H1 G4020  <NA>   <NA>     <NA>        A  2468694587   23299110
## 1      H1 G4020   472  40900     <NA>        A  2499183617   76073827
## 2      H1 G4020  <NA>  42200     <NA>        A  7084000598 2729814515
## 3      H1 G4020  <NA>   <NA>     <NA>        A  2641820834   43806026
## 4      H1 G4020   348  37100     <NA>        A  4773390489  945942791
## 5      H1 G4020   348  31080    31084        A 10510651024 1794730436
##      INTPTLAT     INTPTLON
## 0 +39.5769252 -120.5219926
## 1 +38.4500114 -121.3404409
## 2 +34.5370572 -120.0399729
## 3 +38.1838996 -120.5614415
## 4 +34.3587415 -119.1331432
## 5 +34.1963983 -118.2618616
# Draw the loaded shapes as a quick visual check
plot(x = calshape)

# After taking a look at each dataset, I decided to visualize PM2.5 for each
# county.
library(dplyr)

# Average PM2.5 per county (NA readings are ignored), with the two result
# columns labelled "county" and "PM2.5" in the same step.
PMfile <- setNames(
  aggregate(
    x = caldata$PM2.5,
    by = list(caldata$`California County`),
    FUN = mean,
    na.rm = TRUE
  ),
  c("county", "PM2.5")
)
head(PMfile)
##      county    PM2.5
## 1   Alameda 8.199591
## 2    Alpine 3.051420
## 3    Amador 7.338971
## 4     Butte 9.285976
## 5 Calaveras 6.327272
## 6    Colusa 6.113190
# Attach the per-county PM2.5 averages to the shapefile's attribute table.
#
# The rows of calshape@data must stay in the same order (and number) as the
# polygons they describe. base::merge() re-sorts rows by the key and drops
# unmatched rows, which silently pairs PM2.5 values with the wrong county
# shapes. Instead, look up each county's value with match(), which keeps the
# existing row order; counties without a PM2.5 average get NA.
names(calshape@data)[names(calshape@data) == "NAME"] <- "county"
pm_row <- match(as.character(calshape@data$county), PMfile$county)
calshape@data$PM2.5 <- PMfile$PM2.5[pm_row]

# Draw a quick thematic map with tmap, shading each shape by its PM2.5 value
library(tmap)
qtm(
  shp = calshape,
  fill = "PM2.5",
  fill.palette = "Blues"
)


References