artistChoropleth.R (4056B)
1 # Another look at the MoMA data, showing the number of artists per country. 2 # Of all the plots, this probably contains the most inaccuracies. 3 # A lot of this code was inspired by: 4 # http://rpsychologist.com/working-with-shapefiles-projections-and-world-maps-in-ggplot 5 # I also got help from: 6 # https://github.com/hadley/ggplot2/wiki/plotting-polygon-shapefiles 7 # and a bug fix from: 8 # http://stackoverflow.com/questions/13662448/what-does-the-following-error-mean-topologyexception-found-non-nonded-intersec 9 10 require("rgeos") 11 require("rgdal") # requires sp, will use proj.4 if installed 12 require("maptools") 13 require("ggplot2") 14 require("RSQLite") 15 require("dplyr") 16 17 18 # Figure out how many artists came from each country ---------------------- 19 20 momaDB <- src_sqlite("../momaDB.sqlite") 21 artists <- tbl(momaDB, "artists") 22 artistCounts <- artists %>% 23 # Ugh, bug somewhere wiped out Chinese ISO codes :( 24 select(birth_nationality, iso3166) %>% 25 collect() %>% 26 mutate(iso3166 = ifelse(is.na(iso3166) & grepl("^Chin", birth_nationality, ignore.case = TRUE), 27 "CN", 28 iso3166)) %>% 29 # FIXME! This breaks my heart. Namibia happens to share the ISO 3166-1 alpha-2 code with missing data. 30 filter(iso3166 != "NA") %>% 31 count(iso3166) %>% 32 collect() 33 34 35 # Read in the shapefiles and turn them into DFs --------------------------- 36 37 # Read in the WGS84 bounding box shapefile and make its DF 38 bbox <- readOGR("ne_110m_graticules_all", layer="ne_110m_wgs84_bounding_box") 39 bboxMoll <- spTransform(bbox, CRS("+proj=moll")) # reproject bounding box 40 bboxMollDF <- fortify(bboxMoll) 41 42 # Graticule 43 grat <- readOGR("ne_110m_graticules_all", layer="ne_110m_graticules_15") 44 gratMoll <- spTransform(grat, CRS("+proj=moll")) # reproject graticule 45 gratMollDF <- fortify(gratMoll) 46 47 # Read in country borders shapefile 48 countries <- readOGR("ne_110m_admin_0_countries", layer="ne_110m_admin_0_countries") 49 countriesMoll <- spTransform(countries, CRS("+proj=moll")) 50 # Fixes a bug in the shapefile 51 countriesMoll <- gBuffer(countriesMoll, width=0, byid=TRUE) 52 53 # Create a DF for the countries and add the iso_A2 tags to it 54 countriesMoll@data$id <- rownames(countriesMoll@data) 55 countriesMollDF <- fortify(countriesMoll, region="id") 56 countriesMoll.data <- select(countriesMoll@data, id, iso_a2) 57 countriesMollDF <- left_join(countriesMollDF, countriesMoll.data, by="id") 58 59 60 # Combine the artist counts with spatial data ----------------------------- 61 62 countriesMollDF <- left_join(countriesMollDF, artistCounts, 63 by = c("iso_a2" = "iso3166")) %>% 64 mutate(n = ifelse(is.na(n), 0, n), 65 n_1 = pmax(n, 1)) 66 67 68 # Draw the map ------------------------------------------------------------ 69 70 colorScale <- round(10^seq(0, log10(max(artistCounts$n)), length.out=5), -1) 71 72 p1 <- ggplot(bboxMollDF, aes(long,lat, group=group)) + 73 geom_polygon(fill="white") + 74 geom_path(data=gratMollDF, aes(long, lat, group=group, fill=NULL), linetype="solid", color="grey50", alpha=0.2) + 75 geom_polygon(data=countriesMollDF, aes(long,lat, group=group, fill=n_1)) + 76 geom_path(data=countriesMollDF, aes(long,lat, group=group), color="white", size=0.3) + 77 scale_fill_gradient2(low = "#e5f5f9", mid = "#99d8c9", high = "#2ca25f", 78 name = "Count", trans = "log", 79 breaks = colorScale, labels = colorScale) + 80 labs(title="Artists at MoMA per country") + 81 coord_equal() + 82 list(theme(panel.grid.minor = element_blank(), 83 panel.grid.major = element_blank(), 84 panel.background = element_blank(), 85 plot.background = element_rect(fill="#e6e8ed"), 86 panel.border = element_blank(), 87 axis.line = element_blank(), 88 axis.text.x = element_blank(), 89 axis.text.y = element_blank(), 90 axis.ticks = element_blank(), 91 axis.title.x = element_blank(), 92 axis.title.y = element_blank(), 93 plot.title = element_text(size=16))) 94 print(p1) 95 ggsave("artists_per_country.png")