moma-collection

The Museum of Modern Art (MoMA) collection data.
git clone https://git.eamoncaddigan.net/moma-collection.git
Log | Files | Refs | README | LICENSE

artistChoropleth.R (4056B)


      1 # Another look at the MoMA data, showing the number of artists per country.
      2 # Of all the plots, this probably contains the most inaccuracies. 
      3 # A lot of this code was inspired by: 
      4 # http://rpsychologist.com/working-with-shapefiles-projections-and-world-maps-in-ggplot
      5 # I also got help from:
      6 # https://github.com/hadley/ggplot2/wiki/plotting-polygon-shapefiles
      7 # and a bug fix from:
      8 # http://stackoverflow.com/questions/13662448/what-does-the-following-error-mean-topologyexception-found-non-nonded-intersec
      9 
     10 require("rgeos")
     11 require("rgdal") # requires sp, will use proj.4 if installed
     12 require("maptools")
     13 require("ggplot2")
     14 require("RSQLite")
     15 require("dplyr")
     16 
     17 
     18 # Figure out how many artists came from each country ----------------------
     19 
     20 momaDB <- src_sqlite("../momaDB.sqlite")
     21 artists <- tbl(momaDB, "artists")
     22 artistCounts <- artists %>%
     23   # Ugh, bug somewhere wiped out Chinese ISO codes :(
     24   select(birth_nationality, iso3166) %>%
     25   collect() %>%
     26   mutate(iso3166 = ifelse(is.na(iso3166) & grepl("^Chin", birth_nationality, ignore.case = TRUE),
     27                           "CN",
     28                           iso3166)) %>%
     29   # FIXME! This breaks my heart. Namibia happens to share the ISO 3166-1 alpha-2 code with missing data. 
     30   filter(iso3166 != "NA") %>%
     31   count(iso3166) %>%
     32   collect()
     33 
     34 
     35 # Read in the shapefiles and turn them into DFs ---------------------------
     36 
     37 # Read in the WGS84 bounding box shapefile and make its DF
     38 bbox <- readOGR("ne_110m_graticules_all", layer="ne_110m_wgs84_bounding_box") 
     39 bboxMoll <- spTransform(bbox, CRS("+proj=moll"))  # reproject bounding box
     40 bboxMollDF <- fortify(bboxMoll)
     41 
     42 # Graticule
     43 grat <- readOGR("ne_110m_graticules_all", layer="ne_110m_graticules_15") 
     44 gratMoll <- spTransform(grat, CRS("+proj=moll"))  # reproject graticule
     45 gratMollDF <- fortify(gratMoll)
     46 
     47 # Read in country borders shapefile
     48 countries <- readOGR("ne_110m_admin_0_countries", layer="ne_110m_admin_0_countries")
     49 countriesMoll <- spTransform(countries, CRS("+proj=moll"))
     50 # Fixes a bug in the shapefile
     51 countriesMoll <- gBuffer(countriesMoll, width=0, byid=TRUE)
     52 
     53 # Create a DF for the countries and add the iso_A2 tags to it
     54 countriesMoll@data$id <- rownames(countriesMoll@data)
     55 countriesMollDF <- fortify(countriesMoll, region="id")
     56 countriesMoll.data <- select(countriesMoll@data, id, iso_a2)
     57 countriesMollDF <- left_join(countriesMollDF, countriesMoll.data, by="id")
     58 
     59 
     60 # Combine the artist counts with spatial data -----------------------------
     61 
     62 countriesMollDF <- left_join(countriesMollDF, artistCounts, 
     63                              by = c("iso_a2" = "iso3166")) %>%
     64   mutate(n = ifelse(is.na(n), 0, n),
     65          n_1 = pmax(n, 1))
     66 
     67 
     68 # Draw the map ------------------------------------------------------------
     69 
     70 colorScale <- round(10^seq(0, log10(max(artistCounts$n)), length.out=5), -1)
     71 
     72 p1 <- ggplot(bboxMollDF, aes(long,lat, group=group)) + 
     73   geom_polygon(fill="white") +
     74   geom_path(data=gratMollDF, aes(long, lat, group=group, fill=NULL), linetype="solid", color="grey50", alpha=0.2) +
     75   geom_polygon(data=countriesMollDF, aes(long,lat, group=group, fill=n_1)) + 
     76   geom_path(data=countriesMollDF, aes(long,lat, group=group), color="white", size=0.3) +
     77   scale_fill_gradient2(low = "#e5f5f9", mid = "#99d8c9", high = "#2ca25f",
     78                       name = "Count", trans = "log",
     79                       breaks = colorScale, labels = colorScale) + 
     80   labs(title="Artists at MoMA per country") + 
     81   coord_equal() + 
     82   list(theme(panel.grid.minor = element_blank(),
     83              panel.grid.major = element_blank(),
     84              panel.background = element_blank(),
     85              plot.background = element_rect(fill="#e6e8ed"),
     86              panel.border = element_blank(),
     87              axis.line = element_blank(),
     88              axis.text.x = element_blank(),
     89              axis.text.y = element_blank(),
     90              axis.ticks = element_blank(),
     91              axis.title.x = element_blank(),
     92              axis.title.y = element_blank(),
     93              plot.title = element_text(size=16)))
     94 print(p1)
     95 ggsave("artists_per_country.png")