acquisitionsByArtistGender.R (3429B)
# Just curious whether MoMA's acquisition activity over the years differs by
# the gender recorded for each work's artist.
#
# Reads the `artworks` and `artists` tables from momaDB.sqlite and writes three
# PNG charts to the working directory: cumulative works, acquisitions per year,
# and the yearly percentage split between female and male artists.

library(dplyr)
library(tidyr)
library(ggplot2)

# Well that was easy
momaDB <- src_sqlite("momaDB.sqlite")
artworks <- tbl(momaDB, "artworks")
artists <- tbl(momaDB, "artists")


# Add new artist info to the artworks table -------------------------------

artists <- select(artists, Artist, ArtistBio, iso3166, gender)
artworks <- left_join(artworks, artists, by = c("Artist", "ArtistBio"))


# Extract the acquisition year --------------------------------------------

# Just pull out the year, since dates aren't always formatted correctly.
# Only the two columns used below are requested before collect(), so the rest
# of the joined table is never pulled into memory. The regex work has to happen
# after collect() because it runs in R, not in the database.
artworks <- artworks %>%
  select(DateAcquired, gender) %>%
  collect() %>%
  filter(grepl("[0-9]{4}", DateAcquired)) %>%
  mutate(
    # The leading ".*" is greedy, so the last run of four digits is captured.
    year_acquired = as.integer(sub(".*([0-9]{4}).*", "\\1", DateAcquired)),
    gender = ifelse(is.na(gender), "unknown", gender)
  ) %>%
  select(year_acquired, gender)

# Fail early with a clear message instead of a cryptic seq() error below.
if (nrow(artworks) == 0) {
  stop("No artworks with a parseable acquisition year were found.",
       call. = FALSE)
}


# Summarise acquisitions per year and gender ------------------------------

# Count works per year and gender, then build the running total per gender.
artworks <- artworks %>%
  count(year_acquired, gender) %>%
  # Add an explicit zero for every year/gender pair that has no works. Years
  # with no acquisitions at all are NOT added.
  complete(year_acquired, gender, fill = list(n = 0L)) %>%
  # cumsum() depends on row order, so make the chronological order explicit.
  arrange(year_acquired) %>%
  group_by(gender) %>%
  mutate(total_works = cumsum(n)) %>%
  ungroup() %>%
  # NOTE(review): any gender value outside these three levels becomes NA here;
  # confirm the artists table only holds "female" / "male" (or NA).
  mutate(gender = factor(gender, c("unknown", "female", "male")))


# Shared plot settings ----------------------------------------------------

# Named colours, so the mapping does not depend on which levels are present.
gender_colours <- c(unknown = "#4daf4a", female = "#e41a1c", male = "#377eb8")
known_colours <- gender_colours[c("female", "male")]

# Five evenly spaced x-axis breaks across the observed range of years.
year_breaks <- round(seq(min(artworks$year_acquired),
                         max(artworks$year_acquired),
                         length.out = 5))


# Plot 1: cumulative works, stacked by gender -----------------------------

p1 <- artworks %>%
  ggplot(aes(x = year_acquired, y = total_works, fill = gender)) +
  geom_area(position = "stack") +
  theme_minimal() +
  scale_fill_manual(values = gender_colours,
                    guide = guide_legend(title = "Artist Gender")) +
  scale_x_continuous(breaks = year_breaks) +
  labs(title = "MoMA's cumulative works by time",
       y = "Total works",
       x = "Year acquired")
print(p1)
ggsave("cumulative_works_by_gender.png", plot = p1)


# Plot 2: acquisitions per year, known genders only -----------------------

p2 <- artworks %>%
  filter(gender != "unknown") %>%
  ggplot(aes(x = year_acquired, y = n, fill = gender, color = gender)) +
  geom_area(position = "identity", alpha = 0.1) +
  geom_line(size = 1.2) +
  theme_minimal() +
  # The fill legend is hidden; the colour legend carries the gender key.
  scale_fill_manual(values = known_colours, guide = "none") +
  scale_color_manual(values = known_colours,
                     guide = guide_legend(title = "Artist Gender")) +
  scale_x_continuous(breaks = year_breaks) +
  labs(title = "MoMA's acquisitions by year",
       y = "Number of acquisitions",
       x = "Year acquired")
print(p2)
ggsave("total_works_by_gender.png", plot = p2)


# Plot 3: yearly percentage split, known genders only ---------------------

p3 <- artworks %>%
  filter(gender != "unknown") %>%
  group_by(year_acquired) %>%
  # Skip years with no known-gender acquisitions: 0 / 0 would give NaN.
  filter(sum(n) > 0) %>%
  mutate(percent_n = 100 * n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = year_acquired, y = percent_n, fill = gender)) +
  geom_area(position = "stack") +
  theme_minimal() +
  scale_fill_manual(values = known_colours,
                    guide = guide_legend(title = "Artist Gender")) +
  scale_x_continuous(breaks = year_breaks) +
  labs(title = "MoMA's acquisitions and artist gender",
       y = "Percentage of works acquired",
       x = "Year acquired")
print(p3)
ggsave("percent_works_by_gender.png", plot = p3)