combineCountryInfo.R (2055B)
# Combine the hand-maintained country / language / adjectival CSVs.
# (Original author's note: no existing package was looked for before doing
# this by hand.)
#
# Goal: merge everything so that each country name can be tied to its
# adjectival form(s) and to a language code (the `iso639` column).

library(dplyr)
library(tidyr)

# Language names -> codes --------------------------------------------------
# One code may carry several language names; that one-to-many shape is fine
# here. The plain `language_name` column is dropped, the numbered columns
# `language_name_1` .. `language_name_4` are stacked into one
# `language_name` column, and rows without a name are discarded.
# NOTE(review): gather() is superseded by tidyr::pivot_longer(); it is kept
# because swapping it in would change the row order of the result.

languagesToCodes <- read.csv(
  "languages_to_codes.csv",
  stringsAsFactors = FALSE
) %>%
  select(-language_name) %>%
  gather(
    "language_number", "language_name",
    language_name_1:language_name_4
  ) %>%
  filter(!is.na(language_name)) %>%
  select(-language_number)


# Countries -> language codes ----------------------------------------------
# Look up each country's `first_language` in the name -> code table, keep
# only the countries that found a code, and reduce to (country_name, iso639).

countriesToLanguages <- read.csv(
  "countries_to_languages.csv",
  stringsAsFactors = FALSE
) %>%
  left_join(
    languagesToCodes,
    by = c("first_language" = "language_name")
  ) %>%
  # Author's note: after hand-editing the CSVs, only eight countries are
  # dropped by this filter.
  filter(!is.na(iso639)) %>%
  select(country_name, iso639)


# Countries / adjectivals + language codes ---------------------------------
# Attach the language code to every country in the adjectivals listing,
# matching `natural_country_name` against `country_name`.

countriesToAdjectivals <- read.csv(
  "countries_to_adjectivals.csv",
  stringsAsFactors = FALSE
) %>%
  left_join(
    countriesToLanguages,
    by = c("natural_country_name" = "country_name")
  ) %>%
  # Author's note: this loses 75 countries, which was judged acceptable.
  filter(!is.na(iso639))


# Final lookup tables used by the rest of the project -----------------------

# nationality (adjectival) -> country: stack `adjectival_1` ..
# `adjectival_4` into one column and drop the empty slots.
nationalityToCountry <- countriesToAdjectivals %>%
  select(natural_country_name, adjectival_1:adjectival_4) %>%
  gather(
    "adjectival_number", "adjectival",
    adjectival_1:adjectival_4
  ) %>%
  filter(!is.na(adjectival)) %>%
  select(
    nationality = adjectival,
    country = natural_country_name
  )

# country -> language code
countryToLanguage <- countriesToAdjectivals %>%
  select(
    country = natural_country_name,
    iso639
  )