commit 31ce3a8fe8713d183d7c7747d79e85fe3be6daa9
parent fa490d61701b8ead8f95d19839e1630a7c2c03b4
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date: Wed, 29 Jul 2015 16:10:48 -0400
Okay, I can make a fairly decent DF that has country name, adjectival, and language code.
Diffstat:
5 files changed, 90 insertions(+), 34 deletions(-)
diff --git a/countries/combineCountryInfo.R b/countries/combineCountryInfo.R
@@ -0,0 +1,40 @@
+# Build one data frame per country holding its name, adjectival form(s), and
+# the ISO 639-1 code of its first-listed language, from the three CSVs in
+# this directory. (In hindsight, an existing package may well cover this; I
+# never checked.)
+
+library(dplyr)
+library(tidyr)
+
+# Language name -> code lookup (one code may carry several names) ----------
+
+languagesToCodes <-
+  read.csv("languages_to_codes.csv", stringsAsFactors = FALSE) %>%
+  select(-language_name) %>%
+  gather("language_number", "language_name",
+         language_name_1:language_name_4) %>%
+  filter(!is.na(language_name)) %>%
+  select(-language_number)
+
+
+# Country -> code of its first language -----------------------------------
+
+countriesToLanguages <-
+  read.csv("countries_to_languages.csv", stringsAsFactors = FALSE) %>%
+  left_join(languagesToCodes, by = c("first_language" = "language_name")) %>%
+  # Countries whose first language has no code are dropped; after the manual
+  # CSV clean-up that is only eight of them
+  filter(!is.na(iso639)) %>%
+  select(country_name, iso639)
+
+
+# Country/adjectival rows that have a language code ------------------------
+
+countriesToAdjectivals <-
+  read.csv("countries_to_adjectivals.csv", stringsAsFactors = FALSE) %>%
+  left_join(countriesToLanguages,
+            by = c("natural_country_name" = "country_name")) %>%
+  # Unmatched country names fall out here: 75 countries at the time of
+  # writing, which is tolerable
+  filter(!is.na(iso639))
+
diff --git a/countries/countries_to_adjectivals.csv b/countries/countries_to_adjectivals.csv
@@ -47,8 +47,8 @@ Cayman Islands,Caymanian,Cayman Islands,Caymanian,NA,NA,NA
Central African Republic,Central African,Central African Republic,Central African,NA,NA,NA
Chad,Chadian,Chad,Chadian,NA,NA,NA
Chile,Chilean,Chile,Chilean,NA,NA,NA
-"China, People's Republic of",Chinese,People's Republic of China,Chinese,NA,NA,NA
-"China, Republic of","See Taiwan, below",Republic of China,Chinese,Taiwanese,NA,NA
+"China, People's Republic of",Chinese,China,Chinese,NA,NA,NA
+"China, Republic of","See Taiwan, below",Taiwan,Chinese,Taiwanese,NA,NA
Christmas Island,Christmas Island,Christmas Island,Christmas Island,NA,NA,NA
Cocos (Keeling) Islands,Cocos Island,Cocos (Keeling) Islands,Cocos Island,NA,NA,NA
Colombia,Colombian,Colombia,Colombian,NA,NA,NA
diff --git a/countries/countries_to_languages.csv b/countries/countries_to_languages.csv
@@ -1,20 +1,20 @@
country_name,languages,first_language
-Afghanistan,"Dari Persian, Pashtu (both official), other Turkic and minor languages",Dari Persian
+Afghanistan,"Dari Persian, Pashtu (both official), other Turkic and minor languages",Pashto
Albania,"Albanian (Tosk is the official dialect), Greek",Albanian
Algeria,"Arabic (official), French, Berber dialects",Arabic
-Andorra,"Catalán (official), French, Castilian, Portuguese",Catalán
+Andorra,"Catalán (official), French, Castilian, Portuguese",Catalan
Angola,"Portuguese (official), Bantu and other African languages",Portuguese
Antigua and Barbuda,"English (official), local dialects",English
Argentina,"Spanish (official), English, Italian, German, French",Spanish
Armenia,"Armenian 98%, Yezidi, Russian",Armenian
Australia,"English 79%, native and other languages",English
Austria,"German (official nationwide); Slovene, Croatian, Hungarian (each official in one region)",German
-Azerbaijan,"Azerbaijani Turkic 89%, Russian 3%, Armenian 2%, other 6% (1995 est.)",Azerbaijani Turkic
-Bahamas,"English (official), Creole (among Haitian immigrants)",English
+Azerbaijan,"Azerbaijani Turkic 89%, Russian 3%, Armenian 2%, other 6% (1995 est.)",Azerbaijani
+The Bahamas,"English (official), Creole (among Haitian immigrants)",English
Bahrain,"Arabic, English, Farsi, Urdu",Arabic
Bangladesh,"Bangla (official), English",Bangla
Barbados,English,English
-Belarus,"Belorussian (White Russian), Russian, other",Belorussian
+Belarus,"Belorussian (White Russian), Russian, other",Belarusian
Belgium,"Dutch (Flemish) 60%, French 40%, German less than 1% (all official)",Dutch
Belize,"English (official), Spanish, Mayan, Garifuna (Carib), Creole",English
Benin,"French (official), Fon, Yoruba, tribal languages",French
@@ -25,7 +25,7 @@ Botswana,"English 2% (official), Setswana 78%, Kalanga 8%, Sekgalagadi 3%, other
Brazil,"Portuguese (official), Spanish, English, French",Portuguese
Brunei,"Malay (official), English, Chinese",Malay
Bulgaria,"Bulgarian 85%, Turkish 10%, Roma 4%",Bulgarian
-Burkina Faso,French (official); native African (Sudanic) languages 90%,French
+Burkina Fasoa,French (official); native African (Sudanic) languages 90%,French
Burundi,"Kirundi and French (official), Swahili",Kirundi
Cambodia,"Khmer 95% (official), French, English",Khmer
Cameroon,"French, English (both official); 24 major African language groups",French
@@ -53,6 +53,7 @@ East Timor,"Tetum, Portuguese (official); Bahasa Indonesia, English; other indig
Ecuador,"Spanish (official), Quechua, other Amerindian languages",Spanish
Egypt,"Arabic (official), English and French widely understood by educated classes",Arabic
El Salvador,"Spanish, Nahua (among some Amerindians)",Spanish
+England,English,English
Equatorial Guinea,"Spanish, French (both official); pidgin English, Fang, Bubi, Ibo",Spanish
Eritrea,"Afar, Arabic, Tigre and Kunama, Tigrinya, other Cushitic languages",Afar
Estonia,"Estonian 67% (official), Russian 30%, other (2000)",Estonian
@@ -65,18 +66,21 @@ Gambia,"English (official), Mandinka, Wolof, Fula, other indigenous",English
Georgia,"Georgian 71% (official), Russian 9%, Armenian 7%, Azerbaijani 6%, other 7% (Abkhaz is the official language in Abkhazia)",Georgian
Germany,German,German
Ghana,"English (official), African languages (including Akan, Moshi-Dagomba, Ewe, and Ga)",English
+Great Britain,English,English
Greece,"Greek 99% (official), English, French",Greek
+Greenland,Danish,Danish
Grenada,"English (official), French patois",English
Guatemala,"Spanish 60%, Amerindian languages 40% (23 officially recognized Amerindian languages, including Quiche, Cakchiquel, Kekchi, Mam, Garifuna, and Xinca)",Spanish
Guinea,"French (official), native tongues (Malinké, Susu, Fulani)",French
Guinea-Bissau,"Portuguese (official), Criolo, African languages",Portuguese
Guyana,"English (official), Amerindian dialects, Creole, Hindi, Urdu",English
-Haiti,Creole and French (both official),Hatian Creole
+Haiti,Creole and French (both official),Haitian Creole
Honduras,"Spanish (official), Amerindian dialects; English widely spoken in business",Spanish
-Hungary,"Magyar (Hungarian) 94%, other 6%",Magyar
+Hong Kong,Chinese (Cantonese),Chinese
+Hungary,"Magyar (Hungarian) 94%, other 6%",Hungarian
Iceland,"Icelandic, English, Nordic languages, German widely spoken",Icelandic
India,"Hindi 30%, English, Bengali, Gujarati, Kashmiri, Malayalam, Marathi, Oriya, Punjabi, Tamil, Telugu, Urdu, Kannada, Assamese, Sanskrit, Sindhi (all official); Hindi/Urdu; 1,600+ dialects",Hindi
-Indonesia,"Bahasa Indonesia (official), English, Dutch, Javanese, and more than 580 other languages and dialects",Bahasa Indonesia
+Indonesia,"Bahasa Indonesia (official), English, Dutch, Javanese, and more than 580 other languages and dialects",Indonesian
Iran,"Persian and Persian dialects 58%, Turkic and Turkic dialects 26%, Kurdish 9%, Luri 2%, Balochi 1%, Arabic 1%, Turkish 1%, other 2%",Persian
Iraq,"Arabic (official), Kurdish (official in Kurdish regions), Assyrian, Armenian",Arabic
Ireland,"English, Irish (Gaelic) (both official)",English
@@ -85,9 +89,11 @@ Italy,"Italian (official); German-, French-, and Slovene-speaking minorities",It
Jamaica,"English, Jamaican Creole",English
Japan,Japanese,Japanese
Jordan,"Arabic (official), English",Arabic
-Kazakhstan,"Kazak (Qazaq, state language) 64%; Russian (official, used in everyday business) 95% (2001 est.)",Kazak
+Kazakhstan,"Kazak (Qazaq, state language) 64%; Russian (official, used in everyday business) 95% (2001 est.)",Kazakh
Kenya,"English (official), Swahili (national), and numerous indigenous languages",English
Kiribati,"English (official), I-Kiribati (Gilbertese)",English
+Democratic People's Republic of Korea,,Korean
+Republic of Korea,,Korean
"Korea, North",Korean,Korean
"Korea, South","Korean, English widely taught",Korean
Kosovo,"Albanian (official), Serbian (official), Bosnian, Turkish, Roma",Albanian
@@ -101,12 +107,12 @@ Liberia,"English 20% (official), some 20 ethnic-group languages",English
Libya,"Arabic, Italian, and English widely understood in major cities",Arabic
Liechtenstein,"German (official), Alemannic dialect",German
Lithuania,"Lithuanian 82% (official), Russian 8%, Polish 6% (2001)",Lithuanian
-Luxembourg,"Luxermbourgish (national) French, German (both administrative)",Luxermbourgish
+Luxembourg,"Luxermbourgish (national) French, German (both administrative)",Luxembourgish
Macedonia,"Macedonian 67%, Albanian 25% (both official); Turkish 4%, Roma 2%, Serbian 1% (2002)",Macedonian
Madagascar,Malagasy and French (both official),Malagasy
Malawi,"Chichewa 57.2% (official), Chinyanja 12.8%, Chiyao 10.1%, Chitumbuka 9.5%, Chisena 2.7%, Chilomwe 2.4%, Chitonga 1.7%, other 3.6% (1998)",Chichewa
-Malaysia,"Bahasa Melayu (Malay, official), English, Chinese dialects (Cantonese, Mandarin, Hokkien, Hakka, Hainan, Foochow), Tamil, Telugu, Malayalam, Panjabi, Thai; several indigenous languages (including Iban, Kadazan) in East Malaysia",Bahasa Melayu
-Maldives,Maldivian Dhivehi (official); English spoken by most government officials,Maldivian Dhivehi
+Malaysia,"Bahasa Melayu (Malay, official), English, Chinese dialects (Cantonese, Mandarin, Hokkien, Hakka, Hainan, Foochow), Tamil, Telugu, Malayalam, Panjabi, Thai; several indigenous languages (including Iban, Kadazan) in East Malaysia",Malay
+Maldives,Maldivian Dhivehi (official); English spoken by most government officials,Divehi
Mali,"French (official), Bambara 80%, numerous African languages",French
Malta,Maltese and English (both official),Maltese
Marshall Islands,"Marshallese 98% (two major dialects from the Malayo-Polynesian family), English widely spoken as a second language (both official); Japanese",Marshallese
@@ -114,7 +120,7 @@ Mauritania,"Hassaniya Arabic (official), Pulaar, Soninke, French, Wolof",Hassani
Mauritius,"English less than 1% (official), Creole 81%, Bojpoori 12%, French 3% (2000)",Creole
Mexico,"Spanish, various Mayan, Nahuatl, and other regional indigenous languages",Spanish
Micronesia,"English (official, common), Chukese, Pohnpeian, Yapase, Kosrean, Ulithian, Woleaian, Nukuoro, Kapingamarangi",English
-Moldova,"Moldovan (official; virtually the same as Romanian), Russian, Gagauz (a Turkish dialect)",Moldovan
+Moldova,"Moldovan (official; virtually the same as Romanian), Russian, Gagauz (a Turkish dialect)",Romanian
Monaco,"French (official), English, Italian, Monégasque",French
Mongolia,"Mongolian, 90%; also Turkic and Russian (1999)",Mongolian
Montenegro,Serbian/Montenegrin (Ijekavian dialect—official),Serbian
@@ -122,32 +128,36 @@ Morocco,"Arabic (official), Berber dialects, French often used for business, gov
Mozambique,"Portuguese 9% (official; second language of 27%), Emakhuwa 26%, Xichangana 11%, Elomwe 8%, Cisena 7%, Echuwabo 6%, other Mozambican languages 32% (1997)",Portuguese
Myanmar,"Burmese, minority languages",Burmese
Namibia,"English 7% (official), Afrikaans is common language of most of the population and of about 60% of the white population, German 32%; indigenous languages: Oshivambo, Herero, Nama",English
-Nauru,"Nauruan (official), English",Nauruan
+Nauru,"Nauruan (official), English",Nauru
Nepal,"Nepali 48% (official), Maithali 12%, Bhojpuri 7%, Tharu 6%, Tamang 5%, others. English spoken by many in government and business (2001)",Nepali
Netherlands,"Dutch, Frisian (both official)",Dutch
New Zealand,"English, Maori (both official)",English
Nicaragua,Spanish 98% (official); English and indigenous languages on Atlantic coast (1995),Spanish
Niger,"French (official), Hausa, Djerma",French
Nigeria,"English (official), Hausa, Yoruba, Ibo, Fulani, and more than 200 others",English
+Northern Ireland,"English, Irish",English
Norway,"Bokmål Norwegian, Nynorsk Norwegian (both official); small Sami- and Finnish-speaking minorities (Sami is official in six municipalities)",Bokmål Norwegian
Oman,"Arabic (official), English, Baluchi, Urdu, Indian dialects",Arabic
Pakistan,"Urdu 8%, English (both official); Punjabi 48%, Sindhi 12%, Siraiki (a Punjabi variant) 10%, Pashtu 8%, Balochi 3%, Hindko 2%, Brahui 1%, Burushaski, and others 8%",Urdu
Palau,"Palauan 64.7%, English 9.4%, Sonsoralese, Tobi, Angaur (each official on some islands), Filipino 13.5%, Chinese 5.7%, Carolinian 1.5%, Japanese 1.5%, other Asian 2.3%, other languages 1.5% (2000)",Palauan
-Palestinian State (proposed),"Arabic, Hebrew, English",Arabic
+Palestine,"Arabic, Hebrew, English",Arabic
Panama,"Spanish (official), English 14%, many bilingual",Spanish
Papua New Guinea,"Tok Pisin (Melanesian Pidgin, the lingua franca), Hiri Motu (in Papua region), English 1%–2%; 715 indigenous languages",Tok Pisin
Paraguay,"Spanish, Guaraní (both official)",Spanish
Peru,"Spanish, Quéchua (both official); Aymara; many minor Amazonian languages",Spanish
-Philippines,"Filipino (based on Tagalog), English (both official); eight major dialects: Tagalog, Cebuano, Ilocano, Hiligaynon or Ilonggo, Bicol, Waray, Pampango, and Pangasinense",Filipino
+Philippines,"Filipino (based on Tagalog), English (both official); eight major dialects: Tagalog, Cebuano, Ilocano, Hiligaynon or Ilonggo, Bicol, Waray, Pampango, and Pangasinense",Tagalog
Poland,Polish 98% (2002),Polish
Portugal,"Portuguese (official), Mirandese (official, but locally used)",Portuguese
+Puerto Rico,,Spanish
Qatar,Arabic (official); English a common second language,Arabic
Romania,"Romanian (official), Hungarian, German",Romanian
Russia,"Russian, others",Russian
Rwanda,"Kinyarwanda, French, and English (all official); Kiswahili in commercial centers",Kinyarwanda
+Scotland,"English, Scots, Scottish Gaelic",English
St. Kitts and Nevis,English,English
St. Lucia,"English (official), French patois",English
St. Vincent and the Grenadines,"English, French patois",English
+American Samoa,"Samoan, English",Samoan
Samoa,"Samoan, English",Samoan
San Marino,Italian,Italian
São Tomé and Príncipe,Portuguese (official),Portuguese
@@ -156,12 +166,12 @@ Senegal,"French (official); Wolof, Pulaar, Jola, Mandinka",French
Serbia,"Serbian (official); Romanian, Hungarian, Slovak, and Croatian (all official in Vojvodina); Albanian (official in Kosovo)",Serbian
Seychelles,"Seselwa Creole 92%, English 5%, French (all official) (2002)",Seselwa Creole
Sierra Leone,"English (official), Mende (southern vernacular), Temne (northern vernacular), Krio (lingua franca)",English
-Singapore,"Mandarin 35%, English 23%, Malay 14.1%, Hokkien 11.4%, Cantonese 5.7%, Teochew 4.9%, Tamil 3.2%, other Chinese dialects 1.8%, other 0.9% (2000)",Mandarin
+Singapore,"Mandarin 35%, English 23%, Malay 14.1%, Hokkien 11.4%, Cantonese 5.7%, Teochew 4.9%, Tamil 3.2%, other Chinese dialects 1.8%, other 0.9% (2000)",Chinese
Slovakia,"Slovak 84% (official), Hungarian 11%, Roma 2%, Ukrainian 1% (2001)",Slovak
-Slovenia,"Slovenian 91%, Serbo-Croatian 5% (2002)",Slovenian
+Slovenia,"Slovenian 91%, Serbo-Croatian 5% (2002)",Slovene
Solomon Islands,"English 1%–2% (official), Melanesian pidgin (lingua franca), 120 indigenous languages",English
Somalia,"Somali (official), Arabic, English, Italian",Somali
-South Africa,"IsiZulu 23.8%, IsiXhosa 17.6%, Afrikaans 13.3%, Sepedi 9.4%, English 8.2%, Setswana 8.2%, Sesotho 7.9%, Xitsonga 4.4%, other 7.2%",IsiZulu
+South Africa,"IsiZulu 23.8%, IsiXhosa 17.6%, Afrikaans 13.3%, Sepedi 9.4%, English 8.2%, Setswana 8.2%, Sesotho 7.9%, Xitsonga 4.4%, other 7.2%",Zulu
South Sudan,"English (official), Arabic (includes Juba and Sudanese variants) (official), regional languages include Dinka, Nuer, Bari, Zande, Shilluk",English
Spain,"Castilian Spanish 74% (official nationwide); Catalan 17%, Galician 7%, Basque 2% (each official regionally)",Spanish
Sri Lanka,"Sinhala 74% (official and national), Tamil 18% (national), other 8%; English is commonly used in government and spoken competently by about 10%",Sinhala
@@ -176,7 +186,7 @@ Tajikistan,"Tajik (official), Russian widely used in government and business",Ta
Tanzania,"Swahili, English (both official); Arabic; many local languages",Swahili
Thailand,"Thai (Siamese), English (secondary language of the elite), ethnic and regional dialects",Thai
Togo,"French (official, commerce); Ewé, Mina (south); Kabyé, Dagomba (north); and many dialects",French
-Tonga,"Tongan (an Austronesian language), English",Tongan
+Tonga,"Tongan (an Austronesian language), English",Tonga
Trinidad and Tobago,"English (official), Hindi, French, Spanish, Chinese",English
Tunisia,"Arabic (official, commerce), French (commerce)",Arabic
Turkey,"Turkish (official), Kurdish, Dimli, Azeri, Kabardian",Turkish
@@ -193,7 +203,8 @@ Vanuatu,"Bislama 23% (a Melanesian pidgin English), English 2%, French 1% (all 3
Vatican City (Holy See),"Italian, Latin, French, various other languages",Italian
Venezuela,"Spanish (official), numerous indigenous dialects",Spanish
Vietnam,"Vietnamese (official); English (increasingly favored as a second language); some French, Chinese, Khmer; mountain area languages (Mon-Khmer and Malayo-Polynesian)",Vietnamese
-Western Sahara (proposed state),"Hassaniya Arabic, Moroccan Arabic",Hassaniya Arabic
+Wales,"Welsh, English",Welsh
+Western Sahara,"Hassaniya Arabic, Moroccan Arabic",Hassaniya Arabic
Yemen,Arabic,Arabic
Zambia,"English (official); major vernaculars: Bemba, Kaonda, Lozi, Lunda, Luvale, Nyanja, Tonga; about 70 other indigenous languages",English
Zimbabwe,"English (official), Shona, Ndebele (Sindebele), numerous minor tribal dialects",English
diff --git a/countries/languages_to_codes.csv b/countries/languages_to_codes.csv
@@ -1,4 +1,4 @@
-language_name,iso639,language_name__1,language_name__2,language_name__3,language_name__4
+language_name,iso639,language_name_1,language_name_2,language_name_3,language_name_4
Abkhaz,ab,Abkhaz,NA,NA,NA
Afar,aa,Afar,NA,NA,NA
Afrikaans,af,Afrikaans,NA,NA,NA
@@ -54,7 +54,7 @@ German,de,German,NA,NA,NA
Greek (modern),el,Greek,NA,NA,NA
Guaraní,gn,Guaraní,NA,NA,NA
Gujarati,gu,Gujarati,NA,NA,NA
-"Haitian, Haitian Creole",ht,Haitian,Creole,NA,NA
+"Haitian, Haitian Creole",ht,Haitian,Haitian Creole,NA,NA
Hausa,ha,Hausa,NA,NA,NA
Hebrew (modern),he,Hebrew,NA,NA,NA
Herero,hz,Herero,NA,NA,NA
@@ -112,8 +112,8 @@ Nauru,na,Nauru,NA,NA,NA
Northern Ndebele,nd,Northern Ndebele,NA,NA,NA
Nepali,ne,Nepali,NA,NA,NA
Ndonga,ng,Ndonga,NA,NA,NA
-Norwegian Bokmål,nb,Norwegian Bokmål,NA,NA,NA
-Norwegian Nynorsk,nn,Norwegian Nynorsk,NA,NA,NA
+Norwegian Bokmål,nb,Norwegian Bokmål,Bokmål Norwegian,NA,NA
+Norwegian Nynorsk,nn,Norwegian Nynorsk,Nynorsk Norwegian,NA,NA
Norwegian,no,Norwegian,NA,NA,NA
Nuosu,ii,Nuosu,NA,NA,NA
Southern Ndebele,nr,Southern Ndebele,NA,NA,NA
@@ -125,7 +125,7 @@ Oriya,or,Oriya,NA,NA,NA
"Ossetian, Ossetic",os,Ossetian,Ossetic,NA,NA
"Panjabi, Punjabi",pa,Panjabi,Punjabi,NA,NA
Pali,pi,Pali,NA,NA,NA
-Persian (Farsi),fa,Persian (Farsi),NA,NA,NA
+Persian (Farsi),fa,Persian,Farsi,NA,NA
Polish,pl,Polish,NA,NA,NA
"Pashto, Pushto",ps,Pashto,Pushto,NA,NA
Portuguese,pt,Portuguese,NA,NA,NA
@@ -158,11 +158,11 @@ Telugu,te,Telugu,NA,NA,NA
Tajik,tg,Tajik,NA,NA,NA
Thai,th,Thai,NA,NA,NA
Tigrinya,ti,Tigrinya,NA,NA,NA
-"Tibetan Standard, Tibetan, Central",bo,Tibetan Standard,Tibetan,Central,NA
+"Tibetan Standard, Tibetan, Central",bo,Tibetan Standard,Tibetan,Central Tibetan,NA
Turkmen,tk,Turkmen,NA,NA,NA
Tagalog,tl,Tagalog,NA,NA,NA
Tswana,tn,Tswana,NA,NA,NA
-Tonga (Tonga Islands),to,Tonga (Tonga Islands),NA,NA,NA
+Tonga (Tonga Islands),to,Tonga,NA,NA,NA
Turkish,tr,Turkish,NA,NA,NA
Tsonga,ts,Tsonga,NA,NA,NA
Tatar,tt,Tatar,NA,NA,NA
diff --git a/countries/scrapeCountryInfo.R b/countries/scrapeCountryInfo.R
@@ -16,7 +16,9 @@ getTableFromWeb <- function(url, xpath) {
return(tableList[[1]])
}
-# Countries and their adjectival forms
+
+# Countries and their adjectival forms ------------------------------------
+
countriesToAdjectivals <- getTableFromWeb("https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations",
"//*[@id=\"mw-content-text\"]/table[1]")
@@ -40,7 +42,8 @@ for (i in seq_len(max(vapply(splitAdjectivals, length, 1)))) {
write.csv(countriesToAdjectivals, "countries_to_adjectivals.csv", row.names = FALSE)
-# Countries to languages
+# Countries to languages --------------------------------------------------
+
countriesToLanguages <- getTableFromWeb("http://www.infoplease.com/ipa/A0855611.html",
"//*[@id=\"Pg\"]/table[1]")
@@ -50,7 +53,9 @@ countriesToLanguages <- countriesToLanguages %>%
first_language = sub(" and .*", "", first_language))
write.csv(countriesToLanguages, "countries_to_languages.csv", row.names = FALSE)
-# Languages to ISO-639-1 codes
+
+# Languages to ISO-639-1 codes --------------------------------------------
+
languagesToCodes <- getTableFromWeb("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes",
"//*[@id=\"mw-content-text\"]/table[2]")