moma-collection

The Museum of Modern Art (MoMA) collection data.
git clone https://git.eamoncaddigan.net/moma-collection.git
Log | Files | Refs | README | LICENSE

commit fa490d61701b8ead8f95d19839e1630a7c2c03b4
parent 768004d61b9f1f5d17fc68b91189f1a046c66778
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Wed, 29 Jul 2015 14:38:56 -0400

Oh man did this experience mission creep. So now I'm figuring out the mapping between countries (e.g., England), adjectivals (e.g., English), spoken languages, and the ISO-639-1 codes.

Diffstat:
Acountries/countries_to_adjectivals.csv | 266+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acountries/countries_to_languages.csv | 199+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acountries/languages_to_codes.csv | 186+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acountries/scrapeCountryInfo.R | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 718 insertions(+), 0 deletions(-)

diff --git a/countries/countries_to_adjectivals.csv b/countries/countries_to_adjectivals.csv @@ -0,0 +1,266 @@ +country_name,adjectivals,natural_country_name,adjectival_1,adjectival_2,adjectival_3,adjectival_4 +Abkhazia (region in Georgia),"Abkhaz, Abkhazian",Abkhazia (region in Georgia),Abkhaz,Abkhazian,NA,NA +Afghanistan,Afghan,Afghanistan,Afghan,NA,NA,NA +Åland Islands,Åland Island,Åland Islands,Åland Island,NA,NA,NA +Albania,Albanian,Albania,Albanian,NA,NA,NA +Algeria,Algerian,Algeria,Algerian,NA,NA,NA +American Samoa,American Samoan,American Samoa,American Samoan,NA,NA,NA +Andorra,Andorran,Andorra,Andorran,NA,NA,NA +Angola,Angolan,Angola,Angolan,NA,NA,NA +Anguilla,Anguillan,Anguilla,Anguillan,NA,NA,NA +Antarctica,Antarctic,Antarctica,Antarctic,NA,NA,NA +Antigua and Barbuda,"Antiguan, Barbudan",Antigua and Barbuda,Antiguan,Barbudan,NA,NA +Argentina,Argentine,Argentina,Argentine,NA,NA,NA +Armenia,Armenian,Armenia,Armenian,NA,NA,NA +Aruba,Aruban,Aruba,Aruban,NA,NA,NA +Australia,Australian,Australia,Australian,NA,NA,NA +Austria,Austrian,Austria,Austrian,NA,NA,NA +Azerbaijan,"Azerbaijani, Azeri",Azerbaijan,Azerbaijani,Azeri,NA,NA +"Bahamas, The",Bahamian,The Bahamas,Bahamian,NA,NA,NA +Bahrain,Bahraini,Bahrain,Bahraini,NA,NA,NA +Bangladesh,Bangladeshi,Bangladesh,Bangladeshi,NA,NA,NA +Barbados,Barbadian,Barbados,Barbadian,NA,NA,NA +Belarus,Belarusian,Belarus,Belarusian,NA,NA,NA +Belgium,Belgian,Belgium,Belgian,NA,NA,NA +Belize,Belizean,Belize,Belizean,NA,NA,NA +Benin,"Beninese, Beninois",Benin,Beninese,Beninois,NA,NA +Bermuda,"Bermudian, Bermudan",Bermuda,Bermudian,Bermudan,NA,NA +Bhutan,Bhutanese,Bhutan,Bhutanese,NA,NA,NA +Bolivia,Bolivian,Bolivia,Bolivian,NA,NA,NA +Bonaire,Bonaire,Bonaire,Bonaire,NA,NA,NA +Bosnia and Herzegovina,"Bosnian, Herzegovinian",Bosnia and Herzegovina,Bosnian,Herzegovinian,NA,NA +Botswana,"Motswana, Botswanan",Botswana,Motswana,Botswanan,NA,NA +Bouvet Island,Bouvet Island,Bouvet Island,Bouvet Island,NA,NA,NA 
+Brazil,Brazilian,Brazil,Brazilian,NA,NA,NA +British Indian Ocean Territory,BIOT,British Indian Ocean Territory,BIOT,NA,NA,NA +Brunei,Bruneian,Brunei,Bruneian,NA,NA,NA +Bulgaria,Bulgarian,Bulgaria,Bulgarian,NA,NA,NA +Burkina Fasoa,Burkinabé,Burkina Fasoa,Burkinabé,NA,NA,NA +Burma,Burmese,Burma,Burmese,NA,NA,NA +Burundi,Burundian,Burundi,Burundian,NA,NA,NA +Cabo Verde,Cabo Verdean,Cabo Verde,Cabo Verdean,NA,NA,NA +Cambodia,Cambodian,Cambodia,Cambodian,NA,NA,NA +Cameroon,Cameroonian,Cameroon,Cameroonian,NA,NA,NA +Canada,Canadian,Canada,Canadian,NA,NA,NA +Cape Verde,Cabo Verdean,Cape Verde,Cabo Verdean,NA,NA,NA +Cayman Islands,Caymanian,Cayman Islands,Caymanian,NA,NA,NA +Central African Republic,Central African,Central African Republic,Central African,NA,NA,NA +Chad,Chadian,Chad,Chadian,NA,NA,NA +Chile,Chilean,Chile,Chilean,NA,NA,NA +"China, People's Republic of",Chinese,People's Republic of China,Chinese,NA,NA,NA +"China, Republic of","See Taiwan, below",Republic of China,Chinese,Taiwanese,NA,NA +Christmas Island,Christmas Island,Christmas Island,Christmas Island,NA,NA,NA +Cocos (Keeling) Islands,Cocos Island,Cocos (Keeling) Islands,Cocos Island,NA,NA,NA +Colombia,Colombian,Colombia,Colombian,NA,NA,NA +Comoros,"Comoran, Comorian",Comoros,Comoran,Comorian,NA,NA +"Congo, Democratic Republic of the",Congolese,Democratic Republic of the Congo,Congolese,NA,NA,NA +"Congo, Republic of the",NA,Republic of the Congo,Congolese,NA,NA,NA +Cook Islands,Cook Island,Cook Islands,Cook Island,NA,NA,NA +Costa Rica,Costa Rican,Costa Rica,Costa Rican,NA,NA,NA +Côte d'Ivoire,Ivorian,Côte d'Ivoire,Ivorian,NA,NA,NA +Croatia,Croatian,Croatia,Croatian,NA,NA,NA +Cuba,Cuban,Cuba,Cuban,NA,NA,NA +Curaçao,Curaçaoan,Curaçao,Curaçaoan,NA,NA,NA +Cyprus,Cypriot,Cyprus,Cypriot,NA,NA,NA +Czech Republic,Czech,Czech Republic,Czech,NA,NA,NA +Denmark,Danish,Denmark,Danish,NA,NA,NA +Djibouti,Djiboutian,Djibouti,Djiboutian,NA,NA,NA +Dominica,Dominican,Dominica,Dominican,NA,NA,NA +Dominican 
Republic,Dominican,Dominican Republic,Dominican,NA,NA,NA +East Timor,Timorese,East Timor,Timorese,NA,NA,NA +Ecuador,Ecuadorian,Ecuador,Ecuadorian,NA,NA,NA +Egypt,Egyptian,Egypt,Egyptian,NA,NA,NA +El Salvador,Salvadoran,El Salvador,Salvadoran,NA,NA,NA +England,"English, British",England,English,British,NA,NA +Equatorial Guinea,"Equatorial Guinean, Equatoguinean",Equatorial Guinea,Equatorial Guinean,Equatoguinean,NA,NA +Eritrea,Eritrean,Eritrea,Eritrean,NA,NA,NA +Estonia,Estonian,Estonia,Estonian,NA,NA,NA +Ethiopia,Ethiopian,Ethiopia,Ethiopian,NA,NA,NA +European Union,European,European Union,European,NA,NA,NA +Falkland Islands,Falkland Island,Falkland Islands,Falkland Island,NA,NA,NA +Faroe Islands,Faroese,Faroe Islands,Faroese,NA,NA,NA +Fiji,Fijian,Fiji,Fijian,NA,NA,NA +Finland,Finnish,Finland,Finnish,NA,NA,NA +France,French,France,French,NA,NA,NA +French Guiana,French Guianese,French Guiana,French Guianese,NA,NA,NA +French Polynesia,French Polynesian,French Polynesia,French Polynesian,NA,NA,NA +French Southern Territories,French Southern Territories,French Southern Territories,French Southern Territories,NA,NA,NA +Gabon,Gabonese,Gabon,Gabonese,NA,NA,NA +"Gambia, The",Gambian,The Gambia,Gambian,NA,NA,NA +Georgia,Georgian,Georgia,Georgian,NA,NA,NA +Germany,German,Germany,German,NA,NA,NA +Ghana,Ghanaian,Ghana,Ghanaian,NA,NA,NA +Gibraltar,Gibraltar,Gibraltar,Gibraltar,NA,NA,NA +Great Britain,"British, UK",Great Britain,British,UK,NA,NA +Greece,"Greek, Hellenic",Greece,Greek,Hellenic,NA,NA +Greenland,Greenlandic,Greenland,Greenlandic,NA,NA,NA +Grenada,Grenadian,Grenada,Grenadian,NA,NA,NA +Guadeloupe,Guadeloupe,Guadeloupe,Guadeloupe,NA,NA,NA +Guam,"Guamanian, Guambat",Guam,Guamanian,Guambat,NA,NA +Guatemala,Guatemalan,Guatemala,Guatemalan,NA,NA,NA +Guernsey,Channel Island,Guernsey,Channel Island,NA,NA,NA +Guinea,Guinean,Guinea,Guinean,NA,NA,NA +Guinea-Bissau,Bissau-Guinean,Guinea-Bissau,Bissau-Guinean,NA,NA,NA +Guyana,Guyanese,Guyana,Guyanese,NA,NA,NA 
+Haiti,Haitian,Haiti,Haitian,NA,NA,NA +Heard Island and McDonald Islands,"Heard Island, McDonald Islands",Heard Island and McDonald Islands,Heard Island,McDonald Islands,NA,NA +Honduras,Honduran,Honduras,Honduran,NA,NA,NA +Hong Kong,"Hong Kong, Chinese",Hong Kong,Hong Kong,Chinese,NA,NA +Hungary,"Hungarian, Magyar",Hungary,Hungarian,Magyar,NA,NA +Iceland,Icelandic,Iceland,Icelandic,NA,NA,NA +India,Indian,India,Indian,NA,NA,NA +Indonesia,Indonesian,Indonesia,Indonesian,NA,NA,NA +Iran,"Iranian, Persian",Iran,Iranian,Persian,NA,NA +Iraq,Iraqi,Iraq,Iraqi,NA,NA,NA +Ireland,Irish,Ireland,Irish,NA,NA,NA +Isle of Man,Manx,Isle of Man,Manx,NA,NA,NA +Israel,Israeli,Israel,Israeli,NA,NA,NA +Italy,Italian,Italy,Italian,NA,NA,NA +Ivory Coast,Ivorian,Ivory Coast,Ivorian,NA,NA,NA +Jamaica,Jamaican,Jamaica,Jamaican,NA,NA,NA +Jan Mayen,Jan Mayen,Jan Mayen,Jan Mayen,NA,NA,NA +Japan,Japanese,Japan,Japanese,NA,NA,NA +Jersey,Channel Island,Jersey,Channel Island,NA,NA,NA +Jordan,Jordanian,Jordan,Jordanian,NA,NA,NA +Kazakhstan,"Kazakhstani, Kazakh",Kazakhstan,Kazakhstani,Kazakh,NA,NA +Kenya,Kenyan,Kenya,Kenyan,NA,NA,NA +Kiribati,I-Kiribati,Kiribati,I-Kiribati,NA,NA,NA +"Korea, Democratic People's Republic of",North Korean,Democratic People's Republic of Korea,North Korean,NA,NA,NA +"Korea, Republic of",South Korean,Republic of Korea,South Korean,NA,NA,NA +Kosovo,"Kosovar, Kosovan",Kosovo,Kosovar,Kosovan,NA,NA +Kuwait,Kuwaiti,Kuwait,Kuwaiti,NA,NA,NA +Kyrgyzstan,"Kyrgyzstani, Kyrgyz, Kirgiz, Kirghiz",Kyrgyzstan,Kyrgyzstani,Kyrgyz,Kirgiz,Kirghiz +Laos,"Lao, Laotian",Laos,Lao,Laotian,NA,NA +Latvia,Latvian,Latvia,Latvian,NA,NA,NA +Lebanon,Lebanese,Lebanon,Lebanese,NA,NA,NA +Lesotho,Basotho,Lesotho,Basotho,NA,NA,NA +Liberia,Liberian,Liberia,Liberian,NA,NA,NA +Libya,Libyan,Libya,Libyan,NA,NA,NA +Liechtenstein,Liechtenstein,Liechtenstein,Liechtenstein,NA,NA,NA +Lithuania,Lithuanian,Lithuania,Lithuanian,NA,NA,NA +Luxembourg,"Luxembourg, Luxembourgish",Luxembourg,Luxembourg,Luxembourgish,NA,NA 
+Macau,"Macanese, Chinese",Macau,Macanese,Chinese,NA,NA +"Macedonia, Republic of",Macedonian,Republic of Macedonia,Macedonian,NA,NA,NA +Madagascar,Malagasy,Madagascar,Malagasy,NA,NA,NA +Malawi,Malawian,Malawi,Malawian,NA,NA,NA +Malaysia,Malaysian,Malaysia,Malaysian,NA,NA,NA +Maldives,Maldivian,Maldives,Maldivian,NA,NA,NA +Mali,"Malian, Malinese",Mali,Malian,Malinese,NA,NA +Malta,Maltese,Malta,Maltese,NA,NA,NA +Marshall Islands,Marshallese,Marshall Islands,Marshallese,NA,NA,NA +Martinique,"Martiniquais, Martinican",Martinique,Martiniquais,Martinican,NA,NA +Mauritania,Mauritanian,Mauritania,Mauritanian,NA,NA,NA +Mauritius,Mauritian,Mauritius,Mauritian,NA,NA,NA +Mayotte,Mahoran,Mayotte,Mahoran,NA,NA,NA +Mexico,Mexican,Mexico,Mexican,NA,NA,NA +"Micronesia, Federated States of",Micronesian,Federated States of Micronesia,Micronesian,NA,NA,NA +Moldova,Moldovan,Moldova,Moldovan,NA,NA,NA +Monaco,"Monégasque, Monacan",Monaco,Monégasque,Monacan,NA,NA +Mongolia,Mongolian,Mongolia,Mongolian,NA,NA,NA +Montenegro,Montenegrin,Montenegro,Montenegrin,NA,NA,NA +Montserrat,Montserratian,Montserrat,Montserratian,NA,NA,NA +Morocco,Moroccan,Morocco,Moroccan,NA,NA,NA +Mozambique,Mozambican,Mozambique,Mozambican,NA,NA,NA +Myanmar,Burmese,Myanmar,Burmese,NA,NA,NA +Namibia,Namibian,Namibia,Namibian,NA,NA,NA +Nauru,Nauruan,Nauru,Nauruan,NA,NA,NA +Nepal,"Nepali, Nepalese",Nepal,Nepali,Nepalese,NA,NA +Netherlands,"Dutch, Netherlandic",Netherlands,Dutch,Netherlandic,NA,NA +New Caledonia,New Caledonian,New Caledonia,New Caledonian,NA,NA,NA +New Zealand,"New Zealand, NZ",New Zealand,New Zealand,NZ,NA,NA +Nicaragua,Nicaraguan,Nicaragua,Nicaraguan,NA,NA,NA +Niger,Nigerien,Niger,Nigerien,NA,NA,NA +Nigeria,Nigerian,Nigeria,Nigerian,NA,NA,NA +Niue,Niuean,Niue,Niuean,NA,NA,NA +Norfolk Island,Norfolk Island,Norfolk Island,Norfolk Island,NA,NA,NA +Northern Ireland,"Northern Irish, British",Northern Ireland,Northern Irish,British,NA,NA +Northern Mariana Islands,Northern Marianan,Northern Mariana 
Islands,Northern Marianan,NA,NA,NA +Norway,Norwegian,Norway,Norwegian,NA,NA,NA +Oman,Omani,Oman,Omani,NA,NA,NA +Pakistan,Pakistani,Pakistan,Pakistani,NA,NA,NA +Palau,Palauan,Palau,Palauan,NA,NA,NA +Palestine,Palestinian,Palestine,Palestinian,NA,NA,NA +Panama,Panamanian,Panama,Panamanian,NA,NA,NA +Papua New Guinea,"Papua New Guinean, Papuan",Papua New Guinea,Papua New Guinean,Papuan,NA,NA +Paraguay,Paraguayan,Paraguay,Paraguayan,NA,NA,NA +Peru,Peruvian,Peru,Peruvian,NA,NA,NA +Philippines,"Philippine, Filipino",Philippines,Philippine,Filipino,NA,NA +Pitcairn Islands,Pitcairn Island,Pitcairn Islands,Pitcairn Island,NA,NA,NA +Poland,Polish,Poland,Polish,NA,NA,NA +Portugal,Portuguese,Portugal,Portuguese,NA,NA,NA +Puerto Rico,Puerto Rican,Puerto Rico,Puerto Rican,NA,NA,NA +Qatar,Qatari,Qatar,Qatari,NA,NA,NA +Réunion,"Réunionese, Réunionnais",Réunion,Réunionese,Réunionnais,NA,NA +Romania,Romanian,Romania,Romanian,NA,NA,NA +Russia,Russian,Russia,Russian,NA,NA,NA +Rwanda,Rwandan,Rwanda,Rwandan,NA,NA,NA +Saba,Saba,Saba,Saba,NA,NA,NA +Saint Barthélemy,Barthélemois,Saint Barthélemy,Barthélemois,NA,NA,NA +"Saint Helena, Ascension and Tristan da Cunha",Saint Helenian,Saint Ascension and Tristan da Cunha Helena,Saint Helenian,NA,NA,NA +Saint Kitts and Nevis,"Kittitian, Nevisian",Saint Kitts and Nevis,Kittitian,Nevisian,NA,NA +Saint Lucia,Saint Lucian,Saint Lucia,Saint Lucian,NA,NA,NA +Saint Martin,Saint-Martinoise,Saint Martin,Saint-Martinoise,NA,NA,NA +Saint Pierre and Miquelon,"Saint-Pierrais, Miquelonnais",Saint Pierre and Miquelon,Saint-Pierrais,Miquelonnais,NA,NA +Saint Vincent and the Grenadines,"Saint Vincentian, Vincentian",Saint Vincent and the Grenadines,Saint Vincentian,Vincentian,NA,NA +Samoa,Samoan,Samoa,Samoan,NA,NA,NA +San Marino,Sammarinese,San Marino,Sammarinese,NA,NA,NA +São Tomé and Príncipe,São Toméan,São Tomé and Príncipe,São Toméan,NA,NA,NA +Saudi Arabia,"Saudi, Saudi Arabian",Saudi Arabia,Saudi,Saudi Arabian,NA,NA +Scotland,"Scots, Scottish, 
British",Scotland,Scots,Scottish,British,NA +Senegal,Senegalese,Senegal,Senegalese,NA,NA,NA +Serbia,Serbian,Serbia,Serbian,NA,NA,NA +Seychelles,Seychellois,Seychelles,Seychellois,NA,NA,NA +Sierra Leone,Sierra Leonean,Sierra Leone,Sierra Leonean,NA,NA,NA +Singapore,"Singapore, Singaporean",Singapore,Singapore,Singaporean,NA,NA +Sint Eustatius,"Sint Eustatius, Statian",Sint Eustatius,Sint Eustatius,Statian,NA,NA +Sint Maarten,Sint Maarten,Sint Maarten,Sint Maarten,NA,NA,NA +Slovakia,Slovak,Slovakia,Slovak,NA,NA,NA +Slovenia,"Slovenian, Slovene",Slovenia,Slovenian,Slovene,NA,NA +Solomon Islands,Solomon Island,Solomon Islands,Solomon Island,NA,NA,NA +Somalia,"Somali, Somalian",Somalia,Somali,Somalian,NA,NA +South Africa,South African,South Africa,South African,NA,NA,NA +South Georgia and the South Sandwich Islands,"South Georgia, South Sandwich Islands",South Georgia and the South Sandwich Islands,South Georgia,South Sandwich Islands,NA,NA +South Ossetia (Region of Georgia),South Ossetian,South Ossetia (Region of Georgia),South Ossetian,NA,NA,NA +South Sudan,South Sudanese,South Sudan,South Sudanese,NA,NA,NA +Spain,Spanish,Spain,Spanish,NA,NA,NA +Sri Lanka,Sri Lankan,Sri Lanka,Sri Lankan,NA,NA,NA +Sudan,Sudanese,Sudan,Sudanese,NA,NA,NA +Surinam,Surinamese,Surinam,Surinamese,NA,NA,NA +Svalbard,Svalbard,Svalbard,Svalbard,NA,NA,NA +Swaziland,Swazi,Swaziland,Swazi,NA,NA,NA +Sweden,Swedish,Sweden,Swedish,NA,NA,NA +Switzerland,Swiss,Switzerland,Swiss,NA,NA,NA +Syria,Syrian,Syria,Syrian,NA,NA,NA +Taiwan,"Chinese, Taiwanese",Taiwan,Chinese,Taiwanese,NA,NA +Tajikistan,Tajikistani,Tajikistan,Tajikistani,NA,NA,NA +Tanzania,Tanzanian,Tanzania,Tanzanian,NA,NA,NA +Thailand,Thai,Thailand,Thai,NA,NA,NA +Timor-Leste,Timorese,Timor-Leste,Timorese,NA,NA,NA +Togo,Togolese,Togo,Togolese,NA,NA,NA +Tokelau,Tokelauan,Tokelau,Tokelauan,NA,NA,NA +Tonga,Tongan,Tonga,Tongan,NA,NA,NA +Trinidad and Tobago,"Trinidadian, Tobagonian",Trinidad and Tobago,Trinidadian,Tobagonian,NA,NA 
+Tunisia,Tunisian,Tunisia,Tunisian,NA,NA,NA +Turkey,Turkish,Turkey,Turkish,NA,NA,NA +Turkmenistan,Turkmen,Turkmenistan,Turkmen,NA,NA,NA +Turks and Caicos Islands,Turks and Caicos Island,Turks and Caicos Islands,Turks and Caicos Island,NA,NA,NA +Tuvalu,Tuvaluan,Tuvalu,Tuvaluan,NA,NA,NA +Uganda,Ugandan,Uganda,Ugandan,NA,NA,NA +Ukraine,Ukrainian,Ukraine,Ukrainian,NA,NA,NA +United Arab Emirates,"Emirati, Emirian, Emiri",United Arab Emirates,Emirati,Emirian,Emiri,NA +United Kingdom,"British, UK",United Kingdom,British,UK,NA,NA +United States,"United States, US, American",United States,United States,US,American,NA +Uruguay,Uruguayan,Uruguay,Uruguayan,NA,NA,NA +Uzbekistan,"Uzbekistani, Uzbek",Uzbekistan,Uzbekistani,Uzbek,NA,NA +Vanuatu,"Ni-Vanuatu, Vanuatuan",Vanuatu,Ni-Vanuatu,Vanuatuan,NA,NA +Vatican City State,Vatican,Vatican City State,Vatican,NA,NA,NA +Venezuela,Venezuelan,Venezuela,Venezuelan,NA,NA,NA +Vietnam,Vietnamese,Vietnam,Vietnamese,NA,NA,NA +"Virgin Islands, British",British Virgin Island,Virgin British Islands,British Virgin Island,NA,NA,NA +"Virgin Islands, United States",U.S. Virgin Island,Virgin United States Islands,U.S. 
Virgin Island,NA,NA,NA +Wales,"Welsh, British",Wales,Welsh,British,NA,NA +Wallis and Futuna,"Wallis and Futuna, Wallisian, Futunan",Wallis and Futuna,Wallis and Futuna,Wallisian,Futunan,NA +Western Sahara,"Sahrawi, Sahrawian, Sahraouian",Western Sahara,Sahrawi,Sahrawian,Sahraouian,NA +Yemen,Yemeni,Yemen,Yemeni,NA,NA,NA +Zambia,Zambian,Zambia,Zambian,NA,NA,NA +Zimbabwe,Zimbabwean,Zimbabwe,Zimbabwean,NA,NA,NA diff --git a/countries/countries_to_languages.csv b/countries/countries_to_languages.csv @@ -0,0 +1,199 @@ +country_name,languages,first_language +Afghanistan,"Dari Persian, Pashtu (both official), other Turkic and minor languages",Dari Persian +Albania,"Albanian (Tosk is the official dialect), Greek",Albanian +Algeria,"Arabic (official), French, Berber dialects",Arabic +Andorra,"Catalán (official), French, Castilian, Portuguese",Catalán +Angola,"Portuguese (official), Bantu and other African languages",Portuguese +Antigua and Barbuda,"English (official), local dialects",English +Argentina,"Spanish (official), English, Italian, German, French",Spanish +Armenia,"Armenian 98%, Yezidi, Russian",Armenian +Australia,"English 79%, native and other languages",English +Austria,"German (official nationwide); Slovene, Croatian, Hungarian (each official in one region)",German +Azerbaijan,"Azerbaijani Turkic 89%, Russian 3%, Armenian 2%, other 6% (1995 est.)",Azerbaijani Turkic +Bahamas,"English (official), Creole (among Haitian immigrants)",English +Bahrain,"Arabic, English, Farsi, Urdu",Arabic +Bangladesh,"Bangla (official), English",Bangla +Barbados,English,English +Belarus,"Belorussian (White Russian), Russian, other",Belorussian +Belgium,"Dutch (Flemish) 60%, French 40%, German less than 1% (all official)",Dutch +Belize,"English (official), Spanish, Mayan, Garifuna (Carib), Creole",English +Benin,"French (official), Fon, Yoruba, tribal languages",French +Bhutan,"Dzongkha (official), Tibetan dialects (among Bhotes), Nepalese dialects (among Nepalese)",Dzongkha 
+Bolivia,"Spanish, Quechua, Aymara (all official)",Spanish +Bosnia and Herzegovina,"Bosnian, Croatian, Serbian",Bosnian +Botswana,"English 2% (official), Setswana 78%, Kalanga 8%, Sekgalagadi 3%, other (2001)",English +Brazil,"Portuguese (official), Spanish, English, French",Portuguese +Brunei,"Malay (official), English, Chinese",Malay +Bulgaria,"Bulgarian 85%, Turkish 10%, Roma 4%",Bulgarian +Burkina Faso,French (official); native African (Sudanic) languages 90%,French +Burundi,"Kirundi and French (official), Swahili",Kirundi +Cambodia,"Khmer 95% (official), French, English",Khmer +Cameroon,"French, English (both official); 24 major African language groups",French +Canada,"English 59.3%, French 23.2% (both official); other 17.5%",English +Cape Verde,"Portuguese, Criuolo",Portuguese +Central African Republic,"French (official), Sangho (lingua franca, national), tribal languages",French +Chad,"French, Arabic (both official); Sara; more than 120 languages and dialects",French +Chile,Spanish,Spanish +China,"Standard Chinese (Mandarin/Putonghua), Yue (Cantonese), Wu (Shanghaiese), Minbei (Fuzhou), Minnan (Hokkien-Taiwanese), Xiang, Gan, Hakka dialects, minority languages",Chinese +Colombia,Spanish,Spanish +Comoros,"Arabic and French (both official), Shikomoro (Swahili/Arabic blend)",Arabic +"Congo, Democratic Republic of the","French (official), Lingala, Kingwana, Kikongo, Tshiluba",French +"Congo, Republic of","French (official), Lingala, Monokutuba, Kikongo, many local languages and dialects",French +Costa Rica,"Spanish (official), English",Spanish +Côte d'Ivoire,French (official) and African languages (Dioula esp.),French +Croatia,"Croatian 96% (official), other 4% (including Italian, Hungarian, Czech, Slovak, German)",Croatian +Cuba,Spanish,Spanish +Cyprus,"Greek, Turkish (both official); English",Greek +Czech Republic,Czech,Czech +Denmark,"Danish, Faroese, Greenlandic (Inuit dialect), German; English is the predominant second language",Danish +Djibouti,"French and 
Arabic (both official), Somali, Afar",French +Dominica,English (official) and French patois,English +Dominican Republic,Spanish,Spanish +East Timor,"Tetum, Portuguese (official); Bahasa Indonesia, English; other indigenous languages, including Tetum, Galole, Mambae, and Kemak",Tetum +Ecuador,"Spanish (official), Quechua, other Amerindian languages",Spanish +Egypt,"Arabic (official), English and French widely understood by educated classes",Arabic +El Salvador,"Spanish, Nahua (among some Amerindians)",Spanish +Equatorial Guinea,"Spanish, French (both official); pidgin English, Fang, Bubi, Ibo",Spanish +Eritrea,"Afar, Arabic, Tigre and Kunama, Tigrinya, other Cushitic languages",Afar +Estonia,"Estonian 67% (official), Russian 30%, other (2000)",Estonian +Ethiopia,"Amharic, Tigrigna, Orominga, Guaragigna, Somali, Arabic, English, over 70 others",Amharic +Fiji,"English (official), Fijian, Hindustani",English +Finland,"Finnish 92%, Swedish 6% (both official); small Sami- (Lapp) and Russian-speaking minorities",Finnish +France,"French 100%, rapidly declining regional dialects (Provençal, Breton, Alsatian, Corsican, Catalan, Basque, Flemish)",French +Gabon,"French (official), Fang, Myene, Nzebi, Bapounou/Eschira, Bandjabi",French +Gambia,"English (official), Mandinka, Wolof, Fula, other indigenous",English +Georgia,"Georgian 71% (official), Russian 9%, Armenian 7%, Azerbaijani 6%, other 7% (Abkhaz is the official language in Abkhazia)",Georgian +Germany,German,German +Ghana,"English (official), African languages (including Akan, Moshi-Dagomba, Ewe, and Ga)",English +Greece,"Greek 99% (official), English, French",Greek +Grenada,"English (official), French patois",English +Guatemala,"Spanish 60%, Amerindian languages 40% (23 officially recognized Amerindian languages, including Quiche, Cakchiquel, Kekchi, Mam, Garifuna, and Xinca)",Spanish +Guinea,"French (official), native tongues (Malinké, Susu, Fulani)",French +Guinea-Bissau,"Portuguese (official), Criolo, African 
languages",Portuguese +Guyana,"English (official), Amerindian dialects, Creole, Hindi, Urdu",English +Haiti,Creole and French (both official),Hatian Creole +Honduras,"Spanish (official), Amerindian dialects; English widely spoken in business",Spanish +Hungary,"Magyar (Hungarian) 94%, other 6%",Magyar +Iceland,"Icelandic, English, Nordic languages, German widely spoken",Icelandic +India,"Hindi 30%, English, Bengali, Gujarati, Kashmiri, Malayalam, Marathi, Oriya, Punjabi, Tamil, Telugu, Urdu, Kannada, Assamese, Sanskrit, Sindhi (all official); Hindi/Urdu; 1,600+ dialects",Hindi +Indonesia,"Bahasa Indonesia (official), English, Dutch, Javanese, and more than 580 other languages and dialects",Bahasa Indonesia +Iran,"Persian and Persian dialects 58%, Turkic and Turkic dialects 26%, Kurdish 9%, Luri 2%, Balochi 1%, Arabic 1%, Turkish 1%, other 2%",Persian +Iraq,"Arabic (official), Kurdish (official in Kurdish regions), Assyrian, Armenian",Arabic +Ireland,"English, Irish (Gaelic) (both official)",English +Israel,"Hebrew (official), Arabic, English",Hebrew +Italy,"Italian (official); German-, French-, and Slovene-speaking minorities",Italian +Jamaica,"English, Jamaican Creole",English +Japan,Japanese,Japanese +Jordan,"Arabic (official), English",Arabic +Kazakhstan,"Kazak (Qazaq, state language) 64%; Russian (official, used in everyday business) 95% (2001 est.)",Kazak +Kenya,"English (official), Swahili (national), and numerous indigenous languages",English +Kiribati,"English (official), I-Kiribati (Gilbertese)",English +"Korea, North",Korean,Korean +"Korea, South","Korean, English widely taught",Korean +Kosovo,"Albanian (official), Serbian (official), Bosnian, Turkish, Roma",Albanian +Kuwait,"Arabic (official), English",Arabic +Kyrgyzstan,"Kyrgyz, Russian (both official)",Kyrgyz +Laos,"Lao (official), French, English, various ethnic languages",Lao +Latvia,"Latvian 58% (official), Russian 38%, Lithuanian, other (2000)",Latvian +Lebanon,"Arabic (official), French, English, 
Armenian",Arabic +Lesotho,"English, Sesotho (both official); Zulu, Xhosa",English +Liberia,"English 20% (official), some 20 ethnic-group languages",English +Libya,"Arabic, Italian, and English widely understood in major cities",Arabic +Liechtenstein,"German (official), Alemannic dialect",German +Lithuania,"Lithuanian 82% (official), Russian 8%, Polish 6% (2001)",Lithuanian +Luxembourg,"Luxermbourgish (national) French, German (both administrative)",Luxermbourgish +Macedonia,"Macedonian 67%, Albanian 25% (both official); Turkish 4%, Roma 2%, Serbian 1% (2002)",Macedonian +Madagascar,Malagasy and French (both official),Malagasy +Malawi,"Chichewa 57.2% (official), Chinyanja 12.8%, Chiyao 10.1%, Chitumbuka 9.5%, Chisena 2.7%, Chilomwe 2.4%, Chitonga 1.7%, other 3.6% (1998)",Chichewa +Malaysia,"Bahasa Melayu (Malay, official), English, Chinese dialects (Cantonese, Mandarin, Hokkien, Hakka, Hainan, Foochow), Tamil, Telugu, Malayalam, Panjabi, Thai; several indigenous languages (including Iban, Kadazan) in East Malaysia",Bahasa Melayu +Maldives,Maldivian Dhivehi (official); English spoken by most government officials,Maldivian Dhivehi +Mali,"French (official), Bambara 80%, numerous African languages",French +Malta,Maltese and English (both official),Maltese +Marshall Islands,"Marshallese 98% (two major dialects from the Malayo-Polynesian family), English widely spoken as a second language (both official); Japanese",Marshallese +Mauritania,"Hassaniya Arabic (official), Pulaar, Soninke, French, Wolof",Hassaniya Arabic +Mauritius,"English less than 1% (official), Creole 81%, Bojpoori 12%, French 3% (2000)",Creole +Mexico,"Spanish, various Mayan, Nahuatl, and other regional indigenous languages",Spanish +Micronesia,"English (official, common), Chukese, Pohnpeian, Yapase, Kosrean, Ulithian, Woleaian, Nukuoro, Kapingamarangi",English +Moldova,"Moldovan (official; virtually the same as Romanian), Russian, Gagauz (a Turkish dialect)",Moldovan +Monaco,"French (official), English, 
Italian, Monégasque",French +Mongolia,"Mongolian, 90%; also Turkic and Russian (1999)",Mongolian +Montenegro,Serbian/Montenegrin (Ijekavian dialect—official),Serbian +Morocco,"Arabic (official), Berber dialects, French often used for business, government, and diplomacy",Arabic +Mozambique,"Portuguese 9% (official; second language of 27%), Emakhuwa 26%, Xichangana 11%, Elomwe 8%, Cisena 7%, Echuwabo 6%, other Mozambican languages 32% (1997)",Portuguese +Myanmar,"Burmese, minority languages",Burmese +Namibia,"English 7% (official), Afrikaans is common language of most of the population and of about 60% of the white population, German 32%; indigenous languages: Oshivambo, Herero, Nama",English +Nauru,"Nauruan (official), English",Nauruan +Nepal,"Nepali 48% (official), Maithali 12%, Bhojpuri 7%, Tharu 6%, Tamang 5%, others. English spoken by many in government and business (2001)",Nepali +Netherlands,"Dutch, Frisian (both official)",Dutch +New Zealand,"English, Maori (both official)",English +Nicaragua,Spanish 98% (official); English and indigenous languages on Atlantic coast (1995),Spanish +Niger,"French (official), Hausa, Djerma",French +Nigeria,"English (official), Hausa, Yoruba, Ibo, Fulani, and more than 200 others",English +Norway,"Bokmål Norwegian, Nynorsk Norwegian (both official); small Sami- and Finnish-speaking minorities (Sami is official in six municipalities)",Bokmål Norwegian +Oman,"Arabic (official), English, Baluchi, Urdu, Indian dialects",Arabic +Pakistan,"Urdu 8%, English (both official); Punjabi 48%, Sindhi 12%, Siraiki (a Punjabi variant) 10%, Pashtu 8%, Balochi 3%, Hindko 2%, Brahui 1%, Burushaski, and others 8%",Urdu +Palau,"Palauan 64.7%, English 9.4%, Sonsoralese, Tobi, Angaur (each official on some islands), Filipino 13.5%, Chinese 5.7%, Carolinian 1.5%, Japanese 1.5%, other Asian 2.3%, other languages 1.5% (2000)",Palauan +Palestinian State (proposed),"Arabic, Hebrew, English",Arabic +Panama,"Spanish (official), English 14%, many 
bilingual",Spanish +Papua New Guinea,"Tok Pisin (Melanesian Pidgin, the lingua franca), Hiri Motu (in Papua region), English 1%–2%; 715 indigenous languages",Tok Pisin +Paraguay,"Spanish, Guaraní (both official)",Spanish +Peru,"Spanish, Quéchua (both official); Aymara; many minor Amazonian languages",Spanish +Philippines,"Filipino (based on Tagalog), English (both official); eight major dialects: Tagalog, Cebuano, Ilocano, Hiligaynon or Ilonggo, Bicol, Waray, Pampango, and Pangasinense",Filipino +Poland,Polish 98% (2002),Polish +Portugal,"Portuguese (official), Mirandese (official, but locally used)",Portuguese +Qatar,Arabic (official); English a common second language,Arabic +Romania,"Romanian (official), Hungarian, German",Romanian +Russia,"Russian, others",Russian +Rwanda,"Kinyarwanda, French, and English (all official); Kiswahili in commercial centers",Kinyarwanda +St. Kitts and Nevis,English,English +St. Lucia,"English (official), French patois",English +St. Vincent and the Grenadines,"English, French patois",English +Samoa,"Samoan, English",Samoan +San Marino,Italian,Italian +São Tomé and Príncipe,Portuguese (official),Portuguese +Saudi Arabia,Arabic,Arabic +Senegal,"French (official); Wolof, Pulaar, Jola, Mandinka",French +Serbia,"Serbian (official); Romanian, Hungarian, Slovak, and Croatian (all official in Vojvodina); Albanian (official in Kosovo)",Serbian +Seychelles,"Seselwa Creole 92%, English 5%, French (all official) (2002)",Seselwa Creole +Sierra Leone,"English (official), Mende (southern vernacular), Temne (northern vernacular), Krio (lingua franca)",English +Singapore,"Mandarin 35%, English 23%, Malay 14.1%, Hokkien 11.4%, Cantonese 5.7%, Teochew 4.9%, Tamil 3.2%, other Chinese dialects 1.8%, other 0.9% (2000)",Mandarin +Slovakia,"Slovak 84% (official), Hungarian 11%, Roma 2%, Ukrainian 1% (2001)",Slovak +Slovenia,"Slovenian 91%, Serbo-Croatian 5% (2002)",Slovenian +Solomon Islands,"English 1%–2% (official), Melanesian pidgin (lingua franca), 120 
indigenous languages",English +Somalia,"Somali (official), Arabic, English, Italian",Somali +South Africa,"IsiZulu 23.8%, IsiXhosa 17.6%, Afrikaans 13.3%, Sepedi 9.4%, English 8.2%, Setswana 8.2%, Sesotho 7.9%, Xitsonga 4.4%, other 7.2%",IsiZulu +South Sudan,"English (official), Arabic (includes Juba and Sudanese variants) (official), regional languages include Dinka, Nuer, Bari, Zande, Shilluk",English +Spain,"Castilian Spanish 74% (official nationwide); Catalan 17%, Galician 7%, Basque 2% (each official regionally)",Spanish +Sri Lanka,"Sinhala 74% (official and national), Tamil 18% (national), other 8%; English is commonly used in government and spoken competently by about 10%",Sinhala +Sudan,"Arabic (official), Nubian, Ta Bedawie, diverse dialects of Nilotic, Nilo-Hamitic, Sudanic languages, English",Arabic +Suriname,"Dutch (official), Surinamese (lingua franca), English widely spoken, Hindustani, Javanese",Dutch +Swaziland,"English, siSwati (both official)",English +Sweden,"Swedish, small Sami- and Finnish-speaking minorities",Swedish +Switzerland,"German 64%, French 20%, Italian 7% (all official); Romansch 0.5% (national)",German +Syria,"Arabic (official); Kurdish, Armenian, Aramaic, Circassian widely understood; French, English somewhat understood",Arabic +Taiwan,"Chinese (Mandarin, official), Taiwanese (Min), Hakka dialects",Chinese +Tajikistan,"Tajik (official), Russian widely used in government and business",Tajik +Tanzania,"Swahili, English (both official); Arabic; many local languages",Swahili +Thailand,"Thai (Siamese), English (secondary language of the elite), ethnic and regional dialects",Thai +Togo,"French (official, commerce); Ewé, Mina (south); Kabyé, Dagomba (north); and many dialects",French +Tonga,"Tongan (an Austronesian language), English",Tongan +Trinidad and Tobago,"English (official), Hindi, French, Spanish, Chinese",English +Tunisia,"Arabic (official, commerce), French (commerce)",Arabic +Turkey,"Turkish (official), Kurdish, Dimli, Azeri, 
Kabardian",Turkish +Turkmenistan,"Turkmen 72%; Russian 12%; Uzbek 9%, other 7%",Turkmen +Tuvalu,"Tuvaluan, English, Samoan, Kiribati (on the island of Nui)",Tuvaluan +Uganda,"English (official), Ganda or Luganda, other Niger-Congo languages, Nilo-Saharan languages, Swahili, Arabic",English +Ukraine,"Ukrainian 67%, Russian 24%, Romanian, Polish, Hungarian",Ukrainian +United Arab Emirates,"Arabic (official), Persian, English, Hindi, Urdu",Arabic +United Kingdom,"English, Welsh, Scots Gaelic",English +United States,"English 82%, Spanish 11% (2000)",English +Uruguay,"Spanish, Portunol, or Brazilero",Spanish +Uzbekistan,"Uzbek 74.3%, Russian 14.2%, Tajik 4.4%, other 7.1%",Uzbek +Vanuatu,"Bislama 23% (a Melanesian pidgin English), English 2%, French 1% (all 3 official); more than 100 local languages 73%",Bislama +Vatican City (Holy See),"Italian, Latin, French, various other languages",Italian +Venezuela,"Spanish (official), numerous indigenous dialects",Spanish +Vietnam,"Vietnamese (official); English (increasingly favored as a second language); some French, Chinese, Khmer; mountain area languages (Mon-Khmer and Malayo-Polynesian)",Vietnamese +Western Sahara (proposed state),"Hassaniya Arabic, Moroccan Arabic",Hassaniya Arabic +Yemen,Arabic,Arabic +Zambia,"English (official); major vernaculars: Bemba, Kaonda, Lozi, Lunda, Luvale, Nyanja, Tonga; about 70 other indigenous languages",English +Zimbabwe,"English (official), Shona, Ndebele (Sindebele), numerous minor tribal dialects",English diff --git a/countries/languages_to_codes.csv b/countries/languages_to_codes.csv @@ -0,0 +1,186 @@ +language_name,iso639,language_name__1,language_name__2,language_name__3,language_name__4 +Abkhaz,ab,Abkhaz,NA,NA,NA +Afar,aa,Afar,NA,NA,NA +Afrikaans,af,Afrikaans,NA,NA,NA +Akan,ak,Akan,NA,NA,NA +Albanian,sq,Albanian,NA,NA,NA +Amharic,am,Amharic,NA,NA,NA +Arabic,ar,Arabic,NA,NA,NA +Aragonese,an,Aragonese,NA,NA,NA +Armenian,hy,Armenian,NA,NA,NA +Assamese,as,Assamese,NA,NA,NA 
+Avaric,av,Avaric,NA,NA,NA +Avestan,ae,Avestan,NA,NA,NA +Aymara,ay,Aymara,NA,NA,NA +Azerbaijani,az,Azerbaijani,NA,NA,NA +Bambara,bm,Bambara,NA,NA,NA +Bashkir,ba,Bashkir,NA,NA,NA +Basque,eu,Basque,NA,NA,NA +Belarusian,be,Belarusian,NA,NA,NA +"Bengali, Bangla",bn,Bengali,Bangla,NA,NA +Bihari,bh,Bihari,NA,NA,NA +Bislama,bi,Bislama,NA,NA,NA +Bosnian,bs,Bosnian,NA,NA,NA +Breton,br,Breton,NA,NA,NA +Bulgarian,bg,Bulgarian,NA,NA,NA +Burmese,my,Burmese,NA,NA,NA +Catalan,ca,Catalan,NA,NA,NA +Chamorro,ch,Chamorro,NA,NA,NA +Chechen,ce,Chechen,NA,NA,NA +"Chichewa, Chewa, Nyanja",ny,Chichewa,Chewa,Nyanja,NA +Chinese,zh,Chinese,NA,NA,NA +Chuvash,cv,Chuvash,NA,NA,NA +Cornish,kw,Cornish,NA,NA,NA +Corsican,co,Corsican,NA,NA,NA +Cree,cr,Cree,NA,NA,NA +Croatian,hr,Croatian,NA,NA,NA +Czech,cs,Czech,NA,NA,NA +Danish,da,Danish,NA,NA,NA +"Divehi, Dhivehi, Maldivian",dv,Divehi,Dhivehi,Maldivian,NA +Dutch,nl,Dutch,NA,NA,NA +Dzongkha,dz,Dzongkha,NA,NA,NA +English,en,English,NA,NA,NA +Esperanto,eo,Esperanto,NA,NA,NA +Estonian,et,Estonian,NA,NA,NA +Ewe,ee,Ewe,NA,NA,NA +Faroese,fo,Faroese,NA,NA,NA +Fijian,fj,Fijian,NA,NA,NA +Finnish,fi,Finnish,NA,NA,NA +French,fr,French,NA,NA,NA +"Fula, Fulah, Pulaar, Pular",ff,Fula,Fulah,Pulaar,Pular +Galician,gl,Galician,NA,NA,NA +Georgian,ka,Georgian,NA,NA,NA +German,de,German,NA,NA,NA +Greek (modern),el,Greek,NA,NA,NA +Guaraní,gn,Guaraní,NA,NA,NA +Gujarati,gu,Gujarati,NA,NA,NA +"Haitian, Haitian Creole",ht,Haitian,Creole,NA,NA +Hausa,ha,Hausa,NA,NA,NA +Hebrew (modern),he,Hebrew,NA,NA,NA +Herero,hz,Herero,NA,NA,NA +Hindi,hi,Hindi,NA,NA,NA +Hiri Motu,ho,Hiri Motu,NA,NA,NA +Hungarian,hu,Hungarian,NA,NA,NA +Interlingua,ia,Interlingua,NA,NA,NA +Indonesian,id,Indonesian,NA,NA,NA +Interlingue,ie,Interlingue,NA,NA,NA +Irish,ga,Irish,NA,NA,NA +Igbo,ig,Igbo,NA,NA,NA +Inupiaq,ik,Inupiaq,NA,NA,NA +Ido,io,Ido,NA,NA,NA +Icelandic,is,Icelandic,NA,NA,NA +Italian,it,Italian,NA,NA,NA +Inuktitut,iu,Inuktitut,NA,NA,NA +Japanese,ja,Japanese,NA,NA,NA 
+Javanese,jv,Javanese,NA,NA,NA +"Kalaallisut, Greenlandic",kl,Kalaallisut,Greenlandic,NA,NA +Kannada,kn,Kannada,NA,NA,NA +Kanuri,kr,Kanuri,NA,NA,NA +Kashmiri,ks,Kashmiri,NA,NA,NA +Kazakh,kk,Kazakh,NA,NA,NA +Khmer,km,Khmer,NA,NA,NA +"Kikuyu, Gikuyu",ki,Kikuyu,Gikuyu,NA,NA +Kinyarwanda,rw,Kinyarwanda,NA,NA,NA +Kyrgyz,ky,Kyrgyz,NA,NA,NA +Komi,kv,Komi,NA,NA,NA +Kongo,kg,Kongo,NA,NA,NA +Korean,ko,Korean,NA,NA,NA +Kurdish,ku,Kurdish,NA,NA,NA +"Kwanyama, Kuanyama",kj,Kwanyama,Kuanyama,NA,NA +Latin,la,Latin,NA,NA,NA +Ladin,,Ladin,NA,NA,NA +"Luxembourgish, Letzeburgesch",lb,Luxembourgish,Letzeburgesch,NA,NA +Ganda,lg,Ganda,NA,NA,NA +"Limburgish, Limburgan, Limburger",li,Limburgish,Limburgan,Limburger,NA +Lingala,ln,Lingala,NA,NA,NA +Lao,lo,Lao,NA,NA,NA +Lithuanian,lt,Lithuanian,NA,NA,NA +Luba-Katanga,lu,Luba-Katanga,NA,NA,NA +Latvian,lv,Latvian,NA,NA,NA +Manx,gv,Manx,NA,NA,NA +Macedonian,mk,Macedonian,NA,NA,NA +Malagasy,mg,Malagasy,NA,NA,NA +Malay,ms,Malay,NA,NA,NA +Malayalam,ml,Malayalam,NA,NA,NA +Maltese,mt,Maltese,NA,NA,NA +Maori,mi,Maori,NA,NA,NA +Marathi (Mara<U+1E6D>hi),mr,Marathi,NA,NA,NA +Marshallese,mh,Marshallese,NA,NA,NA +Mongolian,mn,Mongolian,NA,NA,NA +Nauru,na,Nauru,NA,NA,NA +"Navajo, Navaho",nv,Navajo,Navaho,NA,NA +Northern Ndebele,nd,Northern Ndebele,NA,NA,NA +Nepali,ne,Nepali,NA,NA,NA +Ndonga,ng,Ndonga,NA,NA,NA +Norwegian Bokmål,nb,Norwegian Bokmål,NA,NA,NA +Norwegian Nynorsk,nn,Norwegian Nynorsk,NA,NA,NA +Norwegian,no,Norwegian,NA,NA,NA +Nuosu,ii,Nuosu,NA,NA,NA +Southern Ndebele,nr,Southern Ndebele,NA,NA,NA +Occitan,oc,Occitan,NA,NA,NA +"Ojibwe, Ojibwa",oj,Ojibwe,Ojibwa,NA,NA +"Old Church Slavonic, Church Slavonic, Old Bulgarian",cu,Old Church Slavonic,Church Slavonic,Old Bulgarian,NA +Oromo,om,Oromo,NA,NA,NA +Oriya,or,Oriya,NA,NA,NA +"Ossetian, Ossetic",os,Ossetian,Ossetic,NA,NA +"Panjabi, Punjabi",pa,Panjabi,Punjabi,NA,NA +Pali,pi,Pali,NA,NA,NA +Persian (Farsi),fa,Persian (Farsi),NA,NA,NA +Polish,pl,Polish,NA,NA,NA +"Pashto, 
Pushto",ps,Pashto,Pushto,NA,NA +Portuguese,pt,Portuguese,NA,NA,NA +Quechua,qu,Quechua,NA,NA,NA +Romansh,rm,Romansh,NA,NA,NA +Kirundi,rn,Kirundi,NA,NA,NA +Romanian,ro,Romanian,NA,NA,NA +Russian,ru,Russian,NA,NA,NA +Sanskrit (Sa<U+1E41>sk<U+1E5B>ta),sa,Sanskrit (Sa<U+1E41>sk<U+1E5B>ta),NA,NA,NA +Sardinian,sc,Sardinian,NA,NA,NA +Sindhi,sd,Sindhi,NA,NA,NA +Northern Sami,se,Northern Sami,NA,NA,NA +Samoan,sm,Samoan,NA,NA,NA +Sango,sg,Sango,NA,NA,NA +Serbian,sr,Serbian,NA,NA,NA +"Scottish Gaelic, Gaelic",gd,Scottish Gaelic,Gaelic,NA,NA +Shona,sn,Shona,NA,NA,NA +"Sinhala, Sinhalese",si,Sinhala,Sinhalese,NA,NA +Slovak,sk,Slovak,NA,NA,NA +Slovene,sl,Slovene,NA,NA,NA +Somali,so,Somali,NA,NA,NA +Southern Sotho,st,Southern Sotho,NA,NA,NA +Spanish,es,Spanish,NA,NA,NA +Sundanese,su,Sundanese,NA,NA,NA +Swahili,sw,Swahili,NA,NA,NA +Swati,ss,Swati,NA,NA,NA +Swedish,sv,Swedish,NA,NA,NA +Tamil,ta,Tamil,NA,NA,NA +Telugu,te,Telugu,NA,NA,NA +Tajik,tg,Tajik,NA,NA,NA +Thai,th,Thai,NA,NA,NA +Tigrinya,ti,Tigrinya,NA,NA,NA +"Tibetan Standard, Tibetan, Central",bo,Tibetan Standard,Tibetan,Central,NA +Turkmen,tk,Turkmen,NA,NA,NA +Tagalog,tl,Tagalog,NA,NA,NA +Tswana,tn,Tswana,NA,NA,NA +Tonga (Tonga Islands),to,Tonga (Tonga Islands),NA,NA,NA +Turkish,tr,Turkish,NA,NA,NA +Tsonga,ts,Tsonga,NA,NA,NA +Tatar,tt,Tatar,NA,NA,NA +Twi,tw,Twi,NA,NA,NA +Tahitian,ty,Tahitian,NA,NA,NA +Uyghur,ug,Uyghur,NA,NA,NA +Ukrainian,uk,Ukrainian,NA,NA,NA +Urdu,ur,Urdu,NA,NA,NA +Uzbek,uz,Uzbek,NA,NA,NA +Venda,ve,Venda,NA,NA,NA +Vietnamese,vi,Vietnamese,NA,NA,NA +Volapük,vo,Volapük,NA,NA,NA +Walloon,wa,Walloon,NA,NA,NA +Welsh,cy,Welsh,NA,NA,NA +Wolof,wo,Wolof,NA,NA,NA +Western Frisian,fy,Western Frisian,NA,NA,NA +Xhosa,xh,Xhosa,NA,NA,NA +Yiddish,yi,Yiddish,NA,NA,NA +Yoruba,yo,Yoruba,NA,NA,NA +"Zhuang, Chuang",za,Zhuang,Chuang,NA,NA +Zulu,zu,Zulu,NA,NA,NA diff --git a/countries/scrapeCountryInfo.R b/countries/scrapeCountryInfo.R @@ -0,0 +1,67 @@ +# Here's a BUNCH of code to pull in the tables I want for mapping countries 
# to adjectivals and languages and ISO-639-1 codes. Probably deserves its own
# package.

# For now, the CSVs this writes still need a little manual touch-up. It's not
# much data tho.

library(rvest)
library(dplyr)

# Download a page and return the first HTML table matched by an XPath query.
#
# Args:
#   url:   Address of the page to fetch.
#   xpath: XPath expression selecting the table node(s) of interest.
#
# Returns:
#   The first matched table, as converted by html_table().
#
# Raises:
#   An error naming the url and xpath when the query matches no table.
getTableFromWeb <- function(url, xpath) {
  tableList <- url %>%
    # read_html() is the supported replacement for the html() helper that this
    # script originally called.
    read_html() %>%
    html_nodes(xpath = xpath) %>%
    html_table(fill = TRUE)
  if (length(tableList) == 0) {
    stop("No table matched xpath '", xpath, "' at ", url, call. = FALSE)
  }
  tableList[[1]]
}

# Split a comma-separated character column into numbered columns.
#
# Args:
#   df:     Data frame holding the column to split.
#   column: Name of the character column whose values are comma-separated.
#   prefix: New columns are named paste(prefix, i, sep = "_") for i = 1, 2, ...
#
# Returns:
#   `df` with one extra column per list position; rows with fewer entries than
#   the longest row get NA in the trailing columns.
addSplitColumns <- function(df, column, prefix) {
  pieces <- strsplit(df[[column]], ",[[:space:]]*")
  if (length(pieces) == 0) {
    # Nothing to split; max() of an empty vector would be -Inf.
    return(df)
  }
  for (i in seq_len(max(vapply(pieces, length, integer(1))))) {
    # x[i] past the end of a short row yields NA_character_.
    df[[paste(prefix, i, sep = "_")]] <-
      vapply(pieces, function(x) x[i], character(1))
  }
  df
}

# Countries and their adjectival forms ----
adjectivalsUrl <-
  "https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations"
countriesToAdjectivals <- getTableFromWeb(
  adjectivalsUrl,
  "//*[@id=\"mw-content-text\"]/table[1]"
)

colnames(countriesToAdjectivals) <- sub(
  " ", "_",
  tolower(colnames(countriesToAdjectivals))
)

# Matches one bracketed footnote marker at a time. The previous greedy pattern
# ("\\[.*\\]") also removed any text sitting between two markers in a cell.
footnotePattern <- "\\[[^]]*\\]"

# The first row of the scraped table is dropped (presumably a secondary header
# row -- confirm against the live page). Negative indexing stays correct even
# when the table has fewer than two rows, unlike 2:nrow(...).
countriesToAdjectivals <- countriesToAdjectivals[-1, ] %>%
  select(country_name, adjectivals) %>%
  # Get rid of the wikipedia cruft
  mutate(
    country_name = gsub(footnotePattern, "", country_name),
    adjectivals = gsub(footnotePattern, "", adjectivals)
  ) %>%
  # For later splitting of adjectivals: every " or " becomes a comma, not just
  # the first one.
  mutate(adjectivals = gsub(" or ", ", ", adjectivals)) %>%
  # Rearrange the country name into its natural order ("Bahamas, The" ->
  # "The Bahamas"). The pattern is anchored and takes everything before the
  # first comma, so multi-word names move as a unit.
  mutate(natural_country_name = sub(
    "^([^,]+), ([[:alpha:]].*)$",
    "\\2 \\1",
    country_name
  ))

countriesToAdjectivals <- addSplitColumns(
  countriesToAdjectivals, "adjectivals", "adjectival"
)
write.csv(countriesToAdjectivals, "countries_to_adjectivals.csv",
          row.names = FALSE)


# Countries to languages ----
countriesToLanguages <- getTableFromWeb(
  "http://www.infoplease.com/ipa/A0855611.html",
  "//*[@id=\"Pg\"]/table[1]"
)

colnames(countriesToLanguages) <- c("country_name", "languages")
countriesToLanguages <- countriesToLanguages %>%
  mutate(
    # Keep the text before the first punctuation mark or digit ...
    first_language = sub("[[:space:]]*[[:punct:][:digit:]].*", "", languages),
    # ... and, where two languages are joined by " and ", only the first.
    first_language = sub(" and .*", "", first_language)
  )
write.csv(countriesToLanguages, "countries_to_languages.csv",
          row.names = FALSE)

# Languages to ISO-639-1 codes ----
languagesToCodes <- getTableFromWeb(
  "https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes",
  "//*[@id=\"mw-content-text\"]/table[2]"
)

# Column names are difficult. Just hacking away here.
languagesToCodes <- languagesToCodes[, c(3, 5)]
colnames(languagesToCodes) <- c("language_name", "iso639")

# Some languages have multiple names. The trailing underscore in the prefix is
# deliberate: it reproduces the existing "language_name__1" column names of
# languages_to_codes.csv.
languagesToCodes <- addSplitColumns(
  languagesToCodes, "language_name", "language_name_"
)
write.csv(languagesToCodes, "languages_to_codes.csv", row.names = FALSE)