commit fa490d61701b8ead8f95d19839e1630a7c2c03b4
parent 768004d61b9f1f5d17fc68b91189f1a046c66778
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date: Wed, 29 Jul 2015 14:38:56 -0400
Oh man did this experience mission creep. So now I'm figuring out the mapping between countries (e.g., England), adjectivals (e.g., English), spoken languages, and the ISO-639-1 codes.
Diffstat:
4 files changed, 718 insertions(+), 0 deletions(-)
diff --git a/countries/countries_to_adjectivals.csv b/countries/countries_to_adjectivals.csv
@@ -0,0 +1,266 @@
+country_name,adjectivals,natural_country_name,adjectival_1,adjectival_2,adjectival_3,adjectival_4
+Abkhazia (region in Georgia),"Abkhaz, Abkhazian",Abkhazia (region in Georgia),Abkhaz,Abkhazian,NA,NA
+Afghanistan,Afghan,Afghanistan,Afghan,NA,NA,NA
+Åland Islands,Åland Island,Åland Islands,Åland Island,NA,NA,NA
+Albania,Albanian,Albania,Albanian,NA,NA,NA
+Algeria,Algerian,Algeria,Algerian,NA,NA,NA
+American Samoa,American Samoan,American Samoa,American Samoan,NA,NA,NA
+Andorra,Andorran,Andorra,Andorran,NA,NA,NA
+Angola,Angolan,Angola,Angolan,NA,NA,NA
+Anguilla,Anguillan,Anguilla,Anguillan,NA,NA,NA
+Antarctica,Antarctic,Antarctica,Antarctic,NA,NA,NA
+Antigua and Barbuda,"Antiguan, Barbudan",Antigua and Barbuda,Antiguan,Barbudan,NA,NA
+Argentina,Argentine,Argentina,Argentine,NA,NA,NA
+Armenia,Armenian,Armenia,Armenian,NA,NA,NA
+Aruba,Aruban,Aruba,Aruban,NA,NA,NA
+Australia,Australian,Australia,Australian,NA,NA,NA
+Austria,Austrian,Austria,Austrian,NA,NA,NA
+Azerbaijan,"Azerbaijani, Azeri",Azerbaijan,Azerbaijani,Azeri,NA,NA
+"Bahamas, The",Bahamian,The Bahamas,Bahamian,NA,NA,NA
+Bahrain,Bahraini,Bahrain,Bahraini,NA,NA,NA
+Bangladesh,Bangladeshi,Bangladesh,Bangladeshi,NA,NA,NA
+Barbados,Barbadian,Barbados,Barbadian,NA,NA,NA
+Belarus,Belarusian,Belarus,Belarusian,NA,NA,NA
+Belgium,Belgian,Belgium,Belgian,NA,NA,NA
+Belize,Belizean,Belize,Belizean,NA,NA,NA
+Benin,"Beninese, Beninois",Benin,Beninese,Beninois,NA,NA
+Bermuda,"Bermudian, Bermudan",Bermuda,Bermudian,Bermudan,NA,NA
+Bhutan,Bhutanese,Bhutan,Bhutanese,NA,NA,NA
+Bolivia,Bolivian,Bolivia,Bolivian,NA,NA,NA
+Bonaire,Bonaire,Bonaire,Bonaire,NA,NA,NA
+Bosnia and Herzegovina,"Bosnian, Herzegovinian",Bosnia and Herzegovina,Bosnian,Herzegovinian,NA,NA
+Botswana,"Motswana, Botswanan",Botswana,Motswana,Botswanan,NA,NA
+Bouvet Island,Bouvet Island,Bouvet Island,Bouvet Island,NA,NA,NA
+Brazil,Brazilian,Brazil,Brazilian,NA,NA,NA
+British Indian Ocean Territory,BIOT,British Indian Ocean Territory,BIOT,NA,NA,NA
+Brunei,Bruneian,Brunei,Bruneian,NA,NA,NA
+Bulgaria,Bulgarian,Bulgaria,Bulgarian,NA,NA,NA
+Burkina Faso,Burkinabé,Burkina Faso,Burkinabé,NA,NA,NA
+Burma,Burmese,Burma,Burmese,NA,NA,NA
+Burundi,Burundian,Burundi,Burundian,NA,NA,NA
+Cabo Verde,Cabo Verdean,Cabo Verde,Cabo Verdean,NA,NA,NA
+Cambodia,Cambodian,Cambodia,Cambodian,NA,NA,NA
+Cameroon,Cameroonian,Cameroon,Cameroonian,NA,NA,NA
+Canada,Canadian,Canada,Canadian,NA,NA,NA
+Cape Verde,Cabo Verdean,Cape Verde,Cabo Verdean,NA,NA,NA
+Cayman Islands,Caymanian,Cayman Islands,Caymanian,NA,NA,NA
+Central African Republic,Central African,Central African Republic,Central African,NA,NA,NA
+Chad,Chadian,Chad,Chadian,NA,NA,NA
+Chile,Chilean,Chile,Chilean,NA,NA,NA
+"China, People's Republic of",Chinese,People's Republic of China,Chinese,NA,NA,NA
+"China, Republic of","See Taiwan, below",Republic of China,Chinese,Taiwanese,NA,NA
+Christmas Island,Christmas Island,Christmas Island,Christmas Island,NA,NA,NA
+Cocos (Keeling) Islands,Cocos Island,Cocos (Keeling) Islands,Cocos Island,NA,NA,NA
+Colombia,Colombian,Colombia,Colombian,NA,NA,NA
+Comoros,"Comoran, Comorian",Comoros,Comoran,Comorian,NA,NA
+"Congo, Democratic Republic of the",Congolese,Democratic Republic of the Congo,Congolese,NA,NA,NA
+"Congo, Republic of the",NA,Republic of the Congo,Congolese,NA,NA,NA
+Cook Islands,Cook Island,Cook Islands,Cook Island,NA,NA,NA
+Costa Rica,Costa Rican,Costa Rica,Costa Rican,NA,NA,NA
+Côte d'Ivoire,Ivorian,Côte d'Ivoire,Ivorian,NA,NA,NA
+Croatia,Croatian,Croatia,Croatian,NA,NA,NA
+Cuba,Cuban,Cuba,Cuban,NA,NA,NA
+Curaçao,Curaçaoan,Curaçao,Curaçaoan,NA,NA,NA
+Cyprus,Cypriot,Cyprus,Cypriot,NA,NA,NA
+Czech Republic,Czech,Czech Republic,Czech,NA,NA,NA
+Denmark,Danish,Denmark,Danish,NA,NA,NA
+Djibouti,Djiboutian,Djibouti,Djiboutian,NA,NA,NA
+Dominica,Dominican,Dominica,Dominican,NA,NA,NA
+Dominican Republic,Dominican,Dominican Republic,Dominican,NA,NA,NA
+East Timor,Timorese,East Timor,Timorese,NA,NA,NA
+Ecuador,Ecuadorian,Ecuador,Ecuadorian,NA,NA,NA
+Egypt,Egyptian,Egypt,Egyptian,NA,NA,NA
+El Salvador,Salvadoran,El Salvador,Salvadoran,NA,NA,NA
+England,"English, British",England,English,British,NA,NA
+Equatorial Guinea,"Equatorial Guinean, Equatoguinean",Equatorial Guinea,Equatorial Guinean,Equatoguinean,NA,NA
+Eritrea,Eritrean,Eritrea,Eritrean,NA,NA,NA
+Estonia,Estonian,Estonia,Estonian,NA,NA,NA
+Ethiopia,Ethiopian,Ethiopia,Ethiopian,NA,NA,NA
+European Union,European,European Union,European,NA,NA,NA
+Falkland Islands,Falkland Island,Falkland Islands,Falkland Island,NA,NA,NA
+Faroe Islands,Faroese,Faroe Islands,Faroese,NA,NA,NA
+Fiji,Fijian,Fiji,Fijian,NA,NA,NA
+Finland,Finnish,Finland,Finnish,NA,NA,NA
+France,French,France,French,NA,NA,NA
+French Guiana,French Guianese,French Guiana,French Guianese,NA,NA,NA
+French Polynesia,French Polynesian,French Polynesia,French Polynesian,NA,NA,NA
+French Southern Territories,French Southern Territories,French Southern Territories,French Southern Territories,NA,NA,NA
+Gabon,Gabonese,Gabon,Gabonese,NA,NA,NA
+"Gambia, The",Gambian,The Gambia,Gambian,NA,NA,NA
+Georgia,Georgian,Georgia,Georgian,NA,NA,NA
+Germany,German,Germany,German,NA,NA,NA
+Ghana,Ghanaian,Ghana,Ghanaian,NA,NA,NA
+Gibraltar,Gibraltar,Gibraltar,Gibraltar,NA,NA,NA
+Great Britain,"British, UK",Great Britain,British,UK,NA,NA
+Greece,"Greek, Hellenic",Greece,Greek,Hellenic,NA,NA
+Greenland,Greenlandic,Greenland,Greenlandic,NA,NA,NA
+Grenada,Grenadian,Grenada,Grenadian,NA,NA,NA
+Guadeloupe,Guadeloupe,Guadeloupe,Guadeloupe,NA,NA,NA
+Guam,"Guamanian, Guambat",Guam,Guamanian,Guambat,NA,NA
+Guatemala,Guatemalan,Guatemala,Guatemalan,NA,NA,NA
+Guernsey,Channel Island,Guernsey,Channel Island,NA,NA,NA
+Guinea,Guinean,Guinea,Guinean,NA,NA,NA
+Guinea-Bissau,Bissau-Guinean,Guinea-Bissau,Bissau-Guinean,NA,NA,NA
+Guyana,Guyanese,Guyana,Guyanese,NA,NA,NA
+Haiti,Haitian,Haiti,Haitian,NA,NA,NA
+Heard Island and McDonald Islands,"Heard Island, McDonald Islands",Heard Island and McDonald Islands,Heard Island,McDonald Islands,NA,NA
+Honduras,Honduran,Honduras,Honduran,NA,NA,NA
+Hong Kong,"Hong Kong, Chinese",Hong Kong,Hong Kong,Chinese,NA,NA
+Hungary,"Hungarian, Magyar",Hungary,Hungarian,Magyar,NA,NA
+Iceland,Icelandic,Iceland,Icelandic,NA,NA,NA
+India,Indian,India,Indian,NA,NA,NA
+Indonesia,Indonesian,Indonesia,Indonesian,NA,NA,NA
+Iran,"Iranian, Persian",Iran,Iranian,Persian,NA,NA
+Iraq,Iraqi,Iraq,Iraqi,NA,NA,NA
+Ireland,Irish,Ireland,Irish,NA,NA,NA
+Isle of Man,Manx,Isle of Man,Manx,NA,NA,NA
+Israel,Israeli,Israel,Israeli,NA,NA,NA
+Italy,Italian,Italy,Italian,NA,NA,NA
+Ivory Coast,Ivorian,Ivory Coast,Ivorian,NA,NA,NA
+Jamaica,Jamaican,Jamaica,Jamaican,NA,NA,NA
+Jan Mayen,Jan Mayen,Jan Mayen,Jan Mayen,NA,NA,NA
+Japan,Japanese,Japan,Japanese,NA,NA,NA
+Jersey,Channel Island,Jersey,Channel Island,NA,NA,NA
+Jordan,Jordanian,Jordan,Jordanian,NA,NA,NA
+Kazakhstan,"Kazakhstani, Kazakh",Kazakhstan,Kazakhstani,Kazakh,NA,NA
+Kenya,Kenyan,Kenya,Kenyan,NA,NA,NA
+Kiribati,I-Kiribati,Kiribati,I-Kiribati,NA,NA,NA
+"Korea, Democratic People's Republic of",North Korean,Democratic People's Republic of Korea,North Korean,NA,NA,NA
+"Korea, Republic of",South Korean,Republic of Korea,South Korean,NA,NA,NA
+Kosovo,"Kosovar, Kosovan",Kosovo,Kosovar,Kosovan,NA,NA
+Kuwait,Kuwaiti,Kuwait,Kuwaiti,NA,NA,NA
+Kyrgyzstan,"Kyrgyzstani, Kyrgyz, Kirgiz, Kirghiz",Kyrgyzstan,Kyrgyzstani,Kyrgyz,Kirgiz,Kirghiz
+Laos,"Lao, Laotian",Laos,Lao,Laotian,NA,NA
+Latvia,Latvian,Latvia,Latvian,NA,NA,NA
+Lebanon,Lebanese,Lebanon,Lebanese,NA,NA,NA
+Lesotho,Basotho,Lesotho,Basotho,NA,NA,NA
+Liberia,Liberian,Liberia,Liberian,NA,NA,NA
+Libya,Libyan,Libya,Libyan,NA,NA,NA
+Liechtenstein,Liechtenstein,Liechtenstein,Liechtenstein,NA,NA,NA
+Lithuania,Lithuanian,Lithuania,Lithuanian,NA,NA,NA
+Luxembourg,"Luxembourg, Luxembourgish",Luxembourg,Luxembourg,Luxembourgish,NA,NA
+Macau,"Macanese, Chinese",Macau,Macanese,Chinese,NA,NA
+"Macedonia, Republic of",Macedonian,Republic of Macedonia,Macedonian,NA,NA,NA
+Madagascar,Malagasy,Madagascar,Malagasy,NA,NA,NA
+Malawi,Malawian,Malawi,Malawian,NA,NA,NA
+Malaysia,Malaysian,Malaysia,Malaysian,NA,NA,NA
+Maldives,Maldivian,Maldives,Maldivian,NA,NA,NA
+Mali,"Malian, Malinese",Mali,Malian,Malinese,NA,NA
+Malta,Maltese,Malta,Maltese,NA,NA,NA
+Marshall Islands,Marshallese,Marshall Islands,Marshallese,NA,NA,NA
+Martinique,"Martiniquais, Martinican",Martinique,Martiniquais,Martinican,NA,NA
+Mauritania,Mauritanian,Mauritania,Mauritanian,NA,NA,NA
+Mauritius,Mauritian,Mauritius,Mauritian,NA,NA,NA
+Mayotte,Mahoran,Mayotte,Mahoran,NA,NA,NA
+Mexico,Mexican,Mexico,Mexican,NA,NA,NA
+"Micronesia, Federated States of",Micronesian,Federated States of Micronesia,Micronesian,NA,NA,NA
+Moldova,Moldovan,Moldova,Moldovan,NA,NA,NA
+Monaco,"Monégasque, Monacan",Monaco,Monégasque,Monacan,NA,NA
+Mongolia,Mongolian,Mongolia,Mongolian,NA,NA,NA
+Montenegro,Montenegrin,Montenegro,Montenegrin,NA,NA,NA
+Montserrat,Montserratian,Montserrat,Montserratian,NA,NA,NA
+Morocco,Moroccan,Morocco,Moroccan,NA,NA,NA
+Mozambique,Mozambican,Mozambique,Mozambican,NA,NA,NA
+Myanmar,Burmese,Myanmar,Burmese,NA,NA,NA
+Namibia,Namibian,Namibia,Namibian,NA,NA,NA
+Nauru,Nauruan,Nauru,Nauruan,NA,NA,NA
+Nepal,"Nepali, Nepalese",Nepal,Nepali,Nepalese,NA,NA
+Netherlands,"Dutch, Netherlandic",Netherlands,Dutch,Netherlandic,NA,NA
+New Caledonia,New Caledonian,New Caledonia,New Caledonian,NA,NA,NA
+New Zealand,"New Zealand, NZ",New Zealand,New Zealand,NZ,NA,NA
+Nicaragua,Nicaraguan,Nicaragua,Nicaraguan,NA,NA,NA
+Niger,Nigerien,Niger,Nigerien,NA,NA,NA
+Nigeria,Nigerian,Nigeria,Nigerian,NA,NA,NA
+Niue,Niuean,Niue,Niuean,NA,NA,NA
+Norfolk Island,Norfolk Island,Norfolk Island,Norfolk Island,NA,NA,NA
+Northern Ireland,"Northern Irish, British",Northern Ireland,Northern Irish,British,NA,NA
+Northern Mariana Islands,Northern Marianan,Northern Mariana Islands,Northern Marianan,NA,NA,NA
+Norway,Norwegian,Norway,Norwegian,NA,NA,NA
+Oman,Omani,Oman,Omani,NA,NA,NA
+Pakistan,Pakistani,Pakistan,Pakistani,NA,NA,NA
+Palau,Palauan,Palau,Palauan,NA,NA,NA
+Palestine,Palestinian,Palestine,Palestinian,NA,NA,NA
+Panama,Panamanian,Panama,Panamanian,NA,NA,NA
+Papua New Guinea,"Papua New Guinean, Papuan",Papua New Guinea,Papua New Guinean,Papuan,NA,NA
+Paraguay,Paraguayan,Paraguay,Paraguayan,NA,NA,NA
+Peru,Peruvian,Peru,Peruvian,NA,NA,NA
+Philippines,"Philippine, Filipino",Philippines,Philippine,Filipino,NA,NA
+Pitcairn Islands,Pitcairn Island,Pitcairn Islands,Pitcairn Island,NA,NA,NA
+Poland,Polish,Poland,Polish,NA,NA,NA
+Portugal,Portuguese,Portugal,Portuguese,NA,NA,NA
+Puerto Rico,Puerto Rican,Puerto Rico,Puerto Rican,NA,NA,NA
+Qatar,Qatari,Qatar,Qatari,NA,NA,NA
+Réunion,"Réunionese, Réunionnais",Réunion,Réunionese,Réunionnais,NA,NA
+Romania,Romanian,Romania,Romanian,NA,NA,NA
+Russia,Russian,Russia,Russian,NA,NA,NA
+Rwanda,Rwandan,Rwanda,Rwandan,NA,NA,NA
+Saba,Saba,Saba,Saba,NA,NA,NA
+Saint Barthélemy,Barthélemois,Saint Barthélemy,Barthélemois,NA,NA,NA
+"Saint Helena, Ascension and Tristan da Cunha",Saint Helenian,"Saint Helena, Ascension and Tristan da Cunha",Saint Helenian,NA,NA,NA
+Saint Kitts and Nevis,"Kittitian, Nevisian",Saint Kitts and Nevis,Kittitian,Nevisian,NA,NA
+Saint Lucia,Saint Lucian,Saint Lucia,Saint Lucian,NA,NA,NA
+Saint Martin,Saint-Martinoise,Saint Martin,Saint-Martinoise,NA,NA,NA
+Saint Pierre and Miquelon,"Saint-Pierrais, Miquelonnais",Saint Pierre and Miquelon,Saint-Pierrais,Miquelonnais,NA,NA
+Saint Vincent and the Grenadines,"Saint Vincentian, Vincentian",Saint Vincent and the Grenadines,Saint Vincentian,Vincentian,NA,NA
+Samoa,Samoan,Samoa,Samoan,NA,NA,NA
+San Marino,Sammarinese,San Marino,Sammarinese,NA,NA,NA
+São Tomé and Príncipe,São Toméan,São Tomé and Príncipe,São Toméan,NA,NA,NA
+Saudi Arabia,"Saudi, Saudi Arabian",Saudi Arabia,Saudi,Saudi Arabian,NA,NA
+Scotland,"Scots, Scottish, British",Scotland,Scots,Scottish,British,NA
+Senegal,Senegalese,Senegal,Senegalese,NA,NA,NA
+Serbia,Serbian,Serbia,Serbian,NA,NA,NA
+Seychelles,Seychellois,Seychelles,Seychellois,NA,NA,NA
+Sierra Leone,Sierra Leonean,Sierra Leone,Sierra Leonean,NA,NA,NA
+Singapore,"Singapore, Singaporean",Singapore,Singapore,Singaporean,NA,NA
+Sint Eustatius,"Sint Eustatius, Statian",Sint Eustatius,Sint Eustatius,Statian,NA,NA
+Sint Maarten,Sint Maarten,Sint Maarten,Sint Maarten,NA,NA,NA
+Slovakia,Slovak,Slovakia,Slovak,NA,NA,NA
+Slovenia,"Slovenian, Slovene",Slovenia,Slovenian,Slovene,NA,NA
+Solomon Islands,Solomon Island,Solomon Islands,Solomon Island,NA,NA,NA
+Somalia,"Somali, Somalian",Somalia,Somali,Somalian,NA,NA
+South Africa,South African,South Africa,South African,NA,NA,NA
+South Georgia and the South Sandwich Islands,"South Georgia, South Sandwich Islands",South Georgia and the South Sandwich Islands,South Georgia,South Sandwich Islands,NA,NA
+South Ossetia (Region of Georgia),South Ossetian,South Ossetia (Region of Georgia),South Ossetian,NA,NA,NA
+South Sudan,South Sudanese,South Sudan,South Sudanese,NA,NA,NA
+Spain,Spanish,Spain,Spanish,NA,NA,NA
+Sri Lanka,Sri Lankan,Sri Lanka,Sri Lankan,NA,NA,NA
+Sudan,Sudanese,Sudan,Sudanese,NA,NA,NA
+Surinam,Surinamese,Surinam,Surinamese,NA,NA,NA
+Svalbard,Svalbard,Svalbard,Svalbard,NA,NA,NA
+Swaziland,Swazi,Swaziland,Swazi,NA,NA,NA
+Sweden,Swedish,Sweden,Swedish,NA,NA,NA
+Switzerland,Swiss,Switzerland,Swiss,NA,NA,NA
+Syria,Syrian,Syria,Syrian,NA,NA,NA
+Taiwan,"Chinese, Taiwanese",Taiwan,Chinese,Taiwanese,NA,NA
+Tajikistan,Tajikistani,Tajikistan,Tajikistani,NA,NA,NA
+Tanzania,Tanzanian,Tanzania,Tanzanian,NA,NA,NA
+Thailand,Thai,Thailand,Thai,NA,NA,NA
+Timor-Leste,Timorese,Timor-Leste,Timorese,NA,NA,NA
+Togo,Togolese,Togo,Togolese,NA,NA,NA
+Tokelau,Tokelauan,Tokelau,Tokelauan,NA,NA,NA
+Tonga,Tongan,Tonga,Tongan,NA,NA,NA
+Trinidad and Tobago,"Trinidadian, Tobagonian",Trinidad and Tobago,Trinidadian,Tobagonian,NA,NA
+Tunisia,Tunisian,Tunisia,Tunisian,NA,NA,NA
+Turkey,Turkish,Turkey,Turkish,NA,NA,NA
+Turkmenistan,Turkmen,Turkmenistan,Turkmen,NA,NA,NA
+Turks and Caicos Islands,Turks and Caicos Island,Turks and Caicos Islands,Turks and Caicos Island,NA,NA,NA
+Tuvalu,Tuvaluan,Tuvalu,Tuvaluan,NA,NA,NA
+Uganda,Ugandan,Uganda,Ugandan,NA,NA,NA
+Ukraine,Ukrainian,Ukraine,Ukrainian,NA,NA,NA
+United Arab Emirates,"Emirati, Emirian, Emiri",United Arab Emirates,Emirati,Emirian,Emiri,NA
+United Kingdom,"British, UK",United Kingdom,British,UK,NA,NA
+United States,"United States, US, American",United States,United States,US,American,NA
+Uruguay,Uruguayan,Uruguay,Uruguayan,NA,NA,NA
+Uzbekistan,"Uzbekistani, Uzbek",Uzbekistan,Uzbekistani,Uzbek,NA,NA
+Vanuatu,"Ni-Vanuatu, Vanuatuan",Vanuatu,Ni-Vanuatu,Vanuatuan,NA,NA
+Vatican City State,Vatican,Vatican City State,Vatican,NA,NA,NA
+Venezuela,Venezuelan,Venezuela,Venezuelan,NA,NA,NA
+Vietnam,Vietnamese,Vietnam,Vietnamese,NA,NA,NA
+"Virgin Islands, British",British Virgin Island,British Virgin Islands,British Virgin Island,NA,NA,NA
+"Virgin Islands, United States",U.S. Virgin Island,United States Virgin Islands,U.S. Virgin Island,NA,NA,NA
+Wales,"Welsh, British",Wales,Welsh,British,NA,NA
+Wallis and Futuna,"Wallis and Futuna, Wallisian, Futunan",Wallis and Futuna,Wallis and Futuna,Wallisian,Futunan,NA
+Western Sahara,"Sahrawi, Sahrawian, Sahraouian",Western Sahara,Sahrawi,Sahrawian,Sahraouian,NA
+Yemen,Yemeni,Yemen,Yemeni,NA,NA,NA
+Zambia,Zambian,Zambia,Zambian,NA,NA,NA
+Zimbabwe,Zimbabwean,Zimbabwe,Zimbabwean,NA,NA,NA
diff --git a/countries/countries_to_languages.csv b/countries/countries_to_languages.csv
@@ -0,0 +1,199 @@
+country_name,languages,first_language
+Afghanistan,"Dari Persian, Pashtu (both official), other Turkic and minor languages",Dari Persian
+Albania,"Albanian (Tosk is the official dialect), Greek",Albanian
+Algeria,"Arabic (official), French, Berber dialects",Arabic
+Andorra,"Catalán (official), French, Castilian, Portuguese",Catalán
+Angola,"Portuguese (official), Bantu and other African languages",Portuguese
+Antigua and Barbuda,"English (official), local dialects",English
+Argentina,"Spanish (official), English, Italian, German, French",Spanish
+Armenia,"Armenian 98%, Yezidi, Russian",Armenian
+Australia,"English 79%, native and other languages",English
+Austria,"German (official nationwide); Slovene, Croatian, Hungarian (each official in one region)",German
+Azerbaijan,"Azerbaijani Turkic 89%, Russian 3%, Armenian 2%, other 6% (1995 est.)",Azerbaijani Turkic
+Bahamas,"English (official), Creole (among Haitian immigrants)",English
+Bahrain,"Arabic, English, Farsi, Urdu",Arabic
+Bangladesh,"Bangla (official), English",Bangla
+Barbados,English,English
+Belarus,"Belorussian (White Russian), Russian, other",Belorussian
+Belgium,"Dutch (Flemish) 60%, French 40%, German less than 1% (all official)",Dutch
+Belize,"English (official), Spanish, Mayan, Garifuna (Carib), Creole",English
+Benin,"French (official), Fon, Yoruba, tribal languages",French
+Bhutan,"Dzongkha (official), Tibetan dialects (among Bhotes), Nepalese dialects (among Nepalese)",Dzongkha
+Bolivia,"Spanish, Quechua, Aymara (all official)",Spanish
+Bosnia and Herzegovina,"Bosnian, Croatian, Serbian",Bosnian
+Botswana,"English 2% (official), Setswana 78%, Kalanga 8%, Sekgalagadi 3%, other (2001)",English
+Brazil,"Portuguese (official), Spanish, English, French",Portuguese
+Brunei,"Malay (official), English, Chinese",Malay
+Bulgaria,"Bulgarian 85%, Turkish 10%, Roma 4%",Bulgarian
+Burkina Faso,French (official); native African (Sudanic) languages 90%,French
+Burundi,"Kirundi and French (official), Swahili",Kirundi
+Cambodia,"Khmer 95% (official), French, English",Khmer
+Cameroon,"French, English (both official); 24 major African language groups",French
+Canada,"English 59.3%, French 23.2% (both official); other 17.5%",English
+Cape Verde,"Portuguese, Crioulo",Portuguese
+Central African Republic,"French (official), Sangho (lingua franca, national), tribal languages",French
+Chad,"French, Arabic (both official); Sara; more than 120 languages and dialects",French
+Chile,Spanish,Spanish
+China,"Standard Chinese (Mandarin/Putonghua), Yue (Cantonese), Wu (Shanghaiese), Minbei (Fuzhou), Minnan (Hokkien-Taiwanese), Xiang, Gan, Hakka dialects, minority languages",Chinese
+Colombia,Spanish,Spanish
+Comoros,"Arabic and French (both official), Shikomoro (Swahili/Arabic blend)",Arabic
+"Congo, Democratic Republic of the","French (official), Lingala, Kingwana, Kikongo, Tshiluba",French
+"Congo, Republic of","French (official), Lingala, Monokutuba, Kikongo, many local languages and dialects",French
+Costa Rica,"Spanish (official), English",Spanish
+Côte d'Ivoire,French (official) and African languages (Dioula esp.),French
+Croatia,"Croatian 96% (official), other 4% (including Italian, Hungarian, Czech, Slovak, German)",Croatian
+Cuba,Spanish,Spanish
+Cyprus,"Greek, Turkish (both official); English",Greek
+Czech Republic,Czech,Czech
+Denmark,"Danish, Faroese, Greenlandic (Inuit dialect), German; English is the predominant second language",Danish
+Djibouti,"French and Arabic (both official), Somali, Afar",French
+Dominica,English (official) and French patois,English
+Dominican Republic,Spanish,Spanish
+East Timor,"Tetum, Portuguese (official); Bahasa Indonesia, English; other indigenous languages, including Tetum, Galole, Mambae, and Kemak",Tetum
+Ecuador,"Spanish (official), Quechua, other Amerindian languages",Spanish
+Egypt,"Arabic (official), English and French widely understood by educated classes",Arabic
+El Salvador,"Spanish, Nahua (among some Amerindians)",Spanish
+Equatorial Guinea,"Spanish, French (both official); pidgin English, Fang, Bubi, Ibo",Spanish
+Eritrea,"Afar, Arabic, Tigre and Kunama, Tigrinya, other Cushitic languages",Afar
+Estonia,"Estonian 67% (official), Russian 30%, other (2000)",Estonian
+Ethiopia,"Amharic, Tigrigna, Orominga, Guaragigna, Somali, Arabic, English, over 70 others",Amharic
+Fiji,"English (official), Fijian, Hindustani",English
+Finland,"Finnish 92%, Swedish 6% (both official); small Sami- (Lapp) and Russian-speaking minorities",Finnish
+France,"French 100%, rapidly declining regional dialects (Provençal, Breton, Alsatian, Corsican, Catalan, Basque, Flemish)",French
+Gabon,"French (official), Fang, Myene, Nzebi, Bapounou/Eschira, Bandjabi",French
+Gambia,"English (official), Mandinka, Wolof, Fula, other indigenous",English
+Georgia,"Georgian 71% (official), Russian 9%, Armenian 7%, Azerbaijani 6%, other 7% (Abkhaz is the official language in Abkhazia)",Georgian
+Germany,German,German
+Ghana,"English (official), African languages (including Akan, Moshi-Dagomba, Ewe, and Ga)",English
+Greece,"Greek 99% (official), English, French",Greek
+Grenada,"English (official), French patois",English
+Guatemala,"Spanish 60%, Amerindian languages 40% (23 officially recognized Amerindian languages, including Quiche, Cakchiquel, Kekchi, Mam, Garifuna, and Xinca)",Spanish
+Guinea,"French (official), native tongues (Malinké, Susu, Fulani)",French
+Guinea-Bissau,"Portuguese (official), Criolo, African languages",Portuguese
+Guyana,"English (official), Amerindian dialects, Creole, Hindi, Urdu",English
+Haiti,Creole and French (both official),Haitian Creole
+Honduras,"Spanish (official), Amerindian dialects; English widely spoken in business",Spanish
+Hungary,"Magyar (Hungarian) 94%, other 6%",Magyar
+Iceland,"Icelandic, English, Nordic languages, German widely spoken",Icelandic
+India,"Hindi 30%, English, Bengali, Gujarati, Kashmiri, Malayalam, Marathi, Oriya, Punjabi, Tamil, Telugu, Urdu, Kannada, Assamese, Sanskrit, Sindhi (all official); Hindi/Urdu; 1,600+ dialects",Hindi
+Indonesia,"Bahasa Indonesia (official), English, Dutch, Javanese, and more than 580 other languages and dialects",Bahasa Indonesia
+Iran,"Persian and Persian dialects 58%, Turkic and Turkic dialects 26%, Kurdish 9%, Luri 2%, Balochi 1%, Arabic 1%, Turkish 1%, other 2%",Persian
+Iraq,"Arabic (official), Kurdish (official in Kurdish regions), Assyrian, Armenian",Arabic
+Ireland,"English, Irish (Gaelic) (both official)",English
+Israel,"Hebrew (official), Arabic, English",Hebrew
+Italy,"Italian (official); German-, French-, and Slovene-speaking minorities",Italian
+Jamaica,"English, Jamaican Creole",English
+Japan,Japanese,Japanese
+Jordan,"Arabic (official), English",Arabic
+Kazakhstan,"Kazak (Qazaq, state language) 64%; Russian (official, used in everyday business) 95% (2001 est.)",Kazak
+Kenya,"English (official), Swahili (national), and numerous indigenous languages",English
+Kiribati,"English (official), I-Kiribati (Gilbertese)",English
+"Korea, North",Korean,Korean
+"Korea, South","Korean, English widely taught",Korean
+Kosovo,"Albanian (official), Serbian (official), Bosnian, Turkish, Roma",Albanian
+Kuwait,"Arabic (official), English",Arabic
+Kyrgyzstan,"Kyrgyz, Russian (both official)",Kyrgyz
+Laos,"Lao (official), French, English, various ethnic languages",Lao
+Latvia,"Latvian 58% (official), Russian 38%, Lithuanian, other (2000)",Latvian
+Lebanon,"Arabic (official), French, English, Armenian",Arabic
+Lesotho,"English, Sesotho (both official); Zulu, Xhosa",English
+Liberia,"English 20% (official), some 20 ethnic-group languages",English
+Libya,"Arabic, Italian, and English widely understood in major cities",Arabic
+Liechtenstein,"German (official), Alemannic dialect",German
+Lithuania,"Lithuanian 82% (official), Russian 8%, Polish 6% (2001)",Lithuanian
+Luxembourg,"Luxembourgish (national) French, German (both administrative)",Luxembourgish
+Macedonia,"Macedonian 67%, Albanian 25% (both official); Turkish 4%, Roma 2%, Serbian 1% (2002)",Macedonian
+Madagascar,Malagasy and French (both official),Malagasy
+Malawi,"Chichewa 57.2% (official), Chinyanja 12.8%, Chiyao 10.1%, Chitumbuka 9.5%, Chisena 2.7%, Chilomwe 2.4%, Chitonga 1.7%, other 3.6% (1998)",Chichewa
+Malaysia,"Bahasa Melayu (Malay, official), English, Chinese dialects (Cantonese, Mandarin, Hokkien, Hakka, Hainan, Foochow), Tamil, Telugu, Malayalam, Panjabi, Thai; several indigenous languages (including Iban, Kadazan) in East Malaysia",Bahasa Melayu
+Maldives,Maldivian Dhivehi (official); English spoken by most government officials,Maldivian Dhivehi
+Mali,"French (official), Bambara 80%, numerous African languages",French
+Malta,Maltese and English (both official),Maltese
+Marshall Islands,"Marshallese 98% (two major dialects from the Malayo-Polynesian family), English widely spoken as a second language (both official); Japanese",Marshallese
+Mauritania,"Hassaniya Arabic (official), Pulaar, Soninke, French, Wolof",Hassaniya Arabic
+Mauritius,"English less than 1% (official), Creole 81%, Bojpoori 12%, French 3% (2000)",Creole
+Mexico,"Spanish, various Mayan, Nahuatl, and other regional indigenous languages",Spanish
+Micronesia,"English (official, common), Chukese, Pohnpeian, Yapase, Kosrean, Ulithian, Woleaian, Nukuoro, Kapingamarangi",English
+Moldova,"Moldovan (official; virtually the same as Romanian), Russian, Gagauz (a Turkish dialect)",Moldovan
+Monaco,"French (official), English, Italian, Monégasque",French
+Mongolia,"Mongolian, 90%; also Turkic and Russian (1999)",Mongolian
+Montenegro,Serbian/Montenegrin (Ijekavian dialect—official),Serbian
+Morocco,"Arabic (official), Berber dialects, French often used for business, government, and diplomacy",Arabic
+Mozambique,"Portuguese 9% (official; second language of 27%), Emakhuwa 26%, Xichangana 11%, Elomwe 8%, Cisena 7%, Echuwabo 6%, other Mozambican languages 32% (1997)",Portuguese
+Myanmar,"Burmese, minority languages",Burmese
+Namibia,"English 7% (official), Afrikaans is common language of most of the population and of about 60% of the white population, German 32%; indigenous languages: Oshivambo, Herero, Nama",English
+Nauru,"Nauruan (official), English",Nauruan
+Nepal,"Nepali 48% (official), Maithali 12%, Bhojpuri 7%, Tharu 6%, Tamang 5%, others. English spoken by many in government and business (2001)",Nepali
+Netherlands,"Dutch, Frisian (both official)",Dutch
+New Zealand,"English, Maori (both official)",English
+Nicaragua,Spanish 98% (official); English and indigenous languages on Atlantic coast (1995),Spanish
+Niger,"French (official), Hausa, Djerma",French
+Nigeria,"English (official), Hausa, Yoruba, Ibo, Fulani, and more than 200 others",English
+Norway,"Bokmål Norwegian, Nynorsk Norwegian (both official); small Sami- and Finnish-speaking minorities (Sami is official in six municipalities)",Bokmål Norwegian
+Oman,"Arabic (official), English, Baluchi, Urdu, Indian dialects",Arabic
+Pakistan,"Urdu 8%, English (both official); Punjabi 48%, Sindhi 12%, Siraiki (a Punjabi variant) 10%, Pashtu 8%, Balochi 3%, Hindko 2%, Brahui 1%, Burushaski, and others 8%",Urdu
+Palau,"Palauan 64.7%, English 9.4%, Sonsoralese, Tobi, Angaur (each official on some islands), Filipino 13.5%, Chinese 5.7%, Carolinian 1.5%, Japanese 1.5%, other Asian 2.3%, other languages 1.5% (2000)",Palauan
+Palestinian State (proposed),"Arabic, Hebrew, English",Arabic
+Panama,"Spanish (official), English 14%, many bilingual",Spanish
+Papua New Guinea,"Tok Pisin (Melanesian Pidgin, the lingua franca), Hiri Motu (in Papua region), English 1%–2%; 715 indigenous languages",Tok Pisin
+Paraguay,"Spanish, Guaraní (both official)",Spanish
+Peru,"Spanish, Quéchua (both official); Aymara; many minor Amazonian languages",Spanish
+Philippines,"Filipino (based on Tagalog), English (both official); eight major dialects: Tagalog, Cebuano, Ilocano, Hiligaynon or Ilonggo, Bicol, Waray, Pampango, and Pangasinense",Filipino
+Poland,Polish 98% (2002),Polish
+Portugal,"Portuguese (official), Mirandese (official, but locally used)",Portuguese
+Qatar,Arabic (official); English a common second language,Arabic
+Romania,"Romanian (official), Hungarian, German",Romanian
+Russia,"Russian, others",Russian
+Rwanda,"Kinyarwanda, French, and English (all official); Kiswahili in commercial centers",Kinyarwanda
+St. Kitts and Nevis,English,English
+St. Lucia,"English (official), French patois",English
+St. Vincent and the Grenadines,"English, French patois",English
+Samoa,"Samoan, English",Samoan
+San Marino,Italian,Italian
+São Tomé and Príncipe,Portuguese (official),Portuguese
+Saudi Arabia,Arabic,Arabic
+Senegal,"French (official); Wolof, Pulaar, Jola, Mandinka",French
+Serbia,"Serbian (official); Romanian, Hungarian, Slovak, and Croatian (all official in Vojvodina); Albanian (official in Kosovo)",Serbian
+Seychelles,"Seselwa Creole 92%, English 5%, French (all official) (2002)",Seselwa Creole
+Sierra Leone,"English (official), Mende (southern vernacular), Temne (northern vernacular), Krio (lingua franca)",English
+Singapore,"Mandarin 35%, English 23%, Malay 14.1%, Hokkien 11.4%, Cantonese 5.7%, Teochew 4.9%, Tamil 3.2%, other Chinese dialects 1.8%, other 0.9% (2000)",Mandarin
+Slovakia,"Slovak 84% (official), Hungarian 11%, Roma 2%, Ukrainian 1% (2001)",Slovak
+Slovenia,"Slovenian 91%, Serbo-Croatian 5% (2002)",Slovenian
+Solomon Islands,"English 1%–2% (official), Melanesian pidgin (lingua franca), 120 indigenous languages",English
+Somalia,"Somali (official), Arabic, English, Italian",Somali
+South Africa,"IsiZulu 23.8%, IsiXhosa 17.6%, Afrikaans 13.3%, Sepedi 9.4%, English 8.2%, Setswana 8.2%, Sesotho 7.9%, Xitsonga 4.4%, other 7.2%",IsiZulu
+South Sudan,"English (official), Arabic (includes Juba and Sudanese variants) (official), regional languages include Dinka, Nuer, Bari, Zande, Shilluk",English
+Spain,"Castilian Spanish 74% (official nationwide); Catalan 17%, Galician 7%, Basque 2% (each official regionally)",Spanish
+Sri Lanka,"Sinhala 74% (official and national), Tamil 18% (national), other 8%; English is commonly used in government and spoken competently by about 10%",Sinhala
+Sudan,"Arabic (official), Nubian, Ta Bedawie, diverse dialects of Nilotic, Nilo-Hamitic, Sudanic languages, English",Arabic
+Suriname,"Dutch (official), Surinamese (lingua franca), English widely spoken, Hindustani, Javanese",Dutch
+Swaziland,"English, siSwati (both official)",English
+Sweden,"Swedish, small Sami- and Finnish-speaking minorities",Swedish
+Switzerland,"German 64%, French 20%, Italian 7% (all official); Romansch 0.5% (national)",German
+Syria,"Arabic (official); Kurdish, Armenian, Aramaic, Circassian widely understood; French, English somewhat understood",Arabic
+Taiwan,"Chinese (Mandarin, official), Taiwanese (Min), Hakka dialects",Chinese
+Tajikistan,"Tajik (official), Russian widely used in government and business",Tajik
+Tanzania,"Swahili, English (both official); Arabic; many local languages",Swahili
+Thailand,"Thai (Siamese), English (secondary language of the elite), ethnic and regional dialects",Thai
+Togo,"French (official, commerce); Ewé, Mina (south); Kabyé, Dagomba (north); and many dialects",French
+Tonga,"Tongan (an Austronesian language), English",Tongan
+Trinidad and Tobago,"English (official), Hindi, French, Spanish, Chinese",English
+Tunisia,"Arabic (official, commerce), French (commerce)",Arabic
+Turkey,"Turkish (official), Kurdish, Dimli, Azeri, Kabardian",Turkish
+Turkmenistan,"Turkmen 72%; Russian 12%; Uzbek 9%, other 7%",Turkmen
+Tuvalu,"Tuvaluan, English, Samoan, Kiribati (on the island of Nui)",Tuvaluan
+Uganda,"English (official), Ganda or Luganda, other Niger-Congo languages, Nilo-Saharan languages, Swahili, Arabic",English
+Ukraine,"Ukrainian 67%, Russian 24%, Romanian, Polish, Hungarian",Ukrainian
+United Arab Emirates,"Arabic (official), Persian, English, Hindi, Urdu",Arabic
+United Kingdom,"English, Welsh, Scots Gaelic",English
+United States,"English 82%, Spanish 11% (2000)",English
+Uruguay,"Spanish, Portunol, or Brazilero",Spanish
+Uzbekistan,"Uzbek 74.3%, Russian 14.2%, Tajik 4.4%, other 7.1%",Uzbek
+Vanuatu,"Bislama 23% (a Melanesian pidgin English), English 2%, French 1% (all 3 official); more than 100 local languages 73%",Bislama
+Vatican City (Holy See),"Italian, Latin, French, various other languages",Italian
+Venezuela,"Spanish (official), numerous indigenous dialects",Spanish
+Vietnam,"Vietnamese (official); English (increasingly favored as a second language); some French, Chinese, Khmer; mountain area languages (Mon-Khmer and Malayo-Polynesian)",Vietnamese
+Western Sahara (proposed state),"Hassaniya Arabic, Moroccan Arabic",Hassaniya Arabic
+Yemen,Arabic,Arabic
+Zambia,"English (official); major vernaculars: Bemba, Kaonda, Lozi, Lunda, Luvale, Nyanja, Tonga; about 70 other indigenous languages",English
+Zimbabwe,"English (official), Shona, Ndebele (Sindebele), numerous minor tribal dialects",English
diff --git a/countries/languages_to_codes.csv b/countries/languages_to_codes.csv
@@ -0,0 +1,186 @@
+language_name,iso639,language_name__1,language_name__2,language_name__3,language_name__4
+Abkhaz,ab,Abkhaz,NA,NA,NA
+Afar,aa,Afar,NA,NA,NA
+Afrikaans,af,Afrikaans,NA,NA,NA
+Akan,ak,Akan,NA,NA,NA
+Albanian,sq,Albanian,NA,NA,NA
+Amharic,am,Amharic,NA,NA,NA
+Arabic,ar,Arabic,NA,NA,NA
+Aragonese,an,Aragonese,NA,NA,NA
+Armenian,hy,Armenian,NA,NA,NA
+Assamese,as,Assamese,NA,NA,NA
+Avaric,av,Avaric,NA,NA,NA
+Avestan,ae,Avestan,NA,NA,NA
+Aymara,ay,Aymara,NA,NA,NA
+Azerbaijani,az,Azerbaijani,NA,NA,NA
+Bambara,bm,Bambara,NA,NA,NA
+Bashkir,ba,Bashkir,NA,NA,NA
+Basque,eu,Basque,NA,NA,NA
+Belarusian,be,Belarusian,NA,NA,NA
+"Bengali, Bangla",bn,Bengali,Bangla,NA,NA
+Bihari,bh,Bihari,NA,NA,NA
+Bislama,bi,Bislama,NA,NA,NA
+Bosnian,bs,Bosnian,NA,NA,NA
+Breton,br,Breton,NA,NA,NA
+Bulgarian,bg,Bulgarian,NA,NA,NA
+Burmese,my,Burmese,NA,NA,NA
+Catalan,ca,Catalan,NA,NA,NA
+Chamorro,ch,Chamorro,NA,NA,NA
+Chechen,ce,Chechen,NA,NA,NA
+"Chichewa, Chewa, Nyanja",ny,Chichewa,Chewa,Nyanja,NA
+Chinese,zh,Chinese,NA,NA,NA
+Chuvash,cv,Chuvash,NA,NA,NA
+Cornish,kw,Cornish,NA,NA,NA
+Corsican,co,Corsican,NA,NA,NA
+Cree,cr,Cree,NA,NA,NA
+Croatian,hr,Croatian,NA,NA,NA
+Czech,cs,Czech,NA,NA,NA
+Danish,da,Danish,NA,NA,NA
+"Divehi, Dhivehi, Maldivian",dv,Divehi,Dhivehi,Maldivian,NA
+Dutch,nl,Dutch,NA,NA,NA
+Dzongkha,dz,Dzongkha,NA,NA,NA
+English,en,English,NA,NA,NA
+Esperanto,eo,Esperanto,NA,NA,NA
+Estonian,et,Estonian,NA,NA,NA
+Ewe,ee,Ewe,NA,NA,NA
+Faroese,fo,Faroese,NA,NA,NA
+Fijian,fj,Fijian,NA,NA,NA
+Finnish,fi,Finnish,NA,NA,NA
+French,fr,French,NA,NA,NA
+"Fula, Fulah, Pulaar, Pular",ff,Fula,Fulah,Pulaar,Pular
+Galician,gl,Galician,NA,NA,NA
+Georgian,ka,Georgian,NA,NA,NA
+German,de,German,NA,NA,NA
+Greek (modern),el,Greek,NA,NA,NA
+Guaraní,gn,Guaraní,NA,NA,NA
+Gujarati,gu,Gujarati,NA,NA,NA
+"Haitian, Haitian Creole",ht,Haitian,Creole,NA,NA
+Hausa,ha,Hausa,NA,NA,NA
+Hebrew (modern),he,Hebrew,NA,NA,NA
+Herero,hz,Herero,NA,NA,NA
+Hindi,hi,Hindi,NA,NA,NA
+Hiri Motu,ho,Hiri Motu,NA,NA,NA
+Hungarian,hu,Hungarian,NA,NA,NA
+Interlingua,ia,Interlingua,NA,NA,NA
+Indonesian,id,Indonesian,NA,NA,NA
+Interlingue,ie,Interlingue,NA,NA,NA
+Irish,ga,Irish,NA,NA,NA
+Igbo,ig,Igbo,NA,NA,NA
+Inupiaq,ik,Inupiaq,NA,NA,NA
+Ido,io,Ido,NA,NA,NA
+Icelandic,is,Icelandic,NA,NA,NA
+Italian,it,Italian,NA,NA,NA
+Inuktitut,iu,Inuktitut,NA,NA,NA
+Japanese,ja,Japanese,NA,NA,NA
+Javanese,jv,Javanese,NA,NA,NA
+"Kalaallisut, Greenlandic",kl,Kalaallisut,Greenlandic,NA,NA
+Kannada,kn,Kannada,NA,NA,NA
+Kanuri,kr,Kanuri,NA,NA,NA
+Kashmiri,ks,Kashmiri,NA,NA,NA
+Kazakh,kk,Kazakh,NA,NA,NA
+Khmer,km,Khmer,NA,NA,NA
+"Kikuyu, Gikuyu",ki,Kikuyu,Gikuyu,NA,NA
+Kinyarwanda,rw,Kinyarwanda,NA,NA,NA
+Kyrgyz,ky,Kyrgyz,NA,NA,NA
+Komi,kv,Komi,NA,NA,NA
+Kongo,kg,Kongo,NA,NA,NA
+Korean,ko,Korean,NA,NA,NA
+Kurdish,ku,Kurdish,NA,NA,NA
+"Kwanyama, Kuanyama",kj,Kwanyama,Kuanyama,NA,NA
+Latin,la,Latin,NA,NA,NA
+Ladin,,Ladin,NA,NA,NA
+"Luxembourgish, Letzeburgesch",lb,Luxembourgish,Letzeburgesch,NA,NA
+Ganda,lg,Ganda,NA,NA,NA
+"Limburgish, Limburgan, Limburger",li,Limburgish,Limburgan,Limburger,NA
+Lingala,ln,Lingala,NA,NA,NA
+Lao,lo,Lao,NA,NA,NA
+Lithuanian,lt,Lithuanian,NA,NA,NA
+Luba-Katanga,lu,Luba-Katanga,NA,NA,NA
+Latvian,lv,Latvian,NA,NA,NA
+Manx,gv,Manx,NA,NA,NA
+Macedonian,mk,Macedonian,NA,NA,NA
+Malagasy,mg,Malagasy,NA,NA,NA
+Malay,ms,Malay,NA,NA,NA
+Malayalam,ml,Malayalam,NA,NA,NA
+Maltese,mt,Maltese,NA,NA,NA
+Maori,mi,Maori,NA,NA,NA
+Marathi (Maraṭhi),mr,Marathi,NA,NA,NA
+Marshallese,mh,Marshallese,NA,NA,NA
+Mongolian,mn,Mongolian,NA,NA,NA
+Nauru,na,Nauru,NA,NA,NA
+"Navajo, Navaho",nv,Navajo,Navaho,NA,NA
+Northern Ndebele,nd,Northern Ndebele,NA,NA,NA
+Nepali,ne,Nepali,NA,NA,NA
+Ndonga,ng,Ndonga,NA,NA,NA
+Norwegian Bokmål,nb,Norwegian Bokmål,NA,NA,NA
+Norwegian Nynorsk,nn,Norwegian Nynorsk,NA,NA,NA
+Norwegian,no,Norwegian,NA,NA,NA
+Nuosu,ii,Nuosu,NA,NA,NA
+Southern Ndebele,nr,Southern Ndebele,NA,NA,NA
+Occitan,oc,Occitan,NA,NA,NA
+"Ojibwe, Ojibwa",oj,Ojibwe,Ojibwa,NA,NA
+"Old Church Slavonic, Church Slavonic, Old Bulgarian",cu,Old Church Slavonic,Church Slavonic,Old Bulgarian,NA
+Oromo,om,Oromo,NA,NA,NA
+Oriya,or,Oriya,NA,NA,NA
+"Ossetian, Ossetic",os,Ossetian,Ossetic,NA,NA
+"Panjabi, Punjabi",pa,Panjabi,Punjabi,NA,NA
+Pali,pi,Pali,NA,NA,NA
+Persian (Farsi),fa,Persian (Farsi),NA,NA,NA
+Polish,pl,Polish,NA,NA,NA
+"Pashto, Pushto",ps,Pashto,Pushto,NA,NA
+Portuguese,pt,Portuguese,NA,NA,NA
+Quechua,qu,Quechua,NA,NA,NA
+Romansh,rm,Romansh,NA,NA,NA
+Kirundi,rn,Kirundi,NA,NA,NA
+Romanian,ro,Romanian,NA,NA,NA
+Russian,ru,Russian,NA,NA,NA
+Sanskrit (Saṁskṛta),sa,Sanskrit (Saṁskṛta),NA,NA,NA
+Sardinian,sc,Sardinian,NA,NA,NA
+Sindhi,sd,Sindhi,NA,NA,NA
+Northern Sami,se,Northern Sami,NA,NA,NA
+Samoan,sm,Samoan,NA,NA,NA
+Sango,sg,Sango,NA,NA,NA
+Serbian,sr,Serbian,NA,NA,NA
+"Scottish Gaelic, Gaelic",gd,Scottish Gaelic,Gaelic,NA,NA
+Shona,sn,Shona,NA,NA,NA
+"Sinhala, Sinhalese",si,Sinhala,Sinhalese,NA,NA
+Slovak,sk,Slovak,NA,NA,NA
+Slovene,sl,Slovene,NA,NA,NA
+Somali,so,Somali,NA,NA,NA
+Southern Sotho,st,Southern Sotho,NA,NA,NA
+Spanish,es,Spanish,NA,NA,NA
+Sundanese,su,Sundanese,NA,NA,NA
+Swahili,sw,Swahili,NA,NA,NA
+Swati,ss,Swati,NA,NA,NA
+Swedish,sv,Swedish,NA,NA,NA
+Tamil,ta,Tamil,NA,NA,NA
+Telugu,te,Telugu,NA,NA,NA
+Tajik,tg,Tajik,NA,NA,NA
+Thai,th,Thai,NA,NA,NA
+Tigrinya,ti,Tigrinya,NA,NA,NA
+"Tibetan Standard, Tibetan, Central",bo,Tibetan Standard,Tibetan,Central,NA
+Turkmen,tk,Turkmen,NA,NA,NA
+Tagalog,tl,Tagalog,NA,NA,NA
+Tswana,tn,Tswana,NA,NA,NA
+Tonga (Tonga Islands),to,Tonga (Tonga Islands),NA,NA,NA
+Turkish,tr,Turkish,NA,NA,NA
+Tsonga,ts,Tsonga,NA,NA,NA
+Tatar,tt,Tatar,NA,NA,NA
+Twi,tw,Twi,NA,NA,NA
+Tahitian,ty,Tahitian,NA,NA,NA
+Uyghur,ug,Uyghur,NA,NA,NA
+Ukrainian,uk,Ukrainian,NA,NA,NA
+Urdu,ur,Urdu,NA,NA,NA
+Uzbek,uz,Uzbek,NA,NA,NA
+Venda,ve,Venda,NA,NA,NA
+Vietnamese,vi,Vietnamese,NA,NA,NA
+Volapük,vo,Volapük,NA,NA,NA
+Walloon,wa,Walloon,NA,NA,NA
+Welsh,cy,Welsh,NA,NA,NA
+Wolof,wo,Wolof,NA,NA,NA
+Western Frisian,fy,Western Frisian,NA,NA,NA
+Xhosa,xh,Xhosa,NA,NA,NA
+Yiddish,yi,Yiddish,NA,NA,NA
+Yoruba,yo,Yoruba,NA,NA,NA
+"Zhuang, Chuang",za,Zhuang,Chuang,NA,NA
+Zulu,zu,Zulu,NA,NA,NA
diff --git a/countries/scrapeCountryInfo.R b/countries/scrapeCountryInfo.R
@@ -0,0 +1,67 @@
+# Here's a BUNCH of code to pull in the tables I want for mapping countries to
+# adjectivals and languages and ISO-639-1 codes. Probably deserves its own
+# package.
+
+# For now, the CSVs this script writes still need a little manual touch-up.
+# It's not much data, though.
+
+library(rvest)
+library(dplyr)
+
+# Download a web page and return one of its HTML tables as a data frame.
+#
+# Args:
+#   url:   Address of the page to fetch.
+#   xpath: XPath expression selecting the table node(s) of interest.
+#
+# Returns:
+#   The first table matched by `xpath`, as parsed by html_table(fill = TRUE).
+#
+# Raises:
+#   An error naming the xpath and url when nothing matches, instead of the
+#   opaque "subscript out of bounds" that indexing an empty list would give.
+getTableFromWeb <- function(url, xpath) {
+  # read_html() is the replacement for rvest's html(), which has been
+  # deprecated since rvest 0.3.0.
+  tableList <- url %>%
+    read_html() %>%
+    html_nodes(xpath = xpath) %>%
+    html_table(fill = TRUE)
+  if (length(tableList) == 0) {
+    stop("No table matched xpath '", xpath, "' at ", url, call. = FALSE)
+  }
+  tableList[[1]]
+}
+
+# Countries and their adjectival forms ----
+# Scrapes Wikipedia's list of adjectival and demonymic forms and writes
+# countries_to_adjectivals.csv, with one column (adjectival_1, adjectival_2,
+# ...) per alternative adjectival.
+countriesToAdjectivals <- getTableFromWeb(
+  "https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations",
+  "//*[@id=\"mw-content-text\"]/table[1]"
+)
+
+# Normalise the headers, e.g. "Country name" -> "country_name". gsub() rather
+# than sub() so a header containing several spaces is converted completely.
+colnames(countriesToAdjectivals) <- gsub(" ", "_",
+                                         tolower(colnames(countriesToAdjectivals)),
+                                         fixed = TRUE)
+
+# Drop the first data row (NOTE(review): presumably a second header row of the
+# scraped table -- confirm). Negative indexing is used because 2:nrow(x)
+# counts backwards (2, 1) when the table has a single row.
+countriesToAdjectivals <- countriesToAdjectivals[-1, ] %>%
+  select(country_name, adjectivals) %>%
+  mutate(
+    # Get rid of the wikipedia cruft (footnote markers such as "[1]"). The
+    # pattern is non-greedy by construction: "\\[.*\\]" would also delete any
+    # real text sitting between two markers in the same cell.
+    country_name = gsub("\\[[^]]*\\]", "", country_name),
+    adjectivals = gsub("\\[[^]]*\\]", "", adjectivals),
+    # For later splitting of adjectivals: turn every " or " into a comma, not
+    # just the first one.
+    adjectivals = gsub(" or ", ", ", adjectivals, fixed = TRUE),
+    # Rearrange the country name into its natural order, e.g.
+    # "Bahamas, The" -> "The Bahamas". The pattern is anchored and lets the
+    # first part contain spaces; otherwise a name of the form
+    # "Virgin Islands, British" would become "Virgin British Islands".
+    natural_country_name = sub("^(.*), ([[:alpha:]].*)$", "\\2 \\1", country_name)
+  )
+
+# One column per comma-separated adjectival; rows with fewer alternatives are
+# padded with NA. max(0L, ...) keeps the loop a no-op on an empty table.
+splitAdjectivals <- strsplit(countriesToAdjectivals[["adjectivals"]], ",[[:space:]]*")
+for (i in seq_len(max(0L, vapply(splitAdjectivals, length, integer(1))))) {
+  countriesToAdjectivals[[paste("adjectival", i, sep = "_")]] <-
+    vapply(splitAdjectivals, function(x) x[i], character(1))
+}
+write.csv(countriesToAdjectivals, "countries_to_adjectivals.csv", row.names = FALSE)
+
+
+# Languages spoken in each country ----
+# Scrape the infoplease country/language table and write it, together with a
+# best-guess primary language, to countries_to_languages.csv.
+countriesToLanguages <- getTableFromWeb(
+  "http://www.infoplease.com/ipa/A0855611.html",
+  "//*[@id=\"Pg\"]/table[1]"
+)
+names(countriesToLanguages) <- c("country_name", "languages")
+
+# The first language is whatever precedes the first punctuation mark or digit
+# in the description (inner sub), further cut at the first " and " (outer sub).
+countriesToLanguages[["first_language"]] <- sub(
+  " and .*", "",
+  sub("[[:space:]]*[[:punct:][:digit:]].*", "", countriesToLanguages[["languages"]])
+)
+write.csv(countriesToLanguages, file = "countries_to_languages.csv", row.names = FALSE)
+
+# ISO 639-1 code lookup ----
+# Scrape Wikipedia's list of ISO 639-1 codes and write languages_to_codes.csv.
+languagesToCodes <- getTableFromWeb(
+  "https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes",
+  "//*[@id=\"mw-content-text\"]/table[2]"
+)
+
+# The scraped headers are awkward to work with, so the two columns of interest
+# are picked by position and named by hand.
+languagesToCodes <- languagesToCodes[, c(3, 5)]
+names(languagesToCodes) <- c("language_name", "iso639")
+
+# A language may be listed under several comma-separated names. Each
+# alternative gets its own column; rows with fewer names are padded with NA.
+# The headers come out as language_name__1, language_name__2, ... (double
+# underscore: the prefix already ends in "_" and sep adds another), which is
+# the form used by the header of languages_to_codes.csv.
+splitLanguageNames <- strsplit(languagesToCodes[["language_name"]], ",[[:space:]]*")
+widestRow <- max(vapply(splitLanguageNames, length, 1))
+for (position in seq_len(widestRow)) {
+  columnLabel <- paste("language_name_", position, sep="_")
+  languagesToCodes[[columnLabel]] <- vapply(splitLanguageNames, `[`, "", position)
+}
+write.csv(languagesToCodes, file = "languages_to_codes.csv", row.names = FALSE)
+