GenderGuesser

An R package for using the genderize.io API to guess the gender of names.
git clone https://git.eamoncaddigan.net/GenderGuesser.git
Log | Files | Refs | README | LICENSE

commit 762368b13b094d22399496c2eab79607871b4444
parent 707ed17993344282ce35926dcb3b908c59ed82e9
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Tue, 11 Aug 2015 17:07:21 -0400

Checking language and country validity. Also found and fixed a bug in language handling!

Diffstat:
M R/guessGender.R | 36 ++++++++++++++++++++++++++----------
M man/checkLanguageCountryCodes.Rd | 5 +++--
2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/R/guessGender.R b/R/guessGender.R
@@ -3,14 +3,29 @@
 # Helper functions --------------------------------------------------------

 #' Check country and language code.
-#'
-#' Makes sure that no more than one of countryCode or languageCode is *not* NA
-#' (i.e., they can both be NA, or one can be NA).
+#'
+#' Makes sure that no more than one of countryCode or languageCode is *not* NA
+#' (i.e., they can both be NA, or one can be NA). Also ensures that any code
+#' specified is recognized by genderize.io.
 #' @keywords internal
-checkLanguageCountryCodes <- function(countryCode, languageCode) {
-  # TODO: Check code validity
-  if (sum(is.na(c(countryCode, languageCode))) < 1) {
-    stop("Only one of countryCode or languageCode can be passed")
+checkLanguageCountryCodes <- function(languageCode, countryCode) {
+  checkCodeInVector <- function(code, codeVector) {
+    return(match(tolower(code), tolower(codeVector), nomatch = 0) > 0)
+  }
+
+  # Very ugly control flow here.
+  if (!is.na(countryCode)) {
+    if (!checkCodeInVector(countryCode, genderizeCountries)) {
+      stop("Country code not in list")
+    }
+    if (!is.na(languageCode)) {
+      stop("Only one of countryCode or languageCode can be specified")
+    }
+  }
+  if (!is.na(languageCode)) {
+    if (!checkCodeInVector(languageCode, genderizeLanguages)) {
+      stop("Language code not in list")
+    }
   }
 }

@@ -43,7 +58,7 @@ lookupNameVectorGenderize <- function(nameVector,
   if (length(nameVector) > 10) {
     stop("This only accepts 10 or fewer names")
   }
-  checkLanguageCountryCodes(countryCode, languageCode)
+  checkLanguageCountryCodes(languageCode, countryCode)

   # Construct the query
   query <- paste("name[", seq_along(nameVector), "]=", nameVector,
@@ -107,7 +122,7 @@ lookupNameVectorGenderize <- function(nameVector,
 #' guessGender(c("Natalie", "Liam", "Eamon"), countryCode = "US")
 guessGender <- function(nameVector,
                         countryCode = NA, languageCode = NA, apiKey = NA) {
-  checkLanguageCountryCodes(countryCode, languageCode)
+  checkLanguageCountryCodes(languageCode, countryCode)

   # genderize.io only handles 10 names at a time. Create a list of vectors, each
   # with no more than 10 names.
@@ -121,7 +136,8 @@ guessGender <- function(nameVector,
   # Run the queries
   responseList <- list()
   for (i in seq_along(queryList)) {
-    responseDF <- lookupNameVectorGenderize(queryList[[i]], countryCode, apiKey)
+    responseDF <- lookupNameVectorGenderize(queryList[[i]],
+                                            countryCode, languageCode, apiKey)
     if (is.null(responseDF)) {
       break
     } else {
diff --git a/man/checkLanguageCountryCodes.Rd b/man/checkLanguageCountryCodes.Rd
@@ -4,11 +4,12 @@
 \alias{checkLanguageCountryCodes}
 \title{Check country and language code.}
 \usage{
-checkLanguageCountryCodes(countryCode, languageCode)
+checkLanguageCountryCodes(languageCode, countryCode)
 }
 \description{
 Makes sure that no more than one of countryCode or languageCode is *not* NA
-(i.e., they can both be NA, or one can be NA).
+(i.e., they can both be NA, or one can be NA). Also ensures that any code
+specified is recognized by genderize.io.
 }
 \keyword{internal}