GenderGuesser

An R package for using the genderize.io API to guess the gender of names.
git clone https://git.eamoncaddigan.net/GenderGuesser.git
Log | Files | Refs | README | LICENSE

commit 707ed17993344282ce35926dcb3b908c59ed82e9
parent 26e1a453d9ddf0575885050a5133c4a241b161a5
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Tue, 11 Aug 2015 16:39:57 -0400

Have sysdata with countries and languages, and tidied up some code.

Diffstat:
M DESCRIPTION | 2 ++
M R/guessGender.R | 37 ++++++++++++++++++-------------------
A R/sysdata.rda | 0
M data-raw/getCountryAndLanguageCodes.R | 30 ++++++++++++++++++++++++++++--
M man/checkLanguageCountryCodes.Rd | 3 ++-
M man/guessGender.Rd | 2 +-
D man/lookupNameVector.Rd | 25 -------------------------
A man/lookupNameVectorGenderize.Rd | 25 +++++++++++++++++++++++++
8 files changed, 76 insertions(+), 48 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION @@ -7,5 +7,7 @@ Depends: R (>= 3.2.1) Imports: httr (>= 1.0.0), jsonlite +Suggests: + devtools License: GPL-3 LazyData: true diff --git a/R/guessGender.R b/R/guessGender.R @@ -2,10 +2,11 @@ # Helper functions -------------------------------------------------------- -#' Check country and language code. +#' Check country and language code. #' -#' Makes sure that no more than one of countryCode or languageCode is *not* NA -#' (i.e., they can both be NA, or one can be NA). +#' Makes sure that no more than one of countryCode or languageCode is *not* NA +#' (i.e., they can both be NA, or one can be NA). +#' @keywords internal checkLanguageCountryCodes <- function(countryCode, languageCode) { # TODO: Check code validity if (sum(is.na(c(countryCode, languageCode))) < 1) { @@ -30,17 +31,14 @@ getListElement <- function(listName, elementName) { # API functions ----------------------------------------------------------- -#' Look up a vector of names. +#' Look up a vector of names on genderize.io. #' #' This function actually implements the genderize.io API. Can only query 10 #' names at a time. -#' @param nameVector A vector containing one or more names to look up. -#' @param countryCode An optional ISO 3166-1 alpha-2 country code. -#' @param languageCode An optional ISO 639-1 language code. Only one of -#' countryCode or languageCode can be specified. -#' @param apiKey An optional API key for genderize.io. 
+#' @inheritParams guessGender #' @keywords internal -lookupNameVector <- function(nameVector, countryCode = NA, languageCode = NA, apiKey = NA) { +lookupNameVectorGenderize <- function(nameVector, + countryCode = NA, languageCode = NA, apiKey = NA) { # Make sure that no more than 10 names were passed if (length(nameVector) > 10) { stop("This only accepts 10 or fewer names") @@ -49,16 +47,16 @@ lookupNameVector <- function(nameVector, countryCode = NA, languageCode = NA, ap # Construct the query query <- paste("name[", seq_along(nameVector), "]=", nameVector, - sep="", - collapse="&") + sep = "", + collapse = "&") if (!is.na(countryCode)) { - query <- paste(query, "&country_id=", countryCode, sep="") + query <- paste(query, "&country_id=", countryCode, sep = "") } if (!is.na(languageCode)) { - query <- paste(query, "&language_id=", languageCode, sep="") + query <- paste(query, "&language_id=", languageCode, sep = "") } if (!is.na(apiKey)) { - query <- paste(query, "&apikey=", apiKey, sep="") + query <- paste(query, "&apikey=", apiKey, sep = "") } # Run it! @@ -66,7 +64,7 @@ lookupNameVector <- function(nameVector, countryCode = NA, languageCode = NA, ap queryResult <- httr::GET("https://api.genderize.io", query = query, httr::config(ssl_verifypeer = FALSE)) if (httr::status_code(queryResult) == 200) { - responseFromJSON <- jsonlite::fromJSON(httr::content(queryResult, as="text")) + responseFromJSON <- jsonlite::fromJSON(httr::content(queryResult, as = "text")) # Make sure this is a data.frame with the correct columns. I bet fromJSON # can do this for me but I don't know how. This code works whether fromJSON # returned a list (the response to one name) or a data.frame (the response @@ -106,8 +104,9 @@ lookupNameVector <- function(nameVector, countryCode = NA, languageCode = NA, ap #' @param apiKey An optional API key for genderize.io. 
#' @export #' @examples -#' guessGender(c("Eamon", "Sean"), countryCode = "US") -guessGender <- function(nameVector, countryCode = NA, languageCode = NA, apiKey = NA) { +#' guessGender(c("Natalie", "Liam", "Eamon"), countryCode = "US") +guessGender <- function(nameVector, + countryCode = NA, languageCode = NA, apiKey = NA) { checkLanguageCountryCodes(countryCode, languageCode) # genderize.io only handles 10 names at a time. Create a list of vectors, each @@ -122,7 +121,7 @@ guessGender <- function(nameVector, countryCode = NA, languageCode = NA, apiKey # Run the queries responseList <- list() for (i in seq_along(queryList)) { - responseDF <- lookupNameVector(queryList[[i]], countryCode, apiKey) + responseDF <- lookupNameVectorGenderize(queryList[[i]], countryCode, apiKey) if (is.null(responseDF)) { break } else { diff --git a/R/sysdata.rda b/R/sysdata.rda Binary files differ. diff --git a/data-raw/getCountryAndLanguageCodes.R b/data-raw/getCountryAndLanguageCodes.R @@ -1,2 +1,28 @@ -#https://api.genderize.io/languages -#https://api.genderize.io/countries + +# Read genderize.io's lists of supported country and language codes ------- + +queryResult <- httr::GET("https://api.genderize.io/languages", + httr::config(ssl_verifypeer = FALSE)) +if (httr::status_code(queryResult) == 200) { + responseFromJSON <- jsonlite::fromJSON(httr::content(queryResult, as="text")) + genderizeLanguages <- responseFromJSON[["languages"]] + # Don't know why they return an empty string. 
+ genderizeLanguages <- genderizeLanguages[nchar(genderizeLanguages) > 0] +} else { + stop("Couldn't load language list") +} + +queryResult <- httr::GET("https://api.genderize.io/countries", + httr::config(ssl_verifypeer = FALSE)) +if (httr::status_code(queryResult) == 200) { + responseFromJSON <- jsonlite::fromJSON(httr::content(queryResult, as="text")) + genderizeCountries <- responseFromJSON[["countries"]] +} else { + stop("Couldn't load country list") +} + + +# Save the lists ---------------------------------------------------------- + +devtools::use_data(genderizeLanguages, genderizeCountries, + internal = TRUE, overwrite = TRUE) diff --git a/man/checkLanguageCountryCodes.Rd b/man/checkLanguageCountryCodes.Rd @@ -8,6 +8,7 @@ checkLanguageCountryCodes(countryCode, languageCode) } \description{ Makes sure that no more than one of countryCode or languageCode is *not* NA - (i.e., they can both be NA, or one can be NA). +(i.e., they can both be NA, or one can be NA). } +\keyword{internal} diff --git a/man/guessGender.Rd b/man/guessGender.Rd @@ -21,6 +21,6 @@ This function uses the genderize.io API to supply estimates of the gender one or more names. } \examples{ -guessGender(c("Eamon", "Sean"), countryCode = "US") +guessGender(c("Natalie", "Liam", "Eamon"), countryCode = "US") } diff --git a/man/lookupNameVector.Rd b/man/lookupNameVector.Rd @@ -1,25 +0,0 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/guessGender.R -\name{lookupNameVector} -\alias{lookupNameVector} -\title{Look up a vector of names.} -\usage{ -lookupNameVector(nameVector, countryCode = NA, languageCode = NA, - apiKey = NA) -} -\arguments{ -\item{nameVector}{A vector containing one or more names to look up.} - -\item{countryCode}{An optional ISO 3166-1 alpha-2 country code.} - -\item{languageCode}{An optional ISO 639-1 language code. 
Only one of -countryCode or languageCode can be specified.} - -\item{apiKey}{An optional API key for genderize.io.} -} -\description{ -This function actually implements the genderize.io API. Can only query 10 -names at a time. -} -\keyword{internal} - diff --git a/man/lookupNameVectorGenderize.Rd b/man/lookupNameVectorGenderize.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/guessGender.R +\name{lookupNameVectorGenderize} +\alias{lookupNameVectorGenderize} +\title{Look up a vector of names on genderize.io.} +\usage{ +lookupNameVectorGenderize(nameVector, countryCode = NA, languageCode = NA, + apiKey = NA) +} +\arguments{ +\item{nameVector}{A vector containing one or more names to look up.} + +\item{countryCode}{An optional ISO 3166-1 alpha-2 country code.} + +\item{languageCode}{An optional ISO 639-1 language code. Only one of +countryCode or languageCode can be specified.} + +\item{apiKey}{An optional API key for genderize.io.} +} +\description{ +This function actually implements the genderize.io API. Can only query 10 +names at a time. +} +\keyword{internal} +