commit 47eac115940fe8a59c760ef512ad334135757c0e
parent 6fb53f77727b9af8c5b7e5ac22e1586057aa5d75
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date: Fri, 26 Feb 2016 21:13:33 -0500
Checking inputs.
Diffstat:
1 file changed, 33 insertions(+), 19 deletions(-)
diff --git a/R/patternapply.R b/R/patternapply.R
@@ -1,22 +1,36 @@
#' Iteratively try patterns against a character vector.
-#'
+#'
#' @param X A character vector where matches are sought.
#' @param patterns A vector of regular expression patterns.
#' @param replacements A list of replacement information, must match the length
#' of \code(patterns). Each element must be a character vector. This can
#' include backreferences "\1" to "\9" to parenthesized subexpressions of the
#' corresponding pattern.
-#'
+#'
#' @return A list of replacement vectors with class "replacement_list".
patternapply <- function(X, patterns,
replacements = list(paste(seq_along(patterns)))) {
+ # Check the inputs
+ if (class(patterns) != "character") {
+ stop("'patterns' must be a character vector")
+ }
+ if (class(replacements) != "list") {
+ stop("'replacements' must be a list")
+ }
+ if (!all(vapply(replacements, function(x) class(x) == "character", logical(1)))) {
+ stop("'replacements' can only contain character vectors")
+ }
+ if (length(patterns) != length(replacements)) {
+ stop("'patterns' and 'replacements' must have the same length")
+ }
+
# Keep track of which records have already been matched to a pattern.
matchIndex <- rep(NA_integer_, length(X))
# This will contain a replacement vector for each element in X.
- replacementList <- list()
-
+ replacementList <- vector("list", length(replacements))
+
for (i in seq_along(patterns)) {
# Match the pattern to the record, omitting items already matched.
matchData <- regexec(patterns[i], X[is.na(matchIndex)])
@@ -28,46 +42,46 @@ patternapply <- function(X, patterns,
# Where are new matches?
matchedHere <- rep(FALSE, length(X))
matchedHere[is.na(matchIndex)] <- matches
-
+
# Figure out which of the replacements are backreferences (and not just
- # strings).
- indexReplacements <- vapply(regmatches(replacements[[i]],
- regexec("^\\\\([1-9])$",
- replacements[[i]])),
- function(x) as.integer(x[2]),
+ # strings).
+ indexReplacements <- vapply(regmatches(replacements[[i]],
+ regexec("^\\\\([1-9])$",
+ replacements[[i]])),
+ function(x) as.integer(x[2]),
integer(1))
stringReplacements <- replacements[[i]]
stringReplacements[!is.na(indexReplacements)] <- NA
# Grab the matched groups
matchedStrings <- regmatches(X[matchedHere], matchData[matches])
-
+
# Create a replacement matrix. Each column is the replacement vector for a
# record.
- backreferenceMatrix <- vapply(matchedStrings, `[`,
- character(length(indexReplacements)),
+ backreferenceMatrix <- vapply(matchedStrings, `[`,
+ character(length(indexReplacements)),
indexReplacements+1)
replacementMatrix <- matrix(rep(stringReplacements, numMatches),
ncol = numMatches)
replacementMatrix[is.na(replacementMatrix)] <- backreferenceMatrix[is.na(replacementMatrix)]
# Fill in the data for the new matches.
- replacementList[matchedHere] <- lapply(seq_len(numMatches),
+ replacementList[matchedHere] <- lapply(seq_len(numMatches),
function(i) replacementMatrix[, i])
# Update the indices of the matches
matchIndex[matchedHere] <- i
}
-
- # Using col_names to hopefully support named vectors in the future.
+
+ # Using col_names to hopefully support named vectors in the future.
maxReplacements <- max(vapply(replacements, length, integer(1)))
attr(replacementList, "col_names") <- paste0("match_", seq_len(maxReplacements))
-
+
# This will probably be useful for people.
attr(replacementList, "match_index") <- matchIndex
-
+
# S3 Object
class(replacementList) <- "replacement_list"
-
+
return(replacementList)
}