commit 26129de9bd5b978e6197d835c5dea58599633913
parent 624273d5d6cc5dd748be5a09c8a8c20796a20d5d
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date: Mon, 7 Mar 2016 12:15:51 -0500
countPatternMatches(), a helpful utility.
Diffstat:
4 files changed, 59 insertions(+), 0 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,4 +2,5 @@
S3method(as.data.frame,replacement_list)
S3method(as.matrix,replacement_list)
+export(countPatternMatches)
export(patternapply)
diff --git a/R/countPatternMatches.R b/R/countPatternMatches.R
@@ -0,0 +1,19 @@
+#' Count how many matches are generated by each pattern.
+#'
+#' @param X A character vector where matches are sought.
+#' @param patterns A vector of regular expression patterns.
+#'
+#' @return An integer vector of same length as \code{patterns} with a count of
+#' matches.
+#'
+#' @details This is meant to be useful when developing a vector of regexes to
+#' apply to text. Unlike \code{patternapply()}, which only finds the first
+#' regex that matches each element of text, a text element here can match
+#' multiple regexes. Therefore, the sum of the returned vector may not equal
+#' the length of the input \code{X}.
+#' @export
+countPatternMatches <- function(X, patterns) {
+  # Number of elements of X that a single regex matches.
+  countMatches <- function(regex) length(grep(regex, X))
+  vapply(patterns, countMatches, integer(1))
+}
diff --git a/man/countPatternMatches.Rd b/man/countPatternMatches.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/countPatternMatches.R
+\name{countPatternMatches}
+\alias{countPatternMatches}
+\title{Count how many matches are generated by each pattern.}
+\usage{
+countPatternMatches(X, patterns)
+}
+\arguments{
+\item{X}{A character vector where matches are sought.}
+
+\item{patterns}{A vector of regular expression patterns.}
+}
+\value{
+An integer vector of same length as \code{patterns} with a count of
+ matches.
+}
+\description{
+Count how many matches are generated by each pattern.
+}
+\details{
+This is meant to be useful when developing a vector of regexes to
+ apply to text. Unlike \code{patternapply()}, which only finds the first
+ regex that matches each element of text, a text element here can match
+ multiple regexes. Therefore, the sum of the returned vector may not equal
+ the length of the input \code{X}.
+}
+
diff --git a/tests/testthat/test_countPatternMatches.R b/tests/testthat/test_countPatternMatches.R
@@ -0,0 +1,11 @@
+library(patternapply)
+context("countPatternMatches")
+
+test_that("countPatternMatches", {
+  # Drop any names from the result so only the counts are compared.
+  counts <- function(X, patterns) as.vector(countPatternMatches(X, patterns))
+  # "0" occurs in 10 of the strings "0".."99"; every other digit occurs in 19.
+  expect_equal(counts(paste(seq(0, 99)), paste(seq(0, 9))), c(10, rep(19, 9)))
+  expect_equal(counts(letters, letters), rep(1, length(letters)))
+  expect_equal(counts("abc", c("^a", "^b", "^.")), c(1, 0, 1))
+})