patternapply

Iteratively try patterns against a character vector.
git clone https://git.eamoncaddigan.net/patternapply.git
Log | Files | Refs | README | LICENSE

commit 26129de9bd5b978e6197d835c5dea58599633913
parent 624273d5d6cc5dd748be5a09c8a8c20796a20d5d
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Mon,  7 Mar 2016 12:15:51 -0500

countPatternMatches(), a helpful utility.

Diffstat:
M NAMESPACE | 1 +
A R/countPatternMatches.R | 19 +++++++++++++++++++
A man/countPatternMatches.Rd | 28 ++++++++++++++++++++++++++++
A tests/testthat/test_countPatternMatches.R | 11 +++++++++++
4 files changed, 59 insertions(+), 0 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,4 +2,5 @@
 
 S3method(as.data.frame,replacement_list)
 S3method(as.matrix,replacement_list)
+export(countPatternMatches)
 export(patternapply)
diff --git a/R/countPatternMatches.R b/R/countPatternMatches.R
@@ -0,0 +1,19 @@
+#' Count how many matches are generated by each pattern.
+#'
+#' @param X A character vector where matches are sought.
+#' @param patterns A vector of regular expression patterns.
+#'
+#' @return A integer vector of same length as \code{patterns} with a count of
+#'   matches.
+#'
+#' @details This is meant to be useful when developing a vector of regexes to
+#'   apply to text. Unlike \code{patternappy()}, which only finds the first
+#'   regex that matches each element of text, a text element here can match
+#'   multiple regexes. Therefore, the sum of the returned vector may not equal
+#'   the length of the input \code{X}.
+#' @export
+countPatternMatches <- function(X, patterns) {
+  return(vapply(patterns,
+                function(pattern) length(grep(pattern, X)),
+                integer(1)))
+}
diff --git a/man/countPatternMatches.Rd b/man/countPatternMatches.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/countPatternMatches.R
+\name{countPatternMatches}
+\alias{countPatternMatches}
+\title{Count how many matches are generated by each pattern.}
+\usage{
+countPatternMatches(X, patterns)
+}
+\arguments{
+\item{X}{A character vector where matches are sought.}
+
+\item{patterns}{A vector of regular expression patterns.}
+}
+\value{
+A integer vector of same length as \code{patterns} with a count of
+  matches.
+}
+\description{
+Count how many matches are generated by each pattern.
+}
+\details{
+This is meant to be useful when developing a vector of regexes to
+  apply to text. Unlike \code{patternappy()}, which only finds the first
+  regex that matches each element of text, a text element here can match
+  multiple regexes. Therefore, the sum of the returned vector may not equal
+  the length of the input \code{X}.
+}
+
diff --git a/tests/testthat/test_countPatternMatches.R b/tests/testthat/test_countPatternMatches.R
@@ -0,0 +1,11 @@
+library(patternapply)
+context("countPatternMatches")
+
+test_that("countPatternMatches", {
+  expect_equal(as.vector(countPatternMatches(paste(seq(0,99)), paste(seq(0,9)))),
+               c(10, rep(19, 9)))
+  expect_equal(as.vector(countPatternMatches(letters, letters)),
+               rep(1, length(letters)))
+  expect_equal(as.vector(countPatternMatches("abc", c("^a", "^b", "^."))),
+               c(1, 0, 1))
+})