patternapply

Iteratively try patterns against a character vector.
git clone https://git.eamoncaddigan.net/patternapply.git
Log | Files | Refs | README | LICENSE

commit 57905ec9dc9086b8e09d120e53c25b22d3961836
parent 26129de9bd5b978e6197d835c5dea58599633913
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Mon,  7 Mar 2016 15:37:40 -0500

filterPatternMatches(): Another helper utility.

Diffstat:
M NAMESPACE | 1 +
A R/filterPatternMatches.R | 21 +++++++++++++++++++++
A man/filterPatternMatches.Rd | 27 +++++++++++++++++++++++++++
A tests/testthat/test_filterPatternMatches.R | 7 +++++++
4 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
@@ -3,4 +3,5 @@
 S3method(as.data.frame,replacement_list)
 S3method(as.matrix,replacement_list)
 export(countPatternMatches)
+export(filterPatternMatches)
 export(patternapply)
diff --git a/R/filterPatternMatches.R b/R/filterPatternMatches.R
@@ -0,0 +1,21 @@
+#' Filter out all of the records that match an existing pattern.
+#'
+#' @param X A character vector where matches are sought.
+#' @param patterns A vector of regular expression patterns.
+#'
+#' @return The subset of \code{X} that did not match any of the regexes in
+#'   \code{patterns}
+#'
+#' @details This is meant to be useful when developing a vector of regexes to
+#'   apply to text. Regular expressions can be appended to \code{patterns}
+#'   interactively until the edge cases are all covered, and then
+#'   \code{patternapply()} can be deployed to extract data.
+#' @export
+filterPatternMatches <- function(X, patterns) {
+  unmatchedRecords <- X
+  for (pattern in patterns) {
+    unmatchedRecords <- subset(unmatchedRecords,
+                               !grepl(pattern, unmatchedRecords))
+  }
+  return(unmatchedRecords)
+}
diff --git a/man/filterPatternMatches.Rd b/man/filterPatternMatches.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filterPatternMatches.R
+\name{filterPatternMatches}
+\alias{filterPatternMatches}
+\title{Filter out all of the records that match an existing pattern.}
+\usage{
+filterPatternMatches(X, patterns)
+}
+\arguments{
+\item{X}{A character vector where matches are sought.}
+
+\item{patterns}{A vector of regular expression patterns.}
+}
+\value{
+The subset of \code{X} that did not match any of the regexes in
+  \code{patterns}
+}
+\description{
+Filter out all of the records that match an existing pattern.
+}
+\details{
+This is meant to be useful when developing a vector of regexes to
+  apply to text. Regular expressions can be appended to \code{patterns}
+  interactively until the edge cases are all covered, and then
+  \code{patternapply()} can be deployed to extract data.
+}
+
diff --git a/tests/testthat/test_filterPatternMatches.R b/tests/testthat/test_filterPatternMatches.R
@@ -0,0 +1,7 @@
+library(patternapply)
+context("filterPatternMatches")
+
+test_that("filterPatternMatches", {
+  expect_equal(filterPatternMatches(paste(seq(0,99)), "[1-9]$"),
+               paste(seq(0, 99, by=10)))
+})