filterPatternMatches(): Another helper utility. - patternapply - Iteratively try patterns against a character vector.

commit 57905ec9dc9086b8e09d120e53c25b22d3961836
parent 26129de9bd5b978e6197d835c5dea58599633913
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Mon,  7 Mar 2016 15:37:40 -0500

filterPatternMatches(): Another helper utility.

Diffstat:
M NAMESPACE  | 1 +
A R/filterPatternMatches.R  | 21 +++++++++++++++++++++
A man/filterPatternMatches.Rd  | 27 +++++++++++++++++++++++++++
A tests/testthat/test_filterPatternMatches.R  | 7 +++++++

4 files changed, 56 insertions(+), 0 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,4 +3,5 @@
 S3method(as.data.frame,replacement_list)
 S3method(as.matrix,replacement_list)
 export(countPatternMatches)
+export(filterPatternMatches)
 export(patternapply)
diff --git a/R/filterPatternMatches.R b/R/filterPatternMatches.R
@@ -0,0 +1,21 @@
+#' Filter out all of the records that match an existing pattern.
+#' 
+#' @param X A character vector where matches are sought.
+#' @param patterns A vector of regular expression patterns.
+#'   
+#' @return The subset of \code{X} that did not match any of the regexes in 
+#'   \code{patterns}
+#'   
+#' @details This is meant to be useful when developing a vector of regexes to 
+#'   apply to text. Regular expressions can be appended to \code{patterns}
+#'   interactively until the edge cases are all covered, and then
+#'   \code{patternapply()} can be deployed to extract data.
+#' @export
+filterPatternMatches <- function(X, patterns) {
+  unmatchedRecords <- X  # shrinks as each pattern removes its matches
+  for (pattern in patterns) {  # tried in order, on the survivors only
+    unmatchedRecords <- subset(unmatchedRecords, 
+                               !grepl(pattern, unmatchedRecords))
+  }
+  return(unmatchedRecords)
+}
diff --git a/man/filterPatternMatches.Rd b/man/filterPatternMatches.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filterPatternMatches.R
+\name{filterPatternMatches}
+\alias{filterPatternMatches}
+\title{Filter out all of the records that match an existing pattern.}
+\usage{
+filterPatternMatches(X, patterns)
+}
+\arguments{
+\item{X}{A character vector where matches are sought.}
+
+\item{patterns}{A vector of regular expression patterns.}
+}
+\value{
+The subset of \code{X} that did not match any of the regexes in 
+  \code{patterns}
+}
+\description{
+Filter out all of the records that match an existing pattern.
+}
+\details{
+This is meant to be useful when developing a vector of regexes to 
+  apply to text. Regular expressions can be appended to \code{patterns}
+  interactively until the edge cases are all covered, and then
+  \code{patternapply()} can be deployed to extract data.
+}
+
diff --git a/tests/testthat/test_filterPatternMatches.R b/tests/testthat/test_filterPatternMatches.R
@@ -0,0 +1,7 @@
+library(patternapply)
+context("filterPatternMatches")
+# Of "0".."99", dropping entries that end in 1-9 leaves the multiples of ten.
+test_that("filterPatternMatches", {
+  expect_equal(filterPatternMatches(paste(seq(0,99)), "[1-9]$"),
+               paste(seq(0, 99, by=10)))
+})

	patternapply Iteratively try patterns against a character vector.
	git clone https://git.eamoncaddigan.net/patternapply.git
	Log | Files | Refs | README | LICENSE

M	NAMESPACE	|	1	+
A	R/filterPatternMatches.R	|	21	+++++++++++++++++++++
A	man/filterPatternMatches.Rd	|	27	+++++++++++++++++++++++++++
A	tests/testthat/test_filterPatternMatches.R	|	7	+++++++