Initial commit; non-functional code. - patternapply - Iteratively try patterns against a character vector.

commit 51c38bc9ecc4eb7932234c0e58547f0caecbb664
parent d711b852897f0a4263b03c1090de7510258d7381
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Sun, 21 Feb 2016 19:57:11 -0500

Initial commit; non-functional code.

Diffstat:
A DESCRIPTION  | 10 ++++++++++
A NAMESPACE  | 1 +
A R/patternapply.R  | 37 +++++++++++++++++++++++++++++++++++++
A man/hello.Rd  | 12 ++++++++++++

4 files changed, 60 insertions(+), 0 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,10 @@
+Package: patternlist
+Type: Package
+Title: Try Regular Expressions in Turn Against Text Data
+Version: 0.1
+Date: 2016-02-19
+Author: Eamon Caddigan
+Maintainer: Eamon Caddigan <eamon.caddigan@gmail.com>
+Description: Extracts information from vectors (or columns) of text data that can take one of several formats by applying regular expressions in turn until a match is found. Mostly useful for ultimately turning such a vector into a data.frame.
+License: BSD 3
+LazyData: TRUE
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/patternapply.R b/R/patternapply.R
@@ -0,0 +1,37 @@
+
+#' Iteratively try patterns against a character vector.
+#'
+#' @param X A character vector where matches are sought.
+#' @param patterns A vector of regular expression patterns, tried in order.
+#' @param replacements A vector of replacement information, must match the
+#'   length of \code{patterns}. This can either be a character vector or list of
+#'   character vectors. This can include backreferences "\1" to "\9" to
+#'   parenthesized subexpressions of the corresponding pattern.
+#' @return Intended: a vector of replacements in the format of
+#'   \code{replacements}. NOTE(review): the body never reads \code{replacements}.
+patternapply <- function(X, patterns,
+                         replacements = paste(seq_along(patterns))) {
+
+  # All-FALSE mask as long as X; tracks records already matched to a pattern.
+  matchFalses <- rep(FALSE, length(X))
+  matchedAlready <- matchFalses
+
+  for (pattern in patterns) {
+    # Match the pattern against only the records no earlier pattern matched.
+    matchedIndices <- regexec(pattern, X[!matchedAlready])
+
+    # Find the matches. NOTE(review): regmatches() gets all of X, not the subset.
+    matches <- vapply(matchedIndices, `[`, integer(1), 1) != -1
+    matchedStrings <- regmatches(X, matchedIndices)
+
+    # Where are new matches? Map the subset's hits back onto positions in X.
+    matchedHere <- matchFalses
+    matchedHere[!matchedAlready] <- vapply(matchedIndices, `[`, integer(1), 1) != -1
+
+    # Fill in new matches. NOTE(review): artists/bioData/commonCols undefined here.
+    artists[matchedHere, commonCols] <- bioData[bioData$is_match, commonCols]
+
+    # Record these matches so later patterns skip the same records.
+    matchedAlready <- matchedAlready | matchedHere
+  }
+}
diff --git a/man/hello.Rd b/man/hello.Rd
@@ -0,0 +1,12 @@
+\name{hello}
+\alias{hello}
+\title{Hello, World!}
+\usage{
+hello()
+}
+\description{
+Prints 'Hello, world!'.
+}
+\examples{
+hello()
+}

	patternapply Iteratively try patterns against a character vector.
	git clone https://git.eamoncaddigan.net/patternapply.git
	Log \| Files \| Refs \| README \| LICENSE

A	DESCRIPTION	\|	10	++++++++++
A	NAMESPACE	\|	1	+
A	R/patternapply.R	\|	37	+++++++++++++++++++++++++++++++++++++
A	man/hello.Rd	\|	12	++++++++++++