commit 51c38bc9ecc4eb7932234c0e58547f0caecbb664
parent d711b852897f0a4263b03c1090de7510258d7381
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date: Sun, 21 Feb 2016 19:57:11 -0500
Initial commit; non-functional code.
Diffstat:
4 files changed, 60 insertions(+), 0 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,10 @@
+Package: patternlist
+Type: Package
+Title: Try Regular Expressions in Turn to Extract Data
+Version: 0.1
+Date: 2016-02-19
+Author: Eamon Caddigan
+Maintainer: Eamon Caddigan <eamon.caddigan@gmail.com>
+Description: Extracts information from vectors (or columns) of text data that can take one of several formats by applying regular expressions in turn until a match is found. Mostly useful for ultimately turning such a vector into a data.frame.
+License: BSD 3
+LazyData: TRUE
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/patternapply.R b/R/patternapply.R
@@ -0,0 +1,37 @@
+
+#' Iteratively try patterns against a character vector.
+#'
+#' @param X A character vector where matches are sought.
+#' @param patterns A vector of regular expression patterns, tried in order.
+#' @param replacements A vector of replacement information, must match the
+#'   length of \code{patterns}. This can either be a character vector or list of
+#'   character vectors. This can include backreferences "\\1" to "\\9" to
+#'   parenthesized subexpressions of the corresponding pattern.
+#' @return One replacement per element of \code{X}, in the format of
+#'   \code{replacements}; \code{NA} where no pattern matched.
+patternapply <- function(X, patterns,
+                         replacements = paste(seq_along(patterns))) {
+  stopifnot(length(replacements) == length(patterns))
+  X <- as.character(X)
+  # Unmatched records stay NA; the container mirrors `replacements`.
+  fill <- if (is.list(replacements)) list(NA_character_) else NA_character_
+  results <- rep(fill, length(X))
+  # Keep track of which records have already been matched to a pattern.
+  matchedAlready <- is.na(X)
+  for (i in seq_along(patterns)) {
+    # Only records that no earlier pattern matched are tried here.
+    todo <- which(!matchedAlready)
+    if (length(todo) == 0L) break
+    hits <- regmatches(X[todo], regexec(patterns[[i]], X[todo]))
+    for (j in which(lengths(hits) > 0L)) {
+      # hits[[j]] holds the full match, then one entry per capture group.
+      filled <- replacements[[i]]
+      for (g in seq_along(hits[[j]])[-1L]) {
+        filled <- gsub(paste0("\\", g - 1L), hits[[j]][g], filled, fixed = TRUE)
+      }
+      results[[todo[j]]] <- filled
+      matchedAlready[todo[j]] <- TRUE
+    }
+  }
+  results
+}
diff --git a/man/hello.Rd b/man/hello.Rd
@@ -0,0 +1,12 @@
+\name{hello}
+\alias{hello}
+\title{Hello, World!}
+\usage{
+hello()
+}
+\description{
+Prints 'Hello, world!'.
+}
+\examples{
+hello()
+}