patternapply

Iteratively try patterns against a character vector.
git clone https://git.eamoncaddigan.net/patternapply.git
Log | Files | Refs | README | LICENSE

commit 51c38bc9ecc4eb7932234c0e58547f0caecbb664
parent d711b852897f0a4263b03c1090de7510258d7381
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Sun, 21 Feb 2016 19:57:11 -0500

Initial commit; non-functional code.

Diffstat:
A DESCRIPTION      | 10 ++++++++++
A NAMESPACE        |  1 +
A R/patternapply.R | 37 +++++++++++++++++++++++++++++++++++++
A man/hello.Rd     | 12 ++++++++++++
4 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION @@ -0,0 +1,10 @@ +Package: patternlist +Type: Package +Title: Try Regular Expressions in Turn to Data +Version: 0.1 +Date: 2016-02-19 +Author: Eamon Caddigan +Maintainer: Eamon Caddigan <eamon.caddigan@gmail.com> +Description: Extracts information from vectors (or columns) of text data that can take one of several formats by applying regular expressions in turn until a match is found. Mostly useful for ultimately turning such a vector into a data.frame. +License: BSD 3 +LazyData: TRUE diff --git a/NAMESPACE b/NAMESPACE @@ -0,0 +1 @@ +exportPattern("^[[:alpha:]]+") diff --git a/R/patternapply.R b/R/patternapply.R @@ -0,0 +1,37 @@ + +#' Iteratively try patterns against a character vector. +#' +#' @param X A character vector where matches are sought. +#' @param patterns A vector of regular expression patterns. +#' @param replacements A vector of replacement information, must match the +#' length of \code(patterns). This can either be a character vector or list of +#' character vectors. This can include backreferences "\1" to "\9" to +#' parenthesized subexpressions of the corresponding pattern. +#' +#' @return A vector of replacements. Matches the format of \code(replacements). +patternapply <- function(X, patterns, + replacements = paste(seq_along(patterns))) { + + # Keep track of which records have already been matched to a pattern. + matchFalses <- rep(FALSE, length(X)) + matchedAlready <- matchFalses + + for (pattern in patterns) { + # Match the pattern to the + matchedIndices <- regexec(pattern, X[!matchedAlready]) + + # Find all the places where matches occurred. + matches <- vapply(matchedIndices, `[`, integer(1), 1) != -1 + matchedStrings <- regmatches(X, matchedIndices) + + # Where are new matches? + matchedHere <- matchFalses + matchedHere[!matchedAlready] <- vapply(matchedIndices, `[`, integer(1), 1) != -1 + + # Fill in the data for the new matches. 
+ artists[matchedHere, commonCols] <- bioData[bioData$is_match, commonCols] + + # Update the list of matched rows. + matchedAlready <- matchedAlready | matchedHere + } +} diff --git a/man/hello.Rd b/man/hello.Rd @@ -0,0 +1,12 @@ +\name{hello} +\alias{hello} +\title{Hello, World!} +\usage{ +hello() +} +\description{ +Prints 'Hello, world!'. +} +\examples{ +hello() +}