[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
311/376: Add a primop for regular expression pattern matching
From: Ludovic Courtès
Subject: 311/376: Add a primop for regular expression pattern matching
Date: Wed, 28 Jan 2015 22:05:50 +0000
civodul pushed a commit to tag 1.8
in repository guix.
commit 976df480c918f050608f7a23a4a21415c43475c3
Author: Eelco Dolstra <address@hidden>
Date: Tue Nov 25 11:47:06 2014 +0100
Add a primop for regular expression pattern matching
The function ‘builtins.match’ takes a POSIX extended regular
expression and an arbitrary string. It returns ‘null’ if the string
does not match the regular expression. Otherwise, it returns a list
containing substring matches corresponding to parenthesis groups in
the regex. The regex must match the entire string (i.e. there is an
implied "^(<pat>)$" around the regex). For example:
match "foo" "foobar" => null
match "foo" "foo" => []
match "f(o+)(.*)" "foooobar" => ["oooo" "bar"]
match "(.*/)?([^/]*)" "/dir/file.nix" => ["/dir/" "file.nix"]
match "(.*/)?([^/]*)" "file.nix" => [null "file.nix"]
The following example finds all regular files with extension .nix or
.patch underneath the current directory:
let
findFiles = pat: dir: concatLists (mapAttrsToList (name: type:
if type == "directory" then
findFiles pat (dir + "/" + name)
else if type == "regular" && match pat name != null then
[(dir + "/" + name)]
else []) (readDir dir));
in findFiles ".*\\.(nix|patch)" (toString ./.)
---
src/libexpr/primops.cc | 30 +++++++++++++++++++++++++++++-
src/libutil/regex.cc | 23 ++++++++++++++++++++---
src/libutil/regex.hh | 9 ++++++++-
tests/lang/eval-okay-regex-match.exp | 1 +
tests/lang/eval-okay-regex-match.nix | 26 ++++++++++++++++++++++++++
5 files changed, 84 insertions(+), 5 deletions(-)
diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc
index ed50c80..b0596da 100644
--- a/src/libexpr/primops.cc
+++ b/src/libexpr/primops.cc
@@ -1430,7 +1430,34 @@ static void prim_hashString(EvalState & state, const Pos
& pos, Value * * args,
string s = state.forceString(*args[1], context, pos);
mkString(v, printHash(hashString(ht, s)), context);
-};
+}
+
+
+/* Match a regular expression against a string and return either
+ ‘null’ or a list containing substring matches. */
+static void prim_match(EvalState & state, const Pos & pos, Value * * args,
Value & v)
+{
+ Regex regex(state.forceStringNoCtx(*args[0], pos), true);
+
+ PathSet context;
+ string s = state.forceString(*args[1], context, pos);
+
+ Regex::Subs subs;
+ if (!regex.matches(s, subs)) {
+ mkNull(v);
+ return;
+ }
+
+ unsigned int len = subs.empty() ? 0 : subs.rbegin()->first + 1;
+ state.mkList(v, len);
+ for (unsigned int n = 0; n < len; ++n) {
+ auto i = subs.find(n);
+ if (i == subs.end())
+ mkNull(*(v.list.elems[n] = state.allocValue()));
+ else
+ mkString(*(v.list.elems[n] = state.allocValue()), i->second);
+ }
+}
/*************************************************************
@@ -1584,6 +1611,7 @@ void EvalState::createBaseEnv()
addPrimOp("__unsafeDiscardStringContext", 1,
prim_unsafeDiscardStringContext);
addPrimOp("__unsafeDiscardOutputDependency", 1,
prim_unsafeDiscardOutputDependency);
addPrimOp("__hashString", 2, prim_hashString);
+ addPrimOp("__match", 2, prim_match);
// Versions
addPrimOp("__parseDrvName", 1, prim_parseDrvName);
diff --git a/src/libutil/regex.cc b/src/libutil/regex.cc
index 36c8458..84274b3 100644
--- a/src/libutil/regex.cc
+++ b/src/libutil/regex.cc
@@ -1,13 +1,16 @@
#include "regex.hh"
#include "types.hh"
+#include <algorithm>
+
namespace nix {
-Regex::Regex(const string & pattern)
+Regex::Regex(const string & pattern, bool subs)
{
/* Patterns must match the entire string. */
- int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), REG_NOSUB |
REG_EXTENDED);
- if (err) throw Error(format("compiling pattern ‘%1%’: %2%") % pattern %
showError(err));
+ int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), (subs ? 0 :
REG_NOSUB) | REG_EXTENDED);
+ if (err) throw RegexError(format("compiling pattern ‘%1%’: %2%") % pattern
% showError(err));
+ nrParens = subs ? std::count(pattern.begin(), pattern.end(), '(') : 0;
}
Regex::~Regex()
@@ -23,6 +26,20 @@ bool Regex::matches(const string & s)
throw Error(format("matching string ‘%1%’: %2%") % s % showError(err));
}
+bool Regex::matches(const string & s, Subs & subs)
+{
+ regmatch_t pmatch[nrParens + 2];
+ int err = regexec(&preg, s.c_str(), nrParens + 2, pmatch, 0);
+ if (err == 0) {
+ for (unsigned int n = 2; n < nrParens + 2; ++n)
+ if (pmatch[n].rm_eo != -1)
+ subs[n - 2] = string(s, pmatch[n].rm_so, pmatch[n].rm_eo -
pmatch[n].rm_so);
+ return true;
+ }
+ else if (err == REG_NOMATCH) return false;
+ throw Error(format("matching string ‘%1%’: %2%") % s % showError(err));
+}
+
string Regex::showError(int err)
{
char buf[256];
diff --git a/src/libutil/regex.hh b/src/libutil/regex.hh
index aa012b7..53e31f4 100644
--- a/src/libutil/regex.hh
+++ b/src/libutil/regex.hh
@@ -5,16 +5,23 @@
#include <sys/types.h>
#include <regex.h>
+#include <map>
+
namespace nix {
+MakeError(RegexError, Error)
+
class Regex
{
public:
- Regex(const string & pattern);
+ Regex(const string & pattern, bool subs = false);
~Regex();
bool matches(const string & s);
+ typedef std::map<unsigned int, string> Subs;
+ bool matches(const string & s, Subs & subs);
private:
+ unsigned nrParens;
regex_t preg;
string showError(int err);
};
diff --git a/tests/lang/eval-okay-regex-match.exp
b/tests/lang/eval-okay-regex-match.exp
new file mode 100644
index 0000000..27ba77d
--- /dev/null
+++ b/tests/lang/eval-okay-regex-match.exp
@@ -0,0 +1 @@
+true
diff --git a/tests/lang/eval-okay-regex-match.nix
b/tests/lang/eval-okay-regex-match.nix
new file mode 100644
index 0000000..ae65015
--- /dev/null
+++ b/tests/lang/eval-okay-regex-match.nix
@@ -0,0 +1,26 @@
+with builtins;
+
+let
+
+ matches = pat: s: match pat s != null;
+
+ splitFN = match "((.*)/)?([^/]*)\\.(nix|cc)";
+
+in
+
+assert matches "foobar" "foobar";
+assert matches "fo*" "f";
+assert !matches "fo+" "f";
+assert matches "fo*" "fo";
+assert matches "fo*" "foo";
+assert matches "fo+" "foo";
+assert matches "fo{1,2}" "foo";
+assert !matches "fo{1,2}" "fooo";
+assert !matches "fo*" "foobar";
+
+assert match "(.*)\\.nix" "foobar.nix" == [ "foobar" ];
+
+assert splitFN "/path/to/foobar.nix" == [ "/path/to/" "/path/to" "foobar"
"nix" ];
+assert splitFN "foobar.cc" == [ null null "foobar" "cc" ];
+
+true
- 249/376: nix-copy-closure: Use strict, (continued)
- 249/376: nix-copy-closure: Use strict, Ludovic Courtès, 2015/01/28
- 241/376: Tweak, Ludovic Courtès, 2015/01/28
- 304/376: Combine introduction / quick start parts, Ludovic Courtès, 2015/01/28
- 250/376: Remove unused @sshOpts flag, Ludovic Courtès, 2015/01/28
- 305/376: Update quick start section, Ludovic Courtès, 2015/01/28
- 306/376: Update installation section, Ludovic Courtès, 2015/01/28
- 308/376: Build derivations in a more predictable order, Ludovic Courtès, 2015/01/28
- 307/376: Don't create unnecessary substitution goals for derivations, Ludovic Courtès, 2015/01/28
- 309/376: More build-cache-failures -> build-cache-failure, Ludovic Courtès, 2015/01/28
- 310/376: forceString(): Accept pos argument, Ludovic Courtès, 2015/01/28
- 311/376: Add a primop for regular expression pattern matching,
Ludovic Courtès <=
- 312/376: Rely on XML catalogs to find the DocBook schemas and stylesheets, Ludovic Courtès, 2015/01/28
- 313/376: Intro: Mention binary caches, Ludovic Courtès, 2015/01/28
- 303/376: Manual: Bump date, Ludovic Courtès, 2015/01/28
- 315/376: Make all ExternalValueBase functions const, Ludovic Courtès, 2015/01/28
- 318/376: Shut up a warning, Ludovic Courtès, 2015/01/28
- 317/376: Fix another operator precedence issue found by Perl 5.20, Ludovic Courtès, 2015/01/28
- 322/376: Remove Fedora 18, 19 builds, Ludovic Courtès, 2015/01/28
- 319/376: Use posix_spawn to run the pager, Ludovic Courtès, 2015/01/28
- 321/376: Remove some platforms with too-old compilers, Ludovic Courtès, 2015/01/28
- 323/376: Explicitly include required C headers, Ludovic Courtès, 2015/01/28