uhop · wrmedford · Nov 23, 2025
diff --git a/README.md b/README.md
@@ -165,6 +165,34 @@ RE2("б").replace("абв", bufReplacer);
 This feature works for string and buffer inputs. If a buffer was used as an input, its output will be returned as
 a buffer too, otherwise a string will be returned.
 
+### `RE2.Set`
+
+When the same string must be tested against many patterns, [`RE2::Set`](https://github.com/google/re2/wiki/SetSyntax) builds a single automaton for all of them. It frequently beats running a large list of individual regular expressions one by one.
+
+* `new RE2.Set(patterns[, flagsOrOptions][, options])`
+  * `patterns` is any iterable of strings, `Buffer`s, `RegExp` instances, or `RE2` instances; flags (if provided) apply to every pattern in the set.
+  * `flagsOrOptions` can be a string/`Buffer` with flags (`i`, `m`, `s`, `u`, `g`, `y`, `d`) or an options object.
+  * `options.anchor` can be `'unanchored'` (default), `'start'`, or `'both'`.
+* `set.match(str)` returns an array of indexes of matching patterns.
+* `set.test(str)` returns `true` if any pattern matches.
+* Read-only properties:
+  * `set.size`, `set.flags`, `set.anchor`
+  * `set.source` (all patterns joined with `|`), `set.sources` (individual pattern sources)
+
+Example:
+
+```js
+const routes = new RE2.Set([
+  '^/users/\\d+$',
+  '^/posts/\\d+$'
+], 'i', {anchor: 'start'});
+
+routes.test('/posts/42');      // true
+routes.match('/users/7');      // [0]
+routes.sources;                // ['^/users/\\d+$', '^/posts/\\d+$']
+routes.toString();             // '/^/users/\\d+$|^/posts/\\d+$/iu'
+```
+
 ### Calculate length
 
 Two functions to calculate string sizes between

diff --git a/binding.gyp b/binding.gyp
@@ -5,6 +5,7 @@
       "sources": [
         "lib/addon.cc",
         "lib/accessors.cc",
+        "lib/pattern.cc",
         "lib/util.cc",
         "lib/new.cc",
         "lib/exec.cc",
@@ -14,6 +15,7 @@
         "lib/search.cc",
         "lib/split.cc",
         "lib/to_string.cc",
+        "lib/set.cc",
         "vendor/re2/re2/bitmap256.cc",
         "vendor/re2/re2/bitstate.cc",
         "vendor/re2/re2/compile.cc",

diff --git a/lib/addon.cc b/lib/addon.cc
@@ -1,4 +1,5 @@
 #include "./wrapped_re2.h"
+#include "./wrapped_re2_set.h"
 
 static NAN_METHOD(GetUtf8Length)
 {
@@ -75,6 +76,9 @@ v8::Local<v8::Function> WrappedRE2::Init()
 	Nan::SetAccessor(instanceTemplate, Nan::New("internalSource").ToLocalChecked(), GetInternalSource);
 
 	auto ctr = Nan::GetFunction(tpl).ToLocalChecked();
+	auto setCtr = WrappedRE2Set::Init();
+
+	Nan::Set(ctr, Nan::New("Set").ToLocalChecked(), setCtr);
 
 	// properties
 

diff --git a/lib/new.cc b/lib/new.cc
@@ -1,258 +1,13 @@
 #include "./wrapped_re2.h"
 #include "./util.h"
+#include "./pattern.h"
 
 #include <map>
 #include <memory>
 #include <string>
 #include <unordered_set>
 #include <vector>
 
-static char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
-
-inline bool isUpperCaseAlpha(char ch)
-{
-	return 'A' <= ch && ch <= 'Z';
-}
-
-inline bool isHexadecimal(char ch)
-{
-	return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
-}
-
-static std::map<std::string, std::string> unicodeClasses = {
-	{"Uppercase_Letter", "Lu"},
-	{"Lowercase_Letter", "Ll"},
-	{"Titlecase_Letter", "Lt"},
-	{"Cased_Letter", "LC"},
-	{"Modifier_Letter", "Lm"},
-	{"Other_Letter", "Lo"},
-	{"Letter", "L"},
-	{"Nonspacing_Mark", "Mn"},
-	{"Spacing_Mark", "Mc"},
-	{"Enclosing_Mark", "Me"},
-	{"Mark", "M"},
-	{"Decimal_Number", "Nd"},
-	{"Letter_Number", "Nl"},
-	{"Other_Number", "No"},
-	{"Number", "N"},
-	{"Connector_Punctuation", "Pc"},
-	{"Dash_Punctuation", "Pd"},
-	{"Open_Punctuation", "Ps"},
-	{"Close_Punctuation", "Pe"},
-	{"Initial_Punctuation", "Pi"},
-	{"Final_Punctuation", "Pf"},
-	{"Other_Punctuation", "Po"},
-	{"Punctuation", "P"},
-	{"Math_Symbol", "Sm"},
-	{"Currency_Symbol", "Sc"},
-	{"Modifier_Symbol", "Sk"},
-	{"Other_Symbol", "So"},
-	{"Symbol", "S"},
-	{"Space_Separator", "Zs"},
-	{"Line_Separator", "Zl"},
-	{"Paragraph_Separator", "Zp"},
-	{"Separator", "Z"},
-	{"Control", "Cc"},
-	{"Format", "Cf"},
-	{"Surrogate", "Cs"},
-	{"Private_Use", "Co"},
-	{"Unassigned", "Cn"},
-	{"Other", "C"},
-};
-
-static bool translateRegExp(const char *data, size_t size, bool multiline, std::vector<char> &buffer)
-{
-	std::string result;
-	bool changed = false;
-
-	if (!size)
-	{
-		result = "(?:)";
-		changed = true;
-	}
-	else if (multiline)
-	{
-		result = "(?m)";
-		changed = true;
-	}
-
-	for (size_t i = 0; i < size;)
-	{
-		char ch = data[i];
-		if (ch == '\\')
-		{
-			if (i + 1 < size)
-			{
-				ch = data[i + 1];
-				switch (ch)
-				{
-				case '\\':
-					result += "\\\\";
-					i += 2;
-					continue;
-				case 'c':
-					if (i + 2 < size)
-					{
-						ch = data[i + 2];
-						if (isUpperCaseAlpha(ch))
-						{
-							result += "\\x";
-							result += hex[((ch - '@') / 16) & 15];
-							result += hex[(ch - '@') & 15];
-							i += 3;
-							changed = true;
-							continue;
-						}
-					}
-					result += "\\c";
-					i += 2;
-					continue;
-				case 'u':
-					if (i + 2 < size)
-					{
-						ch = data[i + 2];
-						if (isHexadecimal(ch))
-						{
-							result += "\\x{";
-							result += ch;
-							i += 3;
-							for (size_t j = 0; j < 3 && i < size; ++i, ++j)
-							{
-								ch = data[i];
-								if (!isHexadecimal(ch))
-								{
-									break;
-								}
-								result += ch;
-							}
-							result += '}';
-							changed = true;
-							continue;
-						}
-						else if (ch == '{')
-						{
-							result += "\\x";
-							i += 2;
-							changed = true;
-							continue;
-						}
-					}
-					result += "\\u";
-					i += 2;
-					continue;
-				case 'p':
-				case 'P':
-					if (i + 2 < size) {
-						if (data[i + 2] == '{') {
-							size_t j = i + 3;
-							while (j < size && data[j] != '}') ++j;
-							if (j < size) {
-								result += "\\";
-								result += data[i + 1];
-								std::string name(data + i + 3, j - i - 3);
-								if (unicodeClasses.find(name) != unicodeClasses.end()) {
-									name = unicodeClasses[name];
-								} else if (name.size() > 7 && !strncmp(name.c_str(), "Script=", 7)) {
-									name = name.substr(7);
-								} else if (name.size() > 3 && !strncmp(name.c_str(), "sc=", 3)) {
-									name = name.substr(3);
-								}
-								if (name.size() == 1) {
-									result += name;
-								} else {
-									result += "{";
-									result += name;
-									result += "}";
-								}
-								i = j + 1;
-								changed = true;
-								continue;
-							}
-						}
-					}
-					result += "\\";
-					result += data[i + 1];
-					i += 2;
-					continue;
-				default:
-					result += "\\";
-					size_t sym_size = getUtf8CharSize(ch);
-					result.append(data + i + 1, sym_size);
-					i += sym_size + 1;
-					continue;
-				}
-			}
-		}
-		else if (ch == '/')
-		{
-			result += "\\/";
-			i += 1;
-			changed = true;
-			continue;
-		}
-		else if (ch == '(' && i + 2 < size && data[i + 1] == '?' && data[i + 2] == '<')
-		{
-			if (i + 3 >= size || (data[i + 3] != '=' && data[i + 3] != '!'))
-			{
-				result += "(?P<";
-				i += 3;
-				changed = true;
-				continue;
-			}
-		}
-		size_t sym_size = getUtf8CharSize(ch);
-		result.append(data + i, sym_size);
-		i += sym_size;
-	}
-
-	if (!changed)
-	{
-		return false;
-	}
-
-	buffer.resize(0);
-	buffer.insert(buffer.end(), result.data(), result.data() + result.size());
-	buffer.push_back('\0');
-
-	return true;
-}
-
-static std::string escapeRegExp(const char *data, size_t size)
-{
-	std::string result;
-
-	if (!size)
-	{
-		result = "(?:)";
-	}
-
-	size_t prevBackSlashes = 0;
-	for (size_t i = 0; i < size;)
-	{
-		char ch = data[i];
-		if (ch == '\\')
-		{
-			++prevBackSlashes;
-		}
-		else if (ch == '/' && !(prevBackSlashes & 1))
-		{
-			result += "\\/";
-			i += 1;
-			prevBackSlashes = 0;
-			continue;
-		}
-		else
-		{
-			prevBackSlashes = 0;
-		}
-		size_t sym_size = getUtf8CharSize(ch);
-		result.append(data + i, sym_size);
-		i += sym_size;
-	}
-
-	return result;
-}
-
 bool WrappedRE2::alreadyWarnedAboutUnicode = false;
 
 static const char *deprecationMessage = "BMP patterns aren't supported by node-re2. An implicit \"u\" flag is assumed by the RE2 constructor. In a future major version, calling the RE2 constructor without the \"u\" flag may become forbidden, or cause a different behavior. Please see https://github.com/uhop/node-re2/issues/21 for more information.";