From f48503cd31826f8cfb0f0e2b6964879dbe318385 Mon Sep 17 00:00:00 2001 From: Sebastian McKenzie Date: Sun, 19 Oct 2014 18:01:34 +1100 Subject: [PATCH] add unicode flag support to regex --- acorn.js | 38 ++++++++++++++++++++++++++---- test/tests-harmony.js | 29 +++++++++++++++++++++++ test/tests.js | 54 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 4 deletions(-) diff --git a/acorn.js b/acorn.js index a36b5dac1a..efd9e42369 100644 --- a/acorn.js +++ b/acorn.js @@ -934,14 +934,36 @@ // Need to use `readWord1` because '\uXXXX' sequences are allowed // here (don't ask). var mods = readWord1(); - if (mods && !/^[gmsiy]*$/.test(mods)) raise(start, "Invalid regular expression flag"); + var tmp = content; + if (mods) { + var validFlags = /^[gmsiy]*$/; + if (options.ecmaVersion >= 6) validFlags = /^[gmsiyu]*$/; + if (!validFlags.test(mods)) raise(start, "Invalid regular expression flag"); + if (mods.indexOf('u') >= 0) { + // Replace each astral symbol and every Unicode code point + // escape sequence that represents such a symbol with a single + // ASCII symbol to avoid throwing on regular expressions that + // are only valid in combination with the `/u` flag. + tmp = tmp + .replace(/\\u\{([0-9a-fA-F]{5,6})\}/g, 'x') + .replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g, 'x'); + } + } + // Detect invalid regular expressions. try { - var value = new RegExp(content, mods); + new RegExp(tmp); } catch (e) { if (e instanceof SyntaxError) raise(start, "Error parsing regular expression: " + e.message); raise(e); } - return finishToken(_regexp, value); + // Get a regular expression object for this pattern-flag pair, or `null` in + // case the current environment doesn't support the flags it uses. + try { + var value = new RegExp(content, mods); + } catch (err) { + value = null; + } + return finishToken(_regexp, {pattern: content, flags: mods, value: value}); } // Read an integer in the given radix. Return null if zero digits @@ -2005,7 +2027,15 @@ } return id; - case _num: case _string: case _regexp: + case _regexp: + var node = startNode(); + node.regex = {pattern: tokVal.pattern, flags: tokVal.flags}; + node.value = tokVal.value; + node.raw = input.slice(tokStart, tokEnd); + next(); + return finishNode(node, "Literal"); + + case _num: case _string: var node = startNode(); node.value = tokVal; node.raw = input.slice(tokStart, tokEnd); diff --git a/test/tests-harmony.js b/test/tests-harmony.js index 66a802be25..b35c91920f 100644 --- a/test/tests-harmony.js +++ b/test/tests-harmony.js @@ -14792,6 +14792,35 @@ test("func(...a, b)", { locations: true }); +test("/[a-z]/u", { + type: "Program", + body: [ + { + type: "ExpressionStatement", + expression: { + type: "Literal", + regex: { + pattern: "[a-z]", + flags: "u" + }, + loc: { + start: { + line: 1, + column: 0 + }, + end: { + line: 1, + column: 8 + } + } + } + } + ] +}, { + locations: true, + ecmaVersion: 6 +}); + // Harmony Invalid syntax testFail("0o", "Expected number in radix 8 (1:2)", {ecmaVersion: 6}); diff --git a/test/tests.js b/test/tests.js index 4fedc72ca1..287d98f84d 100644 --- a/test/tests.js +++ b/test/tests.js @@ -135,6 +135,60 @@ test("\n 42\n\n", { } }); +test("/foobar/", { + type: "Program", + body: [ + { + type: "ExpressionStatement", + expression: { + type: "Literal", + value: /foobar/, + regex: { + pattern: "foobar", + flags: "" + }, + loc: { + start: { + line: 1, + column: 0 + }, + end: { + line: 1, + column: 8 + } + } + } + } + ] +}); + +test("/[a-z]/g", { + type: "Program", + body: [ + { + type: "ExpressionStatement", + expression: { + type: "Literal", + value: /[a-z]/, + regex: { + pattern: "[a-z]", + flags: "g" + }, + loc: { + start: { + line: 1, + column: 0 + }, + end: { + line: 1, + column: 8 + } + } + } + } + ] +}); + test("(1 + 2 ) * 3", { type: "Program", body: [