From 4f24e13fd580556255386822fd211252f806d02f Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Sat, 26 Jul 2014 05:09:13 +0300 Subject: [PATCH] Added template string support. --- acorn.js | 117 ++++++++++++++++++++++++++++++++++++------ test/tests-harmony.js | 96 +++++++++++++++++----------------- 2 files changed, 148 insertions(+), 65 deletions(-) diff --git a/acorn.js b/acorn.js index 86a7762344..b0206b4b4a 100644 --- a/acorn.js +++ b/acorn.js @@ -199,7 +199,7 @@ var tokType, tokVal; - // Interal state for the tokenizer. To distinguish between division + // Internal state for the tokenizer. To distinguish between division // operators and regular expressions, it remembers whether the last // token was one that is allowed to be followed by an expression. // (If it is, a slash is probably a regexp, if it isn't it's a @@ -231,6 +231,12 @@ var metParenL; + // This is used by parser for detecting if it's inside ES6 + // Template String. If it is, it should treat '$' as prefix before + // '{expression}' and everything else as string literals. + + var inTemplate = false; + // This function is used to raise exceptions on parse errors. It // takes an offset integer (into the current `input`) to indicate // the location of the error, attaches the position to the end @@ -323,7 +329,7 @@ var _braceR = {type: "}"}, _parenL = {type: "(", beforeExpr: true}, _parenR = {type: ")"}; var _comma = {type: ",", beforeExpr: true}, _semi = {type: ";", beforeExpr: true}; var _colon = {type: ":", beforeExpr: true}, _dot = {type: "."}, _ellipsis = {type: "..."}, _question = {type: "?", beforeExpr: true}; - var _arrow = {type: "=>", beforeExpr: true}; + var _arrow = {type: "=>", beforeExpr: true}, _bquote = {type: "`"}, _dollarBraceL = {type: "${", beforeExpr: true}; // Operators. These carry several kinds of properties to help the // parser use them properly (the presence of these properties is @@ -361,7 +367,8 @@ exports.tokTypes = {bracketL: _bracketL, bracketR: _bracketR, braceL: _braceL, braceR: _braceR, parenL: _parenL, parenR: _parenR, comma: _comma, semi: _semi, colon: _colon, dot: _dot, ellipsis: _ellipsis, question: _question, slash: _slash, eq: _eq, - name: _name, eof: _eof, num: _num, regexp: _regexp, string: _string}; + name: _name, eof: _eof, num: _num, regexp: _regexp, string: _string, + arrow: _arrow, bquote: _bquote, dollarBraceL: _dollarBraceL}; for (var kw in keywordTypes) exports.tokTypes["_" + kw] = keywordTypes[kw]; // This is a trick taken from Esprima. It turns out that, on @@ -500,6 +507,7 @@ tokPos = tokLineStart = 0; tokRegexpAllowed = true; metParenL = 0; + inTemplate = false; skipSpace(); } @@ -511,7 +519,7 @@ tokEnd = tokPos; if (options.locations) tokEndLoc = new Position; tokType = type; - skipSpace(); + if (type !== _bquote || inTemplate) skipSpace(); tokVal = val; tokRegexpAllowed = type.beforeExpr; } @@ -690,13 +698,30 @@ } function getTokenFromCode(code) { - switch(code) { - // The interpretation of a dot depends on whether it is followed - // by a digit or another two dots. + // Special rules work inside ES6 template strings. + if (inTemplate) { + // '`' and '${' have special meanings, but they should follow string (can be empty) + if (tokType === _string) { + if (code === 96) { // '`' + ++tokPos; + return finishToken(_bquote); + } + if (code === 36 && input.charCodeAt(tokPos + 1) === 123) { // '${' + tokPos += 2; + return finishToken(_dollarBraceL); + } + } + // anything else is considered string literal + return readString(); + } + + switch (code) { + // The interpretation of a dot depends on whether it is followed + // by a digit or another two dots. case 46: // '.' return readToken_dot(); - // Punctuation tokens. + // Punctuation tokens. case 40: ++tokPos; return finishToken(_parenL); case 41: ++tokPos; return finishToken(_parenR); case 59: ++tokPos; return finishToken(_semi); @@ -707,6 +732,12 @@ case 125: ++tokPos; return finishToken(_braceR); case 58: ++tokPos; return finishToken(_colon); case 63: ++tokPos; return finishToken(_question); + + case 96: // '`' + if (options.ecmaVersion >= 6) { + ++tokPos; + return finishToken(_bquote); + } case 48: // '0' var next = input.charCodeAt(tokPos + 1); @@ -715,12 +746,12 @@ if (next === 111 || next === 79) return readRadixNumber(8); // '0o', '0O' - octal number if (next === 98 || next === 66) return readRadixNumber(2); // '0b', '0B' - binary number } - // Anything else beginning with a digit is an integer, octal - // number, or float. + // Anything else beginning with a digit is an integer, octal + // number, or float. case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: // 1-9 return readNumber(false); - // Quotes produce strings. + // Quotes produce strings. case 34: case 39: // '"', "'" return readString(code); @@ -767,7 +798,7 @@ var code = input.charCodeAt(tokPos); // Identifier or keyword. '\uXXXX' sequences are allowed in // identifiers, so '\' also dispatches to that. - if (isIdentifierStart(code) || code === 92 /* '\' */) return readWord(); + if (!inTemplate && (isIdentifierStart(code) || code === 92 /* '\' */)) return readWord(); var tok = getTokenFromCode(code); @@ -900,12 +931,15 @@ } function readString(quote) { - tokPos++; + if (!inTemplate) tokPos++; var out = ""; for (;;) { if (tokPos >= inputLen) raise(tokStart, "Unterminated string constant"); var ch = input.charCodeAt(tokPos); - if (ch === quote) { + if (inTemplate) { + if (ch === 96 || ch === 36 && input.charCodeAt(tokPos + 1) === 123) // '`', '${' + return finishToken(_string, out); + } else if (ch === quote) { ++tokPos; return finishToken(_string, out); } @@ -940,9 +974,22 @@ } } } else { - if (ch === 13 || ch === 10 || ch === 8232 || ch === 8233) raise(tokStart, "Unterminated string constant"); - out += String.fromCharCode(ch); // '\' ++tokPos; + if (ch === 13 || ch === 10 || ch === 8232 || ch === 8233) { + if (inTemplate) { + if (ch === 13 && input.charCodeAt(tokPos) === 10) { + ++tokPos; + ch = 10; + } + if (options.locations) { + ++tokCurLine; + tokLineStart = tokPos; + } + } else { + raise(tokStart, "Unterminated string constant"); + } + } + out += String.fromCharCode(ch); // '\' } } } @@ -1693,7 +1740,12 @@ node.callee = base; node.arguments = parseExprList(_parenR, false); return parseSubscripts(finishNode(node, "CallExpression"), noCalls); - } else return base; + } else if (tokType === _bquote) { + var node = startNodeFrom(base); + node.tag = base; + node.quasi = parseTemplate(); + return parseSubscripts(finishNode(node, "TaggedTemplateExpression"), noCalls); + } return base; } // Parse an atomic expression — either a single token that is an @@ -1800,6 +1852,9 @@ case _ellipsis: return parseSpread(); + case _bquote: + return parseTemplate(); + default: unexpected(); } @@ -1827,6 +1882,34 @@ return finishNode(node, "SpreadElement"); } + // Parse template expression. + + function parseTemplate() { + var node = startNode(); + node.expressions = []; + node.quasis = []; + inTemplate = true; + next(); + for (;;) { + var elem = startNode(); + elem.value = {cooked: tokVal, raw: input.slice(tokStart, tokEnd)}; + elem.tail = false; + next(); + node.quasis.push(finishNode(elem, "TemplateElement")); + if (eat(_bquote)) { // '`', end of template + elem.tail = true; + break; + } + inTemplate = false; + expect(_dollarBraceL); + node.expressions.push(parseExpression()); + inTemplate = true; + expect(_braceR); + } + inTemplate = false; + return finishNode(node, "TemplateLiteral"); + } + // Parse an object literal. function parseObj() { diff --git a/test/tests-harmony.js b/test/tests-harmony.js index d88723ae55..e3e9b01ed4 100644 --- a/test/tests-harmony.js +++ b/test/tests-harmony.js @@ -41,6 +41,7 @@ if (typeof exports != "undefined") { - parenthesized expressions (include brackets into expression's location) - expression statements (excluded spaces after statement's semicolon) - arrow and method functions (included arguments into function's location) + - template elements (excluded '`', '${' and '}' from element's location) */ // ES6 Unicode Code Point Escape Sequence @@ -686,10 +687,10 @@ test("`42`", { type: "TemplateElement", value: {raw: "42", cooked: "42"}, tail: true, - range: [0, 4], + range: [1, 3], loc: { - start: {line: 1, column: 0}, - end: {line: 1, column: 4} + start: {line: 1, column: 1}, + end: {line: 1, column: 3} } }], expressions: [], @@ -737,10 +738,10 @@ test("raw`42`", { type: "TemplateElement", value: {raw: "42", cooked: "42"}, tail: true, - range: [3, 7], + range: [4, 6], loc: { - start: {line: 1, column: 3}, - end: {line: 1, column: 7} + start: {line: 1, column: 4}, + end: {line: 1, column: 6} } }], expressions: [], @@ -795,20 +796,20 @@ test("raw`hello ${name}`", { type: "TemplateElement", value: {raw: "hello ", cooked: "hello "}, tail: false, - range: [3, 12], + range: [4, 10], loc: { - start: {line: 1, column: 3}, - end: {line: 1, column: 12} + start: {line: 1, column: 4}, + end: {line: 1, column: 10} } }, { type: "TemplateElement", value: {raw: "", cooked: ""}, tail: true, - range: [16, 18], + range: [17, 17], loc: { - start: {line: 1, column: 16}, - end: {line: 1, column: 18} + start: {line: 1, column: 17}, + end: {line: 1, column: 17} } } ], @@ -860,10 +861,10 @@ test("`$`", { type: "TemplateElement", value: {raw: "$", cooked: "$"}, tail: true, - range: [0, 3], + range: [1, 2], loc: { - start: {line: 1, column: 0}, - end: {line: 1, column: 3} + start: {line: 1, column: 1}, + end: {line: 1, column: 2} } }], expressions: [], @@ -900,29 +901,29 @@ test("`\\n\\r\\b\\v\\t\\f\\\n\\\r\n`", { type: "TemplateElement", value: {raw: "\\n\\r\\b\\v\\t\\f\\\n\\\r\n", cooked: "\n\r\b\u000b\t\f"}, tail: true, - range: [0, 19], + range: [1, 18], loc: { - start: {line: 1, column: 0}, - end: {line: 3, column: 19} + start: {line: 1, column: 1}, + end: {line: 3, column: 0} } }], expressions: [], range: [0, 19], loc: { start: {line: 1, column: 0}, - end: {line: 3, column: 19} + end: {line: 3, column: 1} } }, range: [0, 19], loc: { start: {line: 1, column: 0}, - end: {line: 3, column: 19} + end: {line: 3, column: 1} } }], range: [0, 19], loc: { start: {line: 1, column: 0}, - end: {line: 3, column: 19} + end: {line: 3, column: 1} } }, { ecmaVersion: 6, @@ -940,29 +941,29 @@ test("`\n\r\n`", { type: "TemplateElement", value: {raw: "\n\r\n", cooked: "\n\n"}, tail: true, - range: [0, 5], + range: [1, 4], loc: { - start: {line: 1, column: 0}, - end: {line: 3, column: 5} + start: {line: 1, column: 1}, + end: {line: 3, column: 0} } }], expressions: [], range: [0, 5], loc: { start: {line: 1, column: 0}, - end: {line: 3, column: 5} + end: {line: 3, column: 1} } }, range: [0, 5], loc: { start: {line: 1, column: 0}, - end: {line: 3, column: 5} + end: {line: 3, column: 1} } }], range: [0, 5], loc: { start: {line: 1, column: 0}, - end: {line: 3, column: 5} + end: {line: 3, column: 1} } }, { ecmaVersion: 6, @@ -970,7 +971,7 @@ test("`\n\r\n`", { locations: true }); -test("`\\u{000042}\\u0042\\x42\\u0\\102\\A`", { +test("`\\u{000042}\\u0042\\x42u0\\102\\A`", { type: "Program", body: [{ type: "ExpressionStatement", @@ -978,31 +979,31 @@ test("`\\u{000042}\\u0042\\x42\\u0\\102\\A`", { type: "TemplateLiteral", quasis: [{ type: "TemplateElement", - value: {raw: "\\u{000042}\\u0042\\x42\\u0\\102\\A", cooked: "BBBu0BA"}, + value: {raw: "\\u{000042}\\u0042\\x42u0\\102\\A", cooked: "BBBu0BA"}, tail: true, - range: [0, 31], + range: [1, 29], loc: { - start: {line: 1, column: 0}, - end: {line: 1, column: 31} + start: {line: 1, column: 1}, + end: {line: 1, column: 29} } }], expressions: [], - range: [0, 31], + range: [0, 30], loc: { start: {line: 1, column: 0}, - end: {line: 1, column: 31} + end: {line: 1, column: 30} } }, - range: [0, 31], + range: [0, 30], loc: { start: {line: 1, column: 0}, - end: {line: 1, column: 31} + end: {line: 1, column: 30} } }], - range: [0, 31], + range: [0, 30], loc: { start: {line: 1, column: 0}, - end: {line: 1, column: 31} + end: {line: 1, column: 30} } }, { ecmaVersion: 6, @@ -1033,10 +1034,10 @@ test("new raw`42`", { type: "TemplateElement", value: {raw: "42", cooked: "42"}, tail: true, - range: [7, 11], + range: [8, 10], loc: { - start: {line: 1, column: 7}, - end: {line: 1, column: 11} + start: {line: 1, column: 8}, + end: {line: 1, column: 10} } }], expressions: [], @@ -6261,8 +6262,7 @@ test("function* t() {}", { loc: { start: {line: 1, column: 0}, end: {line: 1, column: 16} - }, - errors: [] + } }, { ecmaVersion: 6, ranges: true, @@ -14742,13 +14742,13 @@ testFail("class A extends yield B { }", "Unexpected token (1:23)", {ecmaVersion: testFail("class default", "Unexpected token (1:6)", {ecmaVersion: 6}); -testFail("`test", "Unexpected token (1:6)", {ecmaVersion: 6}); +testFail("`test", "Unterminated string constant (1:1)", {ecmaVersion: 6}); -testFail("switch `test`", "Unexpected token (1:8)", {ecmaVersion: 6}); +testFail("switch `test`", "Unexpected token (1:7)", {ecmaVersion: 6}); -testFail("`hello ${10 `test`", "Unexpected token (1:19)", {ecmaVersion: 6}); +testFail("`hello ${10 `test`", "Unexpected token (1:18)", {ecmaVersion: 6}); -testFail("`hello ${10;test`", "Unexpected token (1:12)", {ecmaVersion: 6}); +testFail("`hello ${10;test`", "Unexpected token (1:11)", {ecmaVersion: 6}); testFail("function a() 1 // expression closure is not supported", "Unexpected token (1:13)", {ecmaVersion: 6}); @@ -14794,7 +14794,7 @@ testFail("[...a, b] = c", "Unexpected token (1:1)", {ecmaVersion: 6}); testFail("({ t(eval) { \"use strict\"; } });", "Defining 'eval' in strict mode (1:5)", {ecmaVersion: 6}); -testFail("\"use strict\"; `${test}\\02`;", "Unexpected token (1:22)", {ecmaVersion: 6}); +testFail("\"use strict\"; `${test}\\02`;", "Octal literal in strict mode (1:22)", {ecmaVersion: 6}); test("[...a, ] = b", { type: "Program",