diff --git a/src/comments.js b/src/comments.js new file mode 100644 index 0000000000..dd6edafa48 --- /dev/null +++ b/src/comments.js @@ -0,0 +1,129 @@ +/** + * Based on the comment attachment algorithm used in espree and estraverse. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +import { Parser } from "./state"; + +function last(stack) { + return stack[stack.length - 1]; +} + +const pp = Parser.prototype; + +pp.addComment = function (comment) { + this.trailingComments.push(comment); + this.leadingComments.push(comment); +}; + +pp.processComment = function (node) { + var stack = this.bottomRightStack; + + var lastChild; + var trailingComments; + var i; + + if (this.trailingComments.length > 0) { + // If the first comment in trailingComments comes after the + // current node, then we're good - all comments in the array will + // come after the node and so it's safe to add them as official + // trailingComments. + if (this.trailingComments[0].start >= node.end) { + trailingComments = this.trailingComments; + this.trailingComments = []; + } else { + // Otherwise, if the first comment doesn't come after the + // current node, that means we have a mix of leading and trailing + // comments in the array and that leadingComments contains the + // same items as trailingComments. Reset trailingComments to + // zero items and we'll handle this by evaluating leadingComments + // later. + this.trailingComments.length = 0; + } + } else { + var lastInStack = last(stack); + if (stack.length > 0 && lastInStack.trailingComments && lastInStack.trailingComments[0].start >= node.end) { + trailingComments = lastInStack.trailingComments; + lastInStack.trailingComments = null; + } + } + + // Eating the stack. 
+ while (stack.length > 0 && last(stack).start >= node.start) { + lastChild = stack.pop(); + } + + if (lastChild) { + if (lastChild.leadingComments && last(lastChild.leadingComments).end <= node.start) { + node.leadingComments = lastChild.leadingComments; + lastChild.leadingComments = null; + } + } else if (this.leadingComments.length > 0) { + if (last(this.leadingComments).end <= node.start) { + node.leadingComments = this.leadingComments; + this.leadingComments = []; + } else { + // https://github.com/eslint/espree/issues/2 + // + // In special cases, such as return (without a value) and + // debugger, all comments will end up as leadingComments and + // will otherwise be eliminated. This step runs when the + // bottomRightStack is empty and there are comments left + // in leadingComments. + // + // This loop figures out the stopping point between the actual + // leading and trailing comments by finding the location of the + // first comment that comes after the given node. + for (i = 0; i < this.leadingComments.length; i++) { + if (this.leadingComments[i].end > node.start) { + break; + } + } + + // Split the array based on the location of the first comment + // that comes after the node. Keep in mind that this could + // result in an empty array, and if so, the array must be + // deleted. + node.leadingComments = this.leadingComments.slice(0, i); + if (node.leadingComments.length === 0) { + node.leadingComments = null; + } + + // Similarly, trailing comments are attached later. The variable + // must be reset to null if there are no trailing comments. 
+ trailingComments = this.leadingComments.slice(i); + if (trailingComments.length === 0) { + trailingComments = null; + } + } + } + + if (trailingComments) { + if (trailingComments.length && trailingComments[0].start >= node.start && last(trailingComments).end <= node.end) { + node.innerComments = trailingComments; + } else { + node.trailingComments = trailingComments; + } + } + + stack.push(node); +}; diff --git a/src/expression.js b/src/expression.js index 9da6929003..be9a3cd9e1 100755 --- a/src/expression.js +++ b/src/expression.js @@ -397,7 +397,7 @@ pp.parseExprAtom = function (refShorthandDefaultPos) { pp.parseLiteral = function (value) { let node = this.startNode(); - node.value = value; + node.rawValue = node.value = value; node.raw = this.input.slice(this.start, this.end); this.next(); return this.finishNode(node, "Literal"); diff --git a/src/index.js b/src/index.js index c9cf42ffe5..b5647e85aa 100755 --- a/src/index.js +++ b/src/index.js @@ -4,26 +4,21 @@ import "./parseutil"; import "./statement"; import "./lval"; import "./expression"; +import "./node"; +import "./location"; import "./lookahead"; -import "./tokentype"; +import { types as tokTypes } from "./tokentype"; +import "./tokenize"; import "./tokencontext"; - -export { Parser, plugins } from "./state"; -export { defaultOptions } from "./options"; -export { SourceLocation } from "./location"; -export { getLineInfo } from "./location"; -export { Node } from "./node"; -export { TokenType, types as tokTypes } from "./tokentype"; -export { TokContext, types as tokContexts } from "./tokencontext"; -export { isIdentifierChar, isIdentifierStart } from "./identifier"; -export { Token } from "./tokenize"; -export { isNewLine, lineBreak, lineBreakG } from "./whitespace"; - +import "./comments"; import flowPlugin from "./plugins/flow"; import jsxPlugin from "./plugins/jsx"; + plugins.flow = flowPlugin; plugins.jsx = jsxPlugin; export function parse(input, options) { return new Parser(getOptions(options), 
input).parse(); } + +export { tokTypes }; diff --git a/src/location.js b/src/location.js index c6832b04da..b4ec229707 100755 --- a/src/location.js +++ b/src/location.js @@ -16,10 +16,9 @@ export class Position { } export class SourceLocation { - constructor(p, start, end) { + constructor(start, end) { this.start = start; this.end = end; - if (p.sourceFile !== null) this.source = p.sourceFile; } } diff --git a/src/lookahead.js b/src/lookahead.js index 41b2cfe6eb..7ff04d587d 100644 --- a/src/lookahead.js +++ b/src/lookahead.js @@ -23,7 +23,9 @@ var STATE_KEYS = [ "inType", "inFunction", "inGenerator", - "labels" + "labels", + "tokens", + "comments" ]; pp.getState = function () { @@ -32,8 +34,10 @@ pp.getState = function () { var key = STATE_KEYS[i]; state[key] = this[key]; } - state.context = this.context.slice(); - state.labels = this.labels.slice(); + state.comments = this.comments.slice(); + state.context = this.context.slice(); + state.tokens = this.tokens.slice(); + state.labels = this.labels.slice(); return state; }; @@ -45,9 +49,11 @@ pp.setState = function (state) { pp.lookahead = function () { var old = this.getState(); + this.isLookahead = true; this.next(); this.isLookahead = false; + var curr = this.getState(); this.setState(old); return curr; diff --git a/src/node.js b/src/node.js index 0d40e97cc7..2ce99dfd94 100755 --- a/src/node.js +++ b/src/node.js @@ -13,11 +13,7 @@ export class Node { if (parser) { if (parser.options.locations) { - this.loc = new SourceLocation(parser, loc); - } - - if (parser.options.directSourceFile) { - this.sourceFile = parser.options.directSourceFile; + this.loc = new SourceLocation(loc); } if (parser.options.ranges) { @@ -46,6 +42,7 @@ function finishNodeAt(node, type, pos, loc) { node.end = pos; if (this.options.locations) node.loc.end = loc; if (this.options.ranges) node.range[1] = pos; + this.processComment(node); return node; } diff --git a/src/options.js b/src/options.js index 45b67c46d2..fbcb758db4 100755 --- 
a/src/options.js +++ b/src/options.js @@ -1,5 +1,4 @@ import { has } from "./util"; -import { SourceLocation } from "./location"; // A second optional argument can be given to further configure // the parser process. These options are recognized: @@ -7,15 +6,6 @@ import { SourceLocation } from "./location"; export const defaultOptions = { // Source type ("script" or "module") for different semantics sourceType: "script", - // `onInsertedSemicolon` can be a callback that will be called - // when a semicolon is automatically inserted. It will be passed - // th position of the comma as an offset, and if `locations` is - // enabled, it is given the location as a `{line, column}` object - // as second argument. - onInsertedSemicolon: null, - // `onTrailingComma` is similar to `onInsertedSemicolon`, but for - // trailing commas. - onTrailingComma: null, // By default, reserved words are not enforced. Disable // `allowReserved` to enforce them. When this option has the // value "never", reserved words and keywords can also not be @@ -27,31 +17,11 @@ export const defaultOptions = { // When enabled, import/export statements are not constrained to // appearing at the top of the program. allowImportExportEverywhere: false, - // When enabled, hashbang directive in the beginning of file - // is allowed and treated as a line comment. - allowHashBang: false, // When `locations` is on, `loc` properties holding objects with // `start` and `end` properties in `{line, column}` form (with // line being 1-based and column 0-based) will be attached to the // nodes. locations: false, - // A function can be passed as `onToken` option, which will - // cause Acorn to call that function with object in the same - // format as tokenize() returns. Note that you are not - // allowed to call the parser from the callback—that will - // corrupt its internal state. 
- onToken: null, - // A function can be passed as `onComment` option, which will - // cause Acorn to call that function with `(block, text, start, - // end)` parameters whenever a comment is skipped. `block` is a - // boolean indicating whether this is a block (`/* */`) comment, - // `text` is the content of the comment, and `start` and `end` are - // character offsets that denote the start and end of the comment. - // When the `locations` option is on, two more parameters are - // passed, the full `{line, column}` locations of the start and - // end of the comments. Note that you are not allowed to call the - // parser from the callback—that will corrupt its internal state. - onComment: null, // Nodes have their start and end characters offsets recorded in // `start` and `end` properties (directly on the node, rather than // the `loc` object, which holds line/column data. To also add a @@ -61,18 +31,6 @@ export const defaultOptions = { // // [range]: https://bugzilla.mozilla.org/show_bug.cgi?id=745678 ranges: false, - // It is possible to parse multiple files into a single AST by - // passing the tree produced by parsing the first file as - // `program` option in subsequent parses. This will add the - // toplevel forms of the parsed file to the `Program` (top) node - // of an existing parse tree. - program: null, - // When `locations` is on, you can pass this to record the source - // file in every node's `loc` object. - sourceFile: null, - // This value, if given, is stored in every node, whether - // `locations` is on or off. - directSourceFile: null, plugins: {}, // Babel-specific options features: {}, @@ -83,32 +41,8 @@ export const defaultOptions = { export function getOptions(opts) { let options = {}; - for (let opt in defaultOptions) + for (let opt in defaultOptions) { options[opt] = opts && has(opts, opt) ? 
opts[opt] : defaultOptions[opt]; - - if (Array.isArray(options.onToken)) { - let tokens = options.onToken; - options.onToken = (token) => tokens.push(token); } - if (Array.isArray(options.onComment)) { - options.onComment = pushComment(options, options.onComment); - } - return options; } - -function pushComment(options, array) { - return function (block, text, start, end, startLoc, endLoc) { - let comment = { - type: block ? "Block" : "Line", - value: text, - start: start, - end: end - }; - if (options.locations) - comment.loc = new SourceLocation(this, startLoc, endLoc); - if (options.ranges) - comment.range = [start, end]; - array.push(comment); - }; -} diff --git a/src/parseutil.js b/src/parseutil.js index 41c6d8e79b..bf848bf68c 100755 --- a/src/parseutil.js +++ b/src/parseutil.js @@ -24,6 +24,22 @@ pp.eat = function (type) { } }; +// Tests whether the current token is a relational token whose value is `op`. + +pp.isRelational = function (op) { + return this.type === tt.relational && this.value === op; +}; + +// Advances past the relational token `op` if it is current; otherwise calls `unexpected`. + +pp.expectRelational = function (op) { + if (this.isRelational(op)) { + this.next(); + } else { + this.unexpected(); + } +}; + // Tests whether parsed token is a contextual keyword. 
pp.isContextual = function (name) { @@ -52,8 +68,6 @@ pp.canInsertSemicolon = function () { pp.insertSemicolon = function () { if (this.canInsertSemicolon()) { - if (this.options.onInsertedSemicolon) - this.options.onInsertedSemicolon(this.lastTokEnd, this.lastTokEndLoc); return true; } }; @@ -67,8 +81,6 @@ pp.semicolon = function () { pp.afterTrailingComma = function (tokType) { if (this.type === tokType) { - if (this.options.onTrailingComma) - this.options.onTrailingComma(this.lastTokStart, this.lastTokStartLoc); this.next(); return true; } diff --git a/src/state.js b/src/state.js index 2f398468d6..fc47b6b78f 100755 --- a/src/state.js +++ b/src/state.js @@ -4,7 +4,6 @@ import { lineBreak } from "./whitespace"; export function Parser(options, input, startPos) { this.options = options; - this.sourceFile = this.options.sourceFile || null; this.isKeyword = keywords[6]; this.isReservedWord = reservedWords[6]; this.input = input; @@ -55,10 +54,22 @@ export function Parser(options, input, startPos) { // Labels in scope. this.labels = []; + // Leading decorators. this.decorators = []; + // Token store. + this.tokens = []; + + // Comment store. + this.comments = []; + + // Comment attachment store + this.trailingComments = []; + this.leadingComments = []; + this.bottomRightStack = []; + // If enabled, skip leading hashbang line. 
- if (this.pos === 0 && this.options.allowHashBang && this.input.slice(0, 2) === "#!") { + if (this.pos === 0 && this.input[0] === "#" && this.input[1] === "!") { this.skipLineComment(2); } } @@ -80,7 +91,8 @@ Parser.prototype.loadPlugins = function (plugins) { }; Parser.prototype.parse = function () { - let node = this.options.program || this.startNode(); + let file = this.startNode(); + let program = this.startNode(); this.nextToken(); - return this.parseTopLevel(node); + return this.parseTopLevel(file, program); }; diff --git a/src/statement.js b/src/statement.js index acd7c8d8d4..4856eb2a85 100755 --- a/src/statement.js +++ b/src/statement.js @@ -11,20 +11,26 @@ const pp = Parser.prototype; // `program` argument. If present, the statements will be appended // to its body instead of creating a new node. -pp.parseTopLevel = function (node) { +pp.parseTopLevel = function (file, program) { + program.sourceType = this.options.sourceType; + program.body = []; + let first = true; - if (!node.body) node.body = []; while (this.type !== tt.eof) { let stmt = this.parseStatement(true, true); - node.body.push(stmt); + program.body.push(stmt); if (first) { if (this.isUseStrict(stmt)) this.setStrict(true); first = false; } } this.next(); - node.sourceType = this.options.sourceType; - return this.finishNode(node, "Program"); + + file.program = this.finishNode(program, "Program"); + file.comments = this.comments; + file.tokens = this.tokens; + + return this.finishNode(file, "File"); }; const loopLabel = {kind: "loop"}, switchLabel = {kind: "switch"}; diff --git a/src/tokenize.js b/src/tokenize.js index cb5dec92cf..9b7206abc8 100755 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -16,7 +16,7 @@ export class Token { this.end = p.end; if (p.options.locations) { - this.loc = new SourceLocation(p, p.startLoc, p.endLoc); + this.loc = new SourceLocation(p.startLoc, p.endLoc); } if (p.options.ranges) { @@ -36,8 +36,8 @@ const isRhino = typeof Packages === "object" && 
Object.prototype.toString.call(P // Move to the next token pp.next = function () { - if (this.options.onToken && !this.isLookahead) - this.options.onToken(new Token(this)); + if (!this.isLookahead) + this.tokens.push(new Token(this)); this.lastTokEnd = this.end; this.lastTokStart = this.start; @@ -82,8 +82,11 @@ pp.nextToken = function () { if (this.options.locations) this.startLoc = this.curPosition(); if (this.pos >= this.input.length) return this.finishToken(tt.eof); - if (curContext.override) return curContext.override(this); - else this.readToken(this.fullCharCodeAtPos()); + if (curContext.override) { + return curContext.override(this); + } else { + return this.readToken(this.fullCharCodeAtPos()); + } }; pp.readToken = function (code) { @@ -103,10 +106,26 @@ pp.fullCharCodeAtPos = function () { return (code << 10) + next - 0x35fdc00; }; +function pushComment(block, text, start, end, startLoc, endLoc) { + var comment = { + type: block ? "CommentBlock" : "CommentLine", + value: text, + start: start, + end: end, + loc: new SourceLocation(startLoc, endLoc), + range: [start, end] + }; + + this.tokens.push(comment); + this.comments.push(comment); + this.addComment(comment); +} + pp.skipBlockComment = function () { - let startLoc = this.options.onComment && this.curPosition(); + let startLoc = this.curPosition(); let start = this.pos, end = this.input.indexOf("*/", this.pos += 2); if (end === -1) this.raise(this.pos - 2, "Unterminated comment"); + this.pos = end + 2; if (this.options.locations) { lineBreakG.lastIndex = start; @@ -116,23 +135,20 @@ pp.skipBlockComment = function () { this.lineStart = match.index + match[0].length; } } - if (this.options.onComment) - this.options.onComment(true, this.input.slice(start + 2, end), start, this.pos, - startLoc, this.curPosition()); + + pushComment.call(this, true, this.input.slice(start + 2, end), start, this.pos, startLoc, this.curPosition()); }; pp.skipLineComment = function (startSkip) { let start = this.pos; - let 
startLoc = this.options.onComment && this.curPosition(); + let startLoc = this.curPosition(); let ch = this.input.charCodeAt(this.pos += startSkip); while (this.pos < this.input.length && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) { ++this.pos; ch = this.input.charCodeAt(this.pos); } - if (this.options.onComment) { - this.options.onComment(false, this.input.slice(start + startSkip, this.pos), start, this.pos, - startLoc, this.curPosition()); - } + + pushComment.call(this, false, this.input.slice(start + startSkip, this.pos), start, this.pos, startLoc, this.curPosition()); }; // Called at the start of the parse and after every token. Skips diff --git a/test/driver.js b/test/driver.js index 527fda18df..940030201f 100755 --- a/test/driver.js +++ b/test/driver.js @@ -28,13 +28,14 @@ function runTest(test) { testOpts.onToken = []; try { - var ast = parse(test.code, testOpts); + var ast = parse(test.code, testOpts).program; } catch (err) { if (test.error) { if (err.message === test.error) { return; } else { - throw new Error("Expected error message: " + test.error + ". Got error message: " + err.message); + err.message = "Expected error message: " + test.error + ". 
Got error message: " + err.message; + throw err; } } diff --git a/test/tests.js b/test/tests.js index caa2745b77..0789110c65 100755 --- a/test/tests.js +++ b/test/tests.js @@ -26382,88 +26382,6 @@ test("price_9̶9̶_89", { // option tests -test("var a = 1;", { - type: "Program", - loc: { - start: { - line: 1, - column: 0 - }, - end: { - line: 1, - column: 10 - }, - source: "test.js" - }, - body: [ - { - type: "VariableDeclaration", - loc: { - start: { - line: 1, - column: 0 - }, - end: { - line: 1, - column: 10 - }, - source: "test.js" - }, - declarations: [ - { - type: "VariableDeclarator", - loc: { - start: { - line: 1, - column: 4 - }, - end: { - line: 1, - column: 9 - }, - source: "test.js" - }, - id: { - type: "Identifier", - loc: { - start: { - line: 1, - column: 4 - }, - end: { - line: 1, - column: 5 - }, - source: "test.js" - }, - name: "a" - }, - init: { - type: "Literal", - loc: { - start: { - line: 1, - column: 8 - }, - end: { - line: 1, - column: 9 - }, - source: "test.js" - }, - value: 1, - raw: "1" - } - } - ], - kind: "var" - } - ] -}, { - locations: true, - sourceFile: "test.js" -}); - test("a.in / b", { type: "Program", body: [ @@ -28411,37 +28329,6 @@ test("for(const x = 0;;);", { }, {ranges: true}); // Assertion Tests -test(function TestComments() { - // Bear class - function Bear(x,y,z) { - this.position = [x||0,y||0,z||0] - } - - Bear.prototype.roar = function(message) { - return 'RAWWW: ' + message; // Whatever - }; - - function Cat() { - /* 1 - 2 - 3*/ - } - - Cat.prototype.roar = function(message) { - return 'MEOOWW: ' + /*stuff*/ message; - }; -}.toString().replace(/\r\n/g, '\n'), {}, { - onComment: [ - {type: "Line", value: " Bear class"}, - {type: "Line", value: " Whatever"}, - {type: "Block", value: [ - " 1", - " 2", - " 3" - ].join('\n')}, - {type: "Block", value: "stuff"} - ] -}); test(" HTML comment", {}, { - locations: true, - onComment: [{ - type: "Line", - value: " HTML comment", - loc: { - start: { line: 2, column: 0 }, - end: { 
line: 2, column: 16 } - } - }] -}); - -var tokTypes = acorn.tokTypes; - -test('var x = (1 + 2)', {}, { - locations: true, - onToken: [ - { - type: tokTypes._var, - value: "var", - loc: { - start: {line: 1, column: 0}, - end: {line: 1, column: 3} - } - }, - { - type: tokTypes.name, - value: "x", - loc: { - start: {line: 1, column: 4}, - end: {line: 1, column: 5} - } - }, - { - type: tokTypes.eq, - value: "=", - loc: { - start: {line: 1, column: 6}, - end: {line: 1, column: 7} - } - }, - { - type: tokTypes.parenL, - value: undefined, - loc: { - start: {line: 1, column: 8}, - end: {line: 1, column: 9} - } - }, - { - type: tokTypes.num, - value: 1, - loc: { - start: {line: 1, column: 9}, - end: {line: 1, column: 10} - } - }, - { - type: {binop: 9, prefix: true, beforeExpr: true}, - value: "+", - loc: { - start: {line: 1, column: 11}, - end: {line: 1, column: 12} - } - }, - { - type: tokTypes.num, - value: 2, - loc: { - start: {line: 1, column: 13}, - end: {line: 1, column: 14} - } - }, - { - type: tokTypes.parenR, - value: undefined, - loc: { - start: {line: 1, column: 14}, - end: {line: 1, column: 15} - } - }, - { - type: tokTypes.eof, - value: undefined, - loc: { - start: {line: 1, column: 15}, - end: {line: 1, column: 15} - } - } - ] -}); - test("function f(f) { 'use strict'; }", {}); -// https://github.com/marijnh/acorn/issues/180 -test("#!/usr/bin/node\n;", {}, { - allowHashBang: true, - onComment: [{ - type: "Line", - value: "/usr/bin/node", - start: 0, - end: 15 - }] -}); - // https://github.com/marijnh/acorn/issues/204 test("(function () {} / 1)", { type: "Program", @@ -28636,25 +28384,6 @@ test("function f() {} / 1 /", { ] }); -var semicolons = [] -testAssert("var x\nreturn\n10", function() { - var result = semicolons.join(" "); - semicolons.length = 0; - if (result != "5 12 15") - return "Unexpected result for onInsertedSemicolon: " + result; -}, {onInsertedSemicolon: function(pos) { semicolons.push(pos); }, - allowReturnOutsideFunction: true, - loose: 
false}) - -var trailingCommas = [] -testAssert("[1,2,] + {foo: 1,}", function() { - var result = trailingCommas.join(" "); - trailingCommas.length = 0; - if (result != "4 16") - return "Unexpected result for onTrailingComma: " + result; -}, {onTrailingComma: function(pos) { trailingCommas.push(pos); }, - loose: false}) - // https://github.com/marijnh/acorn/issues/275 testFail("({ get prop(x) {} })", "getter should have no params (1:11)");