diff --git a/README.md b/README.md index 1240d37b82..20e6c6d605 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,11 @@ object referring to that same position. - **onToken**: If a function is passed for this option, each found token will be passed in same format as `tokenize()` returns. + If array is passed, each found token is pushed to it. + + Note that you are not allowed to call the parser from the + callback—that will corrupt its internal state. + - **onComment**: If a function is passed for this option, whenever a comment is encountered the function will be called with the following parameters: @@ -109,6 +114,18 @@ object referring to that same position. of the comment’s start and end are passed as two additional parameters. + If array is passed for this option, each found comment is pushed + to it as object in Esprima format: + + ```javascript + { + "type": "Line" | "Block", + "value": "comment text", + "range": ..., + "loc": ... + } + ``` + Note that you are not allowed to call the parser from the callback—that will corrupt its internal state. @@ -144,9 +161,8 @@ Acorn's tokenizer. The function takes an input string and options similar to `parse` (though only some options are meaningful here), and returns a function that can be called repeatedly to read a single token, and returns a `{start, end, type, value}` object (with added -`startLoc` and `endLoc` properties when the `locations` option is -enabled). This object will be reused (updated) for each token, so you -can't count on it staying stable. +`loc` property when the `locations` option is enabled and `range` +property when the `ranges` option is enabled). **tokTypes** holds an object mapping names to the token type objects that end up in the `type` properties of tokens. @@ -155,8 +171,7 @@ that end up in the `type` properties of tokens. Escodegen supports generating comments from AST, attached in Esprima-specific format. In order to simulate same format in -Acorn, consider following example (this may be simplified -in future): +Acorn, consider following example: ```javascript var comments = [], tokens = []; @@ -165,19 +180,9 @@ var ast = acorn.parse('var x = 42; // answer', { // collect ranges for each node ranges: true, // collect comments in Esprima's format - onComment: function (block, text, start, end) { - comments.push({ - type: block ? 'Block' : 'Line', - value: text, - range: [start, end] - }); - }, + onComment: comments, // collect token ranges - onToken: function (token) { - tokens.push({ - range: [token.start, token.end] - }); - } + onToken: tokens }); // attach comments using collected information diff --git a/acorn.js b/acorn.js index 485ce2681c..0dc99c7a24 100644 --- a/acorn.js +++ b/acorn.js @@ -114,12 +114,40 @@ directSourceFile: null }; + var isArray = function (obj) { + return Object.prototype.toString.call(obj) === "[object Array]"; + }; + function setOptions(opts) { options = opts || {}; for (var opt in defaultOptions) if (!has(options, opt)) options[opt] = defaultOptions[opt]; sourceFile = options.sourceFile || null; - + if (isArray(options.onToken)) { + var tokens = options.onToken; + options.onToken = function (token) { + tokens.push(token); + }; + } + if (isArray(options.onComment)) { + var comments = options.onComment; + options.onComment = function (block, text, start, end, startLoc, endLoc) { + var comment = { + type: block ? 'Block' : 'Line', + value: text, + start: start, + end: end + }; + if (options.locations) { + comment.loc = new SourceLocation(); + comment.loc.start = startLoc; + comment.loc.end = endLoc; + } + if (options.ranges) + comment.range = [start, end]; + comments.push(comment); + }; + } isKeyword = options.ecmaVersion >= 6 ? isEcma6Keyword : isEcma5AndLessKeyword; } @@ -141,19 +169,23 @@ return {line: line, column: offset - cur}; }; - var getCurrentToken = function () { - var token = { - type: tokType, - value: tokVal, - start: tokStart, - end: tokEnd - }; + function Token() { + this.type = tokType; + this.value = tokVal; + this.start = tokStart; + this.end = tokEnd; if (options.locations) { - token.startLoc = tokStartLoc; - token.endLoc = tokEndLoc; + this.loc = new SourceLocation(); + this.loc.end = tokEndLoc; + // TODO: remove in next major release + this.startLoc = tokStartLoc; + this.endLoc = tokEndLoc; } - return token; - }; + if (options.ranges) + this.range = [tokStart, tokEnd]; + } + + exports.Token = Token; // Acorn is organized as a tokenizer and a recursive-descent parser. // The `tokenize` export provides an interface to the tokenizer. @@ -170,7 +202,7 @@ function getToken(forceRegexp) { lastEnd = tokEnd; readToken(forceRegexp); - return getCurrentToken(); + return new Token(); } getToken.jumpTo = function(pos, reAllowed) { tokPos = pos; @@ -554,7 +586,7 @@ tokVal = val; tokRegexpAllowed = type.beforeExpr; if (options.onToken) { - options.onToken(getCurrentToken()); + options.onToken(new Token()); } } @@ -576,16 +608,16 @@ startLoc, options.locations && new Position); } - function skipLineComment() { + function skipLineComment(startSkip) { var start = tokPos; var startLoc = options.onComment && options.locations && new Position; - var ch = input.charCodeAt(tokPos+=2); + var ch = input.charCodeAt(tokPos+=startSkip); while (tokPos < inputLen && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) { ++tokPos; ch = input.charCodeAt(tokPos); } if (options.onComment) - options.onComment(false, input.slice(start + 2, tokPos), start, tokPos, + options.onComment(false, input.slice(start + startSkip, tokPos), start, tokPos, startLoc, options.locations && new Position); } @@ -620,7 +652,7 @@ if (next === 42) { // '*' skipBlockComment(); } else if (next === 47) { // '/' - skipLineComment(); + skipLineComment(2); } else break; } else if (ch === 160) { // '\xa0' ++tokPos; @@ -689,8 +721,7 @@ if (next == 45 && input.charCodeAt(tokPos + 2) == 62 && newline.test(input.slice(lastEnd, tokPos))) { // A `-->` line comment - tokPos += 3; - skipLineComment(); + skipLineComment(3); skipSpace(); return readToken(); } @@ -711,8 +742,7 @@ if (next == 33 && code == 60 && input.charCodeAt(tokPos + 2) == 45 && input.charCodeAt(tokPos + 3) == 45) { // ` HTML comment", {}, {locations: true}, + [{ + block: false, + text: " HTML comment", + startLoc: { line: 2, column: 0 }, + endLoc: { line: 2, column: 16 } + }]); })(); (function() { @@ -28662,72 +28699,90 @@ testFail("for(const x = 0;;);", "Unexpected token (1:4)", {ecmaVersion: 6}); value: "var", start: 0, end: 3, - startLoc: {line: 1, column: 0}, - endLoc: {line: 1, column: 3} + loc: { + start: {line: 1, column: 0}, + end: {line: 1, column: 3} + } }, { type: tokTypes.name, value: "x", start: 4, end: 5, - startLoc: {line: 1, column: 4}, - endLoc: {line: 1, column: 5} + loc: { + start: {line: 1, column: 4}, + end: {line: 1, column: 5} + } }, { type: tokTypes.eq, value: "=", start: 6, end: 7, - startLoc: {line: 1, column: 6}, - endLoc: {line: 1, column: 7} + loc: { + start: {line: 1, column: 6}, + end: {line: 1, column: 7} + } }, { type: tokTypes.parenL, value: undefined, start: 8, end: 9, - startLoc: {line: 1, column: 8}, - endLoc: {line: 1, column: 9} + loc: { + start: {line: 1, column: 8}, + end: {line: 1, column: 9} + } }, { type: tokTypes.num, value: 1, start: 9, end: 10, - startLoc: {line: 1, column: 9}, - endLoc: {line: 1, column: 10} + loc: { + start: {line: 1, column: 9}, + end: {line: 1, column: 10} + } }, { type: {binop: 9, prefix: true, beforeExpr: true}, value: "+", start: 11, end: 12, - startLoc: {line: 1, column: 11}, - endLoc: {line: 1, column: 12} + loc: { + start: {line: 1, column: 11}, + end: {line: 1, column: 12} + } }, { type: tokTypes.num, value: 2, start: 13, end: 14, - startLoc: {line: 1, column: 13}, - endLoc: {line: 1, column: 14} + loc: { + start: {line: 1, column: 13}, + end: {line: 1, column: 14} + } }, { type: tokTypes.parenR, value: undefined, start: 14, end: 15, - startLoc: {line: 1, column: 14}, - endLoc: {line: 1, column: 15} + loc: { + start: {line: 1, column: 14}, + end: {line: 1, column: 15} + } }, { type: tokTypes.eof, value: undefined, start: 15, end: 15, - startLoc: {line: 1, column: 15}, - endLoc: {line: 1, column: 15} + loc: { + start: {line: 1, column: 15}, + end: {line: 1, column: 15} + } } ]; testAssert('var x = (1 + 2)', function assert(ast) { @@ -28735,7 +28790,11 @@ testFail("for(const x = 0;;);", "Unexpected token (1:4)", {ecmaVersion: 6}); return JSON.stringify(actualTokens) + " !== " + JSON.stringify(expectedTokens); } else { for (var i=0, n=actualTokens.length; i < n; i++) { - var actualToken = JSON.stringify(actualTokens[i]); + var actualToken = JSON.stringify( + actualTokens[i], + // just remove this when startLoc/endLoc support is dropped + function (key, value) { if (key !== 'startLoc' && key !== 'endLoc') return value; } + ); var expectedToken = JSON.stringify(expectedTokens[i]); if (actualToken !== expectedToken) return actualToken + ' !== ' + expectedToken; @@ -28743,8 +28802,8 @@ testFail("for(const x = 0;;);", "Unexpected token (1:4)", {ecmaVersion: 6}); } }, { locations: true, - onToken: function(token) { - actualTokens.push(token); - } + onToken: actualTokens }); -})(); \ No newline at end of file +})(); + +test("function f(f) { 'use strict'; }", {});