Make token format compatible with Esprima and Mozilla-styled locations.

* Tokens are now instances of single exported constructor Token.
* Token objects are compatible with Esprima (only `type` format is different).
* Added token.loc in format of node.loc (Mozilla).
* Deprecated token.startLoc & token.endLoc.
* Updated comment generation example.

Also added ability to pass arrays in `onToken`/`onComment` where
tokens/comments will be collected in Esprima's format so you can
simply pass those arrays to `escodegen.attachComments`.

Updated docs and comment attachment example.
This commit is contained in:
Ingvar Stepanyan 2014-08-20 15:31:35 +03:00 committed by Marijn Haverbeke
parent a246bf83d0
commit 10553cbced
3 changed files with 110 additions and 53 deletions

View File

@ -76,6 +76,11 @@ object referring to that same position.
- **onToken**: If a function is passed for this option, each found
token will be passed in the same format as `tokenize()` returns.
If an array is passed, each found token is pushed to it.
Note that you are not allowed to call the parser from the
callback—that will corrupt its internal state.
- **onComment**: If a function is passed for this option, whenever a
comment is encountered the function will be called with the
following parameters:
@ -90,6 +95,18 @@ object referring to that same position.
of the comments start and end are passed as two additional
parameters.
If an array is passed for this option, each found comment is pushed
to it as an object in Esprima format:
```javascript
{
"type": "Line" | "Block",
"value": "comment text",
"range": ...,
"loc": ...
}
```
Note that you are not allowed to call the parser from the
callback—that will corrupt its internal state.
@ -125,9 +142,8 @@ Acorn's tokenizer. The function takes an input string and options
similar to `parse` (though only some options are meaningful here), and
returns a function that can be called repeatedly to read a single
token, and returns a `{start, end, type, value}` object (with added
`startLoc` and `endLoc` properties when the `locations` option is
enabled). This object will be reused (updated) for each token, so you
can't count on it staying stable.
`loc` property when the `locations` option is enabled and `range`
property when the `ranges` option is enabled).
**tokTypes** holds an object mapping names to the token type objects
that end up in the `type` properties of tokens.
@ -136,8 +152,7 @@ that end up in the `type` properties of tokens.
Escodegen supports generating comments from AST, attached in
Esprima-specific format. In order to simulate same format in
Acorn, consider following example (this may be simplified
in future):
Acorn, consider the following example:
```javascript
var comments = [], tokens = [];
@ -146,19 +161,9 @@ var ast = acorn.parse('var x = 42; // answer', {
// collect ranges for each node
ranges: true,
// collect comments in Esprima's format
onComment: function (block, text, start, end) {
comments.push({
type: block ? 'Block' : 'Line',
value: text,
range: [start, end]
});
},
onComment: comments,
// collect token ranges
onToken: function (token) {
tokens.push({
range: [token.start, token.end]
});
}
onToken: tokens
});
// attach comments using collected information

View File

@ -114,12 +114,40 @@
directSourceFile: null
};
var isArray = function (obj) {
return Object.prototype.toString.call(obj) === "[object Array]";
};
function setOptions(opts) {
options = opts || {};
for (var opt in defaultOptions) if (!has(options, opt))
options[opt] = defaultOptions[opt];
sourceFile = options.sourceFile || null;
if (isArray(options.onToken)) {
var tokens = options.onToken;
options.onToken = function (token) {
tokens.push(token);
};
}
if (isArray(options.onComment)) {
var comments = options.onComment;
options.onComment = function (block, text, start, end, startLoc, endLoc) {
var comment = {
type: block ? 'Block' : 'Line',
value: text,
start: start,
end: end
};
if (options.locations) {
comment.loc = new SourceLocation();
comment.loc.start = startLoc;
comment.loc.end = endLoc;
}
if (options.ranges)
comment.range = [start, end];
comments.push(comment);
};
}
isKeyword = options.ecmaVersion >= 6 ? isEcma6Keyword : isEcma5AndLessKeyword;
}
@ -141,19 +169,23 @@
return {line: line, column: offset - cur};
};
var getCurrentToken = function () {
var token = {
type: tokType,
value: tokVal,
start: tokStart,
end: tokEnd
};
function Token() {
this.type = tokType;
this.value = tokVal;
this.start = tokStart;
this.end = tokEnd;
if (options.locations) {
token.startLoc = tokStartLoc;
token.endLoc = tokEndLoc;
this.loc = new SourceLocation();
this.loc.end = tokEndLoc;
// TODO: remove in next major release
this.startLoc = tokStartLoc;
this.endLoc = tokEndLoc;
}
return token;
};
if (options.ranges)
this.range = [tokStart, tokEnd];
}
exports.Token = Token;
// Acorn is organized as a tokenizer and a recursive-descent parser.
// The `tokenize` export provides an interface to the tokenizer.
@ -170,7 +202,7 @@
function getToken(forceRegexp) {
lastEnd = tokEnd;
readToken(forceRegexp);
return getCurrentToken();
return new Token();
}
getToken.jumpTo = function(pos, reAllowed) {
tokPos = pos;
@ -543,7 +575,7 @@
tokVal = val;
tokRegexpAllowed = type.beforeExpr;
if (options.onToken) {
options.onToken(getCurrentToken());
options.onToken(new Token());
}
}

View File

@ -28699,72 +28699,90 @@ test("<!--\n;", {
value: "var",
start: 0,
end: 3,
startLoc: {line: 1, column: 0},
endLoc: {line: 1, column: 3}
loc: {
start: {line: 1, column: 0},
end: {line: 1, column: 3}
}
},
{
type: tokTypes.name,
value: "x",
start: 4,
end: 5,
startLoc: {line: 1, column: 4},
endLoc: {line: 1, column: 5}
loc: {
start: {line: 1, column: 4},
end: {line: 1, column: 5}
}
},
{
type: tokTypes.eq,
value: "=",
start: 6,
end: 7,
startLoc: {line: 1, column: 6},
endLoc: {line: 1, column: 7}
loc: {
start: {line: 1, column: 6},
end: {line: 1, column: 7}
}
},
{
type: tokTypes.parenL,
value: undefined,
start: 8,
end: 9,
startLoc: {line: 1, column: 8},
endLoc: {line: 1, column: 9}
loc: {
start: {line: 1, column: 8},
end: {line: 1, column: 9}
}
},
{
type: tokTypes.num,
value: 1,
start: 9,
end: 10,
startLoc: {line: 1, column: 9},
endLoc: {line: 1, column: 10}
loc: {
start: {line: 1, column: 9},
end: {line: 1, column: 10}
}
},
{
type: {binop: 9, prefix: true, beforeExpr: true},
value: "+",
start: 11,
end: 12,
startLoc: {line: 1, column: 11},
endLoc: {line: 1, column: 12}
loc: {
start: {line: 1, column: 11},
end: {line: 1, column: 12}
}
},
{
type: tokTypes.num,
value: 2,
start: 13,
end: 14,
startLoc: {line: 1, column: 13},
endLoc: {line: 1, column: 14}
loc: {
start: {line: 1, column: 13},
end: {line: 1, column: 14}
}
},
{
type: tokTypes.parenR,
value: undefined,
start: 14,
end: 15,
startLoc: {line: 1, column: 14},
endLoc: {line: 1, column: 15}
loc: {
start: {line: 1, column: 14},
end: {line: 1, column: 15}
}
},
{
type: tokTypes.eof,
value: undefined,
start: 15,
end: 15,
startLoc: {line: 1, column: 15},
endLoc: {line: 1, column: 15}
loc: {
start: {line: 1, column: 15},
end: {line: 1, column: 15}
}
}
];
testAssert('var x = (1 + 2)', function assert(ast) {
@ -28772,7 +28790,11 @@ test("<!--\n;", {
return JSON.stringify(actualTokens) + " !== " + JSON.stringify(expectedTokens);
} else {
for (var i=0, n=actualTokens.length; i < n; i++) {
var actualToken = JSON.stringify(actualTokens[i]);
var actualToken = JSON.stringify(
actualTokens[i],
// just remove this when startLoc/endLoc support is dropped
function (key, value) { if (key !== 'startLoc' && key !== 'endLoc') return value; }
);
var expectedToken = JSON.stringify(expectedTokens[i]);
if (actualToken !== expectedToken)
return actualToken + ' !== ' + expectedToken;
@ -28780,9 +28802,7 @@ test("<!--\n;", {
}
}, {
locations: true,
onToken: function(token) {
actualTokens.push(token);
}
onToken: actualTokens
});
})();