Make token format compatible with Esprima and Mozilla-styled locations.

* Tokens are now instances of single exported constructor Token.
* Token objects are compatible with Esprima (only `type` format is different).
* Added token.loc in format of node.loc (Mozilla).
* Deprecated token.startLoc & token.endLoc.
* Updated comment generation example.

Also added ability to pass arrays in `onToken`/`onComment` where
tokens/comments will be collected in Esprima's format so you can
simply pass those arrays to `escodegen.attachComments`.

Updated docs and comment attachment example.
This commit is contained in:
Ingvar Stepanyan 2014-08-20 15:31:35 +03:00 committed by Marijn Haverbeke
parent a246bf83d0
commit 10553cbced
3 changed files with 110 additions and 53 deletions

View File

@ -76,6 +76,11 @@ object referring to that same position.
- **onToken**: If a function is passed for this option, each found
token will be passed in the same format as `tokenize()` returns.
If an array is passed, each found token is pushed to it.
Note that you are not allowed to call the parser from the
callback—that will corrupt its internal state.
- **onComment**: If a function is passed for this option, whenever a
comment is encountered the function will be called with the
following parameters:
@ -90,6 +95,18 @@ object referring to that same position.
of the comments start and end are passed as two additional
parameters.
If an array is passed for this option, each found comment is pushed
to it as an object in Esprima format:
```javascript
{
"type": "Line" | "Block",
"value": "comment text",
"range": ...,
"loc": ...
}
```
Note that you are not allowed to call the parser from the
callback—that will corrupt its internal state.
@ -125,9 +142,8 @@ Acorn's tokenizer. The function takes an input string and options
similar to `parse` (though only some options are meaningful here), and
returns a function that can be called repeatedly to read a single
token, and returns a `{start, end, type, value}` object (with added
`startLoc` and `endLoc` properties when the `locations` option is
enabled). This object will be reused (updated) for each token, so you
can't count on it staying stable.
`loc` property when the `locations` option is enabled and `range`
property when the `ranges` option is enabled).
**tokTypes** holds an object mapping names to the token type objects
that end up in the `type` properties of tokens.
@ -136,8 +152,7 @@ that end up in the `type` properties of tokens.
Escodegen supports generating comments from AST, attached in
Esprima-specific format. In order to simulate same format in
Acorn, consider following example (this may be simplified
in future):
Acorn, consider the following example:
```javascript
var comments = [], tokens = [];
@ -146,19 +161,9 @@ var ast = acorn.parse('var x = 42; // answer', {
// collect ranges for each node
ranges: true,
// collect comments in Esprima's format
onComment: function (block, text, start, end) {
comments.push({
type: block ? 'Block' : 'Line',
value: text,
range: [start, end]
});
},
onComment: comments,
// collect token ranges
onToken: function (token) {
tokens.push({
range: [token.start, token.end]
});
}
onToken: tokens
});
// attach comments using collected information

View File

@ -114,12 +114,40 @@
directSourceFile: null
};
var isArray = function (obj) {
return Object.prototype.toString.call(obj) === "[object Array]";
};
function setOptions(opts) {
options = opts || {};
for (var opt in defaultOptions) if (!has(options, opt))
options[opt] = defaultOptions[opt];
sourceFile = options.sourceFile || null;
if (isArray(options.onToken)) {
var tokens = options.onToken;
options.onToken = function (token) {
tokens.push(token);
};
}
if (isArray(options.onComment)) {
var comments = options.onComment;
options.onComment = function (block, text, start, end, startLoc, endLoc) {
var comment = {
type: block ? 'Block' : 'Line',
value: text,
start: start,
end: end
};
if (options.locations) {
comment.loc = new SourceLocation();
comment.loc.start = startLoc;
comment.loc.end = endLoc;
}
if (options.ranges)
comment.range = [start, end];
comments.push(comment);
};
}
isKeyword = options.ecmaVersion >= 6 ? isEcma6Keyword : isEcma5AndLessKeyword;
}
@ -141,19 +169,23 @@
return {line: line, column: offset - cur};
};
var getCurrentToken = function () {
var token = {
type: tokType,
value: tokVal,
start: tokStart,
end: tokEnd
};
function Token() {
this.type = tokType;
this.value = tokVal;
this.start = tokStart;
this.end = tokEnd;
if (options.locations) {
token.startLoc = tokStartLoc;
token.endLoc = tokEndLoc;
this.loc = new SourceLocation();
this.loc.end = tokEndLoc;
// TODO: remove in next major release
this.startLoc = tokStartLoc;
this.endLoc = tokEndLoc;
}
return token;
};
if (options.ranges)
this.range = [tokStart, tokEnd];
}
exports.Token = Token;
// Acorn is organized as a tokenizer and a recursive-descent parser.
// The `tokenize` export provides an interface to the tokenizer.
@ -170,7 +202,7 @@
function getToken(forceRegexp) {
lastEnd = tokEnd;
readToken(forceRegexp);
return getCurrentToken();
return new Token();
}
getToken.jumpTo = function(pos, reAllowed) {
tokPos = pos;
@ -543,7 +575,7 @@
tokVal = val;
tokRegexpAllowed = type.beforeExpr;
if (options.onToken) {
options.onToken(getCurrentToken());
options.onToken(new Token());
}
}

View File

@ -28699,72 +28699,90 @@ test("<!--\n;", {
value: "var",
start: 0,
end: 3,
startLoc: {line: 1, column: 0},
endLoc: {line: 1, column: 3}
loc: {
start: {line: 1, column: 0},
end: {line: 1, column: 3}
}
},
{
type: tokTypes.name,
value: "x",
start: 4,
end: 5,
startLoc: {line: 1, column: 4},
endLoc: {line: 1, column: 5}
loc: {
start: {line: 1, column: 4},
end: {line: 1, column: 5}
}
},
{
type: tokTypes.eq,
value: "=",
start: 6,
end: 7,
startLoc: {line: 1, column: 6},
endLoc: {line: 1, column: 7}
loc: {
start: {line: 1, column: 6},
end: {line: 1, column: 7}
}
},
{
type: tokTypes.parenL,
value: undefined,
start: 8,
end: 9,
startLoc: {line: 1, column: 8},
endLoc: {line: 1, column: 9}
loc: {
start: {line: 1, column: 8},
end: {line: 1, column: 9}
}
},
{
type: tokTypes.num,
value: 1,
start: 9,
end: 10,
startLoc: {line: 1, column: 9},
endLoc: {line: 1, column: 10}
loc: {
start: {line: 1, column: 9},
end: {line: 1, column: 10}
}
},
{
type: {binop: 9, prefix: true, beforeExpr: true},
value: "+",
start: 11,
end: 12,
startLoc: {line: 1, column: 11},
endLoc: {line: 1, column: 12}
loc: {
start: {line: 1, column: 11},
end: {line: 1, column: 12}
}
},
{
type: tokTypes.num,
value: 2,
start: 13,
end: 14,
startLoc: {line: 1, column: 13},
endLoc: {line: 1, column: 14}
loc: {
start: {line: 1, column: 13},
end: {line: 1, column: 14}
}
},
{
type: tokTypes.parenR,
value: undefined,
start: 14,
end: 15,
startLoc: {line: 1, column: 14},
endLoc: {line: 1, column: 15}
loc: {
start: {line: 1, column: 14},
end: {line: 1, column: 15}
}
},
{
type: tokTypes.eof,
value: undefined,
start: 15,
end: 15,
startLoc: {line: 1, column: 15},
endLoc: {line: 1, column: 15}
loc: {
start: {line: 1, column: 15},
end: {line: 1, column: 15}
}
}
];
testAssert('var x = (1 + 2)', function assert(ast) {
@ -28772,7 +28790,11 @@ test("<!--\n;", {
return JSON.stringify(actualTokens) + " !== " + JSON.stringify(expectedTokens);
} else {
for (var i=0, n=actualTokens.length; i < n; i++) {
var actualToken = JSON.stringify(actualTokens[i]);
var actualToken = JSON.stringify(
actualTokens[i],
// just remove this when startLoc/endLoc support is dropped
function (key, value) { if (key !== 'startLoc' && key !== 'endLoc') return value; }
);
var expectedToken = JSON.stringify(expectedTokens[i]);
if (actualToken !== expectedToken)
return actualToken + ' !== ' + expectedToken;
@ -28780,9 +28802,7 @@ test("<!--\n;", {
}
}, {
locations: true,
onToken: function(token) {
actualTokens.push(token);
}
onToken: actualTokens
});
})();