From 7b05e660b428903f8b86d2ddb049886fb8225e22 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Thu, 19 Mar 2015 16:28:28 +0100 Subject: [PATCH] Move location-related code into location.js --- src/index.js | 972 +++--------------------------------------------- src/location.js | 66 ++++ src/tokenize.js | 36 -- 3 files changed, 109 insertions(+), 965 deletions(-) create mode 100644 src/location.js diff --git a/src/index.js b/src/index.js index d49aff7c03..dc10882782 100644 --- a/src/index.js +++ b/src/index.js @@ -20,7 +20,8 @@ // [walk]: util/walk.js import {has, isArray} from "./util" -import {types as tt, keywords as keywordTypes} from "./tokentype" +import {lineBreak} from "./tokenize" +import {getLineInfo as getLineInfo_, SourceLocation} from "./location" export const version = "0.12.1" @@ -162,89 +163,6 @@ function parser(options, input) { return new Parser(getOptions(options), String(input)) } -// This function tries to parse a single expression at a given -// offset in a string. Useful for parsing mixed-language formats -// that embed JavaScript expressions. - -export function parseExpressionAt(input, pos, options) { - let p = parser(options, input, pos) - p.nextToken() - return p.parseExpression() -} - -// The `getLineInfo` function is mostly useful when the -// `locations` option is off (for performance reasons) and you -// want to find the line/column position for a given character -// offset. `input` should be the code string that the offset refers -// into. - -export function getLineInfo(input, offset) { - for (let line = 1, cur = 0;;) { - lineBreak.lastIndex = cur - let match = lineBreak.exec(input) - if (match && match.index < offset) { - ++line - cur = match.index + match[0].length - } else { - return new Position(line, offset - cur) - } - } -} - -// Acorn is organized as a tokenizer and a recursive-descent parser. -// The `tokenize` export provides an interface to the tokenizer. -// Because the tokenizer is optimized for being efficiently used by -// the Acorn parser itself, this interface is somewhat crude and not -// very modular. - -export function tokenizer(input, options) { - return parser(options, input) -} - -// Object type used to represent tokens. Note that normally, tokens -// simply exist as properties on the parser object. This is only -// used for the onToken callback and the external tokenizer. - -class Token { - constructor(p) { - this.type = p.type; - this.value = p.value; - this.start = p.start; - this.end = p.end; - if (p.options.locations) - this.loc = new SourceLocation(p, p.startLoc, p.endLoc); - if (p.options.ranges) - this.range = [p.start, p.end]; - } -} - -// Whether a single character denotes a newline. - -var newline = exports.newline = /[\n\r\u2028\u2029]/; - -var isNewLine = exports.isNewLine = function(code) { - return code === 10 || code === 13 || code === 0x2028 || code == 0x2029; -}; - -// Matches a whole line break (where CRLF is considered a single -// line break). Used to count lines. - -var lineBreak = exports.lineBreak = /\r\n|[\n\r\u2028\u2029]/g; - -// ## Tokenizer - -// These are used when `options.locations` is on, for the -// `startLoc` and `endLoc` properties. - -function Position(line, col) { - this.line = line; - this.column = col; -} - -Position.prototype.offset = function(n) { - return new Position(this.line, this.column + n); -}; - // ## Parser // A recursive descent parser operates by defining functions for all @@ -265,831 +183,31 @@ Position.prototype.offset = function(n) { // // [opp]: http://en.wikipedia.org/wiki/Operator-precedence_parser -import {reservedWords, keywords, isIdentifierStart, isIdentifierChar} from "./identifier" - -var Parser = exports.Parser = function(options, input, startPos) { - this.options = options - this.loadPlugins(this.options.plugins) - this.sourceFile = this.options.sourceFile || null - this.isKeyword = keywords[this.options.ecmaVersion >= 6 ? 6 : 5] - this.isReservedWord = reservedWords[this.options.ecmaVersion] - this.input = input - - // Set up token state - - // The current position of the tokenizer in the input. - if (startPos) { - this.pos = startPos; - this.lineStart = Math.max(0, this.input.lastIndexOf("\n", startPos)); - this.curLine = this.input.slice(0, this.lineStart).split(newline).length; - } else { - this.pos = this.lineStart = 0; - this.curLine = 1; - } - - // Properties of the current token: - // Its type - this.type = tt.eof; - // For tokens that include more information than their type, the value - this.value = null; - // Its start and end offset - this.start = this.end = this.pos; - // And, if locations are used, the {line, column} object - // corresponding to those offsets - this.startLoc = this.endLoc = null; - - // Position information for the previous token - this.lastTokEndLoc = this.lastTokStartLoc = null; - this.lastTokStart = this.lastTokEnd = this.pos; - - // The context stack is used to superficially track syntactic - // context to predict whether a regular expression is allowed in a - // given position. - this.context = [tc.b_stat]; - this.exprAllowed = true; - - // Figure out if it's a module code. - this.strict = this.inModule = this.options.sourceType === "module"; - - // Flags to track whether we are in a function, a generator. - this.inFunction = this.inGenerator = false; - // Labels in scope. - this.labels = []; - - // If enabled, skip leading hashbang line. - if (this.pos === 0 && this.options.allowHashBang && this.input.slice(0, 2) === '#!') - this.skipLineComment(2); -}; - -// Shorthand because we are going to be adding a _lot_ of methods to -// this. -var pp = Parser.prototype; - -pp.extend = function(name, f) { - this[name] = f(this[name]); -}; - -pp.loadPlugins = function(plugins) { - for (var name in plugins) { - var plugin = exports.plugins[name]; - if (!plugin) throw new Error("Plugin '" + name + "' not found"); - plugin(this, plugins[name]); - } -}; - -// Move to the next token - -pp.next = function() { - if (this.options.onToken) - this.options.onToken(new Token(this)); - - this.lastTokEnd = this.end; - this.lastTokStart = this.start; - this.lastTokEndLoc = this.endLoc; - this.lastTokStartLoc = this.startLoc; - this.nextToken(); -}; - -pp.getToken = function() { - this.next(); - return new Token(this); -}; - -// If we're in an ES6 environment, make parsers iterable -if (typeof Symbol !== "undefined") - pp[Symbol.iterator] = function () { - var self = this; - return {next: function () { - var token = self.getToken(); - return { - done: token.type === tt.eof, - value: token - }; - }}; - }; - -// Toggle strict mode. Re-reads the next number or string to please -// pedantic tests (`"use strict"; 010;` should fail). - -pp.setStrict = function(strict) { - this.strict = strict; - if (this.type !== tt.num && this.type !== tt.string) return; - this.pos = this.start; - if (this.options.locations) { - while (this.pos < this.lineStart) { - this.lineStart = this.input.lastIndexOf("\n", this.lineStart - 2) + 1; - --this.curLine; - } - } - this.nextToken(); -}; - -pp.curContext = function() { - return this.context[this.context.length - 1]; -}; - -// Read a single token, updating the parser object's token-related -// properties. - -pp.nextToken = function() { - var curContext = this.curContext(); - if (!curContext || !curContext.preserveSpace) this.skipSpace(); - - this.start = this.pos; - if (this.options.locations) this.startLoc = this.curPosition(); - if (this.pos >= this.input.length) return this.finishToken(tt.eof); - - if (curContext === tc.q_tmpl) return this.readTmplToken(); - - this.readToken(this.fullCharCodeAtPos()); -}; - -pp.readToken = function(code) { - // Identifier or keyword. '\uXXXX' sequences are allowed in - // identifiers, so '\' also dispatches to that. - if (isIdentifierStart(code, this.options.ecmaVersion >= 6) || code === 92 /* '\' */) - return this.readWord(); - - return this.getTokenFromCode(code); -}; - -pp.fullCharCodeAtPos = function() { - var code = this.input.charCodeAt(this.pos); - if (code <= 0xd7ff || code >= 0xe000) return code; - var next = this.input.charCodeAt(this.pos + 1); - return (code << 10) + next - 0x35fdc00; -}; - -pp.skipBlockComment = function() { - var startLoc = this.options.onComment && this.options.locations && this.curPosition(); - var start = this.pos, end = this.input.indexOf("*/", this.pos += 2); - if (end === -1) this.raise(this.pos - 2, "Unterminated comment"); - this.pos = end + 2; - if (this.options.locations) { - lineBreak.lastIndex = start; - var match; - while ((match = lineBreak.exec(this.input)) && match.index < this.pos) { - ++this.curLine; - this.lineStart = match.index + match[0].length; - } - } - if (this.options.onComment) - this.options.onComment(true, this.input.slice(start + 2, end), start, this.pos, - startLoc, this.options.locations && this.curPosition()); -}; - -pp.skipLineComment = function(startSkip) { - var start = this.pos; - var startLoc = this.options.onComment && this.options.locations && this.curPosition(); - var ch = this.input.charCodeAt(this.pos+=startSkip); - while (this.pos < this.input.length && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) { - ++this.pos; - ch = this.input.charCodeAt(this.pos); - } - if (this.options.onComment) - this.options.onComment(false, this.input.slice(start + startSkip, this.pos), start, this.pos, - startLoc, this.options.locations && this.curPosition()); -}; - -// Called at the start of the parse and after every token. Skips -// whitespace and comments, and. - -pp.skipSpace = function() { - while (this.pos < this.input.length) { - var ch = this.input.charCodeAt(this.pos); - if (ch === 32) { // ' ' - ++this.pos; - } else if (ch === 13) { - ++this.pos; - var next = this.input.charCodeAt(this.pos); - if (next === 10) { - ++this.pos; - } - if (this.options.locations) { - ++this.curLine; - this.lineStart = this.pos; - } - } else if (ch === 10 || ch === 8232 || ch === 8233) { - ++this.pos; - if (this.options.locations) { - ++this.curLine; - this.lineStart = this.pos; - } - } else if (ch > 8 && ch < 14) { - ++this.pos; - } else if (ch === 47) { // '/' - var next = this.input.charCodeAt(this.pos + 1); - if (next === 42) { // '*' - this.skipBlockComment(); - } else if (next === 47) { // '/' - this.skipLineComment(2); - } else break; - } else if (ch === 160) { // '\xa0' - ++this.pos; - } else if (ch >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(ch))) { - ++this.pos; - } else { - break; - } - } -}; - -pp.curPosition = function() { - return new Position(this.curLine, this.pos - this.lineStart); -}; - -// The algorithm used to determine whether a regexp can appear at a -// given point in the program is loosely based on sweet.js' approach. -// See https://github.com/mozilla/sweet.js/wiki/design - -var TokContext = exports.TokContext = function(token, isExpr, preserveSpace) { - this.token = token; - this.isExpr = isExpr; - this.preserveSpace = preserveSpace; -}; - -var tc = exports.tokContexts = { - b_stat: new TokContext("{", false), - b_expr: new TokContext("{", true), - b_tmpl: new TokContext("${", true), - p_stat: new TokContext("(", false), - p_expr: new TokContext("(", true), - q_tmpl: new TokContext("`", true, true), - f_expr: new TokContext("function", true) -}; - -pp.braceIsBlock = function(prevType) { - var parent; - if (prevType === tt.colon && (parent = this.curContext()).token == "{") - return !parent.isExpr; - if (prevType === tt._return) - return newline.test(this.input.slice(this.lastTokEnd, this.start)); - if (prevType === tt._else || prevType === tt.semi || prevType === tt.eof) - return true; - if (prevType == tt.braceL) - return this.curContext() === tc.b_stat; - return !this.exprAllowed; -}; - -// Called at the end of every token. Sets `end`, `val`, and -// maintains `context` and `exprAllowed`, and skips the space after -// the token, so that the next one's `start` will point at the -// right position. - -pp.finishToken = function(type, val) { - this.end = this.pos; - if (this.options.locations) this.endLoc = this.curPosition(); - var prevType = this.type; - this.type = type; - this.value = val; - - this.updateContext(prevType); -}; - -pp.updateContext = function(prevType) { - var update, type = this.type; - if (type.keyword && prevType == tt.dot) - this.exprAllowed = false; - else if (update = type.updateContext) - update.call(this, prevType); - else - this.exprAllowed = type.beforeExpr; -}; - -// Token-specific context update code - -tt.parenR.updateContext = tt.braceR.updateContext = function() { - var out = this.context.pop(); - if (out === tc.b_stat && this.curContext() === tc.f_expr) { - this.context.pop(); - this.exprAllowed = false; - } else if (out === tc.b_tmpl) { - this.exprAllowed = true; - } else { - this.exprAllowed = !(out && out.isExpr); - } -}; - -tt.braceL.updateContext = function(prevType) { - this.context.push(this.braceIsBlock(prevType) ? tc.b_stat : tc.b_expr); - this.exprAllowed = true; -}; - -tt.dollarBraceL.updateContext = function() { - this.context.push(tc.b_tmpl); - this.exprAllowed = true; -}; - -tt.parenL.updateContext = function(prevType) { - var statementParens = prevType === tt._if || prevType === tt._for || prevType === tt._with || prevType === tt._while; - this.context.push(statementParens ? tc.p_stat : tc.p_expr); - this.exprAllowed = true; -}; - -tt.incDec.updateContext = function() { - // tokExprAllowed stays unchanged -}; - -tt._function.updateContext = function() { - if (this.curContext() !== tc.b_stat) - this.context.push(tc.f_expr); - this.exprAllowed = false; -}; - -tt.backQuote.updateContext = function() { - if (this.curContext() === tc.q_tmpl) - this.context.pop(); - else - this.context.push(tc.q_tmpl); - this.exprAllowed = false; -}; - -// ### Token reading - -// This is the function that is called to fetch the next token. It -// is somewhat obscure, because it works in character codes rather -// than characters, and because operator parsing has been inlined -// into it. -// -// All in the name of speed. -// -pp.readToken_dot = function() { - var next = this.input.charCodeAt(this.pos + 1); - if (next >= 48 && next <= 57) return this.readNumber(true); - var next2 = this.input.charCodeAt(this.pos + 2); - if (this.options.ecmaVersion >= 6 && next === 46 && next2 === 46) { // 46 = dot '.' - this.pos += 3; - return this.finishToken(tt.ellipsis); - } else { - ++this.pos; - return this.finishToken(tt.dot); - } -}; - -pp.readToken_slash = function() { // '/' - var next = this.input.charCodeAt(this.pos + 1); - if (this.exprAllowed) {++this.pos; return this.readRegexp();} - if (next === 61) return this.finishOp(tt.assign, 2); - return this.finishOp(tt.slash, 1); -}; - -pp.readToken_mult_modulo = function(code) { // '%*' - var next = this.input.charCodeAt(this.pos + 1); - if (next === 61) return this.finishOp(tt.assign, 2); - return this.finishOp(code === 42 ? tt.star : tt.modulo, 1); -}; - -pp.readToken_pipe_amp = function(code) { // '|&' - var next = this.input.charCodeAt(this.pos + 1); - if (next === code) return this.finishOp(code === 124 ? tt.logicalOR : tt.logicalAND, 2); - if (next === 61) return this.finishOp(tt.assign, 2); - return this.finishOp(code === 124 ? tt.bitwiseOR : tt.bitwiseAND, 1); -}; - -pp.readToken_caret = function() { // '^' - var next = this.input.charCodeAt(this.pos + 1); - if (next === 61) return this.finishOp(tt.assign, 2); - return this.finishOp(tt.bitwiseXOR, 1); -}; - -pp.readToken_plus_min = function(code) { // '+-' - var next = this.input.charCodeAt(this.pos + 1); - if (next === code) { - if (next == 45 && this.input.charCodeAt(this.pos + 2) == 62 && - newline.test(this.input.slice(this.lastTokEnd, this.pos))) { - // A `-->` line comment - this.skipLineComment(3); - this.skipSpace(); - return this.nextToken(); - } - return this.finishOp(tt.incDec, 2); - } - if (next === 61) return this.finishOp(tt.assign, 2); - return this.finishOp(tt.plusMin, 1); -}; - -pp.readToken_lt_gt = function(code) { // '<>' - var next = this.input.charCodeAt(this.pos + 1); - var size = 1; - if (next === code) { - size = code === 62 && this.input.charCodeAt(this.pos + 2) === 62 ? 3 : 2; - if (this.input.charCodeAt(this.pos + size) === 61) return this.finishOp(tt.assign, size + 1); - return this.finishOp(tt.bitShift, size); - } - if (next == 33 && code == 60 && this.input.charCodeAt(this.pos + 2) == 45 && - this.input.charCodeAt(this.pos + 3) == 45) { - if (this.inModule) unexpected(); - // `