remove range property from nodes, clean up babylon codebase

2015-07-25 07:07:22 +01:00
parent af03a301ae
commit e7fec51feb
424 changed files with 3967 additions and 19592 deletions
--- a/src/tokenizer/context.js
+++ b/src/tokenizer/context.js
@@ -0,0 +1,118 @@
+// The algorithm used to determine whether a regexp can appear at a
+// given point in the program is loosely based on sweet.js' approach.
+// See https://github.com/mozilla/sweet.js/wiki/design
+
+import { Parser } from "../state";
+import { types as tt } from "./types";
+import { lineBreak } from "../util/whitespace";
+
+export class TokContext {
+  constructor(token, isExpr, preserveSpace, override) {
+    this.token = token;
+    this.isExpr = !!isExpr;
+    this.preserveSpace = !!preserveSpace;
+    this.override = override;
+  }
+}
+
+// The context kinds that can appear on the tokenizer's context stack.
+// Arguments are (opening token, isExpr, preserveSpace, override).
+// Only `q_tmpl` preserves whitespace and overrides token reading: inside
+// a template literal tokens are produced by `readTmplToken`.
+export const types = {
+  b_stat: new TokContext("{", false),
+  b_expr: new TokContext("{", true),
+  b_tmpl: new TokContext("${", true),
+  p_stat: new TokContext("(", false),
+  p_expr: new TokContext("(", true),
+  q_tmpl: new TokContext("`", true, true, p => p.readTmplToken()),
+  f_expr: new TokContext("function", true)
+};
+
+const pp = Parser.prototype;
+
+pp.initialContext = function () {
+  return [types.b_stat];
+};
+
+pp.braceIsBlock = function (prevType) {
+  if (prevType === tt.colon) {
+    let parent = this.curContext();
+    if (parent === types.b_stat || parent === types.b_expr)
+      return !parent.isExpr;
+  }
+
+  if (prevType === tt._return)
+    return lineBreak.test(this.input.slice(this.lastTokEnd, this.start));
+
+  if (prevType === tt._else || prevType === tt.semi || prevType === tt.eof)
+    return true;
+
+  if (prevType === tt.braceL)
+    return this.curContext() === types.b_stat;
+
+  return !this.exprAllowed;
+};
+
+pp.updateContext = function (prevType) {
+  let update, type = this.type;
+  if (type.keyword && prevType === tt.dot) {
+    this.exprAllowed = false;
+  } else if (update = type.updateContext) {
+    update.call(this, prevType);
+  } else {
+    this.exprAllowed = type.beforeExpr;
+  }
+};
+
+// Token-specific context update code
+
+tt.parenR.updateContext = tt.braceR.updateContext = function () {
+  if (this.context.length === 1) {
+    this.exprAllowed = true;
+    return;
+  }
+
+  let out = this.context.pop();
+  if (out === types.b_stat && this.curContext() === types.f_expr) {
+    this.context.pop();
+    this.exprAllowed = false;
+  } else if (out === types.b_tmpl) {
+    this.exprAllowed = true;
+  } else {
+    this.exprAllowed = !out.isExpr;
+  }
+};
+
+tt.braceL.updateContext = function (prevType) {
+  this.context.push(this.braceIsBlock(prevType) ? types.b_stat : types.b_expr);
+  this.exprAllowed = true;
+};
+
+tt.dollarBraceL.updateContext = function () {
+  this.context.push(types.b_tmpl);
+  this.exprAllowed = true;
+};
+
+tt.parenL.updateContext = function (prevType) {
+  let statementParens = prevType === tt._if || prevType === tt._for || prevType === tt._with || prevType === tt._while;
+  this.context.push(statementParens ? types.p_stat : types.p_expr);
+  this.exprAllowed = true;
+};
+
+// `++` / `--`: installing an empty hook keeps `updateContext` from
+// applying the default `beforeExpr` rule.
+tt.incDec.updateContext = function () {
+  // Deliberately empty: `exprAllowed` stays unchanged
+};
+
+tt._function.updateContext = function () {
+  if (this.curContext() !== types.b_stat) {
+    this.context.push(types.f_expr);
+  }
+
+  this.exprAllowed = false;
+};
+
+tt.backQuote.updateContext = function () {
+  if (this.curContext() === types.q_tmpl) {
+    this.context.pop();
+  } else {
+    this.context.push(types.q_tmpl);
+  }
+  this.exprAllowed = false;
+};
--- a/src/tokenizer/index.js
+++ b/src/tokenizer/index.js
@@ -0,0 +1,756 @@
+import { isIdentifierStart, isIdentifierChar } from "../util/identifier";
+import { types as tt, keywords as keywordTypes } from "./types";
+import { SourceLocation } from "../locutil";
+import { lineBreak, lineBreakG, isNewLine, nonASCIIwhitespace } from "../util/whitespace";
+
+// Object type used to represent tokens. Note that normally, tokens
+// simply exist as properties on the parser object. This is only
+// used for the onToken callback and the external tokenizer.
+
+export class Token {
+  constructor(p) {
+    this.type = p.type;
+    this.value = p.value;
+    this.start = p.start;
+    this.end = p.end;
+
+    this.loc = new SourceLocation(p.startLoc, p.endLoc);
+  }
+}
+
+// ## Tokenizer
+
+// Are we running under Rhino? Detected through a global `Packages`
+// object whose class string is "[object JavaPackage]". `readRegexp`
+// skips regular expression validation when this is true.
+/* global Packages */
+const isRhino = typeof Packages === "object" && Object.prototype.toString.call(Packages) === "[object JavaPackage]";
+
+// Parse a regular expression. Some context-awareness is necessary,
+// since a '/' inside a '[]' set does not end the expression.
+
+function tryCreateRegexp(src, flags, throwErrorStart) {
+  try {
+    return new RegExp(src, flags);
+  } catch (e) {
+    if (throwErrorStart !== undefined) {
+      if (e instanceof SyntaxError) this.raise(throwErrorStart, "Error parsing regular expression: " + e.message);
+      this.raise(e);
+    }
+  }
+}
+
+// Whether this engine accepts the `u` regexp flag; probed once at load
+// time (a failed construction yields `undefined`, hence `false`).
+var regexpUnicodeSupport = !!tryCreateRegexp("\uffff", "u");
+
+function codePointToString(code) {
+  // UTF-16 Decoding
+  if (code <= 0xFFFF) return String.fromCharCode(code);
+  return String.fromCharCode(((code - 0x10000) >> 10) + 0xD800, ((code - 0x10000) & 1023) + 0xDC00);
+}
+
+// Used to signal to callers of `readWord1` whether the word
+// contained any escape sequences. This is needed because words with
+// escape sequences must not be interpreted as keywords.
+
+var containsEsc;
+
+export default class Tokenizer {
+  // Initialise the tokenizer state: input position and line bookkeeping,
+  // the (empty, `eof`) current token, previous-token positions and the
+  // context stack. Statement order matters: `curPosition()` and
+  // `initialContext()` are called on the partially initialised object.
+  constructor() {
+    // The current position of the tokenizer in the input.
+    this.pos = this.lineStart = 0;
+    this.curLine = 1;
+
+    // Properties of the current token:
+    // Its type
+    this.type = tt.eof;
+    // For tokens that include more information than their type, the value
+    this.value = null;
+    // Its start and end offset
+    this.start = this.end = this.pos;
+    // And, if locations are used, the {line, column} object
+    // corresponding to those offsets
+    // (both fields initially reference the same position object)
+    this.startLoc = this.endLoc = this.curPosition();
+
+    // Position information for the previous token
+    this.lastTokEndLoc = this.lastTokStartLoc = null;
+    this.lastTokStart = this.lastTokEnd = this.pos;
+
+    // The context stack is used to superficially track syntactic
+    // context to predict whether a regular expression is allowed in a
+    // given position.
+    this.context = this.initialContext();
+    this.exprAllowed = true;
+  }
+
+  // Move to the next token
+
+  next() {
+    if (!this.isLookahead) {
+      this.tokens.push(new Token(this));
+    }
+
+    this.lastTokEnd = this.end;
+    this.lastTokStart = this.start;
+    this.lastTokEndLoc = this.endLoc;
+    this.lastTokStartLoc = this.startLoc;
+    this.nextToken();
+  };
+
+  getToken() {
+    this.next();
+    return new Token(this);
+  }
+
+  // Toggle strict mode. Re-reads the next number or string to please
+  // pedantic tests (`"use strict"; 010;` should fail).
+
+  setStrict(strict) {
+    this.strict = strict;
+    if (this.type !== tt.num && this.type !== tt.string) return;
+    this.pos = this.start;
+    while (this.pos < this.lineStart) {
+      this.lineStart = this.input.lastIndexOf("\n", this.lineStart - 2) + 1;
+      --this.curLine;
+    }
+    this.nextToken();
+  }
+
+  curContext() {
+    return this.context[this.context.length - 1];
+  }
+
+  // Read a single token, updating the parser object's token-related
+  // properties.
+
+  nextToken() {
+    let curContext = this.curContext();
+    if (!curContext || !curContext.preserveSpace) this.skipSpace();
+
+    this.start = this.pos;
+    this.startLoc = this.curPosition();
+    if (this.pos >= this.input.length) return this.finishToken(tt.eof);
+
+    if (curContext.override) {
+      return curContext.override(this);
+    } else {
+      return this.readToken(this.fullCharCodeAtPos());
+    }
+  }
+
+  readToken(code) {
+    // Identifier or keyword. '\uXXXX' sequences are allowed in
+    // identifiers, so '\' also dispatches to that.
+    if (isIdentifierStart(code, true) || code === 92 /* '\' */)
+      return this.readWord();
+
+    return this.getTokenFromCode(code);
+  }
+
+  fullCharCodeAtPos() {
+    let code = this.input.charCodeAt(this.pos);
+    if (code <= 0xd7ff || code >= 0xe000) return code;
+
+    let next = this.input.charCodeAt(this.pos + 1);
+    return (code << 10) + next - 0x35fdc00;
+  }
+
+  pushComment(block, text, start, end, startLoc, endLoc) {
+    var comment = {
+      type: block ? "CommentBlock" : "CommentLine",
+      value: text,
+      start: start,
+      end: end,
+      loc: new SourceLocation(startLoc, endLoc),
+      range: [start, end]
+    };
+
+    this.tokens.push(comment);
+    this.comments.push(comment);
+    this.addComment(comment);
+  }
+
+  skipBlockComment() {
+    let startLoc = this.curPosition();
+    let start = this.pos, end = this.input.indexOf("*/", this.pos += 2);
+    if (end === -1) this.raise(this.pos - 2, "Unterminated comment");
+
+    this.pos = end + 2;
+    lineBreakG.lastIndex = start;
+    let match;
+    while ((match = lineBreakG.exec(this.input)) && match.index < this.pos) {
+      ++this.curLine;
+      this.lineStart = match.index + match[0].length;
+    }
+
+    this.pushComment(true, this.input.slice(start + 2, end), start, this.pos, startLoc, this.curPosition());
+  }
+
+  skipLineComment(startSkip) {
+    let start = this.pos;
+    let startLoc = this.curPosition();
+    let ch = this.input.charCodeAt(this.pos += startSkip);
+    while (this.pos < this.input.length && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) {
+      ++this.pos;
+      ch = this.input.charCodeAt(this.pos);
+    }
+
+    this.pushComment(false, this.input.slice(start + startSkip, this.pos), start, this.pos, startLoc, this.curPosition());
+  }
+
+  // Skips whitespace and comments starting at `pos`, keeping `curLine`
+  // and `lineStart` up to date. Called by `nextToken` before a token is
+  // read (unless the current context preserves whitespace) and after the
+  // HTML-style comments handled in the operator readers.
+
+  skipSpace() {
+    loop: while (this.pos < this.input.length) {
+      let ch = this.input.charCodeAt(this.pos);
+      switch (ch) {
+        case 32: case 160: // ' '
+          ++this.pos;
+          break;
+
+        case 13:
+          // '\r\n' counts as one line break: step over the '\r' here and
+          // fall through so the '\n' is consumed below.
+          if (this.input.charCodeAt(this.pos + 1) === 10) {
+            ++this.pos;
+          }
+
+        // deliberate fall-through from case 13
+        case 10: case 8232: case 8233:
+          ++this.pos;
+          ++this.curLine;
+          this.lineStart = this.pos;
+          break;
+
+        case 47: // '/'
+          switch (this.input.charCodeAt(this.pos + 1)) {
+            case 42: // '*'
+              this.skipBlockComment();
+              break;
+
+            case 47:
+              this.skipLineComment(2);
+              break;
+
+            default:
+              // A '/' that does not start a comment begins a token.
+              break loop;
+          }
+          break;
+
+        default:
+          // Codes 9-13, or a character matched by `nonASCIIwhitespace`
+          // (only tested for codes >= 5760).
+          if (ch > 8 && ch < 14 || ch >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(ch))) {
+            ++this.pos;
+          } else {
+            break loop;
+          }
+      }
+    }
+  }
+
+  // Called at the end of every token. Sets `end`, `val`, and
+  // maintains `context` and `exprAllowed`, and skips the space after
+  // the token, so that the next one's `start` will point at the
+  // right position.
+
+  finishToken(type, val) {
+    this.end = this.pos;
+    this.endLoc = this.curPosition();
+    let prevType = this.type;
+    this.type = type;
+    this.value = val;
+
+    this.updateContext(prevType);
+  }
+
+  // ### Token reading
+
+  // This is the function that is called to fetch the next token. It
+  // is somewhat obscure, because it works in character codes rather
+  // than characters, and because operator parsing has been inlined
+  // into it.
+  //
+  // All in the name of speed.
+  //
+  readToken_dot() {
+    let next = this.input.charCodeAt(this.pos + 1);
+    if (next >= 48 && next <= 57) return this.readNumber(true);
+
+    let next2 = this.input.charCodeAt(this.pos + 2);
+    if (next === 46 && next2 === 46) { // 46 = dot '.'
+      this.pos += 3;
+      return this.finishToken(tt.ellipsis);
+    } else {
+      ++this.pos;
+      return this.finishToken(tt.dot);
+    }
+  }
+
+  readToken_slash() { // '/'
+    let next = this.input.charCodeAt(this.pos + 1);
+    if (this.exprAllowed) {
+      ++this.pos;
+      return this.readRegexp();
+    }
+    if (next === 61) return this.finishOp(tt.assign, 2);
+    return this.finishOp(tt.slash, 1);
+  }
+
+  readToken_mult_modulo(code) { // '%*'
+    var type = code === 42 ? tt.star : tt.modulo;
+    var width = 1;
+    var next = this.input.charCodeAt(this.pos + 1);
+
+    if (next === 42 && this.options.features["es7.exponentiationOperator"]) { // '*'
+      width++;
+      next = this.input.charCodeAt(this.pos + 2);
+      type = tt.exponent;
+    }
+
+    if (next === 61) {
+      width++;
+      type = tt.assign;
+    }
+
+    return this.finishOp(type, width);
+  }
+
+  readToken_pipe_amp(code) { // '|&'
+    let next = this.input.charCodeAt(this.pos + 1);
+    if (next === code) return this.finishOp(code === 124 ? tt.logicalOR : tt.logicalAND, 2);
+    if (next === 61) return this.finishOp(tt.assign, 2);
+    return this.finishOp(code === 124 ? tt.bitwiseOR : tt.bitwiseAND, 1);
+  }
+
+  readToken_caret() { // '^'
+    let next = this.input.charCodeAt(this.pos + 1);
+    if (next === 61) {
+      return this.finishOp(tt.assign, 2);
+    } else {
+      return this.finishOp(tt.bitwiseXOR, 1);
+    }
+  }
+
+  readToken_plus_min(code) { // '+-'
+    let next = this.input.charCodeAt(this.pos + 1);
+
+    if (next === code) {
+      if (next === 45 && this.input.charCodeAt(this.pos + 2) === 62 && lineBreak.test(this.input.slice(this.lastTokEnd, this.pos))) {
+        // A `-->` line comment
+        this.skipLineComment(3);
+        this.skipSpace();
+        return this.nextToken();
+      }
+      return this.finishOp(tt.incDec, 2);
+    }
+
+    if (next === 61) {
+      return this.finishOp(tt.assign, 2);
+    } else {
+      return this.finishOp(tt.plusMin, 1);
+    }
+  }
+
+  readToken_lt_gt(code) { // '<>'
+    let next = this.input.charCodeAt(this.pos + 1);
+    let size = 1;
+
+    if (next === code) {
+      size = code === 62 && this.input.charCodeAt(this.pos + 2) === 62 ? 3 : 2;
+      if (this.input.charCodeAt(this.pos + size) === 61) return this.finishOp(tt.assign, size + 1);
+      return this.finishOp(tt.bitShift, size);
+    }
+
+    if (next === 33 && code === 60 && this.input.charCodeAt(this.pos + 2) === 45 && this.input.charCodeAt(this.pos + 3) === 45) {
+      if (this.inModule) this.unexpected();
+      // `<!--`, an XML-style comment that should be interpreted as a line comment
+      this.skipLineComment(4);
+      this.skipSpace();
+      return this.nextToken();
+    }
+
+    if (next === 61) {
+      size = this.input.charCodeAt(this.pos + 2) === 61 ? 3 : 2;
+    }
+
+    return this.finishOp(tt.relational, size);
+  }
+
+  readToken_eq_excl(code) { // '=!'
+    let next = this.input.charCodeAt(this.pos + 1);
+    if (next === 61) return this.finishOp(tt.equality, this.input.charCodeAt(this.pos + 2) === 61 ? 3 : 2);
+    if (code === 61 && next === 62) { // '=>'
+      this.pos += 2;
+      return this.finishToken(tt.arrow);
+    }
+    return this.finishOp(code === 61 ? tt.eq : tt.prefix, 1);
+  }
+
+  // Read the token that starts with character code `code` at `pos`, for
+  // every token that is not an identifier or keyword (`readToken` handles
+  // those). Raises "Unexpected character" when no token starts with
+  // `code`.
+  getTokenFromCode(code) {
+    switch (code) {
+      // The interpretation of a dot depends on whether it is followed
+      // by a digit or another two dots.
+      case 46: // '.'
+        return this.readToken_dot();
+
+      // Punctuation tokens.
+      case 40: ++this.pos; return this.finishToken(tt.parenL);
+      case 41: ++this.pos; return this.finishToken(tt.parenR);
+      case 59: ++this.pos; return this.finishToken(tt.semi);
+      case 44: ++this.pos; return this.finishToken(tt.comma);
+      case 91: ++this.pos; return this.finishToken(tt.bracketL);
+      case 93: ++this.pos; return this.finishToken(tt.bracketR);
+      case 123: ++this.pos; return this.finishToken(tt.braceL);
+      case 125: ++this.pos; return this.finishToken(tt.braceR);
+
+      // ':' — or '::' when the function-bind feature is enabled.
+      case 58:
+        if (this.options.features["es7.functionBind"] && this.input.charCodeAt(this.pos + 1) === 58) {
+          return this.finishOp(tt.doubleColon, 2);
+        } else {
+          ++this.pos;
+          return this.finishToken(tt.colon);
+        }
+
+      case 63: ++this.pos; return this.finishToken(tt.question);
+      case 64: ++this.pos; return this.finishToken(tt.at);
+
+      case 96: // '`'
+        ++this.pos;
+        return this.finishToken(tt.backQuote);
+
+      case 48: // '0'
+        let next = this.input.charCodeAt(this.pos + 1);
+        if (next === 120 || next === 88) return this.readRadixNumber(16); // '0x', '0X' - hex number
+        if (next === 111 || next === 79) return this.readRadixNumber(8); // '0o', '0O' - octal number
+        if (next === 98 || next === 66) return this.readRadixNumber(2); // '0b', '0B' - binary number
+        // Anything else beginning with a digit is an integer, octal
+        // number, or float.
+        // (deliberate fall-through into the 1-9 case)
+      case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: // 1-9
+        return this.readNumber(false);
+
+        // Quotes produce strings.
+      case 34: case 39: // '"', "'"
+        return this.readString(code);
+
+      // Operators are parsed inline in tiny state machines. '=' (61) is
+      // often referred to. `finishOp` simply skips the amount of
+      // characters it is given as second argument, and returns a token
+      // of the type given by its first argument.
+
+      case 47: // '/'
+        return this.readToken_slash();
+
+      case 37: case 42: // '%*'
+        return this.readToken_mult_modulo(code);
+
+      case 124: case 38: // '|&'
+        return this.readToken_pipe_amp(code);
+
+      case 94: // '^'
+        return this.readToken_caret();
+
+      case 43: case 45: // '+-'
+        return this.readToken_plus_min(code);
+
+      case 60: case 62: // '<>'
+        return this.readToken_lt_gt(code);
+
+      case 61: case 33: // '=!'
+        return this.readToken_eq_excl(code);
+
+      case 126: // '~'
+        return this.finishOp(tt.prefix, 1);
+    }
+
+    this.raise(this.pos, `Unexpected character '${codePointToString(code)}'`);
+  }
+
+  finishOp(type, size) {
+    let str = this.input.slice(this.pos, this.pos + size);
+    this.pos += size;
+    return this.finishToken(type, str);
+  }
+
+  // Read a regular expression literal. On entry `pos` is just past the
+  // opening '/' (see `readToken_slash`). Produces a `regexp` token whose
+  // value is `{pattern, flags, value}`, where `value` is a RegExp object
+  // or null/undefined when it could not be created here.
+  readRegexp() {
+    let escaped, inClass, start = this.pos;
+    // Scan to the closing '/', ignoring slashes that are escaped or
+    // inside a '[...]' class.
+    for (;;) {
+      if (this.pos >= this.input.length) this.raise(start, "Unterminated regular expression");
+      let ch = this.input.charAt(this.pos);
+      if (lineBreak.test(ch)) this.raise(start, "Unterminated regular expression");
+      if (escaped) {
+        escaped = false;
+      } else {
+        if (ch === "[") inClass = true;
+        else if (ch === "]" && inClass) inClass = false;
+        else if (ch === "/" && !inClass) break;
+        escaped = ch === "\\";
+      }
+      ++this.pos;
+    }
+    let content = this.input.slice(start, this.pos);
+    ++this.pos;
+    // Need to use `readWord1` because '\uXXXX' sequences are allowed
+    // here (don't ask).
+    let mods = this.readWord1();
+    // `tmp` is the pattern that gets validated; it may be rewritten below.
+    let tmp = content;
+    if (mods) {
+      let validFlags = /^[gmsiyu]*$/;
+      if (!validFlags.test(mods)) this.raise(start, "Invalid regular expression flag");
+      if (mods.indexOf("u") >= 0 && !regexpUnicodeSupport) {
+        // Replace each astral symbol and every Unicode escape sequence that
+        // possibly represents an astral symbol or a paired surrogate with a
+        // single ASCII symbol to avoid throwing on regular expressions that
+        // are only valid in combination with the `/u` flag.
+        // Note: replacing with the ASCII symbol `x` might cause false
+        // negatives in unlikely scenarios. For example, `[\u{61}-b]` is a
+        // perfectly valid pattern that is equivalent to `[a-b]`, but it would
+        // be replaced by `[x-b]` which throws an error.
+        tmp = tmp.replace(/\\u\{([0-9a-fA-F]+)\}/g, (match, code, offset) => {
+          code = Number("0x" + code);
+          if (code > 0x10FFFF) this.raise(start + offset + 3, "Code point out of bounds");
+          return "x";
+        });
+        tmp = tmp.replace(/\\u([a-fA-F0-9]{4})|[\uD800-\uDBFF][\uDC00-\uDFFF]/g, "x");
+      }
+    }
+    // Detect invalid regular expressions.
+    let value = null;
+    // Rhino's regular expression parser is flaky and throws uncatchable exceptions,
+    // so don't do detection if we are running under Rhino
+    if (!isRhino) {
+      // Validation pass: compiled without flags; errors are raised at `start`.
+      tryCreateRegexp.call(this, tmp, undefined, start);
+      // Get a regular expression object for this pattern-flag pair, or `null` in
+      // case the current environment doesn't support the flags it uses.
+      value = tryCreateRegexp.call(this, content, mods);
+    }
+    return this.finishToken(tt.regexp, {pattern: content, flags: mods, value: value});
+  }
+
+  // Read an integer in the given radix. Return null if zero digits
+  // were read, the integer value otherwise. When `len` is given, this
+  // will return `null` unless the integer has exactly `len` digits.
+
+  readInt(radix, len) {
+    let start = this.pos, total = 0;
+    for (let i = 0, e = len == null ? Infinity : len; i < e; ++i) {
+      let code = this.input.charCodeAt(this.pos), val;
+      if (code >= 97) val = code - 97 + 10; // a
+      else if (code >= 65) val = code - 65 + 10; // A
+      else if (code >= 48 && code <= 57) val = code - 48; // 0-9
+      else val = Infinity;
+      if (val >= radix) break;
+      ++this.pos;
+      total = total * radix + val;
+    }
+    if (this.pos === start || len != null && this.pos - start !== len) return null;
+
+    return total;
+  }
+
+  readRadixNumber(radix) {
+    this.pos += 2; // 0x
+    let val = this.readInt(radix);
+    if (val == null) this.raise(this.start + 2, "Expected number in radix " + radix);
+    if (isIdentifierStart(this.fullCharCodeAtPos())) this.raise(this.pos, "Identifier directly after number");
+    return this.finishToken(tt.num, val);
+  }
+
+  // Read an integer, octal integer, or floating-point number and finish
+  // it as a `num` token. `startsWithDot` is true when the literal begins
+  // with '.' (e.g. `.5`), in which case there is no integer part to read.
+
+  readNumber(startsWithDot) {
+    // `octal` only records that the literal starts with '0'; whether it
+    // is really parsed as octal is decided at the end.
+    let start = this.pos, isFloat = false, octal = this.input.charCodeAt(this.pos) === 48;
+    if (!startsWithDot && this.readInt(10) === null) this.raise(start, "Invalid number");
+    let next = this.input.charCodeAt(this.pos);
+    if (next === 46) { // '.'
+      ++this.pos;
+      // Fraction digits are optional, so the result is ignored.
+      this.readInt(10);
+      isFloat = true;
+      next = this.input.charCodeAt(this.pos);
+    }
+    if (next === 69 || next === 101) { // 'eE'
+      next = this.input.charCodeAt(++this.pos);
+      if (next === 43 || next === 45) ++this.pos; // '+-'
+      // An exponent needs at least one digit.
+      if (this.readInt(10) === null) this.raise(start, "Invalid number");
+      isFloat = true;
+    }
+    if (isIdentifierStart(this.fullCharCodeAtPos())) this.raise(this.pos, "Identifier directly after number");
+
+    // Convert the literal text: floats via parseFloat, plain decimals via
+    // parseInt; a multi-digit literal with a leading zero is octal, which
+    // is rejected when it contains 8 or 9 or when in strict mode.
+    let str = this.input.slice(start, this.pos), val;
+    if (isFloat) val = parseFloat(str);
+    else if (!octal || str.length === 1) val = parseInt(str, 10);
+    else if (/[89]/.test(str) || this.strict) this.raise(start, "Invalid number");
+    else val = parseInt(str, 8);
+    return this.finishToken(tt.num, val);
+  }
+
+  // Read a string value, interpreting backslash-escapes.
+
+  readCodePoint() {
+    let ch = this.input.charCodeAt(this.pos), code;
+
+    if (ch === 123) {
+      let codePos = ++this.pos;
+      code = this.readHexChar(this.input.indexOf("}", this.pos) - this.pos);
+      ++this.pos;
+      if (code > 0x10FFFF) this.raise(codePos, "Code point out of bounds");
+    } else {
+      code = this.readHexChar(4);
+    }
+    return code;
+  }
+
+  readString(quote) {
+    let out = "", chunkStart = ++this.pos;
+    for (;;) {
+      if (this.pos >= this.input.length) this.raise(this.start, "Unterminated string constant");
+      let ch = this.input.charCodeAt(this.pos);
+      if (ch === quote) break;
+      if (ch === 92) { // '\'
+        out += this.input.slice(chunkStart, this.pos);
+        out += this.readEscapedChar(false);
+        chunkStart = this.pos;
+      } else {
+        if (isNewLine(ch)) this.raise(this.start, "Unterminated string constant");
+        ++this.pos;
+      }
+    }
+    out += this.input.slice(chunkStart, this.pos++);
+    return this.finishToken(tt.string, out);
+  }
+
+  // Reads template string tokens. Produces either a `template` token
+  // holding the text up to the next backquote or `${`, or — when called
+  // directly at such a delimiter right after a `template` token — the
+  // `dollarBraceL` / `backQuote` token itself.
+
+  readTmplToken() {
+    let out = "", chunkStart = this.pos;
+    for (;;) {
+      if (this.pos >= this.input.length) this.raise(this.start, "Unterminated template");
+      let ch = this.input.charCodeAt(this.pos);
+      if (ch === 96 || ch === 36 && this.input.charCodeAt(this.pos + 1) === 123) { // '`', '${'
+        // Nothing was consumed and the previous token was the text chunk:
+        // emit the delimiter now.
+        if (this.pos === this.start && this.type === tt.template) {
+          if (ch === 36) {
+            this.pos += 2;
+            return this.finishToken(tt.dollarBraceL);
+          } else {
+            ++this.pos;
+            return this.finishToken(tt.backQuote);
+          }
+        }
+        out += this.input.slice(chunkStart, this.pos);
+        return this.finishToken(tt.template, out);
+      }
+      if (ch === 92) { // '\'
+        out += this.input.slice(chunkStart, this.pos);
+        out += this.readEscapedChar(true);
+        chunkStart = this.pos;
+      } else if (isNewLine(ch)) {
+        out += this.input.slice(chunkStart, this.pos);
+        ++this.pos;
+        // '\r' and '\r\n' are both added to the value as '\n'.
+        switch (ch) {
+          case 13:
+            if (this.input.charCodeAt(this.pos) === 10) ++this.pos;
+          // deliberate fall-through
+          case 10:
+            out += "\n";
+            break;
+          default:
+            out += String.fromCharCode(ch);
+            break;
+        }
+        ++this.curLine;
+        this.lineStart = this.pos;
+        chunkStart = this.pos;
+      } else {
+        ++this.pos;
+      }
+    }
+  }
+
+  // Used to read escaped characters
+
+  readEscapedChar(inTemplate) {
+    let ch = this.input.charCodeAt(++this.pos);
+    ++this.pos;
+    switch (ch) {
+      case 110: return "\n"; // 'n' -> '\n'
+      case 114: return "\r"; // 'r' -> '\r'
+      case 120: return String.fromCharCode(this.readHexChar(2)); // 'x'
+      case 117: return codePointToString(this.readCodePoint()); // 'u'
+      case 116: return "\t"; // 't' -> '\t'
+      case 98: return "\b"; // 'b' -> '\b'
+      case 118: return "\u000b"; // 'v' -> '\u000b'
+      case 102: return "\f"; // 'f' -> '\f'
+      case 13: if (this.input.charCodeAt(this.pos) === 10) ++this.pos; // '\r\n'
+      case 10: // ' \n'
+        this.lineStart = this.pos;
+        ++this.curLine;
+        return "";
+      default:
+        if (ch >= 48 && ch <= 55) {
+          let octalStr = this.input.substr(this.pos - 1, 3).match(/^[0-7]+/)[0];
+          let octal = parseInt(octalStr, 8);
+          if (octal > 255) {
+            octalStr = octalStr.slice(0, -1);
+            octal = parseInt(octalStr, 8);
+          }
+          if (octal > 0 && (this.strict || inTemplate)) {
+            this.raise(this.pos - 2, "Octal literal in strict mode");
+          }
+          this.pos += octalStr.length - 1;
+          return String.fromCharCode(octal);
+        }
+        return String.fromCharCode(ch);
+    }
+  }
+
+  // Used to read character escape sequences ('\x', '\u', '\U').
+
+  readHexChar(len) {
+    let codePos = this.pos;
+    let n = this.readInt(16, len);
+    if (n === null) this.raise(codePos, "Bad character escape sequence");
+    return n;
+  }
+
+  // Read an identifier, and return it as a string. Sets `containsEsc`
+  // to whether the word contained a '\u' escape.
+  //
+  // Incrementally adds only escaped chars, adding other chunks as-is
+  // as a micro-optimization.
+
+  readWord1() {
+    containsEsc = false;
+    // `first` is true until one character has been consumed; an escape in
+    // that position must decode to an identifier-start character.
+    let word = "", first = true, chunkStart = this.pos;
+    while (this.pos < this.input.length) {
+      let ch = this.fullCharCodeAtPos();
+      if (isIdentifierChar(ch, true)) {
+        // Code points above 0xffff take two code units in the input.
+        this.pos += ch <= 0xffff ? 1 : 2;
+      } else if (ch === 92) { // "\"
+        containsEsc = true;
+
+        // Flush the unescaped chunk read so far.
+        word += this.input.slice(chunkStart, this.pos);
+        let escStart = this.pos;
+
+        if (this.input.charCodeAt(++this.pos) !== 117) { // "u"
+          this.raise(this.pos, "Expecting Unicode escape sequence \\uXXXX");
+        }
+
+        ++this.pos;
+        let esc = this.readCodePoint();
+        if (!(first ? isIdentifierStart : isIdentifierChar)(esc, true)) {
+          this.raise(escStart, "Invalid Unicode escape");
+        }
+
+        word += codePointToString(esc);
+        chunkStart = this.pos;
+      } else {
+        break;
+      }
+      first = false;
+    }
+    return word + this.input.slice(chunkStart, this.pos);
+  }
+
+  // Read an identifier or keyword token. Will check for reserved
+  // words when necessary.
+
+  readWord() {
+    let word = this.readWord1();
+    let type = tt.name;
+    if (!containsEsc && this.isKeyword(word))
+      type = keywordTypes[word];
+    return this.finishToken(type, word);
+  }
+}
--- a/src/tokenizer/types.js
+++ b/src/tokenizer/types.js
@@ -0,0 +1,146 @@
+// ## Token types
+
+// The assignment of fine-grained, information-carrying type objects
+// allows the tokenizer to store the information it has about a
+// token in a way that is very cheap for the parser to look up.
+
+// Keyword token types are stored under names that start with an
+// underscore (e.g. `_if`), to make them easy to recognize.
+
+// The `beforeExpr` property is used to disambiguate between regular
+// expressions and divisions. It is set on all token types that can
+// be followed by an expression (thus, a slash after them would be a
+// regular expression).
+//
+// `isLoop` marks a keyword as starting a loop, which is important
+// to know when parsing a label, in order to allow or disallow
+// continue jumps to that label.
+
+export class TokenType {
+  constructor(label, conf = {}) {
+    this.label = label;
+    this.keyword = conf.keyword;
+    this.beforeExpr = !!conf.beforeExpr;
+    this.startsExpr = !!conf.startsExpr;
+    this.rightAssociative = !!conf.rightAssociative;
+    this.isLoop = !!conf.isLoop;
+    this.isAssign = !!conf.isAssign;
+    this.prefix = !!conf.prefix;
+    this.postfix = !!conf.postfix;
+    this.binop = conf.binop || null;
+    this.updateContext = null;
+  }
+}
+
+function binop(name, prec) {
+  return new TokenType(name, {beforeExpr: true, binop: prec});
+}
+const beforeExpr = {beforeExpr: true}, startsExpr = {startsExpr: true};
+
+// All non-keyword token types, keyed by name. Keyword types are added
+// to this object later by `kw`, under `_`-prefixed names.
+export const types = {
+  num: new TokenType("num", startsExpr),
+  regexp: new TokenType("regexp", startsExpr),
+  string: new TokenType("string", startsExpr),
+  name: new TokenType("name", startsExpr),
+  eof: new TokenType("eof"),
+
+  // Punctuation token types.
+  bracketL: new TokenType("[", {beforeExpr: true, startsExpr: true}),
+  bracketR: new TokenType("]"),
+  braceL: new TokenType("{", {beforeExpr: true, startsExpr: true}),
+  braceR: new TokenType("}"),
+  parenL: new TokenType("(", {beforeExpr: true, startsExpr: true}),
+  parenR: new TokenType(")"),
+  comma: new TokenType(",", beforeExpr),
+  semi: new TokenType(";", beforeExpr),
+  colon: new TokenType(":", beforeExpr),
+  doubleColon: new TokenType("::", beforeExpr),
+  dot: new TokenType("."),
+  question: new TokenType("?", beforeExpr),
+  arrow: new TokenType("=>", beforeExpr),
+  template: new TokenType("template"),
+  ellipsis: new TokenType("...", beforeExpr),
+  backQuote: new TokenType("`", startsExpr),
+  dollarBraceL: new TokenType("${", {beforeExpr: true, startsExpr: true}),
+  at: new TokenType("@"),
+
+  // Operators. These carry several kinds of properties to help the
+  // parser use them properly (the presence of these properties is
+  // what categorizes them as operators).
+  //
+  // `binop`, when present, specifies that this operator is a binary
+  // operator, and will refer to its precedence.
+  //
+  // `prefix` and `postfix` mark the operator as a prefix or postfix
+  // unary operator.
+  //
+  // `isAssign` marks all of `=`, `+=`, `-=` etcetera, which act as
+  // binary operators with a very low precedence, that should result
+  // in AssignmentExpression nodes.
+
+  eq: new TokenType("=", {beforeExpr: true, isAssign: true}),
+  assign: new TokenType("_=", {beforeExpr: true, isAssign: true}),
+  incDec: new TokenType("++/--", {prefix: true, postfix: true, startsExpr: true}),
+  prefix: new TokenType("prefix", {beforeExpr: true, prefix: true, startsExpr: true}),
+  logicalOR: binop("||", 1),
+  logicalAND: binop("&&", 2),
+  bitwiseOR: binop("|", 3),
+  bitwiseXOR: binop("^", 4),
+  bitwiseAND: binop("&", 5),
+  equality: binop("==/!=", 6),
+  relational: binop("</>", 7),
+  bitShift: binop("<</>>", 8),
+  plusMin: new TokenType("+/-", {beforeExpr: true, binop: 9, prefix: true, startsExpr: true}),
+  modulo: binop("%", 10),
+  star: binop("*", 10),
+  slash: binop("/", 10),
+  exponent: new TokenType("**", {beforeExpr: true, binop: 11, rightAssociative: true})
+};
+
+// Map keyword names to token types.
+
+export const keywords = {};
+
+// Succinct definitions of keyword token types
+function kw(name, options = {}) {
+  options.keyword = name;
+  keywords[name] = types["_" + name] = new TokenType(name, options);
+}
+
+kw("break");
+kw("case", beforeExpr);
+kw("catch");
+kw("continue");
+kw("debugger");
+kw("default", beforeExpr);
+kw("do", {isLoop: true});
+kw("else", beforeExpr);
+kw("finally");
+kw("for", {isLoop: true});
+kw("function", startsExpr);
+kw("if");
+kw("return", beforeExpr);
+kw("switch");
+kw("throw", beforeExpr);
+kw("try");
+kw("var");
+kw("let");
+kw("const");
+kw("while", {isLoop: true});
+kw("with");
+kw("new", {beforeExpr: true, startsExpr: true});
+kw("this", startsExpr);
+kw("super", startsExpr);
+kw("class");
+kw("extends", beforeExpr);
+kw("export");
+kw("import");
+kw("yield", {beforeExpr: true, startsExpr: true});
+kw("null", startsExpr);
+kw("true", startsExpr);
+kw("false", startsExpr);
+kw("in", {beforeExpr: true, binop: 7});
+kw("instanceof", {beforeExpr: true, binop: 7});
+kw("typeof", {beforeExpr: true, prefix: true, startsExpr: true});
+kw("void", {beforeExpr: true, prefix: true, startsExpr: true});
+kw("delete", {beforeExpr: true, prefix: true, startsExpr: true});