Overhaul comment attachment (#13521)

* refactor: inline pushComment
* chore: add benchmark cases
* perf: overhaul comment attachment
* cleanup
* update test fixtures

  They are all bugfixes.

* fix: merge HTMLComment parsing to skipSpace
* perf: remove unattachedCommentStack

  baseline 128 nested leading comments: 11_034 ops/sec ±50.64% (0.091ms)
  baseline 256 nested leading comments: 6_037 ops/sec ±11.46% (0.166ms)
  baseline 512 nested leading comments: 3_077 ops/sec ±2.31% (0.325ms)
  baseline 1024 nested leading comments: 1_374 ops/sec ±3.22% (0.728ms)
  current 128 nested leading comments: 11_027 ops/sec ±37.41% (0.091ms)
  current 256 nested leading comments: 6_736 ops/sec ±1.39% (0.148ms)
  current 512 nested leading comments: 3_306 ops/sec ±0.69% (0.302ms)
  current 1024 nested leading comments: 1_579 ops/sec ±2.09% (0.633ms)
  baseline 128 nested trailing comments: 10_073 ops/sec ±42.95% (0.099ms)
  baseline 256 nested trailing comments: 6_294 ops/sec ±2.19% (0.159ms)
  baseline 512 nested trailing comments: 3_041 ops/sec ±0.8% (0.329ms)
  baseline 1024 nested trailing comments: 1_530 ops/sec ±1.18% (0.654ms)
  current 128 nested trailing comments: 11_461 ops/sec ±44.89% (0.087ms)
  current 256 nested trailing comments: 7_212 ops/sec ±1.6% (0.139ms)
  current 512 nested trailing comments: 3_403 ops/sec ±1% (0.294ms)
  current 1024 nested trailing comments: 1_539 ops/sec ±1.49% (0.65ms)

* fix: do not expose CommentWhitespace type
* add comments on CommentWhitespace
* add test case for #11576
* fix: mark containerNode be the innermost node containing commentWS
* fix: adjust trailing comma comments for Record/Tuple/OptionalCall
* fix: drain comment stacks in parseExpression
* docs: update comments
* add a new benchmark
* chore: containerNode => containingNode
* add more benchmark cases
* fix: avoid finishNodeAt in stmtToDirective
* finalize comment right after containerNode is set
* add testcase about directive
* fix: finish SequenceExpression at current pos and adjust later
* chore: rename test cases
* add new test case on switch statement
* fix: adjust comments after trailing comma of function params
* add comment attachment design doc
* misc fix
* fix: reset previous trailing comments when parsing async method/accessor
* chore: add more comment testcases
* fix flow errors
* fix: handle comments when parsing async arrow
* fix: handle comments when "static" is a class modifier
* fix flow errors
* fix: handle comments when parsing async function/do
* refactor: simplify resetPreviousNodeTrailingComments
* update test fixtures
2021-07-07 11:51:40 -04:00
parent 8a3e0fd960
commit 79d3276f61
76 changed files with 4822 additions and 498 deletions
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@@ -4,7 +4,6 @@

 import type { Options } from "../options";
 import * as N from "../types";
-import type { Position } from "../util/location";
 import * as charCodes from "charcodes";
 import { isIdentifierStart, isIdentifierChar } from "../util/identifier";
 import { types as tt, keywords as keywordTypes, type TokenType } from "./types";
@@ -304,28 +303,7 @@ export default class Tokenizer extends ParserErrors {
    }
  }

-  pushComment(
-    block: boolean,
-    text: string,
-    start: number,
-    end: number,
-    startLoc: Position,
-    endLoc: Position,
-  ): void {
-    const comment = {
-      type: block ? "CommentBlock" : "CommentLine",
-      value: text,
-      start: start,
-      end: end,
-      loc: new SourceLocation(startLoc, endLoc),
-    };
-
-    if (this.options.tokens) this.pushToken(comment);
-    this.state.comments.push(comment);
-    this.addComment(comment);
-  }
-
-  skipBlockComment(): void {
+  skipBlockComment(): N.CommentBlock | void {
    let startLoc;
    if (!this.isLookahead) startLoc = this.state.curPosition();
    const start = this.state.pos;
@@ -348,17 +326,19 @@ export default class Tokenizer extends ParserErrors {
    if (this.isLookahead) return;
    /*:: invariant(startLoc) */

-    this.pushComment(
-      true,
-      this.input.slice(start + 2, end),
-      start,
-      this.state.pos,
-      startLoc,
-      this.state.curPosition(),
-    );
+    const value = this.input.slice(start + 2, end);
+    const comment = {
+      type: "CommentBlock",
+      value: value,
+      start: start,
+      end: end + 2,
+      loc: new SourceLocation(startLoc, this.state.curPosition()),
+    };
+    if (this.options.tokens) this.pushToken(comment);
+    return comment;
  }

-  skipLineComment(startSkip: number): void {
+  skipLineComment(startSkip: number): N.CommentLine | void {
    const start = this.state.pos;
    let startLoc;
    if (!this.isLookahead) startLoc = this.state.curPosition();
@@ -374,20 +354,26 @@ export default class Tokenizer extends ParserErrors {
    if (this.isLookahead) return;
    /*:: invariant(startLoc) */

-    this.pushComment(
-      false,
-      this.input.slice(start + startSkip, this.state.pos),
+    const end = this.state.pos;
+    const value = this.input.slice(start + startSkip, end);
+
+    const comment = {
+      type: "CommentLine",
+      value,
      start,
-      this.state.pos,
-      startLoc,
-      this.state.curPosition(),
-    );
+      end,
+      loc: new SourceLocation(startLoc, this.state.curPosition()),
+    };
+    if (this.options.tokens) this.pushToken(comment);
+    return comment;
  }

  // Called at the start of the parse and after every token. Skips
  // whitespace and comments, and records any skipped comments on
  // `state.commentStack`.

  skipSpace(): void {
+    const spaceStart = this.state.pos;
+    const comments = [];
    loop: while (this.state.pos < this.length) {
      const ch = this.input.charCodeAt(this.state.pos);
      switch (ch) {
@@ -413,13 +399,23 @@ export default class Tokenizer extends ParserErrors {

        case charCodes.slash:
          switch (this.input.charCodeAt(this.state.pos + 1)) {
-            case charCodes.asterisk:
-              this.skipBlockComment();
+            case charCodes.asterisk: {
+              const comment = this.skipBlockComment();
+              if (comment !== undefined) {
+                this.addComment(comment);
+                comments.push(comment);
+              }
              break;
+            }

-            case charCodes.slash:
-              this.skipLineComment(2);
+            case charCodes.slash: {
+              const comment = this.skipLineComment(2);
+              if (comment !== undefined) {
+                this.addComment(comment);
+                comments.push(comment);
+              }
              break;
+            }

            default:
              break loop;
@@ -429,11 +425,56 @@ export default class Tokenizer extends ParserErrors {
        default:
          if (isWhitespace(ch)) {
            ++this.state.pos;
+          } else if (ch === charCodes.dash && !this.inModule) {
+            const pos = this.state.pos;
+            if (
+              this.input.charCodeAt(pos + 1) === charCodes.dash &&
+              this.input.charCodeAt(pos + 2) === charCodes.greaterThan &&
+              (spaceStart === 0 || this.state.lineStart > spaceStart)
+            ) {
+              // A `-->` line comment
+              const comment = this.skipLineComment(3);
+              if (comment !== undefined) {
+                this.addComment(comment);
+                comments.push(comment);
+              }
+            } else {
+              break loop;
+            }
+          } else if (ch === charCodes.lessThan && !this.inModule) {
+            const pos = this.state.pos;
+            if (
+              this.input.charCodeAt(pos + 1) === charCodes.exclamationMark &&
+              this.input.charCodeAt(pos + 2) === charCodes.dash &&
+              this.input.charCodeAt(pos + 3) === charCodes.dash
+            ) {
+              // `<!--`, an XML-style comment that should be interpreted as a line comment
+              const comment = this.skipLineComment(4);
+              if (comment !== undefined) {
+                this.addComment(comment);
+                comments.push(comment);
+              }
+            } else {
+              break loop;
+            }
          } else {
            break loop;
          }
      }
    }
+
+    if (comments.length > 0) {
+      const end = this.state.pos;
+      const CommentWhitespace = {
+        start: spaceStart,
+        end,
+        comments,
+        leadingNode: null,
+        trailingNode: null,
+        containingNode: null,
+      };
+      this.state.commentStack.push(CommentWhitespace);
+    }
  }

  // Called at the end of every token. Sets `end`, `val`, and
@@ -661,18 +702,6 @@ export default class Tokenizer extends ParserErrors {
    const next = this.input.charCodeAt(this.state.pos + 1);

    if (next === code) {
-      if (
-        next === charCodes.dash &&
-        !this.inModule &&
-        this.input.charCodeAt(this.state.pos + 2) === charCodes.greaterThan &&
-        (this.state.lastTokEnd === 0 || this.hasPrecedingLineBreak())
-      ) {
-        // A `-->` line comment
-        this.skipLineComment(3);
-        this.skipSpace();
-        this.nextToken();
-        return;
-      }
      this.finishOp(tt.incDec, 2);
      return;
    }
@@ -703,20 +732,6 @@ export default class Tokenizer extends ParserErrors {
      return;
    }

-    if (
-      next === charCodes.exclamationMark &&
-      code === charCodes.lessThan &&
-      !this.inModule &&
-      this.input.charCodeAt(this.state.pos + 2) === charCodes.dash &&
-      this.input.charCodeAt(this.state.pos + 3) === charCodes.dash
-    ) {
-      // `<!--`, an XML-style comment that should be interpreted as a line comment
-      this.skipLineComment(4);
-      this.skipSpace();
-      this.nextToken();
-      return;
-    }
-
    if (next === charCodes.equalsTo) {
      // <= | >=
      size = 2;
--- a/packages/babel-parser/src/tokenizer/state.js
+++ b/packages/babel-parser/src/tokenizer/state.js
@@ -2,6 +2,7 @@

 import type { Options } from "../options";
 import * as N from "../types";
+import type { CommentWhitespace } from "../parser/comments";
 import { Position } from "../util/location";

 import { types as ct, type TokContext } from "./context";
@@ -89,20 +90,11 @@ export default class State {
  // where @foo belongs to the outer class and @bar to the inner
  decoratorStack: Array<Array<N.Decorator>> = [[]];

-  // Comment store.
+  // Comment store for Program.comments
  comments: Array<N.Comment> = [];

  // Comment attachment store
-  trailingComments: Array<N.Comment> = [];
-  leadingComments: Array<N.Comment> = [];
-  commentStack: Array<{
-    start: number,
-    leadingComments: ?Array<N.Comment>,
-    trailingComments: ?Array<N.Comment>,
-    type: string,
-  }> = [];
-  // $FlowIgnore this is initialized when the parser starts.
-  commentPreviousNode: N.Node = null;
+  commentStack: Array<CommentWhitespace> = [];

  // The current position of the tokenizer in the input.
  pos: number = 0;