Overhaul comment attachment (#13521)

* refactor: inline pushComment

* chore: add benchmark cases

* perf: overhaul comment attachment

* cleanup

* update test fixtures

They are all bugfixes.

* fix: merge HTMLComment parsing to skipSpace

* perf: remove unattachedCommentStack

baseline 128 nested leading comments: 11_034 ops/sec ±50.64% (0.091ms)
baseline 256 nested leading comments: 6_037 ops/sec ±11.46% (0.166ms)
baseline 512 nested leading comments: 3_077 ops/sec ±2.31% (0.325ms)
baseline 1024 nested leading comments: 1_374 ops/sec ±3.22% (0.728ms)
current 128 nested leading comments: 11_027 ops/sec ±37.41% (0.091ms)
current 256 nested leading comments: 6_736 ops/sec ±1.39% (0.148ms)
current 512 nested leading comments: 3_306 ops/sec ±0.69% (0.302ms)
current 1024 nested leading comments: 1_579 ops/sec ±2.09% (0.633ms)

baseline 128 nested trailing comments: 10_073 ops/sec ±42.95% (0.099ms)
baseline 256 nested trailing comments: 6_294 ops/sec ±2.19% (0.159ms)
baseline 512 nested trailing comments: 3_041 ops/sec ±0.8% (0.329ms)
baseline 1024 nested trailing comments: 1_530 ops/sec ±1.18% (0.654ms)
current 128 nested trailing comments: 11_461 ops/sec ±44.89% (0.087ms)
current 256 nested trailing comments: 7_212 ops/sec ±1.6% (0.139ms)
current 512 nested trailing comments: 3_403 ops/sec ±1% (0.294ms)
current 1024 nested trailing comments: 1_539 ops/sec ±1.49% (0.65ms)

* fix: do not expose CommentWhitespace type

* add comments on CommentWhitespace

* add test case for #11576

* fix: mark containerNode as the innermost node containing commentWS

* fix: adjust trailing comma comments for Record/Tuple/OptionalCall

* fix: drain comment stacks in parseExpression

* docs: update comments

* add a new benchmark

* chore: containerNode => containingNode

* add more benchmark cases

* fix: avoid finishNodeAt in stmtToDirective

* finalize comment right after containerNode is set

* add testcase about directive

* fix: finish SequenceExpression at current pos and adjust later

* chore: rename test cases

* add new test case on switch statement

* fix: adjust comments after trailing comma of function params

* add comment attachment design doc

* misc fix

* fix: reset previous trailing comments when parsing async method/accessor

* chore: add more comment testcases

* fix flow errors

* fix: handle comments when parsing async arrow

* fix: handle comments when "static" is a class modifier

* fix flow errors

* fix: handle comments when parsing async function/do

* refactor: simplify resetPreviousNodeTrailingComments

* update test fixtures
This commit is contained in:
Huáng Jùnliàng
2021-07-07 11:51:40 -04:00
committed by GitHub
parent 8a3e0fd960
commit 79d3276f61
76 changed files with 4822 additions and 498 deletions

View File

@@ -4,7 +4,6 @@
import type { Options } from "../options";
import * as N from "../types";
import type { Position } from "../util/location";
import * as charCodes from "charcodes";
import { isIdentifierStart, isIdentifierChar } from "../util/identifier";
import { types as tt, keywords as keywordTypes, type TokenType } from "./types";
@@ -304,28 +303,7 @@ export default class Tokenizer extends ParserErrors {
}
}
pushComment(
block: boolean,
text: string,
start: number,
end: number,
startLoc: Position,
endLoc: Position,
): void {
const comment = {
type: block ? "CommentBlock" : "CommentLine",
value: text,
start: start,
end: end,
loc: new SourceLocation(startLoc, endLoc),
};
if (this.options.tokens) this.pushToken(comment);
this.state.comments.push(comment);
this.addComment(comment);
}
skipBlockComment(): void {
skipBlockComment(): N.CommentBlock | void {
let startLoc;
if (!this.isLookahead) startLoc = this.state.curPosition();
const start = this.state.pos;
@@ -348,17 +326,19 @@ export default class Tokenizer extends ParserErrors {
if (this.isLookahead) return;
/*:: invariant(startLoc) */
this.pushComment(
true,
this.input.slice(start + 2, end),
start,
this.state.pos,
startLoc,
this.state.curPosition(),
);
const value = this.input.slice(start + 2, end);
const comment = {
type: "CommentBlock",
value: value,
start: start,
end: end + 2,
loc: new SourceLocation(startLoc, this.state.curPosition()),
};
if (this.options.tokens) this.pushToken(comment);
return comment;
}
skipLineComment(startSkip: number): void {
skipLineComment(startSkip: number): N.CommentLine | void {
const start = this.state.pos;
let startLoc;
if (!this.isLookahead) startLoc = this.state.curPosition();
@@ -374,20 +354,26 @@ export default class Tokenizer extends ParserErrors {
if (this.isLookahead) return;
/*:: invariant(startLoc) */
this.pushComment(
false,
this.input.slice(start + startSkip, this.state.pos),
const end = this.state.pos;
const value = this.input.slice(start + startSkip, end);
const comment = {
type: "CommentLine",
value,
start,
this.state.pos,
startLoc,
this.state.curPosition(),
);
end,
loc: new SourceLocation(startLoc, this.state.curPosition()),
};
if (this.options.tokens) this.pushToken(comment);
return comment;
}
// Called at the start of the parse and after every token. Skips
// whitespace and comments and, when any comments were skipped, pushes
// them as a single entry onto `this.state.commentStack`.
skipSpace(): void {
const spaceStart = this.state.pos;
const comments = [];
loop: while (this.state.pos < this.length) {
const ch = this.input.charCodeAt(this.state.pos);
switch (ch) {
@@ -413,13 +399,23 @@ export default class Tokenizer extends ParserErrors {
case charCodes.slash:
switch (this.input.charCodeAt(this.state.pos + 1)) {
case charCodes.asterisk:
this.skipBlockComment();
case charCodes.asterisk: {
const comment = this.skipBlockComment();
if (comment !== undefined) {
this.addComment(comment);
comments.push(comment);
}
break;
}
case charCodes.slash:
this.skipLineComment(2);
case charCodes.slash: {
const comment = this.skipLineComment(2);
if (comment !== undefined) {
this.addComment(comment);
comments.push(comment);
}
break;
}
default:
break loop;
@@ -429,11 +425,56 @@ export default class Tokenizer extends ParserErrors {
default:
if (isWhitespace(ch)) {
++this.state.pos;
} else if (ch === charCodes.dash && !this.inModule) {
const pos = this.state.pos;
if (
this.input.charCodeAt(pos + 1) === charCodes.dash &&
this.input.charCodeAt(pos + 2) === charCodes.greaterThan &&
(spaceStart === 0 || this.state.lineStart > spaceStart)
) {
// A `-->` line comment
const comment = this.skipLineComment(3);
if (comment !== undefined) {
this.addComment(comment);
comments.push(comment);
}
} else {
break loop;
}
} else if (ch === charCodes.lessThan && !this.inModule) {
const pos = this.state.pos;
if (
this.input.charCodeAt(pos + 1) === charCodes.exclamationMark &&
this.input.charCodeAt(pos + 2) === charCodes.dash &&
this.input.charCodeAt(pos + 3) === charCodes.dash
) {
// `<!--`, an XML-style comment that should be interpreted as a line comment
const comment = this.skipLineComment(4);
if (comment !== undefined) {
this.addComment(comment);
comments.push(comment);
}
} else {
break loop;
}
} else {
break loop;
}
}
}
if (comments.length > 0) {
const end = this.state.pos;
const CommentWhitespace = {
start: spaceStart,
end,
comments,
leadingNode: null,
trailingNode: null,
containingNode: null,
};
this.state.commentStack.push(CommentWhitespace);
}
}
// Called at the end of every token. Sets `end`, `val`, and
@@ -661,18 +702,6 @@ export default class Tokenizer extends ParserErrors {
const next = this.input.charCodeAt(this.state.pos + 1);
if (next === code) {
if (
next === charCodes.dash &&
!this.inModule &&
this.input.charCodeAt(this.state.pos + 2) === charCodes.greaterThan &&
(this.state.lastTokEnd === 0 || this.hasPrecedingLineBreak())
) {
// A `-->` line comment
this.skipLineComment(3);
this.skipSpace();
this.nextToken();
return;
}
this.finishOp(tt.incDec, 2);
return;
}
@@ -703,20 +732,6 @@ export default class Tokenizer extends ParserErrors {
return;
}
if (
next === charCodes.exclamationMark &&
code === charCodes.lessThan &&
!this.inModule &&
this.input.charCodeAt(this.state.pos + 2) === charCodes.dash &&
this.input.charCodeAt(this.state.pos + 3) === charCodes.dash
) {
// `<!--`, an XML-style comment that should be interpreted as a line comment
this.skipLineComment(4);
this.skipSpace();
this.nextToken();
return;
}
if (next === charCodes.equalsTo) {
// <= | >=
size = 2;

View File

@@ -2,6 +2,7 @@
import type { Options } from "../options";
import * as N from "../types";
import type { CommentWhitespace } from "../parser/comments";
import { Position } from "../util/location";
import { types as ct, type TokContext } from "./context";
@@ -89,20 +90,11 @@ export default class State {
// where @foo belongs to the outer class and @bar to the inner
decoratorStack: Array<Array<N.Decorator>> = [[]];
// Comment store.
// Comment store for Program.comments
comments: Array<N.Comment> = [];
// Comment attachment store
trailingComments: Array<N.Comment> = [];
leadingComments: Array<N.Comment> = [];
commentStack: Array<{
start: number,
leadingComments: ?Array<N.Comment>,
trailingComments: ?Array<N.Comment>,
type: string,
}> = [];
// $FlowIgnore this is initialized when the parser starts.
commentPreviousNode: N.Node = null;
commentStack: Array<CommentWhitespace> = [];
// The current position of the tokenizer in the input.
pos: number = 0;