Make the tokenizer aware of multi-byte characters in ES6 mode

Add a data structure to recognize astral identifier chars. Parse whole
code points when looking for identifiers.

Issue #215
This commit is contained in:
Marijn Haverbeke
2015-03-04 18:11:03 +01:00
parent 891d5d07dd
commit d76ea4b3be
2 changed files with 173 additions and 42 deletions

View File

@@ -15093,6 +15093,111 @@ test("`${/\\d/.exec('1')[0]}`", {
ecmaVersion: 6
});
// An astral (supplementary-plane) character written literally after the
// first character of an identifier must be accepted with ecmaVersion 6.
// The expected offsets count UTF-16 code units, so the two-character-looking
// identifier `_𐒦` spans 4..7 and the whole program is 13 units long.
test("var _𐒦 = 10;", {
  type: "Program",
  start: 0,
  end: 13,
  body: [
    {
      type: "VariableDeclaration",
      start: 0,
      end: 13,
      declarations: [
        {
          type: "VariableDeclarator",
          start: 4,
          end: 12,
          id: {
            type: "Identifier",
            start: 4,
            end: 7,
            name: "_𐒦"
          },
          init: {
            type: "Literal",
            start: 10,
            end: 12,
            value: 10,
            raw: "10"
          }
        }
      ],
      kind: "var"
    }
  ]
}, {ecmaVersion: 6});
// An astral (supplementary-plane) character written literally as the FIRST
// character of an identifier must be accepted with ecmaVersion 6.
// The expected offsets count UTF-16 code units, so `𫠝_` spans 4..7.
test("var 𫠝_ = 10;", {
  type: "Program",
  start: 0,
  end: 13,
  body: [
    {
      type: "VariableDeclaration",
      start: 0,
      end: 13,
      declarations: [
        {
          type: "VariableDeclarator",
          start: 4,
          end: 12,
          id: {
            type: "Identifier",
            start: 4,
            end: 7,
            name: "𫠝_"
          },
          init: {
            type: "Literal",
            start: 10,
            end: 12,
            value: 10,
            raw: "10"
          }
        }
      ],
      kind: "var"
    }
  ]
}, {ecmaVersion: 6});
// The same identifier as in the literal `_𐒦` test, but with the astral
// character spelled as a `\u{104A6}` code point escape in the parsed source.
// The escape occupies 9 source characters, so the Identifier node spans
// 4..14 while its `name` holds the decoded character.
test("var _\\u{104A6} = 10;", {
  type: "Program",
  start: 0,
  end: 20,
  body: [
    {
      type: "VariableDeclaration",
      start: 0,
      end: 20,
      declarations: [
        {
          type: "VariableDeclarator",
          start: 4,
          end: 19,
          id: {
            type: "Identifier",
            start: 4,
            end: 14,
            name: "_𐒦"
          },
          init: {
            type: "Literal",
            start: 17,
            end: 19,
            value: 10,
            raw: "10"
          }
        }
      ],
      kind: "var"
    }
  ]
}, {ecmaVersion: 6});
// Astral characters that are expected to be rejected inside an identifier.
// The reported position is the column of the offending character.

// Rejected after the leading `_` (column 5) with ecmaVersion 6.
testFail(
  "var _𖫵 = 11;",
  "Unexpected character '𖫵' (1:5)",
  {ecmaVersion: 6}
);

// Rejected as the first identifier character (column 4) with ecmaVersion 6.
testFail(
  "var 𫠞_ = 12;",
  "Unexpected character '𫠞' (1:4)",
  {ecmaVersion: 6}
);

// The source that parses under ecmaVersion 6 (see the `var 𫠝_ = 10;` test)
// must still be rejected under ecmaVersion 5.
testFail(
  "var 𫠝_ = 10;",
  "Unexpected character '𫠝' (1:4)",
  {ecmaVersion: 5}
);
// Declarations used directly as the body of `if`, `for` and `while` are
// expected to fail with "Unexpected token" at the start of the declaration.
testFail(
  "if (1) let x = 10;",
  "Unexpected token (1:7)",
  {ecmaVersion: 6}
);
testFail(
  "for (;;) const x = 10;",
  "Unexpected token (1:9)",
  {ecmaVersion: 6}
);
testFail(
  "while (1) function foo(){}",
  "Unexpected token (1:10)",
  {ecmaVersion: 6}
);