Update tools/generate-identifier-regex.js
The script can now also generate the astral identifier maps.
This commit is contained in:
parent
d76ea4b3be
commit
33307e789a
3
acorn.js
3
acorn.js
@ -566,7 +566,8 @@
|
||||
// These are a run-length and offset encoded representation of the
|
||||
// >0xffff code points that are a valid part of identifiers. The
|
||||
// offset starts at 0x10000, and each pair of numbers represents an
|
||||
// offset to the next range, and then a size of the range.
|
||||
// offset to the next range, and then a size of the range. They were
|
||||
// generated by tools/generate-identifier-regex.js
|
||||
var astralIdentifierStartCodes = [0,11,2,25,2,18,2,1,2,14,3,13,35,122,70,52,268,28,4,48,48,31,17,26,6,37,11,29,3,35,5,7,2,4,43,157,99,39,9,51,157,310,10,21,11,7,153,5,3,0,2,43,2,1,4,0,3,22,11,22,10,30,98,21,11,25,71,55,7,1,65,0,16,3,2,2,2,26,45,28,4,28,36,7,2,27,28,53,11,21,11,18,14,17,111,72,955,52,76,44,33,24,27,35,42,34,4,0,13,47,15,3,22,0,38,17,2,24,133,46,39,7,3,1,3,21,2,6,2,1,2,4,4,0,32,4,287,47,21,1,2,0,185,46,82,47,21,0,60,42,502,63,32,0,449,56,1288,920,104,110,2962,1070,13266,568,8,30,114,29,19,47,17,3,32,20,6,18,881,68,12,0,67,12,16481,1,3071,106,6,12,4,8,8,9,5991,84,2,70,2,1,3,0,3,1,3,3,2,11,2,0,2,6,2,64,2,3,3,7,2,6,2,27,2,3,2,4,2,0,4,6,2,339,3,24,2,24,2,30,2,24,2,30,2,24,2,30,2,24,2,30,2,24,2,7,4149,196,1340,3,2,26,2,1,2,0,3,0,2,9,2,3,2,0,2,0,7,0,5,0,2,0,2,0,2,2,2,1,2,0,3,0,2,0,2,0,2,0,2,0,2,1,2,0,3,3,2,6,2,3,2,3,2,0,2,9,2,16,6,2,2,4,2,16,4421,42710,42,4148,12,221,16355,541];
|
||||
var astralIdentifierCodes = [509,0,227,0,150,4,294,9,1368,2,2,1,6,3,41,2,5,0,166,1,1306,2,54,14,32,9,16,3,46,10,54,9,7,2,37,13,2,9,52,0,13,2,49,13,16,9,83,11,168,11,6,9,8,2,57,0,2,6,3,1,3,2,10,0,11,1,3,6,4,4,316,19,13,9,214,6,3,8,112,16,16,9,82,12,9,9,535,9,20855,9,135,4,60,6,26,9,1016,45,17,3,19723,1,5319,4,4,5,9,7,3,6,31,3,149,2,1418,49,4305,6,792618,239];
|
||||
|
||||
|
||||
@ -1,51 +1,47 @@
|
||||
// Based on https://gist.github.com/mathiasbynens/6334847 by @mathias
|
||||
|
||||
var regenerate = require('regenerate');
|
||||
// Note: run `npm install unicode-7.0.0` first.
|
||||
|
||||
// Which Unicode version should be used?
|
||||
var version = '7.0.0'; // note: also update `package.json` when this changes
|
||||
var version = '7.0.0';
|
||||
|
||||
// Shorthand function
|
||||
// Load the `code-points` module for one data set of the selected Unicode
// package. `what` is the path inside the package, e.g. 'categories/Lu'.
// Returns whatever that module exports.
var get = function(what) {
  var packageName = 'unicode-' + version;
  var modulePath = packageName + '/' + what + '/code-points';
  return require(modulePath);
};
|
||||
// Code points above 127 taken from the ID_Start property list.
var start = require('unicode-' + version + '/properties/ID_Start/code-points')
  .filter(function(ch) { return ch > 127; });

// U+200C and U+200D, followed by every code point above 127 from the
// ID_Continue property list that is not already present in `start`.
// Membership in `start` is answered from a lookup table built once, instead
// of calling `start.indexOf` (a linear scan) for every ID_Continue entry.
var cont = (function() {
  var isStart = Object.create(null);
  start.forEach(function(ch) { isStart[ch] = true; });
  return [0x200c, 0x200d].concat(require('unicode-' + version + '/properties/ID_Continue/code-points')
    .filter(function(ch) { return ch > 127 && !isStart[ch]; }));
})();
|
||||
|
||||
// Unicode categories needed to construct the ES5 regex
|
||||
var Lu = get('categories/Lu');
|
||||
var Ll = get('categories/Ll');
|
||||
var Lt = get('categories/Lt');
|
||||
var Lm = get('categories/Lm');
|
||||
var Lo = get('categories/Lo');
|
||||
var Nl = get('categories/Nl');
|
||||
var Mn = get('categories/Mn');
|
||||
var Mc = get('categories/Mc');
|
||||
var Nd = get('categories/Nd');
|
||||
var Pc = get('categories/Pc');
|
||||
// Left-pad `str` with "0" characters until its length is at least `width`.
// A value that is already long enough is returned unchanged.
function pad(str, width) {
  var padded = str;
  for (; padded.length < width; ) {
    padded = "0" + padded;
  }
  return padded;
}
|
||||
|
||||
// Build the two ES 5.1 identifier character sets with `regenerate` and return
// their `toString()` output under the keys `NonAsciiIdentifierStart` and
// `NonAsciiIdentifierPart`. Astral (U+10000..U+10FFFF) and ASCII
// (U+0000..U+007F) code points are removed from both sets.
var generateES5Regex = function() { // ES 5.1
  // http://mathiasbynens.be/notes/javascript-identifiers#valid-identifier-names
  var startSet = regenerate('$', '_');
  startSet = startSet.add(Lu, Ll, Lt, Lm, Lo, Nl);
  startSet = startSet.removeRange(0x010000, 0x10FFFF); // drop astral symbols
  startSet = startSet.removeRange(0x0, 0x7F); // drop ASCII symbols (Acorn-specific)

  var partSet = regenerate('\u200C', '\u200D', Mn, Mc, Nd, Pc);
  partSet = partSet.removeRange(0x010000, 0x10FFFF); // drop astral symbols
  partSet = partSet.remove(startSet); // start characters are listed separately (Acorn-specific)
  partSet = partSet.removeRange(0x0, 0x7F); // drop ASCII symbols (Acorn-specific)

  var startPattern = startSet.toString();
  var partPattern = partSet.toString();
  return {
    'NonAsciiIdentifierStart': startPattern,
    'NonAsciiIdentifierPart': partPattern
  };
};
|
||||
// Render a code point as the source text of an escape sequence: `\xHH` when
// its hexadecimal form has at most two digits, otherwise `\u` followed by the
// hex digits zero-padded to a width of four.
function esc(code) {
  var digits = code.toString(16);
  var fitsInByte = digits.length <= 2;
  return (fitsInByte ? "\\x" : "\\u") + pad(digits, fitsInByte ? 2 : 4);
}
|
||||
|
||||
var result = generateES5Regex();
|
||||
console.log(
|
||||
'// ECMAScript 5.1/Unicode v%s `nonASCIIidentifierStart`:\n\n%s\n',
|
||||
version,
|
||||
result.NonAsciiIdentifierStart
|
||||
);
|
||||
console.log(
|
||||
'// ECMAScript 5.1/Unicode v%s `nonASCIIidentifier`:\n\n%s',
|
||||
version,
|
||||
result.NonAsciiIdentifierPart
|
||||
);
|
||||
// Collapse runs of consecutive values in `chars` into ranges and encode them.
//
// Ranges ending at or below 0xffff are appended to a character-class body
// (`nonASCII`): a single escape, two escapes for a two-element run, or
// `first-last` for longer runs.
// Ranges ending above 0xffff are appended to `astral` as number pairs: the
// offset of the range start from the previous astral range end (initially
// 0x10000), followed by the range size minus one.
//
// Returns {nonASCII: string, astral: number[]}.
function generate(chars) {
  var classBody = "";
  var astralPairs = [];
  var prevEnd = 0x10000;
  var idx = 0;

  while (idx < chars.length) {
    var first = chars[idx];
    var last = first;

    // Swallow every following element that continues the current run.
    while (idx < chars.length - 1 && chars[idx + 1] == last + 1) {
      idx++;
      last++;
    }

    if (last <= 0xffff) {
      var piece;
      if (first == last) {
        piece = esc(first);
      } else if (first + 1 == last) {
        piece = esc(first) + esc(last);
      } else {
        piece = esc(first) + "-" + esc(last);
      }
      classBody += piece;
    } else {
      astralPairs.push(first - prevEnd, last - first);
      prevEnd = last;
    }

    idx++;
  }

  return {nonASCII: classBody, astral: astralPairs};
}
|
||||
|
||||
var startData = generate(start), contData = generate(cont);
|
||||
|
||||
console.log(" var nonASCIIidentifierStartChars = \"" + startData.nonASCII + "\";");
|
||||
console.log(" var nonASCIIidentifierChars = \"" + contData.nonASCII + "\";");
|
||||
console.log(" var astralIdentifierStartCodes = " + JSON.stringify(startData.astral) + ";");
|
||||
console.log(" var astralIdentifierCodes = " + JSON.stringify(contData.astral) + ";");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user