Update tools/generate-identifier-regex.js

So that it can also spit out the astral maps.
This commit is contained in:
Marijn Haverbeke 2015-03-05 11:15:04 +01:00
parent d76ea4b3be
commit 33307e789a
2 changed files with 43 additions and 46 deletions

View File

@ -566,7 +566,8 @@
// These are a run-length and offset encoded representation of the
// >0xffff code points that are a valid part of identifiers. The
// offset starts at 0x10000, and each pair of numbers represents an
// offset to the next range, and then a size of the range.
// offset to the next range, and then a size of the range. They were
// generated by tools/generate-identifier-regex.js
var astralIdentifierStartCodes = [0,11,2,25,2,18,2,1,2,14,3,13,35,122,70,52,268,28,4,48,48,31,17,26,6,37,11,29,3,35,5,7,2,4,43,157,99,39,9,51,157,310,10,21,11,7,153,5,3,0,2,43,2,1,4,0,3,22,11,22,10,30,98,21,11,25,71,55,7,1,65,0,16,3,2,2,2,26,45,28,4,28,36,7,2,27,28,53,11,21,11,18,14,17,111,72,955,52,76,44,33,24,27,35,42,34,4,0,13,47,15,3,22,0,38,17,2,24,133,46,39,7,3,1,3,21,2,6,2,1,2,4,4,0,32,4,287,47,21,1,2,0,185,46,82,47,21,0,60,42,502,63,32,0,449,56,1288,920,104,110,2962,1070,13266,568,8,30,114,29,19,47,17,3,32,20,6,18,881,68,12,0,67,12,16481,1,3071,106,6,12,4,8,8,9,5991,84,2,70,2,1,3,0,3,1,3,3,2,11,2,0,2,6,2,64,2,3,3,7,2,6,2,27,2,3,2,4,2,0,4,6,2,339,3,24,2,24,2,30,2,24,2,30,2,24,2,30,2,24,2,30,2,24,2,7,4149,196,1340,3,2,26,2,1,2,0,3,0,2,9,2,3,2,0,2,0,7,0,5,0,2,0,2,0,2,2,2,1,2,0,3,0,2,0,2,0,2,0,2,0,2,1,2,0,3,3,2,6,2,3,2,3,2,0,2,9,2,16,6,2,2,4,2,16,4421,42710,42,4148,12,221,16355,541];
var astralIdentifierCodes = [509,0,227,0,150,4,294,9,1368,2,2,1,6,3,41,2,5,0,166,1,1306,2,54,14,32,9,16,3,46,10,54,9,7,2,37,13,2,9,52,0,13,2,49,13,16,9,83,11,168,11,6,9,8,2,57,0,2,6,3,1,3,2,10,0,11,1,3,6,4,4,316,19,13,9,214,6,3,8,112,16,16,9,82,12,9,9,535,9,20855,9,135,4,60,6,26,9,1016,45,17,3,19723,1,5319,4,4,5,9,7,3,6,31,3,149,2,1418,49,4305,6,792618,239];

View File

@ -1,51 +1,47 @@
// Based on https://gist.github.com/mathiasbynens/6334847 by @mathias
var regenerate = require('regenerate');
// Note: run `npm install unicode-7.0.0` first.
// Which Unicode version should be used?
var version = '7.0.0'; // note: also update `package.json` when this changes
var version = '7.0.0';
// Shorthand function
var get = function(what) {
  // Each data set is loaded from `unicode-<version>/<what>/code-points`.
  var modulePath = ['unicode-' + version, what, 'code-points'].join('/');
  return require(modulePath);
};
// Non-ASCII code points (> 127) taken from the ID_Start property data.
var start = require('unicode-' + version + '/properties/ID_Start/code-points')
  .filter(function(ch) { return ch > 127; });
// Membership table for `start`, so that each ID_Continue entry below is
// checked with a constant-time lookup instead of a linear `indexOf` scan
// over the whole `start` array.
var isStart = Object.create(null);
start.forEach(function(ch) { isStart[ch] = true; });
// U+200C and U+200D, followed by every non-ASCII ID_Continue code point
// that is not already listed in `start`.
var cont = [0x200c, 0x200d].concat(require('unicode-' + version + '/properties/ID_Continue/code-points')
  .filter(function(ch) { return ch > 127 && !isStart[ch]; }));
// Code-point lists, one per Unicode category, that the ES5 identifier
// regex is assembled from.
var Lu = get('categories/Lu'),
    Ll = get('categories/Ll'),
    Lt = get('categories/Lt'),
    Lm = get('categories/Lm'),
    Lo = get('categories/Lo'),
    Nl = get('categories/Nl'),
    Mn = get('categories/Mn'),
    Mc = get('categories/Mc'),
    Nd = get('categories/Nd'),
    Pc = get('categories/Pc');
// Prefixes `str` with "0" characters until it is at least `width` long.
// A value that is already long enough is returned unchanged.
function pad(str, width) {
  return str.length < width ? pad("0" + str, width) : str;
}
var generateES5Regex = function() { // ES 5.1
  // http://mathiasbynens.be/notes/javascript-identifiers#valid-identifier-names
  var ASTRAL_FIRST = 0x010000, ASTRAL_LAST = 0x10FFFF;
  var ASCII_FIRST = 0x0, ASCII_LAST = 0x7F;

  // Characters allowed at the start of an identifier.
  var startSet = regenerate('$', '_');
  startSet = startSet.add(Lu, Ll, Lt, Lm, Lo, Nl);
  startSet = startSet.removeRange(ASTRAL_FIRST, ASTRAL_LAST); // remove astral symbols
  startSet = startSet.removeRange(ASCII_FIRST, ASCII_LAST); // remove ASCII symbols (Acorn-specific)

  // Characters allowed only after the first position.
  var partSet = regenerate('\u200C', '\u200D', Mn, Mc, Nd, Pc);
  partSet = partSet.removeRange(ASTRAL_FIRST, ASTRAL_LAST); // remove astral symbols
  partSet = partSet.remove(startSet); // (Acorn-specific)
  partSet = partSet.removeRange(ASCII_FIRST, ASCII_LAST); // remove ASCII symbols (Acorn-specific)

  var patterns = {};
  patterns['NonAsciiIdentifierStart'] = startSet.toString();
  patterns['NonAsciiIdentifierPart'] = partSet.toString();
  return patterns;
};
// Renders a code point as escaped regex source text: `\xHH` when its hex
// form has at most two digits, otherwise `\u` followed by the hex form
// zero-padded to four digits.
function esc(code) {
  var digits = code.toString(16);
  var isShort = digits.length <= 2;
  return (isShort ? "\\x" : "\\u") + pad(digits, isShort ? 2 : 4);
}
// Build both patterns once, then print each one under a header line that
// names the Unicode version in use.
var result = generateES5Regex();
var startFormat = '// ECMAScript 5.1/Unicode v%s `nonASCIIidentifierStart`:\n\n%s\n';
var partFormat = '// ECMAScript 5.1/Unicode v%s `nonASCIIidentifier`:\n\n%s';
console.log(startFormat, version, result.NonAsciiIdentifierStart);
console.log(partFormat, version, result.NonAsciiIdentifierPart);
// Encodes a list of code points in two forms.
//
// `chars` must be an array of numeric code points. Run detection and the
// astral offsets both rely on the values above 0xffff being in ascending
// order.
//
// Returns `{nonASCII, astral}`:
//  - `nonASCII`: escaped regex source text for the code points <= 0xffff.
//    Single values are written as one escape, two adjacent values as two
//    escapes, and longer runs as `from-to`.
//  - `astral`: a flat array of number pairs for the code points > 0xffff.
//    Each pair is the offset of a range's first code point from the end of
//    the previous range (the first offset is measured from 0x10000),
//    followed by the range's size (`to - from`).
function generate(chars) {
  var astral = [], re = "";
  for (var i = 0, at = 0x10000; i < chars.length; i++) {
    var from = chars[i], to = from;
    // Extend the run over consecutive code points, but never across the
    // 0xffff/0x10000 boundary. A run straddling it would be treated as a
    // single astral range, dropping its BMP part from the regex text and
    // pushing a negative offset.
    while (i < chars.length - 1 && to !== 0xffff && chars[i + 1] === to + 1) {
      i++;
      to++;
    }
    if (to <= 0xffff) {
      if (from === to) re += esc(from);
      else if (from + 1 === to) re += esc(from) + esc(to);
      else re += esc(from) + "-" + esc(to);
    } else {
      astral.push(from - at, to - from);
      // The next offset is measured from the end of this range.
      at = to;
    }
  }
  return {nonASCII: re, astral: astral};
}
// Encode both code-point lists, then print the four results as `var`
// declarations, one per line.
var startData = generate(start);
var contData = generate(cont);
[
  " var nonASCIIidentifierStartChars = \"" + startData.nonASCII + "\";",
  " var nonASCIIidentifierChars = \"" + contData.nonASCII + "\";",
  " var astralIdentifierStartCodes = " + JSON.stringify(startData.astral) + ";",
  " var astralIdentifierCodes = " + JSON.stringify(contData.astral) + ";"
].forEach(function(line) { console.log(line); });