Use fromCodePoint to convert high value unicode entities (#243)

* Use fromCodePoint to convert high value unicode entities

* Include polyfill for String.fromCodePoint

* copy and adapt fromCodePoint into String.fromcodepoint

In order to avoid modifying String as the polyfill does, I've copied
the source from the polyfill and adapted it return the polyfill
function if the native version does not exist. Once support for node
versions that lack fromCodePoint is dropped, this polyfill can be
removed.

* move license notice to top of file
This commit is contained in:
Ryan Duffy 2017-01-02 03:15:56 -06:00 committed by Daniel Tschinder
parent 3dc403974c
commit 1c13800efd
3 changed files with 71 additions and 3 deletions

View File

@ -0,0 +1,66 @@
// Adapted from String.fromcodepoint to export the function without modifying String
/*! https://mths.be/fromcodepoint v0.2.1 by @mathias */
// The MIT License (MIT)
// Copyright (c) Mathias Bynens
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
let fromCodePoint = String.fromCodePoint;
if (!fromCodePoint) {
const stringFromCharCode = String.fromCharCode;
const floor = Math.floor;
fromCodePoint = function() {
const MAX_SIZE = 0x4000;
const codeUnits = [];
let highSurrogate;
let lowSurrogate;
let index = -1;
const length = arguments.length;
if (!length) {
return "";
}
let result = "";
while (++index < length) {
let codePoint = Number(arguments[index]);
if (
!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
codePoint < 0 || // not a valid Unicode code point
codePoint > 0x10FFFF || // not a valid Unicode code point
floor(codePoint) != codePoint // not an integer
) {
throw RangeError("Invalid code point: " + codePoint);
}
if (codePoint <= 0xFFFF) { // BMP code point
codeUnits.push(codePoint);
} else { // Astral code point; split in surrogate halves
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint -= 0x10000;
highSurrogate = (codePoint >> 10) + 0xD800;
lowSurrogate = (codePoint % 0x400) + 0xDC00;
codeUnits.push(highSurrogate, lowSurrogate);
}
if (index + 1 == length || codeUnits.length > MAX_SIZE) {
result += stringFromCharCode.apply(null, codeUnits);
codeUnits.length = 0;
}
}
return result;
};
}
export default fromCodePoint;

View File

@ -1,5 +1,7 @@
/* eslint indent: 0 */
import fromCodePoint from "./fromCodePoint";
import XHTMLEntities from "./xhtml";
import { TokenType, types as tt } from "../../tokenizer/types";
import { TokContext, types as tc } from "../../tokenizer/context";
@ -136,11 +138,11 @@ pp.jsxReadEntity = function() {
if (str[1] === "x") {
str = str.substr(2);
if (HEX_NUMBER.test(str))
entity = String.fromCharCode(parseInt(str, 16));
entity = fromCodePoint(parseInt(str, 16));
} else {
str = str.substr(1);
if (DECIMAL_NUMBER.test(str))
entity = String.fromCharCode(parseInt(str, 10));
entity = fromCodePoint(parseInt(str, 10));
}
} else {
entity = XHTMLEntities[str];

View File

@ -136,7 +136,7 @@
}
},
"extra": null,
"value": ""
"value": "💩"
}
]
}