From 1c13800efdbe22c67efa3307a91712e9bfb48544 Mon Sep 17 00:00:00 2001 From: Ryan Duffy Date: Mon, 2 Jan 2017 03:15:56 -0600 Subject: [PATCH] Use fromCodePoint to convert high value unicode entities (#243) * Use fromCodePoint to convert high value unicode entities * Include polyfill for String.fromCodePoint * copy and adapt fromCodePoint into String.fromcodepoint In order to avoid modifying String as the polyfill does, I've copied the source from the polyfill and adapted it to return the polyfill function if the native version does not exist. Once support for node versions that lack fromCodePoint is dropped, this polyfill can be removed. * move license notice to top of file --- src/plugins/jsx/fromCodePoint.js | 66 ++++++++++++++++++++ src/plugins/jsx/index.js | 6 +- test/fixtures/jsx/basic/entity/expected.json | 2 +- 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 src/plugins/jsx/fromCodePoint.js diff --git a/src/plugins/jsx/fromCodePoint.js b/src/plugins/jsx/fromCodePoint.js new file mode 100644 index 0000000000..a69acf7686 --- /dev/null +++ b/src/plugins/jsx/fromCodePoint.js @@ -0,0 +1,66 @@ +// Adapted from String.fromcodepoint to export the function without modifying String +/*! https://mths.be/fromcodepoint v0.2.1 by @mathias */ + +// The MIT License (MIT) +// Copyright (c) Mathias Bynens +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. 
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// Pending UTF-16 code units are flushed into the result string once their
// count exceeds this threshold, so `String.fromCharCode.apply` is never
// handed an unbounded argument list (presumably to stay under engines'
// argument-count limits).
const MAX_SIZE = 0x4000;

/**
 * Fallback for `String.fromCodePoint`, used when the runtime has no native
 * implementation. Builds a string from Unicode code points, encoding values
 * above 0xFFFF as a UTF-16 surrogate pair.
 *
 * It is a named top-level function (rather than an anonymous function created
 * only when the native method is missing) so it can be exercised directly on
 * runtimes that already provide `String.fromCodePoint`.
 *
 * @param {...*} codePoints - Values coerced with `Number()`; each must be an
 *   integer in the range 0..0x10FFFF.
 * @returns {string} The concatenated characters; `""` when called with no arguments.
 * @throws {RangeError} If any coerced value is non-finite, negative, greater
 *   than 0x10FFFF, or not an integer.
 */
function fromCodePointFallback(...codePoints) {
  const codeUnits = [];
  let result = "";

  for (let index = 0; index < codePoints.length; index++) {
    let codePoint = Number(codePoints[index]);

    // `isFinite` and `Math.floor` are used instead of `Number.isFinite` /
    // `Number.isInteger`: this code only runs on engines old enough to lack
    // `String.fromCodePoint`, which may lack those ES2015 helpers as well.
    if (
      !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
      codePoint < 0 || // not a valid Unicode code point
      codePoint > 0x10FFFF || // not a valid Unicode code point
      Math.floor(codePoint) !== codePoint // not an integer
    ) {
      throw new RangeError(`Invalid code point: ${codePoint}`);
    }

    if (codePoint <= 0xFFFF) {
      // BMP code point: a single UTF-16 code unit.
      codeUnits.push(codePoint);
    } else {
      // Astral code point; split in surrogate halves.
      // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
      codePoint -= 0x10000;
      const highSurrogate = (codePoint >> 10) + 0xD800;
      const lowSurrogate = (codePoint % 0x400) + 0xDC00;
      codeUnits.push(highSurrogate, lowSurrogate);
    }

    if (codeUnits.length > MAX_SIZE) {
      result += String.fromCharCode.apply(null, codeUnits);
      codeUnits.length = 0;
    }
  }

  // Flush whatever is still pending (nothing at all for an empty call).
  return result + String.fromCharCode.apply(null, codeUnits);
}

// Prefer the native implementation; fall back to the local one without
// modifying the global `String` object.
const fromCodePoint = String.fromCodePoint || fromCodePointFallback;

export default fromCodePoint;
eslint indent: 0 */ +import fromCodePoint from "./fromCodePoint"; + import XHTMLEntities from "./xhtml"; import { TokenType, types as tt } from "../../tokenizer/types"; import { TokContext, types as tc } from "../../tokenizer/context"; @@ -136,11 +138,11 @@ pp.jsxReadEntity = function() { if (str[1] === "x") { str = str.substr(2); if (HEX_NUMBER.test(str)) - entity = String.fromCharCode(parseInt(str, 16)); + entity = fromCodePoint(parseInt(str, 16)); } else { str = str.substr(1); if (DECIMAL_NUMBER.test(str)) - entity = String.fromCharCode(parseInt(str, 10)); + entity = fromCodePoint(parseInt(str, 10)); } } else { entity = XHTMLEntities[str]; diff --git a/test/fixtures/jsx/basic/entity/expected.json b/test/fixtures/jsx/basic/entity/expected.json index 5b39f8e2b5..69f4b59ea6 100644 --- a/test/fixtures/jsx/basic/entity/expected.json +++ b/test/fixtures/jsx/basic/entity/expected.json @@ -136,7 +136,7 @@ } }, "extra": null, - "value": "" + "value": "💩" } ] }