Expand existing support for zero-width-joiners.

This commit is contained in:
John-David Dalton
2015-09-18 08:43:24 -07:00
parent 406e36977e
commit 53bf94ce78

View File

@@ -142,36 +142,31 @@
/** Used to match unescaped characters in compiled string literals. */
var reUnescapedString = /['\n\r\u2028\u2029\\]/g;
/** Used to compose `reStrSymbol` and `reWord`. */
/** Used to compose `reAdvSymbol`, `reStrSymbol`, and `reWord`. */
// NOTE(review): this hunk appears to be rendered without +/- markers, so
// pre-change and post-change lines seem to be interleaved below (two header
// comments above, `rsSurrPair` listed twice, `reStrSymbol` assigned twice).
// Confirm against the real file which lines survive.
//
// The `rs*` values are regex *source strings* (hence the doubled backslashes);
// they are only turned into patterns by the `RegExp(...)` calls further down.
var rsAstralRange = '\\ud800-\\udfff', // UTF-16 surrogate code-unit range
rsAstral = '[' + rsAstralRange + ']', // any single code unit in that range
rsDigits = '\\d+', // one or more digits
rsLowers = '[a-z\\xdf-\\xf6\\xf8-\\xff]+', // run of ASCII / Latin-1 lowercase letters
rsNonAstral = '[^' + rsAstralRange + ']', // any single code unit outside the surrogate range
rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', // exactly two consecutive \ud83c + [\udde6-\uddff] pairs
rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', // high surrogate followed by low surrogate
rsUpper = '[A-Z\\xc0-\\xd6\\xd8-\\xde]', // one ASCII / Latin-1 uppercase letter
rsZWJ = '\\u200d', // U+200D zero-width joiner
// NOTE(review): same value as the `rsSurrPair` entry a few lines up; one of the
// two is presumably the removed side of the diff.
rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
// A surrogate pair followed by zero or more (ZWJ + surrogate pair) groups.
// NOTE(review): the trailing `;` ends the `var` statement here, so the two
// assignments that follow are not part of the `var` list as displayed.
rsSurrPairs = rsSurrPair + '(?:' + rsZWJ + rsSurrPair + ')*';
// Zero or more repetitions of: a ZWJ followed by a non-astral code unit, a
// regional pair, or a surrogate pair.
rsJoiner = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + '))*',
// One symbol: alternatives are tried in the listed order (non-astral code unit,
// regional pair, surrogate pair, then a lone code unit from the surrogate range).
rsSymbol = '(?:' + [rsNonAstral, rsRegional, rsSurrPair, rsAstral].join('|') + ')';
/** Used to match code points from the astral planes. */
var reAstral = RegExp(rsAstral);
/** Used to match [zero-width joiners and code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */
// Non-global character class: matches a single ZWJ or a single code unit in
// the surrogate range.
var reAdvSymbol = RegExp('[' + rsZWJ + rsAstralRange + ']');
/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */
// Global alternation of the individual symbol forms; ZWJ joining is only
// covered between surrogate pairs (via `rsSurrPairs`).
var reStrSymbol = RegExp([
rsNonAstral,
rsRegional,
rsSurrPairs,
rsAstral
].join('|'), 'g');
// Global pattern: one symbol followed by any number of ZWJ-joined symbols.
// NOTE(review): second `var reStrSymbol`; as displayed this assignment
// overrides the one above.
var reStrSymbol = RegExp(rsSymbol + rsJoiner, 'g');
/** Used to match words to create compound words. */
var reWord = RegExp([
// Run of uppercase letters that is followed by an uppercase + lowercase run.
rsUpper + '+(?=' + rsUpper + rsLowers + ')',
// Optional single uppercase letter followed by a lowercase run.
rsUpper + '?' + rsLowers,
// Run of uppercase letters.
rsUpper + '+',
rsRegional,
rsSurrPairs,
// A regional pair or surrogate pair, followed by any ZWJ-joined symbols.
// NOTE(review): listed alongside the `rsRegional` / `rsSurrPairs` entries
// above; presumably one form replaces the other in the real file.
'(?:' + rsRegional + '|' + rsSurrPair + ')' + rsJoiner,
rsDigits
].join('|'), 'g');
@@ -1202,7 +1197,7 @@
* @returns {number} Returns the string size.
*/
function stringSize(string) {
if (!(string && reAstral.test(string))) {
if (!(string && reAdvSymbol.test(string))) {
return string.length;
}
var result = reStrSymbol.lastIndex = 0;
@@ -3647,7 +3642,7 @@
chars = chars === undefined ? ' ' : (chars + '');
var result = repeat(chars, nativeCeil(padLength / stringSize(chars)));
return reAstral.test(chars)
return reAdvSymbol.test(chars)
? stringToArray(result).slice(0, padLength).join('')
: result.slice(0, padLength);
}
@@ -10388,7 +10383,7 @@
if (!string) {
return string;
}
if (reAstral.test(string)) {
if (reAdvSymbol.test(string)) {
var strSymbols = stringToArray(string);
return strSymbols[0].toUpperCase() + strSymbols.slice(1).join('');
}
@@ -11128,7 +11123,7 @@
string = baseToString(string);
var strLength = string.length;
if (reAstral.test(string)) {
if (reAdvSymbol.test(string)) {
var strSymbols = stringToArray(string);
strLength = strSymbols.length;
}