From 53bf94ce78aa314af58ec89137c35f430201ef77 Mon Sep 17 00:00:00 2001 From: John-David Dalton Date: Fri, 18 Sep 2015 08:43:24 -0700 Subject: [PATCH] Expand existing support for zero-width-joiners. --- lodash.js | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/lodash.js b/lodash.js index 5b8b43c70..978b93601 100644 --- a/lodash.js +++ b/lodash.js @@ -142,36 +142,31 @@ /** Used to match unescaped characters in compiled string literals. */ var reUnescapedString = /['\n\r\u2028\u2029\\]/g; - /** Used to compose `reStrSymbol` and `reWord`. */ + /** Used to compose `reAdvSymbol`, `reStrSymbol`, and `reWord`. */ var rsAstralRange = '\\ud800-\\udfff', rsAstral = '[' + rsAstralRange + ']', rsDigits = '\\d+', rsLowers = '[a-z\\xdf-\\xf6\\xf8-\\xff]+', rsNonAstral = '[^' + rsAstralRange + ']', rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', + rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', rsUpper = '[A-Z\\xc0-\\xd6\\xd8-\\xde]', rsZWJ = '\\u200d', - rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', - rsSurrPairs = rsSurrPair + '(?:' + rsZWJ + rsSurrPair + ')*'; + rsJoiner = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + '))*', + rsSymbol = '(?:' + [rsNonAstral, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; - /** Used to match code points from the astral planes. */ - var reAstral = RegExp(rsAstral); + /** Used to match [zero-width joiners and code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */ + var reAdvSymbol = RegExp('[' + rsZWJ + rsAstralRange + ']'); /** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ - var reStrSymbol = RegExp([ - rsNonAstral, - rsRegional, - rsSurrPairs, - rsAstral - ].join('|'), 'g'); + var reStrSymbol = RegExp(rsSymbol + rsJoiner, 'g'); /** Used to match words to create compound words. */ var reWord = RegExp([ rsUpper + '+(?=' + rsUpper + rsLowers + ')', rsUpper + '?' + rsLowers, rsUpper + '+', - rsRegional, - rsSurrPairs, + '(?:' + rsRegional + '|' + rsSurrPair + ')' + rsJoiner, rsDigits ].join('|'), 'g'); @@ -1202,7 +1197,7 @@ * @returns {number} Returns the string size. */ function stringSize(string) { - if (!(string && reAstral.test(string))) { + if (!(string && reAdvSymbol.test(string))) { return string.length; } var result = reStrSymbol.lastIndex = 0; @@ -3647,7 +3642,7 @@ chars = chars === undefined ? ' ' : (chars + ''); var result = repeat(chars, nativeCeil(padLength / stringSize(chars))); - return reAstral.test(chars) + return reAdvSymbol.test(chars) ? stringToArray(result).slice(0, padLength).join('') : result.slice(0, padLength); } @@ -10388,7 +10383,7 @@ if (!string) { return string; } - if (reAstral.test(string)) { + if (reAdvSymbol.test(string)) { var strSymbols = stringToArray(string); return strSymbols[0].toUpperCase() + strSymbols.slice(1).join(''); } @@ -11128,7 +11123,7 @@ string = baseToString(string); var strLength = string.length; - if (reAstral.test(string)) { + if (reAdvSymbol.test(string)) { var strSymbols = stringToArray(string); strLength = strSymbols.length; }