From 7488068d50ca0b09887daefafbdc7d023cd38aed Mon Sep 17 00:00:00 2001 From: John-David Dalton Date: Fri, 25 Sep 2015 22:27:31 -0700 Subject: [PATCH] Add support for combining diacritical marks to `_.toArray`. --- lodash.js | 11 +++++++--- test/test.js | 57 ++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/lodash.js b/lodash.js index 0fba43a5d..39a413ac2 100644 --- a/lodash.js +++ b/lodash.js @@ -139,9 +139,11 @@ /** Used to match unescaped characters in compiled string literals. */ var reUnescapedString = /['\n\r\u2028\u2029\\]/g; - /** Used to compose `reAdvSymbol`, `reStrSymbol`, and `reWord`. */ + /** Used to compose unicode related regexes. */ var rsAstralRange = '\\ud800-\\udfff', rsAstral = '[' + rsAstralRange + ']', + rsComboRange = '\\u0300-\\u036f\\ufe20-\\ufe23', + rsCombo = '[' + rsComboRange + ']', rsDigits = '\\d+', rsDingbat = '[\\u2700-\\u27bf]', rsLowers = '[a-z\\xdf-\\xf6\\xf8-\\xff]+', @@ -149,7 +151,7 @@ rsNonAstral = '[^' + rsAstralRange + ']', rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', - rsSymbol = '(?:' + [rsNonAstral, rsRegional, rsSurrPair, rsAstral].join('|') + ')', + rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?' , rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')', rsUpper = '[A-Z\\xc0-\\xd6\\xd8-\\xde]', rsVS = '\\ufe0e\\ufe0f', rsZWJ = '\\u200d', @@ -159,7 +161,10 @@ rsSeq = rsOptVS + reOptMod + rsJoiner; /** Used to match [zero-width joiners and code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */ - var reAdvSymbol = RegExp('[' + rsZWJ + rsVS + rsAstralRange + ']'); + var reAdvSymbol = RegExp('[' + rsZWJ + rsVS + rsAstralRange + rsComboRange + ']'); + + /** Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). */ + var reComboMark = RegExp(rsCombo, 'g'); /** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ var reStrSymbol = RegExp(rsSymbol + rsSeq, 'g'); diff --git a/test/test.js b/test/test.js index 95d114a56..13e461a1c 100644 --- a/test/test.js +++ b/test/test.js @@ -249,12 +249,7 @@ '\xef', '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff' ]; - /** List of combining diacritical marks for spanning multiple characters. */ - var comboHalfs = [ - '\ufe20', '\ufe21', '\ufe22', '\ufe23' - ]; - - /** List of common combining diacritical marks. */ + /** List of combining diacritical marks. */ var comboMarks = [ '\u0300', '\u0301', '\u0302', '\u0303', '\u0304', '\u0305', '\u0306', '\u0307', '\u0308', '\u0309', '\u030a', '\u030b', '\u030c', '\u030d', '\u030e', '\u030f', '\u0310', '\u0311', '\u0312', '\u0313', '\u0314', '\u0315', '\u0316', '\u0317', '\u0318', '\u0319', '\u031a', '\u031b', '\u031c', '\u031d', '\u031e', '\u031f', @@ -262,7 +257,8 @@ '\u0330', '\u0331', '\u0332', '\u0333', '\u0334', '\u0335', '\u0336', '\u0337', '\u0338', '\u0339', '\u033a', '\u033b', '\u033c', '\u033d', '\u033e', '\u033f', '\u0340', '\u0341', '\u0342', '\u0343', '\u0344', '\u0345', '\u0346', '\u0347', '\u0348', '\u0349', '\u034a', '\u034b', '\u034c', '\u034d', '\u034e', '\u034f', '\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357', '\u0358', '\u0359', '\u035a', '\u035b', '\u035c', '\u035d', '\u035e', '\u035f', - '\u0360', '\u0361', '\u0362', '\u0363', '\u0364', '\u0365', '\u0366', '\u0367', '\u0368', '\u0369', '\u036a', '\u036b', '\u036c', '\u036d', '\u036e', '\u036f' + '\u0360', '\u0361', '\u0362', '\u0363', '\u0364', '\u0365', '\u0366', '\u0367', '\u0368', '\u0369', '\u036a', '\u036b', '\u036c', '\u036d', '\u036e', '\u036f', + '\ufe20', '\ufe21', '\ufe22', '\ufe23' ]; /** List of `burredLetters` translated to basic latin letters. */ @@ -273,6 +269,18 @@ 'i', 'd', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y' ]; + /** List of emoji modifiers. */ + var emojiModifiers = [ + '\ud83c\udffb', + '\ud83c\udffc', + '\ud83c\udffd', + '\ud83c\udffe', + '\ud83c\udfff' + ]; + + /** Used to specify the emoji style glyph variant of characters. */ + var emojiVar = '\ufe0f'; + /** Used to provide falsey values to methods. */ var falsey = [, '', 0, false, NaN, null, undefined]; @@ -3293,10 +3301,9 @@ QUnit.test('should deburr combining diacritical marks', function(assert) { assert.expect(1); - var values = comboMarks.concat(comboHalfs), - expected = _.map(values, _.constant('ei')); + var expected = _.map(comboMarks, _.constant('ei')); - var actual = _.map(values, function(chr) { + var actual = _.map(comboMarks, function(chr) { return _.deburr('e' + chr + 'i'); }); @@ -18885,19 +18892,17 @@ /*--------------------------------------------------------------------------*/ - QUnit.module('astral symbols'); + QUnit.module('uncommon symbols'); (function() { - var emojiVar = '\ufe0f', - flag = '\ud83c\uddfa\ud83c\uddf8', + var flag = '\ud83c\uddfa\ud83c\uddf8', heart = '\u2764' + emojiVar, hearts = '\ud83d\udc95', leafs = '\ud83c\udf42', raisedHand = '\u270B' + emojiVar, rocket = '\ud83d\ude80', thumbsUp = '\ud83d\udc4d', - comboGlyph = '\ud83d\udc68\u200d' + heart + '\u200d\ud83d\udc8B\u200d\ud83d\udc68', - modifiers = ['\ud83c\udffb', '\ud83c\udffc', '\ud83c\udffd', '\ud83c\udffe', '\ud83c\udfff']; + comboGlyph = '\ud83d\udc68\u200d' + heart + '\u200d\ud83d\udc8B\u200d\ud83d\udc68'; QUnit.test('should account for astral symbols', function(assert) { assert.expect(25); @@ -18984,7 +18989,7 @@ QUnit.test('should account for modifiers', function(assert) { assert.expect(1); - var values = _.map(modifiers, function(modifier) { + var values = _.map(emojiModifiers, function(modifier) { return thumbsUp + modifier; }); @@ -19002,7 +19007,7 @@ QUnit.test('should account for variation selectors with modifiers', function(assert) { assert.expect(1); - var values = _.map(modifiers, function(modifier) { + var values = _.map(emojiModifiers, function(modifier) { return raisedHand + modifier; }); @@ -19016,6 +19021,24 @@ assert.deepEqual(actual, expected); }); + + QUnit.test('should account for combining diacritical marks', function(assert) { + assert.expect(1); + + var values = _.map(comboMarks, function(mark) { + return 'o' + mark; + }); + + var expected = _.map(values, function(value) { + return [1, [value], ['o']]; + }); + + var actual = _.map(values, function(value) { + return [_.size(value), _.toArray(value), _.words(value)]; + }); + + assert.deepEqual(actual, expected); + }); }()); /*--------------------------------------------------------------------------*/