From 96ef2110ce6a6e1e6994735c591905cc3104840d Mon Sep 17 00:00:00 2001 From: John-David Dalton Date: Thu, 14 Jan 2016 21:31:26 -0800 Subject: [PATCH] Add support for combining diacritical marks for symbols. --- lodash.js | 17 ++++--- test/test.js | 123 ++++++++++++++++++++++++++------------------------- 2 files changed, 74 insertions(+), 66 deletions(-) diff --git a/lodash.js b/lodash.js index 15b500338..def0470eb 100644 --- a/lodash.js +++ b/lodash.js @@ -163,7 +163,8 @@ /** Used to compose unicode character classes. */ var rsAstralRange = '\\ud800-\\udfff', - rsComboRange = '\\u0300-\\u036f\\ufe20-\\ufe23', + rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', + rsComboSymbolsRange = '\\u20d0-\\u20f0', rsDingbatRange = '\\u2700-\\u27bf', rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff', rsMathOpRange = '\\xac\\xb1\\xd7\\xf7', @@ -177,12 +178,13 @@ /** Used to compose unicode capture groups. */ var rsAstral = '[' + rsAstralRange + ']', rsBreak = '[' + rsBreakRange + ']', - rsCombo = '[' + rsComboRange + ']', + rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']', rsDigits = '\\d+', rsDingbat = '[' + rsDingbatRange + ']', rsLower = '[' + rsLowerRange + ']', rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']', - rsModifier = '(?:\\ud83c[\\udffb-\\udfff])', + rsFitz = '\\ud83c[\\udffb-\\udfff]', + rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', rsNonAstral = '[^' + rsAstralRange + ']', rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', @@ -199,14 +201,17 @@ rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq, rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; - /** Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). */ + /** + * Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks) and + * [combining diacritical marks for symbols](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks_for_Symbols). + */ var reComboMark = RegExp(rsCombo, 'g'); /** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ - var reComplexSymbol = RegExp(rsSymbol + rsSeq, 'g'); + var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g'); /** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */ - var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboRange + rsVarRange + ']'); + var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboMarksRange + rsComboSymbolsRange + rsVarRange + ']'); /** Used to match non-compound words composed of alphanumeric characters. */ var reBasicWord = /[a-zA-Z0-9]+/g; diff --git a/test/test.js b/test/test.js index b5da634e6..523bd21bd 100644 --- a/test/test.js +++ b/test/test.js @@ -274,15 +274,6 @@ 'i', 'd', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y' ]; - /** List of emoji modifiers. */ - var emojiModifiers = [ - '\ud83c\udffb', - '\ud83c\udffc', - '\ud83c\udffd', - '\ud83c\udffe', - '\ud83c\udfff' - ]; - /** Used to specify the emoji style glyph variant of characters. */ var emojiVar = '\ufe0f'; @@ -303,6 +294,15 @@ new URIError ]; + /** List of fitzpatrick modifiers. */ + var fitzModifiers = [ + '\ud83c\udffb', + '\ud83c\udffc', + '\ud83c\udffd', + '\ud83c\udffe', + '\ud83c\udfff' + ]; + /** Used to check whether methods support typed arrays. */ var typedArrays = [ 'Float32Array', @@ -21223,17 +21223,16 @@ var flag = '\ud83c\uddfa\ud83c\uddf8', heart = '\u2764' + emojiVar, hearts = '\ud83d\udc95', + comboGlyph = '\ud83d\udc68\u200d' + heart + '\u200d\ud83d\udc8B\u200d\ud83d\udc68', + hashKeycap = '#' + emojiVar + '\u20e3', leafs = '\ud83c\udf42', + noMic = '\ud83c\udf99\u20e0', raisedHand = '\u270B' + emojiVar, rocket = '\ud83d\ude80', - thumbsUp = '\ud83d\udc4d', - comboGlyph = '\ud83d\udc68\u200d' + heart + '\u200d\ud83d\udc8B\u200d\ud83d\udc68', - keycapHash = '#' + emojiVar + '\u20e3', - oneFitzpatrick = '\ud83c\udfff', - twoFitzpatrick = oneFitzpatrick + oneFitzpatrick; + thumbsUp = '\ud83d\udc4d'; QUnit.test('should account for astral symbols', function(assert) { - assert.expect(27); + assert.expect(26); var allHearts = _.repeat(hearts, 10), chars = hearts + comboGlyph, @@ -21264,10 +21263,7 @@ assert.strictEqual(_.truncate(string, { 'length': 6 }), 'A ' + leafs + '...'); assert.deepEqual(_.words(string), ['A', leafs, comboGlyph, 'and', rocket]); - - assert.deepEqual(_.toArray(keycapHash), [keycapHash]); - - assert.deepEqual(_.toArray(twoFitzpatrick), [oneFitzpatrick, oneFitzpatrick]); + assert.deepEqual(_.toArray(hashKeycap), [hashKeycap]); lodashStable.times(2, function(index) { var separator = index ? RegExp(hearts) : hearts, @@ -21283,15 +21279,40 @@ }); }); - QUnit.test('should match lone surrogates', function(assert) { - assert.expect(3); + QUnit.test('should account for combining diacritical marks', function(assert) { + assert.expect(1); - var pair = hearts.split(''), - surrogates = pair[0] + ' ' + pair[1]; + var values = lodashStable.map(comboMarks, function(mark) { + return 'o' + mark; + }); - assert.strictEqual(_.size(surrogates), 3); - assert.deepEqual(_.toArray(surrogates), [pair[0], ' ', pair[1]]); - assert.deepEqual(_.words(surrogates), []); + var expected = lodashStable.map(values, function(value) { + return [1, [value], [value]]; + }); + + var actual = lodashStable.map(values, function(value) { + return [_.size(value), _.toArray(value), _.words(value)]; + }); + + assert.deepEqual(actual, expected); + }); + + QUnit.test('should account for fitzpatrick modifiers', function(assert) { + assert.expect(1); + + var values = lodashStable.map(fitzModifiers, function(modifier) { + return thumbsUp + modifier; + }); + + var expected = lodashStable.map(values, function(value) { + return [1, [value], [value]]; + }); + + var actual = lodashStable.map(values, function(value) { + return [_.size(value), _.toArray(value), _.words(value)]; + }); + + assert.deepEqual(actual, expected); }); QUnit.test('should account for regional symbols', function(assert) { @@ -21318,28 +21339,10 @@ assert.deepEqual(_.words(heart), [heart]); }); - QUnit.test('should account for modifiers', function(assert) { + QUnit.test('should account for variation selectors with fitzpatrick modifiers', function(assert) { assert.expect(1); - var values = lodashStable.map(emojiModifiers, function(modifier) { - return thumbsUp + modifier; - }); - - var expected = lodashStable.map(values, function(value) { - return [1, [value], [value]]; - }); - - var actual = lodashStable.map(values, function(value) { - return [_.size(value), _.toArray(value), _.words(value)]; - }); - - assert.deepEqual(actual, expected); - }); - - QUnit.test('should account for variation selectors with modifiers', function(assert) { - assert.expect(1); - - var values = lodashStable.map(emojiModifiers, function(modifier) { + var values = lodashStable.map(fitzModifiers, function(modifier) { return raisedHand + modifier; }); @@ -21354,22 +21357,22 @@ assert.deepEqual(actual, expected); }); - QUnit.test('should account for combining diacritical marks', function(assert) { + QUnit.test('should match lone surrogates', function(assert) { + assert.expect(3); + + var pair = hearts.split(''), + surrogates = pair[0] + ' ' + pair[1]; + + assert.strictEqual(_.size(surrogates), 3); + assert.deepEqual(_.toArray(surrogates), [pair[0], ' ', pair[1]]); + assert.deepEqual(_.words(surrogates), []); + }); + + QUnit.test('should match side by side fitzpatrick modifiers separately ', function(assert) { assert.expect(1); - var values = lodashStable.map(comboMarks, function(mark) { - return 'o' + mark; - }); - - var expected = lodashStable.map(values, function(value) { - return [1, [value], [value]]; - }); - - var actual = lodashStable.map(values, function(value) { - return [_.size(value), _.toArray(value), _.words(value)]; - }); - - assert.deepEqual(actual, expected); + var string = fitzModifiers[0] + fitzModifiers[0]; + assert.deepEqual(_.toArray(string), [fitzModifiers[0], fitzModifiers[0]]); }); }());