Add support for combining diacritical marks for symbols.

This commit is contained in:
John-David Dalton
2016-01-14 21:31:26 -08:00
parent 84763cab26
commit 96ef2110ce
2 changed files with 74 additions and 66 deletions

View File

@@ -163,7 +163,8 @@
/** Used to compose unicode character classes. */ /** Used to compose unicode character classes. */
var rsAstralRange = '\\ud800-\\udfff', var rsAstralRange = '\\ud800-\\udfff',
rsComboRange = '\\u0300-\\u036f\\ufe20-\\ufe23', rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23',
rsComboSymbolsRange = '\\u20d0-\\u20f0',
rsDingbatRange = '\\u2700-\\u27bf', rsDingbatRange = '\\u2700-\\u27bf',
rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff', rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff',
rsMathOpRange = '\\xac\\xb1\\xd7\\xf7', rsMathOpRange = '\\xac\\xb1\\xd7\\xf7',
@@ -177,12 +178,13 @@
/** Used to compose unicode capture groups. */ /** Used to compose unicode capture groups. */
var rsAstral = '[' + rsAstralRange + ']', var rsAstral = '[' + rsAstralRange + ']',
rsBreak = '[' + rsBreakRange + ']', rsBreak = '[' + rsBreakRange + ']',
rsCombo = '[' + rsComboRange + ']', rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']',
rsDigits = '\\d+', rsDigits = '\\d+',
rsDingbat = '[' + rsDingbatRange + ']', rsDingbat = '[' + rsDingbatRange + ']',
rsLower = '[' + rsLowerRange + ']', rsLower = '[' + rsLowerRange + ']',
rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']', rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']',
rsModifier = '(?:\\ud83c[\\udffb-\\udfff])', rsFitz = '\\ud83c[\\udffb-\\udfff]',
rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')',
rsNonAstral = '[^' + rsAstralRange + ']', rsNonAstral = '[^' + rsAstralRange + ']',
rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}',
rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
@@ -199,14 +201,17 @@
rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq, rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq,
rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')';
/** Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). */ /**
* Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks) and
* [combining diacritical marks for symbols](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks_for_Symbols).
*/
var reComboMark = RegExp(rsCombo, 'g'); var reComboMark = RegExp(rsCombo, 'g');
/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ /** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */
var reComplexSymbol = RegExp(rsSymbol + rsSeq, 'g'); var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g');
/** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */ /** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */
var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboRange + rsVarRange + ']'); var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboMarksRange + rsComboSymbolsRange + rsVarRange + ']');
/** Used to match non-compound words composed of alphanumeric characters. */ /** Used to match non-compound words composed of alphanumeric characters. */
var reBasicWord = /[a-zA-Z0-9]+/g; var reBasicWord = /[a-zA-Z0-9]+/g;

View File

@@ -274,15 +274,6 @@
'i', 'd', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y' 'i', 'd', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y'
]; ];
/** List of emoji modifiers. */
var emojiModifiers = [
'\ud83c\udffb',
'\ud83c\udffc',
'\ud83c\udffd',
'\ud83c\udffe',
'\ud83c\udfff'
];
/** Used to specify the emoji style glyph variant of characters. */ /** Used to specify the emoji style glyph variant of characters. */
var emojiVar = '\ufe0f'; var emojiVar = '\ufe0f';
@@ -303,6 +294,15 @@
new URIError new URIError
]; ];
/** List of fitzpatrick modifiers. */
var fitzModifiers = [
'\ud83c\udffb',
'\ud83c\udffc',
'\ud83c\udffd',
'\ud83c\udffe',
'\ud83c\udfff'
];
/** Used to check whether methods support typed arrays. */ /** Used to check whether methods support typed arrays. */
var typedArrays = [ var typedArrays = [
'Float32Array', 'Float32Array',
@@ -21223,17 +21223,16 @@
var flag = '\ud83c\uddfa\ud83c\uddf8', var flag = '\ud83c\uddfa\ud83c\uddf8',
heart = '\u2764' + emojiVar, heart = '\u2764' + emojiVar,
hearts = '\ud83d\udc95', hearts = '\ud83d\udc95',
comboGlyph = '\ud83d\udc68\u200d' + heart + '\u200d\ud83d\udc8B\u200d\ud83d\udc68',
hashKeycap = '#' + emojiVar + '\u20e3',
leafs = '\ud83c\udf42', leafs = '\ud83c\udf42',
noMic = '\ud83c\udf99\u20e0',
raisedHand = '\u270B' + emojiVar, raisedHand = '\u270B' + emojiVar,
rocket = '\ud83d\ude80', rocket = '\ud83d\ude80',
thumbsUp = '\ud83d\udc4d', thumbsUp = '\ud83d\udc4d';
comboGlyph = '\ud83d\udc68\u200d' + heart + '\u200d\ud83d\udc8B\u200d\ud83d\udc68',
keycapHash = '#' + emojiVar + '\u20e3',
oneFitzpatrick = '\ud83c\udfff',
twoFitzpatrick = oneFitzpatrick + oneFitzpatrick;
QUnit.test('should account for astral symbols', function(assert) { QUnit.test('should account for astral symbols', function(assert) {
assert.expect(27); assert.expect(26);
var allHearts = _.repeat(hearts, 10), var allHearts = _.repeat(hearts, 10),
chars = hearts + comboGlyph, chars = hearts + comboGlyph,
@@ -21264,10 +21263,7 @@
assert.strictEqual(_.truncate(string, { 'length': 6 }), 'A ' + leafs + '...'); assert.strictEqual(_.truncate(string, { 'length': 6 }), 'A ' + leafs + '...');
assert.deepEqual(_.words(string), ['A', leafs, comboGlyph, 'and', rocket]); assert.deepEqual(_.words(string), ['A', leafs, comboGlyph, 'and', rocket]);
assert.deepEqual(_.toArray(hashKeycap), [hashKeycap]);
assert.deepEqual(_.toArray(keycapHash), [keycapHash]);
assert.deepEqual(_.toArray(twoFitzpatrick), [oneFitzpatrick, oneFitzpatrick]);
lodashStable.times(2, function(index) { lodashStable.times(2, function(index) {
var separator = index ? RegExp(hearts) : hearts, var separator = index ? RegExp(hearts) : hearts,
@@ -21283,15 +21279,40 @@
}); });
}); });
QUnit.test('should match lone surrogates', function(assert) { QUnit.test('should account for combining diacritical marks', function(assert) {
assert.expect(3); assert.expect(1);
var pair = hearts.split(''), var values = lodashStable.map(comboMarks, function(mark) {
surrogates = pair[0] + ' ' + pair[1]; return 'o' + mark;
});
assert.strictEqual(_.size(surrogates), 3); var expected = lodashStable.map(values, function(value) {
assert.deepEqual(_.toArray(surrogates), [pair[0], ' ', pair[1]]); return [1, [value], [value]];
assert.deepEqual(_.words(surrogates), []); });
var actual = lodashStable.map(values, function(value) {
return [_.size(value), _.toArray(value), _.words(value)];
});
assert.deepEqual(actual, expected);
});
QUnit.test('should account for fitzpatrick modifiers', function(assert) {
assert.expect(1);
var values = lodashStable.map(fitzModifiers, function(modifier) {
return thumbsUp + modifier;
});
var expected = lodashStable.map(values, function(value) {
return [1, [value], [value]];
});
var actual = lodashStable.map(values, function(value) {
return [_.size(value), _.toArray(value), _.words(value)];
});
assert.deepEqual(actual, expected);
}); });
QUnit.test('should account for regional symbols', function(assert) { QUnit.test('should account for regional symbols', function(assert) {
@@ -21318,28 +21339,10 @@
assert.deepEqual(_.words(heart), [heart]); assert.deepEqual(_.words(heart), [heart]);
}); });
QUnit.test('should account for modifiers', function(assert) { QUnit.test('should account for variation selectors with fitzpatrick modifiers', function(assert) {
assert.expect(1); assert.expect(1);
var values = lodashStable.map(emojiModifiers, function(modifier) { var values = lodashStable.map(fitzModifiers, function(modifier) {
return thumbsUp + modifier;
});
var expected = lodashStable.map(values, function(value) {
return [1, [value], [value]];
});
var actual = lodashStable.map(values, function(value) {
return [_.size(value), _.toArray(value), _.words(value)];
});
assert.deepEqual(actual, expected);
});
QUnit.test('should account for variation selectors with modifiers', function(assert) {
assert.expect(1);
var values = lodashStable.map(emojiModifiers, function(modifier) {
return raisedHand + modifier; return raisedHand + modifier;
}); });
@@ -21354,22 +21357,22 @@
assert.deepEqual(actual, expected); assert.deepEqual(actual, expected);
}); });
QUnit.test('should account for combining diacritical marks', function(assert) { QUnit.test('should match lone surrogates', function(assert) {
assert.expect(3);
var pair = hearts.split(''),
surrogates = pair[0] + ' ' + pair[1];
assert.strictEqual(_.size(surrogates), 3);
assert.deepEqual(_.toArray(surrogates), [pair[0], ' ', pair[1]]);
assert.deepEqual(_.words(surrogates), []);
});
QUnit.test('should match side by side fitzpatrick modifiers separately ', function(assert) {
assert.expect(1); assert.expect(1);
var values = lodashStable.map(comboMarks, function(mark) { var string = fitzModifiers[0] + fitzModifiers[0];
return 'o' + mark; assert.deepEqual(_.toArray(string), [fitzModifiers[0], fitzModifiers[0]]);
});
var expected = lodashStable.map(values, function(value) {
return [1, [value], [value]];
});
var actual = lodashStable.map(values, function(value) {
return [_.size(value), _.toArray(value), _.words(value)];
});
assert.deepEqual(actual, expected);
}); });
}()); }());