diff --git a/lodash.src.js b/lodash.src.js index b1a5d094a..9c5b909a8 100644 --- a/lodash.src.js +++ b/lodash.src.js @@ -87,6 +87,13 @@ reEvaluate = /<%([\s\S]+?)%>/g, reInterpolate = /<%=([\s\S]+?)%>/g; + /** + * Used to match combining diacritical marks (U+0300-U+036F and U+FE20-U+FE23), which `deburr` strips from strings. + * See [Wikipedia](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks) + * for more details. + */ + var reComboMarks = /[\u0300-\u036f\ufe20-\ufe23]/g; + /** * Used to match ES template delimiters. * See the [ES spec](https://people.mozilla.org/~jorendorff/es6-draft.html#sec-template-literal-lexical-components) @@ -10073,7 +10080,7 @@ */ function deburr(string) { string = baseToString(string); - return string && string.replace(reLatin1, deburrLetter); + return string && string.replace(reLatin1, deburrLetter).replace(reComboMarks, ''); } /** diff --git a/test/test.js b/test/test.js index 81378a6a1..79ee74008 100644 --- a/test/test.js +++ b/test/test.js @@ -235,6 +235,22 @@ '\xef', '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff' ]; + /** List of combining diacritical marks for spanning multiple characters (U+FE20-U+FE23). */ + var comboHalfs = [ + '\ufe20', '\ufe21', '\ufe22', '\ufe23' + ]; + + /** List of common combining diacritical marks (U+0300-U+036F). 
*/ + var comboMarks = [ + '\u0300', '\u0301', '\u0302', '\u0303', '\u0304', '\u0305', '\u0306', '\u0307', '\u0308', '\u0309', '\u030a', '\u030b', '\u030c', '\u030d', '\u030e', '\u030f', + '\u0310', '\u0311', '\u0312', '\u0313', '\u0314', '\u0315', '\u0316', '\u0317', '\u0318', '\u0319', '\u031a', '\u031b', '\u031c', '\u031d', '\u031e', '\u031f', + '\u0320', '\u0321', '\u0322', '\u0323', '\u0324', '\u0325', '\u0326', '\u0327', '\u0328', '\u0329', '\u032a', '\u032b', '\u032c', '\u032d', '\u032e', '\u032f', + '\u0330', '\u0331', '\u0332', '\u0333', '\u0334', '\u0335', '\u0336', '\u0337', '\u0338', '\u0339', '\u033a', '\u033b', '\u033c', '\u033d', '\u033e', '\u033f', + '\u0340', '\u0341', '\u0342', '\u0343', '\u0344', '\u0345', '\u0346', '\u0347', '\u0348', '\u0349', '\u034a', '\u034b', '\u034c', '\u034d', '\u034e', '\u034f', + '\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357', '\u0358', '\u0359', '\u035a', '\u035b', '\u035c', '\u035d', '\u035e', '\u035f', + '\u0360', '\u0361', '\u0362', '\u0363', '\u0364', '\u0365', '\u0366', '\u0367', '\u0368', '\u0369', '\u036a', '\u036b', '\u036c', '\u036d', '\u036e', '\u036f' + ]; + /** List of `burredLetters` translated to basic latin letters. */ var deburredLetters = [ 'A', 'A', 'A', 'A', 'A', 'A', 'Ae', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', @@ -3533,6 +3549,17 @@ deepEqual(actual, operators); }); + + test('should deburr combining diacritical marks', 1, function() { + var values = comboMarks.concat(comboHalfs), + expected = _.map(values, _.constant('ei')); + + var actual = _.map(values, function(chr) { + return _.deburr('e' + chr + 'i'); + }); + + deepEqual(actual, expected); + }); }()); /*--------------------------------------------------------------------------*/