mirror of
https://github.com/whoisclebs/lodash.git
synced 2026-02-01 15:57:48 +00:00
Make _.deburr handle combining diacritical marks. [closes #1070]
This commit is contained in:
@@ -87,6 +87,13 @@
|
||||
reEvaluate = /<%([\s\S]+?)%>/g,
|
||||
reInterpolate = /<%=([\s\S]+?)%>/g;
|
||||
|
||||
/**
|
||||
* Used to match combining diacritical marks.
|
||||
* See [Wikipedia](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks)
|
||||
* for more details.
|
||||
*/
|
||||
// Matches the Combining Diacritical Marks block (U+0300-U+036F) plus the whole
// Combining Half Marks block (U+FE20-U+FE2F). The half-marks range previously
// stopped at U+FE23, so later half marks were left in the string.
// The `g` flag makes `String#replace` strip every occurrence, not just the first.
var reComboMarks = /[\u0300-\u036f\ufe20-\ufe2f]/g;
|
||||
|
||||
/**
|
||||
* Used to match ES template delimiters.
|
||||
* See the [ES spec](https://people.mozilla.org/~jorendorff/es6-draft.html#sec-template-literal-lexical-components)
|
||||
@@ -10073,7 +10080,7 @@
|
||||
*/
|
||||
// Diff excerpt of `deburr`: the hunk shows the function with two `return` lines,
// presumably the removed (old) and added (new) sides of this commit's one-line change.
function deburr(string) {
|
||||
// Normalize the argument through `baseToString` (defined outside this excerpt).
string = baseToString(string);
|
||||
// Old side of the change: only rewrites `reLatin1` matches through `deburrLetter`.
return string && string.replace(reLatin1, deburrLetter);
|
||||
// New side of the change: same rewrite, then deletes every `reComboMarks` match.
// The `&&` short-circuit returns a falsy `string` as-is without calling `replace`.
return string && string.replace(reLatin1, deburrLetter).replace(reComboMarks, '');
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
27
test/test.js
27
test/test.js
@@ -235,6 +235,22 @@
|
||||
'\xef', '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
|
||||
];
|
||||
|
||||
/** List of combining half marks (U+FE20-U+FE23), each of which is one half of a mark that spans multiple characters. */
|
||||
var comboHalfs = [
|
||||
'\ufe20', '\ufe21', '\ufe22', '\ufe23'
|
||||
];
|
||||
|
||||
/** List of every code point in the Combining Diacritical Marks block (U+0300-U+036F). */
|
||||
var comboMarks = [
|
||||
'\u0300', '\u0301', '\u0302', '\u0303', '\u0304', '\u0305', '\u0306', '\u0307', '\u0308', '\u0309', '\u030a', '\u030b', '\u030c', '\u030d', '\u030e', '\u030f',
|
||||
'\u0310', '\u0311', '\u0312', '\u0313', '\u0314', '\u0315', '\u0316', '\u0317', '\u0318', '\u0319', '\u031a', '\u031b', '\u031c', '\u031d', '\u031e', '\u031f',
|
||||
'\u0320', '\u0321', '\u0322', '\u0323', '\u0324', '\u0325', '\u0326', '\u0327', '\u0328', '\u0329', '\u032a', '\u032b', '\u032c', '\u032d', '\u032e', '\u032f',
|
||||
'\u0330', '\u0331', '\u0332', '\u0333', '\u0334', '\u0335', '\u0336', '\u0337', '\u0338', '\u0339', '\u033a', '\u033b', '\u033c', '\u033d', '\u033e', '\u033f',
|
||||
'\u0340', '\u0341', '\u0342', '\u0343', '\u0344', '\u0345', '\u0346', '\u0347', '\u0348', '\u0349', '\u034a', '\u034b', '\u034c', '\u034d', '\u034e', '\u034f',
|
||||
'\u0350', '\u0351', '\u0352', '\u0353', '\u0354', '\u0355', '\u0356', '\u0357', '\u0358', '\u0359', '\u035a', '\u035b', '\u035c', '\u035d', '\u035e', '\u035f',
|
||||
'\u0360', '\u0361', '\u0362', '\u0363', '\u0364', '\u0365', '\u0366', '\u0367', '\u0368', '\u0369', '\u036a', '\u036b', '\u036c', '\u036d', '\u036e', '\u036f'
|
||||
];
|
||||
|
||||
/** List of `burredLetters` translated to basic latin letters. */
|
||||
var deburredLetters = [
|
||||
'A', 'A', 'A', 'A', 'A', 'A', 'Ae', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I',
|
||||
@@ -3533,6 +3549,17 @@
|
||||
|
||||
deepEqual(actual, operators);
|
||||
});
|
||||
|
||||
// Checks that `_.deburr` removes each mark from `comboMarks` and `comboHalfs`
// when the mark sits between two plain letters.
// NOTE(review): the `1` argument is presumably the expected assertion count — confirm against the test harness.
test('should deburr combining diacritical marks', 1, function() {
|
||||
// Every mark under test: the diacritical marks followed by the half marks.
var values = comboMarks.concat(comboHalfs),
|
||||
// One 'ei' per mark, i.e. the input with the mark stripped.
expected = _.map(values, _.constant('ei'));
|
||||
|
||||
var actual = _.map(values, function(chr) {
|
||||
// Place the mark between 'e' and 'i' so a surviving mark would show up in the result.
return _.deburr('e' + chr + 'i');
|
||||
});
|
||||
|
||||
// Single assertion comparing all results at once.
deepEqual(actual, expected);
|
||||
});
|
||||
}());
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
Reference in New Issue
Block a user