Add support for ordinal numbers (e.g. `1st`, `2nd`, `3rd`, `4th`) to `_.words`.

This commit is contained in:
John-David Dalton
2016-10-09 16:16:48 -07:00
parent 31cb06a643
commit 054e78572b
2 changed files with 69 additions and 41 deletions

View File

@@ -227,13 +227,15 @@
rsZWJ = '\\u200d'; rsZWJ = '\\u200d';
/** Used to compose unicode regexes. */ /** Used to compose unicode regexes. */
var rsLowerMisc = '(?:' + rsLower + '|' + rsMisc + ')', var rsMiscLower = '(?:' + rsLower + '|' + rsMisc + ')',
rsUpperMisc = '(?:' + rsUpper + '|' + rsMisc + ')', rsMiscUpper = '(?:' + rsUpper + '|' + rsMisc + ')',
rsOptLowerContr = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?', rsOptContrLower = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?',
rsOptUpperContr = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?', rsOptContrUpper = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?',
reOptMod = rsModifier + '?', reOptMod = rsModifier + '?',
rsOptVar = '[' + rsVarRange + ']?', rsOptVar = '[' + rsVarRange + ']?',
rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*',
rsOrdLower = '\\d*(?:(?:1st|2nd|3rd|(?![123])\\dth)\\b)',
rsOrdUpper = '\\d*(?:(?:1ST|2ND|3RD|(?![123])\\dTH)\\b)',
rsSeq = rsOptVar + reOptMod + rsOptJoin, rsSeq = rsOptVar + reOptMod + rsOptJoin,
rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq, rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq,
rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')';
@@ -252,10 +254,12 @@
/** Used to match complex or compound words. */ /** Used to match complex or compound words. */
var reUnicodeWord = RegExp([ var reUnicodeWord = RegExp([
rsUpper + '?' + rsLower + '+' + rsOptLowerContr + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')', rsUpper + '?' + rsLower + '+' + rsOptContrLower + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')',
rsUpperMisc + '+' + rsOptUpperContr + '(?=' + [rsBreak, rsUpper + rsLowerMisc, '$'].join('|') + ')', rsMiscUpper + '+' + rsOptContrUpper + '(?=' + [rsBreak, rsUpper + rsMiscLower, '$'].join('|') + ')',
rsUpper + '?' + rsLowerMisc + '+' + rsOptLowerContr, rsUpper + '?' + rsMiscLower + '+' + rsOptContrLower,
rsUpper + '+' + rsOptUpperContr, rsUpper + '+' + rsOptContrUpper,
rsOrdUpper,
rsOrdLower,
rsDigits, rsDigits,
rsEmoji rsEmoji
].join('|'), 'g'); ].join('|'), 'g');

View File

@@ -4454,7 +4454,7 @@
QUnit.module('lodash.deburr'); QUnit.module('lodash.deburr');
(function() { (function() {
QUnit.test('should convert Latin-1 Supplement letters to basic Latin', function(assert) { QUnit.test('should convert Latin Unicode letters to basic Latin', function(assert) {
assert.expect(1); assert.expect(1);
var actual = lodashStable.map(burredLetters, _.deburr); var actual = lodashStable.map(burredLetters, _.deburr);
@@ -25109,7 +25109,7 @@
QUnit.module('lodash.words'); QUnit.module('lodash.words');
(function() { (function() {
QUnit.test('should match words containing Latin-1 Supplement letters', function(assert) { QUnit.test('should match words containing Latin Unicode letters', function(assert) {
assert.expect(1); assert.expect(1);
var expected = lodashStable.map(burredLetters, function(letter) { var expected = lodashStable.map(burredLetters, function(letter) {
@@ -25123,31 +25123,6 @@
assert.deepEqual(actual, expected); assert.deepEqual(actual, expected);
}); });
QUnit.test('should not treat mathematical operators as words', function(assert) {
assert.expect(1);
var operators = ['\xac', '\xb1', '\xd7', '\xf7'],
expected = lodashStable.map(operators, stubArray),
actual = lodashStable.map(operators, _.words);
assert.deepEqual(actual, expected);
});
QUnit.test('should not treat punctuation as words', function(assert) {
assert.expect(1);
var marks = [
'\u2012', '\u2013', '\u2014', '\u2015',
'\u2024', '\u2025', '\u2026',
'\u205d', '\u205e'
];
var expected = lodashStable.map(marks, stubArray),
actual = lodashStable.map(marks, _.words);
assert.deepEqual(actual, expected);
});
QUnit.test('should support a `pattern` argument', function(assert) { QUnit.test('should support a `pattern` argument', function(assert) {
assert.expect(2); assert.expect(2);
@@ -25180,24 +25155,73 @@
assert.deepEqual(_.words('æiou2Consonants'), ['æiou', '2', 'Consonants']); assert.deepEqual(_.words('æiou2Consonants'), ['æiou', '2', 'Consonants']);
}); });
QUnit.test('should work with contractions', function(assert) { QUnit.test('should not treat contractions as separate words', function(assert) {
assert.expect(2); assert.expect(4);
var postfixes = ['d', 'll', 'm', 're', 's', 't', 've']; var postfixes = ['d', 'll', 'm', 're', 's', 't', 've'];
lodashStable.each(["'", '\u2019'], function(apos) { lodashStable.each(["'", '\u2019'], function(apos) {
var actual = lodashStable.map(postfixes, function(postfix) { lodashStable.times(2, function(index) {
return _.words('a b' + apos + postfix + ' c'); var actual = lodashStable.map(postfixes, function(postfix) {
var string = 'a b' + apos + postfix + ' c';
return _.words(string[index ? 'toUpperCase' : 'toLowerCase']());
});
var expected = lodashStable.map(postfixes, function(postfix) {
var words = ['a', 'b' + apos + postfix, 'c'];
return lodashStable.map(words, function(word) {
return word[index ? 'toUpperCase' : 'toLowerCase']();
});
});
assert.deepEqual(actual, expected);
});
});
});
QUnit.test('should not treat ordinal numbers as separate words', function(assert) {
assert.expect(2);
var ordinals = ['1st', '2nd', '3rd', '4th'];
lodashStable.times(2, function(index) {
var expected = lodashStable.map(ordinals, function(ordinal) {
return [ordinal[index ? 'toUpperCase' : 'toLowerCase']()];
}); });
var expected = lodashStable.map(postfixes, function(postfix) { var actual = lodashStable.map(expected, function(words) {
return ['a', 'b' + apos + postfix, 'c']; return _.words(words[0]);
}); });
assert.deepEqual(actual, expected); assert.deepEqual(actual, expected);
}); });
}); });
QUnit.test('should not treat mathematical operators as words', function(assert) {
assert.expect(1);
var operators = ['\xac', '\xb1', '\xd7', '\xf7'],
expected = lodashStable.map(operators, stubArray),
actual = lodashStable.map(operators, _.words);
assert.deepEqual(actual, expected);
});
QUnit.test('should not treat punctuation as words', function(assert) {
assert.expect(1);
var marks = [
'\u2012', '\u2013', '\u2014', '\u2015',
'\u2024', '\u2025', '\u2026',
'\u205d', '\u205e'
];
var expected = lodashStable.map(marks, stubArray),
actual = lodashStable.map(marks, _.words);
assert.deepEqual(actual, expected);
});
QUnit.test('should work as an iteratee for methods like `_.map`', function(assert) { QUnit.test('should work as an iteratee for methods like `_.map`', function(assert) {
assert.expect(1); assert.expect(1);