From eb723422c12750f3ae34293d4e00fb781ee48a42 Mon Sep 17 00:00:00 2001 From: John-David Dalton Date: Wed, 7 Oct 2015 23:08:35 -0700 Subject: [PATCH] Update `reWords` detection. --- lodash.js | 52 +++++++++++++++++++++++++++++++++++----------------- test/test.js | 8 ++++---- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/lodash.js b/lodash.js index f60c7ffac..914fa6c65 100644 --- a/lodash.js +++ b/lodash.js @@ -143,29 +143,47 @@ /** Used to match unescaped characters in compiled string literals. */ var reUnescapedString = /['\n\r\u2028\u2029\\]/g; - /** Used to compose unicode related regexes. */ + /** Used to compose unicode character classes. */ var rsAstralRange = '\\ud800-\\udfff', - rsAstral = '[' + rsAstralRange + ']', rsComboRange = '\\u0300-\\u036f\\ufe20-\\ufe23', + rsDingbatRange = '\\u2700-\\u27bf', + rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff', + rsMathOpRange = '\\xd7\\xf7', + rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf', + rsQuoteRange = '\\u2018\\u2019\\u201c\\u201d', + rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000', + rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde', + rsVarRange = '\\ufe0e\\ufe0f', + rsBreakRange = rsMathOpRange + rsNonCharRange + rsQuoteRange + rsSpaceRange; + + /** Used to compose unicode capture groups. */ + var rsAstral = '[' + rsAstralRange + ']', + rsBreak = '[' + rsBreakRange + ']', rsCombo = '[' + rsComboRange + ']', rsDigits = '\\d+', - rsDingbat = '[\\u2700-\\u27bf]', - rsLowers = '[a-z\\xdf-\\xf6\\xf8-\\xff]+', + rsDingbat = '[' + rsDingbatRange + ']', + rsLower = '[' + rsLowerRange + ']', + rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']', rsModifier = '(?:\\ud83c[\\udffb-\\udfff])', rsNonAstral = '[^' + rsAstralRange + ']', rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', + rsSpace = '[' + rsSpaceRange + ']', rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', - rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?' , rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')', - rsUpper = '[A-Z\\xc0-\\xd6\\xd8-\\xde]', - rsVS = '\\ufe0e\\ufe0f', - rsZWJ = '\\u200d', + rsUpper = '[' + rsUpperRange + ']', + rsZWJ = '\\u200d'; + + /** Used to compose unicode regexes. */ + var rsLowerMisc = '(?:' + rsLower + '|' + rsMisc + ')', + rsUpperMisc = '(?:' + rsUpper + '|' + rsMisc + ')', reOptMod = rsModifier + '?', - rsOptVS = '[' + rsVS + ']?', - rsJoiner = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVS + reOptMod + ')*', - rsSeq = rsOptVS + reOptMod + rsJoiner; + rsOptVar = '[' + rsVarRange + ']?', + rsJoiner = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', + rsSeq = rsOptVar + reOptMod + rsJoiner, + rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq, + rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; /** Used to match [zero-width joiners and code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */ - var reAdvSymbol = RegExp('[' + rsZWJ + rsVS + rsAstralRange + rsComboRange + ']'); + var reAdvSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboRange + rsVarRange + ']'); /** Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). */ var reComboMark = RegExp(rsCombo, 'g'); @@ -175,11 +193,11 @@ /** Used to match words to create compound words. */ var reWord = RegExp([ - rsUpper + '+(?=' + rsUpper + rsLowers + ')', - rsUpper + '?' + rsLowers, - rsUpper + '+', - '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq, - rsDigits + rsUpper + '?' + rsLower + '+(?=' + [rsBreak, rsUpper, '$'].join('|') + ')', + rsUpperMisc + '+(?=' + [rsBreak, rsUpper + rsLowerMisc, '$'].join('|') + ')', + rsUpper + '?' + rsLowerMisc + '+', + rsDigits + '(?:' + rsLowerMisc + '+)?', + rsEmoji ].join('|'), 'g'); /** Used to assign default `context` object properties. */ diff --git a/test/test.js b/test/test.js index 5a44bebb1..2a81b3806 100644 --- a/test/test.js +++ b/test/test.js @@ -1832,7 +1832,7 @@ return func(result); }, 'enable 24h format'); - assert.strictEqual(actual, 'enable24HFormat'); + assert.strictEqual(actual, 'enable24hFormat'); }); }()); @@ -1844,7 +1844,7 @@ QUnit.test('should work with numbers', function(assert) { assert.expect(4); - assert.strictEqual(_.camelCase('enable 24h format'), 'enable24HFormat'); + assert.strictEqual(_.camelCase('enable 24h format'), 'enable24hFormat'); assert.strictEqual(_.camelCase('too legit 2 quit'), 'tooLegit2Quit'); assert.strictEqual(_.camelCase('walk 500 miles'), 'walk500Miles'); assert.strictEqual(_.camelCase('xhr2 request'), 'xhr2Request'); @@ -19197,7 +19197,7 @@ }); var expected = _.map(values, function(value) { - return [1, [value], ['o']]; + return [1, [value], [value]]; }); var actual = _.map(values, function(value) { @@ -19770,7 +19770,7 @@ assert.expect(6); assert.deepEqual(_.words('aeiouAreVowels'), ['aeiou', 'Are', 'Vowels']); - assert.deepEqual(_.words('enable 24h format'), ['enable', '24', 'h', 'format']); + assert.deepEqual(_.words('enable 24h format'), ['enable', '24h', 'format']); assert.deepEqual(_.words('LETTERSAeiouAreVowels'), ['LETTERS', 'Aeiou', 'Are', 'Vowels']); assert.deepEqual(_.words('tooLegit2Quit'), ['too', 'Legit', '2', 'Quit']); assert.deepEqual(_.words('walk500Miles'), ['walk', '500', 'Miles']);