diff --git a/README.md b/README.md index 784098aed..3048b8324 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# lodash-es v4.14.2 +# lodash-es v4.15.0 The [Lodash](https://lodash.com/) library exported as [ES](http://www.ecma-international.org/ecma-262/6.0/) modules. @@ -7,4 +7,4 @@ Generated using [lodash-cli](https://www.npmjs.com/package/lodash-cli): $ lodash modularize exports=es -o ./ ``` -See the [package source](https://github.com/lodash/lodash/tree/4.14.2-es) for more details. +See the [package source](https://github.com/lodash/lodash/tree/4.15.0-es) for more details. diff --git a/_arrayIncludes.js b/_arrayIncludes.js index a3dfdf9d1..12b48843f 100644 --- a/_arrayIncludes.js +++ b/_arrayIncludes.js @@ -5,7 +5,7 @@ import baseIndexOf from './_baseIndexOf.js'; * specifying an index to search from. * * @private - * @param {Array} [array] The array to search. + * @param {Array} [array] The array to inspect. * @param {*} target The value to search for. * @returns {boolean} Returns `true` if `target` is found, else `false`. */ diff --git a/_arrayIncludesWith.js b/_arrayIncludesWith.js index 24edf9fcf..817ab7613 100644 --- a/_arrayIncludesWith.js +++ b/_arrayIncludesWith.js @@ -2,7 +2,7 @@ * This function is like `arrayIncludes` except that it accepts a comparator. * * @private - * @param {Array} [array] The array to search. + * @param {Array} [array] The array to inspect. * @param {*} target The value to search for. * @param {Function} comparator The comparator invoked per element. * @returns {boolean} Returns `true` if `target` is found, else `false`. diff --git a/_arrayLikeKeys.js b/_arrayLikeKeys.js index 16759b205..387113d19 100644 --- a/_arrayLikeKeys.js +++ b/_arrayLikeKeys.js @@ -2,7 +2,6 @@ import baseTimes from './_baseTimes.js'; import isArguments from './isArguments.js'; import isArray from './isArray.js'; import isIndex from './_isIndex.js'; -import isString from './isString.js'; /** Used for built-in method references. 
*/ var objectProto = Object.prototype; @@ -19,7 +18,9 @@ var hasOwnProperty = objectProto.hasOwnProperty; * @returns {Array} Returns the array of property names. */ function arrayLikeKeys(value, inherited) { - var result = (isArray(value) || isString(value) || isArguments(value)) + // Safari 8.1 makes `arguments.callee` enumerable in strict mode. + // Safari 9 makes `arguments.length` enumerable in strict mode. + var result = (isArray(value) || isArguments(value)) ? baseTimes(value.length, String) : []; diff --git a/_asciiSize.js b/_asciiSize.js new file mode 100644 index 000000000..bbf4df436 --- /dev/null +++ b/_asciiSize.js @@ -0,0 +1,12 @@ +import baseProperty from './_baseProperty.js'; + +/** + * Gets the size of an ASCII `string`. + * + * @private + * @param {string} string The string to inspect. + * @returns {number} Returns the string size. + */ +var asciiSize = baseProperty('length'); + +export default asciiSize; diff --git a/_asciiToArray.js b/_asciiToArray.js new file mode 100644 index 000000000..1ab7be69c --- /dev/null +++ b/_asciiToArray.js @@ -0,0 +1,12 @@ +/** + * Converts an ASCII `string` to an array. + * + * @private + * @param {string} string The string to convert. + * @returns {Array} Returns the converted array. + */ +function asciiToArray(string) { + return string.split(''); +} + +export default asciiToArray; diff --git a/_asciiWords.js b/_asciiWords.js new file mode 100644 index 000000000..7840636ba --- /dev/null +++ b/_asciiWords.js @@ -0,0 +1,15 @@ +/** Used to match words composed of alphanumeric characters. */ +var reAsciiWord = /[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g; + +/** + * Splits an ASCII `string` into an array of its words. + * + * @private + * @param {string} string The string to inspect. + * @returns {Array} Returns the words of `string`. 
+ */ +function asciiWords(string) { + return string.match(reAsciiWord) || []; +} + +export default asciiWords; diff --git a/_assocIndexOf.js b/_assocIndexOf.js index d4985ce1f..88afb3979 100644 --- a/_assocIndexOf.js +++ b/_assocIndexOf.js @@ -4,7 +4,7 @@ import eq from './eq.js'; * Gets the index at which the `key` is found in `array` of key-value pairs. * * @private - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {*} key The key to search for. * @returns {number} Returns the index of the matched value, else `-1`. */ diff --git a/_baseFindIndex.js b/_baseFindIndex.js index 0295f7d96..860636ee5 100644 --- a/_baseFindIndex.js +++ b/_baseFindIndex.js @@ -3,7 +3,7 @@ * support for iteratee shorthands. * * @private - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {Function} predicate The function invoked per iteration. * @param {number} fromIndex The index to search from. * @param {boolean} [fromRight] Specify iterating from right to left. diff --git a/_baseFindKey.js b/_baseFindKey.js index 816e74374..6d1932eed 100644 --- a/_baseFindKey.js +++ b/_baseFindKey.js @@ -4,7 +4,7 @@ * using `eachFunc`. * * @private - * @param {Array|Object} collection The collection to search. + * @param {Array|Object} collection The collection to inspect. * @param {Function} predicate The function invoked per iteration. * @param {Function} eachFunc The function to iterate over `collection`. * @returns {*} Returns the found element or its key, else `undefined`. diff --git a/_baseIndexOf.js b/_baseIndexOf.js index 9152c3edc..69a5c3b21 100644 --- a/_baseIndexOf.js +++ b/_baseIndexOf.js @@ -5,7 +5,7 @@ import baseIsNaN from './_baseIsNaN.js'; * The base implementation of `_.indexOf` without `fromIndex` bounds checks. * * @private - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {*} value The value to search for. 
* @param {number} fromIndex The index to search from. * @returns {number} Returns the index of the matched value, else `-1`. diff --git a/_baseIndexOfWith.js b/_baseIndexOfWith.js index 028ad9159..38831b26b 100644 --- a/_baseIndexOfWith.js +++ b/_baseIndexOfWith.js @@ -2,7 +2,7 @@ * This function is like `baseIndexOf` except that it accepts a comparator. * * @private - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {*} value The value to search for. * @param {number} fromIndex The index to search from. * @param {Function} comparator The comparator invoked per element. diff --git a/_baseIsNative.js b/_baseIsNative.js index d41a99c5b..8e38ee54b 100644 --- a/_baseIsNative.js +++ b/_baseIsNative.js @@ -14,10 +14,11 @@ var reRegExpChar = /[\\^$.*+?()[\]{}|]/g; var reIsHostCtor = /^\[object .+?Constructor\]$/; /** Used for built-in method references. */ -var objectProto = Object.prototype; +var funcProto = Function.prototype, + objectProto = Object.prototype; /** Used to resolve the decompiled source of functions. */ -var funcToString = Function.prototype.toString; +var funcToString = funcProto.toString; /** Used to check objects for own properties. */ var hasOwnProperty = objectProto.hasOwnProperty; diff --git a/_baseSet.js b/_baseSet.js index e19e989be..187b3e5c2 100644 --- a/_baseSet.js +++ b/_baseSet.js @@ -9,7 +9,7 @@ import toKey from './_toKey.js'; * The base implementation of `_.set`. * * @private - * @param {Object} object The object to query. + * @param {Object} object The object to modify. * @param {Array|string} path The path of the property to set. * @param {*} value The value to set. * @param {Function} [customizer] The function to customize path creation. diff --git a/_baseUpdate.js b/_baseUpdate.js index 8bf7d1d11..70dd150f4 100644 --- a/_baseUpdate.js +++ b/_baseUpdate.js @@ -5,7 +5,7 @@ import baseSet from './_baseSet.js'; * The base implementation of `_.update`. 
* * @private - * @param {Object} object The object to query. + * @param {Object} object The object to modify. * @param {Array|string} path The path of the property to update. * @param {Function} updater The function to produce the updated value. * @param {Function} [customizer] The function to customize path creation. diff --git a/_createCaseFirst.js b/_createCaseFirst.js index f569ce8c7..333c7c4c0 100644 --- a/_createCaseFirst.js +++ b/_createCaseFirst.js @@ -1,5 +1,5 @@ import castSlice from './_castSlice.js'; -import reHasComplexSymbol from './_reHasComplexSymbol.js'; +import hasUnicode from './_hasUnicode.js'; import stringToArray from './_stringToArray.js'; import toString from './toString.js'; @@ -14,7 +14,7 @@ function createCaseFirst(methodName) { return function(string) { string = toString(string); - var strSymbols = reHasComplexSymbol.test(string) + var strSymbols = hasUnicode(string) ? stringToArray(string) : undefined; diff --git a/_createPadding.js b/_createPadding.js index b1653992f..1e35cd82b 100644 --- a/_createPadding.js +++ b/_createPadding.js @@ -1,7 +1,7 @@ import baseRepeat from './_baseRepeat.js'; import baseToString from './_baseToString.js'; import castSlice from './_castSlice.js'; -import reHasComplexSymbol from './_reHasComplexSymbol.js'; +import hasUnicode from './_hasUnicode.js'; import stringSize from './_stringSize.js'; import stringToArray from './_stringToArray.js'; @@ -25,7 +25,7 @@ function createPadding(length, chars) { return charsLength ? baseRepeat(chars, length) : chars; } var result = baseRepeat(chars, nativeCeil(length / stringSize(chars))); - return reHasComplexSymbol.test(chars) + return hasUnicode(chars) ? 
castSlice(stringToArray(result), 0, length).join('') : result.slice(0, length); } diff --git a/_deburrLetter.js b/_deburrLetter.js index fe6a3ab90..896d14c50 100644 --- a/_deburrLetter.js +++ b/_deburrLetter.js @@ -1,15 +1,16 @@ import basePropertyOf from './_basePropertyOf.js'; -/** Used to map latin-1 supplementary letters to basic latin letters. */ +/** Used to map Latin Unicode letters to basic Latin letters. */ var deburredLetters = { + // Latin-1 Supplement block. '\xc0': 'A', '\xc1': 'A', '\xc2': 'A', '\xc3': 'A', '\xc4': 'A', '\xc5': 'A', '\xe0': 'a', '\xe1': 'a', '\xe2': 'a', '\xe3': 'a', '\xe4': 'a', '\xe5': 'a', '\xc7': 'C', '\xe7': 'c', '\xd0': 'D', '\xf0': 'd', '\xc8': 'E', '\xc9': 'E', '\xca': 'E', '\xcb': 'E', '\xe8': 'e', '\xe9': 'e', '\xea': 'e', '\xeb': 'e', - '\xcC': 'I', '\xcd': 'I', '\xce': 'I', '\xcf': 'I', - '\xeC': 'i', '\xed': 'i', '\xee': 'i', '\xef': 'i', + '\xcc': 'I', '\xcd': 'I', '\xce': 'I', '\xcf': 'I', + '\xec': 'i', '\xed': 'i', '\xee': 'i', '\xef': 'i', '\xd1': 'N', '\xf1': 'n', '\xd2': 'O', '\xd3': 'O', '\xd4': 'O', '\xd5': 'O', '\xd6': 'O', '\xd8': 'O', '\xf2': 'o', '\xf3': 'o', '\xf4': 'o', '\xf5': 'o', '\xf6': 'o', '\xf8': 'o', @@ -18,11 +19,48 @@ var deburredLetters = { '\xdd': 'Y', '\xfd': 'y', '\xff': 'y', '\xc6': 'Ae', '\xe6': 'ae', '\xde': 'Th', '\xfe': 'th', - '\xdf': 'ss' + '\xdf': 'ss', + // Latin Extended-A block. 
+ '\u0100': 'A', '\u0102': 'A', '\u0104': 'A', + '\u0101': 'a', '\u0103': 'a', '\u0105': 'a', + '\u0106': 'C', '\u0108': 'C', '\u010a': 'C', '\u010c': 'C', + '\u0107': 'c', '\u0109': 'c', '\u010b': 'c', '\u010d': 'c', + '\u010e': 'D', '\u0110': 'D', '\u010f': 'd', '\u0111': 'd', + '\u0112': 'E', '\u0114': 'E', '\u0116': 'E', '\u0118': 'E', '\u011a': 'E', + '\u0113': 'e', '\u0115': 'e', '\u0117': 'e', '\u0119': 'e', '\u011b': 'e', + '\u011c': 'G', '\u011e': 'G', '\u0120': 'G', '\u0122': 'G', + '\u011d': 'g', '\u011f': 'g', '\u0121': 'g', '\u0123': 'g', + '\u0124': 'H', '\u0126': 'H', '\u0125': 'h', '\u0127': 'h', + '\u0128': 'I', '\u012a': 'I', '\u012c': 'I', '\u012e': 'I', '\u0130': 'I', + '\u0129': 'i', '\u012b': 'i', '\u012d': 'i', '\u012f': 'i', '\u0131': 'i', + '\u0134': 'J', '\u0135': 'j', + '\u0136': 'K', '\u0137': 'k', '\u0138': 'k', + '\u0139': 'L', '\u013b': 'L', '\u013d': 'L', '\u013f': 'L', '\u0141': 'L', + '\u013a': 'l', '\u013c': 'l', '\u013e': 'l', '\u0140': 'l', '\u0142': 'l', + '\u0143': 'N', '\u0145': 'N', '\u0147': 'N', '\u014a': 'N', + '\u0144': 'n', '\u0146': 'n', '\u0148': 'n', '\u014b': 'n', + '\u014c': 'O', '\u014e': 'O', '\u0150': 'O', + '\u014d': 'o', '\u014f': 'o', '\u0151': 'o', + '\u0154': 'R', '\u0156': 'R', '\u0158': 'R', + '\u0155': 'r', '\u0157': 'r', '\u0159': 'r', + '\u015a': 'S', '\u015c': 'S', '\u015e': 'S', '\u0160': 'S', + '\u015b': 's', '\u015d': 's', '\u015f': 's', '\u0161': 's', + '\u0162': 'T', '\u0164': 'T', '\u0166': 'T', + '\u0163': 't', '\u0165': 't', '\u0167': 't', + '\u0168': 'U', '\u016a': 'U', '\u016c': 'U', '\u016e': 'U', '\u0170': 'U', '\u0172': 'U', + '\u0169': 'u', '\u016b': 'u', '\u016d': 'u', '\u016f': 'u', '\u0171': 'u', '\u0173': 'u', + '\u0174': 'W', '\u0175': 'w', + '\u0176': 'Y', '\u0177': 'y', '\u0178': 'Y', + '\u0179': 'Z', '\u017b': 'Z', '\u017d': 'Z', + '\u017a': 'z', '\u017c': 'z', '\u017e': 'z', + '\u0132': 'IJ', '\u0133': 'ij', + '\u0152': 'Oe', '\u0153': 'oe', + '\u0149': "'n", '\u017f': 'ss' }; 
/** - * Used by `_.deburr` to convert latin-1 supplementary letters to basic latin letters. + * Used by `_.deburr` to convert Latin-1 Supplement and Latin Extended-A + * letters to basic Latin letters. * * @private * @param {string} letter The matched letter to deburr. diff --git a/_getTag.js b/_getTag.js index 3fdc421c5..d2c415613 100644 --- a/_getTag.js +++ b/_getTag.js @@ -42,7 +42,7 @@ var dataViewCtorString = toSource(DataView), var getTag = baseGetTag; // Fallback for data views, maps, sets, and weak maps in IE 11, -// for data views in Edge, and promises in Node.js. +// for data views in Edge < 14, and promises in Node.js. if ((DataView && getTag(new DataView(new ArrayBuffer(1))) != dataViewTag) || (Map && getTag(new Map) != mapTag) || (Promise && getTag(Promise.resolve()) != promiseTag) || diff --git a/_hasPath.js b/_hasPath.js index 669355eb4..019cf4135 100644 --- a/_hasPath.js +++ b/_hasPath.js @@ -4,7 +4,6 @@ import isArray from './isArray.js'; import isIndex from './_isIndex.js'; import isKey from './_isKey.js'; import isLength from './isLength.js'; -import isString from './isString.js'; import toKey from './_toKey.js'; /** @@ -35,7 +34,7 @@ function hasPath(object, path, hasFunc) { } var length = object ? object.length : 0; return !!length && isLength(length) && isIndex(key, length) && - (isArray(object) || isString(object) || isArguments(object)); + (isArray(object) || isArguments(object)); } export default hasPath; diff --git a/_reHasComplexSymbol.js b/_hasUnicode.js similarity index 52% rename from _reHasComplexSymbol.js rename to _hasUnicode.js index fff4f9cbb..ae9548690 100644 --- a/_reHasComplexSymbol.js +++ b/_hasUnicode.js @@ -8,6 +8,17 @@ var rsAstralRange = '\\ud800-\\udfff', var rsZWJ = '\\u200d'; /** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). 
*/ -var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboMarksRange + rsComboSymbolsRange + rsVarRange + ']'); +var reHasUnicode = RegExp('[' + rsZWJ + rsAstralRange + rsComboMarksRange + rsComboSymbolsRange + rsVarRange + ']'); -export default reHasComplexSymbol; +/** + * Checks if `string` contains Unicode symbols. + * + * @private + * @param {string} string The string to inspect. + * @returns {boolean} Returns `true` if a symbol is found, else `false`. + */ +function hasUnicode(string) { + return reHasUnicode.test(string); +} + +export default hasUnicode; diff --git a/_hasUnicodeWord.js b/_hasUnicodeWord.js new file mode 100644 index 000000000..b2a026093 --- /dev/null +++ b/_hasUnicodeWord.js @@ -0,0 +1,15 @@ +/** Used to detect strings that need a more robust regexp to match words. */ +var reHasUnicodeWord = /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/; + +/** + * Checks if `string` contains a word composed of Unicode symbols. + * + * @private + * @param {string} string The string to inspect. + * @returns {boolean} Returns `true` if a word is found, else `false`. + */ +function hasUnicodeWord(string) { + return reHasUnicodeWord.test(string); +} + +export default hasUnicodeWord; diff --git a/_stringSize.js b/_stringSize.js index 1109286ae..17443a8d1 100644 --- a/_stringSize.js +++ b/_stringSize.js @@ -1,30 +1,6 @@ -import reHasComplexSymbol from './_reHasComplexSymbol.js'; - -/** Used to compose unicode character classes. */ -var rsAstralRange = '\\ud800-\\udfff', - rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', - rsComboSymbolsRange = '\\u20d0-\\u20f0', - rsVarRange = '\\ufe0e\\ufe0f'; - -/** Used to compose unicode capture groups. 
*/ -var rsAstral = '[' + rsAstralRange + ']', - rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']', - rsFitz = '\\ud83c[\\udffb-\\udfff]', - rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', - rsNonAstral = '[^' + rsAstralRange + ']', - rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', - rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', - rsZWJ = '\\u200d'; - -/** Used to compose unicode regexes. */ -var reOptMod = rsModifier + '?', - rsOptVar = '[' + rsVarRange + ']?', - rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', - rsSeq = rsOptVar + reOptMod + rsOptJoin, - rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; - -/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ -var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g'); +import asciiSize from './_asciiSize.js'; +import hasUnicode from './_hasUnicode.js'; +import unicodeSize from './_unicodeSize.js'; /** * Gets the number of symbols in `string`. @@ -34,14 +10,9 @@ var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, * @returns {number} Returns the string size. */ function stringSize(string) { - if (!(string && reHasComplexSymbol.test(string))) { - return string.length; - } - var result = reComplexSymbol.lastIndex = 0; - while (reComplexSymbol.test(string)) { - result++; - } - return result; + return hasUnicode(string) + ? unicodeSize(string) + : asciiSize(string); } export default stringSize; diff --git a/_stringToArray.js b/_stringToArray.js index 136909b3c..1ce84fd4e 100644 --- a/_stringToArray.js +++ b/_stringToArray.js @@ -1,28 +1,6 @@ -/** Used to compose unicode character classes. 
*/ -var rsAstralRange = '\\ud800-\\udfff', - rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', - rsComboSymbolsRange = '\\u20d0-\\u20f0', - rsVarRange = '\\ufe0e\\ufe0f'; - -/** Used to compose unicode capture groups. */ -var rsAstral = '[' + rsAstralRange + ']', - rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']', - rsFitz = '\\ud83c[\\udffb-\\udfff]', - rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', - rsNonAstral = '[^' + rsAstralRange + ']', - rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', - rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', - rsZWJ = '\\u200d'; - -/** Used to compose unicode regexes. */ -var reOptMod = rsModifier + '?', - rsOptVar = '[' + rsVarRange + ']?', - rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', - rsSeq = rsOptVar + reOptMod + rsOptJoin, - rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; - -/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ -var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g'); +import asciiToArray from './_asciiToArray.js'; +import hasUnicode from './_hasUnicode.js'; +import unicodeToArray from './_unicodeToArray.js'; /** * Converts `string` to an array. @@ -32,7 +10,9 @@ var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, * @returns {Array} Returns the converted array. */ function stringToArray(string) { - return string.match(reComplexSymbol); + return hasUnicode(string) + ? unicodeToArray(string) + : asciiToArray(string); } export default stringToArray; diff --git a/_toSource.js b/_toSource.js index 04d5b5072..2a7f05f1f 100644 --- a/_toSource.js +++ b/_toSource.js @@ -1,5 +1,8 @@ +/** Used for built-in method references. */ +var funcProto = Function.prototype; + /** Used to resolve the decompiled source of functions. 
*/ -var funcToString = Function.prototype.toString; +var funcToString = funcProto.toString; /** * Converts `func` to its source code. diff --git a/_unicodeSize.js b/_unicodeSize.js new file mode 100644 index 000000000..83117b0db --- /dev/null +++ b/_unicodeSize.js @@ -0,0 +1,42 @@ +/** Used to compose unicode character classes. */ +var rsAstralRange = '\\ud800-\\udfff', + rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', + rsComboSymbolsRange = '\\u20d0-\\u20f0', + rsVarRange = '\\ufe0e\\ufe0f'; + +/** Used to compose unicode capture groups. */ +var rsAstral = '[' + rsAstralRange + ']', + rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']', + rsFitz = '\\ud83c[\\udffb-\\udfff]', + rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', + rsNonAstral = '[^' + rsAstralRange + ']', + rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', + rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', + rsZWJ = '\\u200d'; + +/** Used to compose unicode regexes. */ +var reOptMod = rsModifier + '?', + rsOptVar = '[' + rsVarRange + ']?', + rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', + rsSeq = rsOptVar + reOptMod + rsOptJoin, + rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; + +/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ +var reUnicode = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g'); + +/** + * Gets the size of a Unicode `string`. + * + * @private + * @param {string} string The string to inspect. + * @returns {number} Returns the string size. 
+ */ +function unicodeSize(string) { + var result = reUnicode.lastIndex = 0; + while (reUnicode.test(string)) { + result++; + } + return result; +} + +export default unicodeSize; diff --git a/_unicodeToArray.js b/_unicodeToArray.js new file mode 100644 index 000000000..0ef8e3e4d --- /dev/null +++ b/_unicodeToArray.js @@ -0,0 +1,38 @@ +/** Used to compose unicode character classes. */ +var rsAstralRange = '\\ud800-\\udfff', + rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', + rsComboSymbolsRange = '\\u20d0-\\u20f0', + rsVarRange = '\\ufe0e\\ufe0f'; + +/** Used to compose unicode capture groups. */ +var rsAstral = '[' + rsAstralRange + ']', + rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']', + rsFitz = '\\ud83c[\\udffb-\\udfff]', + rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', + rsNonAstral = '[^' + rsAstralRange + ']', + rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', + rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', + rsZWJ = '\\u200d'; + +/** Used to compose unicode regexes. */ +var reOptMod = rsModifier + '?', + rsOptVar = '[' + rsVarRange + ']?', + rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', + rsSeq = rsOptVar + reOptMod + rsOptJoin, + rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; + +/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ +var reUnicode = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g'); + +/** + * Converts a Unicode `string` to an array. + * + * @private + * @param {string} string The string to convert. + * @returns {Array} Returns the converted array. 
+ */ +function unicodeToArray(string) { + return string.match(reUnicode) || []; +} + +export default unicodeToArray; diff --git a/_unicodeWords.js b/_unicodeWords.js new file mode 100644 index 000000000..9d605fe11 --- /dev/null +++ b/_unicodeWords.js @@ -0,0 +1,63 @@ +/** Used to compose unicode character classes. */ +var rsAstralRange = '\\ud800-\\udfff', + rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', + rsComboSymbolsRange = '\\u20d0-\\u20f0', + rsDingbatRange = '\\u2700-\\u27bf', + rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff', + rsMathOpRange = '\\xac\\xb1\\xd7\\xf7', + rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf', + rsPunctuationRange = '\\u2000-\\u206f', + rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000', + rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde', + rsVarRange = '\\ufe0e\\ufe0f', + rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange; + +/** Used to compose unicode capture groups. */ +var rsApos = "['\u2019]", + rsBreak = '[' + rsBreakRange + ']', + rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']', + rsDigits = '\\d+', + rsDingbat = '[' + rsDingbatRange + ']', + rsLower = '[' + rsLowerRange + ']', + rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']', + rsFitz = '\\ud83c[\\udffb-\\udfff]', + rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', + rsNonAstral = '[^' + rsAstralRange + ']', + rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', + rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', + rsUpper = '[' + rsUpperRange + ']', + rsZWJ = '\\u200d'; + +/** Used to compose unicode regexes. 
*/ +var rsLowerMisc = '(?:' + rsLower + '|' + rsMisc + ')', + rsUpperMisc = '(?:' + rsUpper + '|' + rsMisc + ')', + rsOptLowerContr = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?', + rsOptUpperContr = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?', + reOptMod = rsModifier + '?', + rsOptVar = '[' + rsVarRange + ']?', + rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', + rsSeq = rsOptVar + reOptMod + rsOptJoin, + rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq; + +/** Used to match complex or compound words. */ +var reUnicodeWord = RegExp([ + rsUpper + '?' + rsLower + '+' + rsOptLowerContr + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')', + rsUpperMisc + '+' + rsOptUpperContr + '(?=' + [rsBreak, rsUpper + rsLowerMisc, '$'].join('|') + ')', + rsUpper + '?' + rsLowerMisc + '+' + rsOptLowerContr, + rsUpper + '+' + rsOptUpperContr, + rsDigits, + rsEmoji +].join('|'), 'g'); + +/** + * Splits a Unicode `string` into an array of its words. + * + * @private + * @param {string} string The string to inspect. + * @returns {Array} Returns the words of `string`. + */ +function unicodeWords(string) { + return string.match(reUnicodeWord) || []; +} + +export default unicodeWords; diff --git a/deburr.js b/deburr.js index 1d93de941..0fa630440 100644 --- a/deburr.js +++ b/deburr.js @@ -1,8 +1,8 @@ import deburrLetter from './_deburrLetter.js'; import toString from './toString.js'; -/** Used to match latin-1 supplementary letters (excluding mathematical operators). */ -var reLatin1 = /[\xc0-\xd6\xd8-\xde\xdf-\xf6\xf8-\xff]/g; +/** Used to match Latin Unicode letters (excluding mathematical operators). */ +var reLatin = /[\xc0-\xd6\xd8-\xf6\xf8-\xff\u0100-\u017f]/g; /** Used to compose unicode character classes. 
*/ var rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', @@ -19,8 +19,9 @@ var reComboMark = RegExp(rsCombo, 'g'); /** * Deburrs `string` by converting - * [latin-1 supplementary letters](https://en.wikipedia.org/wiki/Latin-1_Supplement_(Unicode_block)#Character_table) - * to basic latin letters and removing + * [Latin-1 Supplement](https://en.wikipedia.org/wiki/Latin-1_Supplement_(Unicode_block)#Character_table) + * and [Latin Extended-A](https://en.wikipedia.org/wiki/Latin_Extended-A) + * letters to basic Latin letters and removing * [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). * * @static @@ -36,7 +37,7 @@ var reComboMark = RegExp(rsCombo, 'g'); */ function deburr(string) { string = toString(string); - return string && string.replace(reLatin1, deburrLetter).replace(reComboMark, ''); + return string && string.replace(reLatin, deburrLetter).replace(reComboMark, ''); } export default deburr; diff --git a/endsWith.js b/endsWith.js index 65fe0a646..1554a6e40 100644 --- a/endsWith.js +++ b/endsWith.js @@ -10,7 +10,7 @@ import toString from './toString.js'; * @memberOf _ * @since 3.0.0 * @category String - * @param {string} [string=''] The string to search. + * @param {string} [string=''] The string to inspect. * @param {string} [target] The string to search for. * @param {number} [position=string.length] The position to search up to. * @returns {boolean} Returns `true` if `string` ends with `target`, diff --git a/find.js b/find.js index 7084c3f2d..24a792b56 100644 --- a/find.js +++ b/find.js @@ -10,7 +10,7 @@ import findIndex from './findIndex.js'; * @memberOf _ * @since 0.1.0 * @category Collection - * @param {Array|Object} collection The collection to search. + * @param {Array|Object} collection The collection to inspect. * @param {Function} [predicate=_.identity] * The function invoked per iteration. * @param {number} [fromIndex=0] The index to search from. 
diff --git a/findIndex.js b/findIndex.js index b14022522..084f48af2 100644 --- a/findIndex.js +++ b/findIndex.js @@ -13,7 +13,7 @@ var nativeMax = Math.max; * @memberOf _ * @since 1.1.0 * @category Array - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {Function} [predicate=_.identity] * The function invoked per iteration. * @param {number} [fromIndex=0] The index to search from. diff --git a/findKey.js b/findKey.js index 6e2519511..a578ede4e 100644 --- a/findKey.js +++ b/findKey.js @@ -10,7 +10,7 @@ import baseIteratee from './_baseIteratee.js'; * @memberOf _ * @since 1.1.0 * @category Object - * @param {Object} object The object to search. + * @param {Object} object The object to inspect. * @param {Function} [predicate=_.identity] The function invoked per iteration. * @returns {string|undefined} Returns the key of the matched element, * else `undefined`. diff --git a/findLast.js b/findLast.js index d353213e7..236ed2624 100644 --- a/findLast.js +++ b/findLast.js @@ -9,7 +9,7 @@ import findLastIndex from './findLastIndex.js'; * @memberOf _ * @since 2.0.0 * @category Collection - * @param {Array|Object} collection The collection to search. + * @param {Array|Object} collection The collection to inspect. * @param {Function} [predicate=_.identity] * The function invoked per iteration. * @param {number} [fromIndex=collection.length-1] The index to search from. diff --git a/findLastIndex.js b/findLastIndex.js index 3c39a6c07..1881ea16d 100644 --- a/findLastIndex.js +++ b/findLastIndex.js @@ -14,7 +14,7 @@ var nativeMax = Math.max, * @memberOf _ * @since 2.0.0 * @category Array - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {Function} [predicate=_.identity] * The function invoked per iteration. * @param {number} [fromIndex=array.length-1] The index to search from. 
diff --git a/findLastKey.js b/findLastKey.js index 67c8b2893..6b391dc03 100644 --- a/findLastKey.js +++ b/findLastKey.js @@ -10,7 +10,7 @@ import baseIteratee from './_baseIteratee.js'; * @memberOf _ * @since 2.0.0 * @category Object - * @param {Object} object The object to search. + * @param {Object} object The object to inspect. * @param {Function} [predicate=_.identity] The function invoked per iteration. * @returns {string|undefined} Returns the key of the matched element, * else `undefined`. diff --git a/includes.js b/includes.js index c438da7b4..7bb379384 100644 --- a/includes.js +++ b/includes.js @@ -18,7 +18,7 @@ var nativeMax = Math.max; * @memberOf _ * @since 0.1.0 * @category Collection - * @param {Array|Object|string} collection The collection to search. + * @param {Array|Object|string} collection The collection to inspect. * @param {*} value The value to search for. * @param {number} [fromIndex=0] The index to search from. * @param- {Object} [guard] Enables use as an iteratee for methods like `_.reduce`. diff --git a/indexOf.js b/indexOf.js index 6e3adf803..7da26981b 100644 --- a/indexOf.js +++ b/indexOf.js @@ -14,7 +14,7 @@ var nativeMax = Math.max; * @memberOf _ * @since 0.1.0 * @category Array - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {*} value The value to search for. * @param {number} [fromIndex=0] The index to search from. * @returns {number} Returns the index of the matched value, else `-1`. diff --git a/isArguments.js b/isArguments.js index 318ccb227..c5650a56a 100644 --- a/isArguments.js +++ b/isArguments.js @@ -38,7 +38,7 @@ var propertyIsEnumerable = objectProto.propertyIsEnumerable; * // => false */ function isArguments(value) { - // Safari 8.1 incorrectly makes `arguments.callee` enumerable in strict mode. + // Safari 8.1 makes `arguments.callee` enumerable in strict mode. 
return isArrayLikeObject(value) && hasOwnProperty.call(value, 'callee') && (!propertyIsEnumerable.call(value, 'callee') || objectToString.call(value) == argsTag); } diff --git a/isEmpty.js b/isEmpty.js index 78f90a2d8..d9ff00245 100644 --- a/isEmpty.js +++ b/isEmpty.js @@ -3,10 +3,7 @@ import isArguments from './isArguments.js'; import isArray from './isArray.js'; import isArrayLike from './isArrayLike.js'; import isBuffer from './isBuffer.js'; -import isFunction from './isFunction.js'; -import isObjectLike from './isObjectLike.js'; import isPrototype from './_isPrototype.js'; -import isString from './isString.js'; import nativeKeys from './_nativeKeys.js'; /** `Object#toString` result references. */ @@ -60,24 +57,23 @@ var nonEnumShadows = !propertyIsEnumerable.call({ 'valueOf': 1 }, 'valueOf'); */ function isEmpty(value) { if (isArrayLike(value) && - (isArray(value) || isString(value) || isFunction(value.splice) || - isArguments(value) || isBuffer(value))) { + (isArray(value) || typeof value == 'string' || + typeof value.splice == 'function' || isBuffer(value) || isArguments(value))) { return !value.length; } - if (isObjectLike(value)) { - var tag = getTag(value); - if (tag == mapTag || tag == setTag) { - return !value.size; - } + var tag = getTag(value); + if (tag == mapTag || tag == setTag) { + return !value.size; + } + if (nonEnumShadows || isPrototype(value)) { + return !nativeKeys(value).length; } - var isProto = isPrototype(value); for (var key in value) { - if (hasOwnProperty.call(value, key) && - !(isProto && key == 'constructor')) { + if (hasOwnProperty.call(value, key)) { return false; } } - return !(nonEnumShadows && nativeKeys(value).length); + return true; } export default isEmpty; diff --git a/isFunction.js b/isFunction.js index 49eaa8cf2..a7ceb2b77 100644 --- a/isFunction.js +++ b/isFunction.js @@ -33,8 +33,7 @@ var objectToString = objectProto.toString; */ function isFunction(value) { // The use of `Object#toString` avoids issues with the `typeof` 
operator - // in Safari 8 which returns 'object' for typed array and weak map constructors, - // and PhantomJS 1.9 which returns 'function' for `NodeList` instances. + // in Safari 8-9 which returns 'object' for typed array and other constructors. var tag = isObject(value) ? objectToString.call(value) : ''; return tag == funcTag || tag == genTag; } diff --git a/isPlainObject.js b/isPlainObject.js index 6e561e213..0438043e0 100644 --- a/isPlainObject.js +++ b/isPlainObject.js @@ -6,10 +6,11 @@ import isObjectLike from './isObjectLike.js'; var objectTag = '[object Object]'; /** Used for built-in method references. */ -var objectProto = Object.prototype; +var funcProto = Function.prototype, + objectProto = Object.prototype; /** Used to resolve the decompiled source of functions. */ -var funcToString = Function.prototype.toString; +var funcToString = funcProto.toString; /** Used to check objects for own properties. */ var hasOwnProperty = objectProto.hasOwnProperty; diff --git a/lastIndexOf.js b/lastIndexOf.js index 43a53a378..fb1dcdbd6 100644 --- a/lastIndexOf.js +++ b/lastIndexOf.js @@ -14,7 +14,7 @@ var nativeMax = Math.max, * @memberOf _ * @since 0.1.0 * @category Array - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {*} value The value to search for. * @param {number} [fromIndex=array.length-1] The index to search from. * @returns {number} Returns the index of the matched value, else `-1`. diff --git a/lodash.default.js b/lodash.default.js index f0a8c1c29..c3cdf3da7 100644 --- a/lodash.default.js +++ b/lodash.default.js @@ -45,7 +45,7 @@ import toInteger from './toInteger.js'; import lodash from './wrapperLodash.js'; /** Used as the semantic version number. */ -var VERSION = '4.14.2'; +var VERSION = '4.15.0'; /** Used to compose bitmasks for function metadata. 
*/ var BIND_KEY_FLAG = 2; diff --git a/package.json b/package.json index 7645f7dad..640f021c4 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "lodash-es", - "version": "4.14.2", + "version": "4.15.0", "description": "Lodash exported as ES modules.", "keywords": "es6, modules, stdlib, util", "homepage": "https://lodash.com/custom-builds", diff --git a/replace.js b/replace.js index 68976c2fe..c14a16985 100644 --- a/replace.js +++ b/replace.js @@ -1,11 +1,5 @@ import toString from './toString.js'; -/** Used for built-in method references. */ -var stringProto = String.prototype; - -/* Built-in method references for those with the same name as other `lodash` methods. */ -var nativeReplace = stringProto.replace; - /** * Replaces matches for `pattern` in `string` with `replacement`. * @@ -29,7 +23,7 @@ function replace() { var args = arguments, string = toString(args[0]); - return args.length < 3 ? string : nativeReplace.call(string, args[1], args[2]); + return args.length < 3 ? string : string.replace(args[1], args[2]); } export default replace; diff --git a/size.js b/size.js index 4ddd8c8c9..b808aaac4 100644 --- a/size.js +++ b/size.js @@ -1,7 +1,6 @@ import baseKeys from './_baseKeys.js'; import getTag from './_getTag.js'; import isArrayLike from './isArrayLike.js'; -import isObjectLike from './isObjectLike.js'; import isString from './isString.js'; import stringSize from './_stringSize.js'; @@ -17,7 +16,7 @@ var mapTag = '[object Map]', * @memberOf _ * @since 0.1.0 * @category Collection - * @param {Array|Object} collection The collection to inspect. + * @param {Array|Object|string} collection The collection to inspect. * @returns {number} Returns the collection size. * @example * @@ -35,14 +34,11 @@ function size(collection) { return 0; } if (isArrayLike(collection)) { - var result = collection.length; - return (result && isString(collection)) ? stringSize(collection) : result; + return isString(collection) ? 
stringSize(collection) : collection.length; } - if (isObjectLike(collection)) { - var tag = getTag(collection); - if (tag == mapTag || tag == setTag) { - return collection.size; - } + var tag = getTag(collection); + if (tag == mapTag || tag == setTag) { + return collection.size; } return baseKeys(collection).length; } diff --git a/sortedIndexOf.js b/sortedIndexOf.js index d58d7b163..49ea3763f 100644 --- a/sortedIndexOf.js +++ b/sortedIndexOf.js @@ -9,7 +9,7 @@ import eq from './eq.js'; * @memberOf _ * @since 4.0.0 * @category Array - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {*} value The value to search for. * @returns {number} Returns the index of the matched value, else `-1`. * @example diff --git a/sortedLastIndexOf.js b/sortedLastIndexOf.js index d59134708..f9738385e 100644 --- a/sortedLastIndexOf.js +++ b/sortedLastIndexOf.js @@ -9,7 +9,7 @@ import eq from './eq.js'; * @memberOf _ * @since 4.0.0 * @category Array - * @param {Array} array The array to search. + * @param {Array} array The array to inspect. * @param {*} value The value to search for. * @returns {number} Returns the index of the matched value, else `-1`. * @example diff --git a/split.js b/split.js index 4de901d1f..ad3681213 100644 --- a/split.js +++ b/split.js @@ -1,20 +1,14 @@ import baseToString from './_baseToString.js'; import castSlice from './_castSlice.js'; +import hasUnicode from './_hasUnicode.js'; import isIterateeCall from './_isIterateeCall.js'; import isRegExp from './isRegExp.js'; -import reHasComplexSymbol from './_reHasComplexSymbol.js'; import stringToArray from './_stringToArray.js'; import toString from './toString.js'; /** Used as references for the maximum length and index of an array. */ var MAX_ARRAY_LENGTH = 4294967295; -/** Used for built-in method references. */ -var stringProto = String.prototype; - -/* Built-in method references for those with the same name as other `lodash` methods. 
*/ -var nativeSplit = stringProto.split; - /** * Splits `string` by `separator`. * @@ -48,11 +42,11 @@ function split(string, separator, limit) { (separator != null && !isRegExp(separator)) )) { separator = baseToString(separator); - if (separator == '' && reHasComplexSymbol.test(string)) { + if (!separator && hasUnicode(string)) { return castSlice(stringToArray(string), 0, limit); } } - return nativeSplit.call(string, separator, limit); + return string.split(separator, limit); } export default split; diff --git a/startsWith.js b/startsWith.js index cc67a4934..991da7a2d 100644 --- a/startsWith.js +++ b/startsWith.js @@ -10,7 +10,7 @@ import toString from './toString.js'; * @memberOf _ * @since 3.0.0 * @category String - * @param {string} [string=''] The string to search. + * @param {string} [string=''] The string to inspect. * @param {string} [target] The string to search for. * @param {number} [position=0] The position to search from. * @returns {boolean} Returns `true` if `string` starts with `target`, diff --git a/toNumber.js b/toNumber.js index 55609a814..430bf226c 100644 --- a/toNumber.js +++ b/toNumber.js @@ -1,4 +1,3 @@ -import isFunction from './isFunction.js'; import isObject from './isObject.js'; import isSymbol from './isSymbol.js'; @@ -51,7 +50,7 @@ function toNumber(value) { return NAN; } if (isObject(value)) { - var other = isFunction(value.valueOf) ? value.valueOf() : value; + var other = typeof value.valueOf == 'function' ? value.valueOf() : value; value = isObject(other) ? 
(other + '') : other; } if (typeof value != 'string') { diff --git a/truncate.js b/truncate.js index 0c3082c51..fe4156175 100644 --- a/truncate.js +++ b/truncate.js @@ -1,8 +1,8 @@ import baseToString from './_baseToString.js'; import castSlice from './_castSlice.js'; +import hasUnicode from './_hasUnicode.js'; import isObject from './isObject.js'; import isRegExp from './isRegExp.js'; -import reHasComplexSymbol from './_reHasComplexSymbol.js'; import stringSize from './_stringSize.js'; import stringToArray from './_stringToArray.js'; import toInteger from './toInteger.js'; @@ -64,7 +64,7 @@ function truncate(string, options) { string = toString(string); var strLength = string.length; - if (reHasComplexSymbol.test(string)) { + if (hasUnicode(string)) { var strSymbols = stringToArray(string); strLength = strSymbols.length; } diff --git a/words.js b/words.js index 9edf1b217..c6d10c209 100644 --- a/words.js +++ b/words.js @@ -1,61 +1,7 @@ +import asciiWords from './_asciiWords.js'; +import hasUnicodeWord from './_hasUnicodeWord.js'; import toString from './toString.js'; - -/** Used to match non-compound words composed of alphanumeric characters. */ -var reBasicWord = /[a-zA-Z0-9]+/g; - -/** Used to compose unicode character classes. 
*/ -var rsAstralRange = '\\ud800-\\udfff', - rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23', - rsComboSymbolsRange = '\\u20d0-\\u20f0', - rsDingbatRange = '\\u2700-\\u27bf', - rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff', - rsMathOpRange = '\\xac\\xb1\\xd7\\xf7', - rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf', - rsPunctuationRange = '\\u2000-\\u206f', - rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000', - rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde', - rsVarRange = '\\ufe0e\\ufe0f', - rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange; - -/** Used to compose unicode capture groups. */ -var rsApos = "['\u2019]", - rsBreak = '[' + rsBreakRange + ']', - rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']', - rsDigits = '\\d+', - rsDingbat = '[' + rsDingbatRange + ']', - rsLower = '[' + rsLowerRange + ']', - rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']', - rsFitz = '\\ud83c[\\udffb-\\udfff]', - rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', - rsNonAstral = '[^' + rsAstralRange + ']', - rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', - rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', - rsUpper = '[' + rsUpperRange + ']', - rsZWJ = '\\u200d'; - -/** Used to compose unicode regexes. 
*/ -var rsLowerMisc = '(?:' + rsLower + '|' + rsMisc + ')', - rsUpperMisc = '(?:' + rsUpper + '|' + rsMisc + ')', - rsOptLowerContr = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?', - rsOptUpperContr = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?', - reOptMod = rsModifier + '?', - rsOptVar = '[' + rsVarRange + ']?', - rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', - rsSeq = rsOptVar + reOptMod + rsOptJoin, - rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq; - -/** Used to match complex or compound words. */ -var reComplexWord = RegExp([ - rsUpper + '?' + rsLower + '+' + rsOptLowerContr + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')', - rsUpperMisc + '+' + rsOptUpperContr + '(?=' + [rsBreak, rsUpper + rsLowerMisc, '$'].join('|') + ')', - rsUpper + '?' + rsLowerMisc + '+' + rsOptLowerContr, - rsUpper + '+' + rsOptUpperContr, - rsDigits, - rsEmoji -].join('|'), 'g'); - -/** Used to detect strings that need a more robust regexp to match words. */ -var reHasComplexWord = /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/; +import unicodeWords from './_unicodeWords.js'; /** * Splits `string` into an array of its words. @@ -81,7 +27,7 @@ function words(string, pattern, guard) { pattern = guard ? undefined : pattern; if (pattern === undefined) { - pattern = reHasComplexWord.test(string) ? reComplexWord : reBasicWord; + return hasUnicodeWord(string) ? unicodeWords(string) : asciiWords(string); } return string.match(pattern) || []; }