Rework ascii and unicode helpers.

This commit is contained in:
John-David Dalton
2016-08-09 18:08:20 -07:00
parent f71c0bd4c3
commit 6071982dd9

201
lodash.js
View File

@@ -150,7 +150,7 @@
reSplitDetails = /,? & /;
/** Used to match non-compound words composed of alphanumeric characters. */
var reBasicWord = /[a-zA-Z0-9]+/g;
var reAsciiWord = /[a-zA-Z0-9]+/g;
/** Used to match backslashes in property paths. */
var reEscapeChar = /\\(\\)?/g;
@@ -244,10 +244,10 @@
var reComboMark = RegExp(rsCombo, 'g');
/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */
var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g');
var reUnicode = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g');
/** Used to match complex or compound words. */
var reComplexWord = RegExp([
var reUnicodeWord = RegExp([
rsUpper + '?' + rsLower + '+' + rsOptLowerContr + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')',
rsUpperMisc + '+' + rsOptUpperContr + '(?=' + [rsBreak, rsUpper + rsLowerMisc, '$'].join('|') + ')',
rsUpper + '?' + rsLowerMisc + '+' + rsOptLowerContr,
@@ -257,10 +257,10 @@
].join('|'), 'g');
/** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */
var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboMarksRange + rsComboSymbolsRange + rsVarRange + ']');
var reHasUnicode = RegExp('[' + rsZWJ + rsAstralRange + rsComboMarksRange + rsComboSymbolsRange + rsVarRange + ']');
/** Used to detect strings that need a more robust regexp to match words. */
var reHasComplexWord = /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/;
var reHasUnicodeWord = /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/;
/** Used to assign default `context` object properties. */
var contextProps = [
@@ -560,7 +560,7 @@
* specifying an index to search from.
*
* @private
* @param {Array} [array] The array to search.
* @param {Array} [array] The array to inspect.
* @param {*} target The value to search for.
* @returns {boolean} Returns `true` if `target` is found, else `false`.
*/
@@ -573,7 +573,7 @@
* This function is like `arrayIncludes` except that it accepts a comparator.
*
* @private
* @param {Array} [array] The array to search.
* @param {Array} [array] The array to inspect.
* @param {*} target The value to search for.
* @param {Function} comparator The comparator invoked per element.
* @returns {boolean} Returns `true` if `target` is found, else `false`.
@@ -699,13 +699,44 @@
return false;
}
/**
* Gets the size of an ASCII `string`.
*
* Built from `baseProperty('length')`, so it presumably just reads the
* `length` property of its argument - confirm against `baseProperty`.
*
* @private
* @param {string} string The string to inspect.
* @returns {number} Returns the string size.
*/
var asciiSize = baseProperty('length');
/**
 * Breaks an ASCII `string` apart into its individual characters.
 *
 * Splitting on the empty string yields one entry per UTF-16 code unit,
 * which is only a faithful "character" list for plain ASCII input.
 *
 * @private
 * @param {string} string The string to convert.
 * @returns {Array} Returns the array of single-character strings.
 */
function asciiToArray(string) {
  var chars = string.split('');
  return chars;
}
/**
 * Extracts the words of an ASCII `string` by collecting every match of
 * `reAsciiWord`.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {Array} Returns the words of `string`, or an empty array when
 *  nothing matches.
 */
function asciiWords(string) {
  var matches = string.match(reAsciiWord);
  // `match` yields `null` when there is no match; normalize that to `[]`.
  return matches ? matches : [];
}
/**
* The base implementation of methods like `_.findKey` and `_.findLastKey`,
* without support for iteratee shorthands, which iterates over `collection`
* using `eachFunc`.
*
* @private
* @param {Array|Object} collection The collection to search.
* @param {Array|Object} collection The collection to inspect.
* @param {Function} predicate The function invoked per iteration.
* @param {Function} eachFunc The function to iterate over `collection`.
* @returns {*} Returns the found element or its key, else `undefined`.
@@ -726,7 +757,7 @@
* support for iteratee shorthands.
*
* @private
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {Function} predicate The function invoked per iteration.
* @param {number} fromIndex The index to search from.
* @param {boolean} [fromRight] Specify iterating from right to left.
@@ -748,7 +779,7 @@
* The base implementation of `_.indexOf` without `fromIndex` bounds checks.
*
* @private
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {*} value The value to search for.
* @param {number} fromIndex The index to search from.
* @returns {number} Returns the index of the matched value, else `-1`.
@@ -772,7 +803,7 @@
* This function is like `baseIndexOf` except that it accepts a comparator.
*
* @private
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {*} value The value to search for.
* @param {number} fromIndex The index to search from.
* @param {Function} comparator The comparator invoked per element.
@@ -1075,6 +1106,28 @@
return object == null ? undefined : object[key];
}
/**
 * Reports whether `string` contains any character matched by
 * `reHasUnicode`.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {boolean} Returns `true` if a symbol is found, else `false`.
 */
function hasUnicode(string) {
  var containsUnicode = reHasUnicode.test(string);
  return containsUnicode;
}
/**
 * Reports whether `string` matches `reHasUnicodeWord`, i.e. whether it
 * needs the more robust word-matching regexp.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {boolean} Returns `true` if a word is found, else `false`.
 */
function hasUnicodeWord(string) {
  var needsUnicodeWords = reHasUnicodeWord.test(string);
  return needsUnicodeWords;
}
/**
* Checks if `value` is a host object in IE < 9.
*
@@ -1209,14 +1262,9 @@
* @returns {number} Returns the string size.
*/
function stringSize(string) {
// NOTE(review): this hunk lists the removed and the added body back to back
// with no +/- markers. The statements down to `return result;` use the old
// `reHasComplexSymbol`/`reComplexSymbol` names and appear to be the
// pre-commit implementation - confirm against the raw diff.
if (!(string && reHasComplexSymbol.test(string))) {
return string.length;
}
// Reset `lastIndex` and count one per successful `test` call.
var result = reComplexSymbol.lastIndex = 0;
while (reComplexSymbol.test(string)) {
result++;
}
return result;
// Presumably the post-commit body: use `unicodeSize` when `hasUnicode`
// reports a match, otherwise fall back to `asciiSize`.
return hasUnicode(string)
? unicodeSize(string)
: asciiSize(string);
}
/**
@@ -1227,7 +1275,9 @@
* @returns {Array} Returns the converted array.
*/
function stringToArray(string) {
// NOTE(review): this hunk lists the removed and the added body back to back
// with no +/- markers. The first `return` uses the old `reComplexSymbol`
// name and appears to be the pre-commit line - confirm against the raw diff.
return string.match(reComplexSymbol);
// Presumably the post-commit body: use `unicodeToArray` when `hasUnicode`
// reports a match, otherwise fall back to `asciiToArray`.
return hasUnicode(string)
? unicodeToArray(string)
: asciiToArray(string);
}
/**
@@ -1239,6 +1289,43 @@
*/
var unescapeHtmlChar = basePropertyOf(htmlUnescapes);
/**
 * Counts how many times `reUnicode` matches within `string`.
 *
 * `reUnicode` is a shared global regexp, so its `lastIndex` is rewound to
 * the start before scanning; each successful `test` call then advances it
 * past one more match.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {number} Returns the string size.
 */
function unicodeSize(string) {
  var count = 0;
  reUnicode.lastIndex = 0;
  while (reUnicode.test(string)) {
    count += 1;
  }
  return count;
}
/**
 * Breaks a Unicode `string` apart into the pieces matched by `reUnicode`.
 *
 * @private
 * @param {string} string The string to convert.
 * @returns {Array} Returns the matched pieces, or an empty array when
 *  nothing matches.
 */
function unicodeToArray(string) {
  var symbols = string.match(reUnicode);
  // `match` yields `null` when there is no match; normalize that to `[]`.
  return symbols ? symbols : [];
}
/**
 * Extracts the words of a Unicode `string` by collecting every match of
 * `reUnicodeWord`.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {Array} Returns the words of `string`, or an empty array when
 *  nothing matches.
 */
function unicodeWords(string) {
  var matches = string.match(reUnicodeWord);
  // `match` yields `null` when there is no match; normalize that to `[]`.
  return matches ? matches : [];
}
/*--------------------------------------------------------------------------*/
/**
@@ -1278,19 +1365,23 @@
* var defer = _.runInContext({ 'setTimeout': setImmediate }).defer;
*/
function runInContext(context) {
context = context ? _.defaults({}, context, _.pick(root, contextProps)) : root;
context = context ? _.defaults(root.Object(), context, _.pick(root, contextProps)) : root;
/** Built-in constructor references. */
var Array = context.Array,
Date = context.Date,
Error = context.Error,
Function = context.Function,
Math = context.Math,
Object = context.Object,
RegExp = context.RegExp,
TypeError = context.TypeError;
/** Used for built-in method references. */
var arrayProto = context.Array.prototype,
objectProto = context.Object.prototype,
stringProto = context.String.prototype;
var arrayProto = Array.prototype,
funcProto = Function.prototype,
objectProto = Object.prototype,
stringProto = String.prototype;
/** Used to detect overreaching core-js shims. */
var coreJsData = context['__core-js_shared__'];
@@ -1302,7 +1393,7 @@
}());
/** Used to resolve the decompiled source of functions. */
var funcToString = context.Function.prototype.toString;
var funcToString = funcProto.toString;
/** Used to check objects for own properties. */
var hasOwnProperty = objectProto.hasOwnProperty;
@@ -1335,14 +1426,14 @@
Uint8Array = context.Uint8Array,
getPrototype = overArg(Object.getPrototypeOf, Object),
iteratorSymbol = Symbol ? Symbol.iterator : undefined,
objectCreate = context.Object.create,
objectCreate = Object.create,
propertyIsEnumerable = objectProto.propertyIsEnumerable,
splice = arrayProto.splice,
spreadableSymbol = Symbol ? Symbol.isConcatSpreadable : undefined;
/** Mocked built-ins. */
var ctxClearTimeout = context.clearTimeout !== root.clearTimeout && context.clearTimeout,
ctxNow = context.Date && context.Date.now !== root.Date.now && context.Date.now,
ctxNow = Date && Date.now !== root.Date.now && Date.now,
ctxSetTimeout = context.setTimeout !== root.setTimeout && context.setTimeout;
/* Built-in method references for those with the same name as other `lodash` methods. */
@@ -1357,9 +1448,7 @@
nativeMin = Math.min,
nativeParseInt = context.parseInt,
nativeRandom = Math.random,
nativeReplace = stringProto.replace,
nativeReverse = arrayProto.reverse,
nativeSplit = stringProto.split;
nativeReverse = arrayProto.reverse;
/* Built-in method references that are verified to be native. */
var DataView = getNative(context, 'DataView'),
@@ -1367,11 +1456,11 @@
Promise = getNative(context, 'Promise'),
Set = getNative(context, 'Set'),
WeakMap = getNative(context, 'WeakMap'),
nativeCreate = getNative(context.Object, 'create');
nativeCreate = getNative(Object, 'create');
/* Used to set `toString` methods. */
var defineProperty = (function() {
var func = getNative(context.Object, 'defineProperty'),
var func = getNative(Object, 'defineProperty'),
name = getNative.name;
return (name && name.length > 2) ? func : undefined;
@@ -2280,7 +2369,7 @@
* Gets the index at which the `key` is found in `array` of key-value pairs.
*
* @private
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {*} key The key to search for.
* @returns {number} Returns the index of the matched value, else `-1`.
*/
@@ -3705,7 +3794,7 @@
* The base implementation of `_.set`.
*
* @private
* @param {Object} object The object to query.
* @param {Object} object The object to modify.
* @param {Array|string} path The path of the property to set.
* @param {*} value The value to set.
* @param {Function} [customizer] The function to customize path creation.
@@ -4035,7 +4124,7 @@
* The base implementation of `_.update`.
*
* @private
* @param {Object} object The object to query.
* @param {Object} object The object to modify.
* @param {Array|string} path The path of the property to update.
* @param {Function} updater The function to produce the updated value.
* @param {Function} [customizer] The function to customize path creation.
@@ -4646,7 +4735,7 @@
return function(string) {
string = toString(string);
var strSymbols = reHasComplexSymbol.test(string)
var strSymbols = hasUnicode(string)
? stringToArray(string)
: undefined;
@@ -4989,7 +5078,7 @@
return charsLength ? baseRepeat(chars, length) : chars;
}
var result = baseRepeat(chars, nativeCeil(length / stringSize(chars)));
return reHasComplexSymbol.test(chars)
return hasUnicode(chars)
? castSlice(stringToArray(result), 0, length).join('')
: result.slice(0, length);
}
@@ -6729,7 +6818,7 @@
* @memberOf _
* @since 1.1.0
* @category Array
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {Function} [predicate=_.identity]
* The function invoked per iteration.
* @param {number} [fromIndex=0] The index to search from.
@@ -6777,7 +6866,7 @@
* @memberOf _
* @since 2.0.0
* @category Array
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {Function} [predicate=_.identity]
* The function invoked per iteration.
* @param {number} [fromIndex=array.length-1] The index to search from.
@@ -6946,7 +7035,7 @@
* @memberOf _
* @since 0.1.0
* @category Array
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {*} value The value to search for.
* @param {number} [fromIndex=0] The index to search from.
* @returns {number} Returns the index of the matched value, else `-1`.
@@ -7131,7 +7220,7 @@
* @memberOf _
* @since 0.1.0
* @category Array
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {*} value The value to search for.
* @param {number} [fromIndex=array.length-1] The index to search from.
* @returns {number} Returns the index of the matched value, else `-1`.
@@ -7509,7 +7598,7 @@
* @memberOf _
* @since 4.0.0
* @category Array
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {*} value The value to search for.
* @returns {number} Returns the index of the matched value, else `-1`.
* @example
@@ -7588,7 +7677,7 @@
* @memberOf _
* @since 4.0.0
* @category Array
* @param {Array} array The array to search.
* @param {Array} array The array to inspect.
* @param {*} value The value to search for.
* @returns {number} Returns the index of the matched value, else `-1`.
* @example
@@ -8717,7 +8806,7 @@
* @memberOf _
* @since 0.1.0
* @category Collection
* @param {Array|Object} collection The collection to search.
* @param {Array|Object} collection The collection to inspect.
* @param {Function} [predicate=_.identity]
* The function invoked per iteration.
* @param {number} [fromIndex=0] The index to search from.
@@ -8755,7 +8844,7 @@
* @memberOf _
* @since 2.0.0
* @category Collection
* @param {Array|Object} collection The collection to search.
* @param {Array|Object} collection The collection to inspect.
* @param {Function} [predicate=_.identity]
* The function invoked per iteration.
* @param {number} [fromIndex=collection.length-1] The index to search from.
@@ -8950,7 +9039,7 @@
* @memberOf _
* @since 0.1.0
* @category Collection
* @param {Array|Object|string} collection The collection to search.
* @param {Array|Object|string} collection The collection to inspect.
* @param {*} value The value to search for.
* @param {number} [fromIndex=0] The index to search from.
* @param- {Object} [guard] Enables use as an iteratee for methods like `_.reduce`.
@@ -9383,7 +9472,7 @@
* @memberOf _
* @since 0.1.0
* @category Collection
* @param {Array|Object} collection The collection to inspect.
* @param {Array|Object|string} collection The collection to inspect.
* @returns {number} Returns the collection size.
* @example
*
@@ -12374,7 +12463,7 @@
* @memberOf _
* @since 1.1.0
* @category Object
* @param {Object} object The object to search.
* @param {Object} object The object to inspect.
* @param {Function} [predicate=_.identity] The function invoked per iteration.
* @returns {string|undefined} Returns the key of the matched element,
* else `undefined`.
@@ -12413,7 +12502,7 @@
* @memberOf _
* @since 2.0.0
* @category Object
* @param {Object} object The object to search.
* @param {Object} object The object to inspect.
* @param {Function} [predicate=_.identity] The function invoked per iteration.
* @returns {string|undefined} Returns the key of the matched element,
* else `undefined`.
@@ -13682,7 +13771,7 @@
* @memberOf _
* @since 3.0.0
* @category String
* @param {string} [string=''] The string to search.
* @param {string} [string=''] The string to inspect.
* @param {string} [target] The string to search for.
* @param {number} [position=string.length] The position to search up to.
* @returns {boolean} Returns `true` if `string` ends with `target`,
@@ -14038,7 +14127,7 @@
var args = arguments,
string = toString(args[0]);
return args.length < 3 ? string : nativeReplace.call(string, args[1], args[2]);
return args.length < 3 ? string : string.replace(args[1], args[2]);
}
/**
@@ -14099,11 +14188,11 @@
(separator != null && !isRegExp(separator))
)) {
separator = baseToString(separator);
if (separator == '' && reHasComplexSymbol.test(string)) {
if (!separator && hasUnicode(string)) {
return castSlice(stringToArray(string), 0, limit);
}
}
return nativeSplit.call(string, separator, limit);
return string.split(separator, limit);
}
/**
@@ -14138,7 +14227,7 @@
* @memberOf _
* @since 3.0.0
* @category String
* @param {string} [string=''] The string to search.
* @param {string} [string=''] The string to inspect.
* @param {string} [target] The string to search for.
* @param {number} [position=0] The position to search from.
* @returns {boolean} Returns `true` if `string` starts with `target`,
@@ -14575,7 +14664,7 @@
string = toString(string);
var strLength = string.length;
if (reHasComplexSymbol.test(string)) {
if (hasUnicode(string)) {
var strSymbols = stringToArray(string);
strLength = strSymbols.length;
}
@@ -14712,7 +14801,7 @@
pattern = guard ? undefined : pattern;
if (pattern === undefined) {
pattern = reHasComplexWord.test(string) ? reComplexWord : reBasicWord;
return hasUnicodeWord(string) ? unicodeWords(string) : asciiWords(string);
}
return string.match(pattern) || [];
}