Add support for astral symbols in string methods. [closes #1463]

This commit is contained in:
John-David Dalton
2015-09-14 01:06:40 -07:00
parent 0fb94ad3c6
commit fa61e30dd5

View File

@@ -92,6 +92,10 @@
uint16Tag = '[object Uint16Array]', uint16Tag = '[object Uint16Array]',
uint32Tag = '[object Uint32Array]'; uint32Tag = '[object Uint32Array]';
/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */
var reStrSymbol = /[^\uD800-\uDBFF\uDC00-\uDFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF\uDC00-\uDFFF]/g,
reStrSurrogate = /[\uD800-\uDBFF\uDC00-\uDFFF]/;
/** Used to match empty string literals in compiled template source. */ /** Used to match empty string literals in compiled template source. */
var reEmptyStringLeading = /\b__p \+= '';/g, var reEmptyStringLeading = /\b__p \+= '';/g,
reEmptyStringMiddle = /\b(__p \+=) '' \+/g, reEmptyStringMiddle = /\b(__p \+=) '' \+/g,
@@ -811,10 +815,13 @@
* @returns {number} Returns the index of the first character not found in `chars`. * @returns {number} Returns the index of the first character not found in `chars`.
*/ */
function charsLeftIndex(string, chars) { function charsLeftIndex(string, chars) {
string = string ? string.match(reStrSymbol) : [];
chars = chars ? chars.match(reStrSymbol) : [];
var index = -1, var index = -1,
length = string.length; length = string.length;
while (++index < length && chars.indexOf(string.charAt(index)) > -1) {} while (++index < length && baseIndexOf(chars, string[index], 0) > -1) {}
return index; return index;
} }
@@ -828,9 +835,11 @@
* @returns {number} Returns the index of the last character not found in `chars`. * @returns {number} Returns the index of the last character not found in `chars`.
*/ */
function charsRightIndex(string, chars) { function charsRightIndex(string, chars) {
var index = string.length; string = string ? string.match(reStrSymbol) : [];
chars = chars ? chars.match(reStrSymbol) : [];
while (index-- && chars.indexOf(string.charAt(index)) > -1) {} var index = string.length;
while (index-- && baseIndexOf(chars, string[index], 0) > -1) {}
return index; return index;
} }
@@ -1177,6 +1186,18 @@
return result; return result;
} }
/**
 * Counts the symbols in `string`, treating each surrogate pair matched by
 * `reStrSymbol` as a single symbol.
 *
 * @param {string} string The string to measure.
 * @returns {number} Returns the number of symbols in `string`.
 */
function stringSize(string) {
  // With no surrogate code units present (or an empty string) the plain
  // code-unit length is already the symbol count, so skip the regex split.
  if (!string || !reStrSurrogate.test(string)) {
    return string.length;
  }
  return string.match(reStrSymbol).length;
}
/** /**
* Used by `_.trim` and `_.trimLeft` to get the index of the first non-whitespace * Used by `_.trim` and `_.trimLeft` to get the index of the first non-whitespace
* character of `string`. * character of `string`.
@@ -3593,7 +3614,7 @@
* @returns {string} Returns the padding for `string`. * @returns {string} Returns the padding for `string`.
*/ */
function createPadding(string, length, chars) { function createPadding(string, length, chars) {
var strLength = string.length; var strLength = stringSize(string);
length = toInteger(length); length = toInteger(length);
if (!length || strLength >= length) { if (!length || strLength >= length) {
@@ -3601,7 +3622,11 @@
} }
var padLength = length - strLength; var padLength = length - strLength;
chars = chars === undefined ? ' ' : (chars + ''); chars = chars === undefined ? ' ' : (chars + '');
return repeat(chars, nativeCeil(padLength / chars.length)).slice(0, padLength);
var result = repeat(chars, nativeCeil(padLength / stringSize(chars)));
return reStrSurrogate.test(chars)
? result.match(reStrSymbol).slice(0, padLength).join('')
: result.slice(0, padLength);
} }
/** /**
@@ -6803,7 +6828,7 @@
* @memberOf _ * @memberOf _
* @category Collection * @category Collection
* @param {Array|Object} collection The collection to inspect. * @param {Array|Object} collection The collection to inspect.
* @returns {number} Returns the size of `collection`. * @returns {number} Returns the collection size.
* @example * @example
* *
* _.size([1, 2, 3]); * _.size([1, 2, 3]);
@@ -6819,8 +6844,13 @@
if (collection == null) { if (collection == null) {
return 0; return 0;
} }
collection = isArrayLike(collection) ? collection : keys(collection); if (isArrayLike(collection)) {
return collection.length; var result = collection.length;
return (result && !isArray(collection) && isString(collection))
? stringSize(collection)
: result;
}
return keys(collection).length;
} }
/** /**
@@ -8952,7 +8982,13 @@
return []; return [];
} }
if (isArrayLike(value)) { if (isArrayLike(value)) {
return value.length ? copyArray(value) : []; if (!value.length) {
return [];
}
if (!isArray(value) && isString(value)) {
return reStrSurrogate.test(value) ? value.match(reStrSymbol) : value.split('');
}
return copyArray(value);
} }
if (iteratorSymbol && value[iteratorSymbol]) { if (iteratorSymbol && value[iteratorSymbol]) {
return iteratorToArray(value[iteratorSymbol]()); return iteratorToArray(value[iteratorSymbol]());
@@ -10321,7 +10357,7 @@
var length = string.length; var length = string.length;
position = position === undefined position = position === undefined
? length ? length
: nativeMin(position < 0 ? 0 : toInteger(position), length); : nativeMin(nativeMax(toInteger(position), 0), length);
position -= target.length; position -= target.length;
return position >= 0 && string.indexOf(target, position) == position; return position >= 0 && string.indexOf(target, position) == position;
@@ -10436,7 +10472,7 @@
string = baseToString(string); string = baseToString(string);
length = toInteger(length); length = toInteger(length);
var strLength = string.length; var strLength = stringSize(string);
if (!length || strLength >= length) { if (!length || strLength >= length) {
return string; return string;
} }
@@ -10444,8 +10480,7 @@
leftLength = nativeFloor(mid), leftLength = nativeFloor(mid),
rightLength = nativeCeil(mid); rightLength = nativeCeil(mid);
chars = createPadding('', rightLength, chars); return createPadding('', leftLength, chars) + string + createPadding('', rightLength, chars)
return chars.slice(0, leftLength) + string + chars;
} }
/** /**
@@ -10646,10 +10681,7 @@
*/ */
function startsWith(string, target, position) { function startsWith(string, target, position) {
string = baseToString(string); string = baseToString(string);
position = position == null position = nativeMin(nativeMax(toInteger(position), 0), string.length);
? 0
: nativeMin(position < 0 ? 0 : toInteger(position), string.length);
return string.lastIndexOf(target, position) == position; return string.lastIndexOf(target, position) == position;
} }
@@ -10995,10 +11027,10 @@
omission = 'omission' in options ? baseToString(options.omission) : omission; omission = 'omission' in options ? baseToString(options.omission) : omission;
} }
string = baseToString(string); string = baseToString(string);
if (length >= string.length) { if (length >= stringSize(string)) {
return string; return string;
} }
var end = length - omission.length; var end = length - stringSize(omission);
if (end < 1) { if (end < 1) {
return omission; return omission;
} }