diff --git a/.internal/asciiWords.js b/.internal/asciiWords.js
deleted file mode 100644
index c992dfd8b..000000000
--- a/.internal/asciiWords.js
+++ /dev/null
@@ -1,15 +0,0 @@
-/** Used to match words composed of alphanumeric characters. */
-const reAsciiWord = /[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g
-
-/**
- * Splits an ASCII `string` into an array of its words.
- *
- * @private
- * @param {string} The string to inspect.
- * @returns {Array} Returns the words of `string`.
- */
-function asciiWords(string) {
-  return string.match(reAsciiWord) || []
-}
-
-export default asciiWords
diff --git a/.internal/hasUnicodeWord.js b/.internal/hasUnicodeWord.js
deleted file mode 100644
index d9f0faac9..000000000
--- a/.internal/hasUnicodeWord.js
+++ /dev/null
@@ -1,15 +0,0 @@
-/** Used to detect strings that need a more robust regexp to match words. */
-const reHasUnicodeWord = /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/
-
-/**
- * Checks if `string` contains a word composed of Unicode symbols.
- *
- * @private
- * @param {string} string The string to inspect.
- * @returns {boolean} Returns `true` if a word is found, else `false`.
- */
-function hasUnicodeWord(string) {
-  return reHasUnicodeWord.test(string)
-}
-
-export default hasUnicodeWord
diff --git a/.internal/unicodeWords.js b/.internal/unicodeWords.js
index 232d1a699..6a1996617 100644
--- a/.internal/unicodeWords.js
+++ b/.internal/unicodeWords.js
@@ -43,8 +43,17 @@ const rsOrdUpper = '\\d*(?:(?:1ST|2ND|3RD|(?![123])\\dTH)\\b)'
 const rsSeq = rsOptVar + reOptMod + rsOptJoin
 const rsEmoji = `(?:${ [rsDingbat, rsRegional, rsSurrPair].join('|') })${ rsSeq }`
 
-/** Used to match complex or compound words. */
-const reUnicodeWord = RegExp([
+/**
+ * Splits a Unicode `string` into an array of its words.
+ *
+ * Bound to `Symbol.match` (what `String#match` delegates to) rather than
+ * `exec`: on a global regexp `exec` returns one match per call and is
+ * stateful through `lastIndex`.
+ *
+ * @private
+ * @param {string} string The string to inspect.
+ * @returns {Array|null} Returns the words of `string`, or `null` if none.
+ */
+const unicodeWords = RegExp.prototype[Symbol.match].bind(RegExp([
   `${ rsUpper }?${ rsLower }+${ rsOptContrLower }(?=${ [rsBreak, rsUpper, '$'].join('|') })`,
   `${ rsMiscUpper }+${ rsOptContrUpper }(?=${ [rsBreak, rsUpper + rsMiscLower, '$'].join('|') })`,
   `${ rsUpper }?${ rsMiscLower }+${ rsOptContrLower }`,
@@ -53,17 +62,6 @@ const reUnicodeWord = RegExp([
   rsOrdLower,
   rsDigits,
   rsEmoji
-].join('|'), 'g')
-
-/**
- * Splits a Unicode `string` into an array of its words.
- *
- * @private
- * @param {string} The string to inspect.
- * @returns {Array} Returns the words of `string`.
- */
-function unicodeWords(string) {
-  return string.match(reUnicodeWord) || []
-}
+].join('|'), 'g'))
 
 export default unicodeWords
diff --git a/words.js b/words.js
index def6ed396..65e834ef8 100644
--- a/words.js
+++ b/words.js
@@ -1,7 +1,17 @@
-import asciiWords from './.internal/asciiWords.js'
-import hasUnicodeWord from './.internal/hasUnicodeWord.js'
 import unicodeWords from './.internal/unicodeWords.js'
 
+// `Symbol.match` is what `String#match` delegates to: it returns every match
+// (or `null`). `exec` on a global regexp returns one match per call and is
+// stateful through `lastIndex`.
+const asciiWords = RegExp.prototype[Symbol.match].bind(
+  /[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g
+)
+
+// The regexp is not global, so `test` keeps no state between calls.
+const hasUnicodeWord = RegExp.prototype.test.bind(
+  /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/
+)
+
 /**
  * Splits `string` into an array of its words.
  *
@@ -20,7 +30,8 @@ import unicodeWords from './.internal/unicodeWords.js'
  */
 function words(string, pattern) {
   if (pattern === undefined) {
-    return hasUnicodeWord(string) ? unicodeWords(string) : asciiWords(string)
+    const result = hasUnicodeWord(string) ? unicodeWords(string) : asciiWords(string)
+    return result || []
   }
   return string.match(pattern) || []
 }