Simplify words.

2026-02-09 10:27:49 +00:00 · 2017-03-27 14:08:22 -07:00
parent 8be26ac755
commit 6d19563a9f
4 changed files with 19 additions and 47 deletions
--- a/.internal/asciiWords.js
+++ b/.internal/asciiWords.js
@@ -1,15 +0,0 @@
 /**
  * Matches each run of characters that fall outside the ASCII control,
  * whitespace, punctuation and symbol ranges (so ASCII letters, digits and
  * any code unit above `\x7f` are kept together as one word).
  */
 const reAsciiWord = /[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g
 /**
  * Collects the words of an ASCII `string`.
  *
  * @private
  * @param {string} string The string to inspect.
  * @returns {Array} Returns the words of `string`; an empty array when
  *  nothing matches.
  */
 function asciiWords(string) {
   const found = string.match(reAsciiWord)
   if (found) {
     return found
   }
   return []
 }
 export default asciiWords
--- a/.internal/hasUnicodeWord.js
+++ b/.internal/hasUnicodeWord.js
@@ -1,15 +0,0 @@
 /**
  * Flags strings that the simple ASCII matcher cannot split correctly: a
  * lower-to-upper case boundary, an upper-case run followed by a lower-case
  * letter, a digit/letter boundary, or any character that is not an ASCII
  * letter, digit or space.
  */
 const reHasUnicodeWord = /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/
 /**
  * Reports whether `string` needs the more robust word matcher.
  *
  * @private
  * @param {string} string The string to inspect.
  * @returns {boolean} Returns `true` if `reHasUnicodeWord` matches anywhere
  *  in `string`, else `false`.
  */
 function hasUnicodeWord(string) {
   // The regexp is not global, so `exec` is stateless here.
   const hit = reHasUnicodeWord.exec(string)
   return hit !== null
 }
 export default hasUnicodeWord
--- a/.internal/unicodeWords.js
+++ b/.internal/unicodeWords.js
@@ -43,8 +43,14 @@ const rsOrdUpper = '\\d*(?:(?:1ST|2ND|3RD|(?![123])\\dTH)\\b)'
 const rsSeq = rsOptVar + reOptMod + rsOptJoin
 const rsEmoji = `(?:${ [rsDingbat, rsRegional, rsSurrPair].join('|') })${ rsSeq }`
-/** Used to match complex or compound words. */
+/**
-const reUnicodeWord = RegExp([
+ * Splits a Unicode `string` into an array of its words.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {Array} Returns the words of `string`.
 */
 const unicodeWords = RegExp.prototype.exec.bind(RegExp([
  `${ rsUpper }?${ rsLower }+${ rsOptContrLower }(?=${ [rsBreak, rsUpper, '$'].join('|') })`,
  `${ rsMiscUpper }+${ rsOptContrUpper }(?=${ [rsBreak, rsUpper + rsMiscLower, '$'].join('|') })`,
  `${ rsUpper }?${ rsMiscLower }+${ rsOptContrLower }`,
@@ -53,17 +59,6 @@ const reUnicodeWord = RegExp([
  rsOrdLower,
  rsDigits,
  rsEmoji
-].join('|'), 'g')
+].join('|'), 'g'))
 /**
  * Collects the words of a Unicode `string` by matching it against
  * `reUnicodeWord`.
  *
  * @private
  * @param {string} string The string to inspect.
  * @returns {Array} Returns the words of `string`; an empty array when
  *  nothing matches.
  */
 function unicodeWords(string) {
   const found = string.match(reUnicodeWord)
   if (found) {
     return found
   }
   return []
 }
 export default unicodeWords
--- a/words.js
+++ b/words.js
@@ -1,7 +1,13 @@
 import asciiWords from './.internal/asciiWords.js'
 import hasUnicodeWord from './.internal/hasUnicodeWord.js'
 import unicodeWords from './.internal/unicodeWords.js'
 /**
  * Splits an ASCII `string` into an array of its words.
  *
  * `String.prototype.match` is used rather than a bound
  * `RegExp.prototype.exec`: with a global regexp `exec` yields only one
  * match per call and remembers its position in `lastIndex`, so successive
  * calls would return different, partial results.
  *
  * @private
  * @param {string} string The string to inspect.
  * @returns {Array} Returns every word of `string`; an empty array when
  *  nothing matches.
  */
 const asciiWords = (string) =>
   string.match(/[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g) || []
 /**
  * Reports whether `string` needs the more robust word matcher, i.e. it has
  * a case or digit/letter boundary, or a character other than an ASCII
  * letter, digit or space. The regexp is not global, so the check is
  * stateless.
  *
  * @private
  * @param {string} string The string to inspect.
  * @returns {boolean} Returns `true` if the pattern matches, else `false`.
  */
 const hasUnicodeWord = (string) =>
   /[a-z][A-Z]|[A-Z]{2,}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/.test(string)
 /**
 * Splits `string` into an array of its words.
 *
@@ -20,7 +26,8 @@ import unicodeWords from './.internal/unicodeWords.js'
 */
 function words(string, pattern) {
   // An explicit pattern bypasses the built-in matchers entirely.
   if (pattern !== undefined) {
     return string.match(pattern) || []
   }
   // Use the Unicode-aware matcher only when the cheap pre-check says the
   // plain ASCII one is not enough.
   const result = hasUnicodeWord(string) ? unicodeWords(string) : asciiWords(string)
   // The matchers may yield `null` when nothing matches; always hand back
   // an array.
   return result || []
 }