mirror of
https://github.com/whoisclebs/lodash.git
synced 2026-02-01 15:57:48 +00:00
65 lines
2.8 KiB
JavaScript
65 lines
2.8 KiB
JavaScript
/**
 * Used to compose unicode character classes.
 *
 * Every `rs*` constant is a regex *source fragment* (a string), not a RegExp.
 * The `*Range` fragments are bare character-class bodies meant to be wrapped
 * in `[...]` (or `[^...]`) by the constants further down.
 */
const rsAstralRange = '\\ud800-\\udfff'
const rsComboMarksRange = '\\u0300-\\u036f'
const rsComboHalfMarksRange = '\\ufe20-\\ufe2f'
const rsComboSymbolsRange = '\\u20d0-\\u20ff'
const rsComboRange = rsComboMarksRange + rsComboHalfMarksRange + rsComboSymbolsRange
const rsDingbatRange = '\\u2700-\\u27bf'
const rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff'
const rsMathOpRange = '\\xac\\xb1\\xd7\\xf7'
const rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf'
const rsPunctuationRange = '\\u2000-\\u206f'
const rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000'
const rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde'
const rsVarRange = '\\ufe0e\\ufe0f'
const rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange

/** Used to compose unicode capture groups. */
const rsApos = "['\u2019]"
const rsBreak = `[${rsBreakRange}]`
const rsCombo = `[${rsComboRange}]`
const rsDigits = '\\d+'
const rsDingbat = `[${rsDingbatRange}]`
const rsLower = `[${rsLowerRange}]`
// `rsDigits` is spliced into a negated class here, so its trailing `+` is a
// literal plus sign inside the brackets rather than a quantifier.
const rsMisc = `[^${rsAstralRange}${rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange}]`
const rsFitz = '\\ud83c[\\udffb-\\udfff]'
const rsModifier = `(?:${rsCombo}|${rsFitz})`
const rsNonAstral = `[^${rsAstralRange}]`
const rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}'
const rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]'
const rsUpper = `[${rsUpperRange}]`
const rsZWJ = '\\u200d'

/** Used to compose unicode regexes. */
const rsMiscLower = `(?:${rsLower}|${rsMisc})`
const rsMiscUpper = `(?:${rsUpper}|${rsMisc})`
const rsOptContrLower = `(?:${rsApos}(?:d|ll|m|re|s|t|ve))?`
const rsOptContrUpper = `(?:${rsApos}(?:D|LL|M|RE|S|T|VE))?`
const rsOptMod = `${rsModifier}?`
const rsOptVar = `[${rsVarRange}]?`
const rsOptJoin = `(?:${rsZWJ}(?:${[rsNonAstral, rsRegional, rsSurrPair].join('|')})${rsOptVar + rsOptMod})*`
const rsOrdLower = '\\d*(?:(?:1st|2nd|3rd|(?![123])\\dth)\\b)'
const rsOrdUpper = '\\d*(?:(?:1ST|2ND|3RD|(?![123])\\dTH)\\b)'
const rsSeq = rsOptVar + rsOptMod + rsOptJoin
const rsEmoji = `(?:${[rsDingbat, rsRegional, rsSurrPair].join('|')})${rsSeq}`

/**
 * Used to match complex or compound words.
 *
 * Alternatives are tried in the order listed, so the more specific word
 * shapes must stay ahead of the bare digit and emoji fallbacks.
 */
const reUnicodeWords = RegExp([
  `${rsUpper}?${rsLower}+${rsOptContrLower}(?=${[rsBreak, rsUpper, '$'].join('|')})`,
  `${rsMiscUpper}+${rsOptContrUpper}(?=${[rsBreak, rsUpper + rsMiscLower, '$'].join('|')})`,
  `${rsUpper}?${rsMiscLower}+${rsOptContrLower}`,
  `${rsUpper}+${rsOptContrUpper}`,
  rsOrdUpper,
  rsOrdLower,
  rsDigits,
  rsEmoji
].join('|'), 'g')

/**
 * Splits a Unicode `string` into an array of its words.
 *
 * Matching goes through `String.prototype.match`, which resets the global
 * regex's `lastIndex` before scanning and collects every match. Calling
 * `exec` on the shared global regex instead would return only one match per
 * call and carry `lastIndex` over between unrelated calls.
 *
 * @private
 * @param {string} string The string to inspect (coerced to a string).
 * @returns {Array} Returns the words of `string`, or an empty array when
 *  no word is found.
 */
function unicodeWords(string) {
  return `${string}`.match(reUnicodeWords) || []
}

export default unicodeWords