Add support for combining diacritical marks for symbols.

This commit is contained in:
John-David Dalton
2016-01-14 21:31:26 -08:00
parent 84763cab26
commit 96ef2110ce
2 changed files with 74 additions and 66 deletions

View File

@@ -163,7 +163,8 @@
/** Used to compose unicode character classes. */
var rsAstralRange = '\\ud800-\\udfff',
rsComboRange = '\\u0300-\\u036f\\ufe20-\\ufe23',
rsComboMarksRange = '\\u0300-\\u036f\\ufe20-\\ufe23',
rsComboSymbolsRange = '\\u20d0-\\u20f0',
rsDingbatRange = '\\u2700-\\u27bf',
rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff',
rsMathOpRange = '\\xac\\xb1\\xd7\\xf7',
@@ -177,12 +178,13 @@
/** Used to compose unicode capture groups. */
var rsAstral = '[' + rsAstralRange + ']',
rsBreak = '[' + rsBreakRange + ']',
rsCombo = '[' + rsComboRange + ']',
rsCombo = '[' + rsComboMarksRange + rsComboSymbolsRange + ']',
rsDigits = '\\d+',
rsDingbat = '[' + rsDingbatRange + ']',
rsLower = '[' + rsLowerRange + ']',
rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']',
rsModifier = '(?:\\ud83c[\\udffb-\\udfff])',
rsFitz = '\\ud83c[\\udffb-\\udfff]',
rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')',
rsNonAstral = '[^' + rsAstralRange + ']',
rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}',
rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
@@ -199,14 +201,17 @@
rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq,
rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')';
/** Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). */
/**
* Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks) and
* [combining diacritical marks for symbols](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks_for_Symbols).
*/
var reComboMark = RegExp(rsCombo, 'g');
/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */
var reComplexSymbol = RegExp(rsSymbol + rsSeq, 'g');
var reComplexSymbol = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g');
/** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */
var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboRange + rsVarRange + ']');
var reHasComplexSymbol = RegExp('[' + rsZWJ + rsAstralRange + rsComboMarksRange + rsComboSymbolsRange + rsVarRange + ']');
/** Used to match non-compound words composed of alphanumeric characters. */
var reBasicWord = /[a-zA-Z0-9]+/g;