From 10c2232c398e095ff52f2306df08f0a0e0350285 Mon Sep 17 00:00:00 2001 From: jdalton Date: Sat, 27 Jun 2015 13:22:21 -0700 Subject: [PATCH] Make `_.escapeRegExp` escape characters that could cause problems for escape sequences and other edge cases. --- lodash.src.js | 36 +++++++++++++++++++++++++++++++----- test/test.js | 46 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 11 deletions(-) diff --git a/lodash.src.js b/lodash.src.js index e6004a6d4..2cb0f51bf 100644 --- a/lodash.src.js +++ b/lodash.src.js @@ -97,7 +97,7 @@ * Used to match `RegExp` [syntax characters](http://ecma-international.org/ecma-262/6.0/#sec-patterns) * and those outlined by [`EscapeRegExpPattern`](http://ecma-international.org/ecma-262/6.0/#sec-escaperegexppattern). */ - var reRegExpChars = /[\/^$\\.*+?()[\]{}|\n\r\u2028\u2029]/g, + var reRegExpChars = /^[:!,]|[\/^$\\.*+?()[\]{}|]|(^[0-9a-fA-Fnrtuvx])|([\n\r\u2028\u2029])/g, reHasRegExpChars = RegExp(reRegExpChars.source); /** Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). */ @@ -233,6 +233,15 @@ 'object': true }; + /** Used to escape characters for inclusion in compiled regexes. */ + var regexpEscapes = { + '0': 'x30', '1': 'x31', '2': 'x32', '3': 'x33', '4': 'x34', + '5': 'x35', '6': 'x36', '7': 'x37', '8': 'x38', '9': 'x39', + 'A': 'x41', 'B': 'x42', 'C': 'x43', 'D': 'x44', 'E': 'x45', 'F': 'x46', + 'a': 'x61', 'b': 'x62', 'c': 'x63', 'd': 'x64', 'e': 'x65', 'f': 'x66', + 'n': 'x6e', 'r': 'x72', 't': 'x74', 'u': 'x75', 'v': 'x76', 'x': 'x78' + }; + /** Used to escape characters for inclusion in compiled string literals. */ var stringEscapes = { '\\': '\\', @@ -489,15 +498,32 @@ } /** - * Used by `_.template` to escape characters for inclusion in compiled - * string literals. + * Used by `_.escapeRegExp` to escape characters for inclusion in compiled regexes. + * + * @private + * @param {string} chr The matched character to escape. + * @param {string} leadingChar The capture group for a leading character. + * @param {string} whitespaceChar The capture group for a whitespace character. + * @returns {string} Returns the escaped character. + */ + function escapeRegExpChar(chr, leadingChar, whitespaceChar) { + if (leadingChar) { + chr = regexpEscapes[chr]; + } else if (whitespaceChar) { + chr = stringEscapes[chr]; + } + return '\\' + chr; + } + + /** + * Used by `_.template` to escape characters for inclusion in compiled string literals. * * @private * @param {string} chr The matched character to escape. * @returns {string} Returns the escaped character. */ function escapeStringChar(chr) { - return '\\' + (stringEscapes[chr] || chr); + return '\\' + stringEscapes[chr]; } /** @@ -10521,7 +10547,7 @@ function escapeRegExp(string) { string = baseToString(string); return (string && reHasRegExpChars.test(string)) - ? string.replace(reRegExpChars, escapeStringChar) + ? string.replace(reRegExpChars, escapeRegExpChar) : (string || '(?:)'); } diff --git a/test/test.js b/test/test.js index 9366ba616..dd86c936e 100644 --- a/test/test.js +++ b/test/test.js @@ -285,6 +285,15 @@ new URIError ]; + /** Used to check escaped regexp characters. */ + var regexpEscapes = { + '0': 'x30', '1': 'x31', '2': 'x32', '3': 'x33', '4': 'x34', + '5': 'x35', '6': 'x36', '7': 'x37', '8': 'x38', '9': 'x39', + 'A': 'x41', 'B': 'x42', 'C': 'x43', 'D': 'x44', 'E': 'x45', 'F': 'x46', + 'a': 'x61', 'b': 'x62', 'c': 'x63', 'd': 'x64', 'e': 'x65', 'f': 'x66', + 'n': 'x6e', 'r': 'x72', 't': 'x74', 'u': 'x75', 'v': 'x76', 'x': 'x78' + }; + /** Used to check problem JScript properties (a.k.a. the `[[DontEnum]]` bug). */ var shadowProps = [ 'constructor', @@ -4270,14 +4279,39 @@ strictEqual(_.escapeRegExp(unescaped + unescaped), escaped + escaped); }); - test('should handle strings with nothing to escape', 1, function() { - strictEqual(_.escapeRegExp('abc'), 'abc'); + test('should escape special characters at the start of a string', 1, function() { + var chars = [ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f', + 'A', 'B', 'C', 'D', 'E', 'F', + 'n', 'r', 't', 'u', 'v', 'x', + ':', '!', ',' + ]; + + var expected = _.map(chars, function(chr) { + return ['\\' + (regexpEscapes[chr] || chr) + 'z', 'z' + chr]; + }); + + var actual = _.map(chars, function(chr) { + return [_.escapeRegExp(chr + 'z'), _.escapeRegExp('z' + chr)]; + }); + + deepEqual(actual, expected); }); - test('should return `"(?:)"` when provided nullish or empty string values', 3, function() { - strictEqual(_.escapeRegExp(null), '(?:)'); - strictEqual(_.escapeRegExp(undefined), '(?:)'); - strictEqual(_.escapeRegExp(''), '(?:)'); + test('should handle strings with nothing to escape', 1, function() { + strictEqual(_.escapeRegExp('ghi'), 'ghi'); + }); + + test('should return `"(?:)"` when provided nullish or empty string values', 1, function() { + var values = [, null, undefined, ''], + expected = _.map(values, _.constant('(?:)')); + + var actual = _.map(values, function(value, index) { + return index ? _.escapeRegExp(value) : _.escapeRegExp(); + }); + + deepEqual(actual, expected); }); test('should work with `eval` and `Function`', 2, function() {