Skip to content

Commit 82252c0

Browse files
committed
detect redos between charclass and inverted charclass
1 parent 16473fc commit 82252c0

File tree

3 files changed

+71
-1
lines changed

3 files changed

+71
-1
lines changed

javascript/ql/src/Performance/ReDoS.ql

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,57 @@ predicate charClassMatchesChar(RegExpCharacterClass cc, string char) {
443443
)
444444
}
445445

446+
/**
447+
* Gets the minimum char that is matched by both the positive char class `c` and the
448+
* negative char class `d`.
449+
*/
450+
pragma[noinline]
451+
private string getMinOverlapBetweenCharacterClasses(CharClass c, InvertedCharClass d) {
452+
result = min(getAOverlapBetweenCharacterClasses(c, d))
453+
}
454+
455+
/**
456+
* Gets a char that is mentioned in the character class `c`, either as a constant or as an endpoint of a character range.
457+
*/
458+
private string getAMentionedChar(RegExpCharacterClass c) {
459+
exists(RegExpTerm child | child = c.getAChild() |
460+
result = child.(RegExpConstant).getValue()
461+
or
462+
child.(RegExpCharacterRange).isRange(result, _)
463+
or
464+
child.(RegExpCharacterRange).isRange(_, result)
465+
)
466+
}
467+
468+
/**
469+
* Gets a char that is relevant for ReDoS analysis of `symbol`.
470+
* The result is either mentioned in the character class `symbol`,
471+
* or, if `symbol` is an inverted character class, the result is the char whose charcode comes directly after or directly before that of a char mentioned in the class.
472+
*/
473+
pragma[noinline]
474+
private string getARelevantCharClassChar(TInputSymbol symbol) {
475+
exists(RegExpCharacterClass cc | symbol = CharClass(cc) | result = getAMentionedChar(cc))
476+
or
477+
exists(RegExpCharacterClass cc | symbol = InvertedCharClass(cc) |
478+
result = nextChar(getAMentionedChar(cc)) or
479+
nextChar(result) = getAMentionedChar(cc)
480+
)
481+
}
482+
483+
/**
484+
* Gets a char that is matched by both the positive char class `c` and the
485+
* negative char class `d`.
486+
*/
487+
private string getAOverlapBetweenCharacterClasses(CharClass c, InvertedCharClass d) {
488+
result = [getARelevantCharClassChar(c), getARelevantCharClassChar(d)] and
489+
exists(RegExpCharacterClass negClass, RegExpCharacterClass posClass |
490+
c = CharClass(posClass) and
491+
d = InvertedCharClass(negClass) and
492+
charClassMatchesChar(posClass, result) and
493+
not charClassMatchesChar(negClass, result)
494+
)
495+
}
496+
446497
/**
447498
* Gets a character that is represented by both `c` and `d`.
448499
*/
@@ -463,6 +514,8 @@ string intersect(InputSymbol c, InputSymbol d) {
463514
d = Any()
464515
)
465516
or
517+
result = getMinOverlapBetweenCharacterClasses(c, d)
518+
or
466519
exists(RegExpCharacterClass cc | c = InvertedCharClass(cc) and result = chooseFromInverted(cc) |
467520
d = InvertedCharClass(cc)
468521
or

javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
| regexplib/uri.js:3:128:3:129 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
3434
| regexplib/uri.js:38:35:38:40 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
3535
| regexplib/uri.js:55:35:55:40 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
36+
| regexplib/uri.js:63:393:63:429 | [a-zA-Z0-9\\.\\,\\?\\'\\\\/\\+&%\\$#\\=~_\\-@]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/#'. |
3637
| tst.js:4:18:4:32 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
3738
| tst.js:4:42:4:58 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
3839
| tst.js:14:14:14:15 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
@@ -56,6 +57,10 @@
5657
| tst.js:83:14:83:20 | (.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
5758
| tst.js:89:25:89:32 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
5859
| tst.js:95:15:95:25 | ([^]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
60+
| tst.js:98:15:98:20 | [^"']+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '('. |
5961
| tst.js:101:15:101:23 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
6062
| tst.js:107:15:107:23 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
6163
| tst.js:110:15:110:23 | (G\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
64+
| tst.js:113:15:113:27 | ([0-9]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
65+
| tst.js:116:60:116:104 | (?:\\\\[\\x00-\\x7f]\|[^\\x00-\\x08\\x0a-\\x1f\\x7f"])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\!'. |
66+
| tst.js:119:16:119:60 | (?:\\\\[\\x00-\\x7f]\|[^\\x00-\\x08\\x0a-\\x1f\\x7f"])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\!'. |

javascript/ql/test/query-tests/Performance/ReDoS/tst.js

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ var good9 = '(a|aa?)*b';
9494
// NOT GOOD
9595
var bad18 = /(([^]|[^a])*)"/;
9696

97-
// NOT GOOD - but not flagged
97+
// NOT GOOD
9898
var bad19 = /([^"']+)*/g;
9999

100100
// NOT GOOD
@@ -108,3 +108,15 @@ var bad21 = /((b|[^a])*)"/;
108108

109109
// NOT GOOD
110110
var bad22 = /((G|[^a])*)"/;
111+
112+
// NOT GOOD
113+
var bad23 = /(([0-9]|[^a])*)"/;
114+
115+
// NOT GOOD
116+
var bad24 = /(?:=(?:([!#\$%&'\*\+\-\.\^_`\|~0-9A-Za-z]+)|"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"))?/;
117+
118+
// NOT GOOD
119+
var bad25 = /"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"/;
120+
121+
// GOOD
122+
var bad26 = /"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"\\])*)"/;

0 commit comments

Comments
 (0)