forked from github/codeql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHashedButNoHash.ql
More file actions
85 lines (76 loc) · 2.59 KB
/
HashedButNoHash.ql
File metadata and controls
85 lines (76 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/**
* @name Unhashable object hashed
* @description Hashing an object which is not hashable will result in a TypeError at runtime.
* @kind problem
* @tags reliability
* correctness
* @problem.severity error
* @sub-severity low
* @precision very-high
* @id py/hash-unhashable-value
*/
import python
/*
* This assumes that any indexing operation where the value is not a sequence or numpy array involves hashing.
* For sequences, the index must be an int, which is hashable, so we don't need to treat them specially.
* For numpy arrays, the index may be a list, which is not hashable, so they need to be treated specially.
*/
/**
 * Holds if `na` is a class that has, among its super-types, the `ndarray`
 * attribute of a module named `numpy` or `numpy.core`.
 */
predicate numpy_array_type(ClassValue na) {
  exists(ModuleValue numpyModule |
    ("numpy" = numpyModule.getName() or "numpy.core" = numpyModule.getName()) and
    numpyModule.attr("ndarray") = na.getASuperType()
  )
}
/**
 * Holds if the class of `v` is a numpy array type, or if looking up
 * `__getitem__` on the class of `v` yields a `PythonFunctionValue`.
 */
predicate has_custom_getitem(Value v) {
  exists(ClassValue cls | cls = v.getClass() |
    numpy_array_type(cls)
    or
    cls.lookup("__getitem__") instanceof PythonFunctionValue
  )
}
/**
 * Holds if `f` is the first positional argument of a call whose callee is a
 * name node that uses the global variable called `hash`.
 */
predicate explicitly_hashed(ControlFlowNode f) {
  exists(CallNode call, NameNode callee, GlobalVariable hashVar |
    hashVar.getId() = "hash" and
    callee = call.getFunction() and
    callee.uses(hashVar) and
    f = call.getArg(0)
  )
}
/**
 * Holds if `f` is the index of a subscript operation, `f` is unhashable
 * (an instance of `c`, originating at `origin`), and the subscripted object
 * may point to a value for which `has_custom_getitem` does not hold.
 */
predicate unhashable_subscript(ControlFlowNode f, ClassValue c, ControlFlowNode origin) {
  exists(SubscriptNode subscript, Value container |
    f = subscript.getIndex() and
    subscript.getObject().pointsTo(container) and
    not has_custom_getitem(container)
  ) and
  is_unhashable(f, c, origin)
}
/**
 * Holds if `f` points to a value (with origin `origin`) whose class is `cls`,
 * and `cls` is considered unhashable: either looking up `__hash__` on `cls`
 * gives the value named `None`, or `cls` is a new-style class for which
 * inference did not fail and which has no `__hash__` attribute.
 */
predicate is_unhashable(ControlFlowNode f, ClassValue cls, ControlFlowNode origin) {
  exists(Value pointee |
    cls = pointee.getClass() and
    f.pointsTo(pointee, origin)
  ) and
  (
    cls.lookup("__hash__") = Value::named("None")
    or
    cls.isNewStyle() and
    not cls.failedInference(_) and
    not cls.hasAttribute("__hash__")
  )
}
/**
 * Holds if `f` is inside the body of a `try` statement that has a handler
 * which catches `TypeError`. For example:
 *
 *    try:
 *        ... f ...
 *    except TypeError:
 *        ...
 *
 * A handler is considered to catch `TypeError` if it is a bare `except:`,
 * or if its type expression (or, when that expression is a tuple, one of
 * the tuple's elements) points to `TypeError` or one of its super-types,
 * such as `Exception`.
 *
 * This predicate is used to eliminate false positive results. If `hash`
 * is called on an unhashable object then a `TypeError` will be thrown.
 * But this is not a bug if the code catches the `TypeError` and handles
 * it.
 */
predicate typeerror_is_caught(ControlFlowNode f) {
  exists(Try tryStmt, ExceptStmt handler |
    tryStmt.getBody().contains(f.getNode()) and
    handler = tryStmt.getAHandler()
  |
    // A bare `except:` has no type expression and catches every exception.
    not exists(handler.getType())
    or
    exists(Expr caught |
      caught = handler.getType()
      or
      // `except (A, B):` catches anything matching one of the tuple's elements.
      caught = handler.getType().(Tuple).getAnElt()
    |
      // Catching a super-type of `TypeError` also catches `TypeError` itself.
      caught.pointsTo(ClassValue::typeError().getASuperType())
    )
  )
}
from ControlFlowNode f, ClassValue c, ControlFlowNode origin
where
  (
    // `f` is used as the index of a subscript that is assumed to hash it ...
    unhashable_subscript(f, c, origin)
    or
    // ... or `f` is passed directly to `hash(...)`.
    is_unhashable(f, c, origin) and explicitly_hashed(f)
  ) and
  // Skip nodes for which `typeerror_is_caught` holds.
  not typeerror_is_caught(f)
select f.getNode(), "This $@ of $@ is unhashable.", origin, "instance", c, c.getQualifiedName()