forked from github/codeql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathIncompleteHostnameRegexp.ql
More file actions
118 lines (104 loc) · 4.14 KB
/
IncompleteHostnameRegexp.ql
File metadata and controls
118 lines (104 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/**
* @name Incomplete regular expression for hostnames
* @description Matching a URL or hostname against a regular expression that contains an unescaped
* dot as part of the hostname might match more hostnames than expected.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id go/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-020
*/
import go
/**
 * Holds if `pattern` is a regular expression pattern for URLs in which an unescaped `.`
 * is directly followed by the host fragment `hostPart`, a subtle mistake that lets
 * `pattern` match hosts other than the intended one.
 *
 * `hostPart` is the first capture group of the detection regex built below: an optional
 * subdomain-like run ending in a dot, followed by the sub-pattern supplied by `commonTld()`.
 */
bindingset[pattern]
predicate isIncompleteHostNameRegexpPattern(string pattern, string hostPart) {
  exists(string detector |
    detector =
      "(?i).*?" +
        // a single `.` that is not preceded by a backslash, i.e. unescaped
        "(?<!\\\\)[.]" +
        // directly after it: a run of subdomain characters (regex metacharacters may be
        // mixed in) ending in an optionally escaped dot, then a known TLD
        "(([():|?a-z0-9-]+(\\\\)?[.])?" + commonTld() + ")" + ".*"
  |
    hostPart = pattern.regexpCapture(detector, 1)
  )
}
/**
 * Holds if `b` contains at least one write to the pseudo-header `status` (which stands
 * for the HTTP status code), and every such write in `b` can define a three-character
 * code that begins with `4` or `5`.
 */
predicate writesHttpError(ReachableBasicBlock b) {
  forex(Http::HeaderWrite statusWrite |
    statusWrite.asInstruction().getBasicBlock() = b and
    statusWrite.getHeaderName() = "status"
  |
    exists(string errCode |
      statusWrite.definesHeader("status", errCode) and
      // `_` in a `matches` pattern stands for exactly one character
      errCode.matches(["4__", "5__"])
    )
  )
}
/**
 * Holds if an error status code is certainly set on the way to `block`: either `block`
 * itself satisfies `writesHttpError`, or `block` has at least one reachable predecessor
 * and this predicate holds for every reachable predecessor.
 */
predicate onlyErrors(BasicBlock block) {
  forex(ReachableBasicBlock before | block.getAPredecessor() = before | onlyErrors(before))
  or
  writesHttpError(block)
}
/**
 * Gets a node that refers to a handler that is considered to return an HTTP error.
 *
 * This is either a read of the `AlwaysReject` or `RejectConnect` variable from the
 * package given by `ElazarlGoproxy::packagePath()`, or a read of a function for which
 * `onlyErrors` holds at the basic block of its exit node.
 */
DataFlow::Node getASafeHandler() {
  result =
    any(Variable rejecter |
      rejecter.hasQualifiedName(ElazarlGoproxy::packagePath(), ["AlwaysReject", "RejectConnect"])
    ).getARead()
  or
  result =
    any(Function errFunc |
      onlyErrors(errFunc.getFuncDecl().(ControlFlow::Root).getExitNode().getBasicBlock())
    ).getARead()
}
/**
 * Holds if `regexp` is used in a check before `handler` is called, that is, `handler` is
 * guarded by an expression that is a use of `regexp` or (transitively) encloses one.
 */
predicate regexpGuardsHandler(RegexpPattern regexp, Http::RequestHandler handler) {
  exists(Expr enclosing |
    // zero or more parent steps: the use itself or any expression around it
    enclosing = regexp.getAUse().asExpr().getParent*()
  |
    handler.guardedBy(DataFlow::exprNode(enclosing))
  )
}
/**
 * Holds if `regexp` guards an HTTP error write.
 *
 * That is, some call to a regexp-matching function uses `regexp`, and a condition guard
 * that ensures the match result (or a node reachable from it via successor steps) is
 * `true` dominates a basic block for which `writesHttpError` holds.
 */
predicate regexpGuardsError(RegexpPattern regexp) {
  exists(
    RegexpMatchFunction matcher, DataFlow::CallNode matchCall,
    ControlFlow::ConditionGuardNode guard, ReachableBasicBlock errorBlock
  |
    matcher = matchCall.getTarget() and
    regexp = matcher.getRegexp(matchCall) and
    guard.ensures(matcher.getResult().getNode(matchCall).getASuccessor*(), true) and
    writesHttpError(errorBlock) and
    guard.dominates(errorBlock)
  )
}
/**
 * Data-flow configuration tracking string expressions that contain an incomplete
 * hostname pattern to the places where they are used as regular expressions.
 */
module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig {
  /**
   * Holds if `source` is an expression whose string value is an incomplete hostname
   * regexp pattern, with `hostPart` being the host fragment after the unescaped dot.
   */
  additional predicate isSourceString(DataFlow::Node source, string hostPart) {
    exists(Expr strExpr |
      strExpr = source.asExpr() and
      // Constant names are skipped: the string literal a constant is initialised
      // with is already a source, so including the name would duplicate results.
      not strExpr instanceof ConstantName and
      isIncompleteHostNameRegexpPattern(strExpr.getStringValue(), hostPart)
    )
  }

  /** Holds if `source` is a source string for some host fragment. */
  predicate isSource(DataFlow::Node source) { isSourceString(source, _) }

  /**
   * Holds if `sink` is a regexp pattern that neither guards an HTTP error write nor
   * guards a handler that is considered safe.
   */
  predicate isSink(DataFlow::Node sink) {
    sink instanceof RegexpPattern and
    not regexpGuardsError(sink) and
    not exists(Http::RequestHandler handler |
      regexpGuardsHandler(sink, handler) and
      handler = getASafeHandler()
    )
  }

  /** Holds if flow continues from `node1` to `node2` through a string concatenation. */
  predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    StringOps::Concatenation::taintStep(node1, node2)
  }
}
// Instantiate global data flow for the configuration above and import the path graph of
// that instantiation (this query is declared `@kind path-problem`).
module Flow = DataFlow::Global<IncompleteHostNameRegexpConfig>;
import Flow::PathGraph
// Report every flow path from an incomplete hostname pattern to its use as a regexp.
from Flow::PathNode source, Flow::PathNode sink, string hostPart
where
Flow::flowPath(source, sink) and
// Re-derive the host fragment for the source node so it can be quoted in the message.
IncompleteHostNameRegexpConfig::isSourceString(source.getNode(), hostPart)
// The alert is placed on the source; `$@` in the message is a link to `sink` labelled
// "the regular expression is used".
select source, source, sink,
"This regular expression has an unescaped dot before '" + hostPart + "', " +
"so it might match more hosts than expected when $@.", sink, "the regular expression is used"