Skip to content

Commit 83835da

Browse files
committed
Python: Remove imprecise container steps
- remove `tupleStoreStep` and `dictStoreStep` from `containerStep` These are imprecise compared to the content being precise. - add implicit reads to recover taint at sinks - add implicit read steps for decoders to supplement the `AdditionalTaintStep` that now only covers when the full container is tainted.
1 parent 03ce2b0 commit 83835da

16 files changed

Lines changed: 180 additions & 55 deletions

File tree

python/ql/consistency-queries/DataFlowConsistency.ql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ private module Input implements InputSig<Location, PythonDataFlow> {
3434
// parameter, but dataflow-consistency queries should _not_ complain about there not
3535
// being a post-update node for the synthetic `**kwargs` parameter.
3636
n instanceof SynthDictSplatParameterNode
37+
or
38+
Private::Conversions::readStep(n, _, _)
3739
}
3840

3941
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
934934
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
935935
or
936936
VariableCapture::readStep(nodeFrom, c, nodeTo)
937+
or
938+
Conversions::readStep(nodeFrom, c, nodeTo)
937939
}
938940

939941
/** Data flows from a sequence to a subscript of the sequence. */
@@ -989,6 +991,40 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
989991
nodeTo.accesses(nodeFrom, c.getAttribute())
990992
}
991993

994+
module Conversions {
995+
private import semmle.python.Concepts
996+
997+
predicate decoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
998+
exists(Decoding decoding |
999+
nodeFrom = decoding.getAnInput() and
1000+
nodeTo = decoding.getOutput()
1001+
) and
1002+
(
1003+
c instanceof TupleElementContent
1004+
or
1005+
c instanceof DictionaryElementContent
1006+
)
1007+
}
1008+
1009+
predicate encoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
1010+
exists(Encoding encoding |
1011+
nodeFrom = encoding.getAnInput() and
1012+
nodeTo = encoding.getOutput()
1013+
) and
1014+
(
1015+
c instanceof TupleElementContent
1016+
or
1017+
c instanceof DictionaryElementContent
1018+
)
1019+
}
1020+
1021+
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
1022+
decoderReadStep(nodeFrom, c, nodeTo)
1023+
or
1024+
encoderReadStep(nodeFrom, c, nodeTo)
1025+
}
1026+
}
1027+
9921028
/**
9931029
* Holds if values stored inside content `c` are cleared at node `n`. For example,
9941030
* any value stored inside `f` is cleared at the pre-update node associated with `x`

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,16 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
1616
* of `c` at sinks and inputs to additional taint steps.
1717
*/
1818
bindingset[node]
19-
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { none() }
19+
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
20+
// We allow implicit reads of precise content
21+
// imprecise content has already bubled up.
22+
exists(node) and
23+
(
24+
c instanceof DataFlow::TupleElementContent
25+
or
26+
c instanceof DataFlow::DictionaryElementContent
27+
)
28+
}
2029

2130
private module Cached {
2231
/**
@@ -176,10 +185,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
176185
or
177186
DataFlowPrivate::setStoreStep(nodeFrom, _, nodeTo)
178187
or
179-
DataFlowPrivate::tupleStoreStep(nodeFrom, _, nodeTo)
180-
or
181-
DataFlowPrivate::dictStoreStep(nodeFrom, _, nodeTo)
182-
or
183188
// comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
184189
// resulting list of the list-comprehension.
185190
//

python/ql/test/library-tests/dataflow/sensitive-data/test.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,5 @@ def call_wrapper(func):
131131
print(password) # $ SensitiveUse=password
132132
_config = {"sleep_timer": 5, "mysql_password": password}
133133

134-
# since we have taint-step from store of `password`, we will consider any item in the
135-
# dictionary to be a password :(
136-
print(_config["sleep_timer"]) # $ SPURIOUS: SensitiveUse=password
134+
# since we have precise dictionary content, other items of the config are not tainted
135+
print(_config["sleep_timer"])

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def str_methods():
1717
ts.casefold(), # $ tainted
1818

1919
ts.format_map({}), # $ tainted
20-
"{unsafe}".format_map({"unsafe": ts}), # $ tainted
20+
"{unsafe}".format_map({"unsafe": ts}), # $ MISSING: tainted
2121
)
2222

2323

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep/test_collections.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ def test_construction():
2929

3030
ensure_tainted(
3131
list(tainted_list), # $ tainted
32-
list(tainted_tuple), # $ tainted
32+
list(tainted_tuple), # $ MISSING: tainted
3333
list(tainted_set), # $ tainted
34-
list(tainted_dict.values()), # $ tainted
35-
list(tainted_dict.items()), # $ tainted
34+
list(tainted_dict.values()), # $ MISSING: tainted
35+
list(tainted_dict.items()), # $ MISSING: tainted
3636

3737
tuple(tainted_list), # $ tainted
3838
set(tainted_list), # $ tainted

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def percent_fmt():
115115
ensure_tainted(
116116
tainted_fmt % (1, 2), # $ tainted
117117
"%s foo bar" % ts, # $ tainted
118-
"%s %s %s" % (1, 2, ts), # $ tainted
118+
"%s %s %s" % (1, 2, ts), # $ MISSING: tainted
119119
)
120120

121121

python/ql/test/library-tests/dataflow/tainttracking/defaultAdditionalTaintStep/test_unpacking.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def contrived_1():
5353

5454
(a, b, c), (d, e, f) = tainted_list, no_taint_list
5555
ensure_tainted(a, b, c) # $ tainted
56-
ensure_not_tainted(d, e, f) # $ SPURIOUS: tainted
56+
ensure_not_tainted(d, e, f)
5757

5858

5959
def contrived_2():

python/ql/test/library-tests/frameworks/stdlib/test_re.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@
8080
)
8181

8282
ensure_not_tainted(
83-
re.subn(pat, repl="safe", string=ts),
8483
re.subn(pat, repl="safe", string=ts)[1], # // the number of substitutions made
8584
)
8685
ensure_tainted(
86+
re.subn(pat, repl="safe", string=ts), # $ tainted // implicit read at sink
8787
re.subn(pat, repl="safe", string=ts)[0], # $ tainted // the string
8888
)

python/ql/test/library-tests/frameworks/tornado/taint_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def get(self, name = "World!", number="0", foo="foo"): # $ requestHandler route
6363
request.headers["header-name"], # $ tainted
6464
request.headers.get_list("header-name"), # $ tainted
6565
request.headers.get_all(), # $ tainted
66-
[(k, v) for (k, v) in request.headers.get_all()], # $ tainted
66+
[(k, v) for (k, v) in request.headers.get_all()], # $ MISSING: tainted
6767

6868
# Dict[str, http.cookies.Morsel]
6969
request.cookies, # $ tainted

0 commit comments

Comments
 (0)