Skip to content

Commit d8b7d8d

Browse files
committed
fix slice aspect memory usage
1 parent f396595 commit d8b7d8d

File tree

1 file changed

+66
-84
lines changed

1 file changed

+66
-84
lines changed
Lines changed: 66 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,71 @@
11
#include "aspect_slice.h"
2-
#include <algorithm>
2+
#include "helpers.h"
33

44
/**
5-
* Optimized function to compute taint ranges for a slice operation.
6-
* This avoids creating an intermediate character-by-character index map,
7-
* directly computing overlapping ranges instead. This reduces memory usage
8-
* from O(n) to O(m) where n=string length, m=number of taint ranges.
5+
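* Collapses a per-index range map into taint ranges: each run of consecutive entries that refer to the same taint range becomes one newly allocated range whose start and length are positions in the map; null entries produce no range.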
96
*
10-
* @param text The text object being sliced.
11-
* @param ranges The taint ranges of the original text.
12-
* @param start The start index of the slice (or nullptr/None).
13-
* @param stop The stop index of the slice (or nullptr/None).
14-
* @param step The step of the slice (or nullptr/None).
7+
* @param index_range_map One entry per position: the taint range covering that position, or nullptr where none applies.
158
*
16-
* @return Taint ranges for the sliced result.
9+
* @return The newly allocated taint ranges, in the order their runs appear in the map.
1710
*/
1811
TaintRangeRefs
19-
compute_slice_ranges(PyObject* text, const TaintRangeRefs& ranges, PyObject* start, PyObject* stop, PyObject* step)
12+
reduce_ranges_from_index_range_map(const TaintRangeRefs& index_range_map)
2013
{
21-
long length_text = static_cast<long>(py::len(text));
14+
TaintRangeRefs new_ranges;
15+
TaintRangePtr current_range;
16+
size_t current_start = 0;
17+
size_t index;
18+
19+
for (index = 0; index < index_range_map.size(); ++index) {
20+
if (const auto& taint_range{ index_range_map.at(index) }; taint_range != current_range) {
21+
if (current_range) {
22+
new_ranges.emplace_back(initializer->allocate_taint_range(
23+
current_start, index - current_start, current_range->source, current_range->secure_marks));
24+
}
25+
current_range = taint_range;
26+
current_start = index;
27+
}
28+
}
29+
if (current_range != nullptr) {
30+
new_ranges.emplace_back(initializer->allocate_taint_range(
31+
current_start, index - current_start, current_range->source, current_range->secure_marks));
32+
}
33+
return new_ranges;
34+
}
2235

23-
// Parse slice parameters
36+
/**
37+
* Builds a per-index map of taint ranges covering the whole text object, then returns only the entries selected by the slice defined by start, stop and step.
38+
*
39+
* @param text The text object for which the taint ranges are to be built.
40+
* @param ranges The taint ranges of the text object, walked in order (presumably sorted by start position and non-overlapping; verify against caller).
41+
* @param start The start index of the slice (may be nullptr or None).
42+
* @param stop The stop index of the slice.
43+
* @param step The step of the slice.
44+
*
45+
* @return The per-index map entries for the positions selected by the slice, one entry per selected index.
46+
*/
47+
TaintRangeRefs
48+
build_index_range_map(PyObject* text, TaintRangeRefs& ranges, PyObject* start, PyObject* stop, PyObject* step)
49+
{
50+
TaintRangeRefs index_range_map;
51+
long long index = 0;
52+
for (const auto& taint_range : ranges) {
53+
auto shared_range = taint_range;
54+
while (index < taint_range->start) {
55+
index_range_map.emplace_back(nullptr);
56+
index++;
57+
}
58+
while (index < (taint_range->start + taint_range->length)) {
59+
index_range_map.emplace_back(shared_range);
60+
index++;
61+
}
62+
}
63+
long length_text = static_cast<long long>(py::len(text));
64+
while (index < length_text) {
65+
index_range_map.emplace_back(nullptr);
66+
index++;
67+
}
68+
TaintRangeRefs index_range_map_result;
2469
long start_int = 0;
2570
if (start != nullptr and start != Py_None) {
2671
start_int = PyLong_AsLong(start);
@@ -50,74 +95,11 @@ compute_slice_ranges(PyObject* text, const TaintRangeRefs& ranges, PyObject* sta
5095
step_int = PyLong_AsLong(step);
5196
}
5297

53-
// For step != 1, we need to track which positions are included
54-
// Build a mapping of original positions to result positions
55-
if (step_int != 1) {
56-
// Use the original algorithm for non-unit steps (rare case)
57-
// Build position-to-range map only for the slice range
58-
std::vector<TaintRangePtr> position_map;
59-
position_map.reserve(stop_int - start_int);
60-
61-
for (long i = start_int; i < stop_int; i += step_int) {
62-
TaintRangePtr range_at_pos = nullptr;
63-
for (const auto& taint_range : ranges) {
64-
if (i >= taint_range->start && i < (taint_range->start + taint_range->length)) {
65-
range_at_pos = taint_range;
66-
break;
67-
}
68-
}
69-
position_map.push_back(range_at_pos);
70-
}
71-
72-
// Consolidate consecutive ranges
73-
TaintRangeRefs result_ranges;
74-
TaintRangePtr current_range = nullptr;
75-
size_t current_start = 0;
76-
77-
for (size_t i = 0; i < position_map.size(); ++i) {
78-
if (position_map[i] != current_range) {
79-
if (current_range) {
80-
result_ranges.emplace_back(safe_allocate_taint_range(
81-
current_start, i - current_start, current_range->source, current_range->secure_marks));
82-
}
83-
current_range = position_map[i];
84-
current_start = i;
85-
}
86-
}
87-
if (current_range != nullptr) {
88-
result_ranges.emplace_back(safe_allocate_taint_range(
89-
current_start, position_map.size() - current_start, current_range->source, current_range->secure_marks));
90-
}
91-
92-
return result_ranges;
93-
}
94-
95-
// Optimized path for step == 1 (common case)
96-
// Directly compute overlapping ranges without intermediate array
97-
TaintRangeRefs result_ranges;
98-
99-
for (const auto& taint_range : ranges) {
100-
long range_start = taint_range->start;
101-
long range_end = range_start + taint_range->length;
102-
103-
// Check if this range overlaps with [start_int, stop_int)
104-
if (range_end <= start_int || range_start >= stop_int) {
105-
continue; // No overlap
106-
}
107-
108-
// Compute the overlapping portion
109-
long overlap_start = std::max(range_start, start_int);
110-
long overlap_end = std::min(range_end, stop_int);
111-
112-
// Translate to result coordinates (relative to start of slice)
113-
long result_start = overlap_start - start_int;
114-
long result_length = overlap_end - overlap_start;
115-
116-
result_ranges.emplace_back(
117-
safe_allocate_taint_range(result_start, result_length, taint_range->source, taint_range->secure_marks));
98+
for (auto i = start_int; i < stop_int; i += step_int) {
99+
index_range_map_result.emplace_back(index_range_map[i]);
118100
}
119101

120-
return result_ranges;
102+
return index_range_map_result;
121103
}
122104

123105
PyObject*
@@ -132,7 +114,9 @@ slice_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* start, PyOb
132114
if (ranges_error or ranges.empty()) {
133115
return result_o;
134116
}
135-
set_ranges(result_o, compute_slice_ranges(candidate_text, ranges, start, stop, step), ctx_map);
117+
set_ranges(result_o,
118+
reduce_ranges_from_index_range_map(build_index_range_map(candidate_text, ranges, start, stop, step)),
119+
ctx_map);
136120
return result_o;
137121
}
138122

@@ -160,8 +144,6 @@ api_slice_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
160144

161145
PyObject* result_o = PyObject_GetItem(candidate_text, slice);
162146

163-
CHECK_IAST_INITIALIZED_OR_RETURN(result_o);
164-
165147
TRY_CATCH_ASPECT("slice_aspect", return result_o, Py_XDECREF(slice), {
166148
// If no result or the params are not None|Number or the result is the same as the candidate text, nothing
167149
// to taint
@@ -176,4 +158,4 @@ api_slice_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
176158
Py_XDECREF(slice);
177159
return res;
178160
});
179-
}
161+
}

0 commit comments

Comments
 (0)