11#include " aspect_slice.h"
2- #include < algorithm >
2+ #include " helpers.h "
33
44/* *
5- * Optimized function to compute taint ranges for a slice operation.
6- * This avoids creating an intermediate character-by-character index map,
7- * directly computing overlapping ranges instead. This reduces memory usage
8- * from O(n) to O(m) where n=string length, m=number of taint ranges.
5+ * This function reduces the taint ranges from the given index range map.
96 *
10- * @param text The text object being sliced.
11- * @param ranges The taint ranges of the original text.
12- * @param start The start index of the slice (or nullptr/None).
13- * @param stop The stop index of the slice (or nullptr/None).
14- * @param step The step of the slice (or nullptr/None).
7+ * @param index_range_map The index range map from which the taint ranges are to be reduced.
158 *
16- * @return Taint ranges for the sliced result .
9+ * @return A map of taint ranges for the given index range map .
1710 */
1811TaintRangeRefs
19- compute_slice_ranges (PyObject* text, const TaintRangeRefs& ranges, PyObject* start, PyObject* stop, PyObject* step )
12+ reduce_ranges_from_index_range_map ( const TaintRangeRefs& index_range_map )
2013{
21- long length_text = static_cast <long >(py::len (text));
14+ TaintRangeRefs new_ranges;
15+ TaintRangePtr current_range;
16+ size_t current_start = 0 ;
17+ size_t index;
18+
19+ for (index = 0 ; index < index_range_map.size (); ++index) {
20+ if (const auto & taint_range{ index_range_map.at (index) }; taint_range != current_range) {
21+ if (current_range) {
22+ new_ranges.emplace_back (initializer->allocate_taint_range (
23+ current_start, index - current_start, current_range->source , current_range->secure_marks ));
24+ }
25+ current_range = taint_range;
26+ current_start = index;
27+ }
28+ }
29+ if (current_range != nullptr ) {
30+ new_ranges.emplace_back (initializer->allocate_taint_range (
31+ current_start, index - current_start, current_range->source , current_range->secure_marks ));
32+ }
33+ return new_ranges;
34+ }
2235
23- // Parse slice parameters
36+ /* *
37+ * This function builds a per-position map of taint ranges for the requested slice of the given text object.
38+ *
39+ * @param text The text object being sliced.
40+ * @param ranges The taint ranges of the original text.
41+ * @param start The start index of the slice (or nullptr/None).
42+ * @param stop The stop index of the slice (or nullptr/None).
43+ * @param step The step of the slice (or nullptr/None).
44+ *
45+ * @return One entry per selected position: the taint range covering that position in the text, or nullptr if none does.
46+ */
47+ TaintRangeRefs
48+ build_index_range_map (PyObject* text, TaintRangeRefs& ranges, PyObject* start, PyObject* stop, PyObject* step)
49+ {
50+ TaintRangeRefs index_range_map;
51+ long long index = 0 ;
52+ for (const auto & taint_range : ranges) {
53+ auto shared_range = taint_range;
54+ while (index < taint_range->start ) {
55+ index_range_map.emplace_back (nullptr );
56+ index++;
57+ }
58+ while (index < (taint_range->start + taint_range->length )) {
59+ index_range_map.emplace_back (shared_range);
60+ index++;
61+ }
62+ }
63+ long length_text = static_cast <long long >(py::len (text));
64+ while (index < length_text) {
65+ index_range_map.emplace_back (nullptr );
66+ index++;
67+ }
68+ TaintRangeRefs index_range_map_result;
2469 long start_int = 0 ;
2570 if (start != nullptr and start != Py_None) {
2671 start_int = PyLong_AsLong (start);
@@ -50,74 +95,11 @@ compute_slice_ranges(PyObject* text, const TaintRangeRefs& ranges, PyObject* sta
5095 step_int = PyLong_AsLong (step);
5196 }
5297
53- // For step != 1, we need to track which positions are included
54- // Build a mapping of original positions to result positions
55- if (step_int != 1 ) {
56- // Use the original algorithm for non-unit steps (rare case)
57- // Build position-to-range map only for the slice range
58- std::vector<TaintRangePtr> position_map;
59- position_map.reserve (stop_int - start_int);
60-
61- for (long i = start_int; i < stop_int; i += step_int) {
62- TaintRangePtr range_at_pos = nullptr ;
63- for (const auto & taint_range : ranges) {
64- if (i >= taint_range->start && i < (taint_range->start + taint_range->length )) {
65- range_at_pos = taint_range;
66- break ;
67- }
68- }
69- position_map.push_back (range_at_pos);
70- }
71-
72- // Consolidate consecutive ranges
73- TaintRangeRefs result_ranges;
74- TaintRangePtr current_range = nullptr ;
75- size_t current_start = 0 ;
76-
77- for (size_t i = 0 ; i < position_map.size (); ++i) {
78- if (position_map[i] != current_range) {
79- if (current_range) {
80- result_ranges.emplace_back (safe_allocate_taint_range (
81- current_start, i - current_start, current_range->source , current_range->secure_marks ));
82- }
83- current_range = position_map[i];
84- current_start = i;
85- }
86- }
87- if (current_range != nullptr ) {
88- result_ranges.emplace_back (safe_allocate_taint_range (
89- current_start, position_map.size () - current_start, current_range->source , current_range->secure_marks ));
90- }
91-
92- return result_ranges;
93- }
94-
95- // Optimized path for step == 1 (common case)
96- // Directly compute overlapping ranges without intermediate array
97- TaintRangeRefs result_ranges;
98-
99- for (const auto & taint_range : ranges) {
100- long range_start = taint_range->start ;
101- long range_end = range_start + taint_range->length ;
102-
103- // Check if this range overlaps with [start_int, stop_int)
104- if (range_end <= start_int || range_start >= stop_int) {
105- continue ; // No overlap
106- }
107-
108- // Compute the overlapping portion
109- long overlap_start = std::max (range_start, start_int);
110- long overlap_end = std::min (range_end, stop_int);
111-
112- // Translate to result coordinates (relative to start of slice)
113- long result_start = overlap_start - start_int;
114- long result_length = overlap_end - overlap_start;
115-
116- result_ranges.emplace_back (
117- safe_allocate_taint_range (result_start, result_length, taint_range->source , taint_range->secure_marks ));
98+ for (auto i = start_int; i < stop_int; i += step_int) {
99+ index_range_map_result.emplace_back (index_range_map[i]);
118100 }
119101
120- return result_ranges ;
102+ return index_range_map_result ;
121103}
122104
123105PyObject*
@@ -132,7 +114,9 @@ slice_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* start, PyOb
132114 if (ranges_error or ranges.empty ()) {
133115 return result_o;
134116 }
135- set_ranges (result_o, compute_slice_ranges (candidate_text, ranges, start, stop, step), ctx_map);
117+ set_ranges (result_o,
118+ reduce_ranges_from_index_range_map (build_index_range_map (candidate_text, ranges, start, stop, step)),
119+ ctx_map);
136120 return result_o;
137121}
138122
@@ -160,8 +144,6 @@ api_slice_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
160144
161145 PyObject* result_o = PyObject_GetItem (candidate_text, slice);
162146
163- CHECK_IAST_INITIALIZED_OR_RETURN (result_o);
164-
165147 TRY_CATCH_ASPECT (" slice_aspect" , return result_o, Py_XDECREF (slice), {
166148 // If no result or the params are not None|Number or the result is the same as the candidate text, nothing
167149 // to taint
@@ -176,4 +158,4 @@ api_slice_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
176158 Py_XDECREF (slice);
177159 return res;
178160 });
179- }
161+ }
0 commit comments