Skip to content

Commit 5698f78

Browse files
committed
Fix #160: rewrite skip_to_character as an iterative loop, because the recursive version hits the maximum recursion depth on long strings
1 parent 3feaeaa commit 5698f78

File tree

3 files changed

+27
-18
lines changed

3 files changed

+27
-18
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ repos:
3838
pass_filenames: false
3939
types: [python]
4040
- repo: https://github.com/semgrep/pre-commit
41-
rev: "v1.142.0"
41+
rev: "v1.142.1"
4242
hooks:
4343
- id: semgrep
4444
args:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
33
build-backend = "setuptools.build_meta"
44
[project]
55
name = "json_repair"
6-
version = "0.52.4"
6+
version = "0.52.5"
77
license = "MIT"
88
license-files = ["LICENSE"]
99
authors = [

src/json_repair/json_parser.py

Lines changed: 25 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -158,23 +158,32 @@ def skip_whitespaces_at(self, idx: int = 0, move_main_index=True) -> int:
158158

159159
def skip_to_character(self, character: str | list[str], idx: int = 0) -> int:
160160
"""
161-
This function quickly iterates to find a character, syntactic sugar to make the code more concise
161+
Advance from (self.index + idx) until we hit an *unescaped* target character.
162+
Returns the offset (idx) from self.index to that position, or the distance to the end if not found.
162163
"""
163-
try:
164-
char = self.json_str[self.index + idx]
165-
except IndexError:
166-
return idx
167-
character_list = character if isinstance(character, list) else [character]
168-
while char not in character_list:
169-
idx += 1
170-
try:
171-
char = self.json_str[self.index + idx]
172-
except IndexError:
173-
return idx
174-
if self.json_str[self.index + idx - 1] == "\\":
175-
# Ah shoot this was actually escaped, continue
176-
return self.skip_to_character(character, idx + 1)
177-
return idx
164+
targets = set(character) if isinstance(character, list) else {character}
165+
i = self.index + idx
166+
n = len(self.json_str)
167+
backslashes = 0 # count of consecutive '\' immediately before current char
168+
169+
while i < n:
170+
ch = self.json_str[i]
171+
172+
if ch == "\\":
173+
backslashes += 1
174+
i += 1
175+
continue
176+
177+
# ch is not a backslash; if it's a target and not escaped (even backslashes), we're done
178+
if ch in targets and (backslashes % 2 == 0):
179+
return i - self.index
180+
181+
# reset backslash run when we see a non-backslash
182+
backslashes = 0
183+
i += 1
184+
185+
# not found; return distance to end
186+
return n - self.index
178187

179188
def _log(self, text: str) -> None:
180189
window: int = 10

0 commit comments

Comments
 (0)