-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Open
Labels
enhancementNew feature or requestNew feature or request
Description
In the `words_from_chars` function, which can be found in `recognition.utils`,
the code assumes the line is LTR, but not all languages are LTR; some, such as Hebrew and Arabic, are RTL.
I suggest checking the characters to determine the dominant language (a sentence can mix languages), using that to decide the line's direction, and then merging to the line edges based on that direction.
from typing import List, Tuple, Any
import unicodedata
import statistics
# ---- bidi helpers ----
def _is_strong_rtl(s: str) -> bool:
for ch in s:
b = unicodedata.bidirectional(ch)
if b in ("R", "AL", "AN"):
return True
return False
def _is_strong_ltr(s: str) -> bool:
for ch in s:
b = unicodedata.bidirectional(ch)
if b in ("L", "LRE", "LRO", "EN"):
return True
return False
def _detect_line_direction(chars) -> str:
    """Vote on the reading direction of a line from its characters.

    Each element's ``text`` attribute (missing or falsy values count as
    empty) is stripped; blank entries are ignored. Every remaining entry
    adds one RTL vote if ``_is_strong_rtl`` accepts it and one LTR vote if
    ``_is_strong_ltr`` accepts it (an entry can contribute to both).

    Args:
        chars: Iterable of objects that may carry a ``text`` attribute.

    Returns:
        "rtl" when RTL votes strictly outnumber LTR votes, otherwise "ltr"
        (ties and empty input therefore yield "ltr").
    """
    rtl_votes = 0
    ltr_votes = 0
    for item in chars:
        text = (getattr(item, "text", "") or "").strip()
        if text:
            if _is_strong_rtl(text):
                rtl_votes += 1
            if _is_strong_ltr(text):
                ltr_votes += 1
    if rtl_votes > ltr_votes:
        return "rtl"
    return "ltr"
# ---- geometry helpers (robust to different field names) ----
def _extract_box(obj: Any) -> Tuple[float, float, float, float]:
"""Return (x0, y0, x1, y1) from obj or obj.bbox with common field names."""
b = getattr(obj, "bbox", obj)
for x0k, x1k, y0k, y1k in (
("x0", "x1", "y0", "y1"),
("xmin", "xmax", "ymin", "ymax"),
("left", "right", "top", "bottom"),
):
try:
x0 = getattr(b, x0k); x1 = getattr(b, x1k)
y0 = getattr(b, y0k); y1 = getattr(b, y1k)
if None not in (x0, x1, y0, y1):
return float(x0), float(y0), float(x1), float(y1)
except AttributeError:
pass
raise AttributeError("Unsupported bbox schema on object: %r" % (obj,))
def _median_char_width(valid_chars: List[Any]) -> float:
    """Return the median positive box width of *valid_chars*.

    Each element's box is read with ``_extract_box``; elements whose box
    cannot be read (any exception) are skipped on a best-effort basis, as
    are boxes whose width ``x1 - x0`` is not strictly positive.

    Args:
        valid_chars: Objects accepted by ``_extract_box``.

    Returns:
        The median of the collected widths, or 0.0 when none were collected.
    """
    positive_widths = []
    for item in valid_chars:
        try:
            left, _, right, _ = _extract_box(item)
        except Exception:
            # Best effort: an unreadable box must not abort the estimate.
            continue
        span = right - left
        if span > 0:
            positive_widths.append(span)
    if not positive_widths:
        return 0.0
    return statistics.median(positive_widths)
def _near_edge(direction: str, is_start: bool, ch_box, line_box,
               tol_px: float) -> bool:
    """Tell whether a character box lies within *tol_px* of a line edge.

    The edge checked depends on reading direction: for "ltr" the start is the
    line's left edge and the end is its right edge; any other *direction*
    value is handled as right-to-left, with the edges swapped.

    Args:
        direction: "ltr" or another value (treated as RTL).
        is_start: Truthy to test the reading-start edge, falsy for the end.
        ch_box: 4-tuple ``(x0, y0, x1, y1)`` of the character.
        line_box: Line box, read through ``_extract_box``.
        tol_px: Maximum allowed gap between the character and the edge.

    Returns:
        True when the gap to the selected edge is at most *tol_px*.
    """
    char_left, _, char_right, _ = ch_box
    line_left, _, line_right, _ = _extract_box(line_box)
    # LTR start and RTL end both map to the left edge; the other two cases
    # map to the right edge.
    wants_left_edge = (direction == "ltr") == bool(is_start)
    if wants_left_edge:
        gap = char_left - line_left
    else:
        gap = line_right - char_right
    return gap <= tol_px
def _snap_word_to_line_edge(word, direction: str, is_start: bool, ch_box,
                            line_box, tol_px: float) -> None:
    """Extend *word* to a line edge when *ch_box* is within *tol_px* of it."""
    if not _near_edge(direction, is_start, ch_box, line_box, tol_px):
        return
    # Reading start is the left edge for "ltr" and the right edge otherwise;
    # the reading end is the opposite edge.
    if (direction == "ltr") == bool(is_start):
        word.merge_left(line_box)
    else:
        word.merge_right(line_box)


def words_from_chars(chars: List["TextChar"], line_box: "PolygonBox",
                     snap_ratio_line: float = 0.03,
                     snap_ratio_char: float = 0.50):
    """Group characters into words, snapping the outer words to the line edges.

    Characters are consumed in order. A character whose ``bbox_valid`` is
    falsy, or whose text is blank, closes the current word. The first valid
    character's word is stretched to the reading-start edge of *line_box*
    and the last valid character's word to the reading-end edge, but *only
    if* that character already lies within a small tolerance of the edge.
    The reading direction comes from ``_detect_line_direction``: for "ltr"
    the start edge is the left one, otherwise the right one.

    The tolerance in pixels is the smaller of ``snap_ratio_line`` times the
    line width and ``snap_ratio_char`` times the median character width
    (the latter is ignored when no positive character width is available).

    Args:
        chars: Characters of one line, in reading order.
        line_box: Box of the whole line.
        snap_ratio_line: Max distance to the edge as a fraction of the line
            width (e.g. 0.03 for 3%).
        snap_ratio_char: Max distance to the edge as a multiple of the
            median character width (e.g. 0.5).

    Returns:
        List of ``TextWord`` objects; empty when no character has a valid
        bbox. A blank character that starts a new word is emitted as its
        own word.
    """
    words = []
    direction = _detect_line_direction(chars)

    valid_indices = [
        i for i, ch in enumerate(chars) if getattr(ch, "bbox_valid", False)
    ]
    if not valid_indices:
        return words
    first_valid = valid_indices[0]
    last_valid = valid_indices[-1]

    # Tolerance: min of a small share of the line width and ~half a char.
    ln_x0, _, ln_x1, _ = _extract_box(line_box)
    line_w = max(1.0, ln_x1 - ln_x0)  # floor so degenerate lines keep a positive width
    med_char_w = _median_char_width([chars[i] for i in valid_indices])
    char_tol = snap_ratio_char * med_char_w if med_char_w > 0 else float("inf")
    tol_px = min(snap_ratio_line * line_w, char_tol)

    word = None
    for i, char in enumerate(chars):
        if not getattr(char, "bbox_valid", False):
            # An unusable box ends whatever word is in progress.
            if word:
                words.append(word)
                word = None
            continue

        ch_text = (getattr(char, "text", "") or "")
        ch_box = _extract_box(char)
        is_blank = not ch_text.strip()

        if not word:
            # Start a new word from this character.
            word = TextWord(**char.model_dump())
            if i == first_valid:
                _snap_word_to_line_edge(
                    word, direction, True, ch_box, line_box, tol_px)
            # A word that begins on the last valid char (e.g. a single-char
            # line) must also be checked against the far edge.
            if i == last_valid:
                _snap_word_to_line_edge(
                    word, direction, False, ch_box, line_box, tol_px)
            if is_blank:
                words.append(word)
                word = None
        elif is_blank:
            # Whitespace terminates the current word and is itself dropped.
            words.append(word)
            word = None
        else:
            word.merge(char)
            word.text = word.text + ch_text
            if i == last_valid:
                _snap_word_to_line_edge(
                    word, direction, False, ch_box, line_box, tol_px)

    if word:
        words.append(word)
    return words
Just a suggestion for an implementation.
Dordor333
Metadata
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request