Skip to content

Commit e1f39f4

Browse files
authored
Implement text_to_mo.py, version 0.11.0 (#159)
1 parent 95ecf55 commit e1f39f4

File tree

5 files changed

+123
-12
lines changed

5 files changed

+123
-12
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from collections.abc import Iterable, Iterator
2+
from typing import BinaryIO, TextIO
3+
4+
from babel.messages.catalog import Catalog, Message
5+
from babel.messages.mofile import write_mo
6+
from babel.messages.pofile import read_po
7+
from loguru import logger
8+
9+
from df_translation_toolkit.parse.parse_raws import join_tag, split_tag
10+
from df_translation_toolkit.utils.fix_translated_strings import cleanup_string, fix_spaces
11+
from df_translation_toolkit.validation.validate_objects import validate_tag
12+
from df_translation_toolkit.validation.validation_models import Diagnostics, ProblemInfo
13+
14+
15+
def fix_spaces_in_tag_parts_translations(original_parts: list[str], translation_parts: list[str]) -> Iterator[str]:
16+
for original, translation in zip(original_parts, translation_parts, strict=False):
17+
yield fix_spaces(original, translation, strict=True)
18+
19+
20+
def translate_tag(
    original_tag: str,
    translation_tag: str,
    diagnostics: Diagnostics | None = None,
) -> str | None:
    """
    Validate a translated tag against the original one and fix spaces in its parts.

    When validation finds problems, they are logged and, if a ``diagnostics``
    collector is given, added to it. Nothing is reported for a tag pair
    without problems.

    Returns the tag joined from the space-corrected translation parts,
    or ``None`` if the validation problems contain errors.
    """
    validation_problems = list(validate_tag(original_tag, translation_tag))
    problem_info = ProblemInfo(original=original_tag, translation=translation_tag, problems=validation_problems)

    # Report only when there is something to report: an unconditional report
    # would emit an error-level log record for every valid tag.
    if validation_problems:
        logger.error("\n" + str(problem_info))
        # Explicit None check, so a collector which is falsy while empty still receives the report
        if diagnostics is not None:
            diagnostics.add(problem_info)

    if problem_info.contains_errors():
        return None

    original_parts = split_tag(original_tag)
    translation_parts = split_tag(translation_tag)

    return join_tag(fix_spaces_in_tag_parts_translations(original_parts, translation_parts))
38+
39+
40+
def translate_tag_string(
    original_string_tag: str,
    translation_tag: str,
    diagnostics: Diagnostics | None = None,
) -> str | None:
    """
    Prepare the translation of a single tag string.

    Returns ``None`` when either string is empty, when the translation is equal
    to the original, or when ``translate_tag`` gives no result. Otherwise the
    outer spaces of the translation are fixed in strict mode, the tag is passed
    through ``translate_tag``, and the result is passed through ``cleanup_string``.
    """
    if not original_string_tag or not translation_tag:
        return None

    if translation_tag == original_string_tag:
        return None

    spaced_translation = fix_spaces(original_string_tag, translation_tag, strict=True)
    translated = translate_tag(original_string_tag, spaced_translation, diagnostics=diagnostics)
    return cleanup_string(translated) if translated else None
54+
55+
56+
def prepare_translation_messages(catalog: Catalog, diagnostics: Diagnostics | None = None) -> Iterable[Message]:
    """
    Yield a new message for every catalog entry which has a usable translation.

    The id and the string of each entry are converted to ``str`` and passed to
    ``translate_tag_string``; entries for which it gives no result are skipped.
    The yielded messages keep the id and the context of the source entry.
    """
    for entry in catalog:
        fixed_string = translate_tag_string(str(entry.id), str(entry.string), diagnostics=diagnostics)
        if not fixed_string:
            continue

        yield Message(id=entry.id, context=entry.context, string=fixed_string)
61+
62+
63+
def convert(po_file: TextIO, mo_file: BinaryIO, diagnostics: Diagnostics | None = None) -> None:
    """
    Read a PO catalog from ``po_file`` and write the prepared translations to ``mo_file``.

    Only the messages produced by ``prepare_translation_messages`` are added
    to the new catalog which is written out in MO format.
    """
    source_catalog = read_po(po_file)
    result_catalog = Catalog()

    prepared_messages = prepare_translation_messages(source_catalog, diagnostics)
    for entry in prepared_messages:
        result_catalog.add(id=entry.id, context=entry.context, string=entry.string)

    write_mo(mo_file, result_catalog)

df_translation_toolkit/utils/fix_translated_strings.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,42 @@
11
from unidecode import unidecode_expect_nonascii as unidecode
22

33

4-
def fix_leading_spaces(original_string: str, translation: str, *, strict: bool = False) -> str:
    """
    Fixes leading spaces of the translation.

    Adds a missing space in the beginning of the translation, if the original starts with a space.
    In the strict mode removes leading whitespace, if the original does not start with a space.
    Removes leading whitespace in any mode, if the translation text starts with "." or ",".
    """
    original_has_space = original_string.startswith(" ")

    if original_has_space and not translation.startswith(" "):
        translation = " " + translation
    elif strict and not original_has_space:
        translation = translation.lstrip()

    stripped = translation.lstrip()
    return stripped if stripped.startswith((".", ",")) else translation
1619

1720

18-
def fix_trailing_spaces(original_string: str, translation: str, *, strict: bool = False) -> str:
    """
    Fixes trailing spaces of the translation.

    Adds a missing trailing space, if the original ends with a space.
    In the strict mode removes trailing whitespace, if the original does not end with a space.
    """
    if original_string.endswith(" "):
        return translation if translation.endswith(" ") else translation + " "

    return translation.rstrip() if strict else translation
2632

2733

28-
def fix_spaces(original_string: str, translation: str, *, strict: bool = False) -> str:
    """
    Fixes leading spaces first, then trailing spaces of the translation string.

    The ``strict`` flag is passed as is to both steps.
    """
    return fix_trailing_spaces(
        original_string,
        fix_leading_spaces(original_string, translation, strict=strict),
        strict=strict,
    )
3440

3541

3642
_exclusions = "¿¡"

df_translation_toolkit/validation/validate_objects.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ def validate_tag(original_tag: str, translation_tag: str) -> Iterator[Validation
1616
return
1717

1818
if translation_tag.strip() != translation_tag:
19-
yield ValidationProblem("Extra spaces at the beginning or at the end of the translation")
19+
yield ValidationProblem(
20+
"Extra spaces at the beginning or at the end of the translation",
21+
ProblemSeverity.WARNING,
22+
)
2023
translation_tag = translation_tag.strip()
2124
# No return to check issues with brackets after stripping spaces
2225

@@ -39,7 +42,10 @@ def validate_tag_parts(original_parts: list[str], translation_parts: list[str])
3942
if all_caps(original) or original.isdecimal():
4043
valid = not (original != translation and original == translation.strip())
4144
if not valid:
42-
yield ValidationProblem("Don't add extra spaces at the beginning or at the end of a tag part")
45+
yield ValidationProblem(
46+
"Don't add extra spaces at the beginning or at the end of a tag part",
47+
ProblemSeverity.WARNING,
48+
)
4349

4450
valid = original == translation or original in ("STP", "NP", "SINGULAR", "PLURAL")
4551
if not valid:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "df-translation-toolkit"
3-
version = "0.10.0"
3+
version = "0.11.0"
44
description = "Toolset to convert text extracted from DF to translation format and back"
55
authors = [
66
"insolor <[email protected]>",

tests/test_text_to_mo.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import pytest
2+
3+
from df_translation_toolkit.convert.text_to_mo import fix_spaces_in_tag_parts_translations, translate_tag_string
4+
5+
6+
@pytest.mark.parametrize(
    ("original_parts", "translated_parts", "result"),
    [
        (["FIRST", " original "], ["FIRST", "translation"], ["FIRST", " translation "]),
    ],
)
def test_fix_spaces_in_tag_parts_translations(
    original_parts: list[str],
    translated_parts: list[str],
    result: list[str],
) -> None:
    """
    Spaces around an original tag part must be restored in the matching translated part.
    """
    fixed_parts = list(fix_spaces_in_tag_parts_translations(original_parts, translated_parts))
    assert fixed_parts == result
18+
19+
20+
@pytest.mark.parametrize(
    ("original", "translation", "result"),
    [
        ("[FIRST: original ]", "[FIRST:translation]", "[FIRST: translation ]"),
        ("[FIRST:original]", "[FIRST: translation ]", "[FIRST:translation]"),
        ("[FIRST:original]", " [FIRST:translation] ", "[FIRST:translation]"),
    ],
)
def test_translate_tag_string(original: str, translation: str, result: str) -> None:
    """
    Spaces inside and around the translated tag must follow the spaces of the original tag.
    """
    translated = translate_tag_string(original, translation)
    assert translated == result

0 commit comments

Comments
 (0)