84 changes: 50 additions & 34 deletions src/odoo_data_flow/import_threaded.py
@@ -701,8 +701,53 @@ def _execute_load_batch( # noqa: C901
            if len(load_lines[0]) > 10:
                log.debug(f"Full first load_line: {load_lines[0]}")

            # Sanitize the id column values to prevent XML ID constraint violations
            from .lib.internal.tools import to_xmlid


medium

Per PEP 8, imports should be at the top of the file, not inside a function. This improves readability and makes dependencies clear. Please move `from .lib.internal.tools import to_xmlid` to the top-level imports of this module.
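For example, a minimal sketch of the module's top-level import section (the surrounding lines are illustrative assumptions, not the file's actual imports):

    import logging

    from .lib.internal.tools import to_xmlid

    log = logging.getLogger(__name__)

The function body can then call `to_xmlid` directly without the local import.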

            sanitized_load_lines = []
            for i, line in enumerate(load_lines):
                sanitized_line = list(line)
                if uid_index < len(sanitized_line):
                    # Sanitize the source_id (which is in the id column)
                    original_id = sanitized_line[uid_index]
                    sanitized_id = to_xmlid(original_id)
                    sanitized_line[uid_index] = sanitized_id
                    if i < 3:  # Only log first 3 lines for debugging
                        log.debug(
                            f"Sanitized ID for line {i}: '{original_id}' -> '{sanitized_id}'"
                        )
                else:
                    if i < 3:  # Only log first 3 lines for debugging
                        log.warning(
                            f"Line {i} does not have enough columns for uid_index {uid_index}. "
                            f"Line has {len(line)} columns."
                        )
                sanitized_load_lines.append(sanitized_line)
Comment on lines +706 to +724


medium

This block creates a copy of `load_lines` to sanitize the ID column. For large batches, this can be memory-inefficient. Since `load_lines` is not used in its original form after this, you can modify it in-place to save memory. This also makes the code more concise. Assuming `load_lines` contains mutable lists, you can modify each line directly:

            for i, line in enumerate(load_lines):
                if uid_index < len(line):
                    # Sanitize the source_id (which is in the id column)
                    original_id = line[uid_index]
                    sanitized_id = to_xmlid(original_id)
                    line[uid_index] = sanitized_id
                    if i < 3:  # Only log first 3 lines for debugging
                        log.debug(
                            f"Sanitized ID for line {i}: '{original_id}' -> '{sanitized_id}'"
                        )
                else:
                    if i < 3:  # Only log first 3 lines for debugging
                        log.warning(
                            f"Line {i} does not have enough columns for uid_index {uid_index}. "
                            f"Line has {len(line)} columns."
                        )
            sanitized_load_lines = load_lines


            # Log sample of sanitized data without large base64 content
            log.debug(f"Load header: {load_header}")
            log.debug(f"Load lines count: {len(sanitized_load_lines)}")
            if sanitized_load_lines and len(sanitized_load_lines) > 0:


medium

The condition `if sanitized_load_lines and len(sanitized_load_lines) > 0:` is redundant. In Python, a non-empty list evaluates to `True` in a boolean context, so `if sanitized_load_lines:` is sufficient and more idiomatic:

            if sanitized_load_lines:

                # Show first line but truncate large base64 data
                preview_line = []
                for i, field_value in enumerate(sanitized_load_lines[0][:10] if len(sanitized_load_lines[0]) > 10 else sanitized_load_lines[0]):


medium

The conditional expression used to slice the first 10 fields is more complex than necessary. Slicing with `[:10]` already handles lists with fewer than 10 elements gracefully by returning all available elements, so you can simplify this:

                for i, field_value in enumerate(sanitized_load_lines[0][:10]):

                    if isinstance(field_value, str) and len(field_value) > 100:
                        # Truncate large strings (likely base64 data)
                        preview_line.append(f"{field_value[:50]}...[{len(field_value)-100} chars truncated]...{field_value[-50:]}")
                    else:
                        preview_line.append(field_value)
                log.debug(f"First load line (first 10 fields, truncated if large): {preview_line}")

            res = model.load(load_header, sanitized_load_lines, context=context)

            # DEBUG: Log detailed information about what we got back from Odoo
            log.debug(f"Load response type: {type(res)}")
            log.debug(f"Load response keys: {list(res.keys()) if hasattr(res, 'keys') else 'Not a dict'}")
            log.debug(f"Load response full content: {res}")

            # Extract the created IDs from the response
            created_ids = res.get("ids", [])
            log.debug(f"Expected records: {len(sanitized_load_lines)}, Created records: {len(created_ids)}")

            # DEBUG: Log what we got back from Odoo
            log.debug(
                f"Load response - messages: {res.get('messages', 'None')}, "
@@ -719,37 +764,6 @@ def _execute_load_batch( # noqa: C901
                    else:
                        log.info(f"Load operation returned {msg_type}: {msg_text}")

            # Check for any Odoo server errors in the response
            if res.get("messages"):
                error = res["messages"][0].get("message", "Batch load failed.")
                # Don't raise immediately, log and continue to capture in fail file
                log.error(f"Odoo server error during load: {error}")

            created_ids = res.get("ids", [])
            log.debug(f"Expected records: {len(sanitized_load_lines)}, Created records: {len(created_ids)}")

            # Always log detailed information about record creation
            if len(created_ids) != len(sanitized_load_lines):
                log.warning(
                    f"Record creation mismatch: Expected {len(sanitized_load_lines)} records, "
                    f"but only {len(created_ids)} were created"
                )
                if len(created_ids) == 0:
                    log.error(
                        f"No records were created in this batch of {len(sanitized_load_lines)}. "
                        f"This may indicate silent failures in the Odoo load operation. "
                        f"Check Odoo server logs for validation errors."
                    )
                    # Log the actual data being sent for debugging
                    if sanitized_load_lines:
                        log.debug(f"First few lines being sent:")
                        for i, line in enumerate(sanitized_load_lines[:3]):
                            log.debug(f"  Line {i}: {dict(zip(load_header, line))}")
                else:
                    log.warning(
                        f"Partial record creation: {len(created_ids)}/{len(sanitized_load_lines)} "
                        f"records were created. Some records may have failed validation."
                    )
            # Check for any Odoo server errors in the response that should halt processing
            if res.get("messages"):
                for message in res["messages"]:
@@ -763,9 +777,6 @@ def _execute_load_batch( # noqa: C901
                        log.warning(f"Load operation returned {msg_type}: {msg_text}")
                    else:
                        log.info(f"Load operation returned {msg_type}: {msg_text}")

            created_ids = res.get("ids", [])
            log.debug(f"Expected records: {len(sanitized_load_lines)}, Created records: {len(created_ids)}")

            # Always log detailed information about record creation
            if len(created_ids) != len(sanitized_load_lines):
@@ -841,6 +852,11 @@ def _execute_load_batch( # noqa: C901

        except Exception as e:
            error_str = str(e).lower()

            # Log the full exception for debugging
            import traceback


medium

While importing inside a function is sometimes acceptable for modules used only in exceptional paths, `traceback` is a standard library module, and it's generally better practice to place all imports at the top of the file for clarity and consistency. This helps other developers quickly see all module dependencies.
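A minimal sketch of the suggested layout (assuming the module's existing top-level import section):

    import traceback

With that at the top of `import_threaded.py`, the handler below can call `traceback.format_exc()` without the inline import.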

            log.error(f"Exception during batch load: {type(e).__name__}: {e}")
            log.error(f"Full traceback: {traceback.format_exc()}")

            # SPECIAL CASE: Client-side timeouts for local processing
            # These should be IGNORED entirely to allow long server processing