Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions recce/summary.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import os
import sys
from typing import Dict, List, Optional, Set, Type, Union
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Type, Union
from uuid import UUID

if TYPE_CHECKING:
from recce.models.types import NodeDiff

from pydantic import BaseModel

from recce.apis.check_func import get_node_name_by_id
Expand Down Expand Up @@ -67,14 +70,21 @@ def __init__(self, node_id: str, node_data: dict, data_from: str = "base"):

self.base_data = {}
self.current_data = {}
self._forced_change_status = None

if data_from == "base":
self.base_data = node_data
elif data_from == "current":
self.current_data = node_data

def apply_diff(self, node_diff: "NodeDiff"):
    """Record the change status carried by an externally computed diff.

    The diff is produced outside this node (e.g., from state:modified). Only
    its ``change_status`` attribute is read; the value is stored on
    ``_forced_change_status``.
    """
    forced_status = node_diff.change_status
    self._forced_change_status = forced_status

@property
def change_status(self):
if self._forced_change_status is not None:
return self._forced_change_status
base_checksum = self.base_data.get("checksum", {}).get("checksum")
curr_checksum = self.current_data.get("checksum", {}).get("checksum")
if self.data_from == "base":
Expand Down Expand Up @@ -308,7 +318,7 @@ def get_edge_str(self, edge_id):
return f"{edge.parent_id}-...->{edge.child_id}\n"


def _build_lineage_graph(base, current) -> LineageGraph:
def _build_lineage_graph(base, current, diff: Optional[Dict[str, "NodeDiff"]] = None) -> LineageGraph:
graph = LineageGraph()

# Get the current package name to filter nodes (from the current manifest metadata)
Expand Down Expand Up @@ -336,6 +346,14 @@ def _build_lineage_graph(base, current) -> LineageGraph:
node = graph.nodes[node_id]
node.update_data(node_data, "current")

# Apply externally computed diff (e.g., from state:modified or macro detection).
# This allows nodes whose SQL checksum didn't change (e.g., macro-affected nodes)
# to be surfaced as modified in the graph.
if diff:
for node_id, node_diff in diff.items():
if node_id in graph.nodes:
graph.nodes[node_id].apply_diff(node_diff)

Comment on lines 321 to +356
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LineageGraph.nodes and LineageGraph.edges are mutable class attributes, so _build_lineage_graph() mutates shared state across calls. With the new apply_diff() / _forced_change_status override, a node marked modified in one summary can remain modified in later summaries even when the next call’s diff doesn’t include it (and graphs can also retain stale nodes/edges from previous builds). Make nodes/edges instance attributes (e.g., add __init__ that sets self.nodes = {} and self.edges = {}), or otherwise ensure each _build_lineage_graph() call starts from a fresh graph state.

Copilot uses AI. Check for mistakes.
# Build edges
for child_id, parents in base.get("parent_map", {}).items():
for parent_id in parents:
Expand Down Expand Up @@ -525,7 +543,7 @@ def generate_markdown_summary(ctx: RecceContext, summary_format: str = "markdown

lineage_diff = ctx.get_lineage_diff()
summary_metadata = generate_summary_metadata(lineage_diff.base, lineage_diff.current)
graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current)
graph = _build_lineage_graph(lineage_diff.base, lineage_diff.current, lineage_diff.diff)
graph.checks, check_statistics = generate_check_summary(lineage_diff.base, lineage_diff.current)
summary_config = RecceConfig().get("summary") or {}
node_shapes = summary_config.get("node_shapes") or {}
Expand Down
103 changes: 103 additions & 0 deletions tests/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from recce.adapter.dbt_adapter import DbtAdapter, DbtVersion, load_manifest
from recce.core import RecceContext, set_default_context
from recce.models.types import NodeDiff
from recce.summary import (
MERMAID_NODE_SHAPES,
Node,
Expand Down Expand Up @@ -186,6 +187,7 @@ def test_no_diff_returns_none(self, mock_get_diff):
node = _make_node()
assert node._cal_row_count_delta_percentage() is None


@patch("recce.summary._get_node_row_count_diff")
def test_returns_na_when_base_table_not_found(self, mock_get_diff):
"""When base is None due to table_not_found, return 'N/A (table_not_found)'."""
Expand Down Expand Up @@ -224,3 +226,104 @@ def test_non_dict_meta_returns_none_gracefully(self, mock_get_diff):
)
node = _make_node()
assert node._cal_row_count_delta_percentage() is None


def _make_node_with_checksum(node_id, name, checksum="abc123"):
    """Build a Node whose base and current sides both carry *checksum*.

    The node is constructed with data_from="both" and then both sides are
    assigned explicitly.
    """
    payload = dict(
        name=name,
        resource_type="model",
        package_name="test",
        checksum=dict(checksum=checksum),
    )
    made = Node(node_id, payload, "both")
    # Both sides reference the very same dict, so their checksums always match.
    made.base_data = made.current_data = payload
    return made


def _make_both_node(node_id="model.test.my_model", name="my_model"):
    """Return a Node with data_from='both' whose two sides share the checksum "same"."""
    return _make_node_with_checksum(node_id, name, checksum="same")


class TestNodeApplyDiff:
    """apply_diff must take precedence over the checksum-based change status."""

    def test_apply_diff_sets_forced_change_status(self):
        unchanged = _make_both_node()
        # Identical checksums on both sides: nothing is reported before the diff.
        assert unchanged.change_status is None
        forced = NodeDiff(change_status="modified")
        unchanged.apply_diff(forced)
        assert unchanged.change_status == "modified"

    def test_apply_diff_overrides_checksum_based_none(self):
        unchanged = _make_node_with_checksum("model.test.m", "m", checksum="same")
        # Identical checksums on both sides: nothing is reported before the diff.
        assert unchanged.change_status is None
        forced = NodeDiff(change_status="modified")
        unchanged.apply_diff(forced)
        assert unchanged.change_status == "modified"


class TestWhatChanged:
    """_what_changed must list "Code" for a node forced to modified."""

    @patch("recce.summary._get_node_row_count_diff", return_value=(None, None))
    def test_modified_shows_code(self, _mock):
        target = _make_both_node()
        forced = NodeDiff(change_status="modified")
        target.apply_diff(forced)
        assert "Code" in target._what_changed()


class TestBuildLineageGraphWithDiff:
    """_build_lineage_graph with and without an externally supplied diff."""

    def _make_lineage(self, node_ids):
        """Return a lineage dict of model nodes sharing one checksum, with no edges."""
        return {
            "nodes": {
                nid: {
                    "id": nid,
                    "name": nid.split(".")[-1],
                    "resource_type": "model",
                    "package_name": "test",
                    "checksum": {"checksum": "same_checksum"},
                    "raw_code": "SELECT 1",
                }
                for nid in node_ids
            },
            "parent_map": {},
        }

    def test_diff_marks_state_modified_nodes(self):
        ids = ["model.test.a", "model.test.b"]
        base = self._make_lineage(ids)
        current = self._make_lineage(ids)

        # No diff supplied: matching checksums mean neither node reports a change.
        graph = _build_lineage_graph(base, current)
        for nid in ids:
            assert graph.nodes[nid].change_status is None

        # Diff from state:modified: only node b is surfaced as modified.
        external = {"model.test.b": NodeDiff(change_status="modified")}
        graph = _build_lineage_graph(base, current, external)
        assert "model.test.b" in graph.modified_set
        assert graph.nodes["model.test.a"].change_status is None

    @patch("recce.summary._get_node_row_count_diff", return_value=(None, None))
    def test_diff_node_shows_code_label(self, _mock):
        base = self._make_lineage(["model.test.a"])
        current = self._make_lineage(["model.test.a"])
        external = {"model.test.a": NodeDiff(change_status="modified")}
        graph = _build_lineage_graph(base, current, external)
        assert "Code" in graph.nodes["model.test.a"]._what_changed()

    def test_no_diff_preserves_existing_behavior(self):
        """Passing diff=None should behave identically to the original implementation."""
        if DbtVersion() < "1.8.1":
            pytest.skip("Dbt version is less than 1.8.1")

        manifest_root = os.path.join(current_dir, "data", "manifest")
        base_manifest_path = os.path.join(manifest_root, "base", "manifest.json")
        pr2_manifest_path = os.path.join(manifest_root, "pr2", "manifest.json")
        base_manifest = load_manifest(path=base_manifest_path)
        curr_manifest = load_manifest(path=pr2_manifest_path)
        adapter = DbtAdapter(curr_manifest=curr_manifest, base_manifest=base_manifest)
        curr_lineage = adapter.get_lineage()
        base_lineage = adapter.get_lineage(base=True)

        # Omitting the argument and passing None explicitly must agree.
        omitted = _build_lineage_graph(base_lineage, curr_lineage)
        explicit_none = _build_lineage_graph(base_lineage, curr_lineage, None)
        assert omitted.modified_set == explicit_none.modified_set
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading