Skip to content

Commit e8ce82e

Browse files
authored
Merge pull request #35 from baselinrhq/feat/dbt-integration
feat: Add dbt integration
2 parents ada7f03 + e564006 commit e8ce82e

32 files changed

+3768
-5
lines changed
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
name: dbt Integration
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
jobs:
10+
dbt-integration:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- name: Checkout
14+
uses: actions/checkout@v4
15+
16+
- name: Set up Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version: '3.12'
20+
21+
- name: Install dependencies
22+
run: |
23+
python -m pip install --upgrade pip setuptools wheel
24+
pip install -e ".[dev]"
25+
26+
- name: Run dbt integration tests
27+
run: pytest tests/test_dbt_integration.py -v
28+
29+
- name: Install dbt-core
30+
run: |
31+
pip install dbt-core dbt-postgres
32+
33+
- name: Create test dbt project
34+
run: |
35+
mkdir -p /tmp/test_dbt_project
36+
cd /tmp/test_dbt_project
37+
cat > dbt_project.yml << 'EOF'
38+
name: 'test_project'
39+
version: '1.0.0'
40+
config-version: 2
41+
profile: 'test_profile'
42+
43+
model-paths: ["models"]
44+
EOF
45+
46+
mkdir -p models
47+
cat > models/schema.yml << 'EOF'
48+
version: 2
49+
50+
models:
51+
- name: customers
52+
description: "Test customer model"
53+
- name: orders
54+
description: "Test orders model"
55+
- name: users
56+
description: "Test users model"
57+
EOF
58+
59+
cat > models/customers.sql << 'EOF'
60+
{{ config(
61+
materialized='table',
62+
tags=['critical', 'customer']
63+
) }}
64+
65+
SELECT
66+
1 as customer_id,
67+
'customer@example.com' as email,
68+
'2024-01-01'::date as registration_date
69+
EOF
70+
71+
cat > models/orders.sql << 'EOF'
72+
{{ config(
73+
materialized='table',
74+
tags=['critical']
75+
) }}
76+
77+
SELECT
78+
1 as order_id,
79+
1 as customer_id,
80+
100.0 as amount,
81+
'2024-01-01'::date as order_date
82+
EOF
83+
84+
cat > models/users.sql << 'EOF'
85+
{{ config(
86+
materialized='view',
87+
tags=['user']
88+
) }}
89+
90+
SELECT
91+
1 as user_id,
92+
'test_user' as username
93+
EOF
94+
95+
mkdir -p profiles
96+
cat > profiles/profiles.yml << 'EOF'
97+
test_profile:
98+
outputs:
99+
dev:
100+
type: postgres
101+
host: localhost
102+
port: 5433
103+
user: baselinr
104+
password: baselinr
105+
dbname: baselinr
106+
schema: public
107+
target: dev
108+
EOF
109+
110+
- name: Start PostgreSQL
111+
run: |
112+
docker compose -f docker/docker-compose.yml up -d postgres
113+
114+
- name: Wait for PostgreSQL
115+
run: |
116+
# Poll pg_isready inside the postgres container: up to 20 attempts, 5 seconds apart.
# On timeout, print the container logs and fail the step.
compose_file=docker/docker-compose.yml
# Start from a known-empty flag so a value inherited from the environment
# can never be mistaken for a successful readiness check.
ready=""
for ((attempt = 1; attempt <= 20; attempt++)); do
  if docker compose -f "$compose_file" exec -T postgres pg_isready -U baselinr; then
    ready=1
    break
  fi
  echo "Waiting for postgres startup..."
  sleep 5
done
if [ -z "$ready" ]; then
  echo "PostgreSQL did not become ready in time"
  docker compose -f "$compose_file" logs postgres
  exit 1
fi
129+
130+
- name: Run dbt compile to generate manifest
131+
run: |
132+
# Compile the throwaway dbt project; the following steps read target/manifest.json.
cd /tmp/test_dbt_project
export DBT_PROFILES_DIR=./profiles
dbt compile --profiles-dir ./profiles
# Directory listing is diagnostic only, so it must not fail the step by itself.
ls -la target/ || echo "target directory not found"
# Fail here, next to the dbt output, instead of in a later step that
# would only report a missing file.
if [ -f target/manifest.json ]; then
  echo "manifest.json exists"
else
  echo "manifest.json NOT found"
  exit 1
fi
137+
138+
- name: Test dbt manifest parsing
139+
run: |
140+
python -c "
141+
import json
import os

from baselinr.integrations.dbt import DBTManifestParser

# NOTE: this script runs inside a shell double-quoted string (python -c),
# so only single-quoted Python strings are used below.

# Manifest location inside the temporary dbt project.
manifest_path = '/tmp/test_dbt_project/target/manifest.json'
if not os.path.exists(manifest_path):
    raise FileNotFoundError(f'Manifest not found at {manifest_path}')

parser = DBTManifestParser(manifest_path=manifest_path)
parser.load_manifest()

# Debug output: show every model with its top-level tags and its config tags.
every_model = parser.get_all_models()
print(f'Found {len(every_model)} total models')
for entry in every_model:
    name = entry.get('name')
    tags = entry.get('tags', [])
    entry_config = entry.get('config', {})
    if isinstance(entry_config, dict):
        config_tags = entry_config.get('tags', [])
    else:
        config_tags = []
    print(f'Model {name}: tags={tags}, config.tags={config_tags}')

# A ref must resolve to the schema/table pair of the model.
schema, table = parser.resolve_ref('customers')
assert schema == 'public', f'Expected schema public, got {schema}'
assert table == 'customers', f'Expected table customers, got {table}'

# Tag lookup: customers and orders both carry the critical tag.
critical_models = parser.get_models_by_tag('critical')
print(f'Found {len(critical_models)} models with critical tag')
if len(critical_models) == 0:
    print('ERROR: No models found with critical tag. Checking manifest structure...')
    # Dump the relevant keys of each model node from the raw manifest
    # so the failure below can be diagnosed from the CI log.
    with open(manifest_path) as handle:
        raw_manifest = json.load(handle)
    wanted_keys = ['name', 'tags', 'config']
    for node_id, node in raw_manifest.get('nodes', {}).items():
        if node.get('resource_type') != 'model':
            continue
        summary = {key: value for key, value in node.items() if key in wanted_keys}
        print(f' Node {node_id}: {json.dumps(summary, indent=2)}')

assert len(critical_models) == 2, f'Expected 2 models with critical tag, got {len(critical_models)}'

print('✓ dbt manifest parsing tests passed')
182+
"
183+
184+
- name: Test dbt selector resolution
185+
run: |
186+
python -c "
187+
from baselinr.integrations.dbt import DBTManifestParser, DBTSelectorResolver

# NOTE: this script runs inside a shell double-quoted string (python -c),
# so only single-quoted Python strings are used below.

manifest_parser = DBTManifestParser(
    manifest_path='/tmp/test_dbt_project/target/manifest.json'
)
manifest_parser.load_manifest()

selector_resolver = DBTSelectorResolver(manifest_parser)

# Each case: (selector expression, noun used in the failure message).
# Both the tag selector and the config selector must match exactly two models.
selector_cases = [
    ('tag:critical', 'models'),
    ('config.materialized:table', 'table models'),
]
for selector, noun in selector_cases:
    matched = selector_resolver.resolve_selector(selector)
    assert len(matched) == 2, f'Expected 2 {noun}, got {len(matched)}'

print('✓ dbt selector resolution tests passed')
205+
"
206+
207+
- name: Test dbt pattern expansion
208+
run: |
209+
python -c "
210+
from baselinr.config.loader import ConfigLoader
from baselinr.planner import PlanBuilder

# NOTE: this script runs inside a shell double-quoted string (python -c),
# so only single-quoted Python strings are used below.

# Manifest location inside the temporary dbt project.
MANIFEST_PATH = '/tmp/test_dbt_project/target/manifest.json'


def postgres_connection():
    # Connection settings for the CI postgres container.
    # A fresh dict is returned on every call so the source and storage
    # sections never share one mutable object.
    return {
        'type': 'postgres',
        'host': 'localhost',
        'port': 5433,
        'database': 'baselinr',
        'username': 'baselinr',
        'password': 'baselinr',
        'schema': 'public',
    }


# Test config whose profiling tables are given as dbt patterns.
config_dict = {
    'environment': 'development',
    'source': postgres_connection(),
    'storage': {
        'connection': postgres_connection(),
        'results_table': 'baselinr_results',
        'runs_table': 'baselinr_runs',
        'create_tables': True,
    },
    'profiling': {
        'tables': [
            {
                'dbt_ref': 'customers',
                'dbt_manifest_path': MANIFEST_PATH,
            },
            {
                'dbt_selector': 'tag:critical',
                'dbt_manifest_path': MANIFEST_PATH,
            },
        ],
    },
}

config = ConfigLoader.load_from_dict(config_dict)
builder = PlanBuilder(config)

# Expand dbt patterns
expanded = builder.expand_table_patterns()

# customers can be reached through both patterns (dbt_ref and tag:critical);
# collecting names into a set makes the check independent of duplicates.
# Only the presence of customers and orders is asserted here.
table_names = {pattern.table for pattern in expanded if pattern.table}
assert 'customers' in table_names, 'customers table not found'
assert 'orders' in table_names, 'orders table not found'

print(f'✓ dbt pattern expansion tests passed (expanded {len(expanded)} patterns)')
268+
"
269+
270+
- name: Tear down Docker resources
271+
if: always()
272+
run: docker compose -f docker/docker-compose.yml down -v
273+

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ help:
1919
@echo ""
2020
@echo "Development:"
2121
@echo " make test Run tests"
22+
@echo " make test-dbt Run dbt integration tests"
2223
@echo " make lint Run linters"
2324
@echo " make format Format code"
2425
@echo " make clean Clean build artifacts"
@@ -87,6 +88,9 @@ install-all:
8788
test:
8889
pytest tests/ -v
8990

91+
test-dbt:
92+
pytest tests/test_dbt_integration.py -v
93+
9094
lint:
9195
flake8 baselinr/ --config=.flake8
9296
mypy baselinr/

README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,54 @@ defs = build_baselinr_definitions(
379379
)
380380
```
381381

382+
## 🔧 dbt Integration
383+
384+
Baselinr provides comprehensive integration with dbt for scalable profiling and drift detection.
385+
386+
### Using dbt Refs/Selectors in Configs
387+
388+
Reference dbt models directly in your baselinr configuration:
389+
390+
```yaml
391+
profiling:
392+
tables:
393+
- dbt_ref: customers
394+
dbt_project_path: ./dbt_project
395+
- dbt_selector: tag:critical
396+
dbt_project_path: ./dbt_project
397+
```
398+
399+
### Direct dbt Model Integration
400+
401+
Add baselinr tests and profiling within dbt models:
402+
403+
```yaml
404+
# schema.yml
405+
models:
406+
- name: customers
407+
config:
408+
post-hook: "{{ baselinr_profile(target.schema, target.name) }}"
409+
columns:
410+
- name: customer_id
411+
tests:
412+
- baselinr_drift:
413+
metric: count
414+
threshold: 5.0
415+
severity: high
416+
```
417+
418+
**Installation**:
419+
1. Install baselinr: `pip install baselinr`
420+
2. Add to `packages.yml`:
421+
```yaml
422+
packages:
423+
- git: "https://github.com/baselinrhq/baselinr.git"
424+
subdirectory: dbt_package
425+
```
426+
3. Run: `dbt deps`
427+
428+
See [dbt Integration Guide](docs/guides/DBT_INTEGRATION.md) for complete documentation.
429+
382430
## 🐍 Python SDK
383431

384432
Baselinr provides a high-level Python SDK for programmatic access to all functionality.

0 commit comments

Comments
 (0)