|
# CI workflow: exercises baselinr's dbt integration against a throwaway dbt
# project and a PostgreSQL instance started with Docker Compose.
name: dbt Integration

on:
  # Run for every push to main and for every pull request.
  push:
    branches:
      - main
  pull_request:

# Least privilege: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  dbt-integration:
    runs-on: ubuntu-latest
    # Fail instead of holding a runner for the 6-hour default if a step hangs.
    timeout-minutes: 30
    steps:
| 13 | + - name: Checkout |
| 14 | + uses: actions/checkout@v4 |
| 15 | + |
| 16 | + - name: Set up Python |
| 17 | + uses: actions/setup-python@v5 |
| 18 | + with: |
| 19 | + python-version: '3.12' |
| 20 | + |
| 21 | + - name: Install dependencies |
| 22 | + run: | |
| 23 | + python -m pip install --upgrade pip setuptools wheel |
| 24 | + pip install -e ".[dev]" |
| 25 | +
|
      # Runs the pytest module for the dbt integration in verbose mode.
      # NOTE(review): this executes before dbt-core is installed and before
      # PostgreSQL is started further down — presumably the suite needs
      # neither; confirm.
      - name: Run dbt integration tests
        run: pytest tests/test_dbt_integration.py -v

| 29 | + - name: Install dbt-core |
| 30 | + run: | |
| 31 | + pip install dbt-core dbt-postgres |
| 32 | +
|
      # Scaffolds a minimal dbt project under /tmp/test_dbt_project: a project
      # file, three single-row models with tags, and a Postgres profile.
      - name: Create test dbt project
        run: |
          mkdir -p /tmp/test_dbt_project
          cd /tmp/test_dbt_project
          # Heredoc delimiters are quoted ('EOF'), so file contents are written
          # verbatim with no shell expansion.
          cat > dbt_project.yml << 'EOF'
          name: 'test_project'
          version: '1.0.0'
          config-version: 2
          profile: 'test_profile'

          model-paths: ["models"]
          EOF

          mkdir -p models
          cat > models/schema.yml << 'EOF'
          version: 2

          models:
            - name: customers
              description: "Test customer model"
            - name: orders
              description: "Test orders model"
            - name: users
              description: "Test users model"
          EOF

          # customers and orders are tables tagged 'critical'; users is a view
          # tagged 'user'. Later steps assert exactly two critical models and
          # exactly two table models.
          cat > models/customers.sql << 'EOF'
          {{ config(
              materialized='table',
              tags=['critical', 'customer']
          ) }}

          SELECT
              1 as customer_id,

              '2024-01-01'::date as registration_date
          EOF

          cat > models/orders.sql << 'EOF'
          {{ config(
              materialized='table',
              tags=['critical']
          ) }}

          SELECT
              1 as order_id,
              1 as customer_id,
              100.0 as amount,
              '2024-01-01'::date as order_date
          EOF

          cat > models/users.sql << 'EOF'
          {{ config(
              materialized='view',
              tags=['user']
          ) }}

          SELECT
              1 as user_id,
              'test_user' as username
          EOF

          # Profile referenced by dbt_project.yml ('test_profile').
          # NOTE(review): port 5433 and the baselinr credentials presumably
          # mirror docker/docker-compose.yml — confirm.
          mkdir -p profiles
          cat > profiles/profiles.yml << 'EOF'
          test_profile:
            outputs:
              dev:
                type: postgres
                host: localhost
                port: 5433
                user: baselinr
                password: baselinr
                dbname: baselinr
                schema: public
            target: dev
          EOF

| 110 | + - name: Start PostgreSQL |
| 111 | + run: | |
| 112 | + docker compose -f docker/docker-compose.yml up -d postgres |
| 113 | +
|
| 114 | + - name: Wait for PostgreSQL |
| 115 | + run: | |
| 116 | + for i in {1..20}; do |
| 117 | + if docker compose -f docker/docker-compose.yml exec -T postgres pg_isready -U baselinr; then |
| 118 | + ready=1 |
| 119 | + break |
| 120 | + fi |
| 121 | + echo "Waiting for postgres startup..." |
| 122 | + sleep 5 |
| 123 | + done |
| 124 | + if [ -z "$ready" ]; then |
| 125 | + echo "PostgreSQL did not become ready in time" |
| 126 | + docker compose -f docker/docker-compose.yml logs postgres |
| 127 | + exit 1 |
| 128 | + fi |
| 129 | +
|
| 130 | + - name: Run dbt compile to generate manifest |
| 131 | + run: | |
| 132 | + cd /tmp/test_dbt_project |
| 133 | + export DBT_PROFILES_DIR=./profiles |
| 134 | + dbt compile --profiles-dir ./profiles |
| 135 | + ls -la target/ || echo "target directory not found" |
| 136 | + test -f target/manifest.json && echo "manifest.json exists" || echo "manifest.json NOT found" |
| 137 | +
|
| 138 | + - name: Test dbt manifest parsing |
| 139 | + run: | |
| 140 | + python -c " |
| 141 | + from baselinr.integrations.dbt import DBTManifestParser |
| 142 | + import json |
| 143 | + import os |
| 144 | + |
| 145 | + manifest_path = '/tmp/test_dbt_project/target/manifest.json' |
| 146 | + if not os.path.exists(manifest_path): |
| 147 | + raise FileNotFoundError(f'Manifest not found at {manifest_path}') |
| 148 | + |
| 149 | + parser = DBTManifestParser(manifest_path=manifest_path) |
| 150 | + manifest = parser.load_manifest() |
| 151 | + |
| 152 | + # Debug: Check all models and their tag structure |
| 153 | + all_models = parser.get_all_models() |
| 154 | + print(f'Found {len(all_models)} total models') |
| 155 | + for model in all_models: |
| 156 | + name = model.get('name') |
| 157 | + tags = model.get('tags', []) |
| 158 | + config = model.get('config', {}) |
| 159 | + config_tags = config.get('tags', []) if isinstance(config, dict) else [] |
| 160 | + print(f'Model {name}: tags={tags}, config.tags={config_tags}') |
| 161 | + |
| 162 | + # Test resolving refs |
| 163 | + schema, table = parser.resolve_ref('customers') |
| 164 | + assert schema == 'public', f'Expected schema public, got {schema}' |
| 165 | + assert table == 'customers', f'Expected table customers, got {table}' |
| 166 | + |
| 167 | + # Test getting models by tag |
| 168 | + models = parser.get_models_by_tag('critical') |
| 169 | + print(f'Found {len(models)} models with critical tag') |
| 170 | + if len(models) == 0: |
| 171 | + print('ERROR: No models found with critical tag. Checking manifest structure...') |
| 172 | + # Load raw manifest to inspect |
| 173 | + with open(manifest_path) as f: |
| 174 | + raw_manifest = json.load(f) |
| 175 | + for node_id, node in raw_manifest.get('nodes', {}).items(): |
| 176 | + if node.get('resource_type') == 'model': |
| 177 | + print(f' Node {node_id}: {json.dumps({k: v for k, v in node.items() if k in [\"name\", \"tags\", \"config\"]}, indent=2)}') |
| 178 | + |
| 179 | + assert len(models) == 2, f'Expected 2 models with critical tag, got {len(models)}' |
| 180 | + |
| 181 | + print('✓ dbt manifest parsing tests passed') |
| 182 | + " |
| 183 | +
|
| 184 | + - name: Test dbt selector resolution |
| 185 | + run: | |
| 186 | + python -c " |
| 187 | + from baselinr.integrations.dbt import DBTManifestParser, DBTSelectorResolver |
| 188 | + |
| 189 | + parser = DBTManifestParser( |
| 190 | + manifest_path='/tmp/test_dbt_project/target/manifest.json' |
| 191 | + ) |
| 192 | + parser.load_manifest() |
| 193 | + |
| 194 | + resolver = DBTSelectorResolver(parser) |
| 195 | + |
| 196 | + # Test tag selector |
| 197 | + models = resolver.resolve_selector('tag:critical') |
| 198 | + assert len(models) == 2, f'Expected 2 models, got {len(models)}' |
| 199 | + |
| 200 | + # Test config selector |
| 201 | + models = resolver.resolve_selector('config.materialized:table') |
| 202 | + assert len(models) == 2, f'Expected 2 table models, got {len(models)}' |
| 203 | + |
| 204 | + print('✓ dbt selector resolution tests passed') |
| 205 | + " |
| 206 | +
|
| 207 | + - name: Test dbt pattern expansion |
| 208 | + run: | |
| 209 | + python -c " |
| 210 | + from baselinr.config.loader import ConfigLoader |
| 211 | + from baselinr.planner import PlanBuilder |
| 212 | + import tempfile |
| 213 | + import yaml |
| 214 | + |
| 215 | + # Create a test config with dbt patterns |
| 216 | + config_dict = { |
| 217 | + 'environment': 'development', |
| 218 | + 'source': { |
| 219 | + 'type': 'postgres', |
| 220 | + 'host': 'localhost', |
| 221 | + 'port': 5433, |
| 222 | + 'database': 'baselinr', |
| 223 | + 'username': 'baselinr', |
| 224 | + 'password': 'baselinr', |
| 225 | + 'schema': 'public' |
| 226 | + }, |
| 227 | + 'storage': { |
| 228 | + 'connection': { |
| 229 | + 'type': 'postgres', |
| 230 | + 'host': 'localhost', |
| 231 | + 'port': 5433, |
| 232 | + 'database': 'baselinr', |
| 233 | + 'username': 'baselinr', |
| 234 | + 'password': 'baselinr', |
| 235 | + 'schema': 'public' |
| 236 | + }, |
| 237 | + 'results_table': 'baselinr_results', |
| 238 | + 'runs_table': 'baselinr_runs', |
| 239 | + 'create_tables': True |
| 240 | + }, |
| 241 | + 'profiling': { |
| 242 | + 'tables': [ |
| 243 | + { |
| 244 | + 'dbt_ref': 'customers', |
| 245 | + 'dbt_manifest_path': '/tmp/test_dbt_project/target/manifest.json' |
| 246 | + }, |
| 247 | + { |
| 248 | + 'dbt_selector': 'tag:critical', |
| 249 | + 'dbt_manifest_path': '/tmp/test_dbt_project/target/manifest.json' |
| 250 | + } |
| 251 | + ] |
| 252 | + } |
| 253 | + } |
| 254 | + |
| 255 | + config = ConfigLoader.load_from_dict(config_dict) |
| 256 | + builder = PlanBuilder(config) |
| 257 | + |
| 258 | + # Expand dbt patterns |
| 259 | + expanded = builder.expand_table_patterns() |
| 260 | + |
| 261 | + # Should have customers (from dbt_ref) + customers and orders (from tag:critical) |
| 262 | + # But deduplicated, so should be 2 unique tables |
| 263 | + table_names = {p.table for p in expanded if p.table} |
| 264 | + assert 'customers' in table_names, 'customers table not found' |
| 265 | + assert 'orders' in table_names, 'orders table not found' |
| 266 | + |
| 267 | + print(f'✓ dbt pattern expansion tests passed (expanded {len(expanded)} patterns)') |
| 268 | + " |
| 269 | +
|
| 270 | + - name: Tear down Docker resources |
| 271 | + if: always() |
| 272 | + run: docker compose -f docker/docker-compose.yml down -v |
| 273 | + |
0 commit comments