Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
StatVar,NumPlaces,MinDate,MeasurementMethods,Units
InterestRate_TreasuryNote_3Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryBond_20Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryNote_5Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryNote_10Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
InterestRate_TreasuryBill_1Year,1,1962-01-02,[ConstantMaturityRate],[Percent]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
date,1-Month,3-Month,6-Month,1-Year,2-Year,3-Year,5-Year,7-Year,10-Year,20-Year,30-Year
1962-01-02,,,,3.22,,3.70,3.88,,4.06,4.07,
1962-02-01,,,,3.30,,3.81,4.00,,4.09,4.13,
1962-04-19,,,,3.00,,3.37,3.60,,3.82,3.91,
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,27 @@
"params": {
"threshold": 0
}
},
{
"rule_id": "check_goldens_output_csv",
"validator": "GOLDENS",
"scope": {
"data_source": "stats"
},
"params": {
"golden_files": "golden_data/golden_treasury_constant_maturity_rates.csv",
"input_files": "treasury_constant_maturity_rates.csv"
}
},
{
"rule_id": "check_goldens_summary_report",
"validator": "GOLDENS",
"scope": {
"data_source": "stats"
},
"params": {
"golden_files": "golden_data/golden_summary_report.csv"
}
}
]
}
}
33 changes: 32 additions & 1 deletion tools/import_validation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,41 @@ The following validations are currently supported:
| `NUM_OBSERVATIONS_CHECK` | Checks that the number of observations is within a defined range. | `stats` | `minimum`, `maximum`, or `value` (integer) |
| `UNIT_CONSISTENCY_CHECK` | Checks that the unit is the same for all StatVars. | `stats` | None |
| `MIN_VALUE_CHECK` | Checks that the minimum value is not below a defined minimum. | `stats` | `minimum` (integer or float) |
| `MAX_VALUE_CHECK` | Checks that the maximum value is not above a defined maximum. | `stats` | `maximum` (integer or float) |
| `MAX_VALUE_CHECK` | Checks that the maximum value is not above a defined maximum. | `stats` | `maximum` (integer or float) |
| `GOLDENS` | Verifies that the data contains all records defined in a golden set. | `stats` | `golden_files` (list), `input_files` (list) |

### Golden Set Validation with `GOLDENS`

The `GOLDENS` validator ensures that your import contains a specific set of expected records. This is useful for verifying that critical StatVars, Places, or specific metadata combinations are always present in your output.

The validator compares the input data (usually from the `stats` data source) against one or more "golden" files (MCF or CSV).

#### Configuration Parameters
- `golden_files`: A list or glob pattern of golden MCF or CSV files to compare against.
- `goldens_key_property`: A list of properties to match on. If not specified, all properties in the golden record must match.
- `input_files`: (Optional) Path to specific input files. If not provided, the data source defined in the rule's `scope` is used.

#### GOLDENS Validator Example

**Rule:** "Ensure that observations for `Count_Person` and `Median_Age_Person` are present in the import as defined in our critical golden set."

```json
{
"rule_id": "verify_critical_obs",
"validator": "GOLDENS",
"scope": {
"data_source": "stats"
},
"params": {
"golden_files": ["goldens/critical_stats.csv"],
"input_files": "processed_obs.csv"
}
}
```

## Output


The framework generates a report file (specified by the `--validation_output` flag) with the results of each validation. The format of the report is determined by the file extension (`.csv` or `.json`).

### CSV Output
Expand Down
15 changes: 13 additions & 2 deletions tools/import_validation/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(self, validation_config_path: str, differ_output: str,
(self.validator.validate_min_value_check, 'stats'),
'MAX_VALUE_CHECK':
(self.validator.validate_max_value_check, 'stats'),
'GOLDENS': (self.validator.validate_goldens, 'stats'),
}

self._initialize_data_sources(stats_summary, lint_report, differ_output)
Expand Down Expand Up @@ -166,10 +167,20 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
validation_func, data_source_key = self.validation_dispatch[
validator_name]

rule_params = dict(rule.get('params', {}))
if rule_params:
# Add default parameters for output folder
output_dir = self.validation_output
if output_dir and not output_dir.endswith(
'/') and not os.path.isdir(output_dir):
output_dir = os.path.dirname(output_dir)
if output_dir:
rule_params.setdefault('output_path', output_dir)

if validator_name == 'SQL_VALIDATOR':
result = validation_func(self.data_sources['stats'],
self.data_sources['differ'],
rule['params'])
rule_params)
else:
scope = rule['scope']
if isinstance(scope, str):
Expand All @@ -185,7 +196,7 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
regex_patterns=variables_config.get('regex'),
contains_all=variables_config.get('contains_all'))

result = validation_func(df, rule['params'])
result = validation_func(df, rule_params)

result.name = rule['rule_id']
result.validation_params = rule.get('params', {})
Expand Down
66 changes: 66 additions & 0 deletions tools/import_validation/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPT_DIR)
_DATA_DIR = os.path.join(_SCRIPT_DIR.split('/data/')[0], 'data')
sys.path.append(os.path.join(_DATA_DIR, 'util'))

from result import ValidationResult, ValidationStatus
from counters import Counters
import validator_goldens


class Validator:
Expand Down Expand Up @@ -847,3 +851,65 @@ def validate_max_value_check(self, stats_df: pd.DataFrame,
'rows_succeeded': rows_succeeded,
'rows_failed': rows_failed
})

def validate_goldens(self, df: pd.DataFrame,
                     params: dict) -> ValidationResult:
    """Checks that every record in a golden set is present in the data.

    Args:
        df: DataFrame holding the records to validate; only consulted when
            'input_files' is absent from params.
        params: Rule configuration containing:
            'golden_files': Path(s) to golden MCF/CSV files (required).
            'input_files': (Optional) explicit input file path(s); when
                missing, records are taken from 'df'.
            'output_path': (Optional) folder or output filename used to save
                the missing golden records.
            Remaining keys are forwarded to validator_goldens as config
            (e.g. goldens_key_property).

    Returns:
        A ValidationResult: CONFIG_ERROR when 'golden_files' is missing,
        PASSED when no golden records are missing, FAILED when some are,
        and DATA_ERROR when the comparison itself raises.
    """
    golden_paths = params.get('golden_files')
    if not golden_paths:
        return ValidationResult(
            ValidationStatus.CONFIG_ERROR,
            'GOLDENS',
            message=
            "Configuration error: 'golden_files' must be specified for GOLDENS validator."
        )

    try:
        # Prefer explicitly configured input files; otherwise fall back to
        # the in-memory DataFrame, keyed by row index.
        source = params.get('input_files') or df.to_dict('index')
        run_counters = Counters()
        missing = validator_goldens.validate_goldens(
            source,
            golden_paths,
            params.get('output_path'),
            config=params,
            counters=run_counters)
        # Surface only the golden-related counters in the result details.
        details = {}
        for counter_name, counter_value in run_counters.get_counters(
        ).items():
            if 'golden' in counter_name:
                details[counter_name] = counter_value
        if missing:
            details['missing_goldens'] = missing
            return ValidationResult(
                ValidationStatus.FAILED,
                'GOLDENS',
                message=f"Found {len(missing)} missing golden records.",
                details=details)
        return ValidationResult(ValidationStatus.PASSED,
                                'GOLDENS',
                                details=details)
    except Exception as e:
        return ValidationResult(
            ValidationStatus.DATA_ERROR,
            'GOLDENS',
            message=f"Error during golden validation: {e}")
Loading