diff --git a/scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_summary_report.csv b/scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_summary_report.csv new file mode 100644 index 0000000000..1c40614669 --- /dev/null +++ b/scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_summary_report.csv @@ -0,0 +1,6 @@ +StatVar,NumPlaces,MinDate,MeasurementMethods,Units +InterestRate_TreasuryNote_3Year,1,1962-01-02,[ConstantMaturityRate],[Percent] +InterestRate_TreasuryBond_20Year,1,1962-01-02,[ConstantMaturityRate],[Percent] +InterestRate_TreasuryNote_5Year,1,1962-01-02,[ConstantMaturityRate],[Percent] +InterestRate_TreasuryNote_10Year,1,1962-01-02,[ConstantMaturityRate],[Percent] +InterestRate_TreasuryBill_1Year,1,1962-01-02,[ConstantMaturityRate],[Percent] diff --git a/scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_treasury_constant_maturity_rates.csv b/scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_treasury_constant_maturity_rates.csv new file mode 100644 index 0000000000..92ebb1a070 --- /dev/null +++ b/scripts/us_fed/treasury_constant_maturity_rates/golden_data/golden_treasury_constant_maturity_rates.csv @@ -0,0 +1,4 @@ +date,1-Month,3-Month,6-Month,1-Year,2-Year,3-Year,5-Year,7-Year,10-Year,20-Year,30-Year +1962-01-02,,,,3.22,,3.70,3.88,,4.06,4.07, +1962-02-01,,,,3.30,,3.81,4.00,,4.09,4.13, +1962-04-19,,,,3.00,,3.37,3.60,,3.82,3.91, diff --git a/scripts/us_fed/treasury_constant_maturity_rates/validation_config.json b/scripts/us_fed/treasury_constant_maturity_rates/validation_config.json index b92d23c5bd..0a4b242b16 100644 --- a/scripts/us_fed/treasury_constant_maturity_rates/validation_config.json +++ b/scripts/us_fed/treasury_constant_maturity_rates/validation_config.json @@ -31,6 +31,27 @@ "params": { "threshold": 0 } + }, + { + "rule_id": "check_goldens_output_csv", + "validator": "GOLDENS", + "scope": { + "data_source": "stats" + }, + "params": { + "golden_files": 
"golden_data/golden_treasury_constant_maturity_rates.csv", + "input_files": "treasury_constant_maturity_rates.csv" + } + }, + { + "rule_id": "check_goldens_summary_report", + "validator": "GOLDENS", + "scope": { + "data_source": "stats" + }, + "params": { + "golden_files": "golden_data/golden_summary_report.csv" + } } ] -} \ No newline at end of file +} diff --git a/tools/import_validation/README.md b/tools/import_validation/README.md index 54406457df..fa89b78adf 100644 --- a/tools/import_validation/README.md +++ b/tools/import_validation/README.md @@ -158,10 +158,41 @@ The following validations are currently supported: | `NUM_OBSERVATIONS_CHECK` | Checks that the number of observations is within a defined range. | `stats` | `minimum`, `maximum`, or `value` (integer) | | `UNIT_CONSISTENCY_CHECK` | Checks that the unit is the same for all StatVars. | `stats` | None | | `MIN_VALUE_CHECK` | Checks that the minimum value is not below a defined minimum. | `stats` | `minimum` (integer or float) | -| `MAX_VALUE_CHECK` | Checks that the maximum value is not above a defined maximum. | `stats` | `maximum` (integer or float) | +| MAX_VALUE_CHECK | Checks that the maximum value is not above a defined maximum. | `stats` | `maximum` (integer or float) | +| `GOLDENS` | Verifies that the data contains all records defined in a golden set. | `stats` | `golden_files` (list), `input_files` (list) | + +### Golden Set Validation with `GOLDENS` + +The `GOLDENS` validator ensures that your import contains a specific set of expected records. This is useful for verifying that critical StatVars, Places, or specific metadata combinations are always present in your output. + +The validator compares the input data (usually from the `stats` data source) against one or more "golden" files (MCF or CSV). + +#### Configuration Parameters +- `golden_files`: A list or glob pattern of golden MCF or CSV files to compare against. +- `goldens_key_property`: A list of properties to match on. 
If not specified, all properties in the golden record must match. +- `input_files`: (Optional) Path to specific input files. If not provided, the data source defined in the rule's `scope` is used. + +#### GOLDENS Validator Example + +**Rule:** "Ensure that observations for `Count_Person` and `Median_Age_Person` are present in the import as defined in our critical golden set." + +```json +{ + "rule_id": "verify_critical_obs", + "validator": "GOLDENS", + "scope": { + "data_source": "stats" + }, + "params": { + "golden_files": ["goldens/critical_stats.csv"], + "input_files": "processed_obs.csv" + } +} +``` ## Output + The framework generates a report file (specified by the `--validation_output` flag) with the results of each validation. The format of the report is determined by the file extension (`.csv` or `.json`). ### CSV Output diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py index a3bbbde132..809269aaee 100644 --- a/tools/import_validation/runner.py +++ b/tools/import_validation/runner.py @@ -77,6 +77,7 @@ def __init__(self, validation_config_path: str, differ_output: str, (self.validator.validate_min_value_check, 'stats'), 'MAX_VALUE_CHECK': (self.validator.validate_max_value_check, 'stats'), + 'GOLDENS': (self.validator.validate_goldens, 'stats'), } self._initialize_data_sources(stats_summary, lint_report, differ_output) @@ -166,10 +167,20 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]: validation_func, data_source_key = self.validation_dispatch[ validator_name] + rule_params = dict(rule.get('params', {})) + if rule_params: + # Add default parameters for output folder + output_dir = self.validation_output + if output_dir and not output_dir.endswith( + '/') and not os.path.isdir(output_dir): + output_dir = os.path.dirname(output_dir) + if output_dir: + rule_params.setdefault('output_path', output_dir) + if validator_name == 'SQL_VALIDATOR': result = validation_func(self.data_sources['stats'], 
self.data_sources['differ'], - rule['params']) + rule_params) else: scope = rule['scope'] if isinstance(scope, str): @@ -185,7 +196,7 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]: regex_patterns=variables_config.get('regex'), contains_all=variables_config.get('contains_all')) - result = validation_func(df, rule['params']) + result = validation_func(df, rule_params) result.name = rule['rule_id'] result.validation_params = rule.get('params', {}) diff --git a/tools/import_validation/validator.py b/tools/import_validation/validator.py index b8eec807ce..a4fd19dabd 100644 --- a/tools/import_validation/validator.py +++ b/tools/import_validation/validator.py @@ -20,8 +20,12 @@ _SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.append(_SCRIPT_DIR) +_DATA_DIR = os.path.join(_SCRIPT_DIR.split('/data/')[0], 'data') +sys.path.append(os.path.join(_DATA_DIR, 'util')) from result import ValidationResult, ValidationStatus +from counters import Counters +import validator_goldens class Validator: @@ -847,3 +851,65 @@ def validate_max_value_check(self, stats_df: pd.DataFrame, 'rows_succeeded': rows_succeeded, 'rows_failed': rows_failed }) + + def validate_goldens(self, df: pd.DataFrame, + params: dict) -> ValidationResult: + """Validates records against a golden set. + + Args: + df: A DataFrame containing the data to validate (used if input_files + is not provided in params). + params: A dictionary containing: + 'golden_files': Path(s) to golden MCF/CSV files. + 'input_files': (Optional) Path(s) to input files. If not provided, + the 'df' will be used. + 'output_path': (Optional) folder or output filename to save missing goldens. + And other optional validator_goldens config (e.g., goldens_key_property). + + Returns: + A ValidationResult object. 
+ """ + golden_files = params.get('golden_files') + if not golden_files: + return ValidationResult( + ValidationStatus.CONFIG_ERROR, + 'GOLDENS', + message= + "Configuration error: 'golden_files' must be specified for GOLDENS validator." + ) + + try: + inputs = params.get('input_files') + if not inputs: + inputs = df.to_dict('index') + output_path = params.get('output_path') + # Compare nodes + counters = Counters() + missing_goldens = validator_goldens.validate_goldens( + inputs, + golden_files, + output_path, + config=params, + counters=counters) + details = { + name: value + for name, value in counters.get_counters().items() + if 'golden' in name + } + if not missing_goldens: + return ValidationResult(ValidationStatus.PASSED, + 'GOLDENS', + details=details) + details['missing_goldens'] = missing_goldens + + return ValidationResult( + ValidationStatus.FAILED, + 'GOLDENS', + message=f"Found {len(missing_goldens)} missing golden records.", + details=details) + + except Exception as e: + return ValidationResult( + ValidationStatus.DATA_ERROR, + 'GOLDENS', + message=f"Error during golden validation: {e}") diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py new file mode 100644 index 0000000000..72bfcaca7a --- /dev/null +++ b/tools/import_validation/validator_goldens.py @@ -0,0 +1,528 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utility functions to validate data with golden sets. 
+ +This module provides tools to compare sets of nodes (e.g., from CSV or MCF files) +against a "golden" set of expected nodes. It supports flexible matching based on +configurable property sets and handles normalization of values (like stripping +namespaces from DCIDs). + +Example Use Case: Validating StatVarObservations +------------------------------------------------ +You can use this to ensure that your import contains expected observations. + +1. Validate based on variableMeasured and observationAbout: + Config: {'goldens_key_property': ['variableMeasured', 'observationAbout']} + This will check that for every golden observation, an input observation exists + with the same StatVar and Place, regardless of the value or time. + +2. Validate based on a combination of metadata: + Config: { + 'goldens_key_property': [ + 'variableMeasured', 'unit', 'scalingFactor', 'measurementMethod' + ] + } + This ensures that the specific measurement metadata combinations defined in + your goldens are present in the input nodes. 
+ +Usage: + python3 validator_goldens.py \ + --validate_goldens_input=output/observations.csv \ + --validate_goldens=goldens/expected_obs.mcf \ + --goldens_key_property=variableMeasured,observationAbout + + # To generate goldens from input: + python3 validator_goldens.py \ + --validate_goldens_input=output/observations.csv \ + --generate_goldens_property_sets="variableMeasured|observationAbout,observationDate,variableMeasured|unit|scalingFactor|observationPeriod|measurementMethod" \ + --generate_goldens=goldens_data/generated_goldens.csv + + # To generate goldens using a sample of input nodes: + python3 validator_goldens.py \ + --validate_goldens_input=output/observations.csv \ + --goldens_sample_rows=100 \ + --generate_goldens_property_sets="variableMeasured|observationAbout" \ + --generate_goldens=goldens_data/generated_goldens.csv + + # To generate goldens capturing every unique value in every column: + python3 validator_goldens.py \ + --validate_goldens_input=output/observations.mcf \ + --goldens_sampler_exhaustive \ + --generate_goldens=goldens_data/generated_goldens.mcf + + # To generate goldens ensuring prominent DCIDs are included if present: + python3 validator_goldens.py \ + --validate_goldens_input=output/observations.csv \ + --goldens_must_include="variableMeasured:selected_svs.txt,observationAbout:selected_places.txt" \ + --generate_goldens=goldens_data/generated_goldens.csv +""" + +import os +import sys +import tempfile + +from absl import app +from absl import flags +from absl import logging + +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +_DATA_DIR = os.path.join(_SCRIPT_DIR.split('/data/')[0], 'data') +sys.path.append(_SCRIPT_DIR) +sys.path.append(os.path.dirname(_SCRIPT_DIR)) +sys.path.append(_DATA_DIR) +sys.path.append(os.path.join(_DATA_DIR, 'util')) +sys.path.append(os.path.join(_DATA_DIR, 'tools', 'statvar_importer')) + +import file_util +import mcf_file_util +import data_sampler + +from counters import Counters +from mcf_diff 
import fingerprint_node + +flags.DEFINE_list('validate_goldens_input', None, + 'List of files to be compared against goldens.') +flags.DEFINE_list('validate_goldens', None, + 'List of golden files to be compared against') +flags.DEFINE_string('generate_goldens', None, + 'Golden file to be generated from the input.') +flags.DEFINE_string('validate_goldens_output', None, + 'Output file with missing goldens') +flags.DEFINE_list('goldens_key_property', [], + 'Properties in golden nodes to be compared.') +flags.DEFINE_list('goldens_ignore_property', ['value'], + 'Properties in golden nodes to be ignored.') +flags.DEFINE_list( + 'generate_goldens_property_sets', [], + 'List of property sets to generate goldens for. ' + 'Each set is a pipe (|) separated list of properties. ' + 'Example: "variableMeasured|observationAbout,observationDate"') +flags.DEFINE_integer( + 'goldens_sample_rows', 0, + 'Number of input rows to sample for generating goldens. ' + 'If 0, all rows are used.') +flags.DEFINE_boolean( + 'goldens_sampler_exhaustive', False, + 'If True, uses exhaustive sampling to capture every ' + 'unique value in the input nodes.') +flags.DEFINE_list( + 'goldens_must_include', [], + 'List of "column:file" pairs containing values (e.g. prominent DCIDs) ' + 'that MUST be included in the generated goldens if they appear ' + 'in the input data. ' + 'Example: "variableMeasured:website/tools/nl/embeddings/input/base/sheets_svs.csv,observationAbout:places.txt"' +) +flags.DEFINE_string('goldens_ignore_comments', '#', + 'Prefix for comments to be ignored in the golden set.') + +_FLAGS = flags.FLAGS + + +def get_validator_goldens_config() -> dict: + """Returns a dictionary of config parameters for MCF diff from flags. + + The config includes properties to ignore and properties to use as keys + for matching nodes, derived from command-line flags. 
+ """ + if not _FLAGS.is_parsed(): + _FLAGS.mark_as_parsed() + return { + 'goldens_ignore_property': _FLAGS.goldens_ignore_property, + 'goldens_key_property': _FLAGS.goldens_key_property, + 'goldens_must_include': _FLAGS.goldens_must_include, + 'goldens_ignore_comments': _FLAGS.goldens_ignore_comments, + + # config options for data_sampler when generating goldens + 'sampler_output_rows': _FLAGS.goldens_sample_rows, + 'sampler_exhaustive': _FLAGS.goldens_sampler_exhaustive, + 'sampler_column_keys': _FLAGS.goldens_must_include, + } + + +def _is_commented_node(fingerprint: str, comment_char: str = '#') -> bool: + """Returns True if the node fingerprint is commented. + + Args: + fingerprint: string fingerprint of the node of the form 'prop=value;...' + + Returns: + True if any property or value is commented. + """ + if not comment_char: + return False + if fingerprint.startswith( + comment_char + ) or f';{comment_char}' in fingerprint or f'={comment_char}' in fingerprint: + return True + return False + + +# Compare nodes in a dictionary to nodes in a golden set +def validator_compare_nodes(input_nodes: dict, + golden_nodes: dict, + config: dict = None, + counters: Counters = None) -> list: + """Returns a summary of the differences in the input and golden nodes. + + It only compares the properties defined in the golden nodes against the + corresponding properties in the input_nodes. + + Args: + input_nodes: dictionary of nodes which are dictionary of property:values. + { : { : ,,,}, : { : ..} + golden_nodes: dictionary of key to expected nodes with property:values. + These nodes may have fewer properties than input_nodes. + config: dictionary of config parameters such as ignore lists and + normalization settings. + counters: Output counters for tracking match statistics. + + Returns: + A list of fingerprints for golden nodes that were not matched in the input. 
+ """ + + if counters is None: + counters = Counters() + + if config is None: + config = get_validator_goldens_config() + + # Extract configuration parameters with defaults. + ignore_props = config.get('goldens_ignore_property', {}) + comment_char = config.get('goldens_ignore_comments', '#') + golden_key_props = set(config.get('goldens_key_property', {})) + key_delimiter = config.get('golden_key_delimiter', '|') + + # Step 1: Group golden nodes by their set of properties. + # Goldens may have a subset of the input node properties and would match + # any input node that contains all the golden property:values. + # Different golden nodes might specify different subsets of properties to match on. + golden_key_sets = {} + golden_matches = dict() + logging.debug(f'Extracting properties for {len(golden_nodes)} goldens') + for node_key, node in golden_nodes.items(): + node_props = set() + for prop in node.keys(): + if not prop: + continue + if comment_char and prop.startswith(comment_char): + continue + if prop in ignore_props: + continue + if golden_key_props and prop not in golden_key_props: + continue + node_props.add(prop) + + if not node_props: + counters.add_counter('validate-goldens-commented', 1) + continue + # Use the joined sorted property names as a key for the group. + node_props_key = key_delimiter.join(sorted(list(node_props))) + golden_key_sets[node_props_key] = node_props + + # Initialize match count for the golden node to 0. 
+ key = fingerprint_node(node, compare_props=node_props) + golden_matches.setdefault(key, {'node': node, 'matches': 0}) + + logging.info( + f'Comparing {len(input_nodes)} nodes against {len(golden_matches)} goldens in {len(golden_key_sets)} sets using properties: {golden_key_sets.keys()}' + ) + counters.add_counter('validate-goldens-sets', len(golden_key_sets)) + counters.add_counter('validate-goldens-inputs', len(input_nodes)) + counters.add_counter('validate-goldens-expected', len(golden_matches)) + + # Step 2: Match each input node with the golden fingerprints. + # An input node may match more than one golden node with different + # set of property:values. + for node in input_nodes.values(): + # An input node might match different golden "shapes" (sets of properties). + for node_key_props in golden_key_sets.values(): + key = fingerprint_node(node, + compare_props=node_key_props, + ignore_props=ignore_props) + if key in golden_matches: + golden_matches[key]['matches'] += 1 + counters.add_counter('validate-goldens-input-matched', 1) + + # Step 3: Identify which golden fingerprints had no corresponding input nodes. + missing_goldens = [] + for key, node_counts in golden_matches.items(): + count = node_counts.get('matches', 0) + if count > 0: + # This key got matches. + counters.add_counter('validate-goldens-matched', 1) + else: + if _is_commented_node(key, comment_char): + # No matches for this key. Ignore commented keys. + counters.add_counter('validate-goldens-ignored', 1) + else: + missing_goldens.append(node_counts.get('node')) + counters.add_counter('validate-goldens-missing', 1) + + if missing_goldens: + logging.error( + f'Missing {len(missing_goldens)} among {len(golden_nodes)} goldens in {len(input_nodes)} input nodes.' 
+ ) + logging.debug(f'Missing goldens: {missing_goldens}') + else: + logging.info( + f'Goldens match successful: {len(golden_nodes)} goldens matched {len(input_nodes)} inputs' + ) + + return missing_goldens + + +def load_nodes_from_file(files: str) -> dict: + """Returns a dictionary of nodes loaded from the files. + + Supports CSV and MCF formats. + - CSV files: Each row is loaded as a node. + - MCF files: Each node is loaded based on its DCID. + """ + nodes = {} + input_files = file_util.file_get_matching(files) + for input_file in input_files: + if file_util.file_is_csv(input_file): + # For CSV, we treat each row as a dictionary of column:value. + # Nodes are keyed by their index in the combined loaded set. + file_nodes = file_util.file_load_csv_dict(input_file, + key_index=True) + for node in file_nodes.values(): + nodes[len(nodes)] = node + else: + # For MCF or JSON, we assume nodes are already keyed by DCID. + file_nodes = mcf_file_util.load_mcf_nodes(input_file) + for dcid, node in file_nodes.items(): + # Ensure the dcid is present in the node dictionary itself. + if 'dcid' not in node: + node['dcid'] = mcf_file_util.strip_namespace(dcid) + mcf_file_util.add_mcf_node(node, nodes) + + logging.info(f'Loaded {len(nodes)} nodes from {input_files}') + return nodes + + +def generate_goldens(input_files: str, + property_sets: list, + output_file: str = None, + config: dict = None, + counters: Counters = None) -> dict: + """Generates a set of unique golden nodes from input files. + + For each input node and each property set in property_sets, it extracts + the values for those properties and creates a unique golden node. + If sampling is requested, a representative sample of input nodes is used + as the basis for generating the golden nodes. + + Args: + input_files: Glob pattern or list of input data files. + property_sets: List of sets/lists of properties to extract. 
+ Example: [{'variableMeasured'}, {'observationAbout', 'variableMeasured'}] + output_file: Path to write the generated goldens to (MCF format). + config: Configuration for normalization and sampling. + counters: Output counters. + + Returns: + A dictionary of unique golden nodes keyed by their fingerprints. + """ + if counters is None: + counters = Counters() + + if config is None: + config = get_validator_goldens_config() + + # Apply sampling if requested. + sampler_rows = config.get('sampler_output_rows', 0) + exhaustive = config.get('sampler_exhaustive', False) + must_include_values = data_sampler.load_column_keys( + config.get('sampler_column_keys', [])) + if must_include_values: + for col, vals in must_include_values.items(): + counters.add_counter(f'generate-goldens-include-{col}', len(vals)) + if sampler_rows > 0 or exhaustive: + logging.info( + f'Sampling rows from {input_files} (exhaustive={exhaustive}, rows={sampler_rows})' + ) + if exhaustive: + config['sampler_column_regex'] = '.*' + + # Generate a representative sample with unique values across columns. + with tempfile.NamedTemporaryFile(mode='w+t', suffix='.csv', + delete=True) as sampled_file: + sampler = data_sampler.DataSampler(config_dict=config, + counters=counters) + sampler.sample_csv_file(input_files, output_file=sampled_file.name) + input_nodes = load_nodes_from_file(sampled_file.name) + logging.info( + f'Using sampled file: {sampled_file} with {len(input_nodes)} nodes' + ) + counters.add_counter(f'generate-goldens-sampled-nodes', + len(input_nodes)) + else: + input_nodes = load_nodes_from_file(input_files) + counters.add_counter('generate-goldens-input-nodes', len(input_nodes)) + + # If not sampling, but must_include_values are provided, use them as a filter + # to focus goldens on prominent DCIDs if requested. 
+ if must_include_values: + filtered_nodes = {} + for k, node in input_nodes.items(): + match = False + for col, vals in must_include_values.items(): + if node.get(col) in vals: + match = True + break + if match: + filtered_nodes[k] = node + + logging.info( + f'Filtered {len(input_nodes)} nodes down to {len(filtered_nodes)} matching prominent DCIDs.' + ) + input_nodes = filtered_nodes + counters.add_counter('generate-goldens-filtered-nodes', + len(filtered_nodes)) + + ignore_props = set(config.get('goldens_ignore_property', [])) + + golden_nodes = {} + for node in input_nodes.values(): + # If no property sets are provided, use all properties in the current node + # except those that are explicitly ignored. + effective_property_sets = property_sets + if not effective_property_sets: + node_props = set(node.keys()) - ignore_props + if node_props: + effective_property_sets = [node_props] + else: + continue + + for props in effective_property_sets: + # Create a dictionary for this specific property set from the input node. + golden_node = {} + has_all_props = True + for prop in props: + + if prop in node: + golden_node[prop] = node[prop] + else: + # If a node is missing one of the properties in a set, + # we skip this combination. + has_all_props = False + break + + if not has_all_props or not golden_node: + continue + + # Generate a unique key for this golden node shape. + key = fingerprint_node(golden_node, compare_props=props) + + if key not in golden_nodes: + golden_nodes[key] = golden_node + counters.add_counter('generate-goldens-unique', 1) + + counters.add_counter('generate-goldens-processed', 1) + + logging.info( + f'Generated {len(golden_nodes)} unique goldens from {len(input_nodes)} input nodes.' 
+ ) + counters.add_counter('generated-golden-output', len(golden_nodes)) + + if golden_nodes and output_file: + logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}') + if file_util.file_is_csv(output_file): + file_util.file_write_csv_dict(golden_nodes, + output_file, + key_column_name=None) + else: + mcf_file_util.write_mcf_nodes([golden_nodes], output_file) + + return golden_nodes + + +def validate_goldens(inputs: str | dict, + golden_files: str, + output_file: str = None, + config: dict = None, + counters: Counters = None) -> list: + """Validate records in the input files against goldens. + + This is the high-level entry point for comparing two sets of files. + + Args: + inputs: Glob pattern for list of input data files or + dictionary of input nodes. + golden_files: Glob pattern or list of golden data files. + output_file: Path to write missing goldens to. + config: Validation configuration. + counters: Counters for tracking progress and results. + """ + if config is None: + config = get_validator_goldens_config() + + # Load all nodes from input and golden files. + if isinstance(inputs, dict): + input_nodes = inputs + else: + input_nodes = load_nodes_from_file(inputs) + golden_files_list = file_util.file_get_matching(golden_files) + golden_nodes = load_nodes_from_file(golden_files_list) + + # Run the core comparison logic. + missing_goldens = validator_compare_nodes(input_nodes, golden_nodes, config, + counters) + + # Optionally write out the missing golden nodes for debugging. + if missing_goldens and output_file: + if output_file.endswith('/') or os.path.isdir(output_file): + # Append a default filename if only a directory was provided. 
+ output_file = os.path.join( + output_file, + 'goldens_missing_' + os.path.basename(golden_files_list[0])) + logging.info( + f'Writing {len(missing_goldens)} missing goldens to {output_file}') + if file_util.file_is_csv(output_file): + file_util.file_write_csv_dict(dict(enumerate(missing_goldens)), + output_file) + else: + mcf_file_util.write_mcf_nodes(dict(enumerate(missing_goldens)), + output_file) + return missing_goldens + + +def main(_): + """Main entry point for the validator script.""" + logging.set_verbosity(2) + counters = Counters() + + if _FLAGS.generate_goldens: + # Generation Mode + property_sets = [] + for p_set_str in _FLAGS.generate_goldens_property_sets: + property_sets.append(set(p_set_str.split('|'))) + + generate_goldens(_FLAGS.validate_goldens_input, + property_sets, + output_file=_FLAGS.generate_goldens, + config=get_validator_goldens_config(), + counters=counters) + if _FLAGS.validate_goldens: + # Validation Mode + validate_goldens(_FLAGS.validate_goldens_input, + _FLAGS.validate_goldens, + output_file=_FLAGS.validate_goldens_output, + config=get_validator_goldens_config(), + counters=counters) + + +if __name__ == '__main__': + app.run(main) diff --git a/tools/import_validation/validator_goldens_test.py b/tools/import_validation/validator_goldens_test.py new file mode 100644 index 0000000000..5675195e99 --- /dev/null +++ b/tools/import_validation/validator_goldens_test.py @@ -0,0 +1,288 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for validator_goldens.py""" + +import os +import sys +import unittest +from unittest.mock import patch, MagicMock + +# Set up paths as in validator_goldens.py +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +_DATA_DIR = os.path.join(_SCRIPT_DIR.split('/data/')[0], 'data') +sys.path.append(_SCRIPT_DIR) +sys.path.append(os.path.dirname(_SCRIPT_DIR)) +sys.path.append(_DATA_DIR) +sys.path.append(os.path.join(_DATA_DIR, 'util')) +sys.path.append(os.path.join(_DATA_DIR, 'tools', 'statvar_importer')) + +import validator_goldens +from counters import Counters + + +class TestValidatorGoldens(unittest.TestCase): + + def test_get_validator_goldens_config(self): + with patch('validator_goldens._FLAGS') as mock_flags: + mock_flags.goldens_ignore_property = ['p1'] + mock_flags.goldens_key_property = ['p2'] + config = validator_goldens.get_validator_goldens_config() + self.assertEqual(config['goldens_ignore_property'], ['p1']) + self.assertEqual(config['goldens_key_property'], ['p2']) + + @patch('validator_goldens.mcf_file_util') + def test_normalize_value(self, mock_mcf): + # Test string with quotes + self.assertEqual(validator_goldens.normalize_value(' "val" '), 'val') + # Test float normalization + self.assertEqual(validator_goldens.normalize_value(1.20), '1.2') + # Test namespace stripping + mock_mcf.strip_namespace.return_value = 'val' + self.assertEqual( + validator_goldens.normalize_value('dcid:val', + strip_namespaces=True), 'val') + mock_mcf.strip_namespace.assert_called_with('dcid:val') + # Test list normalization + mock_mcf.normalize_list.return_value = 'v1,v2' + self.assertEqual(validator_goldens.normalize_value('v1,v2'), 'v1,v2') + mock_mcf.normalize_list.assert_called_with('v1,v2') + # Test alphanumeric string + self.assertEqual(validator_goldens.normalize_value('simple'), 'simple') + + def test_get_node_fingerprint(self): + node = {'p1': 
'v1', 'p2': 'v2', 'p3': 'v3'} + # All properties (default) + self.assertEqual(validator_goldens.get_node_fingerprint(node), + 'p1=v1;p2=v2;p3=v3') + # Specific key properties + self.assertEqual( + validator_goldens.get_node_fingerprint(node, + key_property={'p1', 'p3'}), + 'p1=v1;p3=v3') + # Ignore properties + self.assertEqual( + validator_goldens.get_node_fingerprint(node, + ignore_property={'p2'}), + 'p1=v1;p3=v3') + # Combined key and ignore + self.assertEqual( + validator_goldens.get_node_fingerprint(node, + key_property={'p1', 'p2'}, + ignore_property={'p2'}), + 'p1=v1') + + def test_validator_compare_nodes(self): + input_nodes = { + 'n1': { + 'p1': 'v1', + 'p2': 'v2' + }, + 'n2': { + 'p1': 'v3', + 'p2': 'v4' + } + } + golden_nodes = {'g1': {'p1': 'v1'}, 'g2': {'p1': 'v5'}} + config = {'goldens_key_property': ['p1']} + counters = Counters() + missing = validator_goldens.validator_compare_nodes( + input_nodes, golden_nodes, config, counters) + # Expected fingerprint for g2 is p1=v5, which is not in input_nodes + self.assertEqual(missing, ['p1=v5']) + self.assertEqual(counters.get_counter('validate-goldens-missing'), 1) + self.assertEqual(counters.get_counter('validate-goldens-matched'), 1) + + def test_validator_compare_nodes_multiple_sets(self): + input_nodes = { + 'n1': { + 'p1': 'v1', + 'p2': 'v2' + }, + 'n2': { + 'p1': 'v1', + 'p3': 'v3' + } + } + golden_nodes = { + 'g1': { + 'p1': 'v1', + 'p2': 'v2' + }, + 'g2': { + 'p1': 'v1', + 'p3': 'v3' + } + } + # config empty, so it will group by all props in each golden node + counters = Counters() + missing = validator_goldens.validator_compare_nodes( + input_nodes, golden_nodes, {}, counters) + self.assertEqual(missing, []) + self.assertEqual(counters.get_counter('validate-goldens-matched'), 2) + + @patch('validator_goldens.file_util') + @patch('validator_goldens.mcf_file_util') + def test_load_nodes_from_file(self, mock_mcf, mock_file): + mock_file.file_get_matching.return_value = ['f1.csv', 'f2.mcf'] + 
mock_file.file_is_csv.side_effect = lambda x: x.endswith('.csv') + mock_file.file_load_csv_dict.return_value = {0: {'p1': 'v1'}} + mock_mcf.load_file_nodes.return_value = {'dcid:n1': {'p1': 'v2'}} + mock_mcf.strip_namespace.return_value = 'n1' + + def side_effect_add(pvs, nodes, **kwargs): + nodes[pvs['dcid']] = pvs + return True + + mock_mcf.add_mcf_node.side_effect = side_effect_add + + nodes = validator_goldens.load_nodes_from_file('dummy') + + self.assertEqual(len(nodes), 2) + self.assertIn(0, nodes) + self.assertEqual(nodes[0]['p1'], 'v1') + self.assertIn('n1', nodes) + self.assertEqual(nodes['n1']['p1'], 'v2') + + @patch('validator_goldens.load_nodes_from_file') + @patch('validator_goldens.mcf_file_util') + def test_generate_goldens(self, mock_mcf, mock_load): + mock_load.return_value = { + 0: { + 'variableMeasured': 'sv1', + 'observationAbout': 'geo1', + 'value': 10 + }, + 1: { + 'variableMeasured': 'sv2', + 'observationAbout': 'geo1', + 'value': 20 + }, + } + property_sets = [{'variableMeasured'}, {'variableMeasured', 'observationAbout'}] + + goldens = validator_goldens.generate_goldens('dummy', property_sets) + + # Unique goldens expected: + # 1. variableMeasured=sv1 + # 2. variableMeasured=sv2 + # 3. observationAbout=geo1;variableMeasured=sv1 + # 4. 
observationAbout=geo1;variableMeasured=sv2 + self.assertEqual(len(goldens), 4) + self.assertIn('variableMeasured=sv1', goldens) + self.assertIn('variableMeasured=sv2', goldens) + self.assertIn('observationAbout=geo1;variableMeasured=sv1', goldens) + self.assertIn('observationAbout=geo1;variableMeasured=sv2', goldens) + + @patch('validator_goldens.load_nodes_from_file') + @patch('validator_goldens.mcf_file_util') + @patch('validator_goldens.data_sampler') + def test_generate_goldens_with_sampling(self, mock_sampler, mock_mcf, mock_load): + mock_sampler.sample_csv_file.return_value = 'tmp-sample.csv' + mock_load.return_value = {0: {'p1': 'v1'}} + + property_sets = [{'p1'}] + config = {'sampler_output_rows': 10} + + + with patch('os.path.exists', return_value=True), patch('os.remove') as mock_remove: + goldens = validator_goldens.generate_goldens('input.csv', property_sets, config=config) + + self.assertEqual(len(goldens), 1) + mock_sampler.sample_csv_file.assert_called_once() + mock_load.assert_called_with('tmp-sample.csv') + mock_remove.assert_called_with('tmp-sample.csv') + + @patch('validator_goldens.load_nodes_from_file') + @patch('validator_goldens.mcf_file_util') + def test_generate_goldens_all_props(self, mock_mcf, mock_load): + mock_load.return_value = { + 0: { + 'p1': 'v1', + 'p2': 'v2', + 'ignore_me': 'x' + } + } + # property_sets is empty, should use all props except ignore_me + property_sets = [] + config = {'goldens_ignore_property': ['ignore_me']} + + goldens = validator_goldens.generate_goldens('dummy', property_sets, config=config) + + self.assertEqual(len(goldens), 1) + key = list(goldens.keys())[0] + # p1=v1;p2=v2 (alphabetical) + self.assertEqual(key, 'p1=v1;p2=v2') + self.assertIn('p1', goldens[key]) + self.assertIn('p2', goldens[key]) + self.assertNotIn('ignore_me', goldens[key]) + + @patch('validator_goldens.load_nodes_from_file') + @patch('validator_goldens.mcf_file_util') + def test_generate_goldens_with_must_include_values(self, mock_mcf, 
mock_load): + # Input has two nodes, but only one matches the prominent DCID filter. + mock_load.return_value = { + 0: {'p1': 'v1', 'p2': 'other'}, + 1: {'p1': 'v2', 'p2': 'other'} + } + # Filter for p1=v1 + must_include_values = {'p1': {'v1'}} + property_sets = [{'p1'}] + + goldens = validator_goldens.generate_goldens( + 'dummy', property_sets, must_include_values=must_include_values) + + # Only v1 should be included because of the filter (non-sampled mode). + self.assertEqual(len(goldens), 1) + self.assertIn('p1=v1', goldens) + self.assertNotIn('p1=v2', goldens) + + @patch('validator_goldens.load_nodes_from_file') + @patch('validator_goldens.mcf_file_util') + def test_generate_goldens_all_props_mixed_schema(self, mock_mcf, mock_load): + # input nodes have different columns + mock_load.return_value = { + 0: {'p1': 'v1'}, + 1: {'p2': 'v2'} + } + # property_sets is empty, should use each node's own props + property_sets = [] + + goldens = validator_goldens.generate_goldens('dummy', property_sets) + + self.assertEqual(len(goldens), 2) + self.assertIn('p1=v1', goldens) + self.assertIn('p2=v2', goldens) + + @patch('validator_goldens.load_nodes_from_file') + @patch('validator_goldens.validator_compare_nodes') + @patch('validator_goldens.file_util') + def test_validate_goldens(self, mock_file, mock_compare, mock_load): + mock_load.side_effect = [ + {'n1': { + 'p1': 'v1' + }}, # input + {'g1': { + 'p1': 'v1' + }} # golden + ] + mock_compare.return_value = [] + + missing = validator_goldens.validate_goldens('in', 'gold', 'out') + + self.assertEqual(missing, []) + mock_compare.assert_called_once() + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/import_validation/validator_integration_test.py b/tools/import_validation/validator_integration_test.py new file mode 100644 index 0000000000..b4a0bee0a1 --- /dev/null +++ b/tools/import_validation/validator_integration_test.py @@ -0,0 +1,129 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Integration tests for the GOLDENS validation rule in Validator."""

import os
import sys
import pandas as pd
import unittest
import tempfile
import shutil
import csv

# Make the sibling validator/result modules importable when run directly.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _SCRIPT_DIR)

from validator import Validator
from result import ValidationStatus


class TestGoldensValidation(unittest.TestCase):
    '''Test Class for the GOLDENS validation rule.

    Each test builds small CSV fixtures in a per-test temp directory and
    checks the ValidationStatus returned by Validator.validate_goldens.
    '''

    def setUp(self):
        # Fresh Validator and an isolated temp dir for every test.
        self.validator = Validator()
        self.test_dir = tempfile.mkdtemp()

        # Create a sample golden CSV
        self.golden_file = os.path.join(self.test_dir, 'goldens.csv')
        with open(self.golden_file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['StatVar', 'NumPlaces'])
            writer.writerow(['sv1', '10'])
            writer.writerow(['sv2', '20'])

        # Create a sample input CSV that matches
        self.input_file_match = os.path.join(self.test_dir, 'input_match.csv')
        with open(self.input_file_match, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['StatVar', 'NumPlaces', 'Value'])
            writer.writerow(['sv1', '10', '100'])
            writer.writerow(['sv2', '20', '200'])

        # Create a sample input CSV that is missing a golden
        self.input_file_missing = os.path.join(self.test_dir, 'input_missing.csv')
        with open(self.input_file_missing, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['StatVar', 'NumPlaces', 'Value'])
            writer.writerow(['sv1', '10', '100'])
            # sv2 is missing

    def tearDown(self):
        # Remove all fixture files created by setUp.
        shutil.rmtree(self.test_dir)

    def test_validate_goldens_passes_with_matching_files(self):
        """All golden records present in the input file -> PASSED."""
        params = {
            'golden_files': self.golden_file,
            'input_files': self.input_file_match,
            'goldens_key_property': ['StatVar', 'NumPlaces']
        }
        # df is not used when input_files is in params
        result = self.validator.validate_goldens(pd.DataFrame(), params)
        self.assertEqual(result.status, ValidationStatus.PASSED)

    def test_validate_goldens_fails_with_missing_records(self):
        """A golden record absent from the input file -> FAILED + details."""
        params = {
            'golden_files': self.golden_file,
            'input_files': self.input_file_missing,
            'goldens_key_property': ['StatVar', 'NumPlaces']
        }
        result = self.validator.validate_goldens(pd.DataFrame(), params)
        self.assertEqual(result.status, ValidationStatus.FAILED)
        self.assertIn('Found 1 missing golden records', result.message)
        # Fingerprint of sv2: 'NumPlaces=20;StatVar=sv2' (alphabetical)
        self.assertIn('StatVar=sv2', result.details['missing_goldens'][0])

    def test_validate_goldens_uses_dataframe_when_input_files_missing(self):
        """Without input_files, validation falls back to the DataFrame."""
        # Sample DataFrame representing the stats data source
        df = pd.DataFrame({
            'StatVar': ['sv1', 'sv2'],
            'NumPlaces': [10, 20],
            'Value': [100, 200]
        })
        params = {
            'golden_files': self.golden_file,
            'goldens_key_property': ['StatVar', 'NumPlaces']
        }
        result = self.validator.validate_goldens(df, params)
        self.assertEqual(result.status, ValidationStatus.PASSED)

    def test_validate_goldens_fails_with_missing_records_from_df(self):
        """DataFrame missing a golden record -> FAILED with one detail."""
        # Sample DataFrame missing sv2
        df = pd.DataFrame({
            'StatVar': ['sv1'],
            'NumPlaces': [10],
            'Value': [100]
        })
        params = {
            'golden_files': self.golden_file,
            'goldens_key_property': ['StatVar', 'NumPlaces']
        }
        result = self.validator.validate_goldens(df, params)
        self.assertEqual(result.status, ValidationStatus.FAILED)
        self.assertEqual(len(result.details['missing_goldens']), 1)

    def test_validate_goldens_missing_golden_files_param(self):
        """Omitting the required golden_files param -> CONFIG_ERROR."""
        params = {'input_files': self.input_file_match}
        result = self.validator.validate_goldens(pd.DataFrame(), params)
        self.assertEqual(result.status, ValidationStatus.CONFIG_ERROR)
        self.assertIn('golden_files', result.message)

    def test_validate_goldens_empty_df_error(self):
        """Empty DataFrame and no input_files -> DATA_ERROR."""
        params = {'golden_files': self.golden_file}
        result = self.validator.validate_goldens(pd.DataFrame(), params)
        self.assertEqual(result.status, ValidationStatus.DATA_ERROR)
        self.assertIn('provided data source is empty', result.message)


if __name__ == '__main__':
    unittest.main()
diff --git a/tools/statvar_importer/mcf_diff.py b/tools/statvar_importer/mcf_diff.py
index 6b7cb79e1a..e7eaa825f8 100644
--- a/tools/statvar_importer/mcf_diff.py
+++ b/tools/statvar_importer/mcf_diff.py
@@ -299,7 +299,7 @@ def fingerprint_node(pvs: dict,
     for p in sorted(normalized_pvs.keys()):
         if p not in ignore_props:
             if not compare_props or p in compare_props:
-                fp.append(f'{p}:{normalized_pvs[p]}')
+                fp.append(f'{p}={normalized_pvs[p]}')
     return ';'.join(fp)
diff --git a/tools/statvar_importer/mcf_file_util.py b/tools/statvar_importer/mcf_file_util.py
index 2a78fde9db..62a6b5da92 100644
--- a/tools/statvar_importer/mcf_file_util.py
+++ b/tools/statvar_importer/mcf_file_util.py
@@ -43,6 +43,7 @@
 from collections import OrderedDict
 import csv
 import glob
+import hashlib
 import os
 import re
 import sys
@@ -1051,6 +1052,7 @@ def write_mcf_nodes(
             node_dict.update(d)
         file_util.file_write_csv_dict(node_dict, filename)
         return
+    filename_base = os.path.basename(filename)
     with file_util.FileIO(filename, mode) as output_f:
         if header is not None:
             output_f.write(header)
@@ -1061,6 +1063,11 @@
         node_keys = sorted(node_keys)
     for dcid in node_keys:
         node = nodes[dcid]
+        if 'dcid' not in node and 'Node' not in node:
+            # Generate a local dcid in a node copy
+            node = dict(node)
+            node['Node'] = f'l:{filename_base}/' + hashlib.md5(
+                str(dcid).encode('utf-8')).hexdigest()
         if sort:
             node = normalize_mcf_node(node, ignore_comments)
         pvs = node_dict_to_text(node, 
default_pvs) diff --git a/tools/statvar_importer/property_value_mapper.py b/tools/statvar_importer/property_value_mapper.py index 76d5209216..8c60f481a3 100644 --- a/tools/statvar_importer/property_value_mapper.py +++ b/tools/statvar_importer/property_value_mapper.py @@ -347,7 +347,7 @@ def _process_eval(self, pvs: dict, data_key: str) -> bool: self._log_every_n) if not eval_prop: eval_prop = data_key - if eval_data and eval_data != eval_str: + if eval_data is not None and eval_data != eval_str: pvs[eval_prop] = eval_data self._counters.add_counter('processed-eval', 1, eval_str) pvs.pop(eval_key) diff --git a/util/file_util.py b/util/file_util.py index e1d7fc1f1c..f375cb76bf 100644 --- a/util/file_util.py +++ b/util/file_util.py @@ -574,7 +574,7 @@ def file_load_csv_dict( if not key_column: # Use the first column as the key key_column = reader.fieldnames[0] - if not value_column and len(reader.fieldnames) == 2: + if not value_column and len(reader.fieldnames) == 2 and not key_index: # Use second column as value if there are only two columns. value_column = reader.fieldnames[1] logging.info(