From a82566c20f855d6b974b7cd3897b8f74c25df529 Mon Sep 17 00:00:00 2001 From: Sandeep Tuniki Date: Mon, 9 Feb 2026 09:15:36 +0530 Subject: [PATCH 1/3] refactor(filter_data_outliers): Encapsulate global flags for library safety --- .../statvar_importer/filter_data_outliers.py | 97 ++++++++++++------- 1 file changed, 62 insertions(+), 35 deletions(-) diff --git a/tools/statvar_importer/filter_data_outliers.py b/tools/statvar_importer/filter_data_outliers.py index dba9074b80..7497445496 100644 --- a/tools/statvar_importer/filter_data_outliers.py +++ b/tools/statvar_importer/filter_data_outliers.py @@ -42,44 +42,69 @@ from counters import Counters from mcf_file_util import get_numeric_value -flags.DEFINE_string('filter_data_input', '', - 'input CSV file with statvar observations') -flags.DEFINE_string('filter_data_output', '', 'output CSV file') -flags.DEFINE_float('filter_data_max_change_ratio', None, - 'Maximum change alowed between successive values.') -flags.DEFINE_float('filter_data_max_yearly_change_ratio', None, - 'Maximum change alowed between successive years.') -flags.DEFINE_float('filter_data_min_value', None, 'Minumum value allowed') -flags.DEFINE_float('filter_data_max_value', None, 'Maximum value allowed') -flags.DEFINE_list('data_series_value_properties', ['value'], - 'Properties with the value to be checked') -flags.DEFINE_list( - 'data_series_date_properties', ['observationDate'], - 'Properties that can be used to sort values within a series such as date') -flags.DEFINE_bool('filter_data_keep_recent', True, - 'Keep the most recent value for a time series.') - -_FLAGS = flags.FLAGS +_DEFAULT_FILTER_CONFIG = { + 'filter_data_keep_recent': True, + 'filter_data_max_change_ratio': None, + 'filter_data_max_yearly_change_ratio': None, + 'filter_data_min_value': None, + 'filter_data_max_value': None, + 'data_series_value_properties': ['value'], + 'data_series_date_properties': ['observationDate'], +} + + +def _define_flags(): + flags.DEFINE_string('filter_data_input', '', + 'input CSV file with statvar observations') + flags.DEFINE_string('filter_data_output', '', 'output CSV file') + flags.DEFINE_float('filter_data_max_change_ratio', + _DEFAULT_FILTER_CONFIG['filter_data_max_change_ratio'], + 'Maximum change alowed between successive values.') + flags.DEFINE_float( + 'filter_data_max_yearly_change_ratio', + _DEFAULT_FILTER_CONFIG['filter_data_max_yearly_change_ratio'], + 'Maximum change alowed between successive years.') + flags.DEFINE_float('filter_data_min_value', + _DEFAULT_FILTER_CONFIG['filter_data_min_value'], + 'Minumum value allowed') + flags.DEFINE_float('filter_data_max_value', + _DEFAULT_FILTER_CONFIG['filter_data_max_value'], + 'Maximum value allowed') + flags.DEFINE_list('data_series_value_properties', + _DEFAULT_FILTER_CONFIG['data_series_value_properties'], + 'Properties with the value to be checked') + flags.DEFINE_list( + 'data_series_date_properties', + _DEFAULT_FILTER_CONFIG['data_series_date_properties'], + 'Properties that can be used to sort values within a series such as date' + ) + flags.DEFINE_bool('filter_data_keep_recent', + _DEFAULT_FILTER_CONFIG['filter_data_keep_recent'], + 'Keep the most recent value for a time series.') def get_default_filter_data_config() -> dict: '''Returns the default filter config settings form flags as dict.''' - return { - 'filter_data_keep_recent': - _FLAGS.filter_data_keep_recent, - 'filter_data_max_change_ratio': - _FLAGS.filter_data_max_change_ratio, - 'filter_data_max_yearly_change_ratio': - _FLAGS.filter_data_max_yearly_change_ratio, - 'filter_data_min_value': - _FLAGS.filter_data_min_value, - 'filter_data_max_value': - _FLAGS.filter_data_max_value, - 'data_series_value_properties': - _FLAGS.data_series_value_properties, - 'data_series_date_properties': - _FLAGS.data_series_date_properties, - } + flag_names = [ + 'filter_data_keep_recent', + 'filter_data_max_change_ratio', + 'filter_data_max_yearly_change_ratio', + 'filter_data_min_value', + 'filter_data_max_value', + 'data_series_value_properties', + 'data_series_date_properties', + ] + configs = {name: _DEFAULT_FILTER_CONFIG[name] for name in flag_names} + # Use default values of flags if defined and parsed + try: + if not flags.FLAGS.is_parsed(): + flags.FLAGS.mark_as_parsed() + for flag_name in flag_names: + if hasattr(flags.FLAGS, flag_name): + configs[flag_name] = getattr(flags.FLAGS, flag_name) + except flags.UnparsedFlagAccessError: + pass + return configs def filter_data_get_series_key(pvs: dict, @@ -326,8 +351,10 @@ def _get_years_difference(dt1: datetime, dt2: datetime) -> float: def main(_): logging.set_verbosity(1) - filter_data_files(_FLAGS.filter_data_input, _FLAGS.filter_data_output) + filter_data_files(flags.FLAGS.filter_data_input, + flags.FLAGS.filter_data_output) if __name__ == '__main__': + _define_flags() app.run(main) From e960d979de7d0c6d91d5b4b42e4134ddc8238b73 Mon Sep 17 00:00:00 2001 From: Sandeep Tuniki Date: Mon, 9 Feb 2026 09:42:32 +0530 Subject: [PATCH 2/3] fix: Address review comments for library safety and typos --- tools/statvar_importer/filter_data_outliers.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/statvar_importer/filter_data_outliers.py b/tools/statvar_importer/filter_data_outliers.py index 7497445496..78eb43aaf4 100644 --- a/tools/statvar_importer/filter_data_outliers.py +++ b/tools/statvar_importer/filter_data_outliers.py @@ -59,14 +59,14 @@ def _define_flags(): flags.DEFINE_string('filter_data_output', '', 'output CSV file') flags.DEFINE_float('filter_data_max_change_ratio', _DEFAULT_FILTER_CONFIG['filter_data_max_change_ratio'], - 'Maximum change alowed between successive values.') + 'Maximum change allowed between successive values.') flags.DEFINE_float( 'filter_data_max_yearly_change_ratio', _DEFAULT_FILTER_CONFIG['filter_data_max_yearly_change_ratio'], - 'Maximum change alowed between successive years.') + 'Maximum change allowed between successive years.') flags.DEFINE_float('filter_data_min_value', _DEFAULT_FILTER_CONFIG['filter_data_min_value'], - 'Minumum value allowed') + 'Minimum value allowed') flags.DEFINE_float('filter_data_max_value', _DEFAULT_FILTER_CONFIG['filter_data_max_value'], 'Maximum value allowed') @@ -96,14 +96,10 @@ def get_default_filter_data_config() -> dict: ] configs = {name: _DEFAULT_FILTER_CONFIG[name] for name in flag_names} # Use default values of flags if defined and parsed - try: - if not flags.FLAGS.is_parsed(): - flags.FLAGS.mark_as_parsed() + if flags.FLAGS.is_parsed(): for flag_name in flag_names: if hasattr(flags.FLAGS, flag_name): configs[flag_name] = getattr(flags.FLAGS, flag_name) - except flags.UnparsedFlagAccessError: - pass return configs From 123d94cf54c5c849ed52c9fdd8ba613e02b5bdc0 Mon Sep 17 00:00:00 2001 From: Sandeep Tuniki Date: Mon, 9 Feb 2026 09:49:47 +0530 Subject: [PATCH 3/3] refactor: Derive flag names from config in get_default_filter_data_config --- tools/statvar_importer/filter_data_outliers.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tools/statvar_importer/filter_data_outliers.py b/tools/statvar_importer/filter_data_outliers.py index 78eb43aaf4..663bf672bf 100644 --- a/tools/statvar_importer/filter_data_outliers.py +++ b/tools/statvar_importer/filter_data_outliers.py @@ -85,19 +85,10 @@ def _define_flags(): def get_default_filter_data_config() -> dict: '''Returns the default filter config settings form flags as dict.''' - flag_names = [ - 'filter_data_keep_recent', - 'filter_data_max_change_ratio', - 'filter_data_max_yearly_change_ratio', - 'filter_data_min_value', - 'filter_data_max_value', - 'data_series_value_properties', - 'data_series_date_properties', - ] - configs = {name: _DEFAULT_FILTER_CONFIG[name] for name in flag_names} + configs = _DEFAULT_FILTER_CONFIG.copy() # Use default values of flags if defined and parsed if flags.FLAGS.is_parsed(): - for flag_name in flag_names: + for flag_name in configs: if hasattr(flags.FLAGS, flag_name): configs[flag_name] = getattr(flags.FLAGS, flag_name) return configs