From ab9f8a00540697c24187f5b022e8c568c7f39331 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 10 Mar 2026 16:02:20 -0600 Subject: [PATCH] Upgrade breakdown enum mismatch from WARNING to ERROR Parameters with keys not in the breakdown variable's possible values now raise ValueError instead of silently logging a warning. This prevents data loss when parameter YAML files use keys that don't match the breakdown enum (e.g., using snap_utility_region keys with a state_code breakdown). Partial coverage (enum values missing from YAML) remains allowed. Closes #444 Co-Authored-By: Claude Opus 4.6 --- changelog_entry.yaml | 4 + .../operations/homogenize_parameters.py | 31 +++--- .../parameters/operations/test_nesting.py | 103 +++++++++++++++++- 3 files changed, 122 insertions(+), 16 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..c3d917b48 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + changed: + - Upgrade breakdown enum mismatch from WARNING to ERROR. Parameters with keys not in the breakdown variable's possible values now raise ValueError instead of logging a warning and silently ignoring the data. diff --git a/policyengine_core/parameters/operations/homogenize_parameters.py b/policyengine_core/parameters/operations/homogenize_parameters.py index 5eb89f28a..04888300c 100644 --- a/policyengine_core/parameters/operations/homogenize_parameters.py +++ b/policyengine_core/parameters/operations/homogenize_parameters.py @@ -24,9 +24,7 @@ def homogenize_parameter_structures( for node in root.get_descendants(): if isinstance(node, ParameterNode): breakdown = get_breakdown_variables(node) - node = homogenize_parameter_node( - node, breakdown, variables, default_value - ) + node = homogenize_parameter_node(node, breakdown, variables, default_value) return root @@ -98,17 +96,24 @@ def homogenize_parameter_node( {"0000-01-01": default_value, "2040-01-01": default_value}, ), ) + possible_values_str = {str(v) for v in possible_values} + extra_children = [] + for child in node.children: + child_key = child.split(".")[-1] + if ( + child_key not in possible_values_str + and str(child_key) not in possible_values_str + ): + extra_children.append(child_key) + if extra_children: + raise ValueError( + f"Parameter {node.name} has children {extra_children} " + f"that are not in the possible values of the breakdown " + f"variable '{first_breakdown}'. Check that the breakdown " + f"metadata references the correct variable and that all " + f"parameter keys are valid enum values." + ) for child in node.children: - if child.split(".")[-1] not in possible_values: - try: - int(child) - is_int = True - except: - is_int = False - if not is_int or str(child) not in node.children: - logging.warning( - f"Parameter {node.name} has a child {child} that is not in the possible values of {first_breakdown}, ignoring." - ) if further_breakdown: node.children[child] = homogenize_parameter_node( node.children[child], breakdown[1:], variables, default_value diff --git a/tests/core/parameters/operations/test_nesting.py b/tests/core/parameters/operations/test_nesting.py index a4a808798..97d3dcdc2 100644 --- a/tests/core/parameters/operations/test_nesting.py +++ b/tests/core/parameters/operations/test_nesting.py @@ -1,3 +1,6 @@ +import pytest + + def test_parameter_homogenization(): import numpy as np @@ -85,8 +88,102 @@ class family_size(Variable): family_sizes = np.array([1, 2, 3]) assert ( - system.parameters("2021-01-01").value_by_country_and_region[countries][ - regions - ][family_sizes] + system.parameters("2021-01-01").value_by_country_and_region[countries][regions][ + family_sizes + ] == [1, 0, 0] ).all() + + +def test_breakdown_mismatch_raises_error(): + """Extra parameter keys not in the breakdown enum should raise ValueError.""" + from policyengine_core.entities import Entity + from policyengine_core.model_api import ETERNITY, Enum, Variable + from policyengine_core.parameters import ( + ParameterNode, + homogenize_parameter_structures, + ) + from policyengine_core.taxbenefitsystems import TaxBenefitSystem + + Person = Entity("person", "people", "Person", "A person") + + class Color(Enum): + RED = "Red" + BLUE = "Blue" + + class color(Variable): + value_type = Enum + entity = Person + definition_period = ETERNITY + possible_values = Color + default_value = Color.RED + label = "color" + + # "GREEN" is not in the Color enum — should raise ValueError + root = ParameterNode( + data={ + "value_by_color": { + "RED": {"2021-01-01": 1}, + "GREEN": {"2021-01-01": 2}, + "metadata": { + "breakdown": ["color"], + }, + } + } + ) + + system = TaxBenefitSystem([Person]) + system.add_variables(color) + system.parameters = root + + with pytest.raises(ValueError, match="GREEN"): + homogenize_parameter_structures( + system.parameters, system.variables, default_value=0 + ) + + +def test_breakdown_partial_coverage_is_ok(): + """Missing enum values in parameter YAML should NOT raise an error.""" + from policyengine_core.entities import Entity + from policyengine_core.model_api import ETERNITY, Enum, Variable + from policyengine_core.parameters import ( + ParameterNode, + homogenize_parameter_structures, + ) + from policyengine_core.taxbenefitsystems import TaxBenefitSystem + + Person = Entity("person", "people", "Person", "A person") + + class Color(Enum): + RED = "Red" + BLUE = "Blue" + GREEN = "Green" + + class color(Variable): + value_type = Enum + entity = Person + definition_period = ETERNITY + possible_values = Color + default_value = Color.RED + label = "color" + + # Only RED is present — BLUE and GREEN are missing but that's fine + root = ParameterNode( + data={ + "value_by_color": { + "RED": {"2021-01-01": 1}, + "metadata": { + "breakdown": ["color"], + }, + } + } + ) + + system = TaxBenefitSystem([Person]) + system.add_variables(color) + system.parameters = root + + # Should not raise — partial coverage is allowed + homogenize_parameter_structures( + system.parameters, system.variables, default_value=0 + )