From a4219612c7938e1da4b86b10682263f77d11a215 Mon Sep 17 00:00:00 2001
From: gabriel
Date: Tue, 3 Mar 2026 16:37:43 +0100
Subject: [PATCH 1/3] raise on duplicated alias

---
 dataframely/_base_schema.py |  6 ++++++
 tests/columns/test_alias.py | 20 ++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py
index 7ae64e3d..ca3678c9 100644
--- a/dataframely/_base_schema.py
+++ b/dataframely/_base_schema.py
@@ -84,6 +84,10 @@ class Metadata:
     rules: dict[str, RuleFactory] = field(default_factory=dict)
 
     def update(self, other: Self) -> None:
+        if duplicated_column_names := self.columns.keys() & other.columns.keys():
+            raise ImplementationError(
+                f"Columns {duplicated_column_names} are duplicated."
+            )
         self.columns.update(other.columns)
         self.rules.update(other.rules)
 
@@ -203,6 +207,8 @@ def _get_metadata(source: dict[str, Any]) -> Metadata:
         k: v for k, v in source.items() if not k.startswith("__")
     }.items():
         if isinstance(value, Column):
+            if (col_name := value.alias or attr) in result.columns:
+                raise ImplementationError(f"Column {col_name!r} is duplicated.")
             result.columns[value.alias or attr] = value
         if isinstance(value, RuleFactory):
             # We must ensure that custom rules do not clash with internal rules.
diff --git a/tests/columns/test_alias.py b/tests/columns/test_alias.py
index 0dd364a7..7276f049 100644
--- a/tests/columns/test_alias.py
+++ b/tests/columns/test_alias.py
@@ -2,8 +2,10 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
 import polars as pl
+import pytest
 
 import dataframely as dy
+from dataframely.exc import ImplementationError
 
 
 class AliasSchema(dy.Schema):
@@ -36,3 +38,21 @@ def test_alias_unset() -> None:
     no_alias_col = dy.Int32()
     assert no_alias_col.alias is None
     assert no_alias_col.name == ""
+
+
+def test_duplicate_alias_same_schema() -> None:
+    with pytest.raises(ImplementationError, match="'a' is duplicated"):
+
+        class MySchema(dy.Schema):
+            a = dy.Int64(alias="a")
+            b = dy.String(alias="a")
+
+
+def test_duplicate_alias_inherited_schema() -> None:
+    class MySchema(dy.Schema):
+        a = dy.Int64(alias="a")
+
+    with pytest.raises(ImplementationError, match="'a'.*duplicated"):
+
+        class MySchema2(MySchema):
+            b = dy.Int64(alias="a")

From a167a498d20a38217dd2cd670ff7ab75060a5cb4 Mon Sep 17 00:00:00 2001
From: Andreas Albert <103571926+AndreasAlbertQC@users.noreply.github.com>
Date: Wed, 4 Mar 2026 19:44:18 +0100
Subject: [PATCH 2/3] Apply suggestion from @AndreasAlbertQC

---
 dataframely/_base_schema.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py
index ca3678c9..7bd87f5e 100644
--- a/dataframely/_base_schema.py
+++ b/dataframely/_base_schema.py
@@ -87,6 +87,21 @@ def update(self, other: Self) -> None:
         if duplicated_column_names := self.columns.keys() & other.columns.keys():
             raise ImplementationError(
                 f"Columns {duplicated_column_names} are duplicated."
+        """Merge another Metadata instance into this one.
+        Overlapping keys are allowed if and only if they refer to the *same*
+        underlying object. This accommodates multiple-inheritance / diamond
+        patterns where the same base schema is visited more than once.
+        """
+        # Detect conflicting column definitions: same name, different Column instance
+        duplicated_column_names = self.columns.keys() & other.columns.keys()
+        conflicting_columns = {
+            name
+            for name in duplicated_column_names
+            if self.columns[name] is not other.columns[name]
+        }
+        if conflicting_columns:
+            raise ImplementationError(
+                f"Columns {conflicting_columns} are duplicated with conflicting definitions."
             )
         self.columns.update(other.columns)
         self.rules.update(other.rules)

From 08efe2b2600c506c3da2e8b2cca4ed576b5bb798 Mon Sep 17 00:00:00 2001
From: Andreas Albert
Date: Wed, 4 Mar 2026 19:48:52 +0100
Subject: [PATCH 3/3] fix

---
 dataframely/_base_schema.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py
index 7bd87f5e..b875e6bf 100644
--- a/dataframely/_base_schema.py
+++ b/dataframely/_base_schema.py
@@ -21,12 +21,12 @@ else:
     from typing_extensions import Self
 
 
-
 _COLUMN_ATTR = "__dataframely_columns__"
 _RULE_ATTR = "__dataframely_rules__"
 
 ORIGINAL_COLUMN_PREFIX = "__DATAFRAMELY_ORIGINAL__"
 
+
 # --------------------------------------- UTILS -------------------------------------- #
 
 
@@ -84,13 +84,11 @@ class Metadata:
     rules: dict[str, RuleFactory] = field(default_factory=dict)
 
     def update(self, other: Self) -> None:
-        if duplicated_column_names := self.columns.keys() & other.columns.keys():
-            raise ImplementationError(
-                f"Columns {duplicated_column_names} are duplicated."
         """Merge another Metadata instance into this one.
-        Overlapping keys are allowed if and only if they refer to the *same*
-        underlying object. This accommodates multiple-inheritance / diamond
-        patterns where the same base schema is visited more than once.
+
+        Overlapping keys are allowed if and only if they refer to the *same* underlying
+        object. This accommodates multiple-inheritance / diamond patterns where the same
+        base schema is visited more than once.
         """
         # Detect conflicting column definitions: same name, different Column instance
         duplicated_column_names = self.columns.keys() & other.columns.keys()
@@ -103,6 +101,8 @@ def update(self, other: Self) -> None:
             raise ImplementationError(
                 f"Columns {conflicting_columns} are duplicated with conflicting definitions."
             )
+
+        # All clear
         self.columns.update(other.columns)
         self.rules.update(other.rules)
 