From 2908d6dd3f68d93349b583de450bb33eaeda4dd0 Mon Sep 17 00:00:00 2001 From: Kanthi Subramanian Date: Thu, 12 Mar 2026 15:54:06 +0100 Subject: [PATCH] Fixed s3 decoding Signed-off-by: Kanthi Subramanian --- src/Storages/ObjectStorage/Utils.cpp | 8 ++-- .../integration/test_database_iceberg/test.py | 48 ++++++++++++++++++- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index b5330ac180b2..8f490b504cc2 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -513,9 +513,11 @@ std::pair resolveObjectStorageForPath( normalized_path = "gs://" + target_decomposed.authority + "/" + target_decomposed.key; } S3::URI s3_uri(normalized_path); - - std::string key_to_use = s3_uri.key; - + + // Use key (parsed without URI decoding) so that percent-encoded + // characters in object keys (e.g. %2F in Iceberg partition paths) are preserved. + std::string key_to_use = target_decomposed.key; + bool use_base_storage = false; if (base_storage->getType() == ObjectStorageType::S3) { diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py index fa1ff5b45359..57efe7d2fb2e 100644 --- a/tests/integration/test_database_iceberg/test.py +++ b/tests/integration/test_database_iceberg/test.py @@ -625,6 +625,52 @@ def test_table_with_slash(started_cluster): assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "\\N\tAAPL\t193.24\t193.31\t('bot')\n" +def test_partition_value_with_slash(started_cluster): + """Partition value containing '/' produces object keys with %2F; reading must preserve encoding.""" + node = started_cluster.instances["node1"] + + test_ref = f"test_partition_slash_{uuid.uuid4()}" + table_name = f"{test_ref}_table" + root_namespace = f"{test_ref}_namespace" + + # Partition by symbol (string) so partition value "us/west" becomes path segment symbol=us%2Fwest + partition_spec = PartitionSpec( + PartitionField( + source_id=2, field_id=1000, transform=IdentityTransform(), name="symbol" + ) + ) + schema = DEFAULT_SCHEMA + + catalog = load_catalog_impl(started_cluster) + catalog.create_namespace(root_namespace) + + table = create_table( + catalog, + root_namespace, + table_name, + schema, + partition_spec=partition_spec, + sort_order=DEFAULT_SORT_ORDER, + ) + + # Write a row with partition value containing slash (path will have %2F in S3 key) + data = [ + { + "datetime": datetime.now(), + "symbol": "us/west", + "bid": 100.0, + "ask": 101.0, + "details": {"created_by": "test"}, + } + ] + df = pa.Table.from_pylist(data) + table.append(df) + + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) + assert 1 == int(node.query(f"SELECT count() FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`")) + assert "us/west" in node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`") + + def test_cluster_select(started_cluster): node1 = started_cluster.instances["node1"] node2 = started_cluster.instances["node2"] @@ -665,7 +711,7 @@ def test_cluster_select(started_cluster): assert len(cluster_secondary_queries) == 1 assert node2.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`", settings={"parallel_replicas_for_cluster_engines":1, 'enable_parallel_replicas': 2, 'cluster_for_parallel_replicas': 'cluster_simple', 'parallel_replicas_for_cluster_engines' : 1}) == 'pablo\n' - + def test_not_specified_catalog_type(started_cluster): node = started_cluster.instances["node1"] settings = {