diff --git a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java index c73b1a71..526a0096 100644 --- a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java +++ b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java @@ -582,6 +582,7 @@ private Type getTypeForColumnDescriptor(ColumnDescriptor columnDescriptor) { logicalTypeAnnotation = LogicalTypeAnnotation.dateType(); break; case TIME: + case UUID: case VARCHAR: case BPCHAR: case TEXT: diff --git a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java index 17ca9df3..18dedaed 100644 --- a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java +++ b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java @@ -1,11 +1,22 @@ package org.apache.cloudberry.pxf.plugins.hdfs; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Type; +import org.apache.cloudberry.pxf.api.io.DataType; import org.apache.cloudberry.pxf.api.model.RequestContext; +import org.apache.cloudberry.pxf.api.utilities.ColumnDescriptor; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; public class ParquetFileAccessorTest { ParquetFileAccessor accessor; @@ -26,4 +37,32 @@ public void testInitialize() { accessor.setRequestContext(context); assertNull(context.getMetadata()); } + + @Test + public void testGetTypeForColumnDescriptor_UUID() throws Exception { + ColumnDescriptor uuidColumn = new ColumnDescriptor("id", DataType.UUID.getOID(), 0, "uuid", new Integer[]{}); + + Method method = ParquetFileAccessor.class.getDeclaredMethod("getTypeForColumnDescriptor", ColumnDescriptor.class); + method.setAccessible(true); + Type result = (Type) method.invoke(accessor, uuidColumn); + + assertEquals("id", result.getName()); + assertTrue(result.isPrimitive()); + PrimitiveType primitiveType = result.asPrimitiveType(); + assertEquals(PrimitiveType.PrimitiveTypeName.BINARY, primitiveType.getPrimitiveTypeName()); + assertEquals(LogicalTypeAnnotation.stringType(), primitiveType.getLogicalTypeAnnotation()); + } + + @Test + public void testGetTypeForColumnDescriptor_UUIDArray() throws Exception { + ColumnDescriptor uuidArrayColumn = new ColumnDescriptor("ids", DataType.UUIDARRAY.getOID(), 0, "uuid[]", new Integer[]{}); + + Method method = ParquetFileAccessor.class.getDeclaredMethod("getTypeForColumnDescriptor", ColumnDescriptor.class); + method.setAccessible(true); + Type result = (Type) method.invoke(accessor, uuidArrayColumn); + + assertEquals("ids", result.getName()); + // array types are wrapped in a list group + assertTrue(result.asGroupType().isRepetition(Type.Repetition.OPTIONAL)); + } } diff --git a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java index 936978ff..a29a7f52 100644 --- a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java +++ b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java @@ -193,6 +193,78 @@ public void testSetFields_RightTrimCharNoLeftTrim() throws IOException { testSetFields_RightTrimCharHelper(" abcd ", " abc ", " abc"); } + @Test + @SuppressWarnings("deprecation") + public void testSetFields_UUID() throws IOException { + List typeFields = new ArrayList<>(); + typeFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "id", org.apache.parquet.schema.OriginalType.UTF8)); + typeFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "value", org.apache.parquet.schema.OriginalType.UTF8)); + schema = new MessageType("hive_schema", typeFields); + context.setMetadata(schema); + + List columnDescriptors = new ArrayList<>(); + columnDescriptors.add(new ColumnDescriptor("id", DataType.UUID.getOID(), 0, "uuid", null)); + columnDescriptors.add(new ColumnDescriptor("value", DataType.VARCHAR.getOID(), 1, "varchar", null)); + context.setTupleDescription(columnDescriptors); + + resolver.setRequestContext(context); + resolver.afterPropertiesSet(); + + String uuidValue = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11"; + List fields = new ArrayList<>(); + fields.add(new OneField(DataType.TEXT.getOID(), uuidValue)); + fields.add(new OneField(DataType.TEXT.getOID(), "test")); + + OneRow row = resolver.setFields(fields); + assertNotNull(row); + Object data = row.getData(); + assertNotNull(data); + assertTrue(data instanceof Group); + Group group = (Group) data; + + // assert UUID value is stored as string in BINARY column + assertEquals(uuidValue, group.getString(0, 0)); + assertEquals("test", group.getString(1, 0)); + + // assert value repetition count + for (int i = 0; i < 2; i++) { + assertEquals(1, group.getFieldRepetitionCount(i)); + } + } + + @Test + @SuppressWarnings("deprecation") + public void testSetFields_UUID_Null() throws IOException { + List typeFields = new ArrayList<>(); + typeFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "id", org.apache.parquet.schema.OriginalType.UTF8)); + typeFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "value", org.apache.parquet.schema.OriginalType.UTF8)); + schema = new MessageType("hive_schema", typeFields); + context.setMetadata(schema); + + List columnDescriptors = new ArrayList<>(); + columnDescriptors.add(new ColumnDescriptor("id", DataType.UUID.getOID(), 0, "uuid", null)); + columnDescriptors.add(new ColumnDescriptor("value", DataType.VARCHAR.getOID(), 1, "varchar", null)); + context.setTupleDescription(columnDescriptors); + + resolver.setRequestContext(context); + resolver.afterPropertiesSet(); + + List fields = new ArrayList<>(); + fields.add(new OneField(DataType.TEXT.getOID(), null)); + fields.add(new OneField(DataType.TEXT.getOID(), "test")); + + OneRow row = resolver.setFields(fields); + assertNotNull(row); + Object data = row.getData(); + assertNotNull(data); + assertTrue(data instanceof Group); + Group group = (Group) data; + + // assert null UUID is not written (repetition count 0) + assertEquals(0, group.getFieldRepetitionCount(0)); + assertEquals(1, group.getFieldRepetitionCount(1)); + } + @Test public void testSetFields_Primitive_Nulls() throws IOException { schema = getParquetSchemaForPrimitiveTypes(Type.Repetition.OPTIONAL, false); diff --git a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java index c590b4ca..7503bd41 100644 --- a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java +++ b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java @@ -67,6 +67,15 @@ public void testParsePostgresArrayStringArray() { assertIterableEquals(Arrays.asList("fizz", "buzz", "fizzbuzz"), result); } + @Test + public void testParsePostgresArrayUuidArray() { + // GPDB UUID is a parquet BINARY primitive type with String annotation (same as text) + String value = "{a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,b1ffcd00-0d1c-5f09-cc7e-7ccace491b22}"; + + List result = parquetUtilities.parsePostgresArray(value, PrimitiveType.PrimitiveTypeName.BINARY, LogicalTypeAnnotation.StringLogicalTypeAnnotation.stringType()); + assertIterableEquals(Arrays.asList("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11", "b1ffcd00-0d1c-5f09-cc7e-7ccace491b22"), result); + } + @Test public void testParsePostgresArrayDateArray() { // GPDB Date is an parquet INT64 primitive type with String annotation