Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ private Type getTypeForColumnDescriptor(ColumnDescriptor columnDescriptor) {
logicalTypeAnnotation = LogicalTypeAnnotation.dateType();
break;
case TIME:
case UUID:
case VARCHAR:
case BPCHAR:
case TEXT:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
package org.apache.cloudberry.pxf.plugins.hdfs;

import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.apache.cloudberry.pxf.api.io.DataType;
import org.apache.cloudberry.pxf.api.model.RequestContext;
import org.apache.cloudberry.pxf.api.utilities.ColumnDescriptor;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

public class ParquetFileAccessorTest {
ParquetFileAccessor accessor;
Expand All @@ -26,4 +37,32 @@ public void testInitialize() {
accessor.setRequestContext(context);
assertNull(context.getMetadata());
}

/**
 * Verifies the Parquet type produced for a UUID column descriptor named "id":
 * the result must be a primitive BINARY field carrying the string logical type.
 */
@Test
public void testGetTypeForColumnDescriptor_UUID() throws Exception {
    // The method under test is looked up by name and opened up via reflection.
    Method typeResolver = ParquetFileAccessor.class.getDeclaredMethod(
            "getTypeForColumnDescriptor", ColumnDescriptor.class);
    typeResolver.setAccessible(true);

    ColumnDescriptor descriptor =
            new ColumnDescriptor("id", DataType.UUID.getOID(), 0, "uuid", new Integer[]{});
    Type parquetType = (Type) typeResolver.invoke(accessor, descriptor);

    // The column name is carried over and the type is a primitive.
    assertEquals("id", parquetType.getName());
    assertTrue(parquetType.isPrimitive());

    // UUID is expected to be represented as BINARY with a string annotation.
    PrimitiveType binaryField = parquetType.asPrimitiveType();
    assertEquals(PrimitiveType.PrimitiveTypeName.BINARY, binaryField.getPrimitiveTypeName());
    assertEquals(LogicalTypeAnnotation.stringType(), binaryField.getLogicalTypeAnnotation());
}

/**
 * Verifies the Parquet type produced for a UUID array column descriptor named "ids":
 * the result keeps the column name and is a group type with OPTIONAL repetition.
 */
@Test
public void testGetTypeForColumnDescriptor_UUIDArray() throws Exception {
    // The method under test is looked up by name and opened up via reflection.
    Method typeResolver = ParquetFileAccessor.class.getDeclaredMethod(
            "getTypeForColumnDescriptor", ColumnDescriptor.class);
    typeResolver.setAccessible(true);

    ColumnDescriptor descriptor =
            new ColumnDescriptor("ids", DataType.UUIDARRAY.getOID(), 0, "uuid[]", new Integer[]{});
    Type parquetType = (Type) typeResolver.invoke(accessor, descriptor);

    assertEquals("ids", parquetType.getName());

    // array types are wrapped in a list group
    Type.Repetition expectedRepetition = Type.Repetition.OPTIONAL;
    assertTrue(parquetType.asGroupType().isRepetition(expectedRepetition));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,78 @@ public void testSetFields_RightTrimCharNoLeftTrim() throws IOException {
testSetFields_RightTrimCharHelper(" abcd ", " abc ", " abc");
}

/**
 * Writes one row through the resolver for a table with a UUID column ("id") and a
 * VARCHAR column ("value"), both backed by optional BINARY/UTF8 Parquet fields, and
 * checks that each value is readable from the resulting Group as a string and is
 * present exactly once.
 */
@Test
@SuppressWarnings("deprecation")
public void testSetFields_UUID() throws IOException {
    final String uuidValue = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11";
    org.apache.parquet.schema.OriginalType utf8 = org.apache.parquet.schema.OriginalType.UTF8;

    // Parquet schema: two optional BINARY fields annotated as UTF8.
    List<Type> parquetFields = new ArrayList<>();
    parquetFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "id", utf8));
    parquetFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "value", utf8));
    schema = new MessageType("hive_schema", parquetFields);
    context.setMetadata(schema);

    // Tuple description: the first column is declared as UUID, the second as VARCHAR.
    List<ColumnDescriptor> tupleDescription = new ArrayList<>();
    tupleDescription.add(new ColumnDescriptor("id", DataType.UUID.getOID(), 0, "uuid", null));
    tupleDescription.add(new ColumnDescriptor("value", DataType.VARCHAR.getOID(), 1, "varchar", null));
    context.setTupleDescription(tupleDescription);

    resolver.setRequestContext(context);
    resolver.afterPropertiesSet();

    // Both incoming fields are handed to the resolver tagged with the TEXT OID.
    List<OneField> incoming = new ArrayList<>();
    incoming.add(new OneField(DataType.TEXT.getOID(), uuidValue));
    incoming.add(new OneField(DataType.TEXT.getOID(), "test"));

    OneRow written = resolver.setFields(incoming);
    assertNotNull(written);
    Object payload = written.getData();
    assertNotNull(payload);
    assertTrue(payload instanceof Group);
    Group writtenGroup = (Group) payload;

    // assert UUID value is stored as string in BINARY column
    assertEquals(uuidValue, writtenGroup.getString(0, 0));
    assertEquals("test", writtenGroup.getString(1, 0));

    // assert value repetition count
    assertEquals(1, writtenGroup.getFieldRepetitionCount(0));
    assertEquals(1, writtenGroup.getFieldRepetitionCount(1));
}

/**
 * Writes one row through the resolver where the UUID column ("id") carries a null
 * value and the VARCHAR column ("value") carries "test", and checks that the null
 * field is absent from the resulting Group while the other field is present once.
 */
@Test
@SuppressWarnings("deprecation")
public void testSetFields_UUID_Null() throws IOException {
    org.apache.parquet.schema.OriginalType utf8 = org.apache.parquet.schema.OriginalType.UTF8;

    // Parquet schema: two optional BINARY fields annotated as UTF8.
    List<Type> parquetFields = new ArrayList<>();
    parquetFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "id", utf8));
    parquetFields.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "value", utf8));
    schema = new MessageType("hive_schema", parquetFields);
    context.setMetadata(schema);

    // Tuple description: the first column is declared as UUID, the second as VARCHAR.
    List<ColumnDescriptor> tupleDescription = new ArrayList<>();
    tupleDescription.add(new ColumnDescriptor("id", DataType.UUID.getOID(), 0, "uuid", null));
    tupleDescription.add(new ColumnDescriptor("value", DataType.VARCHAR.getOID(), 1, "varchar", null));
    context.setTupleDescription(tupleDescription);

    resolver.setRequestContext(context);
    resolver.afterPropertiesSet();

    // The first incoming field has a null value; the second has a regular string.
    List<OneField> incoming = new ArrayList<>();
    incoming.add(new OneField(DataType.TEXT.getOID(), null));
    incoming.add(new OneField(DataType.TEXT.getOID(), "test"));

    OneRow written = resolver.setFields(incoming);
    assertNotNull(written);
    Object payload = written.getData();
    assertNotNull(payload);
    assertTrue(payload instanceof Group);
    Group writtenGroup = (Group) payload;

    // assert null UUID is not written (repetition count 0)
    int uuidOccurrences = writtenGroup.getFieldRepetitionCount(0);
    int valueOccurrences = writtenGroup.getFieldRepetitionCount(1);
    assertEquals(0, uuidOccurrences);
    assertEquals(1, valueOccurrences);
}

@Test
public void testSetFields_Primitive_Nulls() throws IOException {
schema = getParquetSchemaForPrimitiveTypes(Type.Repetition.OPTIONAL, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ public void testParsePostgresArrayStringArray() {
assertIterableEquals(Arrays.asList("fizz", "buzz", "fizzbuzz"), result);
}

/**
 * Parses a Postgres array literal holding two UUIDs as a BINARY/string Parquet
 * element type and checks that both UUID strings come back in order.
 */
@Test
public void testParsePostgresArrayUuidArray() {
    // GPDB UUID is a parquet BINARY primitive type with String annotation (same as text)
    List<String> expectedUuids = Arrays.asList(
            "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
            "b1ffcd00-0d1c-5f09-cc7e-7ccace491b22");
    String arrayLiteral = "{a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,b1ffcd00-0d1c-5f09-cc7e-7ccace491b22}";

    List<Object> parsed = parquetUtilities.parsePostgresArray(
            arrayLiteral,
            PrimitiveType.PrimitiveTypeName.BINARY,
            LogicalTypeAnnotation.stringType());

    assertIterableEquals(expectedUuids, parsed);
}

@Test
public void testParsePostgresArrayDateArray() {
// GPDB Date is a parquet INT64 primitive type with String annotation
Expand Down
Loading