Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ MANIFEST
.vscode/
.idea/

# Test configuration
pytest.ini

# Environments
.env
.envrc
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,10 @@ PyMongoSQL can be used as a database driver in Apache Superset for querying and

This allows seamless integration between MongoDB data and Superset's BI capabilities without requiring data migration to traditional SQL databases.

**Important Note on Collection Names:**

When using collection names containing special characters (`.`, `-`, `:`), you must wrap them in double quotes to prevent Superset's SQL parser from incorrectly interpreting them.

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
Expand Down
2 changes: 1 addition & 1 deletion pymongosql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
if TYPE_CHECKING:
from .connection import Connection

__version__: str = "0.4.1"
__version__: str = "0.4.2"

# Globals https://www.python.org/dev/peps/pep-0249/#globals
apilevel: str = "2.0"
Expand Down
3 changes: 2 additions & 1 deletion pymongosql/sql/partiql/PartiQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
CURRENT_USER: 'CURRENT_USER';
CURSOR: 'CURSOR';
DATE: 'DATE';
DATETIME: 'DATETIME';
DEALLOCATE: 'DEALLOCATE';
DEC: 'DEC';
DECIMAL: 'DECIMAL';
Expand Down Expand Up @@ -370,7 +371,7 @@ LITERAL_DECIMAL:
;

IDENTIFIER
: [A-Z$_][A-Z0-9$_]*;
: [A-Z$_][A-Z0-9$_-]*;

IDENTIFIER_QUOTED
: '"' ( ('""') | ~('"') )* '"';
Expand Down
2,900 changes: 1,452 additions & 1,448 deletions pymongosql/sql/partiql/PartiQLLexer.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pymongosql/sql/partiql/PartiQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ functionCall

// SQL-99 10.4 — <routine name> ::= [ <schema name> <period> ] <qualified identifier>
functionName
: (qualifier+=symbolPrimitive PERIOD)* name=( CHAR_LENGTH | CHARACTER_LENGTH | OCTET_LENGTH | BIT_LENGTH | UPPER | LOWER | SIZE | EXISTS | COUNT ) # FunctionNameReserved
: (qualifier+=symbolPrimitive PERIOD)* name=( CHAR_LENGTH | CHARACTER_LENGTH | OCTET_LENGTH | BIT_LENGTH | UPPER | LOWER | SIZE | EXISTS | COUNT | DATE | DATETIME | SUBSTRING | REPLACE | TRIM ) # FunctionNameReserved
| (qualifier+=symbolPrimitive PERIOD)* name=symbolPrimitive # FunctionNameSymbol
;

Expand Down
2,119 changes: 1,066 additions & 1,053 deletions pymongosql/sql/partiql/PartiQLParser.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pymongosql/sql/partiql/PartiQLParserListener.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated from PartiQLParser.g4 by ANTLR 4.13.0
# Generated from PartiQLParser.g4 by ANTLR 4.13.1
from antlr4 import *
if "." in __name__:
from .PartiQLParser import PartiQLParser
Expand Down
2 changes: 1 addition & 1 deletion pymongosql/sql/partiql/PartiQLParserVisitor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated from PartiQLParser.g4 by ANTLR 4.13.0
# Generated from PartiQLParser.g4 by ANTLR 4.13.1
from antlr4 import *
if "." in __name__:
from .PartiQLParser import PartiQLParser
Expand Down
3 changes: 2 additions & 1 deletion pymongosql/sql/query_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ def handle_visitor(self, ctx: PartiQLParser.FromClauseContext, parse_result: "Qu

# Regular collection reference
table_text = ctx.tableReference().getText()
collection_name = table_text
# Strip surrounding quotes from collection name (e.g., "user.accounts" -> user.accounts)
collection_name = re.sub(r'^"([^"]+)"$', r"\1", table_text)
parse_result.collection = collection_name
_logger.debug(f"Parsed regular collection: {collection_name}")
return collection_name
Expand Down
167 changes: 167 additions & 0 deletions tests/data/user-orders.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
[
{
"_id": "uo1",
"user_id": "1",
"order_id": "ord1",
"order_priority": "normal",
"notification_sent": true,
"user_notes": "Regular customer - expedite if possible",
"relationship_created_at": {"$date": "2023-12-01T10:30:00Z"},
"customer_type": "premium",
"follow_up_required": false
},
{
"_id": "uo2",
"user_id": "2",
"order_id": "ord2",
"order_priority": "high",
"notification_sent": true,
"user_notes": "First time buyer",
"relationship_created_at": {"$date": "2023-12-10T14:22:00Z"},
"customer_type": "standard",
"follow_up_required": true
},
{
"_id": "uo3",
"user_id": "3",
"order_id": "ord3",
"order_priority": "urgent",
"notification_sent": true,
"user_notes": "VIP customer - priority handling",
"relationship_created_at": {"$date": "2023-11-25T09:15:00Z"},
"customer_type": "vip",
"follow_up_required": false
},
{
"_id": "uo4",
"user_id": "1",
"order_id": "ord15",
"order_priority": "normal",
"notification_sent": true,
"user_notes": "Repeat order",
"relationship_created_at": {"$date": "2023-12-20T11:45:00Z"},
"customer_type": "premium",
"follow_up_required": false
},
{
"_id": "uo5",
"user_id": "4",
"order_id": "ord4",
"order_priority": "normal",
"notification_sent": false,
"user_notes": null,
"relationship_created_at": {"$date": "2023-12-15T16:30:00Z"},
"customer_type": "standard",
"follow_up_required": false
},
{
"_id": "uo6",
"user_id": "5",
"order_id": "ord5",
"order_priority": "low",
"notification_sent": true,
"user_notes": "Seasonal customer",
"relationship_created_at": {"$date": "2023-12-18T08:20:00Z"},
"customer_type": "standard",
"follow_up_required": true
},
{
"_id": "uo7",
"user_id": "2",
"order_id": "ord20",
"order_priority": "high",
"notification_sent": true,
"user_notes": "Special packaging requested",
"relationship_created_at": {"$date": "2024-01-05T13:10:00Z"},
"customer_type": "standard",
"follow_up_required": false
},
{
"_id": "uo8",
"user_id": "3",
"order_id": "ord25",
"order_priority": "urgent",
"notification_sent": true,
"user_notes": "Rush delivery requested",
"relationship_created_at": {"$date": "2024-01-10T10:00:00Z"},
"customer_type": "vip",
"follow_up_required": true
},
{
"_id": "uo9",
"user_id": "6",
"order_id": "ord6",
"order_priority": "normal",
"notification_sent": true,
"user_notes": "Corporate account",
"relationship_created_at": {"$date": "2023-12-22T14:55:00Z"},
"customer_type": "corporate",
"follow_up_required": false
},
{
"_id": "uo10",
"user_id": "7",
"order_id": "ord7",
"order_priority": "normal",
"notification_sent": false,
"user_notes": "Email bounced - alternate contact needed",
"relationship_created_at": {"$date": "2023-12-28T09:30:00Z"},
"customer_type": "standard",
"follow_up_required": true
},
{
"_id": "uo11",
"user_id": "1",
"order_id": "ord30",
"order_priority": "high",
"notification_sent": true,
"user_notes": "Loyalty program member",
"relationship_created_at": {"$date": "2024-01-15T12:20:00Z"},
"customer_type": "premium",
"follow_up_required": false
},
{
"_id": "uo12",
"user_id": "8",
"order_id": "ord8",
"order_priority": "normal",
"notification_sent": true,
"user_notes": null,
"relationship_created_at": {"$date": "2024-01-02T15:40:00Z"},
"customer_type": "standard",
"follow_up_required": false
},
{
"_id": "uo13",
"user_id": "9",
"order_id": "ord9",
"order_priority": "low",
"notification_sent": true,
"user_notes": "International shipping",
"relationship_created_at": {"$date": "2024-01-08T11:15:00Z"},
"customer_type": "standard",
"follow_up_required": true
},
{
"_id": "uo14",
"user_id": "10",
"order_id": "ord10",
"order_priority": "urgent",
"notification_sent": true,
"user_notes": "Gift order - include gift message",
"relationship_created_at": {"$date": "2024-01-12T16:50:00Z"},
"customer_type": "premium",
"follow_up_required": false
},
{
"_id": "uo15",
"user_id": "3",
"order_id": "ord35",
"order_priority": "urgent",
"notification_sent": true,
"user_notes": "VIP - anniversary order",
"relationship_created_at": {"$date": "2024-01-20T10:30:00Z"},
"customer_type": "vip",
"follow_up_required": false
}
]
4 changes: 2 additions & 2 deletions tests/run_test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,8 @@ def setup_test_data():
)
db = client[MONGODB_DATABASE]

# List of all collections to handle
collections = ["users", "products", "categories", "orders", "analytics", "departments", "suppliers"]
# Get all collections from loaded test data (dynamic, no hardcoding)
collections = list(test_data.keys())

# Clear existing data and insert new data for each collection
for collection_name in collections:
Expand Down
3 changes: 2 additions & 1 deletion tests/server_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"orders": "data/orders.json",
"analytics": "data/analytics.json",
"departments": "data/departments.json",
"suppliers": "data/suppliers.json"
"suppliers": "data/suppliers.json",
"user-orders": "data/user-orders.json"
}
}
55 changes: 54 additions & 1 deletion tests/test_cursor_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,48 @@ def test_aggregate_with_projection(self, conn):
assert len(rows) > 0
assert len(rows[0]) == 2 # Should have 2 columns (name, age)

def test_aggregate_with_nested_projection(self, conn):
    """Test aggregate with $project stage to validate nested structure projection (e.g., address.city).

    Builds a two-stage pipeline ($match on active users, then a $project that
    aliases the nested ``address.city`` field as a top-level ``city`` column)
    and verifies both the cursor description and the values in every row.

    Args:
        conn: open PyMongoSQL connection fixture backed by the test dataset.
    """
    # The pipeline is serialized to JSON because users.aggregate() takes the
    # pipeline as a string argument inside the SQL text.
    pipeline = json.dumps(
        [{"$match": {"active": True}}, {"$project": {"name": 1, "city": "$address.city", "age": 1}}]
    )

    # '{{}}' renders as the literal '{}' (empty options document) in the f-string.
    sql = f"""
    SELECT *
    FROM users.aggregate('{pipeline}', '{{}}')
    LIMIT 5
    """

    cursor = conn.cursor()
    result = cursor.execute(sql)

    # DB-API convention: execute() returns the cursor itself for chaining.
    assert result == cursor
    assert isinstance(cursor.result_set, ResultSet)

    # Check description has correct columns including projected nested field
    col_names = [desc[0] for desc in cursor.result_set.description]
    assert "name" in col_names
    assert "city" in col_names, "city field should be projected from address.city"
    assert "age" in col_names

    rows = cursor.result_set.fetchall()
    assert len(rows) > 0

    # Verify that nested city values are correctly returned
    city_idx = col_names.index("city")
    name_idx = col_names.index("name")
    age_idx = col_names.index("age")

    for row in rows:
        city_value = row[city_idx]
        # City should be a string value extracted from the nested address object
        # (i.e., flattened by $project, not returned as a sub-document).
        assert city_value is not None
        assert isinstance(city_value, str)
        assert len(city_value) > 0
        # Verify other fields are also present
        assert row[name_idx] is not None
        assert row[age_idx] is not None

def test_aggregate_with_where_clause(self, conn):
"""Test aggregate pipeline combined with WHERE clause for additional filtering"""
sql = """
Expand Down Expand Up @@ -273,4 +315,15 @@ def test_aggregate_multiple_stages(self, conn):
# Should have one row with aggregated stats
assert len(rows) == 1
row = rows[0]
assert len(row) >= 2 # Should have average_age and total_users

# Verify projections defined in pipeline appear in the result
col_names = [desc[0] for desc in cursor.result_set.description]
assert "average_age" in col_names, "average_age should be in result columns"
assert "total_users" in col_names, "total_users should be in result columns"
assert "_id" not in col_names, "_id should be excluded from result columns"

# Verify the values are present and valid
avg_age_idx = col_names.index("average_age")
total_users_idx = col_names.index("total_users")
assert row[avg_age_idx] is not None and isinstance(row[avg_age_idx], (int, float))
assert row[total_users_idx] is not None and isinstance(row[total_users_idx], (int, float))
48 changes: 48 additions & 0 deletions tests/test_sql_parser_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,3 +433,51 @@ def test_select_with_alias_and_where_clause(self):
"age": "user_age",
}
assert execution_plan.filter_stage == {"status": "active"}

@pytest.mark.parametrize(
    "collection,sql,projection,filter_condition",
    [
        # Hyphen (-) tests: hyphens are accepted in bare (unquoted) identifiers.
        ("user-accounts", "SELECT * FROM user-accounts", None, {}),
        (
            "user-accounts",
            "SELECT name, email FROM user-accounts WHERE status = 'active'",
            {"name": 1, "email": 1},
            {"status": "active"},
        ),
        # Period (.) tests: names containing '.' must be double-quoted so the
        # parser does not treat them as qualified (schema.table) references.
        ("user.accounts", 'SELECT * FROM "user.accounts"', None, {}),
        (
            "customer.orders",
            'SELECT name FROM "customer.orders" WHERE total > 100',
            {"name": 1},
            {"total": {"$gt": 100}},
        ),
        # Colon (:) tests: likewise require double quotes.
        ("user:accounts", 'SELECT * FROM "user:accounts"', None, {}),
        (
            "service:requests",
            'SELECT id, name FROM "service:requests" WHERE resolved = false',
            {"id": 1, "name": 1},
            {"resolved": False},
        ),
        # Multiple special characters test
        ("user-account.data:prod", 'SELECT * FROM "user-account.data:prod"', None, {}),
    ],
)
def test_collection_name_with_special_characters(self, collection, sql, projection, filter_condition):
    """Test SELECT with collection names containing special characters (-, ., :).

    Each case checks that the parser accepts the statement, resolves the
    collection name (with surrounding double quotes stripped), and produces
    the expected projection and filter stages.

    Args:
        collection: expected collection name after parsing (quotes removed).
        sql: SQL statement under test.
        projection: expected projection_stage, or None for SELECT *.
        filter_condition: expected filter_stage dict ({} when no WHERE clause).
    """
    parser = SQLParser(sql)

    assert not parser.has_errors, f"Parser errors: {parser.errors}"

    execution_plan = parser.get_execution_plan()
    assert execution_plan.collection == collection

    # For SELECT *, projection should be a dict (possibly empty or with just keys)
    if projection is None:
        assert isinstance(execution_plan.projection_stage, dict)
    else:
        assert execution_plan.projection_stage == projection

    assert execution_plan.filter_stage == filter_condition