Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ def arctanh_op_impl(x: ibis_types.Value):
@scalar_op_compiler.register_unary_op(ops.floor_op)
def floor_op_impl(x: ibis_types.Value):
x_numeric = typing.cast(ibis_types.NumericValue, x)
if x_numeric.type().is_boolean():
return x_numeric.cast(ibis_dtypes.Int64()).cast(ibis_dtypes.Float64())
if x_numeric.type().is_integer():
return x_numeric.cast(ibis_dtypes.Float64())
if x_numeric.type().is_floating():
Expand All @@ -181,6 +183,8 @@ def floor_op_impl(x: ibis_types.Value):
@scalar_op_compiler.register_unary_op(ops.ceil_op)
def ceil_op_impl(x: ibis_types.Value):
x_numeric = typing.cast(ibis_types.NumericValue, x)
if x_numeric.type().is_boolean():
return x_numeric.cast(ibis_dtypes.Int64()).cast(ibis_dtypes.Float64())
if x_numeric.type().is_integer():
return x_numeric.cast(ibis_dtypes.Float64())
if x_numeric.type().is_floating():
Expand Down Expand Up @@ -1026,7 +1030,7 @@ def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp):

@scalar_op_compiler.register_unary_op(ops.timedelta_floor_op)
def timedelta_floor_op_impl(x: ibis_types.NumericValue):
return x.floor()
return ibis_api.case().when(x > ibis.literal(0), x.floor()).else_(x.ceil()).end()


@scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)
Expand Down
50 changes: 43 additions & 7 deletions bigframes/core/compile/polars/lowering.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,10 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
divisor.output_type
):
# exact same as floordiv impl for timedelta
numeric_result = ops.floordiv_op.as_expr(
numeric_result = ops.div_op.as_expr(
ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(dividend), divisor
)
int_result = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(numeric_result)
return ops.AsTypeOp(to_type=dtypes.TIMEDELTA_DTYPE).as_expr(int_result)

return _numeric_to_timedelta(numeric_result)
if (
dividend.output_type == dtypes.BOOL_DTYPE
and divisor.output_type == dtypes.BOOL_DTYPE
Expand Down Expand Up @@ -226,11 +224,10 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
divisor.output_type
):
# this is pretty fragile as zero will break it, and must fit back into int
numeric_result = expr.op.as_expr(
numeric_result = ops.div_op.as_expr(
ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(dividend), divisor
)
int_result = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(numeric_result)
return ops.AsTypeOp(to_type=dtypes.TIMEDELTA_DTYPE).as_expr(int_result)
return _numeric_to_timedelta(numeric_result)

if dividend.output_type == dtypes.BOOL_DTYPE:
dividend = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(dividend)
Expand Down Expand Up @@ -319,6 +316,32 @@ def lower(self, expr: expression.OpExpression) -> expression.Expression:
return expr


class LowerCeilOp(op_lowering.OpLoweringRule):
@property
def op(self) -> type[ops.ScalarOp]:
return numeric_ops.CeilOp

def lower(self, expr: expression.OpExpression) -> expression.Expression:
assert isinstance(expr.op, numeric_ops.CeilOp)
arg = expr.children[0]
if arg.output_type in (dtypes.INT_DTYPE, dtypes.BOOL_DTYPE):
return expr.op.as_expr(ops.AsTypeOp(dtypes.FLOAT_DTYPE).as_expr(arg))
return expr


class LowerFloorOp(op_lowering.OpLoweringRule):
@property
def op(self) -> type[ops.ScalarOp]:
return numeric_ops.FloorOp

def lower(self, expr: expression.OpExpression) -> expression.Expression:
assert isinstance(expr.op, numeric_ops.FloorOp)
arg = expr.children[0]
if arg.output_type in (dtypes.INT_DTYPE, dtypes.BOOL_DTYPE):
return expr.op.as_expr(ops.AsTypeOp(dtypes.FLOAT_DTYPE).as_expr(arg))
return expr


class LowerIsinOp(op_lowering.OpLoweringRule):
@property
def op(self) -> type[ops.ScalarOp]:
Expand Down Expand Up @@ -465,8 +488,21 @@ def _lower_cast(cast_op: ops.AsTypeOp, arg: expression.Expression):
LowerInvertOp(),
LowerIsinOp(),
LowerLenOp(),
LowerCeilOp(),
LowerFloorOp(),
)


def lower_ops_to_polars(root: bigframe_node.BigFrameNode) -> bigframe_node.BigFrameNode:
return op_lowering.lower_ops(root, rules=POLARS_LOWERING_RULES)


def _numeric_to_timedelta(expr: expression.Expression) -> expression.Expression:
"""rounding logic used for emulating timedelta ops"""
rounded_value = ops.where_op.as_expr(
ops.floor_op.as_expr(expr),
ops.gt_op.as_expr(expr, expression.const(0)),
ops.ceil_op.as_expr(expr),
)
int_value = ops.AsTypeOp(to_type=dtypes.INT_DTYPE).as_expr(rounded_value)
return ops.AsTypeOp(to_type=dtypes.TIMEDELTA_DTYPE).as_expr(int_value)
33 changes: 33 additions & 0 deletions bigframes/core/compile/sqlglot/expressions/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import bigframes_vendored.sqlglot.expressions as sge


def round_towards_zero(expr: sge.Expression):
"""
Round a float value to to an integer, always rounding towards zero.

This is used to handle duration/timedelta emulation mostly.
"""
return sge.Cast(
this=sge.If(
this=sge.GT(this=expr, expression=sge.convert(0)),
true=sge.Floor(this=expr),
false=sge.Ceil(this=expr),
),
to="INT64",
)
7 changes: 4 additions & 3 deletions bigframes/core/compile/sqlglot/expressions/numeric_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from bigframes import dtypes
from bigframes import operations as ops
import bigframes.core.compile.sqlglot.expression_compiler as expression_compiler
from bigframes.core.compile.sqlglot.expressions.common import round_towards_zero
import bigframes.core.compile.sqlglot.expressions.constants as constants
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
from bigframes.operations import numeric_ops
Expand Down Expand Up @@ -467,7 +468,7 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:

result = sge.func("IEEE_DIVIDE", left_expr, right_expr)
if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
return sge.Cast(this=sge.Floor(this=result), to="INT64")
return round_towards_zero(result)
else:
return result

Expand Down Expand Up @@ -510,7 +511,7 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
)

if dtypes.is_numeric(right.dtype) and left.dtype == dtypes.TIMEDELTA_DTYPE:
result = sge.Cast(this=sge.Floor(this=result), to="INT64")
result = round_towards_zero(sge.func("IEEE_DIVIDE", left_expr, right_expr))

return result

Expand Down Expand Up @@ -578,7 +579,7 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
if (dtypes.is_numeric(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE) or (
left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype)
):
return sge.Cast(this=sge.Floor(this=result), to="INT64")
return round_towards_zero(result)
else:
return result

Expand Down
8 changes: 4 additions & 4 deletions bigframes/core/rewrite/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,12 @@ def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:


def _rewrite_floordiv_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
result = _TypedExpr.create_op_expr(ops.floordiv_op, left, right)

if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)
return _TypedExpr.create_op_expr(
ops.timedelta_floor_op, _TypedExpr.create_op_expr(ops.div_op, left, right)
)

return result
return _TypedExpr.create_op_expr(ops.floordiv_op, left, right)


def _rewrite_to_timedelta_op(op: ops.ToTimedeltaOp, arg: _TypedExpr):
Expand Down
4 changes: 2 additions & 2 deletions bigframes/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,13 @@ def get_standardized_ids(
"""
col_ids = [
UNNAMED_COLUMN_ID
if col_label is None
if pd.isna(col_label) # type: ignore
else label_to_identifier(col_label, strict=strict)
for col_label in col_labels
]
idx_ids = [
UNNAMED_INDEX_ID
if idx_label is None
if pd.isna(idx_label) # type: ignore
else label_to_identifier(idx_label, strict=strict)
for idx_label in idx_labels
]
Expand Down
2 changes: 1 addition & 1 deletion bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4398,7 +4398,7 @@ def to_excel(
**kwargs,
) -> None:
return self.to_pandas(allow_large_results=allow_large_results).to_excel(
excel_writer, sheet_name, **kwargs
excel_writer, sheet_name=sheet_name, **kwargs
)

def to_latex(
Expand Down
8 changes: 4 additions & 4 deletions bigframes/ml/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def confusion_matrix(
y_true = row["y_true"]
y_pred = row["y_pred"]
count = row["dummy"]
confusion_matrix[y_pred][y_true] = count
confusion_matrix.at[y_true, y_pred] = count

return confusion_matrix

Expand Down Expand Up @@ -251,7 +251,7 @@ def recall_score(
/ is_accurate.groupby(y_true_series).count()
).to_pandas()

recall_score = pd.Series(0, index=index)
recall_score = pd.Series(0.0, index=index)
for i in recall_score.index:
recall_score.loc[i] = recall.loc[i]

Expand Down Expand Up @@ -321,7 +321,7 @@ def _precision_score_per_label(y_true: bpd.Series, y_pred: bpd.Series) -> pd.Ser
is_accurate.groupby(y_pred).sum() / is_accurate.groupby(y_pred).count()
).to_pandas()

precision_score = pd.Series(0, index=index)
precision_score = pd.Series(0.0, index=index)
for i in precision.index:
precision_score.loc[i] = precision.loc[i]

Expand Down Expand Up @@ -366,7 +366,7 @@ def f1_score(
recall = recall_score(y_true_series, y_pred_series, average=None)
precision = precision_score(y_true_series, y_pred_series, average=None)

f1_score = pd.Series(0, index=recall.index)
f1_score = pd.Series(0.0, index=recall.index)
for index in recall.index:
if precision[index] + recall[index] != 0:
f1_score[index] = (
Expand Down
2 changes: 1 addition & 1 deletion bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2292,7 +2292,7 @@ def to_excel(
self, excel_writer, sheet_name="Sheet1", *, allow_large_results=None, **kwargs
) -> None:
return self.to_pandas(allow_large_results=allow_large_results).to_excel(
excel_writer, sheet_name, **kwargs
excel_writer, sheet_name=sheet_name, **kwargs
)

def to_json(
Expand Down
2 changes: 2 additions & 0 deletions bigframes/session/polars_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@
numeric_ops.SubOp,
numeric_ops.MulOp,
numeric_ops.DivOp,
numeric_ops.CeilOp,
numeric_ops.FloorOp,
numeric_ops.FloorDivOp,
numeric_ops.ModOp,
generic_ops.AsTypeOp,
Expand Down
7 changes: 7 additions & 0 deletions bigframes/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,10 @@
These modules are provided for testing the BigQuery DataFrames package. The
interface is not considered stable.
"""
from bigframes.testing.utils import (
assert_frame_equal,
assert_index_equal,
assert_series_equal,
)

__all__ = ["assert_frame_equal", "assert_series_equal", "assert_index_equal"]
44 changes: 36 additions & 8 deletions bigframes/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import base64
import decimal
import re
from typing import Iterable, Optional, Sequence, Set, Union
from typing import Iterable, Optional, Sequence, Set, TypeVar, Union

import geopandas as gpd # type: ignore
import google.api_core.operation
Expand All @@ -29,7 +29,6 @@

from bigframes import operations as ops
from bigframes.core import expression as ex
import bigframes.dtypes
import bigframes.functions._utils as bff_utils
import bigframes.pandas as bpd

Expand Down Expand Up @@ -69,6 +68,8 @@
"content",
]

SeriesOrIndexT = TypeVar("SeriesOrIndexT", pd.Series, pd.Index)


def pandas_major_version() -> int:
match = re.search(r"^v?(\d+)", pd.__version__.strip())
Expand All @@ -90,19 +91,30 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar


def _normalize_all_nulls(col: pd.Series) -> pd.Series:
if col.dtype in (bigframes.dtypes.FLOAT_DTYPE, bigframes.dtypes.INT_DTYPE):
col = col.astype("float64")
if pd_types.is_object_dtype(col):
col = col.fillna(float("nan"))
if pd_types.is_float_dtype(col.dtype):
col = col.astype("float64").astype("Float64")
return col


def _normalize_index_nulls(idx: pd.Index) -> pd.Index:
if isinstance(idx, pd.MultiIndex):
new_levels = [
_normalize_index_nulls(idx.get_level_values(i)) for i in range(idx.nlevels)
]
return pd.MultiIndex.from_arrays(new_levels, names=idx.names)
if idx.hasnans:
if pd_types.is_float_dtype(idx.dtype):
idx = idx.astype("float64").astype("Float64")
return idx


def assert_frame_equal(
left: pd.DataFrame,
right: pd.DataFrame,
*,
ignore_order: bool = False,
nulls_are_nan: bool = True,
downcast_object: bool = True,
**kwargs,
):
if ignore_order:
Expand All @@ -118,9 +130,17 @@ def assert_frame_equal(
left = left.sort_index()
right = right.sort_index()

# Pandas sometimes likes to produce object dtype columns
# However, nan/None/Null inconsistency makes comparison futile, convert to typed column
if downcast_object:
left = left.apply(lambda x: x.infer_objects())
right = right.apply(lambda x: x.infer_objects())

if nulls_are_nan:
left = left.apply(_normalize_all_nulls)
right = right.apply(_normalize_all_nulls)
left.index = _normalize_index_nulls(left.index)
right.index = _normalize_index_nulls(right.index)

pd.testing.assert_frame_equal(left, right, **kwargs)

Expand Down Expand Up @@ -151,12 +171,20 @@ def assert_series_equal(
right.index = right.index.astype("Int64")

if nulls_are_nan:
left = _normalize_all_nulls(left)
right = _normalize_all_nulls(right)
left = _normalize_all_nulls(left.infer_objects())
right = _normalize_all_nulls(right.infer_objects())
left.index = _normalize_index_nulls(left.index)
right.index = _normalize_index_nulls(right.index)
left.name = pd.NA if pd.isna(left.name) else left.name # type: ignore
right.name = pd.NA if pd.isna(right.name) else right.name # type: ignore

pd.testing.assert_series_equal(left, right, **kwargs)


def assert_index_equal(left, right, **kwargs):
pd.testing.assert_index_equal(left, right, **kwargs)


def _standardize_index(idx):
return pd.Index(list(idx), name=idx.name)

Expand Down
Loading
Loading