Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 64 additions & 16 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@

if TYPE_CHECKING:
from datafusion.context import SessionContext

__all__ = [
"abs",
"acos",
Expand Down Expand Up @@ -268,13 +267,18 @@
"sum",
"tan",
"tanh",
"to_char",
"to_date",
"to_hex",
"to_local_time",
"to_time",
"to_timestamp",
"to_timestamp_micros",
"to_timestamp_millis",
"to_timestamp_nanos",
"to_timestamp_seconds",
"to_unixtime",
"today",
"translate",
"trim",
"trunc",
Expand Down Expand Up @@ -1010,67 +1014,111 @@ def now() -> Expr:
return Expr(f.now())


def to_char(arg: Expr, formatter: Expr) -> Expr:
    """Format a date, time, timestamp or duration as a string.

    ``formatter`` follows the ``strftime`` syntax of the Rust ``chrono`` crate;
    see https://docs.rs/chrono/latest/chrono/format/strftime/index.html for the
    supported specifiers.
    """
    raw_value = arg.expr
    raw_format = formatter.expr
    return Expr(f.to_char(raw_value, raw_format))


def _unwrap_exprs(args: tuple[Expr, ...]) -> list:
return [arg.expr for arg in args]


def to_date(arg: Expr, *formatters: Expr) -> Expr:
    """Convert a value to a date (YYYY-MM-DD).

    Strings, numeric and timestamp types are supported as input. Integers and
    doubles are interpreted as days since the unix epoch. When no
    ``formatters`` are given, strings are parsed as YYYY-MM-DD
    (e.g. '2023-07-20').

    ``formatters`` follow the ``strftime`` syntax of the Rust ``chrono`` crate;
    see https://docs.rs/chrono/latest/chrono/format/strftime/index.html.
    """
    raw_value = arg.expr
    raw_formats = _unwrap_exprs(formatters)
    return Expr(f.to_date(raw_value, *raw_formats))


def to_local_time(*args: Expr) -> Expr:
    """Convert a timestamp with a timezone to a timestamp without a timezone.

    Daylight saving time changes are handled by this function.
    """
    raw_args = _unwrap_exprs(args)
    return Expr(f.to_local_time(*raw_args))


def to_time(arg: Expr, *formatters: Expr) -> Expr:
    """Convert a value to a time. Strings and timestamps are supported as input.

    When no ``formatters`` are given, strings are parsed as HH:MM:SS, HH:MM or
    HH:MM:SS.nnnnnnnnn.

    ``formatters`` follow the ``strftime`` syntax of the Rust ``chrono`` crate;
    see https://docs.rs/chrono/latest/chrono/format/strftime/index.html.
    """
    raw_value = arg.expr
    raw_formats = _unwrap_exprs(formatters)
    return Expr(f.to_time(raw_value, *raw_formats))


def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a string and optional formats to a ``Timestamp`` in nanoseconds.

    Args:
        arg: Expression to convert.
        formatters: Zero or more format expressions. They follow the
            ``strftime`` syntax of the Rust ``chrono`` crate; see
            https://docs.rs/chrono/latest/chrono/format/strftime/index.html.

    Returns:
        An ``Expr`` wrapping the underlying ``to_timestamp`` call.
    """
    # ``formatters`` is a varargs tuple and therefore never ``None``; an empty
    # tuple simply unpacks to no extra arguments, so no special case is needed.
    return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters)))


def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a string and optional formats to a ``Timestamp`` in milliseconds.

    See :py:func:`to_timestamp` for a description on how to use formatters.

    Args:
        arg: Expression to convert.
        formatters: Zero or more format expressions.

    Returns:
        An ``Expr`` wrapping the underlying ``to_timestamp_millis`` call.
    """
    return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters)))


def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a string and optional formats to a ``Timestamp`` in microseconds.

    See :py:func:`to_timestamp` for a description on how to use formatters.

    Args:
        arg: Expression to convert.
        formatters: Zero or more format expressions.

    Returns:
        An ``Expr`` wrapping the underlying ``to_timestamp_micros`` call.
    """
    return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters)))


def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a string and optional formats to a ``Timestamp`` in nanoseconds.

    See :py:func:`to_timestamp` for a description on how to use formatters.

    Args:
        arg: Expression to convert.
        formatters: Zero or more format expressions.

    Returns:
        An ``Expr`` wrapping the underlying ``to_timestamp_nanos`` call.
    """
    return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters)))


def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a string and optional formats to a ``Timestamp`` in seconds.

    See :py:func:`to_timestamp` for a description on how to use formatters.

    Args:
        arg: Expression to convert.
        formatters: Zero or more format expressions.

    Returns:
        An ``Expr`` wrapping the underlying ``to_timestamp_seconds`` call.
    """
    return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))


def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
    """Converts a string and optional formats to a Unixtime.

    Args:
        string: Expression to convert.
        format_arguments: Zero or more format expressions.

    Returns:
        An ``Expr`` wrapping the underlying ``to_unixtime`` call.
    """
    return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))


def current_date() -> Expr:
    """Return the current UTC date as a Date32 value."""
    raw_expr = f.current_date()
    return Expr(raw_expr)


# ``today`` is an alias bound to the very same function object as ``current_date``.
today = current_date


def current_time() -> Expr:
    """Return the current UTC time as a Time64 value."""
    raw_expr = f.current_time()
    return Expr(raw_expr)
Expand Down
75 changes: 74 additions & 1 deletion python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import math
from datetime import datetime, timezone
from datetime import date, datetime, time, timezone

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -952,6 +952,12 @@ def test_temporal_functions(df):
f.to_timestamp_nanos(
literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f")
),
f.to_time(literal("12:30:45")),
f.to_time(literal("12-30-45"), literal("%H-%M-%S")),
f.to_date(literal("2017-05-31")),
f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")),
f.to_local_time(column("d")),
f.to_char(column("d"), literal("%d-%m-%Y")),
)
result = df.collect()
assert len(result) == 1
Expand Down Expand Up @@ -1026,6 +1032,73 @@ def test_temporal_functions(df):
[datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3,
type=pa.timestamp("ns"),
)
assert result.column(17) == pa.array(
[time(12, 30, 45)] * 3,
type=pa.time64("ns"),
)
assert result.column(18) == pa.array(
[time(12, 30, 45)] * 3,
type=pa.time64("ns"),
)
assert result.column(19) == pa.array(
[date(2017, 5, 31)] * 3,
type=pa.date32(),
)
assert result.column(20) == pa.array(
[date(2017, 5, 31)] * 3,
type=pa.date32(),
)
assert result.column(21) == pa.array(
[
datetime(2022, 12, 31, tzinfo=DEFAULT_TZ),
datetime(2027, 6, 26, tzinfo=DEFAULT_TZ),
datetime(2020, 7, 2, tzinfo=DEFAULT_TZ),
],
type=pa.timestamp("us"),
)

assert result.column(22) == pa.array(
[
"31-12-2022",
"26-06-2027",
"02-07-2020",
],
type=pa.string(),
)


def test_to_time_invalid_input(df):
    """Collecting ``to_time`` of an unparseable string must raise a parse error."""
    expected_message = r"Error parsing 'not-a-time' as time"
    with pytest.raises(Exception, match=expected_message):
        bad_time = f.to_time(literal("not-a-time"))
        df.select(bad_time).collect()


def test_to_time_mismatched_formatter(df):
    """A formatter that does not match the input string must raise a parse error."""
    expected_message = r"Error parsing '12:30:45' as time"
    with pytest.raises(Exception, match=expected_message):
        mismatched = f.to_time(literal("12:30:45"), literal("%Y-%m-%d"))
        df.select(mismatched).collect()


def test_to_date_invalid_input(df):
    """Collecting ``to_date`` of an unparseable string must raise, mentioning Date32."""
    expected_message = r"Date32"
    with pytest.raises(Exception, match=expected_message):
        bad_date = f.to_date(literal("not-a-date"))
        df.select(bad_date).collect()


def test_temporal_formatter_requires_expr():
    """Passing a plain ``str`` as a formatter must fail with ``AttributeError``."""
    expected_message = "'str' object has no attribute 'expr'"
    with pytest.raises(AttributeError, match=expected_message):
        f.to_time(literal("12:30:45"), "not-an-expr")


def test_today_returns_date32(df):
    """The column produced by ``today()`` must have the Arrow ``date32`` type."""
    batches = df.select(f.today().alias("today")).collect()
    first_batch = batches[0]
    assert first_batch.column(0).type == pa.date32()


def test_today_alias_matches_current_date(df):
    """``today()`` and ``current_date()`` must yield equal columns in one query."""
    current_date_expr = f.current_date().alias("current_date")
    today_expr = f.today().alias("today")
    first_batch = df.select(current_date_expr, today_expr).collect()[0]

    assert first_batch.column(0) == first_batch.column(1)


def test_arrow_cast(df):
Expand Down
8 changes: 8 additions & 0 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,9 @@ expr_fn!(
"Converts the number to its equivalent hexadecimal representation."
);
expr_fn!(now);
expr_fn_vec!(to_date);
expr_fn_vec!(to_local_time);
expr_fn_vec!(to_time);
expr_fn_vec!(to_timestamp);
expr_fn_vec!(to_timestamp_millis);
expr_fn_vec!(to_timestamp_nanos);
Expand All @@ -613,6 +616,7 @@ expr_fn!(date_part, part date);
expr_fn!(date_trunc, part date);
expr_fn!(date_bin, stride source origin);
expr_fn!(make_date, year month day);
expr_fn!(to_char, datetime format);

expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.");
expr_fn_vec!(
Expand Down Expand Up @@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(tan))?;
m.add_wrapped(wrap_pyfunction!(tanh))?;
m.add_wrapped(wrap_pyfunction!(to_hex))?;
m.add_wrapped(wrap_pyfunction!(to_char))?;
m.add_wrapped(wrap_pyfunction!(to_date))?;
m.add_wrapped(wrap_pyfunction!(to_local_time))?;
m.add_wrapped(wrap_pyfunction!(to_time))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;
Expand Down