Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 91 additions & 50 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,14 +823,11 @@ def cot(arg: Expr) -> Expr:
>>> from math import pi
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [pi / 4]})
>>> import builtins
>>> result = df.select(
... dfn.functions.cot(dfn.col("a")).alias("cot")
... )
>>> builtins.round(
... result.collect_column("cot")[0].as_py(), 1
... )
1.0
>>> result.collect_column("cot")[0].as_py()
1.0...
"""
return Expr(f.cot(arg.expr))

Expand Down Expand Up @@ -1171,14 +1168,11 @@ def radians(arg: Expr) -> Expr:
>>> from math import pi
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [180.0]})
>>> import builtins
>>> result = df.select(
... dfn.functions.radians(dfn.col("a")).alias("rad")
... )
>>> builtins.round(
... result.collect_column("rad")[0].as_py(), 6
... )
3.141593
>>> result.collect_column("rad")[0].as_py()
3.14159...
"""
return Expr(f.radians(arg.expr))

Expand Down Expand Up @@ -2521,10 +2515,6 @@ def array_agg(
distinct: If True, a single entry for each distinct value will be in the result
filter: If provided, only compute against rows for which the filter is True
order_by: Order the resultant array values. Accepts column names or expressions.

For example::

df.aggregate([], array_agg(col("a"), order_by="b"))
"""
order_by_raw = sort_list_to_raw_sort_list(order_by)
filter_raw = filter.expr if filter is not None else None
Expand Down Expand Up @@ -3081,9 +3071,14 @@ def first_value(
column names or expressions.
null_treatment: Assign whether to respect or ignore null values.

For example::

df.aggregate([], first_value(col("a"), order_by="ts"))
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
>>> result = df.aggregate(
... [], [dfn.functions.first_value(dfn.col("a")).alias("v")]
... )
>>> result.collect_column("v")[0].as_py()
10
"""
order_by_raw = sort_list_to_raw_sort_list(order_by)
filter_raw = filter.expr if filter is not None else None
Expand Down Expand Up @@ -3118,9 +3113,14 @@ def last_value(
column names or expressions.
null_treatment: Assign whether to respect or ignore null values.

For example::

df.aggregate([], last_value(col("a"), order_by="ts"))
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
>>> result = df.aggregate(
... [], [dfn.functions.last_value(dfn.col("a")).alias("v")]
... )
>>> result.collect_column("v")[0].as_py()
30
"""
order_by_raw = sort_list_to_raw_sort_list(order_by)
filter_raw = filter.expr if filter is not None else None
Expand Down Expand Up @@ -3157,9 +3157,14 @@ def nth_value(
column names or expressions.
null_treatment: Assign whether to respect or ignore null values.

For example::

df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
>>> result = df.aggregate(
... [], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")]
... )
>>> result.collect_column("v")[0].as_py()
20
"""
order_by_raw = sort_list_to_raw_sort_list(order_by)
filter_raw = filter.expr if filter is not None else None
Expand Down Expand Up @@ -3337,9 +3342,14 @@ def lead(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

lead(col("b"), order_by="ts")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
>>> result = df.select(
... dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1,
... default_value=0, order_by="a").alias("lead"))
>>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
[2, 3, 0]
"""
if not isinstance(default_value, pa.Scalar) and default_value is not None:
default_value = pa.scalar(default_value)
Expand Down Expand Up @@ -3392,9 +3402,14 @@ def lag(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

lag(col("b"), order_by="ts")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
>>> result = df.select(
... dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1,
... default_value=0, order_by="a").alias("lag"))
>>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
[0, 1, 2]
"""
if not isinstance(default_value, pa.Scalar):
default_value = pa.scalar(default_value)
Expand Down Expand Up @@ -3437,9 +3452,13 @@ def row_number(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

row_number(order_by="points")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
>>> result = df.select(
... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn"))
>>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
[1, 2, 3]
"""
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
order_by_raw = sort_list_to_raw_sort_list(order_by)
Expand Down Expand Up @@ -3481,9 +3500,14 @@ def rank(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

rank(order_by="points")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 10, 20]})
>>> result = df.select(
... dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk")
... )
>>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
[1, 1, 3]
"""
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
order_by_raw = sort_list_to_raw_sort_list(order_by)
Expand Down Expand Up @@ -3520,9 +3544,13 @@ def dense_rank(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

dense_rank(order_by="points")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 10, 20]})
>>> result = df.select(
... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr"))
>>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
[1, 1, 2]
"""
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
order_by_raw = sort_list_to_raw_sort_list(order_by)
Expand Down Expand Up @@ -3560,9 +3588,14 @@ def percent_rank(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

percent_rank(order_by="points")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
>>> result = df.select(
... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr"))
>>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
[0.0, 0.5, 1.0]
"""
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
order_by_raw = sort_list_to_raw_sort_list(order_by)
Expand Down Expand Up @@ -3600,9 +3633,17 @@ def cume_dist(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

cume_dist(order_by="points")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1., 2., 2., 3.]})
>>> result = df.select(
... dfn.col("a"),
... dfn.functions.cume_dist(
... order_by="a"
... ).alias("cd")
... )
>>> result.collect_column("cd").to_pylist()
[0.25..., 0.75..., 0.75..., 1.0...]
"""
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
order_by_raw = sort_list_to_raw_sort_list(order_by)
Expand Down Expand Up @@ -3644,9 +3685,13 @@ def ntile(
order_by: Set ordering within the window frame. Accepts
column names or expressions.

For example::

ntile(3, order_by="points")
Examples:
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [10, 20, 30, 40]})
>>> result = df.select(
... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
>>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
[1, 1, 2, 2]
"""
partition_by_raw = expr_list_to_raw_expr_list(partition_by)
order_by_raw = sort_list_to_raw_sort_list(order_by)
Expand Down Expand Up @@ -3682,10 +3727,6 @@ def string_agg(
order_by: Set the ordering of the expression to evaluate. Accepts
column names or expressions.

For example::

df.aggregate([], string_agg(col("a"), ",", order_by="b"))

Examples:
---------
>>> ctx = dfn.SessionContext()
Expand Down
Loading