Skip to content

Commit

Permalink
Add Series.row_index/1 (#862)
Browse files Browse the repository at this point in the history
  • Loading branch information
iurimateus authored Feb 20, 2024
1 parent 0f666b6 commit 9e75178
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 0 deletions.
8 changes: 8 additions & 0 deletions lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ defmodule Explorer.Backend.LazySeries do
covariance: 3,
all: 1,
any: 1,
row_index: 1,
# Strings
contains: 2,
replace: 3,
Expand Down Expand Up @@ -1111,6 +1112,13 @@ defmodule Explorer.Backend.LazySeries do
Backend.Series.new(data, :string)
end

@impl true
def row_index(series) do
  # The index is always an unsigned 32-bit column; the dtype of `series`
  # is never consulted here.
  index_dtype = {:u, 32}

  # Wrap the operand in a `:row_index` lazy operation, then expose it as a
  # backend series carrying the same dtype.
  :row_index
  |> new([lazy_series!(series)], index_dtype)
  |> Backend.Series.new(index_dtype)
end

@remaining_non_lazy_operations [
at: 2,
at_every: 2,
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ defmodule Explorer.Backend.Series do
@callback covariance(s, s, ddof :: non_neg_integer()) :: float() | non_finite() | lazy_s() | nil
@callback all?(s) :: boolean() | lazy_s()
@callback any?(s) :: boolean() | lazy_s()
@callback row_index(s) :: s | lazy_s()

# Cumulative

Expand Down
6 changes: 6 additions & 0 deletions lib/explorer/polars_backend/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ defmodule Explorer.PolarsBackend.Expression do
shift: 3,
slice: 2,
slice: 3,
row_index: 1,
concat: 1,
column: 1,
correlation: 4,
Expand Down Expand Up @@ -296,6 +297,11 @@ defmodule Explorer.PolarsBackend.Expression do
end
end

# Translates a lazy `:row_index` operation into an integer-range expression
# running from 0 up to the size of the operand, in steps of 1, typed as u32.
def to_expr(%LazySeries{op: :row_index, args: [lazy_series]}) do
  stop_expr = lazy_series |> to_expr() |> Native.expr_size()
  start_expr = to_expr(0)

  Native.expr_int_range(start_expr, stop_expr, 1, {:u, 32})
end

for {op, arity} <- @all_expressions do
args = Macro.generate_arguments(arity, __MODULE__)

Expand Down
2 changes: 2 additions & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ defmodule Explorer.PolarsBackend.Native do
def expr_describe_filter_plan(_df, _expr), do: err()
def expr_float(_number), do: err()
def expr_integer(_number), do: err()
def expr_int_range(_start, _end, _step, _dtype), do: err()
def expr_series(_series), do: err()
def expr_string(_string), do: err()
def expr_struct(_map), do: err()
Expand Down Expand Up @@ -382,6 +383,7 @@ defmodule Explorer.PolarsBackend.Native do
def s_rename(_s, _name), do: err()
def s_reverse(_s), do: err()
def s_round(_s, _decimals), do: err()
def s_row_index(_s), do: err()
def s_floor(_s), do: err()
def s_ceil(_s), do: err()
def s_rstrip(_s, _string), do: err()
Expand Down
3 changes: 3 additions & 0 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ defmodule Explorer.PolarsBackend.Series do
@impl true
def any?(series), do: Shared.apply_series(series, :s_any)

@impl true
def row_index(series) do
  # Dispatches the `:s_row_index` native operation through `Shared.apply_series/2`.
  Shared.apply_series(series, :s_row_index)
end

# Cumulative

@impl true
Expand Down
37 changes: 37 additions & 0 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2909,6 +2909,43 @@ defmodule Explorer.Series do
def any?(%Series{dtype: :boolean} = series), do: apply_series(series, :any?)
def any?(%Series{dtype: dtype}), do: dtype_error("any?/1", dtype, [:boolean])

@doc """
Returns a series of indexes for each item (row) in the series, starting from 0.

The result has the same size as the given series and is of type `:u32`.
Entries that are `nil` are indexed like any other row.

## Examples

    iex> s = Series.from_list([nil, true, true])
    iex> Series.row_index(s)
    #Explorer.Series<
      Polars[3]
      u32 [0, 1, 2]
    >

This function can be used to add a row index as the first column of a dataframe.
The resulting column is a regular column of type `:u32`.

    iex> require Explorer.DataFrame, as: DF
    iex> df = DF.new(a: [1, 3, 5], b: [2, 4, 6])
    iex> DF.mutate(df, index: row_index(a)) |> DF.relocate("index", before: 0)
    #Explorer.DataFrame<
      Polars[3 x 3]
      index u32 [0, 1, 2]
      a s64 [1, 3, 5]
      b s64 [2, 4, 6]
    >

Because it is a regular column, it can take part in further operations,
such as adding an offset to build an identifier:

    iex> require Explorer.DataFrame, as: DF
    iex> df = DF.new(a: [1, 3, 5], b: [2, 4, 6])
    iex> DF.mutate(df, id: row_index(a) + 1000)
    #Explorer.DataFrame<
      Polars[3 x 3]
      a s64 [1, 3, 5]
      b s64 [2, 4, 6]
      id s64 [1000, 1001, 1002]
    >
"""
@doc type: :shape
@spec row_index(Series.t()) :: Series.t()
def row_index(%Series{} = series), do: apply_series(series, :row_index)

# Cumulative

@doc """
Expand Down
1 change: 1 addition & 0 deletions native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ features = [
"product",
"peaks",
"moment",
"range",
"rank",
"propagate_nans",
"extract_jsonpath"
Expand Down
10 changes: 10 additions & 0 deletions native/explorer/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,16 @@ pub fn expr_join(expr: ExExpr, sep: String) -> ExExpr {
ExExpr::new(expr.list().join(sep.lit()))
}

/// Builds an integer-range expression from `start` to `end`, advancing by
/// `step`, whose values are produced with the requested `dtype`.
///
/// Panics if `dtype` cannot be converted into a Polars `DataType`.
#[rustler::nif]
pub fn expr_int_range(start: ExExpr, end: ExExpr, step: i64, dtype: ExSeriesDtype) -> ExExpr {
    let range_start = start.clone_inner();
    let range_end = end.clone_inner();
    let polars_dtype = DataType::try_from(&dtype).unwrap();

    ExExpr::new(dsl::int_range(range_start, range_end, step, polars_dtype))
}

#[rustler::nif]
pub fn expr_lengths(expr: ExExpr) -> ExExpr {
let expr = expr.clone_inner();
Expand Down
2 changes: 2 additions & 0 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ rustler::init!(
expr_float,
expr_head,
expr_integer,
expr_int_range,
expr_peaks,
expr_rank,
expr_unary_not,
Expand Down Expand Up @@ -439,6 +440,7 @@ rustler::init!(
s_rename,
s_replace,
s_reverse,
s_row_index,
s_rstrip,
s_sample_n,
s_sample_frac,
Expand Down
7 changes: 7 additions & 0 deletions native/explorer/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1864,3 +1864,10 @@ pub fn s_json_path_match(s: ExSeries, json_path: &str) -> Result<ExSeries, Explo
.clone();
Ok(ExSeries::new(s2))
}

/// Returns a new series named `"row_index"` holding `0..len`, where `len` is
/// the length of `series`.
///
/// Fails with an error when the length does not fit in a `u32`.
#[rustler::nif]
pub fn s_row_index(series: ExSeries) -> Result<ExSeries, ExplorerError> {
    let row_count = u32::try_from(series.len())?;

    Ok(ExSeries::new(Series::new("row_index", 0..row_count)))
}
21 changes: 21 additions & 0 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4452,4 +4452,25 @@ defmodule Explorer.DataFrameTest do
}
end
end

describe "row_index/1" do
  test "works as row_count(), including offset" do
    # Columns of the input frame; each expectation adds one derived column.
    base = %{a: [1, 3, 5], b: [2, 4, 6]}
    df = DF.new(a: [1, 3, 5], b: [2, 4, 6])

    # Plain row index: starts at 0 and has one entry per row.
    indexed =
      df
      |> DF.mutate(index: row_index(a))
      |> DF.to_columns(atom_keys: true)

    assert indexed == Map.put(base, :index, [0, 1, 2])

    # The index behaves like a regular column, so an offset can be added.
    with_offset =
      df
      |> DF.mutate(id: row_index(a) + 1000)
      |> DF.to_columns(atom_keys: true)

    assert with_offset == Map.put(base, :id, [1000, 1001, 1002])
  end
end
end

0 comments on commit 9e75178

Please sign in to comment.