Skip to content

Commit

Permalink
feat: add resample operator in post processing (#16607)
Browse files Browse the repository at this point in the history
* feat: add resample operator in post processing

* wip

* fill zero values

* updates

* fix ut
  • Loading branch information
zhaoyongjie authored Sep 17, 2021
1 parent a4f6001 commit cc1c6c1
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 0 deletions.
1 change: 1 addition & 0 deletions superset/charts/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,7 @@ class ChartDataPostProcessingOperationSchema(Schema):
"sort",
"diff",
"compare",
"resample",
)
),
example="aggregate",
Expand Down
26 changes: 26 additions & 0 deletions superset/utils/pandas_postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,3 +915,29 @@ def outliers(series: Series) -> Set[float]:
for metric in metrics
}
return aggregate(df, groupby=groupby, aggregates=aggregates)


def resample(
    df: DataFrame,
    rule: str,
    method: str,
    time_column: str,
    fill_value: Optional[Union[float, int]] = None,
) -> DataFrame:
    """
    Resample a timeseries dataframe.

    The frame is indexed by ``time_column``, resampled at frequency ``rule``,
    and returned with ``time_column`` restored as a regular column.

    :param df: DataFrame to resample.
    :param rule: The offset string representing target conversion, e.g. "1D".
    :param method: Name of the public, argument-free resampler method used to
           produce the resampled values, e.g. "ffill", "bfill" or "asfreq".
    :param time_column: Existing column in ``df`` that holds the timestamps.
    :param fill_value: Value passed to ``asfreq`` for rows created by the
           resample. Only used when ``method`` is "asfreq"; ignored for every
           other method.
    :return: DataFrame after resample
    :raises AttributeError: If ``method`` is not a public method of the
            resampler.
    :raises KeyError: If ``time_column`` is not a column of ``df``.
    """
    resampler = df.set_index(time_column).resample(rule)

    if method == "asfreq" and fill_value is not None:
        resampled = resampler.asfreq(fill_value=fill_value)
    else:
        # `method` is dispatched dynamically and presumably originates from a
        # chart-data request, so only public callables defined on the resampler
        # class are accepted. Looking the name up on the class (not the
        # instance) keeps column names and instance attributes from being
        # mistaken for methods. AttributeError is kept so unknown names fail
        # with the same exception type as before.
        # NOTE(review): the module may prefer QueryObjectValidationError here;
        # confirm it is in scope before switching.
        if method.startswith("_") or not callable(
            getattr(type(resampler), method, None)
        ):
            raise AttributeError(f"Unsupported resample method: {method!r}")
        resampled = getattr(resampler, method)()

    return resampled.reset_index()
19 changes: 19 additions & 0 deletions tests/integration_tests/pandas_postprocessing_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,3 +870,22 @@ def test_boxplot_percentile_incorrect_params(self):
metrics=["cars"],
percentiles=[10, 90, 10],
)

def test_resample(self):
    """Check daily resampling with forward-fill and with asfreq + fill value."""
    # Expose the fixture's index as a regular "time_column" column.
    source = timeseries_df.copy()
    source.index.name = "time_column"
    source.reset_index(inplace=True)

    # Forward-fill: gaps repeat the last observed row.
    forward_filled = proc.resample(
        df=source, rule="1D", method="ffill", time_column="time_column",
    )
    expected_labels = ["x", "y", "y", "y", "z", "z", "q"]
    expected_values = [1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0]
    self.assertListEqual(forward_filled["label"].tolist(), expected_labels)
    self.assertListEqual(forward_filled["y"].tolist(), expected_values)

    # asfreq with a fill value: gaps are filled with 0 in every column.
    zero_filled = proc.resample(
        df=source,
        rule="1D",
        method="asfreq",
        time_column="time_column",
        fill_value=0,
    )
    expected_labels = ["x", "y", 0, 0, "z", 0, "q"]
    expected_values = [1.0, 2.0, 0, 0, 3.0, 0, 4.0]
    self.assertListEqual(zero_filled["label"].tolist(), expected_labels)
    self.assertListEqual(zero_filled["y"].tolist(), expected_values)

0 comments on commit cc1c6c1

Please sign in to comment.