chore(deps): bump pandas >=2.0 #24705
Changes from 7 commits
69daeac
5f93d39
71c991a
15b3cec
5ffbf40
f93ccb9
5bd5a2c
b5f71b3
a09d22d
07360b0
```diff
@@ -134,17 +134,15 @@ def get_df_payload(

         if query_obj and cache_key and not cache.is_loaded:
             try:
-                invalid_columns = [
+                if invalid_columns := [
                     col
                     for col in get_column_names_from_columns(query_obj.columns)
                     + get_column_names_from_metrics(query_obj.metrics or [])
                     if (
                         col not in self._qc_datasource.column_names
                         and col != DTTM_ALIAS
                     )
-                ]
-
-                if invalid_columns:
+                ]:
                     raise QueryObjectValidationError(
                         _(
                             "Columns missing in dataset: %(invalid_columns)s",
```
```diff
@@ -570,7 +568,7 @@ def get_data(self, df: pd.DataFrame) -> str | list[dict[str, Any]]:
                     df, index=include_index, **config["CSV_EXPORT"]
                 )
             elif self._query_context.result_format == ChartDataResultFormat.XLSX:
-                result = excel.df_to_excel(df, **config["EXCEL_EXPORT"])
+                result = excel.df_to_excel(df)
             return result or ""

         return df.to_dict(orient="records")
```

> **Review comment:** The keyword arg `encoding` is deprecated since 1.5.0: https://pandas.pydata.org/pandas-docs/version/1.5/reference/api/pandas.DataFrame.to_excel.html
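For anyone hitting the same deprecation, here is a minimal standalone sketch of exporting to XLSX without the removed keyword; the DataFrame and options are illustrative and this is not Superset's `excel.df_to_excel` helper:

```python
from io import BytesIO

import pandas as pd

df = pd.DataFrame({"country": ["UK", "US"], "sum_metric": [5, 12]})

# pandas 2.0 removed the `encoding` keyword from DataFrame.to_excel
# (deprecated since 1.5.0); passing it now raises a TypeError, so only
# the remaining supported options can be forwarded.
buffer = BytesIO()
df.to_excel(buffer, index=False)  # needs an Excel engine such as openpyxl
xlsx_bytes = buffer.getvalue()    # bytes suitable for an XLSX download response
```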
```diff
@@ -758,11 +758,6 @@ class D3Format(TypedDict, total=False):
 # note: index option should not be overridden
 CSV_EXPORT = {"encoding": "utf-8"}

-# Excel Options: key/value pairs that will be passed as argument to DataFrame.to_excel
-# method.
-# note: index option should not be overridden
-EXCEL_EXPORT = {"encoding": "utf-8"}
-

 # ---------------------------------------------------
 # Time grain configurations
 # ---------------------------------------------------
```

> **Review comment:** The …
>
> **Review comment:** Only … I will leave …
>
> **Reply:** Thank you for your feedback!
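For context, a rough sketch of how these export dicts are consumed; this is a simplified kwargs-splat pattern, not Superset's actual export code path:

```python
import pandas as pd

# Key/value pairs forwarded verbatim to DataFrame.to_csv, mirroring CSV_EXPORT.
CSV_EXPORT = {"encoding": "utf-8"}

df = pd.DataFrame({"country": ["UK", "US"], "sum_metric": [5, 12]})

# Because the dict is splatted straight into the pandas call, any key that a
# new pandas release drops (like `encoding` for to_excel in 2.0) breaks the
# export, which is why the EXCEL_EXPORT option was removed above.
csv_payload = df.to_csv(index=False, **CSV_EXPORT)
print(csv_payload)
```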
```diff
@@ -21,6 +21,7 @@
 from typing import Union

 import backoff
+import pandas as pd
 from flask_babel import gettext as __
 from slack_sdk import WebClient
 from slack_sdk.errors import (
```
````diff
@@ -121,17 +122,19 @@ def _get_body(self) -> str:
             # need to truncate the data
             for i in range(len(df) - 1):
                 truncated_df = df[: i + 1].fillna("")
-                truncated_df = truncated_df.append(
-                    {k: "..." for k in df.columns}, ignore_index=True
+                truncated_row = pd.Series({k: "..." for k in df.columns})
+                truncated_df = pd.concat(
+                    [truncated_df, truncated_row.to_frame().T], ignore_index=True
                 )
                 tabulated = df.to_markdown()
                 table = f"```\n{tabulated}\n```\n\n(table was truncated)"
                 message = self._message_template(table)
                 if len(message) > MAXIMUM_MESSAGE_SIZE:
                     # Decrement i and build a message that is under the limit
                     truncated_df = df[:i].fillna("")
-                    truncated_df = truncated_df.append(
-                        {k: "..." for k in df.columns}, ignore_index=True
+                    truncated_row = pd.Series({k: "..." for k in df.columns})
+                    truncated_df = pd.concat(
+                        [truncated_df, truncated_row.to_frame().T], ignore_index=True
                     )
                     tabulated = df.to_markdown()
                     table = (
````

> **Review comment:** `DataFrame.append` is deprecated since 1.4: https://pandas.pydata.org/pandas-docs/version/1.5/reference/api/pandas.DataFrame.append.html
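As a minimal illustration of this migration (column names and values are made up, not the report data Superset actually sends to Slack):

```python
import pandas as pd

df = pd.DataFrame({"country": ["UK", "US"], "sum_metric": [5, 12]})

# DataFrame.append was removed in pandas 2.0 (deprecated since 1.4); the
# replacement is to build a one-row frame and concatenate it instead.
ellipsis_row = pd.Series({col: "..." for col in df.columns})
truncated = pd.concat([df, ellipsis_row.to_frame().T], ignore_index=True)
print(truncated)
```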
```diff
@@ -201,7 +201,6 @@ def form_post(self, form: CsvToDatabaseForm) -> Response:
                 infer_datetime_format=form.infer_datetime_format.data,
                 iterator=True,
                 keep_default_na=not form.null_values.data,
-                mangle_dupe_cols=form.overwrite_duplicate.data,
                 usecols=form.use_cols.data if form.use_cols.data else None,
                 na_values=form.null_values.data if form.null_values.data else None,
                 nrows=form.nrows.data,
```

> **Review comment:** Deprecated since 1.5: https://pandas.pydata.org/pandas-docs/version/1.5/reference/api/pandas.read_csv.html
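A small sketch of what the removal means in practice (the CSV content is invented for illustration):

```python
from io import StringIO

import pandas as pd

csv_data = "a,a,b\n1,2,3\n"

# The mangle_dupe_cols keyword was removed in pandas 2.0 (deprecated since
# 1.5); duplicate headers are now always de-duplicated as a, a.1, ...
df = pd.read_csv(StringIO(csv_data))
print(list(df.columns))  # ['a', 'a.1', 'b']
```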
```diff
@@ -344,7 +343,6 @@ def form_post(self, form: ExcelToDatabaseForm) -> Response:
                 index_col=form.index_col.data,
                 io=form.excel_file.data,
                 keep_default_na=not form.null_values.data,
-                mangle_dupe_cols=form.mangle_dupe_cols.data,
                 na_values=form.null_values.data if form.null_values.data else None,
                 parse_dates=form.parse_dates.data,
                 skiprows=form.skiprows.data,
```
```diff
@@ -2849,7 +2849,7 @@ def levels_for(
         for i in range(0, len(groups) + 1):
             agg_df = df.groupby(groups[:i]) if i else df
             levels[i] = (
-                agg_df.mean()
+                agg_df.mean(numeric_only=True)
                 if time_op == "agg_mean"
                 else agg_df.sum(numeric_only=True)
             )
```

> **Review comment:** The new default in 2.0 is `numeric_only=False`.
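To illustrate why the explicit `numeric_only=True` is needed, a sketch with an invented frame (not the data `levels_for` actually receives):

```python
import pandas as pd

df = pd.DataFrame(
    {"group": ["a", "a", "b"], "label": ["x", "y", "z"], "value": [1, 2, 3]}
)

# In pandas 2.0 groupby aggregations default to numeric_only=False and raise
# a TypeError when a non-numeric column such as `label` is present, so the
# caller has to opt in to dropping it.
means = df.groupby("group").mean(numeric_only=True)
print(means)  # only the numeric `value` column is averaged
```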
```diff
@@ -2874,7 +2874,7 @@ def levels_for_diff(
                 lambda a, b, fill_value: a / float(b) - 1,
             ],
         }[time_op]
-        agg_df = df.groupby(DTTM_ALIAS).sum()
+        agg_df = df.groupby(DTTM_ALIAS).sum(numeric_only=True)
         levels = {
             0: pd.Series(
                 {
```

```diff
@@ -2884,7 +2884,7 @@ def levels_for_diff(
             )
         }
         for i in range(1, len(groups) + 1):
-            agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum()
+            agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum(numeric_only=True)
             levels[i] = pd.DataFrame(
                 {
                     m: func[0](agg_df[m][until], agg_df[m][since], fill_value=0)
```
```diff
@@ -2900,7 +2900,7 @@ def levels_for_time(
         procs = {}
         for i in range(0, len(groups) + 1):
             self.form_data["groupby"] = groups[:i]
-            df_drop = df.drop(groups[i:], 1)
+            df_drop = df.drop(groups[i:], axis=1)
             procs[i] = self.process_data(df_drop, aggregate=True)
         self.form_data["groupby"] = groups
         return procs
```
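A quick sketch of the `DataFrame.drop` change (the column names are made up):

```python
import pandas as pd

df = pd.DataFrame({"country": ["UK", "US"], "region": ["EU", "NA"], "value": [5, 12]})

# pandas 2.0 made the arguments after `labels` keyword-only, so the old
# positional form df.drop(["region"], 1) now raises a TypeError; the axis
# has to be spelled out.
trimmed = df.drop(["region"], axis=1)
print(trimmed.columns.tolist())  # ['country', 'value']
```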
```diff
@@ -162,8 +162,8 @@ def test_rolling_after_pivot_with_single_metric():
         pd.DataFrame(
             data={
                 "dttm": pd.to_datetime(["2019-01-01", "2019-01-02"]),
-                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5.0, 12.0],
-                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6.0, 14.0],
+                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5, 12],
+                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6, 14],
             }
         )
     )
```

> **Review comment:** Nice! I'm glad to see integers remain integers when summed. Would you mind updating the comment above, as it still references floats?
>
> **Reply:** Done.
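The reason the float fixtures had to change at all is that pandas' test helper compares dtypes strictly; a small sketch, with an illustrative column name and values:

```python
import pandas as pd
from pandas.testing import assert_frame_equal

expected_int = pd.DataFrame({"sum_metric, UK": [5, 12]})
expected_float = pd.DataFrame({"sum_metric, UK": [5.0, 12.0]})

# assert_frame_equal checks dtypes by default (check_dtype=True), so fixtures
# written with floats fail once the computation returns int64 under pandas 2.0.
try:
    assert_frame_equal(expected_int, expected_float)
except AssertionError as exc:
    print(exc)  # reports the int64 vs float64 dtype mismatch
```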
```diff
@@ -213,10 +213,10 @@ def test_rolling_after_pivot_with_multiple_metrics():
         pd.DataFrame(
             data={
                 "dttm": pd.to_datetime(["2019-01-01", "2019-01-02"]),
-                FLAT_COLUMN_SEPARATOR.join(["count_metric", "UK"]): [1.0, 4.0],
-                FLAT_COLUMN_SEPARATOR.join(["count_metric", "US"]): [2.0, 6.0],
-                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5.0, 12.0],
-                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6.0, 14.0],
+                FLAT_COLUMN_SEPARATOR.join(["count_metric", "UK"]): [1, 4],
+                FLAT_COLUMN_SEPARATOR.join(["count_metric", "US"]): [2, 6],
+                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5, 12],
+                FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6, 14],
             }
         )
     )
```
> **Review comment:** Love the walrus.
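For anyone new to the pattern, a tiny sketch of the `:=` refactor in the first hunk; the dataset and column names are invented:

```python
dataset_columns = {"country", "sum_metric"}
requested = ["country", "bogus_col"]

# The walrus operator binds the filtered list and tests its truthiness in one
# expression, so the list stays available for the error message without a
# separate assignment beforehand.
if invalid_columns := [c for c in requested if c not in dataset_columns]:
    print(f"Columns missing in dataset: {invalid_columns}")  # ['bogus_col']
```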