Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(sqllab): remove deprecated PyArrow API #24135

Merged
merged 2 commits into from
May 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ geographiclib==1.52
# via geopy
geopy==2.2.0
# via apache-superset
greenlet==2.0.2
# via sqlalchemy
gunicorn==20.1.0
# via apache-superset
hashids==1.3.1
Expand All @@ -134,6 +136,8 @@ humanize==3.11.0
# via apache-superset
idna==3.2
# via email-validator
importlib-metadata==6.6.0
# via flask
importlib-resources==5.12.0
# via limits
isodate==0.6.0
Expand Down Expand Up @@ -209,7 +213,7 @@ prison==0.2.1
# via flask-appbuilder
prompt-toolkit==3.0.38
# via click-repl
pyarrow==10.0.1
pyarrow==12.0.0
# via apache-superset
pycparser==2.20
# via cffi
Expand Down Expand Up @@ -322,6 +326,10 @@ wtforms-json==0.3.5
# via apache-superset
xlsxwriter==3.0.7
# via apache-superset
zipp==3.15.0
# via
# importlib-metadata
# importlib-resources

# The following packages are considered to be unsafe in a requirements file:
# setuptools
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def get_git_sha() -> str:
"python-dateutil",
"python-dotenv",
"python-geohash",
"pyarrow>=10.0.1, <11",
"pyarrow>=12.0.0, <13",
"pyyaml>=5.4",
"PyJWT>=2.4.0, <3.0",
"redis>=4.5.4, <5.0",
Expand Down
12 changes: 4 additions & 8 deletions superset/sql_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

import backoff
import msgpack
import pyarrow as pa
import simplejson as json
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
Expand All @@ -51,6 +50,7 @@
from superset.result_set import SupersetResultSet
from superset.sql_parse import CtasMethod, insert_rls, ParsedQuery
from superset.sqllab.limiting_factor import LimitingFactor
from superset.sqllab.utils import write_ipc_buffer
from superset.utils.celery import session_scope
from superset.utils.core import (
get_username,
Expand Down Expand Up @@ -358,12 +358,7 @@ def _serialize_and_expand_data(
with stats_timing(
"sqllab.query.results_backend_pa_serialization", stats_logger
):
data = (
pa.default_serialization_context()
.serialize(result_set.pa_table)
.to_buffer()
.to_pybytes()
)
data = write_ipc_buffer(result_set.pa_table).to_pybytes()

# expand when loading data from results backend
all_columns, expanded_columns = (selected_columns, [])
Expand All @@ -382,7 +377,8 @@ def _serialize_and_expand_data(
return (data, selected_columns, all_columns, expanded_columns)


def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
def execute_sql_statements(
# pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
query_id: int,
rendered_query: str,
return_results: bool,
Expand Down
11 changes: 11 additions & 0 deletions superset/sqllab/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# under the License.
from typing import Any, Dict

import pyarrow as pa

from superset.common.db_query_status import QueryStatus


Expand Down Expand Up @@ -45,3 +47,12 @@ def is_require_to_apply() -> bool:
sql_results["data"] = sql_results["data"][:max_rows_in_result]
sql_results["displayLimitReached"] = True
return sql_results


def write_ipc_buffer(table: pa.Table) -> pa.Buffer:
sink = pa.BufferOutputStream()

with pa.ipc.new_stream(sink, table.schema) as writer:
writer.write_table(table)

return sink.getvalue()
4 changes: 2 additions & 2 deletions superset/views/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
logger = logging.getLogger(__name__)
stats_logger = app.config["STATS_LOGGER"]


REJECTED_FORM_DATA_KEYS: List[str] = []
if not feature_flag_manager.is_feature_enabled("ENABLE_JAVASCRIPT_CONTROLS"):
REJECTED_FORM_DATA_KEYS = ["js_tooltip", "js_onclick_href", "js_data_mutator"]
Expand Down Expand Up @@ -562,7 +561,8 @@ def _deserialize_results_payload(

with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
try:
pa_table = pa.deserialize(ds_payload["data"])
reader = pa.BufferReader(ds_payload["data"])
pa_table = pa.ipc.open_stream(reader).read_all()
except pa.ArrowSerializationError as ex:
raise SerializationError("Unable to deserialize table") from ex

Expand Down