diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index b40360d..8d1defc 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -22,10 +22,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade uv + uv venv uv pip install -e .[dev,tests] - name: Run pre-commit uses: pre-commit/action@v3.0.1 - name: Run pytest - run: pytest + run: | + source .venv/bin/activate + pytest tests diff --git a/README.md b/README.md index e09255d..6ae12b5 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,20 @@ Our dream is that this package is super simple to use and supports your use case - Supports persisting and reusing Graphic Walker specifications. - Scales to even the largest datasets, only limited by your server, cluster, or database. +## Supported Backends + +| Name | `kernel_computation=False` | `kernel_computation=True` | Comment | +| ---- | - | - | - | +| Pandas | ✅ | ✅ | | +| Polars | ✅ | ✅ | | +| Dask | ✅ | ❌ | [Not supported by Pygwalker](https://github.com/Kanaries/pygwalker/issues/658) | +| DuckDB Relation | ✅ | ❌ | [Not supported by Pygwalker](https://github.com/Kanaries/pygwalker/issues/658) | +| Pygwalker Database Connector | ❌ | ❌ | [Not supported by Narwhals](https://github.com/narwhals-dev/narwhals/issues/1289) | + +Other backends might be supported if they are supported by both [Narwhals](https://github.com/narwhals-dev/narwhals) and [PygWalker](https://github.com/Kanaries/pygwalker). + +Via the [backends](examples/reference/backends.py) example it's possible to explore backends. In the [`data` test fixture](tests/conftest.py) you can see which backends we currently test. + ## ❤️ Contributions Contributions and co-maintainers are very welcome! Please submit issues or pull requests to the [GitHub repository](https://github.com/panel-extensions/panel-graphic-walker). Check out the [DEVELOPER_GUIDE](DEVELOPER_GUIDE.md) for more information. 
diff --git a/examples/bikesharing_dashboard/bikesharing_dashboard.py b/examples/bikesharing_dashboard/bikesharing_dashboard.py index 5a77ae8..9a9f5cc 100644 --- a/examples/bikesharing_dashboard/bikesharing_dashboard.py +++ b/examples/bikesharing_dashboard/bikesharing_dashboard.py @@ -2,6 +2,7 @@ import pandas as pd import panel as pn +import requests from panel_gwalker import GraphicWalker @@ -10,7 +11,7 @@ ROOT = Path(__file__).parent # Source: https://kanaries-app.s3.ap-northeast-1.amazonaws.com/public-datasets/bike_sharing_dc.csv DATASET = "https://datasets.holoviz.org/bikesharing_dc/v1/bikesharing_dc.parquet" -SPEC_PATH = ROOT / "bikesharing_dashboard.json" +SPEC = "https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/bikesharing_dashboard/bikesharing_dashboard.json" ACCENT = "#ff4a4a" if pn.config.theme == "dark": @@ -34,16 +35,18 @@ } """ + @pn.cache def get_data(): return pd.read_parquet(DATASET) + data = get_data() walker = GraphicWalker( data, theme_key="streamlit", - spec=SPEC_PATH, + spec=SPEC, sizing_mode="stretch_both", kernel_computation=True, ) diff --git a/examples/earthquake_dashboard/earthquake_dashboard.py b/examples/earthquake_dashboard/earthquake_dashboard.py index ada553b..98f6d1e 100644 --- a/examples/earthquake_dashboard/earthquake_dashboard.py +++ b/examples/earthquake_dashboard/earthquake_dashboard.py @@ -2,6 +2,7 @@ import pandas as pd import panel as pn +import requests from panel_gwalker import GraphicWalker @@ -29,8 +30,7 @@ } """ DATASET = "https://datasets.holoviz.org/significant_earthquakes/v1/significant_earthquakes.parquet" -# https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/earthquake_dashboard/earthquake_dashboard.json -SPEC = ROOT / "earthquake_dashboard.json" +SPEC = "https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/earthquake_dashboard/earthquake_dashboard.json" @pn.cache diff --git a/examples/reference/backends.py 
b/examples/reference/backends.py new file mode 100644 index 0000000..2cb8dcb --- /dev/null +++ b/examples/reference/backends.py @@ -0,0 +1,47 @@ +import dask.dataframe as dd +import duckdb +import pandas as pd +import panel as pn +import polars as pl + +from panel_gwalker import GraphicWalker + +pn.extension() + + +DATA = "https://datasets.holoviz.org/significant_earthquakes/v1/significant_earthquakes.parquet" +df_pandas = pd.read_parquet(DATA) +duckdb_relation = duckdb.sql("SELECT * FROM df_pandas") + +DATAFRAMES = { + "pandas": df_pandas, + "polars": pl.read_parquet(DATA), + "dask": dd.read_parquet(DATA, npartitions=1), + "duckdb": duckdb_relation, +} + +select = pn.widgets.Select(options=list(DATAFRAMES), name="Data Source") +kernel_computation = pn.widgets.Checkbox(name="Kernel Computation", value=False) + + +@pn.depends(select, kernel_computation) +def get_data(value, kernel_computation): + data = DATAFRAMES[value] + if not kernel_computation: + try: + data = data.head(10) + except Exception: + data = data.df().head(10) + try: + return GraphicWalker( + data, + kernel_computation=kernel_computation, + sizing_mode="stretch_width", + tab="data", + ) + except Exception: + msg = f"Combination of {value=} and {kernel_computation=} is currently not supported." 
+ return pn.pane.Alert(msg, alert_type="danger") + + +pn.Column(select, kernel_computation, get_data).servable() diff --git a/examples/reference_app/reference_app.py b/examples/reference_app/reference_app.py index 43bf74d..904e497 100644 --- a/examples/reference_app/reference_app.py +++ b/examples/reference_app/reference_app.py @@ -3,6 +3,7 @@ import pandas as pd import panel as pn +import requests from panel_gwalker import GraphicWalker @@ -13,10 +14,8 @@ GW_LOGO = "https://kanaries.net/_next/static/media/kanaries-logo.0a9eb041.png" GW_API = "https://github.com/Kanaries/graphic-walker" GW_GUIDE_URL = "https://docs.kanaries.net/graphic-walker/data-viz/create-data-viz" -# https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/reference_app/spec_simple.json -SPEC_CAPACITY_STATE = ROOT / "spec_capacity_state.json" -# https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/reference_app/spec_capacity_state.json -SPEC_SIMPLE = ROOT / "spec_simple.json" +SPEC_CAPACITY_STATE = "https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/reference_app/spec_capacity_state.json" +SPEC_SIMPLE = "https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/reference_app/spec_simple.json" ACCENT = "#5B8FF9" diff --git a/pyproject.toml b/pyproject.toml index a4cb92c..5d3b5e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "panel-graphic-walker" -version = "0.4.0" +version = "0.5.0" description = "A project providing a Graphic Walker Pane for use with HoloViz Panel." 
readme = "README.md" authors = [ { name = "Philipp Rudiger", email = "philipp.jfr@gmail.com" } ] requires-python = ">=3.9" -dependencies = ["panel>=1.5.2"] +dependencies = ["panel>=1.5.2", "narwhals"] [build-system] requires = ["hatchling"] @@ -18,34 +18,42 @@ dev = [ "jedi-language-server;sys_platform == 'linux'", "mypy", "pandas-stubs", + "types-requests", "pre-commit", "pytest", "ruff", "watchfiles", ] tests = [ + "aiohttp", + "dask[dataframe]", "duckdb", "fastparquet", "gw-dsl-parser", + "polars", "pygwalker", "pytest-asyncio", "pytest", + "requests", ] examples = [ + "dask[dataframe]", "duckdb", "fastparquet", "gw-dsl-parser", + "polars", "pygwalker", + "requests", ] kernel = [ "duckdb ; platform_system != 'Emscripten'", "gw-dsl-parser ; platform_system != 'Emscripten'", - "pygwalker ; platform_system != 'Emscripten'" + "pygwalker ; platform_system != 'Emscripten'", ] [tool.hatch.build.targets.wheel] packages = ["src/panel_gwalker"] [[tool.mypy.overrides]] -module = "param.*,pygwalker.*,gw_dsl_parser.*" +module = "param.*,pygwalker.*,gw_dsl_parser.*,requests.*" ignore_missing_imports = true diff --git a/src/panel_gwalker/_gwalker.js b/src/panel_gwalker/_gwalker.js index f17af03..25d5215 100644 --- a/src/panel_gwalker/_gwalker.js +++ b/src/panel_gwalker/_gwalker.js @@ -22,25 +22,13 @@ function cleanToDict(value){ return value } -function fetchSpec(url) { - return fetch(url) - .then(response => response.json()) - .catch(err => { - console.error('Error fetching spec from URL', err); - }); -} - function transformSpec(spec) { /* The spec must be an null or array of objects */ if (spec === null) { return null; } if (typeof spec === 'string') { - if (spec.startsWith('http://') || spec.startsWith('https://')) { - spec = fetchSpec(spec); - } else { - spec = JSON.parse(spec); - } + spec = JSON.parse(spec); } if (!Array.isArray(spec)) { diff --git a/src/panel_gwalker/_gwalker.py b/src/panel_gwalker/_gwalker.py index 06d1e31..3a9f1a0 100644 --- 
a/src/panel_gwalker/_gwalker.py +++ b/src/panel_gwalker/_gwalker.py @@ -29,6 +29,7 @@ from panel.widgets import Button, IntInput, RadioButtonGroup, TextInput from panel_gwalker._pygwalker import get_data_parser, get_sql_from_payload +from panel_gwalker._tabular_data import TabularData, TabularDataType from panel_gwalker._utils import ( SPECTYPES, SpecType, @@ -233,7 +234,7 @@ class GraphicWalker(ReactComponent): ``` """ - object: pd.DataFrame = param.DataFrame( + object: TabularDataType = TabularData( doc="""The data to explore. Please note that if you update the `object`, then the existing charts will not be deleted.""" ) @@ -304,6 +305,15 @@ class GraphicWalker(ReactComponent): } } + _rename = { + "export": None, + "export_mode": None, + "export_scope": None, + "export_timeout": None, + "save": None, + "save_path": None, + } + _esm = "_gwalker.js" _THEME_CONFIG = { @@ -394,11 +404,13 @@ def _handle_msg(self, msg: Any) -> None: if action == "export" and event_id in self._exports: self._exports[event_id] = msg["data"] elif action == "compute": - self._send_msg({ - "action": "compute", - "id": event_id, - "result": self._compute(msg["payload"]), - }) + self._send_msg( + { + "action": "compute", + "id": event_id, + "result": self._compute(msg["payload"]), + } + ) async def export_chart( self, @@ -423,6 +435,10 @@ async def export_chart( ------- Dictionary containing the exported chart(s). 
""" + mode = mode or self.export_mode + scope = scope or self.export_scope + timeout = timeout or self.export_timeout + event_id = uuid.uuid4().hex self._send_msg( {"action": "export", "id": event_id, "scope": f"{scope}", "mode": mode} diff --git a/src/panel_gwalker/_pygwalker.py b/src/panel_gwalker/_pygwalker.py index 0c20d7a..394d7df 100644 --- a/src/panel_gwalker/_pygwalker.py +++ b/src/panel_gwalker/_pygwalker.py @@ -1,6 +1,9 @@ -from typing import TYPE_CHECKING, Any, Dict, List +import json +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Protocol import pandas as pd +from sqlalchemy import create_engine, text +from sqlalchemy.engine import Engine if TYPE_CHECKING: try: @@ -44,20 +47,22 @@ def get_data_parser( try: from pygwalker import data_parsers from pygwalker.data_parsers.base import FieldSpec + from pygwalker.services.data_parsers import _get_data_parser except ImportError as exc: raise ImportError( "Server dependencies are not installed. Please: pip install panel-graphic-walker[kernel]." 
) from exc _field_specs = [FieldSpec(**_convert_to_field_spec(spec)) for spec in field_specs] - - if isinstance(object, pd.DataFrame): - return data_parsers.pandas_parser.PandasDataFrameDataParser( + try: + parser, name = _get_data_parser(object) + return parser( object, _field_specs, infer_string_to_date, infer_number_to_dimension, other_params, ) - msg = f"Data type {type(object)} is currently not supported" - raise NotImplementedError(msg) + except TypeError as exc: + msg = f"Data type {type(object)} is currently not supported" + raise NotImplementedError(msg) from exc diff --git a/src/panel_gwalker/_tabular_data.py b/src/panel_gwalker/_tabular_data.py new file mode 100644 index 0000000..d670a12 --- /dev/null +++ b/src/panel_gwalker/_tabular_data.py @@ -0,0 +1,72 @@ +from typing import TYPE_CHECKING, Any, Union + +import bokeh.core.properties as bp +import narwhals as nw +import param +from bokeh.core.property.bases import Property +from bokeh.models import ColumnDataSource +from narwhals.dependencies import ( + is_dask_dataframe, + is_duckdb_relation, + is_into_dataframe, + is_polars_lazyframe, +) +from narwhals.typing import FrameT, IntoFrame +from panel.io.datamodel import PARAM_MAPPING + +TabularDataType = IntoFrame + + +def _validate(val: Any): + # is_into_dataframe does not support dataframe interchange protocol in general + # https://github.com/narwhals-dev/narwhals/issues/1337#issuecomment-2466142486 + if ( + is_into_dataframe(val) + or is_dask_dataframe(val) + or is_polars_lazyframe(val) + or is_duckdb_relation(val) + ): + return + + msg = f"Expected object that can be converted into Narwhals Dataframe but got '{type(val)}'" + raise ValueError(msg) + + +class TabularData(param.Parameter): + def _validate(self, val): + super()._validate(val=val) + _validate(val) + + +# See https://github.com/holoviz/panel/issues/7468 +@nw.narwhalify +def _column_datasource_from_tabular_df(data: FrameT): + if isinstance(data, nw.LazyFrame): + data = data.collect() + data 
= data.to_pandas() + return ColumnDataSource._data_from_df(data) + + +class BkTabularData(Property["TabularDataType"]): + """Accept TabularDataType values. + + This property only exists to support type validation, e.g. for "accepts" + clauses. It is not serializable itself, and is not useful to add to + Bokeh models directly. + + """ + + def validate(self, value: Any, detail: bool = True) -> None: + super().validate(value, detail) + + _validate(value) + + +PARAM_MAPPING.update( + { + TabularData: lambda p, kwargs: ( + bp.ColumnData(bp.Any, bp.Seq(bp.Any), **kwargs), + [(BkTabularData, _column_datasource_from_tabular_df)], + ), + } +) diff --git a/src/panel_gwalker/_utils.py b/src/panel_gwalker/_utils.py index 996a9b1..307d102 100644 --- a/src/panel_gwalker/_utils.py +++ b/src/panel_gwalker/_utils.py @@ -5,12 +5,18 @@ from pathlib import Path from typing import Dict +import narwhals as nw import numpy as np import pandas as pd import panel as pn +import requests +from narwhals.dataframe import LazyFrame +from narwhals.dependencies import is_into_dataframe +from narwhals.typing import FrameT logger = logging.getLogger("panel-graphic-walker") FORMAT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s" +from narwhals.typing import FrameT def configure_debug_log_level(): @@ -66,12 +72,29 @@ def _infer_prop(s: pd.Series, i=None) -> dict: } +SAMPLE_ROWS = 100 + + @pn.cache(max_items=20, ttl=60 * 5, policy="LRU") -def _raw_fields(data: pd.DataFrame | Dict[str, np.ndarray]) -> list[dict]: - if isinstance(data, dict): - return [_infer_prop(pd.Series(array, name=col)) for col, array in data.items()] +def _raw_fields_core(data: pd.DataFrame) -> list[dict]: + return [_infer_prop(data[col], i) for i, col in enumerate(data.columns)] + + +@nw.narwhalify +def _raw_fields(data: FrameT) -> list[dict]: + # Workaround for caching issue. See https://github.com/holoviz/panel/issues/7467. 
+ # Should probably use Narwhals schema to one day infer this + if isinstance(data, LazyFrame): + data = data.head(SAMPLE_ROWS).collect() + else: - return [_infer_prop(data[col], i) for i, col in enumerate(data.columns)] + try: + if len(data) > SAMPLE_ROWS: + data = data.sample(SAMPLE_ROWS) + except Exception: + pass + + pandas_data = data.to_pandas() + return _raw_fields_core(pandas_data) SpecType = None | str | Path | dict | list[dict] @@ -92,16 +115,28 @@ def _load_json(spec): def _is_url(spec): - return spec.startswith(("http", "https")) + return isinstance(spec, str) and spec.startswith(("http://", "https://")) + + +@pn.cache(max_items=25, policy="LRU", ttl=60 * 5) +def _get_spec(url) -> dict: + # currently client side loading of url does not work + return requests.get(url, timeout=30).json() def process_spec(spec: SpecType): + if not spec: + return spec + if ( isinstance(spec, str) and os.path.isfile(spec) and spec.endswith(".json") ) or isinstance(spec, Path): return _read_and_load_json(spec) - if isinstance(spec, str) and not _is_url(spec): + if _is_url(spec): + return _get_spec(spec) + + if isinstance(spec, str): return _load_json(spec) return spec diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..917cfb9 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,31 @@ +import json +from asyncio import sleep +from pathlib import Path +from unittest.mock import patch + +import dask.dataframe as dd +import duckdb +import pandas as pd +import param +import polars as pl +import pytest +from pygwalker.data_parsers.database_parser import Connector as DatabaseConnector +from sqlalchemy import create_engine, text + +from panel_gwalker import GraphicWalker +from panel_gwalker._utils import _raw_fields + + +@pytest.fixture(params=["pandas", "polars", "dask", "duckdb"]) +def data(request, tmp_path): + if request.param == "pandas": + return pd.DataFrame({"a": [1, 2, 3]}) + if request.param == "polars": + return pl.DataFrame({"a": [1, 2, 3]}) + if request.param == 
"dask": + return dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}), npartitions=1) + if request.param == "duckdb": + df_pandas = pd.DataFrame({"a": [1, 2, 3]}) + return duckdb.sql("SELECT * FROM df_pandas") + else: + raise ValueError(f"Unknown data type: {request.param}") diff --git a/tests/test_graphic_walker.py b/tests/test_graphic_walker.py index 4bb6506..6165566 100644 --- a/tests/test_graphic_walker.py +++ b/tests/test_graphic_walker.py @@ -3,19 +3,19 @@ from pathlib import Path from unittest.mock import patch +import dask.dataframe as dd +import duckdb import pandas as pd import param +import polars as pl import pytest +from pygwalker.data_parsers.database_parser import Connector as DatabaseConnector +from sqlalchemy import create_engine, text from panel_gwalker import GraphicWalker from panel_gwalker._utils import _raw_fields -@pytest.fixture -def data(): - return pd.DataFrame({"a": [1, 2, 3]}) - - @pytest.fixture def default_appearance(): return "light" @@ -135,8 +135,8 @@ def _process_spec(spec): assert _process_spec(list_spec) == list_spec # Test with a URL (assuming we are just checking format, not accessing the URL) - url = "http://example.com/data.json" - assert _process_spec(url) == url + url = "https://cdn.jsdelivr.net/gh/panel-extensions/panel-graphic-walker@main/examples/bikesharing_dashboard/bikesharing_dashboard.json" + assert isinstance(_process_spec(url), list) # Test with a JSON string json_string = '{"key": "value"}' diff --git a/tests/test_graphic_walker_apps.py b/tests/test_graphic_walker_apps.py index 1efd327..57b2f93 100644 --- a/tests/test_graphic_walker_apps.py +++ b/tests/test_graphic_walker_apps.py @@ -13,4 +13,8 @@ def test_apps(path): code = f.read() env = globals().copy() env["__file__"] = path - exec(code, env) + try: + exec(code, env) + except Exception as exc: + msg = f"Error running {path}" + raise Exception(msg) from exc diff --git a/tests/test_pygwalker.py b/tests/test_pygwalker.py new file mode 100644 index 0000000..70e20b7 --- /dev/null +++ 
b/tests/test_pygwalker.py @@ -0,0 +1,12 @@ +import dask.dataframe as dd +import duckdb +import pytest + +from panel_gwalker._gwalker import get_data_parser + + +def test_get_data_parser(data): + if isinstance(data, (dd.DataFrame, duckdb.DuckDBPyRelation)): + pytest.xfail(f"Unsupported data type: {type(data)}") + + assert get_data_parser(data, [], False, False, {}) diff --git a/tests/test_tabular_data.py b/tests/test_tabular_data.py new file mode 100644 index 0000000..dec32d9 --- /dev/null +++ b/tests/test_tabular_data.py @@ -0,0 +1,25 @@ +import pandas as pd +import param +import polars as pl +import pytest +from pygwalker.data_parsers.database_parser import Connector as DatabaseConnector + +from panel_gwalker._tabular_data import TabularData, _column_datasource_from_tabular_df + + +class MyClass(param.Parameterized): + value = TabularData() + + +def test_tabular_data(data): + MyClass(value=data) + + +def test_tabular_data_raises(): + data = [{"a": [1, 2, 3]}] + with pytest.raises(ValueError): + MyClass(value=data) + + +def test_column_datasource_from_tabular_df(data): + assert _column_datasource_from_tabular_df(data)