Skip to content

Commit

Permalink
Bugfix data-explorer images (#382)
Browse files Browse the repository at this point in the history
This PR fixes the rendering of images in the explorer. Image columns
currently have type `binary`, not `object`, so the image-field check was
changed to match `binary` and render them properly.

Caveat: not all `binary` columns will correspond to images, so we will
need to implement separate image-detection logic later, but this fix
should do for now.

Also fixed a small bug that caused errors when rendering the numerical
explorer when no numeric columns are present.
  • Loading branch information
PhilippeMoussalli authored Aug 24, 2023
1 parent 43b0e39 commit 64f7d44
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 18 deletions.
2 changes: 1 addition & 1 deletion data_explorer/app/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""This file contains data loading logic"""
import json
import logging
from typing import List, Tuple
from typing import List
from urllib.parse import urlparse

import dask.dataframe as dd
Expand Down
4 changes: 3 additions & 1 deletion data_explorer/app/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
"""Main file of the data explorer interface"""
import logging

import dask
import streamlit as st
from data import load_dataframe
from table import get_image_fields, get_numeric_fields
from widgets import (build_explorer_table, build_image_explorer,
build_numeric_analysis_plots,
build_numeric_analysis_table, build_sidebar)

dask.config.set({"dataframe.convert-string": False})

LOGGER = logging.getLogger(__name__)
# streamlit wide
st.set_page_config(layout="wide")
Expand All @@ -31,7 +34,6 @@
# extract image and numeric columns
image_fields = get_image_fields(fields)
numeric_fields = get_numeric_fields(fields)

# build tabs
tab_explorer, tab_numeric, tab_images = st.tabs(
["Data explorer", "Numerical analysis", "Image explorer"]
Expand Down
2 changes: 1 addition & 1 deletion data_explorer/app/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_image_fields(fields: Dict[str, str]) -> List[str]:
# check which of the columns contain byte data
image_fields = []
for k, v in fields.items():
if v == "object":
if v == "binary":
image_fields.append(k)
return image_fields

Expand Down
37 changes: 22 additions & 15 deletions data_explorer/app/widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def build_sidebar() -> Tuple[Optional[str], Optional[str], Optional[Dict]]:


def build_explorer_table(
dataframe: Union[dd.DataFrame, pd.DataFrame], image_fields: List[str]
dataframe: Union[dd.DataFrame, pd.DataFrame], image_fields: List[str]
) -> None:
"""Build the dataframe explorer table.
Expand Down Expand Up @@ -129,7 +129,7 @@ def build_explorer_table(


def build_numeric_analysis_table(
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
) -> None:
"""Build the numeric analysis table.
Expand All @@ -138,7 +138,9 @@ def build_numeric_analysis_table(
numeric_fields (List[str]): list of numeric fields
"""
# check if there are numeric fields
if len(numeric_fields) > 0:
if len(numeric_fields) == 0:
st.warning("There are no numeric fields in this subset")
else:
st.write("## Numerical statistics")

# make numeric statistics table
Expand All @@ -159,25 +161,29 @@ def build_numeric_analysis_table(


def build_numeric_analysis_plots(
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
) -> None:
"""Build the numeric analysis plots.
Args:
dataframe (Union[dd.DataFrame, pd.DataFrame]): dataframe to explore
numeric_fields (List[str]): list of numeric fields
"""
st.write("## Show numeric distributions")
# check if there are numeric fields
if len(numeric_fields) == 0:
st.warning("There are no numeric fields in this subset")
else:
st.write("## Show numeric distributions")

# choose a numeric field in dropdown
cols = st.columns(2)
with cols[0]:
numeric_field = st.selectbox("Field", numeric_fields)
with cols[1]:
plot_type = st.selectbox("Plot type",
["histogram", "violin", "density", "categorical"])
# choose a numeric field in dropdown
cols = st.columns(2)
with cols[0]:
numeric_field = st.selectbox("Field", numeric_fields)
with cols[1]:
plot_type = st.selectbox("Plot type",
["histogram", "violin", "density", "categorical"])

make_numeric_plot(dataframe, numeric_field, plot_type)
make_numeric_plot(dataframe, numeric_field, plot_type)


def build_image_explorer(dataframe: dd.DataFrame, image_fields: List[str]):
Expand All @@ -188,12 +194,13 @@ def build_image_explorer(dataframe: dd.DataFrame, image_fields: List[str]):
dataframe (dd.DataFrame): dataframe to explore
image_fields (List[str]): list of image fields
"""
st.write("## Image explorer")
st.write("In this table, you can explore the images")

if len(image_fields) == 0:
st.warning("There are no image fields in this subset")
else:
st.write("## Image explorer")
st.write("In this table, you can explore the images")

image_field = st.selectbox("Image field", image_fields)

images = dataframe[image_field].compute()
Expand Down

0 comments on commit 64f7d44

Please sign in to comment.