Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add table column statistics to text2sql query info #1402

Merged
merged 3 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "querybook",
"version": "3.29.1",
"version": "3.30.0",
"description": "A Big Data Webapp",
"private": true,
"scripts": {
Expand Down
7 changes: 6 additions & 1 deletion querybook/server/lib/ai_assistant/tools/table_schema.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Callable

from app.db import with_session
from lib.vector_store import get_vector_store
from logic import metastore as m_logic
from models.metastore import DataTable, DataTableColumn
from lib.vector_store import get_vector_store


def get_table_documentation(table: DataTable) -> str:
Expand Down Expand Up @@ -33,6 +33,11 @@ def _get_column(column: DataTableColumn) -> dict[str, str]:
column_json["description"] = column.data_elements[0].description
column_json["data_element"] = column.data_elements[0].name

if len(column.statistics):
column_json["statistics"] = {
stat.key: stat.value for stat in column.statistics if stat.value is not None
}

return column_json


Expand Down
16 changes: 10 additions & 6 deletions querybook/server/models/metastore.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
import sqlalchemy as sql
from sqlalchemy.orm import backref, relationship

from app import db
from const.db import (
utf8mb4_name_length,
name_length,
now,
description_length,
url_length,
mediumtext_length,
name_length,
now,
type_length,
url_length,
utf8mb4_name_length,
)
from const.metastore import DataTableWarningSeverity
from lib.sqlalchemy import CRUDMixin, TruncateString
from sqlalchemy.orm import backref, relationship

Base = db.Base

Expand Down Expand Up @@ -301,6 +300,11 @@ class DataTableColumn(TruncateString("name", "type", "comment"), Base):
data_elements = relationship(
"DataElement", secondary="data_element_association", uselist=True, viewonly=True
)
statistics = relationship(
"DataTableColumnStatistics",
uselist=True,
viewonly=True,
)

def to_dict(self, include_table=False):
column_dict = {
Expand Down
Loading