From 7382ca3980f45ef9e25ae9de209614200445b452 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 21 Nov 2024 03:21:54 +0800 Subject: [PATCH 1/6] simplify pipeline dry run and use formal AsyncDriver --- wren-ai-service/src/core/pipeline.py | 2 +- wren-ai-service/src/pipelines/common.py | 19 +++++++ .../pipelines/generation/data_assistance.py | 28 +++------- .../generation/followup_sql_generation.py | 36 +++--------- .../generation/intent_classification.py | 27 ++------- .../generation/question_recommendation.py | 52 ++++++------------ .../generation/relationship_recommendation.py | 33 ++++------- .../generation/semantics_description.py | 55 +++++-------------- .../src/pipelines/generation/sql_answer.py | 31 ++++------- .../src/pipelines/generation/sql_breakdown.py | 31 ++++------- .../pipelines/generation/sql_correction.py | 26 +++------ .../src/pipelines/generation/sql_expansion.py | 37 ++++--------- .../pipelines/generation/sql_explanation.py | 43 +++------------ .../pipelines/generation/sql_generation.py | 29 +++------- .../pipelines/generation/sql_regeneration.py | 26 +++------ .../src/pipelines/generation/sql_summary.py | 28 +++------- .../src/pipelines/indexing/indexing.py | 27 ++------- .../retrieval/historical_question.py | 32 +++-------- .../src/pipelines/retrieval/retrieval.py | 27 ++------- .../src/web/v1/services/__init__.py | 2 +- 20 files changed, 179 insertions(+), 412 deletions(-) diff --git a/wren-ai-service/src/core/pipeline.py b/wren-ai-service/src/core/pipeline.py index f53096e00..682d85fa1 100644 --- a/wren-ai-service/src/core/pipeline.py +++ b/wren-ai-service/src/core/pipeline.py @@ -4,7 +4,7 @@ from dataclasses import dataclass from typing import Any, Dict -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import Pipeline from src.core.engine import Engine diff --git a/wren-ai-service/src/pipelines/common.py b/wren-ai-service/src/pipelines/common.py index 0ebf8b639..131c054c5 100644 --- a/wren-ai-service/src/pipelines/common.py +++ b/wren-ai-service/src/pipelines/common.py @@ -13,6 +13,7 @@ add_quotes, clean_generation_result, ) +from src.core.pipeline import BasicPipeline from src.web.v1.services.ask import AskConfigurations logger = logging.getLogger("wren-ai-service") @@ -485,3 +486,21 @@ def build_table_ddl( + ",\n ".join(columns_ddl) + "\n);" ) + + +def dry_run_pipeline(pipeline_cls: BasicPipeline, pipeline_name: str, **kwargs): + from langfuse.decorators import langfuse_context + + from src.config import settings + from src.core.pipeline import async_validate + from src.providers import generate_components + from src.utils import init_langfuse + + pipe_components = generate_components(settings.components) + pipeline = pipeline_cls(**pipe_components[pipeline_name]) + init_langfuse() + + pipeline.visualize(**kwargs) + async_validate(lambda: pipeline.run(**kwargs)) + + langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/data_assistance.py b/wren-ai-service/src/pipelines/generation/data_assistance.py index 5a9fce779..9d2e643a6 100644 --- a/wren-ai-service/src/pipelines/generation/data_assistance.py +++ b/wren-ai-service/src/pipelines/generation/data_assistance.py @@ -5,7 +5,7 @@ from typing import Any, Optional from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel @@ -170,22 +170,12 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, _ = init_providers(engine_config=EngineConfig()) - pipeline = DataAssistance( - llm_provider=llm_provider, + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + DataAssistance, + "data_assistance", + query="show me the dataset", + db_schemas=[], + language="English", ) - - pipeline.visualize("show me the dataset", [], "English") - async_validate(lambda: pipeline.run("show me the dataset", [], "English")) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py index b2ba5b241..a2b1812a9 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py @@ -4,7 +4,7 @@ from typing import Any, List from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel @@ -248,30 +248,12 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, engine = init_providers(engine_config=EngineConfig()) - pipeline = FollowUpSQLGeneration(llm_provider=llm_provider, engine=engine) - - pipeline.visualize( - "this is a test query", - [], - AskHistory(sql="SELECT * FROM table", summary="Summary", steps=[]), + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + FollowUpSQLGeneration, + "followup_sql_generation", + query="show me the dataset", + contexts=[], + history=AskHistory(sql="SELECT * FROM table", summary="Summary", steps=[]), ) - async_validate( - lambda: pipeline.run( - "this is a test query", - [], - AskHistory(sql="SELECT * FROM table", summary="Summary", steps=[]), - ) - ) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/intent_classification.py b/wren-ai-service/src/pipelines/generation/intent_classification.py index cfa3d589a..b2849138c 100644 --- a/wren-ai-service/src/pipelines/generation/intent_classification.py +++ b/wren-ai-service/src/pipelines/generation/intent_classification.py @@ -6,7 +6,7 @@ import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import Document from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe @@ -273,25 +273,10 @@ async def run(self, query: str, id: Optional[str] = None): if __name__ == "__main__": - from langfuse.decorators import langfuse_context + from src.pipelines.common import dry_run_pipeline - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, document_store_provider, _ = init_providers( - engine_config=EngineConfig() - ) - pipeline = IntentClassification( - document_store_provider=document_store_provider, - llm_provider=llm_provider, + dry_run_pipeline( + IntentClassification, + "intent_classification", + query="show me the dataset", ) - - pipeline.visualize("this is a query") - async_validate(lambda: pipeline.run("this is a query")) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/question_recommendation.py b/wren-ai-service/src/pipelines/generation/question_recommendation.py index 15b53fe64..b88a1d9e6 100644 --- a/wren-ai-service/src/pipelines/generation/question_recommendation.py +++ b/wren-ai-service/src/pipelines/generation/question_recommendation.py @@ -1,4 +1,3 @@ -import json import logging import sys from datetime import datetime @@ -7,12 +6,12 @@ import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel -from src.core.pipeline import BasicPipeline, async_validate +from src.core.pipeline import BasicPipeline from src.core.provider import LLMProvider logger = logging.getLogger("wren-ai-service") @@ -30,7 +29,7 @@ def prompt( prompt_builder: PromptBuilder, ) -> dict: return prompt_builder.run( - models=mdl["models"], + models=mdl.get("models", []), previous_questions=previous_questions, language=language, current_date=current_date, @@ -203,6 +202,7 @@ def visualize( "current_date": current_date, "max_questions": max_questions, "max_categories": max_categories, + **self._components, }, show_legend=True, orient="LR", @@ -215,7 +215,7 @@ async def run( previous_questions: list[str] = [], categories: list[str] = [], language: str = "English", - current_date: str = datetime.now(), + current_date: str = datetime.now().strftime("%Y-%m-%d %A %H:%M:%S"), max_questions: int = 5, max_categories: int = 3, **_, @@ -237,32 +237,16 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, _ = init_providers(EngineConfig()) - pipeline = QuestionRecommendation(llm_provider=llm_provider) - - with open("sample/ecommerce_duckdb_mdl.json", "r") as file: - mdl = json.load(file) - - input = { - "mdl": mdl, - "previous_questions": [], - "categories": ["Customer Insights", "Product Performance"], - "language": "English", - "max_questions": 5, - "max_categories": 2, - } - - # pipeline.visualize(**input) - async_validate(lambda: pipeline.run(**input)) - - langfuse_context.flush() + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + QuestionRecommendation, + "question_recommendation", + mdl={}, + previous_questions=[], + categories=[], + language="English", + current_date=datetime.now().strftime("%Y-%m-%d %A %H:%M:%S"), + max_questions=5, + max_categories=3, + ) diff --git a/wren-ai-service/src/pipelines/generation/relationship_recommendation.py b/wren-ai-service/src/pipelines/generation/relationship_recommendation.py index bf01816ee..b059f42ba 100644 --- a/wren-ai-service/src/pipelines/generation/relationship_recommendation.py +++ b/wren-ai-service/src/pipelines/generation/relationship_recommendation.py @@ -1,4 +1,3 @@ -import json import logging import sys from enum import Enum @@ -7,13 +6,13 @@ import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel from src.core.engine import Engine -from src.core.pipeline import BasicPipeline, async_validate +from src.core.pipeline import BasicPipeline from src.core.provider import LLMProvider logger = logging.getLogger("wren-ai-service") @@ -26,7 +25,8 @@ def column_filter(columns: list[dict]) -> list[dict]: return [column for column in columns if "relationship" not in column] return [ - {**model, "columns": column_filter(model["columns"])} for model in mdl["models"] + {**model, "columns": column_filter(model.get("columns", []))} + for model in mdl.get("models", []) ] @@ -227,24 +227,11 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context + from src.pipelines.common import dry_run_pipeline - from src.config import settings - from src.core.pipeline import async_validate - from src.providers import generate_components - from src.utils import init_langfuse - - pipe_components = generate_components(settings.components) - pipeline = RelationshipRecommendation( - **pipe_components["relationship_recommendation"] + dry_run_pipeline( + RelationshipRecommendation, + "relationship_recommendation", + mdl={}, + language="English", ) - init_langfuse() - - with open("sample/woocommerce_bigquery_mdl.json", "r") as file: - mdl = json.load(file) - - input = {"mdl": mdl, "language": "Traditional Chinese"} - - async_validate(lambda: pipeline.run(**input)) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/semantics_description.py index a1a2295a2..808da4ecb 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/semantics_description.py @@ -1,4 +1,3 @@ -import json import logging import sys from pathlib import Path @@ -6,12 +5,12 @@ import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel -from src.core.pipeline import BasicPipeline, async_validate +from src.core.pipeline import BasicPipeline from src.core.provider import LLMProvider logger = logging.getLogger("wren-ai-service") @@ -46,7 +45,9 @@ def extract(model: dict) -> dict: } return [ - extract(model) for model in mdl["models"] if model["name"] in selected_models + extract(model) + for model in mdl.get("models", []) + if model.get("name", "") in selected_models ] @@ -252,39 +253,13 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, _ = init_providers(EngineConfig()) - pipeline = SemanticsDescription(llm_provider=llm_provider) - - with open("sample/college_3_bigquery_mdl.json", "r") as file: - mdl = json.load(file) - - input = { - "user_prompt": "Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", - "selected_models": [ - "Student", - "Minor_in", - "Member_of", - "Gradeconversion", - "Faculty", - "Enrolled_in", - "Department", - "Course", - ], - "mdl": mdl, - "language": "Chinese", - } - - # pipeline.visualize(**input) - async_validate(lambda: pipeline.run(**input)) - - langfuse_context.flush() + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + SemanticsDescription, + "semantics_description", + user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", + selected_models=[], + mdl={}, + language="English", + ) diff --git a/wren-ai-service/src/pipelines/generation/sql_answer.py b/wren-ai-service/src/pipelines/generation/sql_answer.py index 70dc551b8..de9107c32 100644 --- a/wren-ai-service/src/pipelines/generation/sql_answer.py +++ b/wren-ai-service/src/pipelines/generation/sql_answer.py @@ -6,14 +6,14 @@ import aiohttp import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import component from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel from src.core.engine import Engine -from src.core.pipeline import BasicPipeline, async_validate +from src.core.pipeline import BasicPipeline from src.core.provider import LLMProvider from src.utils import async_timer, timer @@ -238,23 +238,12 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, engine = init_providers(EngineConfig()) - pipeline = SQLAnswer( - llm_provider=llm_provider, - engine=engine, + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + SQLAnswer, + "sql_answer", + query="query", + sql="SELECT * FROM table_name", + language="English", ) - - pipeline.visualize("query", "SELECT * FROM table_name", "English") - async_validate(lambda: pipeline.run("query", "SELECT * FROM table_name", "English")) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/sql_breakdown.py b/wren-ai-service/src/pipelines/generation/sql_breakdown.py index 01639a80c..a93b600c4 100644 --- a/wren-ai-service/src/pipelines/generation/sql_breakdown.py +++ b/wren-ai-service/src/pipelines/generation/sql_breakdown.py @@ -4,13 +4,13 @@ from typing import Any from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel from src.core.engine import Engine -from src.core.pipeline import BasicPipeline, async_validate +from src.core.pipeline import BasicPipeline from src.core.provider import LLMProvider from src.pipelines.common import TEXT_TO_SQL_RULES, SQLBreakdownGenPostProcessor from src.utils import ( @@ -246,23 +246,12 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, engine = init_providers(EngineConfig()) - pipeline = SQLBreakdown( - llm_provider=llm_provider, - engine=engine, + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + SQLBreakdown, + "sql_breakdown", + query="query", + sql="SELECT * FROM table_name", + language="English", ) - - pipeline.visualize("", "SELECT * FROM table_name") - async_validate(lambda: pipeline.run("", "SELECT * FROM table_name")) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/sql_correction.py b/wren-ai-service/src/pipelines/generation/sql_correction.py index 44d89bae7..960c733d8 100644 --- a/wren-ai-service/src/pipelines/generation/sql_correction.py +++ b/wren-ai-service/src/pipelines/generation/sql_correction.py @@ -5,7 +5,7 @@ from typing import Any, Dict, List from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import Document from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe @@ -189,23 +189,11 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context + from src.pipelines.common import dry_run_pipeline - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, engine = init_providers(engine_config=EngineConfig()) - pipeline = SQLCorrection( - llm_provider=llm_provider, - engine=engine, + dry_run_pipeline( + SQLCorrection, + "sql_correction", + invalid_generation_results=[], + contexts=[], ) - - pipeline.visualize([], []) - async_validate(lambda: pipeline.run([], [])) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/sql_expansion.py b/wren-ai-service/src/pipelines/generation/sql_expansion.py index 7c47f2229..564667299 100644 --- a/wren-ai-service/src/pipelines/generation/sql_expansion.py +++ b/wren-ai-service/src/pipelines/generation/sql_expansion.py @@ -4,7 +4,7 @@ from typing import Any, List from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel @@ -182,30 +182,13 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, engine = init_providers(engine_config=EngineConfig()) - pipeline = SQLExpansion(llm_provider=llm_provider, engine=engine) - - pipeline.visualize( - "this is a test query", - [], - AskHistory(sql="SELECT * FROM table", summary="Summary", steps=[]), + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + SQLExpansion, + "sql_expansion", + query="query", + contexts=[], + history=AskHistory(sql="SELECT * FROM table", summary="Summary", steps=[]), + timezone=AskConfigurations.Timezone(name="UTC", utc_offset="+00:00"), ) - async_validate( - lambda: pipeline.run( - "this is a test query", - [], - AskHistory(sql="SELECT * FROM table", summary="Summary", steps=[]), - ) - ) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/sql_explanation.py b/wren-ai-service/src/pipelines/generation/sql_explanation.py index bf7e23269..f4c80845e 100644 --- a/wren-ai-service/src/pipelines/generation/sql_explanation.py +++ b/wren-ai-service/src/pipelines/generation/sql_explanation.py @@ -6,7 +6,7 @@ import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import component from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe @@ -656,38 +656,13 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, _ = init_providers(EngineConfig()) - pipeline = SQLExplanation( - llm_provider=llm_provider, - ) - - pipeline.visualize( - "this is a test question", - StepWithAnalysisResult( - sql="xxx", - summary="xxx", - sql_analysis_results=[], + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + SQLExplanation, + "sql_explanation", + question="this is a test question", + step_with_analysis_results=StepWithAnalysisResult( + sql="xxx", summary="xxx", sql_analysis_results=[] ), ) - async_validate( - lambda: pipeline.run( - "this is a test question", - StepWithAnalysisResult( - sql="xxx", - summary="xxx", - sql_analysis_results=[], - ), - ) - ) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/sql_generation.py b/wren-ai-service/src/pipelines/generation/sql_generation.py index 358e8a1df..3a21a8c79 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation.py @@ -4,7 +4,7 @@ from typing import Any, Dict, List from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe from pydantic import BaseModel @@ -221,23 +221,12 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, engine = init_providers(engine_config=EngineConfig()) - pipeline = SQLGeneration( - llm_provider=llm_provider, - engine=engine, + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + SQLGeneration, + "sql_generation", + query="this is a test query", + contexts=[], + exclude=[], ) - - pipeline.visualize("this is a test query", [], []) - async_validate(lambda: pipeline.run("this is a test query", [], [])) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/sql_regeneration.py b/wren-ai-service/src/pipelines/generation/sql_regeneration.py index e4cd4dddf..bfef62c51 100644 --- a/wren-ai-service/src/pipelines/generation/sql_regeneration.py +++ b/wren-ai-service/src/pipelines/generation/sql_regeneration.py @@ -4,7 +4,7 @@ from typing import Any, Dict, List from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import component from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe @@ -232,23 +232,11 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context + from src.pipelines.common import dry_run_pipeline - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, engine = init_providers(EngineConfig()) - pipeline = SQLRegeneration( - llm_provider=llm_provider, - engine=engine, + dry_run_pipeline( + SQLRegeneration, + "sql_regeneration", + description="This is a description", + steps=[], ) - - pipeline.visualize("This is a description", []) - async_validate(lambda: pipeline.run("This is a description", [])) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/generation/sql_summary.py b/wren-ai-service/src/pipelines/generation/sql_summary.py index 7fadc9850..e8987cfd7 100644 --- a/wren-ai-service/src/pipelines/generation/sql_summary.py +++ b/wren-ai-service/src/pipelines/generation/sql_summary.py @@ -5,7 +5,7 @@ import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import component from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe @@ -187,22 +187,12 @@ async def run( if __name__ == "__main__": - from langfuse.decorators import langfuse_context - - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - llm_provider, _, _, _ = init_providers(engine_config=EngineConfig()) - pipeline = SQLSummary( - llm_provider=llm_provider, + from src.pipelines.common import dry_run_pipeline + + dry_run_pipeline( + SQLSummary, + "sql_summary", + query="this is a test query", + sqls=[], + language="English", ) - - pipeline.visualize("", []) - async_validate(lambda: pipeline.run("", [])) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/indexing/indexing.py b/wren-ai-service/src/pipelines/indexing/indexing.py index b61c4d8de..ff8c8b642 100644 --- a/wren-ai-service/src/pipelines/indexing/indexing.py +++ b/wren-ai-service/src/pipelines/indexing/indexing.py @@ -9,7 +9,7 @@ import orjson from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from hamilton.function_modifiers import extract_fields from haystack import Document, component from haystack.components.writers import DocumentWriter @@ -669,25 +669,10 @@ async def run(self, mdl_str: str, id: Optional[str] = None) -> Dict[str, Any]: if __name__ == "__main__": - from langfuse.decorators import langfuse_context + from src.pipelines.common import dry_run_pipeline - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - _, embedder_provider, document_store_provider, _ = init_providers(EngineConfig()) - - pipeline = Indexing( - embedder_provider=embedder_provider, - document_store_provider=document_store_provider, + dry_run_pipeline( + Indexing, + "indexing", + mdl_str='{"models": [], "views": [], "relationships": [], "metrics": []}', ) - - input = '{"models": [], "views": [], "relationships": [], "metrics": []}' - pipeline.visualize(input) - async_validate(lambda: pipeline.run(input)) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/retrieval/historical_question.py b/wren-ai-service/src/pipelines/retrieval/historical_question.py index f950d671d..56b21423c 100644 --- a/wren-ai-service/src/pipelines/retrieval/historical_question.py +++ b/wren-ai-service/src/pipelines/retrieval/historical_question.py @@ -4,9 +4,9 @@ from typing import Any, Dict, List, Optional from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import Document, component -from haystack.document_stores.types import DocumentStore +from haystack_integrations.document_stores.qdrant import QdrantDocumentStore from langfuse.decorators import observe from src.core.pipeline import BasicPipeline @@ -57,7 +57,7 @@ def run(self, documents: List[Document]): ## Start of Pipeline @async_timer @observe(capture_input=False) -async def count_documents(store: DocumentStore, id: Optional[str] = None) -> int: +async def count_documents(store: QdrantDocumentStore, id: Optional[str] = None) -> int: filters = ( { "operator": "AND", @@ -187,26 +187,10 @@ async def run(self, query: str, id: Optional[str] = None): if __name__ == "__main__": - from langfuse.decorators import langfuse_context + from src.pipelines.common import dry_run_pipeline - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - _, embedder_provider, document_store_provider, _ = init_providers( - engine_config=EngineConfig() - ) - - pipeline = HistoricalQuestion( - embedder_provider=embedder_provider, - document_store_provider=document_store_provider, + dry_run_pipeline( + HistoricalQuestion, + "historical_question", + query="this is a test query", ) - - pipeline.visualize("this is a query") - async_validate(lambda: pipeline.run("this is a query")) - - langfuse_context.flush() diff --git a/wren-ai-service/src/pipelines/retrieval/retrieval.py b/wren-ai-service/src/pipelines/retrieval/retrieval.py index 8325d41e2..cdedd941d 100644 --- a/wren-ai-service/src/pipelines/retrieval/retrieval.py +++ b/wren-ai-service/src/pipelines/retrieval/retrieval.py @@ -7,7 +7,7 @@ import orjson import tiktoken from hamilton import base -from hamilton.experimental.h_async import AsyncDriver +from hamilton.async_driver import AsyncDriver from haystack import Document from haystack.components.builders.prompt_builder import PromptBuilder from langfuse.decorators import observe @@ -438,25 +438,10 @@ async def run(self, query: str, id: Optional[str] = None): if __name__ == "__main__": - from langfuse.decorators import langfuse_context + from src.pipelines.common import dry_run_pipeline - from src.core.engine import EngineConfig - from src.core.pipeline import async_validate - from src.providers import init_providers - from src.utils import init_langfuse, load_env_vars - - load_env_vars() - init_langfuse() - - _, embedder_provider, document_store_provider, _ = init_providers( - engine_config=EngineConfig() - ) - pipeline = Retrieval( - embedder_provider=embedder_provider, - document_store_provider=document_store_provider, + dry_run_pipeline( + Retrieval, + "retrieval", + query="this is a test query", ) - - pipeline.visualize("this is a query") - async_validate(lambda: pipeline.run("this is a query")) - - langfuse_context.flush() diff --git a/wren-ai-service/src/web/v1/services/__init__.py b/wren-ai-service/src/web/v1/services/__init__.py index 1a1c875b4..f5003c05b 100644 --- a/wren-ai-service/src/web/v1/services/__init__.py +++ b/wren-ai-service/src/web/v1/services/__init__.py @@ -37,7 +37,7 @@ def show_current_time(self): ) # Assuming timezone.name contains the timezone string current_time = datetime.now(tz) - return f'{current_time.strftime("%Y-%m-%d %A")}' # YYYY-MM-DD weekday_name, ex: 2024-10-23 Wednesday + return f'{current_time.strftime("%Y-%m-%d %A %H:%M:%S")}' # YYYY-MM-DD weekday_name HH:MM:SS, ex: 2024-10-23 Wednesday 12:00:00 fiscal_year: Optional[FiscalYear] = None language: Optional[str] = "English" From 434a4d1df3981949e9ca4b803363255f9c850ecf Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 21 Nov 2024 08:44:23 +0800 Subject: [PATCH 2/6] update ecommerce sample dataset --- .../sample_dataset/ecommerce_duckdb_mdl.json | 1948 ++++++++--------- 1 file changed, 974 insertions(+), 974 deletions(-) diff --git a/wren-ai-service/demo/sample_dataset/ecommerce_duckdb_mdl.json b/wren-ai-service/demo/sample_dataset/ecommerce_duckdb_mdl.json index 8a301f060..3edf70e46 100644 --- a/wren-ai-service/demo/sample_dataset/ecommerce_duckdb_mdl.json +++ b/wren-ai-service/demo/sample_dataset/ecommerce_duckdb_mdl.json @@ -1,976 +1,976 @@ { - "schema": "public", - "catalog": "wrenai", - "models": [ - { - "name": "olist_customers_dataset", - "columns": [ - { - "name": "customer_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": null, - "displayName": "customer_id" - } - }, - { - "name": "customer_unique_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique id of the customer", - "displayName": "customer_unique_id" - } - }, - { - "name": "customer_zip_code_prefix", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "First 5 digits of customer zip code", - "displayName": "customer_zip_code_prefix" - } - }, - { - "name": "customer_city", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Name of the city where the customer is located", - "displayName": "customer_city" - } - }, - { - "name": "customer_state", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Name of the state where the customer is located", - "displayName": "customer_state" - } - }, - { - "name": "olist_orders_dataset", - "type": "olist_orders_dataset", - "properties": null, - "relationship": "Olist_orders_datasetCustomer_idOlist_customers_datasetCustomer_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_geolocation_dataset", - "type": "olist_geolocation_dataset", - "properties": null, - "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_customers_datasetCustomer_zip_code_prefix", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_customers_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "customers" - }, - "primaryKey": "customer_id" - }, - { - "name": "olist_geolocation_dataset", - "columns": [ - { - "name": "geolocation_zip_code_prefix", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "displayName": "geolocation_zip_code_prefix", - "description": "First 5 digits of zip code" - } - }, - { - "name": "geolocation_lat", - "type": "DOUBLE", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "displayName": "geolocation_lat", - "description": "The coordinations for the locations latitude" - } - }, - { - "name": "geolocation_lng", - "type": "DOUBLE", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "displayName": "geolocation_lng", - "description": "The coordinations for the locations longitude" - } - }, - { - "name": "geolocation_city", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "displayName": "geolocation_city", - "description": "The city name of the geolocation" - } - }, - { - "name": "geolocation_state", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "displayName": "geolocation_state", - "description": "The state of the geolocation" - } - }, - { - "name": "olist_customers_dataset", - "type": "olist_customers_dataset", - "properties": null, - "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_customers_datasetCustomer_zip_code_prefix", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_sellers_dataset", - "type": "olist_sellers_dataset", - "properties": null, - "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_sellers_datasetSeller_zip_code_prefix", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_geolocation_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "geolocation", - "description": "This table contains detailed information about Brazilian zip codes and their corresponding latitude and longitude coordinates. It can be used to plot maps, calculate distances between sellers and customers, and perform geographic analysis." - }, - "primaryKey": "" - }, - { - "name": "olist_orders_dataset", - "columns": [ - { - "name": "order_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the specific order", - "displayName": "order_id" - } - }, - { - "name": "customer_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the customer who placed the order.", - "displayName": "customer_id" - } - }, - { - "name": "order_status", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Current status of the order (e.g., delivered, shipped, canceled).", - "displayName": "order_status" - } - }, - { - "name": "order_purchase_timestamp", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Date and time when the order was placed by the customer.", - "displayName": "order_purchase_timestamp" - } - }, - { - "name": "order_approved_at", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Date and time when the order was approved for processing.", - "displayName": "order_approved_at" - } - }, - { - "name": "order_delivered_carrier_date", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Date when the order was handed over to the carrier or freight forwarder for delivery.", - "displayName": "order_delivered_carrier_date" - } - }, - { - "name": "order_delivered_customer_date", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Date when the order was delivered to the customer.", - "displayName": "order_delivered_customer_date" - } - }, - { - "name": "order_estimated_delivery_date", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Expected delivery date based on the initial estimate.", - "displayName": "order_estimated_delivery_date" - } - }, - { - "name": "olist_customers_dataset", - "type": "olist_customers_dataset", - "properties": null, - "relationship": "Olist_orders_datasetCustomer_idOlist_customers_datasetCustomer_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_order_items_dataset", - "type": "olist_order_items_dataset", - "properties": null, - "relationship": "Olist_orders_datasetOrder_idOlist_order_items_datasetOrder_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_order_reviews_dataset", - "type": "olist_order_reviews_dataset", - "properties": null, - "relationship": "Olist_orders_datasetOrder_idOlist_order_reviews_datasetOrder_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_order_payments_dataset", - "type": "olist_order_payments_dataset", - "properties": null, - "relationship": "Olist_orders_datasetOrder_idOlist_order_payments_datasetOrder_id", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_orders_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "orders", - "description": "This table contains detailed information about customer orders, including timestamps for various stages of the order process (approval, shipping, delivery), as well as the order status and customer identification. It helps track the lifecycle of an order from purchase to delivery." - }, - "primaryKey": "order_id" - }, - { - "name": "olist_order_items_dataset", - "columns": [ - { - "name": "order_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the order across the platform", - "displayName": "order_id" - } - }, - { - "name": "order_item_id", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for each item within a specific order", - "displayName": "order_item_id" - } - }, - { - "name": "product_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the product sold in the order.", - "displayName": "product_id" - } - }, - { - "name": "seller_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier of the seller who fulfilled the order item.", - "displayName": "seller_id" - } - }, - { - "name": "shipping_limit_date", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Deadline for the order item to be shipped by the seller.", - "displayName": "shipping_limit_date" - } - }, - { - "name": "price", - "type": "DOUBLE", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Price of the individual item within the order", - "displayName": "price" - } - }, - { - "name": "freight_value", - "type": "DOUBLE", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Cost of shipping associated with the specific order item", - "displayName": "freight_value" - } - }, - { - "name": "olist_orders_dataset", - "type": "olist_orders_dataset", - "properties": null, - "relationship": "Olist_orders_datasetOrder_idOlist_order_items_datasetOrder_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_products_dataset", - "type": "olist_products_dataset", - "properties": null, - "relationship": "Olist_order_items_datasetProduct_idOlist_products_datasetProduct_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_sellers_dataset", - "type": "olist_sellers_dataset", - "properties": null, - "relationship": "Olist_order_items_datasetSeller_idOlist_sellers_datasetSeller_id", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_order_items_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "order items", - "description": "This table contains the information related to a specific order containing its shipping cost, products, cost, number of order items, and the seller." - }, - "primaryKey": "order_item_id" - }, - { - "name": "olist_order_payments_dataset", - "columns": [ - { - "name": "order_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the order associated with the payment.", - "displayName": "order_id" - } - }, - { - "name": "payment_sequential", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Sequence number for tracking multiple payments within the same order.", - "displayName": "payment_sequential" - } - }, - { - "name": "payment_type", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Method used for the payment, such as credit card, debit, or voucher.", - "displayName": "payment_type" - } - }, - { - "name": "payment_installments", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Number of installments the payment is divided into for the order.", - "displayName": "payment_installments" - } - }, - { - "name": "payment_value", - "type": "DOUBLE", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Total amount paid in the specific transaction.", - "displayName": "payment_value" - } - }, - { - "name": "olist_orders_dataset", - "type": "olist_orders_dataset", - "properties": null, - "relationship": "Olist_orders_datasetOrder_idOlist_order_payments_datasetOrder_id", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_order_payments_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "order payments", - "description": "This table contains information about payment details for each order, including payment methods, amounts, installment plans, and payment sequences, helping to track how orders were paid and processed within the e-commerce platform." - }, - "primaryKey": "order_id" - }, - { - "name": "olist_order_reviews_dataset", - "columns": [ - { - "name": "review_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the specific review entry.", - "displayName": "review_id" - } - }, - { - "name": "order_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier linking the review to the corresponding order.", - "displayName": "order_id" - } - }, - { - "name": "review_score", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Numeric rating given by the customer, typically ranging from 1 (worst) to 5 (best).", - "displayName": "review_score" - } - }, - { - "name": "review_comment_title", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Summary or title of the customer's review", - "displayName": "review_comment_title" - } - }, - { - "name": "review_comment_message", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Detailed feedback or comments provided by the customer regarding the order.", - "displayName": "review_comment_message" - } - }, - { - "name": "review_creation_date", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Date and time when the customer initially submitted the review.", - "displayName": "review_creation_date" - } - }, - { - "name": "review_answer_timestamp", - "type": "TIMESTAMP", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Date and time when the review was responded to by the seller", - "displayName": "review_answer_timestamp" - } - }, - { - "name": "olist_orders_dataset", - "type": "olist_orders_dataset", - "properties": null, - "relationship": "Olist_orders_datasetOrder_idOlist_order_reviews_datasetOrder_id", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_order_reviews_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "order reviews", - "description": "This table contains customer reviews for each order, including feedback comments, ratings, and timestamps for when the review was submitted and responded to. It helps track customer satisfaction and review management on the e-commerce platform." - }, - "primaryKey": "review_id" - }, - { - "name": "olist_products_dataset", - "columns": [ - { - "name": "product_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the product", - "displayName": "product_id" - } - }, - { - "name": "product_category_name", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Name of the product category to which the item belongs.", - "displayName": "product_category_name" - } - }, - { - "name": "product_name_lenght", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Length of the product name in characters", - "displayName": "product_name_lenght" - } - }, - { - "name": "product_description_lenght", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Length of the product description in characters.", - "displayName": "product_description_lenght" - } - }, - { - "name": "product_photos_qty", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Number of photos available for the product", - "displayName": "product_photos_qty" - } - }, - { - "name": "product_weight_g", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Weight of the product in grams", - "displayName": "product_weight_g" - } - }, - { - "name": "product_length_cm", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Length of the product in centimeters", - "displayName": "product_length_cm" - } - }, - { - "name": "product_height_cm", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Height of the product in centimeters.", - "displayName": "product_height_cm" - } - }, - { - "name": "product_width_cm", - "type": "BIGINT", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Width of the product in centimeters", - "displayName": "product_width_cm" - } - }, - { - "name": "olist_order_items_dataset", - "type": "olist_order_items_dataset", - "properties": null, - "relationship": "Olist_order_items_datasetProduct_idOlist_products_datasetProduct_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "product_category_name_translation", - "type": "product_category_name_translation", - "properties": null, - "relationship": "Product_category_name_translationProduct_category_nameOlist_products_datasetProduct_category_name", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_products_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "products", - "description": "This table provides detailed information about products, including their category, dimensions, weight, description length, and the number of photos. This helps in managing product details and enhancing the shopping experience on the e-commerce platform." - }, - "primaryKey": "product_id" - }, - { - "name": "olist_sellers_dataset", - "columns": [ - { - "name": "seller_id", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Unique identifier for the seller on the platform", - "displayName": "seller_id" - } - }, - { - "name": "seller_zip_code_prefix", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "First 5 digits of seller zip code", - "displayName": "seller_zip_code_prefix" - } - }, - { - "name": "seller_city", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "The Brazilian city where the seller is located", - "displayName": "seller_city" - } - }, - { - "name": "seller_state", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "The Brazilian state where the seller is located", - "displayName": "seller_state" - } - }, - { - "name": "olist_order_items_dataset", - "type": "olist_order_items_dataset", - "properties": null, - "relationship": "Olist_order_items_datasetSeller_idOlist_sellers_datasetSeller_id", - "isCalculated": false, - "notNull": false - }, - { - "name": "olist_geolocation_dataset", - "type": "olist_geolocation_dataset", - "properties": null, - "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_sellers_datasetSeller_zip_code_prefix", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "olist_sellers_dataset" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "sellers", - "description": "This table includes data about the sellers that fulfilled orders made. Use it to find the seller location and to identify which seller fulfilled each product." - }, - "primaryKey": "" - }, - { - "name": "product_category_name_translation", - "columns": [ - { - "name": "product_category_name", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Original name of the product category in Portuguese.", - "displayName": "product_category_name" - } - }, - { - "name": "product_category_name_english", - "type": "VARCHAR", - "isCalculated": false, - "notNull": false, - "expression": "", - "properties": { - "description": "Translated name of the product category in English.", - "displayName": "product_category_name_english" - } - }, - { - "name": "olist_products_dataset", - "type": "olist_products_dataset", - "properties": null, - "relationship": "Product_category_name_translationProduct_category_nameOlist_products_datasetProduct_category_name", - "isCalculated": false, - "notNull": false - } - ], - "tableReference": { - "catalog": "memory", - "schema": "main", - "table": "product_category_name_translation" - }, - "refSql": null, - "cached": false, - "refreshTime": null, - "properties": { - "displayName": "product category name translation", - "description": "This table contains translations of product categories from Portuguese to English." - }, - "primaryKey": "product_category_name" - } - ], - "relationships": [ - { - "name": "Olist_orders_datasetCustomer_idOlist_customers_datasetCustomer_id", - "models": [ - "olist_orders_dataset", - "olist_customers_dataset" - ], - "joinType": "MANY_TO_ONE", - "condition": "\"olist_orders_dataset\".customer_id = \"olist_customers_dataset\".customer_id", - "properties": {} - }, - { - "name": "Olist_orders_datasetOrder_idOlist_order_items_datasetOrder_id", - "models": [ - "olist_orders_dataset", - "olist_order_items_dataset" - ], - "joinType": "ONE_TO_MANY", - "condition": "\"olist_orders_dataset\".order_id = \"olist_order_items_dataset\".order_id", - "properties": {} - }, - { - "name": "Olist_orders_datasetOrder_idOlist_order_reviews_datasetOrder_id", - "models": [ - "olist_orders_dataset", - "olist_order_reviews_dataset" - ], - "joinType": "ONE_TO_MANY", - "condition": "\"olist_orders_dataset\".order_id = \"olist_order_reviews_dataset\".order_id", - "properties": {} - }, - { - "name": "Olist_orders_datasetOrder_idOlist_order_payments_datasetOrder_id", - "models": [ - "olist_orders_dataset", - "olist_order_payments_dataset" - ], - "joinType": "ONE_TO_MANY", - "condition": "\"olist_orders_dataset\".order_id = \"olist_order_payments_dataset\".order_id", - "properties": {} - }, - { - "name": "Olist_order_items_datasetProduct_idOlist_products_datasetProduct_id", - "models": [ - "olist_order_items_dataset", - "olist_products_dataset" - ], - "joinType": "MANY_TO_ONE", - "condition": "\"olist_order_items_dataset\".product_id = \"olist_products_dataset\".product_id", - "properties": {} - }, - { - "name": "Olist_order_items_datasetSeller_idOlist_sellers_datasetSeller_id", - "models": [ - "olist_order_items_dataset", - "olist_sellers_dataset" - ], - "joinType": "MANY_TO_ONE", - "condition": "\"olist_order_items_dataset\".seller_id = \"olist_sellers_dataset\".seller_id", - "properties": {} - }, - { - "name": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_customers_datasetCustomer_zip_code_prefix", - "models": [ - "olist_geolocation_dataset", - "olist_customers_dataset" - ], - "joinType": "ONE_TO_MANY", - "condition": "\"olist_geolocation_dataset\".geolocation_zip_code_prefix = \"olist_customers_dataset\".customer_zip_code_prefix", - "properties": {} - }, - { - "name": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_sellers_datasetSeller_zip_code_prefix", - "models": [ - "olist_geolocation_dataset", - "olist_sellers_dataset" - ], - "joinType": "ONE_TO_MANY", - "condition": "\"olist_geolocation_dataset\".geolocation_zip_code_prefix = \"olist_sellers_dataset\".seller_zip_code_prefix", - "properties": {} - }, - { - "name": "Product_category_name_translationProduct_category_nameOlist_products_datasetProduct_category_name", - "models": [ - "product_category_name_translation", - "olist_products_dataset" - ], - "joinType": "ONE_TO_MANY", - "condition": "\"product_category_name_translation\".product_category_name = \"olist_products_dataset\".product_category_name", - "properties": {} - } - ], - "views": [] + "schema": "public", + "catalog": "wrenai", + "models": [ + { + "name": "olist_customers_dataset", + "columns": [ + { + "name": "customer_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": null, + "displayName": "customer_id" + } + }, + { + "name": "customer_unique_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique id of the customer", + "displayName": "customer_unique_id" + } + }, + { + "name": "customer_zip_code_prefix", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "First 5 digits of customer zip code", + "displayName": "customer_zip_code_prefix" + } + }, + { + "name": "customer_city", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Name of the city where the customer is located", + "displayName": "customer_city" + } + }, + { + "name": "customer_state", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Name of the state where the customer is located", + "displayName": "customer_state" + } + }, + { + "name": "olist_orders_dataset", + "type": "olist_orders_dataset", + "properties": null, + "relationship": "Olist_orders_datasetCustomer_idOlist_customers_datasetCustomer_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_geolocation_dataset", + "type": "olist_geolocation_dataset", + "properties": null, + "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_customers_datasetCustomer_zip_code_prefix", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_customers_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "customers" + }, + "primaryKey": "customer_id" + }, + { + "name": "olist_geolocation_dataset", + "columns": [ + { + "name": "geolocation_zip_code_prefix", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "displayName": "geolocation_zip_code_prefix", + "description": "First 5 digits of zip code" + } + }, + { + "name": "geolocation_lat", + "type": "DOUBLE", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "displayName": "geolocation_lat", + "description": "The coordinations for the locations latitude" + } + }, + { + "name": "geolocation_lng", + "type": "DOUBLE", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "displayName": "geolocation_lng", + "description": "The coordinations for the locations longitude" + } + }, + { + "name": "geolocation_city", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "displayName": "geolocation_city", + "description": "The city name of the geolocation" + } + }, + { + "name": "geolocation_state", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "displayName": "geolocation_state", + "description": "The state of the geolocation" + } + }, + { + "name": "olist_customers_dataset", + "type": "olist_customers_dataset", + "properties": null, + "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_customers_datasetCustomer_zip_code_prefix", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_sellers_dataset", + "type": "olist_sellers_dataset", + "properties": null, + "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_sellers_datasetSeller_zip_code_prefix", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_geolocation_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "geolocation", + "description": "This table contains detailed information about Brazilian zip codes and their corresponding latitude and longitude coordinates. It can be used to plot maps, calculate distances between sellers and customers, and perform geographic analysis." + }, + "primaryKey": "" + }, + { + "name": "olist_orders_dataset", + "columns": [ + { + "name": "order_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the specific order", + "displayName": "order_id" + } + }, + { + "name": "customer_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the customer who placed the order.", + "displayName": "customer_id" + } + }, + { + "name": "order_status", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Current status of the order (e.g., delivered, shipped, canceled).", + "displayName": "order_status" + } + }, + { + "name": "order_purchase_timestamp", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Date and time when the order was placed by the customer.", + "displayName": "order_purchase_timestamp" + } + }, + { + "name": "order_approved_at", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Date and time when the order was approved for processing.", + "displayName": "order_approved_at" + } + }, + { + "name": "order_delivered_carrier_date", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Date when the order was handed over to the carrier or freight forwarder for delivery.", + "displayName": "order_delivered_carrier_date" + } + }, + { + "name": "order_delivered_customer_date", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Date when the order was delivered to the customer.", + "displayName": "order_delivered_customer_date" + } + }, + { + "name": "order_estimated_delivery_date", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Expected delivery date based on the initial estimate.", + "displayName": "order_estimated_delivery_date" + } + }, + { + "name": "olist_customers_dataset", + "type": "olist_customers_dataset", + "properties": null, + "relationship": "Olist_orders_datasetCustomer_idOlist_customers_datasetCustomer_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_order_items_dataset", + "type": "olist_order_items_dataset", + "properties": null, + "relationship": "Olist_orders_datasetOrder_idOlist_order_items_datasetOrder_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_order_reviews_dataset", + "type": "olist_order_reviews_dataset", + "properties": null, + "relationship": "Olist_orders_datasetOrder_idOlist_order_reviews_datasetOrder_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_order_payments_dataset", + "type": "olist_order_payments_dataset", + "properties": null, + "relationship": "Olist_orders_datasetOrder_idOlist_order_payments_datasetOrder_id", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_orders_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "orders", + "description": "This table contains detailed information about customer orders, including timestamps for various stages of the order process (approval, shipping, delivery), as well as the order status and customer identification. It helps track the lifecycle of an order from purchase to delivery." + }, + "primaryKey": "order_id" + }, + { + "name": "olist_order_items_dataset", + "columns": [ + { + "name": "order_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the order across the platform", + "displayName": "order_id" + } + }, + { + "name": "order_item_id", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for each item within a specific order", + "displayName": "order_item_id" + } + }, + { + "name": "product_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the product sold in the order.", + "displayName": "product_id" + } + }, + { + "name": "seller_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier of the seller who fulfilled the order item.", + "displayName": "seller_id" + } + }, + { + "name": "shipping_limit_date", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Deadline for the order item to be shipped by the seller.", + "displayName": "shipping_limit_date" + } + }, + { + "name": "price", + "type": "DOUBLE", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Price of the individual item within the order", + "displayName": "price" + } + }, + { + "name": "freight_value", + "type": "DOUBLE", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Cost of shipping associated with the specific order item", + "displayName": "freight_value" + } + }, + { + "name": "olist_orders_dataset", + "type": "olist_orders_dataset", + "properties": null, + "relationship": "Olist_orders_datasetOrder_idOlist_order_items_datasetOrder_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_products_dataset", + "type": "olist_products_dataset", + "properties": null, + "relationship": "Olist_order_items_datasetProduct_idOlist_products_datasetProduct_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_sellers_dataset", + "type": "olist_sellers_dataset", + "properties": null, + "relationship": "Olist_order_items_datasetSeller_idOlist_sellers_datasetSeller_id", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_order_items_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "order items", + "description": "This table contains the information related to a specific order containing its shipping cost, products, cost, number of order items, and the seller." + }, + "primaryKey": "order_item_id" + }, + { + "name": "olist_order_payments_dataset", + "columns": [ + { + "name": "order_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the order associated with the payment.", + "displayName": "order_id" + } + }, + { + "name": "payment_sequential", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Sequence number for tracking multiple payments within the same order.", + "displayName": "payment_sequential" + } + }, + { + "name": "payment_type", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Method used for the payment, such as credit card, debit, or voucher.", + "displayName": "payment_type" + } + }, + { + "name": "payment_installments", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Number of installments the payment is divided into for the order.", + "displayName": "payment_installments" + } + }, + { + "name": "payment_value", + "type": "DOUBLE", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Total amount paid in the specific transaction.", + "displayName": "payment_value" + } + }, + { + "name": "olist_orders_dataset", + "type": "olist_orders_dataset", + "properties": null, + "relationship": "Olist_orders_datasetOrder_idOlist_order_payments_datasetOrder_id", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_order_payments_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "order payments", + "description": "This table contains information about payment details for each order, including payment methods, amounts, installment plans, and payment sequences, helping to track how orders were paid and processed within the e-commerce platform." + }, + "primaryKey": "order_id" + }, + { + "name": "olist_order_reviews_dataset", + "columns": [ + { + "name": "review_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the specific review entry.", + "displayName": "review_id" + } + }, + { + "name": "order_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier linking the review to the corresponding order.", + "displayName": "order_id" + } + }, + { + "name": "review_score", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Numeric rating given by the customer, typically ranging from 1 (worst) to 5 (best).", + "displayName": "review_score" + } + }, + { + "name": "review_comment_title", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Summary or title of the customer's review", + "displayName": "review_comment_title" + } + }, + { + "name": "review_comment_message", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Detailed feedback or comments provided by the customer regarding the order.", + "displayName": "review_comment_message" + } + }, + { + "name": "review_creation_date", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Date and time when the customer initially submitted the review.", + "displayName": "review_creation_date" + } + }, + { + "name": "review_answer_timestamp", + "type": "TIMESTAMP", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Date and time when the review was responded to by the seller", + "displayName": "review_answer_timestamp" + } + }, + { + "name": "olist_orders_dataset", + "type": "olist_orders_dataset", + "properties": null, + "relationship": "Olist_orders_datasetOrder_idOlist_order_reviews_datasetOrder_id", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_order_reviews_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "order reviews", + "description": "This table contains customer reviews for each order, including feedback comments, ratings, and timestamps for when the review was submitted and responded to. It helps track customer satisfaction and review management on the e-commerce platform." + }, + "primaryKey": "review_id" + }, + { + "name": "olist_products_dataset", + "columns": [ + { + "name": "product_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the product", + "displayName": "product_id" + } + }, + { + "name": "product_category_name", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Name of the product category to which the item belongs.", + "displayName": "product_category_name" + } + }, + { + "name": "product_name_lenght", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Length of the product name in characters", + "displayName": "product_name_lenght" + } + }, + { + "name": "product_description_lenght", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Length of the product description in characters.", + "displayName": "product_description_lenght" + } + }, + { + "name": "product_photos_qty", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Number of photos available for the product", + "displayName": "product_photos_qty" + } + }, + { + "name": "product_weight_g", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Weight of the product in grams", + "displayName": "product_weight_g" + } + }, + { + "name": "product_length_cm", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Length of the product in centimeters", + "displayName": "product_length_cm" + } + }, + { + "name": "product_height_cm", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Height of the product in centimeters.", + "displayName": "product_height_cm" + } + }, + { + "name": "product_width_cm", + "type": "BIGINT", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Width of the product in centimeters", + "displayName": "product_width_cm" + } + }, + { + "name": "olist_order_items_dataset", + "type": "olist_order_items_dataset", + "properties": null, + "relationship": "Olist_order_items_datasetProduct_idOlist_products_datasetProduct_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "product_category_name_translation", + "type": "product_category_name_translation", + "properties": null, + "relationship": "Product_category_name_translationProduct_category_nameOlist_products_datasetProduct_category_name", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_products_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "products", + "description": "This table provides detailed information about products, including their category, dimensions, weight, description length, and the number of photos. This helps in managing product details and enhancing the shopping experience on the e-commerce platform." + }, + "primaryKey": "product_id" + }, + { + "name": "olist_sellers_dataset", + "columns": [ + { + "name": "seller_id", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Unique identifier for the seller on the platform", + "displayName": "seller_id" + } + }, + { + "name": "seller_zip_code_prefix", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "First 5 digits of seller zip code", + "displayName": "seller_zip_code_prefix" + } + }, + { + "name": "seller_city", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "The Brazilian city where the seller is located", + "displayName": "seller_city" + } + }, + { + "name": "seller_state", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "The Brazilian state where the seller is located", + "displayName": "seller_state" + } + }, + { + "name": "olist_order_items_dataset", + "type": "olist_order_items_dataset", + "properties": null, + "relationship": "Olist_order_items_datasetSeller_idOlist_sellers_datasetSeller_id", + "isCalculated": false, + "notNull": false + }, + { + "name": "olist_geolocation_dataset", + "type": "olist_geolocation_dataset", + "properties": null, + "relationship": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_sellers_datasetSeller_zip_code_prefix", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "olist_sellers_dataset" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "sellers", + "description": "This table includes data about the sellers that fulfilled orders made. Use it to find the seller location and to identify which seller fulfilled each product." + }, + "primaryKey": "" + }, + { + "name": "product_category_name_translation", + "columns": [ + { + "name": "product_category_name", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Original name of the product category in Portuguese.", + "displayName": "product_category_name" + } + }, + { + "name": "product_category_name_english", + "type": "VARCHAR", + "isCalculated": false, + "notNull": false, + "expression": "", + "properties": { + "description": "Translated name of the product category in English.", + "displayName": "product_category_name_english" + } + }, + { + "name": "olist_products_dataset", + "type": "olist_products_dataset", + "properties": null, + "relationship": "Product_category_name_translationProduct_category_nameOlist_products_datasetProduct_category_name", + "isCalculated": false, + "notNull": false + } + ], + "tableReference": { + "catalog": "memory", + "schema": "main", + "table": "product_category_name_translation" + }, + "refSql": null, + "cached": false, + "refreshTime": null, + "properties": { + "displayName": "product category name translation", + "description": "This table contains translations of product categories from Portuguese to English." + }, + "primaryKey": "product_category_name" + } + ], + "relationships": [ + { + "name": "Olist_orders_datasetCustomer_idOlist_customers_datasetCustomer_id", + "models": [ + "olist_orders_dataset", + "olist_customers_dataset" + ], + "joinType": "MANY_TO_ONE", + "condition": "\"olist_orders_dataset\".customer_id = \"olist_customers_dataset\".customer_id", + "properties": {} + }, + { + "name": "Olist_orders_datasetOrder_idOlist_order_items_datasetOrder_id", + "models": [ + "olist_orders_dataset", + "olist_order_items_dataset" + ], + "joinType": "ONE_TO_MANY", + "condition": "\"olist_orders_dataset\".order_id = \"olist_order_items_dataset\".order_id", + "properties": {} + }, + { + "name": "Olist_orders_datasetOrder_idOlist_order_reviews_datasetOrder_id", + "models": [ + "olist_orders_dataset", + "olist_order_reviews_dataset" + ], + "joinType": "ONE_TO_MANY", + "condition": "\"olist_orders_dataset\".order_id = \"olist_order_reviews_dataset\".order_id", + "properties": {} + }, + { + "name": "Olist_orders_datasetOrder_idOlist_order_payments_datasetOrder_id", + "models": [ + "olist_orders_dataset", + "olist_order_payments_dataset" + ], + "joinType": "ONE_TO_MANY", + "condition": "\"olist_orders_dataset\".order_id = \"olist_order_payments_dataset\".order_id", + "properties": {} + }, + { + "name": "Olist_order_items_datasetProduct_idOlist_products_datasetProduct_id", + "models": [ + "olist_order_items_dataset", + "olist_products_dataset" + ], + "joinType": "MANY_TO_ONE", + "condition": "\"olist_order_items_dataset\".product_id = \"olist_products_dataset\".product_id", + "properties": {} + }, + { + "name": "Olist_order_items_datasetSeller_idOlist_sellers_datasetSeller_id", + "models": [ + "olist_order_items_dataset", + "olist_sellers_dataset" + ], + "joinType": "MANY_TO_ONE", + "condition": "\"olist_order_items_dataset\".seller_id = \"olist_sellers_dataset\".seller_id", + "properties": {} + }, + { + "name": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_customers_datasetCustomer_zip_code_prefix", + "models": [ + "olist_geolocation_dataset", + "olist_customers_dataset" + ], + "joinType": "ONE_TO_MANY", + "condition": "\"olist_geolocation_dataset\".geolocation_zip_code_prefix = \"olist_customers_dataset\".customer_zip_code_prefix", + "properties": {} + }, + { + "name": "Olist_geolocation_datasetGeolocation_zip_code_prefixOlist_sellers_datasetSeller_zip_code_prefix", + "models": [ + "olist_geolocation_dataset", + "olist_sellers_dataset" + ], + "joinType": "ONE_TO_MANY", + "condition": "\"olist_geolocation_dataset\".geolocation_zip_code_prefix = \"olist_sellers_dataset\".seller_zip_code_prefix", + "properties": {} + }, + { + "name": "Product_category_name_translationProduct_category_nameOlist_products_datasetProduct_category_name", + "models": [ + "product_category_name_translation", + "olist_products_dataset" + ], + "joinType": "ONE_TO_MANY", + "condition": "\"product_category_name_translation\".product_category_name = \"olist_products_dataset\".product_category_name", + "properties": {} + } + ], + "views": [] } \ No newline at end of file From 7efa455c6228e9cdb9e6825f7e3c62a4c25bd4b9 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 21 Nov 2024 11:52:00 +0800 Subject: [PATCH 3/6] unify configuration for apis --- wren-ai-service/src/pipelines/common.py | 12 ++++++------ .../generation/followup_sql_generation.py | 17 +++++++++-------- .../src/pipelines/generation/sql_expansion.py | 11 ++++++----- .../src/pipelines/generation/sql_generation.py | 8 ++++---- wren-ai-service/src/web/v1/routers/ask.py | 2 +- .../src/web/v1/services/__init__.py | 1 + wren-ai-service/src/web/v1/services/ask.py | 18 ++---------------- .../src/web/v1/services/ask_details.py | 7 ++----- .../web/v1/services/question_recommendation.py | 3 +-- .../src/web/v1/services/sql_answer.py | 7 ++----- .../src/web/v1/services/sql_expansion.py | 12 +++--------- 11 files changed, 37 insertions(+), 61 deletions(-) diff --git a/wren-ai-service/src/pipelines/common.py b/wren-ai-service/src/pipelines/common.py index 131c054c5..556e2d0c1 100644 --- a/wren-ai-service/src/pipelines/common.py +++ b/wren-ai-service/src/pipelines/common.py @@ -14,7 +14,7 @@ clean_generation_result, ) from src.core.pipeline import BasicPipeline -from src.web.v1.services.ask import AskConfigurations +from src.web.v1.services import Configuration logger = logging.getLogger("wren-ai-service") @@ -445,16 +445,16 @@ async def _task(result: Dict[str, str]): """ -def construct_instructions(configurations: AskConfigurations | None): +def construct_instructions(configuration: Configuration | None): instructions = "" - if configurations: - if configurations.fiscal_year: - instructions += f"- For calendar year related computation, it should be started from {configurations.fiscal_year.start} to {configurations.fiscal_year.end}" + if configuration: + if configuration.fiscal_year: + instructions += f"- For calendar year related computation, it should be started from {configuration.fiscal_year.start} to {configuration.fiscal_year.end}" return instructions -def show_current_time(timezone: AskConfigurations.Timezone): +def show_current_time(timezone: Configuration.Timezone): # Get the current time in the specified timezone tz = pytz.timezone( timezone.name diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py index a2b1812a9..af4fb2a80 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py @@ -20,7 +20,8 @@ sql_generation_system_prompt, ) from src.utils import async_timer, timer -from src.web.v1.services.ask import AskConfigurations, AskHistory +from src.web.v1.services import Configuration +from src.web.v1.services.ask import AskHistory logger = logging.getLogger("wren-ai-service") @@ -115,7 +116,7 @@ def prompt( documents: List[str], history: AskHistory, alert: str, - configurations: AskConfigurations, + configuration: Configuration, prompt_builder: PromptBuilder, ) -> dict: return prompt_builder.run( @@ -123,8 +124,8 @@ def prompt( documents=documents, history=history, alert=alert, - instructions=construct_instructions(configurations), - current_time=show_current_time(configurations.timezone), + instructions=construct_instructions(configuration), + current_time=show_current_time(configuration.timezone), ) @@ -199,7 +200,7 @@ def visualize( query: str, contexts: List[str], history: AskHistory, - configurations: AskConfigurations = AskConfigurations(), + configuration: Configuration = Configuration(), project_id: str | None = None, ) -> None: destination = "outputs/pipelines/generation" @@ -214,7 +215,7 @@ def visualize( "documents": contexts, "history": history, "project_id": project_id, - "configurations": configurations, + "configuration": configuration, **self._components, **self._configs, }, @@ -229,7 +230,7 @@ async def run( query: str, contexts: List[str], history: AskHistory, - configurations: AskConfigurations = AskConfigurations(), + configuration: Configuration = Configuration(), project_id: str | None = None, ): logger.info("Follow-Up SQL Generation pipeline is running...") @@ -240,7 +241,7 @@ async def run( "documents": contexts, "history": history, "project_id": project_id, - "configurations": configurations, + "configurations": configuration, **self._components, **self._configs, }, diff --git a/wren-ai-service/src/pipelines/generation/sql_expansion.py b/wren-ai-service/src/pipelines/generation/sql_expansion.py index 564667299..b7eec7136 100644 --- a/wren-ai-service/src/pipelines/generation/sql_expansion.py +++ b/wren-ai-service/src/pipelines/generation/sql_expansion.py @@ -14,7 +14,8 @@ from src.core.provider import LLMProvider from src.pipelines.common import SQLGenPostProcessor, show_current_time from src.utils import async_timer, timer -from src.web.v1.services.ask import AskConfigurations, AskHistory +from src.web.v1.services import Configuration +from src.web.v1.services.ask import AskHistory logger = logging.getLogger("wren-ai-service") @@ -57,7 +58,7 @@ def prompt( query: str, documents: List[str], history: AskHistory, - timezone: AskConfigurations.Timezone, + timezone: Configuration.Timezone, prompt_builder: PromptBuilder, ) -> dict: return prompt_builder.run( @@ -135,7 +136,7 @@ def visualize( query: str, contexts: List[str], history: AskHistory, - timezone: AskConfigurations.Timezone, + timezone: Configuration.Timezone, project_id: str | None = None, ) -> None: destination = "outputs/pipelines/generation" @@ -164,7 +165,7 @@ async def run( query: str, contexts: List[str], history: AskHistory, - timezone: AskConfigurations.Timezone = AskConfigurations().timezone, + timezone: Configuration.Timezone = Configuration().timezone, project_id: str | None = None, ): logger.info("Sql Expansion Generation pipeline is running...") @@ -190,5 +191,5 @@ async def run( query="query", contexts=[], history=AskHistory(sql="SELECT * FROM table", summary="Summary", steps=[]), - timezone=AskConfigurations.Timezone(name="UTC", utc_offset="+00:00"), + timezone=Configuration.Timezone(name="UTC", utc_offset="+00:00"), ) diff --git a/wren-ai-service/src/pipelines/generation/sql_generation.py b/wren-ai-service/src/pipelines/generation/sql_generation.py index 3a21a8c79..d34088392 100644 --- a/wren-ai-service/src/pipelines/generation/sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/sql_generation.py @@ -20,7 +20,7 @@ sql_generation_system_prompt, ) from src.utils import async_timer, timer -from src.web.v1.services.ask import AskConfigurations +from src.web.v1.services import Configuration logger = logging.getLogger("wren-ai-service") @@ -84,7 +84,7 @@ def prompt( exclude: List[Dict], text_to_sql_rules: str, prompt_builder: PromptBuilder, - configurations: AskConfigurations | None = None, + configurations: Configuration | None = None, samples: List[Dict] | None = None, ) -> dict: return prompt_builder.run( @@ -168,7 +168,7 @@ def visualize( query: str, contexts: List[str], exclude: List[Dict], - configurations: AskConfigurations = AskConfigurations(), + configurations: Configuration = Configuration(), samples: List[Dict] | None = None, project_id: str | None = None, ) -> None: @@ -200,7 +200,7 @@ async def run( query: str, contexts: List[str], exclude: List[Dict], - configurations: AskConfigurations = AskConfigurations(), + configurations: Configuration = Configuration(), samples: List[Dict] | None = None, project_id: str | None = None, ): diff --git a/wren-ai-service/src/web/v1/routers/ask.py b/wren-ai-service/src/web/v1/routers/ask.py index 9b755c21e..84ec54bad 100644 --- a/wren-ai-service/src/web/v1/routers/ask.py +++ b/wren-ai-service/src/web/v1/routers/ask.py @@ -36,7 +36,7 @@ - `thread_id`: (Optional) Thread identifier for the query. - `user_id`: (Optional) User identifier. - `history`: (Optional) Query history (SQL steps). - - `configurations`: (Optional) Configurations such as fiscal year. + - `configurations`: (Optional) Configuration such as fiscal year. - **Response**: - `query_id`: A unique identifier (UUID) for tracking the query. diff --git a/wren-ai-service/src/web/v1/services/__init__.py b/wren-ai-service/src/web/v1/services/__init__.py index f5003c05b..3e0f788dd 100644 --- a/wren-ai-service/src/web/v1/services/__init__.py +++ b/wren-ai-service/src/web/v1/services/__init__.py @@ -29,6 +29,7 @@ class FiscalYear(BaseModel): class Timezone(BaseModel): name: str = "Asia/Taipei" + utc_offset: str = "" # Deprecated, will be removed in the future def show_current_time(self): # Get the current time in the specified timezone diff --git a/wren-ai-service/src/web/v1/services/ask.py b/wren-ai-service/src/web/v1/services/ask.py index ea9d201a9..4f534ae10 100644 --- a/wren-ai-service/src/web/v1/services/ask.py +++ b/wren-ai-service/src/web/v1/services/ask.py @@ -8,7 +8,7 @@ from src.core.pipeline import BasicPipeline from src.utils import async_timer, trace_metadata -from src.web.v1.services import SSEEvent +from src.web.v1.services import Configuration, SSEEvent from src.web.v1.services.ask_details import SQLBreakdown logger = logging.getLogger("wren-ai-service") @@ -19,20 +19,6 @@ class AskHistory(BaseModel): steps: List[SQLBreakdown] -class AskConfigurations(BaseModel): - class FiscalYear(BaseModel): - start: str - end: str - - class Timezone(BaseModel): - name: str - utc_offset: str - - fiscal_year: Optional[FiscalYear] = None - language: Optional[str] = "English" - timezone: Optional[Timezone] = Timezone(name="Asia/Taipei", utc_offset="+8:00") - - # POST /v1/asks class AskRequest(BaseModel): _query_id: str | None = None @@ -45,7 +31,7 @@ class AskRequest(BaseModel): thread_id: Optional[str] = None user_id: Optional[str] = None history: Optional[AskHistory] = None - configurations: Optional[AskConfigurations] = AskConfigurations() + configurations: Optional[Configuration] = Configuration() @property def query_id(self) -> str: diff --git a/wren-ai-service/src/web/v1/services/ask_details.py b/wren-ai-service/src/web/v1/services/ask_details.py index d06a8c55c..4405172ad 100644 --- a/wren-ai-service/src/web/v1/services/ask_details.py +++ b/wren-ai-service/src/web/v1/services/ask_details.py @@ -8,6 +8,7 @@ from src.core.engine import add_quotes from src.utils import async_timer, trace_metadata +from src.web.v1.services import Configuration logger = logging.getLogger("wren-ai-service") @@ -19,10 +20,6 @@ class SQLBreakdown(BaseModel): # POST /v1/ask-details -class AskDetailsConfigurations(BaseModel): - language: str = "English" - - class AskDetailsRequest(BaseModel): _query_id: str | None = None query: str @@ -31,7 +28,7 @@ class AskDetailsRequest(BaseModel): thread_id: Optional[str] = None project_id: Optional[str] = None user_id: Optional[str] = None - configurations: AskDetailsConfigurations = AskDetailsConfigurations() + configurations: Configuration = Configuration() @property def query_id(self) -> str: diff --git a/wren-ai-service/src/web/v1/services/question_recommendation.py b/wren-ai-service/src/web/v1/services/question_recommendation.py index dc87e24ca..0dfe78b35 100644 --- a/wren-ai-service/src/web/v1/services/question_recommendation.py +++ b/wren-ai-service/src/web/v1/services/question_recommendation.py @@ -10,7 +10,6 @@ from src.core.pipeline import BasicPipeline from src.utils import trace_metadata from src.web.v1.services import Configuration, MetadataTraceable -from src.web.v1.services.ask import AskConfigurations logger = logging.getLogger("wren-ai-service") @@ -79,7 +78,7 @@ async def _validate_question( query=candidate["question"], contexts=documents, exclude=[], - configurations=AskConfigurations(), + configurations=Configuration(), ) post_process = generated_sql["post_process"] diff --git a/wren-ai-service/src/web/v1/services/sql_answer.py b/wren-ai-service/src/web/v1/services/sql_answer.py index f5bfc4391..1bb45cb63 100644 --- a/wren-ai-service/src/web/v1/services/sql_answer.py +++ b/wren-ai-service/src/web/v1/services/sql_answer.py @@ -7,14 +7,11 @@ from src.core.pipeline import BasicPipeline from src.utils import async_timer, trace_metadata +from src.web.v1.services import Configuration logger = logging.getLogger("wren-ai-service") -class SqlAnswerConfigurations(BaseModel): - language: str = "English" - - # POST /v1/sql-answers class SqlAnswerRequest(BaseModel): _query_id: str | None = None @@ -22,7 +19,7 @@ class SqlAnswerRequest(BaseModel): sql: str thread_id: Optional[str] = None user_id: Optional[str] = None - configurations: Optional[SqlAnswerConfigurations] = SqlAnswerConfigurations() + configurations: Optional[Configuration] = Configuration() @property def query_id(self) -> str: diff --git a/wren-ai-service/src/web/v1/services/sql_expansion.py b/wren-ai-service/src/web/v1/services/sql_expansion.py index 07f9acd90..116ba3163 100644 --- a/wren-ai-service/src/web/v1/services/sql_expansion.py +++ b/wren-ai-service/src/web/v1/services/sql_expansion.py @@ -7,20 +7,14 @@ from src.core.pipeline import BasicPipeline from src.utils import async_timer, remove_sql_summary_duplicates, trace_metadata -from src.web.v1.services.ask import AskConfigurations, AskError, AskHistory +from src.web.v1.services import Configuration +from src.web.v1.services.ask import AskError, AskHistory from src.web.v1.services.ask_details import SQLBreakdown logger = logging.getLogger("wren-ai-service") # POST /v1/sql-expansions -class SqlExpansionConfigurations(BaseModel): - language: Optional[str] = "English" - timezone: Optional[AskConfigurations.Timezone] = AskConfigurations.Timezone( - name="Asia/Taipei", utc_offset="+8:00" - ) - - class SqlExpansionRequest(BaseModel): _query_id: str | None = None query: str @@ -30,7 +24,7 @@ class SqlExpansionRequest(BaseModel): mdl_hash: Optional[str] = None thread_id: Optional[str] = None user_id: Optional[str] = None - configurations: Optional[SqlExpansionConfigurations] = SqlExpansionConfigurations() + configurations: Optional[Configuration] = Configuration() @property def query_id(self) -> str: From 86c884ba48b6162a5d661d7d52364566af735f41 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 21 Nov 2024 12:04:05 +0800 Subject: [PATCH 4/6] fix bug --- wren-ai-service/tests/pytest/pipelines/test_ask.py | 9 +++++---- wren-ai-service/tests/pytest/services/mocks.py | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/wren-ai-service/tests/pytest/pipelines/test_ask.py b/wren-ai-service/tests/pytest/pipelines/test_ask.py index 49c4c17da..d53edf16b 100644 --- a/wren-ai-service/tests/pytest/pipelines/test_ask.py +++ b/wren-ai-service/tests/pytest/pipelines/test_ask.py @@ -11,7 +11,8 @@ from src.pipelines.indexing.indexing import Indexing from src.pipelines.retrieval.retrieval import Retrieval from src.providers import init_providers -from src.web.v1.services.ask import AskConfigurations, AskHistory +from src.web.v1.services import Configuration +from src.web.v1.services.ask import AskHistory from src.web.v1.services.ask_details import SQLBreakdown GLOBAL_DATA = { @@ -135,7 +136,7 @@ async def test_generation_pipeline(): "How many authors are there?", contexts=GLOBAL_DATA["contexts"], exclude=[], - configurations=AskConfigurations(), + configurations=Configuration(), ) # TODO: we'll refactor almost all test case with a mock server, thus temporarily only assert it is not None. @@ -146,7 +147,7 @@ async def test_generation_pipeline(): "How many authors are there?", contexts=GLOBAL_DATA["contexts"], exclude=[{"statement": "SELECT 1 FROM author"}], - configurations=AskConfigurations(), + configurations=Configuration(), ) assert generation_result["post_process"]["valid_generation_results"] is not None @@ -173,7 +174,7 @@ async def test_followup_generation_pipeline(): ) ], ), - configurations=AskConfigurations(), + configurations=Configuration(), ) # TODO: we'll refactor almost all test case with a mock server, thus temporarily only assert it is not None. diff --git a/wren-ai-service/tests/pytest/services/mocks.py b/wren-ai-service/tests/pytest/services/mocks.py index 3a206bf30..dc6b1b9e1 100644 --- a/wren-ai-service/tests/pytest/services/mocks.py +++ b/wren-ai-service/tests/pytest/services/mocks.py @@ -2,7 +2,7 @@ from src.pipelines.generation import intent_classification, sql_generation, sql_summary from src.pipelines.retrieval import historical_question, retrieval -from src.web.v1.services.ask import AskConfigurations +from src.web.v1.services import Configuration class RetrievalMock(retrieval.Retrieval): @@ -40,7 +40,7 @@ async def run( contexts: list[str], exclude: list[dict], project_id: str | None = None, - configurations: AskConfigurations | None = None, + configurations: Configuration | None = None, ): return { "post_process": { From f4883ed46c6105642602440c28f124266449659b Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 21 Nov 2024 12:17:29 +0800 Subject: [PATCH 5/6] fix --- wren-ai-service/tests/pytest/pipelines/test_ask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-ai-service/tests/pytest/pipelines/test_ask.py b/wren-ai-service/tests/pytest/pipelines/test_ask.py index d53edf16b..49c611809 100644 --- a/wren-ai-service/tests/pytest/pipelines/test_ask.py +++ b/wren-ai-service/tests/pytest/pipelines/test_ask.py @@ -174,7 +174,7 @@ async def test_followup_generation_pipeline(): ) ], ), - configurations=Configuration(), + configuration=Configuration(), ) # TODO: we'll refactor almost all test case with a mock server, thus temporarily only assert it is not None. From 7830f856ab7e574ee49eacc67e91cff449f0f694 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 21 Nov 2024 12:23:52 +0800 Subject: [PATCH 6/6] fix --- .../src/pipelines/generation/followup_sql_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py index af4fb2a80..67d826b7c 100644 --- a/wren-ai-service/src/pipelines/generation/followup_sql_generation.py +++ b/wren-ai-service/src/pipelines/generation/followup_sql_generation.py @@ -241,7 +241,7 @@ async def run( "documents": contexts, "history": history, "project_id": project_id, - "configurations": configuration, + "configuration": configuration, **self._components, **self._configs, },