DAGWorks-Inc · elijahbenizzy · Jun 13, 2024 · Jun 8, 2024 · Jun 11, 2024 · Jun 11, 2024
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,4 +1,6 @@
 include requirements.txt
 include requirements-test.txt
+include hamilton/server/requirements-mini.txt
+recursive-include hamilton/server/build *
 include LICENSE
 include *.md
diff --git a/README.md b/README.md
@@ -92,18 +92,20 @@ While it is installing we encourage you to start on the next section.
 Note: the content (i.e. names, function bodies) of our example code snippets are for illustrative purposes only, and don't reflect what we actually do internally.
 
 ## Hamilton in <15 minutes
-Hamilton is a new paradigm when it comes to creating, um, dataframes (let's use dataframes as an example, otherwise you can create _ANY_ python object).
-Rather than thinking about manipulating a central dataframe, as is normal in some data engineering/data science work,
-you instead think about the column(s) you want to create, and what inputs are required. There
-is no need for you to think about maintaining this dataframe, meaning you do not need to think about any "glue" code;
+Hamilton is a new paradigm when it comes to building datasets (in this case we'll use Hamilton to create columns of a
+dataframe as an example, but Hamilton can handle _any_ python object).
+
+Rather than thinking about manipulating a central object (dataframe in this case),
+you instead declare the components (columns in this case)/intermediate results you want to create, and the inputs that are required. There
+is no need for you to worry about maintaining this object, meaning you do not need to think about any "glue" code;
 this is all taken care of by the Hamilton framework.
 
-For example rather than writing the following to manipulate a central dataframe object `df`:
+For example, rather than writing the following to manipulate a central dataframe object `df`:
 ```python
 df['col_c'] = df['col_a'] + df['col_b']
 ```
 
-you write
+you would write
 ```python
 def col_c(col_a: pd.Series, col_b: pd.Series) -> pd.Series:
     """Creating column c from summing column a and column b."""
@@ -136,36 +138,28 @@ this just means these need to be provided as input when we come to actually want
 Note: functions can take or create scalar values, in addition to any python object type.
 
 2. Create a `my_script.py` which is where code will live to tell Hamilton what to do:
-```python
-import sys
-import logging
-import importlib
 
+```python
 import pandas as pd
+import my_functions
+
 from hamilton import driver
 
-logging.basicConfig(stream=sys.stdout)
-initial_columns = {  # load from actuals or wherever -- this is our initial data we use as input.
-    # Note: these do not have to be all series, they could be scalar inputs.
-    'signups': pd.Series([1, 10, 50, 100, 200, 400]),
-    'spend': pd.Series([10, 10, 20, 40, 40, 50]),
+# This uses one module, but you are free to pass in multiple
+dr = driver.Builder().with_modules(my_functions).build()
+
+# This is input data -- you can get it from anywhere
+initial_columns = {
+   'signups': pd.Series([1, 10, 50, 100, 200, 400]),
+   'spend': pd.Series([10, 10, 20, 40, 40, 50]),
 }
-# we need to tell hamilton where to load function definitions from
-module_name = 'my_functions'
-module = importlib.import_module(module_name) # or we could just do `import my_functions`
-dr = driver.Driver(initial_columns, module)  # can pass in multiple modules
-# we need to specify what we want in the final dataframe.
 output_columns = [
-    'spend',  # or module.spend
-    'signups',  # or module.signups
-    'avg_3wk_spend',  # or module.avg_3wk_spend
-    'spend_per_signup',  # or module.spend_per_signup
+   'spend',
+   'signups',
+   'avg_3wk_spend',
+   'spend_per_signup',
 ]
-# let's create the dataframe!
-# if you only did `pip install sf-hamilton` earlier:
-df = dr.execute(output_columns)
-# else if you did `pip install "sf-hamilton[visualization]"` earlier:
-# dr.visualize_execution(output_columns, './my-dag.dot', {})
+df = dr.execute(output_columns, inputs=initial_columns)
 print(df)
 ```
 3. Run my_script.py
@@ -189,6 +183,60 @@ nodes repeated.
 
 Congratulations - you just created your Hamilton dataflow that created a dataframe!
 
+### Tracking in the UI
+
+To get started with tracking in the UI, you'll first have to install Hamilton with the `ui` and `sdk` extras:
+
+```bash
+pip install "sf-hamilton[ui,sdk]"
+```
+
+Then, you can run the following code to start the UI:
+
+```bash
+hamilton ui
+```
+
+This will start the UI at [localhost:8241](http://localhost:8241). You can then navigate to the UI to see your dataflows.
+You will next want to create a project (you'll start with an empty project page), and remember the project ID (e.g. 2 in the following case).
+You will also be prompted to enter a username -- remember that as well!
+
+To track, we'll modify the driver you wrote above:
+
+```python
+import pandas as pd
+import my_functions
+from hamilton import driver
+from hamilton_sdk import adapters
+dr = (
+   driver
+   .Builder()
+   .with_modules(my_functions)
+   .with_adapters(adapters.HamiltonTracker(
+        username="elijah", # replace with your username
+        project_id=2,
+        dag_name="hello_world",
+    ))
+   .build()
+)
+
+# This is input data -- you can get it from anywhere
+initial_columns = {
+   'signups': pd.Series([1, 10, 50, 100, 200, 400]),
+   'spend': pd.Series([10, 10, 20, 40, 40, 50]),
+}
+output_columns = [
+   'spend',
+   'signups',
+   'avg_3wk_spend',
+   'spend_per_signup',
+]
+df = dr.execute(output_columns, inputs=initial_columns)
+print(df)
+```
+Run this script, navigate back to the UI/select your project, and click on the `runs`
+link on the left hand side. You'll see your run!
+
 ## Example Hamilton Dataflows
 We have a growing list of examples showcasing how one might use Hamilton. You currently have two places to find them:
 

diff --git a/docs/hamilton-ui/ui.rst b/docs/hamilton-ui/ui.rst
@@ -17,15 +17,37 @@ In short, the Hamilton UI aims to combine a large swath of MLOps/data observabil
 
 ---
 
+The Hamilton UI has two modes:
+
+1. Run locally using sqlite3
+2. Run on docker images with postgres (meant for deployment)
 
-The Hamilton UI is contained within a set of Docker images. You launch with `docker-compose <https://docs.docker.com/compose/>`_, and it will start up the UI, the backend server,
+----------
+Local Mode
+----------
+
+To run the hamilton UI in local mode, you can do the following:
+
+.. code-block:: bash
+
+    pip install "sf-hamilton[ui]"
+    hamilton ui
+
+This will launch a browser window at localhost:8241. You can then navigate to the UI and start using it!
+While this can potentially handle a small production workflow, you may want to run on postgres with a separate frontend/backend/db
+for full scalability and a multi-read/write db.
+
+--------------------
+Docker/Deployed Mode
+--------------------
+
+The Hamilton UI can be contained within a set of Docker images. You launch with `docker-compose <https://docs.docker.com/compose/>`_, and it will start up the UI, the backend server,
 and a Postgres database. If you'd like a quick overview of some of the features, you can watch the following:
 
 .. raw:: html
 
     <iframe width="560" height="315" src="https://www.youtube.com/embed/0VIVSeN7Ij8?si=i3vTsfTNorzh5y2C" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
 
--------
+
 Install
 -------
 

diff --git a/examples/hamilton_ui/requirements.txt b/examples/hamilton_ui/requirements.txt
@@ -1,4 +1,5 @@
 click
+fastparquet
 pandas
 scikit-learn
 sf-hamilton[sdk]
diff --git a/hamilton/admin.py b/hamilton/admin.py
@@ -0,0 +1,104 @@
+import os
+import shutil
+import subprocess
+import time
+import webbrowser
+from contextlib import contextmanager
+
+import click
+import requests
+from loguru import logger
+
+
+def _command(command: str, capture_output: bool) -> str:
+    """Runs a simple command in a subprocess, without going through a shell.
+
+    :param command: Command to run. A string is split on single spaces into an argument
+        list (shell quoting is not honored); an already-split argument list is run as-is.
+    :param capture_output: If True, capture stdout and return it. Otherwise the command
+        inherits this process's stdout/stderr.
+    :return: The decoded, stripped stdout when ``capture_output`` is True, otherwise None.
+    :raises subprocess.CalledProcessError: If the command exits with a non-zero status.
+    """
+    logger.info(f"Running command: {command}")
+    if isinstance(command, str):
+        command = command.split(" ")
+    # Execution lives outside the isinstance check on purpose -- nested inside it, a
+    # command passed as a list would be logged but never run.
+    if capture_output:
+        try:
+            return (
+                subprocess.check_output(command, stderr=subprocess.PIPE, shell=False)
+                .decode()
+                .strip()
+            )
+        except subprocess.CalledProcessError as e:
+            # Surface whatever the failed command printed before propagating the error
+            print(e.stdout.decode())
+            print(e.stderr.decode())
+            raise
+    subprocess.run(command, shell=False, check=True)
+
+
+def _get_git_root() -> str:
+    """Returns the output of ``git rev-parse --show-toplevel`` for the current directory."""
+    toplevel = _command("git rev-parse --show-toplevel", capture_output=True)
+    return toplevel
+
+
+def open_when_ready(check_url: str, open_url: str, request_timeout: float = 5.0):
+    """Polls ``check_url`` until it answers with a 200, then opens ``open_url`` in a browser.
+
+    Waits one second between attempts and keeps retrying for as long as the URL is
+    unreachable or returns a non-200 status.
+
+    :param check_url: URL to poll to find out whether the server is up.
+    :param open_url: URL to open in the web browser once the server is up.
+    :param request_timeout: Seconds to wait on a single poll before treating it as failed.
+    """
+    while True:
+        try:
+            # Without a timeout a server that accepts the connection but never replies
+            # would block this call forever and stall the polling loop.
+            response = requests.get(check_url, timeout=request_timeout)
+        except requests.exceptions.RequestException:
+            # Not reachable yet (or timed out) -- fall through to the sleep and retry
+            pass
+        else:
+            if response.status_code == 200:
+                webbrowser.open(open_url)
+                return
+        time.sleep(1)
+
+
+@contextmanager
+def cd(path):
+    """Context manager that changes the working directory to ``path`` for the ``with``
+    block, then changes back to the directory that was current on entry.
+
+    :param path: Directory to change into.
+    """
+    previous_dir = os.getcwd()
+    os.chdir(path)
+    try:
+        yield
+    finally:
+        # Runs even when the body raises, so the caller always ends up where it started
+        os.chdir(previous_dir)
+
+
+@click.group()
+def cli():
+    # Root click group; the commands in this module attach to it via @cli.command().
+    # Kept as a comment rather than a docstring so the group's help output is unchanged.
+    pass
+
+
+def _build_ui():
+    """Builds the frontend with npm and copies the output into ``hamilton/server/build``.
+
+    All paths are relative, so this has to be called with the repository root as the
+    working directory.
+
+    :raises subprocess.CalledProcessError: If the npm build fails.
+    """
+    # building the UI
+    cmd = "npm run build --prefix ui/frontend"
+    _command(cmd, capture_output=False)
+    # wiping the old build if it exists -- shutil rather than `rm`/`cp` so this does not
+    # depend on those binaries being available on the platform
+    build_dir = "hamilton/server/build"
+    logger.info(f"Replacing {build_dir} with ui/frontend/build")
+    shutil.rmtree(build_dir, ignore_errors=True)
+    # symlinks=True copies links as links instead of following them
+    shutil.copytree("ui/frontend/build", build_dir, symlinks=True)
+
+
+@cli.command()
+def build_ui():
+    # Runs the UI build from the git root, since _build_ui uses repo-relative paths.
+    # (A comment rather than a docstring so the click help output is unchanged.)
+    logger.info("Building UI -- this may take a bit...")
+    with cd(_get_git_root()):
+        _build_ui()
+    logger.info("Built UI!")
+
+
+@cli.command(help="Publishes the package to a repository")
+@click.option("--prod", is_flag=True, help="Publish to pypi (rather than test pypi)")
+@click.option(
+    "--no-wipe-dist", is_flag=True, help="Skip wiping the dist/ directory before building"
+)
+def build_and_publish(prod: bool, no_wipe_dist: bool):
+    """Builds the UI, builds the python distribution, and uploads it with twine.
+
+    Everything runs from the git root, as the paths used are repo-relative.
+
+    :param prod: If True upload to the ``pypi`` repository, otherwise to ``testpypi``.
+    :param no_wipe_dist: If True, leave any existing ``dist/`` directory in place;
+        by default it is removed first so that only freshly built artifacts are uploaded.
+    """
+    git_root = _get_git_root()
+    with cd(git_root):
+        logger.info("Building UI -- this may take a bit...")
+        _build_ui()
+        logger.info("Built UI!")
+        if not no_wipe_dist:
+            logger.info("Wiping dist/ directory for a clean publish.")
+            shutil.rmtree("dist", ignore_errors=True)
+        _command("python3 -m build", capture_output=False)
+        repository = "pypi" if prod else "testpypi"
+        # NOTE(review): no shell is involved, so `dist/*` reaches twine unexpanded --
+        # this relies on twine expanding the glob itself; confirm.
+        _command(f"python3 -m twine upload --repository {repository} dist/*", capture_output=False)
+        logger.info(f"Published to {repository}! 🎉")
+
+
+# Run the click group when this module is executed directly as a script
+if __name__ == "__main__":
+    cli()
diff --git a/hamilton/cli/__main__.py b/hamilton/cli/__main__.py
@@ -1,6 +1,7 @@
 import dataclasses
 import json
 import logging
+import os
 import sys
 import warnings
 from pathlib import Path
@@ -42,7 +43,6 @@ class CliState:
 cli = typer.Typer(rich_markup_mode="rich")
 state = CliState()
 
-
 MODULES_ANNOTATIONS = Annotated[
     List[Path],
     typer.Argument(
@@ -284,5 +284,18 @@ def view(
     )
 
 
+@cli.command()
+def ui(
+    ctx: typer.Context,
+    port: int = 8241,
+    base_dir: str = os.path.join(Path.home(), ".hamilton", "db"),
+    no_migration: bool = False,
+):
+    """Runs the Hamilton UI on sqlite (port 8241 unless --port is given)"""
+    # Imported inside the command rather than at module import time -- presumably so the
+    # rest of the CLI keeps working when the server's dependencies are not installed
+    # (TODO confirm).
+    from hamilton.server import commands
+
+    # Hand the options straight through to the server's own `run` command
+    ctx.invoke(commands.run, port=port, base_dir=base_dir, no_migration=no_migration)
+
+
 if __name__ == "__main__":
     cli()
diff --git a/hamilton/server b/hamilton/server
@@ -0,0 +1 @@
+../ui/backend/server
diff --git a/hamilton/version.py b/hamilton/version.py
@@ -1 +1 @@
-VERSION = (1, 64, 1)
+VERSION = (1, 65, 0, "rc1")
diff --git a/setup.py b/setup.py
@@ -35,6 +35,14 @@ def load_requirements():
     return list(requirements)
 
 
+def load_server_requirements():
+    """Loads the requirements for the ``ui`` extra.
+
+    Combines a fixed base set with every requirement listed in
+    ``hamilton/server/requirements-mini.txt`` (path relative to the working directory).
+    Blank lines and ``#`` comment lines in that file are skipped, so they cannot end up
+    as bogus entries in the extra.
+
+    :return: Sorted list of requirement strings, with duplicates removed.
+    """
+    # TODO -- confirm below works/delete this
+    requirements = {"click", "loguru", "requests", "typer"}
+    with open("hamilton/server/requirements-mini.txt") as f:
+        for line in f:
+            requirement = line.strip()
+            if requirement and not requirement.startswith("#"):
+                requirements.add(requirement)
+    # sorted() rather than list() so the result does not depend on set iteration order
+    return sorted(requirements)
+
+
 setup(
     name="sf-hamilton",  # there's already a hamilton in pypi
     version=VERSION,
@@ -44,7 +52,14 @@ def load_requirements():
     author="Stefan Krawczyk, Elijah ben Izzy",
     author_email="stefan@dagworks.io,elijah@dagworks.io",
     url="https://github.com/dagworks-inc/hamilton",
-    packages=find_packages(exclude=["tests"]),
+    packages=find_packages(exclude=["tests"], include=["hamilton", "hamilton.*"]),
+    # package_data={
+    #     "hamilton": [
+    #         "ui/server/build/**/*",
+    #         "hamilton/server/**/*",
+    #         "hamilton/server/build/**/*"
+    #     ]
+    # },
     include_package_data=True,
     install_requires=load_requirements(),
     zip_safe=False,
@@ -97,11 +112,15 @@ def load_requirements():
         "diskcache": ["diskcache"],
         "cli": ["typer"],
         "sdk": ["sf-hamilton-sdk"],
+        "ui": load_server_requirements(),
     },
     entry_points={
         "console_scripts": [
             "h_experiments = hamilton.plugins.h_experiments.__main__:main",
             "hamilton = hamilton.cli.__main__:cli",
+            "hamilton-serve = hamilton.server.__main__:run",
+            "hamilton-admin-build-ui = hamilton.admin:build_ui",
+            "hamilton-admin-build-and-publish = hamilton.admin:build_and_publish",
         ]
     },
     # Relevant project URLs

diff --git a/ui/backend/server/__init__.py b/ui/backend/server/__init__.py