diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md index 723e2535..3657449c 100644 --- a/doc/changes/changelog.md +++ b/doc/changes/changelog.md @@ -1,5 +1,6 @@ # Changes +* [3.2.1](changes_3.2.1.md) * [3.2.0](changes_3.2.0.md) * [3.1.0](changes_3.1.0.md) * [3.0.0](changes_3.0.0.md) diff --git a/doc/changes/changes_3.2.1.md b/doc/changes/changes_3.2.1.md new file mode 100644 index 00000000..2f8df489 --- /dev/null +++ b/doc/changes/changes_3.2.1.md @@ -0,0 +1,17 @@ +# AI-Lab 3.2.1 released 2025-?-? + +Code name: S3 Virtual schema support + +## Summary + +This release adds support for the S3 Virtual Schema extension, which provides access +to structured and semi-structured data in S3 buckets. + +## Features + +* #344: S3 Virtual Schema installation and configuration + +## Refactorings + +## Bug Fixes + diff --git a/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook/cloud/cloud_store_config.ipynb b/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook/cloud/cloud_store_config.ipynb index 681dbeca..d23b5634 100644 --- a/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook/cloud/cloud_store_config.ipynb +++ b/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook/cloud/cloud_store_config.ipynb @@ -48,18 +48,13 @@ "source": [ "from exasol.nb_connector import github, bfs_utils, cloud_storage\n", "from exasol.nb_connector.connections import open_bucketfs_connection, open_pyexasol_connection\n", - "import exasol.bucketfs as bfs\n", - "import pathlib\n", "\n", "jar_local_path = github.retrieve_jar(github.Project.CLOUD_STORAGE_EXTENSION, use_local_cache=True)\n", - "local_name = jar_local_path.name\n", - "bfs_path = bfs.path.BucketPath(local_name, open_bucketfs_connection(ai_lab_config))\n", - "with jar_local_path.open(\"rb\") as file:\n", - " bfs_path.write(file)\n", - "udf_path = bfs_path.as_udf_path()\n", + "bfs_bucket = open_bucketfs_connection(ai_lab_config)\n", + "bfs_path = bfs_utils.put_file(bfs_bucket, 
jar_local_path)\n", "\n", "with open_pyexasol_connection(ai_lab_config) as conn:\n", - " cloud_storage.setup_scripts(conn, ai_lab_config.db_schema, udf_path)\n", + " cloud_storage.setup_scripts(conn, ai_lab_config.db_schema, bfs_path.as_udf_path())\n", " \n", "print(\"Could Storage Extension was initialized\")" ] diff --git a/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook/cloud/s3_vs_setup.ipynb b/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook/cloud/s3_vs_setup.ipynb new file mode 100644 index 00000000..cd9266a0 --- /dev/null +++ b/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook/cloud/s3_vs_setup.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# S3 Virtual Schema configuration\n", + "\n", + "[S3 Virtual Schema](https://github.com/exasol/s3-document-files-virtual-schema) is an Exasol extension that allows access\n", + "to structured and semi-structured documents residing in AWS S3 buckets. Once configured and set up, you can query JSON, \n", + "Parquet and CSV data directly from the database, as if they were imported into Exasol tables.\n", + "\n", + "In this notebook we set up the extension in the database and create the required scripts." 
+ ], + "id": "b2ec73c7cc2ca7de" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from exasol.nb_connector import github, bfs_utils, cloud_storage\n", + "from exasol.nb_connector.connections import open_bucketfs_connection, open_pyexasol_connection\n", + "\n", + "# TODO: to be moved into notebook-connector's Project enum\n", + "import enum\n", + "\n", + "class MyProj(enum.Enum):\n", + " S3_DOCUMENT_VS = \"s3-document-files-virtual-schema\" \n", + "\n", + "\n", + "jar_local_path = github.retrieve_jar(MyProj.S3_DOCUMENT_VS, use_local_cache=True)\n", + "bfs_bucket = open_bucketfs_connection(ai_lab_config)\n", + "bfs_path = bfs_utils.put_file(bfs_bucket, jar_local_path)" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "SQLS = [\n", + " \"OPEN SCHEMA {schema!i}\",\n", + " \"\"\"\n", + "--/\n", + "CREATE OR REPLACE JAVA ADAPTER SCRIPT S3_FILES_ADAPTER AS\n", + " %scriptclass com.exasol.adapter.RequestDispatcher;\n", + " %jar {jar_path!r};\n", + "/\n", + " \"\"\",\n", + " \"\"\"\n", + "--/\n", + "CREATE OR REPLACE JAVA SET SCRIPT IMPORT_FROM_S3_DOCUMENT_FILES(\n", + " DATA_LOADER VARCHAR(2000000),\n", + " SCHEMA_MAPPING_REQUEST VARCHAR(2000000),\n", + " CONNECTION_NAME VARCHAR(500))\n", + " EMITS(...) 
AS\n", + " %scriptclass com.exasol.adapter.document.UdfEntryPoint;\n", + " %jar {jar_path!r};\n", + "/\n", + " \"\"\",\n", + "]" + ], + "id": "69422ac03d02bb03" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "with open_pyexasol_connection(ai_lab_config) as conn:\n", + " for sql in SQLS:\n", + " conn.execute(sql, query_params={\n", + " \"schema\": ai_lab_config.db_schema,\n", + " \"jar_path\": bfs_path.as_udf_path(),\n", + " })\n", + "\n", + "print(\"S3 Virtual Schema was initialized\")" + ], + "id": "75b31161b2ffbc72" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook_requirements.txt b/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook_requirements.txt index e9f25ec9..0218d184 100644 --- a/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook_requirements.txt +++ b/exasol/ds/sandbox/runtime/ansible/roles/jupyter/files/notebook_requirements.txt @@ -2,7 +2,11 @@ scikit-learn==1.6.1 # required for notebook sklearn matplotlib==3.10.0 # required for notebook sklearn jupysql==0.10.17 # required for multiple notebooks stopwatch.py>=2.0.1 # also required by ITDE -exasol-notebook-connector==0.4.0 +exasol-notebook-connector==0.4.2 pickleshare==0.7.5 # See https://github.com/exasol/ai-lab/issues/291 for details. ipyfilechooser==0.6.0 # required for SLC notebooks ipywidgets==8.1.5 # enable interactive Javascript widgets in the notebooks + +# Temporary pin for script-language-container-tool. 
+# Can be removed once the notebook-connector dependency is fixed: https://github.com/exasol/notebook-connector/issues/159 +exasol-script-languages-container-tool<1.0.0 \ No newline at end of file diff --git a/exasol/ds/sandbox/runtime/ansible/roles/rsync/defaults/main.yml b/exasol/ds/sandbox/runtime/ansible/roles/rsync/defaults/main.yml index d9ce8433..eda233c4 100644 --- a/exasol/ds/sandbox/runtime/ansible/roles/rsync/defaults/main.yml +++ b/exasol/ds/sandbox/runtime/ansible/roles/rsync/defaults/main.yml @@ -1,4 +1,4 @@ --- apt_dependencies: - - rsync=3.2.7-0ubuntu0.22.04.3 + - rsync=3.2.7-0ubuntu0.22.04.4 diff --git a/pyproject.toml b/pyproject.toml index 38114d66..b3fc9d8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "exasol-ai-lab" -version = "3.2.0" +version = "3.2.1" description = "Provide AI-Lab editions." packages = [ {include = "exasol"}, ] license = "MIT"