Skip to content

Commit

Permalink
Merge branch 'main' into security/#298-Fixed_vulnerabilities_2
Browse files Browse the repository at this point in the history
  • Loading branch information
ckunki committed Aug 23, 2024
2 parents bebb1c9 + ace2417 commit 5c72b61
Show file tree
Hide file tree
Showing 13 changed files with 32 additions and 123 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/notebook_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ jobs:
- { name: "sklearn notebook", test_file: "nbtest_sklearn.py" }
- { name: "ibis notebook", test_file: "nbtest_ibis.py" }
- { name: "SLC notebook", test_file: "nbtest_script_languages_container.py" }
- { name: "SME notebooks", test_file: "nbtest_sagemaker.py"}
- { name: "TE notebooks", test_file: "nbtest_transformers.py"}
- { name: "short notebook tests", test_file: "\"nbtest_environment_test.py nbtest_itde.py\""}
name: Running ${{ matrix.nb_test.name }}
steps:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
"source": [
"from exasol.nb_connector.sagemaker_extension_wrapper import initialize_sme_extension\n",
"\n",
"initialize_sme_extension(ai_lab_config)"
"initialize_sme_extension(ai_lab_config, version='0.10.0')"
]
},
{
Expand Down Expand Up @@ -110,7 +110,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
"outputs": [],
"source": [
"%run utils/model_retrieval.ipynb\n",
"load_huggingface_model(ai_lab_config, MODEL_NAME)"
"load_huggingface_model(ai_lab_config, MODEL_NAME, 'filling_mask')"
]
},
{
Expand Down Expand Up @@ -147,7 +147,6 @@
" SELECT TE_FILLING_MASK_UDF(\n",
" NULL,\n",
" '{{ai_lab_config.te_bfs_connection}}',\n",
" '{{ai_lab_config.te_hf_connection}}',\n",
" '{{ai_lab_config.te_models_bfs_dir}}',\n",
" '{{MODEL_NAME}}',\n",
" '{{MY_TEXT}}',\n",
Expand Down Expand Up @@ -176,7 +175,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "851eadbc-a8fb-4ddf-bdda-4f4016ed0913",
"id": "00934fda-f64e-45f4-9686-c37cf34ea500",
"metadata": {},
"outputs": [],
"source": []
Expand All @@ -198,7 +197,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
"outputs": [],
"source": [
"%run utils/model_retrieval.ipynb\n",
"load_huggingface_model(ai_lab_config, MODEL_NAME)"
"load_huggingface_model(ai_lab_config, MODEL_NAME, 'question_answering')"
]
},
{
Expand Down Expand Up @@ -156,7 +156,6 @@
" SELECT TE_QUESTION_ANSWERING_UDF(\n",
" NULL,\n",
" '{{ai_lab_config.te_bfs_connection}}',\n",
" '{{ai_lab_config.te_hf_connection}}',\n",
" '{{ai_lab_config.te_models_bfs_dir}}',\n",
" '{{MODEL_NAME}}',\n",
" '{{TEST_QUESTION}}',\n",
Expand Down Expand Up @@ -212,7 +211,6 @@
" SELECT TE_QUESTION_ANSWERING_UDF(\n",
" NULL,\n",
" '{{ai_lab_config.te_bfs_connection}}',\n",
" '{{ai_lab_config.te_hf_connection}}',\n",
" '{{ai_lab_config.te_models_bfs_dir}}',\n",
" '{{MODEL_NAME}}',\n",
" '{{TEST_QUESTION}}',\n",
Expand Down Expand Up @@ -264,7 +262,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
"outputs": [],
"source": [
"%run utils/model_retrieval.ipynb\n",
"load_huggingface_model(ai_lab_config, MODEL_NAME, method='udf')"
"load_huggingface_model(ai_lab_config, MODEL_NAME, 'sequence_classification')"
]
},
{
Expand Down Expand Up @@ -126,7 +126,6 @@
" SELECT TE_SEQUENCE_CLASSIFICATION_SINGLE_TEXT_UDF(\n",
" NULL,\n",
" '{{ai_lab_config.te_bfs_connection}}',\n",
" '{{ai_lab_config.te_hf_connection}}',\n",
" '{{ai_lab_config.te_models_bfs_dir}}',\n",
" '{{MODEL_NAME}}',\n",
" 'Oh my God!'\n",
Expand Down Expand Up @@ -163,7 +162,6 @@
" SELECT TE_SEQUENCE_CLASSIFICATION_TEXT_PAIR_UDF(\n",
" NULL,\n",
" '{{ai_lab_config.te_bfs_connection}}',\n",
" '{{ai_lab_config.te_hf_connection}}',\n",
" '{{ai_lab_config.te_models_bfs_dir}}',\n",
" '{{MODEL_NAME}}',\n",
" 'Oh my God!',\n",
Expand Down Expand Up @@ -214,7 +212,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,13 @@
"source": [
"from exasol.nb_connector.transformers_extension_wrapper import initialize_te_extension\n",
"\n",
"initialize_te_extension(ai_lab_config)"
"initialize_te_extension(ai_lab_config, version='2.0.0')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c05bb67d-a5fc-4a46-9fe5-a0c701f42c1c",
"id": "94e4f62a-b37f-4bdd-ba96-f0719ed171d3",
"metadata": {},
"outputs": [],
"source": []
Expand All @@ -106,7 +106,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
"outputs": [],
"source": [
"%run utils/model_retrieval.ipynb\n",
"load_huggingface_model(ai_lab_config, MODEL_NAME)"
"load_huggingface_model(ai_lab_config, MODEL_NAME, 'text_generation')"
]
},
{
Expand Down Expand Up @@ -155,7 +155,6 @@
"SELECT {ai_lab_config.db_schema}.TE_TEXT_GENERATION_UDF(\n",
" NULL,\n",
" '{ai_lab_config.te_bfs_connection}',\n",
" '{ai_lab_config.te_hf_connection}',\n",
" '{ai_lab_config.te_models_bfs_dir}',\n",
" '{MODEL_NAME}',\n",
" '{MY_TEXT}',\n",
Expand Down Expand Up @@ -191,7 +190,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
"outputs": [],
"source": [
"%run utils/model_retrieval.ipynb\n",
"load_huggingface_model(ai_lab_config, MODEL_NAME)"
"load_huggingface_model(ai_lab_config, MODEL_NAME, 'token_classification')"
]
},
{
Expand Down Expand Up @@ -161,7 +161,6 @@
" SELECT TE_TOKEN_CLASSIFICATION_UDF(\n",
" NULL,\n",
" '{{ai_lab_config.te_bfs_connection}}',\n",
" '{{ai_lab_config.te_hf_connection}}',\n",
" '{{ai_lab_config.te_models_bfs_dir}}',\n",
" '{{MODEL_NAME}}',\n",
" '{{MY_TEXT}}',\n",
Expand Down Expand Up @@ -219,7 +218,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
"outputs": [],
"source": [
"%run utils/model_retrieval.ipynb\n",
"load_huggingface_model(ai_lab_config, MODEL_NAME)"
"load_huggingface_model(ai_lab_config, MODEL_NAME, 'translation')"
]
},
{
Expand Down Expand Up @@ -152,7 +152,6 @@
" SELECT TE_TRANSLATION_UDF(\n",
" NULL,\n",
" '{{ai_lab_config.te_bfs_connection}}',\n",
" '{{ai_lab_config.te_hf_connection}}',\n",
" '{{ai_lab_config.te_models_bfs_dir}}',\n",
" '{{MODEL_NAME}}',\n",
" '{{MY_TEXT}}',\n",
Expand Down Expand Up @@ -197,7 +196,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
"There are two ways of doing this.\n",
"\n",
"1. Calling the `TE_MODEL_DOWNLOADER_UDF` UDF.\n",
"2. Downloading a model to a local drive and subsequently uploading it into the [BucketFS](https://docs.exasol.com/db/latest/database_concepts/bucketfs/bucketfs.htm) using CLI or an API.\n",
"2. Calling the CLI script `upload_model.py`.\n",
"\n",
"The first method requires the database machine to have internet access. The second method provides a workaround if this is a problem. Another advantage of the second method is that it caches downloaded models on the local drive. This can make the model transfer quicker if it needs to be repeated.\n",
"Here we will use the first method.\n",
"\n",
"<b>This notebook is not supposed to be run on its own. It contains model loading functions that are called by other notebooks.</b>"
]
Expand All @@ -24,27 +24,26 @@
"id": "ee4e9765-c67b-4f85-85c2-c7bb99a464ac",
"metadata": {},
"source": [
"## Loading model with UDF\n",
"\n",
"Here is the first way of loading the model. We wrap it into a function so that other notebooks can call it."
"## Loading model with UDF"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "a7d88d33-8028-4843-a05c-ff4ddb7b02af",
"metadata": {},
"outputs": [],
"source": [
"from exasol.nb_connector.secret_store import Secrets\n",
"\n",
"\n",
"def load_huggingface_model_udf(conf: Secrets, model_name: str) -> None:\n",
"def load_huggingface_model(conf: Secrets, model_name: str, task_type: str) -> None:\n",
" \"\"\"\n",
" Loads specified model into BucketFS using a UDF provided with the Transformer Extension.\n",
"\n",
" conf - Access to the secret store holding the configuration data.\n",
" model_name - Name of the model at Huggingface hub, e.g. facebook/nllb-moe-54b.\n",
"    task_type  - Name of the task the model will be used for, e.g. 'filling_mask' or 'translation'.\n",
" \"\"\"\n",
"\n",
" from exasol.nb_connector.connections import open_pyexasol_connection\n",
Expand All @@ -54,6 +53,7 @@
" sql = f\"\"\"\n",
" SELECT {conf.get(CKey.db_schema)}.TE_MODEL_DOWNLOADER_UDF(\n",
" '{model_name}',\n",
" '{task_type}',\n",
" '{conf.get(CKey.te_models_bfs_dir)}',\n",
" '{conf.get(CKey.te_bfs_connection)}',\n",
" '{conf.get(CKey.te_hf_connection)}'\n",
Expand All @@ -64,90 +64,6 @@
" conn.execute(query=get_activation_sql(conf))\n",
" conn.execute(query=sql)"
]
},
{
"cell_type": "markdown",
"id": "e187f53b-e9c1-4376-800f-50571dce0b06",
"metadata": {},
"source": [
"## Loading model using the notebook\n",
"\n",
"Here is the second way of loading the model. This method is very similar to the command line interface. For details on how to use the CLI please refer to the Transformer Extension <a href=\"https://github.com/exasol/transformers-extension/blob/main/doc/user_guide/user_guide.md\" target=\"_blank\" rel=\"noopener\">User Guide</a>."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a5d00ad-4e99-4f4b-888c-78a080142d68",
"metadata": {},
"outputs": [],
"source": [
"def load_huggingface_model_cli(conf: Secrets, model_name: str, force_download: bool = False) -> None:\n",
" \"\"\"\n",
" Loads specified model into BucketFS by saving it first to a local drive, as per the command-line interface.\n",
"\n",
" conf - Access to the secret store holding the configuration data.\n",
" model_name - Name of the model at Huggingface hub, e.g. facebook/nllb-moe-54b.\n",
" force_download - If True the model will be reloaded from the hub even if it has been cached before.\n",
" \"\"\"\n",
"\n",
" from pathlib import Path\n",
" import re\n",
" from exasol.nb_connector.transformers_extension_wrapper import upload_model\n",
" from exasol.nb_connector.ai_lab_config import AILabConfig as CKey\n",
"\n",
" # Make a name for the model sub-directory\n",
" sub_dir = re.sub(r\"[/\\\\?%*:|\\\"<>\\x7F\\x00-\\x1F]\", \"-\", model_name)\n",
" models_dir = conf.get(CKey.te_models_cache_dir)\n",
" cache_dir = str(Path(models_dir) / sub_dir)\n",
"\n",
" # Upload it to the BucketFS\n",
" upload_model(conf, model_name=model_name, cache_dir=cache_dir, force_download=force_download)"
]
},
{
"cell_type": "markdown",
"id": "8332928b-6ee5-4a04-aca3-4e60c93836f5",
"metadata": {},
"source": [
"## Method selector\n",
"\n",
"This is the main entry point. The call will be dispatched to one of the above functions depending on the selected method."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4968774-3054-4762-8d0a-462c7cc712c5",
"metadata": {},
"outputs": [],
"source": [
"def load_huggingface_model(conf: Secrets, model_name: str, method: str = 'cli', force_download: bool = False) -> None:\n",
" \"\"\"\n",
" Loads specified model into BucketFS choosing one of the two available methods.\n",
"\n",
" conf - Access to the secret store holding the configuration data.\n",
" model_name - Name of the model at Huggingface hub, e.g. facebook/nllb-moe-54b.\n",
"    method         - The recognized values are \"udf\" and \"cli\" (case-insensitive). Defaults to\n",
"                     \"cli\"; any value other than \"udf\" also selects the CLI method.\n",
" force_download - If True the model will be reloaded from the hub even if it has been cached before.\n",
" \"\"\"\n",
"\n",
" method = method.lower()\n",
" if method == 'udf':\n",
" load_huggingface_model_udf(conf, model_name)\n",
" else:\n",
" load_huggingface_model_cli(conf, model_name, force_download=force_download)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6bc718d-6811-4963-9a65-bb030eae711e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -166,7 +82,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 5c72b61

Please sign in to comment.