diff --git a/Jenkinsfile b/Jenkinsfile
index 6461c0f069..0c83c02ba9 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -19,7 +19,7 @@ node('cuda-module') {
                     docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
                     docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py38 py39
                     docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
-                    docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310
+                    docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310 py311
                    docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1 || exit 0
                 """
                 currentBuild.result = 'SUCCESS'
diff --git a/README.md b/README.md
index 95cae5e4a9..53a25c0a22 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 [![License Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
-![Python 3.6, 3.7, 3.8, 3.9, 3.10](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-green.svg)
+![Python 3.6, 3.7, 3.8, 3.9, 3.10, 3.11](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-green.svg)
 [![Downloads](https://pepy.tech/badge/deeppavlov)](https://pepy.tech/project/deeppavlov)
diff --git a/deeppavlov/_meta.py b/deeppavlov/_meta.py
index b5a90674f9..3384dd3d03 100644
--- a/deeppavlov/_meta.py
+++ b/deeppavlov/_meta.py
@@ -1,4 +1,4 @@
-__version__ = '1.5.0'
+__version__ = '1.6.0'
 __author__ = 'Neural Networks and Deep Learning lab, MIPT'
 __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
 __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
diff --git a/deeppavlov/core/data/utils.py b/deeppavlov/core/data/utils.py
index f49043af9f..701956bf64 100644
--- a/deeppavlov/core/data/utils.py
+++ b/deeppavlov/core/data/utils.py
@@ -78,7 +78,7 @@ def s3_download(url: str, destination: str) -> None:
         file_object.download_file(destination, Callback=pbar.update)
 
 
-def simple_download(url: str, destination: Union[Path, str], headers: Optional[dict] = None) -> None:
+def simple_download(url: str, destination: Union[Path, str], headers: Optional[dict] = None, n_tries: int = 3) -> None:
     """Download a file from URL to target location.
 
     Displays a progress bar to the terminal during the download process.
@@ -87,58 +87,66 @@ def simple_download(url: str, destination: Union[Path, str], headers: Optional[d
         url: The source URL.
         destination: Path to the file destination (including file name).
         headers: Headers for file server.
+        n_tries: Number of retries if download fails.
 
     """
-    destination = Path(destination)
-    destination.parent.mkdir(parents=True, exist_ok=True)
-
-    log.info('Downloading from {} to {}'.format(url, destination))
-
-    if url.startswith('s3://'):
-        return s3_download(url, str(destination))
-
-    chunk_size = 32 * 1024
-    temporary = destination.with_suffix(destination.suffix + '.part')
-
-    r = requests.get(url, stream=True, headers=headers)
-    if r.status_code != 200:
-        raise RuntimeError(f'Got status code {r.status_code} when trying to download {url}')
-    total_length = int(r.headers.get('content-length', 0))
-
-    if temporary.exists() and temporary.stat().st_size > total_length:
-        temporary.write_bytes(b'')  # clearing temporary file when total_length is inconsistent
-
-    with temporary.open('ab') as f:
-        downloaded = f.tell()
-        if downloaded != 0:
-            log.warning(f'Found a partial download {temporary}')
-        with tqdm(initial=downloaded, total=total_length, unit='B', unit_scale=True) as pbar:
-            while True:
-                if downloaded != 0:
-                    log.warning(f'Download stopped abruptly, trying to resume from {downloaded} '
-                                f'to reach {total_length}')
-                    headers['Range'] = f'bytes={downloaded}-'
-                    r = requests.get(url, headers=headers, stream=True)
-                    if 'content-length' not in r.headers or \
-                            total_length - downloaded != int(r.headers['content-length']):
-                        raise RuntimeError('It looks like the server does not support resuming downloads.')
-
-                try:
-                    for chunk in r.iter_content(chunk_size=chunk_size):
-                        if chunk:  # filter out keep-alive new chunks
-                            downloaded += len(chunk)
-                            pbar.update(len(chunk))
-                            f.write(chunk)
-                except requests.exceptions.ChunkedEncodingError:
-                    if downloaded == 0:
-                        r = requests.get(url, stream=True, headers=headers)
-
-                if downloaded >= total_length:
-                    # Note that total_length is 0 if the server didn't return the content length,
-                    # in this case we perform just one iteration and assume that we are done.
-                    break
-
-    temporary.rename(destination)
+    try:
+        destination = Path(destination)
+        destination.parent.mkdir(parents=True, exist_ok=True)
+
+        log.info('Downloading from {} to {}'.format(url, destination))
+
+        if url.startswith('s3://'):
+            return s3_download(url, str(destination))
+
+        chunk_size = 32 * 1024
+        temporary = destination.with_suffix(destination.suffix + '.part')
+
+        r = requests.get(url, stream=True, headers=headers)
+        if r.status_code != 200:
+            raise RuntimeError(f'Got status code {r.status_code} when trying to download {url}')
+        total_length = int(r.headers.get('content-length', 0))
+
+        if temporary.exists() and temporary.stat().st_size > total_length:
+            temporary.write_bytes(b'')  # clearing temporary file when total_length is inconsistent
+
+        with temporary.open('ab') as f:
+            downloaded = f.tell()
+            if downloaded != 0:
+                log.warning(f'Found a partial download {temporary}')
+            with tqdm(initial=downloaded, total=total_length, unit='B', unit_scale=True) as pbar:
+                while True:
+                    if downloaded != 0:
+                        log.warning(f'Download stopped abruptly, trying to resume from {downloaded} '
+                                    f'to reach {total_length}')
+                        headers['Range'] = f'bytes={downloaded}-'
+                        r = requests.get(url, headers=headers, stream=True)
+                        if 'content-length' not in r.headers or \
+                                total_length - downloaded != int(r.headers['content-length']):
+                            raise RuntimeError('It looks like the server does not support resuming downloads.')
+
+                    try:
+                        for chunk in r.iter_content(chunk_size=chunk_size):
+                            if chunk:  # filter out keep-alive new chunks
+                                downloaded += len(chunk)
+                                pbar.update(len(chunk))
+                                f.write(chunk)
+                    except requests.exceptions.ChunkedEncodingError:
+                        if downloaded == 0:
+                            r = requests.get(url, stream=True, headers=headers)
+
+                    if downloaded >= total_length:
+                        # Note that total_length is 0 if the server didn't return the content length,
+                        # in this case we perform just one iteration and assume that we are done.
+                        break
+
+        temporary.rename(destination)
+    except Exception as e:
+        if n_tries > 0:
+            log.warning(f'Download failed: {e}, retrying')
+            simple_download(url, destination, headers, n_tries - 1)
+        else:
+            raise e
 
 
 def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_download: bool = True,
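The retried helper above can be exercised directly. A minimal usage sketch, assuming DeepPavlov is installed; the URL and paths are placeholders, not part of this patch:

```python
from deeppavlov.core.data.utils import simple_download

# With this patch, any exception raised during the download triggers up to
# three retries (n_tries counts retries after the first attempt) before the
# original exception is re-raised.
simple_download(
    url='http://example.com/model.tar.gz',  # placeholder URL
    destination='/tmp/model.tar.gz',
    headers={},  # explicit dict: the resume branch sets headers['Range'] on it
    n_tries=3,
)
```

Note that the resume branch mutates `headers`, so passing an explicit dict rather than leaving the default `None` avoids a `TypeError` if the first attempt is cut short mid-stream.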
diff --git a/deeppavlov/requirements/datasets.txt b/deeppavlov/requirements/datasets.txt
index 5e25e63be6..89157c4094 100644
--- a/deeppavlov/requirements/datasets.txt
+++ b/deeppavlov/requirements/datasets.txt
@@ -1 +1,2 @@
-datasets>=1.16.0,<2.5.0
+datasets>=1.16.0,<2.5.0;python_version<="3.10"
+datasets==2.2.*;python_version=="3.11.*"
diff --git a/deeppavlov/requirements/faiss.txt b/deeppavlov/requirements/faiss.txt
index 9c8dd2b5f1..dbb39a0bcf 100644
--- a/deeppavlov/requirements/faiss.txt
+++ b/deeppavlov/requirements/faiss.txt
@@ -1 +1,2 @@
-faiss-cpu==1.7.2
+faiss-cpu==1.7.2;python_version<="3.10"
+faiss-cpu==1.7.4;python_version=="3.11.*"
diff --git a/deeppavlov/requirements/kenlm.txt b/deeppavlov/requirements/kenlm.txt
index 8bd21c6112..5456b04d2d 100644
--- a/deeppavlov/requirements/kenlm.txt
+++ b/deeppavlov/requirements/kenlm.txt
@@ -1 +1,2 @@
-pypi-kenlm==0.1.20220713
+pypi-kenlm==0.1.20220713;python_version<="3.10"
+kenlm==0.2.*;python_version=="3.11.*"
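These split pins rely on PEP 508 environment markers: pip evaluates the marker on each line against the running interpreter, so exactly one line of each requirements file applies. A quick way to check which line a given interpreter selects is to evaluate the marker with the `packaging` library (a sketch; `packaging` is not itself part of this change):

```python
from packaging.markers import Marker

# Simulate marker evaluation as pip performs it; overriding python_version
# stands in for running under CPython 3.11.x.
legacy = Marker('python_version <= "3.10"')
py311 = Marker('python_version == "3.11.*"')

env = {'python_version': '3.11'}
print(legacy.evaluate(env))  # False -> the pypi-kenlm pin is skipped
print(py311.evaluate(env))   # True  -> kenlm==0.2.* is installed instead
```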
diff --git a/docs/features/models/KBQA.ipynb b/docs/features/models/KBQA.ipynb
index f0b501a45b..741bce7c79 100644
--- a/docs/features/models/KBQA.ipynb
+++ b/docs/features/models/KBQA.ipynb
@@ -22,13 +22,13 @@
     " \n",
     " 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
     "\n",
-    " 4.3. [Using entity linking and Wiki parser as standalone services for KBQA](#4.3-Using-entity-linking-and-Wiki-parser-as-standalone-services-for-KBQA)\n",
+    " 4.3. [Using entity linking and Wiki parser as standalone services for KBQA](#4.3-Using-entity-linking-and-Wiki-parser-as-standalone-tools-for-KBQA)\n",
     " \n",
     "5. [Customize the model](#5.-Customize-the-model)\n",
     " \n",
-    " 5.1. [Train your model from Python](#5.1-Train-your-model-from-Python)\n",
+    " 5.1. [Description of config parameters](#5.1-Description-of-config-parameters)\n",
     " \n",
-    " 5.2. [Train your model from CLI](#5.2-Train-your-model-from-CLI)\n",
+    " 5.2. [Train KBQA components](#5.2-Train-KBQA-components)\n",
     "\n",
     "# 1. Introduction to the task\n",
     "\n",
diff --git a/docs/features/models/NER.ipynb b/docs/features/models/NER.ipynb
index da8473f5e3..44140896bb 100644
--- a/docs/features/models/NER.ipynb
+++ b/docs/features/models/NER.ipynb
@@ -22,7 +22,7 @@
     " \n",
     " 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
     " \n",
-    "5. [Evaluate](#6.-Evaluate)\n",
+    "5. [Evaluate](#5.-Evaluate)\n",
     " \n",
     " 5.1. [Evaluate from Python](#5.1-Evaluate-from-Python)\n",
     " \n",
diff --git a/docs/features/models/SQuAD.ipynb b/docs/features/models/SQuAD.ipynb
index a158475d9e..5c7552a698 100644
--- a/docs/features/models/SQuAD.ipynb
+++ b/docs/features/models/SQuAD.ipynb
@@ -105,7 +105,7 @@
     "`squad_bert` is the name of the model's *config_file*. [What is a Config File?](http://docs.deeppavlov.ai/en/master/intro/configuration.html) \n",
     "\n",
     "Configuration file defines the model and describes its hyperparameters. To use another model, change the name of the *config_file* here and further.\n",
-    "The full list of the models with their config names can be found in the [table](#6.-Models-list).\n",
+    "The full list of the models with their config names can be found in the [table](#3.-Models-list).\n",
     "\n",
     "# 3. Models list\n",
     "\n",
diff --git a/docs/features/models/classification.ipynb b/docs/features/models/classification.ipynb
index e7158c7cef..89d9ddc408 100644
--- a/docs/features/models/classification.ipynb
+++ b/docs/features/models/classification.ipynb
@@ -162,7 +162,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 3.2 Predict using CLI\n",
+    "## 4.2 Predict using CLI\n",
     "\n",
     "You can also get predictions in an interactive mode through CLI (Command Line Interface)."
    ]
@@ -198,9 +198,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 4. Evaluation\n",
+    "# 5. Evaluation\n",
     "\n",
-    "## 4.1 Evaluate from Python"
+    "## 5.1 Evaluate from Python"
    ]
   },
   {
@@ -218,7 +218,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 4.2 Evaluate from CLI"
+    "## 5.2 Evaluate from CLI"
    ]
   },
   {
@@ -234,9 +234,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 5. Customize the model\n",
+    "# 6. Train the model on your data\n",
     "\n",
-    "## 5.1 Train your model from Python\n",
+    "## 6.1 Train your model from Python\n",
     "\n",
     "### Provide your data path\n",
     "\n",
@@ -346,7 +346,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 5.2 Train your model from CLI\n",
+    "## 6.2 Train your model from CLI\n",
     "\n",
     "To train the model on your data, create a copy of a config file and change the *data_path* variable in it. After that, train the model using your new *config_file*. You can also change any of the hyperparameters of the model."
    ]
diff --git a/docs/features/models/few_shot_classification.ipynb b/docs/features/models/few_shot_classification.ipynb
index c0207fc23f..910a35181f 100644
--- a/docs/features/models/few_shot_classification.ipynb
+++ b/docs/features/models/few_shot_classification.ipynb
@@ -119,7 +119,7 @@
     "\n",
     "## 4.2 Predict using Python\n",
     "\n",
-    "After [installing](#4.-Get-started-with-the-model) the model, build it from the config and predict."
+    "After [installing](#2.-Get-started-with-the-model) the model, build it from the config and predict."
    ]
   },
   {
@@ -192,7 +192,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 4.2 Predict using CLI\n",
+    "## 4.3 Predict using CLI\n",
     "\n",
     "You can also get predictions in an interactive mode through CLI (Сommand Line Interface)."
    ]
diff --git a/docs/features/models/morpho_tagger.ipynb b/docs/features/models/morpho_tagger.ipynb
index 15df88e3b7..fd8a0d13b3 100644
--- a/docs/features/models/morpho_tagger.ipynb
+++ b/docs/features/models/morpho_tagger.ipynb
@@ -22,7 +22,7 @@
     "\n",
     " 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
     "\n",
-    "5. [Customize the model](#4.-Customize-the-model)\n",
+    "5. [Customize the model](#5.-Customize-the-model)\n",
     "\n",
     "# 1. Introduction to the task\n",
     "\n",
diff --git a/docs/features/models/relation_extraction.ipynb b/docs/features/models/relation_extraction.ipynb
index 9e962d742f..2cbadb341d 100644
--- a/docs/features/models/relation_extraction.ipynb
+++ b/docs/features/models/relation_extraction.ipynb
@@ -198,7 +198,7 @@
     "|NUM | Percents, money, quantities |\n",
     "|MISC | Products, including vehicles, weapons, etc. Events, including elections, battles, sporting MISC events, etc. Laws, cases, languages, etc. |\n",
     "\n",
-    "**Model Output**: one or several of the [97 relations](#5.1-Relations-used-in-English-model) found between the given entities; relation id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P26') and relation name ('spouse').\n",
+    "**Model Output**: one or several of the [97 relations](#6.1-Relations-used-in-English-model) found between the given entities; relation id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P26') and relation name ('spouse').\n",
     "\n",
     "### Russian"
    ]
@@ -244,8 +244,34 @@
     "- list of entities positions (i.e. all start and end positions of both entities' mentions)\n",
     "- list of NER tags of both entities.\n",
     "\n",
-    "**Model Output**: one or several of the [30 relations](#5.2-Relations-used-in-Russian-model) found between the given entities; a Russian relation name (e.g. \"участник\") or an English one, if Russian one is unavailable, and, if applicable, its id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P710').\n",
+    "**Model Output**: one or several of the [30 relations](#6.2-Relations-used-in-Russian-model) found between the given entities; a Russian relation name (e.g. \"участник\") or an English one, if Russian one is unavailable, and, if applicable, its id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P710').\n",
     "\n",
+    "## 4.2 Predict using CLI\n",
+    "\n",
+    "You can also get predictions in an interactive mode through CLI."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! python -m deeppavlov interact re_docred [-d]\n",
+    "! python -m deeppavlov interact re_rured [-d]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`-d` is an optional download key (alternative to `download=True` in Python code). It is used to download the pre-trained model along with embeddings and all other files needed to run the model."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "# 5. Customize the model\n",
     "\n",
     "## 5.1 Description of config parameters\n",
diff --git a/docs/features/models/spelling_correction.ipynb b/docs/features/models/spelling_correction.ipynb
index b675940d89..7ebcc7e971 100644
--- a/docs/features/models/spelling_correction.ipynb
+++ b/docs/features/models/spelling_correction.ipynb
@@ -22,7 +22,7 @@
     "\n",
     " 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
     "\n",
-    "5. [Customize the model](#4.-Customize-the-model)\n",
+    "5. [Customize the model](#5.-Customize-the-model)\n",
     "\n",
     " 5.1. [Training configuration](#5.1-Training-configuration)\n",
     "\n",
diff --git a/docs/features/models/syntax_parser.ipynb b/docs/features/models/syntax_parser.ipynb
index abdea2d627..5654593f4d 100644
--- a/docs/features/models/syntax_parser.ipynb
+++ b/docs/features/models/syntax_parser.ipynb
@@ -22,7 +22,7 @@
     "\n",
     " 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
     "\n",
-    "5. [Customize the model](#4.-Customize-the-model)\n",
+    "5. [Customize the model](#5.-Customize-the-model)\n",
     "\n",
     "# 1. Introduction to the task\n",
     "\n",
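The notebook edits above all repair tables of contents and cross-references whose anchors had drifted from the actual headings. Jupyter derives a heading's anchor by dropping the leading hashes and replacing spaces with hyphens, so a link only resolves if it matches that transformation exactly. A small sketch of the rule (the helper name is ours, purely illustrative):

```python
def heading_to_anchor(heading: str) -> str:
    """Anchor that Jupyter generates for a markdown heading."""
    return '#' + heading.lstrip('#').strip().replace(' ', '-')

# The corrected links in the hunks above follow this rule:
assert heading_to_anchor('# 5. Evaluate') == '#5.-Evaluate'
assert heading_to_anchor('# 3. Models list') == '#3.-Models-list'
assert heading_to_anchor('## 4.3 Predict using CLI') == '#4.3-Predict-using-CLI'
```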
diff --git a/docs/intro/installation.rst b/docs/intro/installation.rst
index 6c8c51a4c9..58eb3264d7 100644
--- a/docs/intro/installation.rst
+++ b/docs/intro/installation.rst
@@ -1,7 +1,7 @@
 Installation
 ============
 
-DeepPavlov supports **Linux**, **Windows 10+** (through WSL/WSL2), **MacOS** (Big Sur+) platforms, **Python 3.6-3.10**.
+DeepPavlov supports **Linux**, **Windows 10+** (through WSL/WSL2), **MacOS** (Big Sur+) platforms, **Python 3.6-3.11**.
 Depending on the model used, you may need from 4 to 16 GB RAM.
 
 Install with pip
diff --git a/docs/intro/quick_start.rst b/docs/intro/quick_start.rst
index 66626b4ffd..680377302e 100644
--- a/docs/intro/quick_start.rst
+++ b/docs/intro/quick_start.rst
@@ -2,7 +2,7 @@ QuickStart
 ------------
 
 First, follow instructions on :doc:`Installation page <installation>`
-to install ``deeppavlov`` package for Python 3.6/3.7/3.8/3.9/3.10.
+to install ``deeppavlov`` package for Python 3.6-3.11.
 
 DeepPavlov contains a bunch of great pre-trained NLP models. Each model is determined by its config file. List of models is available on
diff --git a/requirements.txt b/requirements.txt
index fa4928622b..48e4c2fbb8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,8 @@ prometheus-client>=0.13.0,<=1.16.0
 pydantic<2
 pybind11==2.10.3
 requests>=2.19.0,<3.0.0
-scikit-learn>=0.24,<1.1.0
+scikit-learn>=0.24,<1.1.0;python_version<="3.10"
+scikit-learn==1.4.0;python_version=="3.11.*"
 tqdm>=4.42.0,<4.65.0
 uvicorn>=0.13.0,<0.19.0
 wheel
diff --git a/setup.py b/setup.py
index db912de26c..875335e992 100644
--- a/setup.py
+++ b/setup.py
@@ -68,11 +68,17 @@ def readme():
             'pexpect'
         ],
         'docs': [
-            'sphinx==3.5.4;python_version<"3.10"',
-            'sphinx==4.5.0;python_version>="3.10"',
-            'sphinx_rtd_theme==0.5.2',
-            'docutils<0.17,>=0.12',
-            'nbsphinx==0.8.4',
+            'sphinx==3.5.4;python_version<="3.7"',
+            'sphinx==5.0.0;python_version=="3.8"',
+            'sphinx==5.0.0;python_version=="3.9"',
+            'sphinx==5.0.0;python_version=="3.10"',
+            'sphinx==7.2.*;python_version=="3.11.*"',
+            'sphinx_rtd_theme==0.5.2;python_version<="3.10"',
+            'sphinx_rtd_theme==2.0.0;python_version=="3.11.*"',
+            'docutils<0.17,>=0.12;python_version<="3.10"',
+            'docutils==0.20.1;python_version=="3.11.*"',
+            'nbsphinx==0.8.4;python_version<="3.10"',
+            'nbsphinx==0.9.3;python_version=="3.11.*"',
             'ipykernel==5.5.4',
             'jinja2<=3.0.3',
             'sphinx-copybutton==0.5.0',
diff --git a/utils/Docker/docker-compose.yml b/utils/Docker/docker-compose.yml
index 4a0dadd436..4d434d9767 100644
--- a/utils/Docker/docker-compose.yml
+++ b/utils/Docker/docker-compose.yml
@@ -65,3 +65,16 @@ services:
       - CUDA_VISIBLE_DEVICES=$TEST_GPU_0
       - PYTEST_ARGS=$PYTEST_ARGS
       - DP_PYTEST_NO_CACHE=True
+  py311:
+    build:
+      context: ../../
+      dockerfile: utils/Docker/Dockerfile
+      args:
+        - EPOCH=$EPOCH
+        - PYTHON_VERSION=3.11.6
+        - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04
+    user: '${UID}:${GID}'
+    environment:
+      - CUDA_VISIBLE_DEVICES=$TEST_GPU_1
+      - PYTEST_ARGS=$PYTEST_ARGS
+      - DP_PYTEST_NO_CACHE=True
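The new `py311` compose service builds the CI image around CPython 3.11.6 via the `PYTHON_VERSION` build argument. A trivial sanity check of the kind a test inside that container could run (our sketch, not something this PR adds):

```python
import sys

# The py311 service pins PYTHON_VERSION=3.11.6, so inside the container the
# interpreter should report a 3.11.x version.
assert sys.version_info[:2] == (3, 11), sys.version
print(f'Running under CPython {sys.version.split()[0]}')
```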