diff --git a/Jenkinsfile b/Jenkinsfile
index 6461c0f069..0c83c02ba9 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -19,7 +19,7 @@ node('cuda-module') {
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py38 py39
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
- docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310
+ docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310 py311
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1 || exit 0
"""
currentBuild.result = 'SUCCESS'
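The three `ps | grep Exit | grep -v 'Exit 0' && exit 1` lines gate the build between batches: after each `up`, the stage fails if any compose service exited with a non-zero status. A rough Python rendering of that gate, for illustration only (`some-build-tag` stands in for `$BUILD_TAG`):

```python
# Illustrative Python equivalent of the shell gate above: list container
# states and fail when any service exited with a non-zero status.
import subprocess
import sys

ps_output = subprocess.run(
    ['docker-compose', '-f', 'utils/Docker/docker-compose.yml',
     '-p', 'some-build-tag', 'ps'],  # 'some-build-tag' stands in for $BUILD_TAG
    capture_output=True, text=True, check=True,
).stdout

if any('Exit' in line and 'Exit 0' not in line for line in ps_output.splitlines()):
    sys.exit(1)  # at least one test container failed
```

The last gate in the script ends with `|| exit 0`, so a clean run (where the `grep` chain matches nothing) does not itself fail the step.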
diff --git a/README.md b/README.md
index 95cae5e4a9..53a25c0a22 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
[![License Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
-![Python 3.6, 3.7, 3.8, 3.9, 3.10](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-green.svg)
+![Python 3.6, 3.7, 3.8, 3.9, 3.10, 3.11](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-green.svg)
[![Downloads](https://pepy.tech/badge/deeppavlov)](https://pepy.tech/project/deeppavlov)
diff --git a/deeppavlov/_meta.py b/deeppavlov/_meta.py
index b5a90674f9..3384dd3d03 100644
--- a/deeppavlov/_meta.py
+++ b/deeppavlov/_meta.py
@@ -1,4 +1,4 @@
-__version__ = '1.5.0'
+__version__ = '1.6.0'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
diff --git a/deeppavlov/core/data/utils.py b/deeppavlov/core/data/utils.py
index f49043af9f..701956bf64 100644
--- a/deeppavlov/core/data/utils.py
+++ b/deeppavlov/core/data/utils.py
@@ -78,7 +78,7 @@ def s3_download(url: str, destination: str) -> None:
         file_object.download_file(destination, Callback=pbar.update)
 
 
-def simple_download(url: str, destination: Union[Path, str], headers: Optional[dict] = None) -> None:
+def simple_download(url: str, destination: Union[Path, str], headers: Optional[dict] = None, n_tries: int = 3) -> None:
     """Download a file from URL to target location.
 
     Displays a progress bar to the terminal during the download process.
@@ -87,58 +87,66 @@ def simple_download(url: str, destination: Union[Path, str], headers: Optional[d
     Args:
         url: The source URL.
         destination: Path to the file destination (including file name).
         headers: Headers for file server.
+        n_tries: Number of retries if download fails.
     """
-    destination = Path(destination)
-    destination.parent.mkdir(parents=True, exist_ok=True)
-
-    log.info('Downloading from {} to {}'.format(url, destination))
-
-    if url.startswith('s3://'):
-        return s3_download(url, str(destination))
-
-    chunk_size = 32 * 1024
-    temporary = destination.with_suffix(destination.suffix + '.part')
-
-    r = requests.get(url, stream=True, headers=headers)
-    if r.status_code != 200:
-        raise RuntimeError(f'Got status code {r.status_code} when trying to download {url}')
-    total_length = int(r.headers.get('content-length', 0))
-
-    if temporary.exists() and temporary.stat().st_size > total_length:
-        temporary.write_bytes(b'')  # clearing temporary file when total_length is inconsistent
-
-    with temporary.open('ab') as f:
-        downloaded = f.tell()
-        if downloaded != 0:
-            log.warning(f'Found a partial download {temporary}')
-        with tqdm(initial=downloaded, total=total_length, unit='B', unit_scale=True) as pbar:
-            while True:
-                if downloaded != 0:
-                    log.warning(f'Download stopped abruptly, trying to resume from {downloaded} '
-                                f'to reach {total_length}')
-                    headers['Range'] = f'bytes={downloaded}-'
-                    r = requests.get(url, headers=headers, stream=True)
-                    if 'content-length' not in r.headers or \
-                            total_length - downloaded != int(r.headers['content-length']):
-                        raise RuntimeError('It looks like the server does not support resuming downloads.')
-
-                try:
-                    for chunk in r.iter_content(chunk_size=chunk_size):
-                        if chunk:  # filter out keep-alive new chunks
-                            downloaded += len(chunk)
-                            pbar.update(len(chunk))
-                            f.write(chunk)
-                except requests.exceptions.ChunkedEncodingError:
-                    if downloaded == 0:
-                        r = requests.get(url, stream=True, headers=headers)
-
-                if downloaded >= total_length:
-                    # Note that total_length is 0 if the server didn't return the content length,
-                    # in this case we perform just one iteration and assume that we are done.
-                    break
-
-    temporary.rename(destination)
+    try:
+        destination = Path(destination)
+        destination.parent.mkdir(parents=True, exist_ok=True)
+
+        log.info('Downloading from {} to {}'.format(url, destination))
+
+        if url.startswith('s3://'):
+            return s3_download(url, str(destination))
+
+        chunk_size = 32 * 1024
+        temporary = destination.with_suffix(destination.suffix + '.part')
+
+        r = requests.get(url, stream=True, headers=headers)
+        if r.status_code != 200:
+            raise RuntimeError(f'Got status code {r.status_code} when trying to download {url}')
+        total_length = int(r.headers.get('content-length', 0))
+
+        if temporary.exists() and temporary.stat().st_size > total_length:
+            temporary.write_bytes(b'')  # clearing temporary file when total_length is inconsistent
+
+        with temporary.open('ab') as f:
+            downloaded = f.tell()
+            if downloaded != 0:
+                log.warning(f'Found a partial download {temporary}')
+            with tqdm(initial=downloaded, total=total_length, unit='B', unit_scale=True) as pbar:
+                while True:
+                    if downloaded != 0:
+                        log.warning(f'Download stopped abruptly, trying to resume from {downloaded} '
+                                    f'to reach {total_length}')
+                        headers = {**(headers or {}), 'Range': f'bytes={downloaded}-'}  # copy tolerates headers=None and avoids mutating the caller's dict
+                        r = requests.get(url, headers=headers, stream=True)
+                        if 'content-length' not in r.headers or \
+                                total_length - downloaded != int(r.headers['content-length']):
+                            raise RuntimeError('It looks like the server does not support resuming downloads.')
+
+                    try:
+                        for chunk in r.iter_content(chunk_size=chunk_size):
+                            if chunk:  # filter out keep-alive new chunks
+                                downloaded += len(chunk)
+                                pbar.update(len(chunk))
+                                f.write(chunk)
+                    except requests.exceptions.ChunkedEncodingError:
+                        if downloaded == 0:
+                            r = requests.get(url, stream=True, headers=headers)
+
+                    if downloaded >= total_length:
+                        # Note that total_length is 0 if the server didn't return the content length,
+                        # in this case we perform just one iteration and assume that we are done.
+                        break
+
+        temporary.rename(destination)
+    except Exception as e:
+        if n_tries > 0:
+            log.warning(f'Download failed: {e}, retrying')
+            simple_download(url, destination, headers, n_tries - 1)
+        else:
+            raise
 
 
 def download(dest_file_path: [List[Union[str, Path]]], source_url: str, force_download: bool = True,
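Since the whole body is now wrapped in one `try`, any failure (bad status code, dropped connection, a server that refuses range requests) triggers up to `n_tries` recursive re-invocations before the exception finally propagates; and because each retry re-enters through the top, an existing `.part` file is resumed rather than thrown away. A minimal usage sketch (the URL is a placeholder):

```python
# Minimal sketch of the retried download; the URL below is hypothetical.
from deeppavlov.core.data.utils import simple_download

simple_download(
    'http://files.example.com/model.tar.gz',  # placeholder source URL
    '/tmp/model.tar.gz',
    n_tries=3,  # the default: three retries after the first failed attempt
)
```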
diff --git a/deeppavlov/requirements/datasets.txt b/deeppavlov/requirements/datasets.txt
index 5e25e63be6..89157c4094 100644
--- a/deeppavlov/requirements/datasets.txt
+++ b/deeppavlov/requirements/datasets.txt
@@ -1 +1,2 @@
-datasets>=1.16.0,<2.5.0
+datasets>=1.16.0,<2.5.0;python_version<="3.10"
+datasets==2.2.*;python_version=="3.11.*"
diff --git a/deeppavlov/requirements/faiss.txt b/deeppavlov/requirements/faiss.txt
index 9c8dd2b5f1..dbb39a0bcf 100644
--- a/deeppavlov/requirements/faiss.txt
+++ b/deeppavlov/requirements/faiss.txt
@@ -1 +1,2 @@
-faiss-cpu==1.7.2
+faiss-cpu==1.7.2;python_version<="3.10"
+faiss-cpu==1.7.4;python_version=="3.11.*"
diff --git a/deeppavlov/requirements/kenlm.txt b/deeppavlov/requirements/kenlm.txt
index 8bd21c6112..5456b04d2d 100644
--- a/deeppavlov/requirements/kenlm.txt
+++ b/deeppavlov/requirements/kenlm.txt
@@ -1 +1,2 @@
-pypi-kenlm==0.1.20220713
+pypi-kenlm==0.1.20220713;python_version<="3.10"
+kenlm==0.2.*;python_version=="3.11.*"
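These requirement files lean on PEP 508 environment markers: pip evaluates the expression after `;` against the running interpreter and skips the line when it is false, so each file can pin a different distribution per Python range. Markers can be sanity-checked with the `packaging` library (the same one pip vendors):

```python
# Checking how pip will read the markers used in the requirement files above.
from packaging.markers import Marker

marker = Marker('python_version == "3.11.*"')
print(marker.evaluate({'python_version': '3.11'}))  # True on CPython 3.11
print(marker.evaluate({'python_version': '3.10'}))  # False, so pip skips the line
```

The same mechanism drives the split pins added to `requirements.txt` and to the `docs` extra in `setup.py` further down.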
diff --git a/docs/features/models/KBQA.ipynb b/docs/features/models/KBQA.ipynb
index f0b501a45b..741bce7c79 100644
--- a/docs/features/models/KBQA.ipynb
+++ b/docs/features/models/KBQA.ipynb
@@ -22,13 +22,13 @@
" \n",
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
"\n",
- " 4.3. [Using entity linking and Wiki parser as standalone services for KBQA](#4.3-Using-entity-linking-and-Wiki-parser-as-standalone-services-for-KBQA)\n",
+ " 4.3. [Using entity linking and Wiki parser as standalone services for KBQA](#4.3-Using-entity-linking-and-Wiki-parser-as-standalone-tools-for-KBQA)\n",
" \n",
"5. [Customize the model](#5.-Customize-the-model)\n",
" \n",
- " 5.1. [Train your model from Python](#5.1-Train-your-model-from-Python)\n",
+ " 5.1. [Description of config parameters](#5.1-Description-of-config-parameters)\n",
" \n",
- " 5.2. [Train your model from CLI](#5.2-Train-your-model-from-CLI)\n",
+ " 5.2. [Train KBQA components](#5.2-Train-KBQA-components)\n",
"\n",
"# 1. Introduction to the task\n",
"\n",
diff --git a/docs/features/models/NER.ipynb b/docs/features/models/NER.ipynb
index da8473f5e3..44140896bb 100644
--- a/docs/features/models/NER.ipynb
+++ b/docs/features/models/NER.ipynb
@@ -22,7 +22,7 @@
" \n",
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
" \n",
- "5. [Evaluate](#6.-Evaluate)\n",
+ "5. [Evaluate](#5.-Evaluate)\n",
" \n",
" 5.1. [Evaluate from Python](#5.1-Evaluate-from-Python)\n",
" \n",
diff --git a/docs/features/models/SQuAD.ipynb b/docs/features/models/SQuAD.ipynb
index a158475d9e..5c7552a698 100644
--- a/docs/features/models/SQuAD.ipynb
+++ b/docs/features/models/SQuAD.ipynb
@@ -105,7 +105,7 @@
"`squad_bert` is the name of the model's *config_file*. [What is a Config File?](http://docs.deeppavlov.ai/en/master/intro/configuration.html) \n",
"\n",
"Configuration file defines the model and describes its hyperparameters. To use another model, change the name of the *config_file* here and further.\n",
- "The full list of the models with their config names can be found in the [table](#6.-Models-list).\n",
+ "The full list of the models with their config names can be found in the [table](#3.-Models-list).\n",
"\n",
"# 3. Models list\n",
"\n",
diff --git a/docs/features/models/classification.ipynb b/docs/features/models/classification.ipynb
index e7158c7cef..89d9ddc408 100644
--- a/docs/features/models/classification.ipynb
+++ b/docs/features/models/classification.ipynb
@@ -162,7 +162,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## 3.2 Predict using CLI\n",
+ "## 4.2 Predict using CLI\n",
"\n",
"You can also get predictions in an interactive mode through CLI (Command Line Interface)."
]
@@ -198,9 +198,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# 4. Evaluation\n",
+ "# 5. Evaluation\n",
"\n",
- "## 4.1 Evaluate from Python"
+ "## 5.1 Evaluate from Python"
]
},
{
@@ -218,7 +218,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## 4.2 Evaluate from CLI"
+ "## 5.2 Evaluate from CLI"
]
},
{
@@ -234,9 +234,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# 5. Customize the model\n",
+ "# 6. Train the model on your data\n",
"\n",
- "## 5.1 Train your model from Python\n",
+ "## 6.1 Train your model from Python\n",
"\n",
"### Provide your data path\n",
"\n",
@@ -346,7 +346,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## 5.2 Train your model from CLI\n",
+ "## 6.2 Train your model from CLI\n",
"\n",
"To train the model on your data, create a copy of a config file and change the *data_path* variable in it. After that, train the model using your new *config_file*. You can also change any of the hyperparameters of the model."
]
diff --git a/docs/features/models/few_shot_classification.ipynb b/docs/features/models/few_shot_classification.ipynb
index c0207fc23f..910a35181f 100644
--- a/docs/features/models/few_shot_classification.ipynb
+++ b/docs/features/models/few_shot_classification.ipynb
@@ -119,7 +119,7 @@
"\n",
"## 4.2 Predict using Python\n",
"\n",
- "After [installing](#4.-Get-started-with-the-model) the model, build it from the config and predict."
+ "After [installing](#2.-Get-started-with-the-model) the model, build it from the config and predict."
]
},
{
@@ -192,7 +192,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## 4.2 Predict using CLI\n",
+ "## 4.3 Predict using CLI\n",
"\n",
"You can also get predictions in an interactive mode through CLI (Сommand Line Interface)."
]
diff --git a/docs/features/models/morpho_tagger.ipynb b/docs/features/models/morpho_tagger.ipynb
index 15df88e3b7..fd8a0d13b3 100644
--- a/docs/features/models/morpho_tagger.ipynb
+++ b/docs/features/models/morpho_tagger.ipynb
@@ -22,7 +22,7 @@
"\n",
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
"\n",
- "5. [Customize the model](#4.-Customize-the-model)\n",
+ "5. [Customize the model](#5.-Customize-the-model)\n",
"\n",
"# 1. Introduction to the task\n",
"\n",
diff --git a/docs/features/models/relation_extraction.ipynb b/docs/features/models/relation_extraction.ipynb
index 9e962d742f..2cbadb341d 100644
--- a/docs/features/models/relation_extraction.ipynb
+++ b/docs/features/models/relation_extraction.ipynb
@@ -198,7 +198,7 @@
"|NUM | Percents, money, quantities |\n",
"|MISC | Products, including vehicles, weapons, etc.
Events, including elections, battles, sporting MISC events, etc. Laws, cases, languages, etc. |\n",
"\n",
- "**Model Output**: one or several of the [97 relations](#5.1-Relations-used-in-English-model) found between the given entities; relation id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P26') and relation name ('spouse').\n",
+ "**Model Output**: one or several of the [97 relations](#6.1-Relations-used-in-English-model) found between the given entities; relation id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P26') and relation name ('spouse').\n",
"\n",
"### Russian"
]
@@ -244,8 +244,34 @@
"- list of entities positions (i.e. all start and end positions of both entities' mentions)\n",
"- list of NER tags of both entities.\n",
"\n",
- "**Model Output**: one or several of the [30 relations](#5.2-Relations-used-in-Russian-model) found between the given entities; a Russian relation name (e.g. \"участник\") or an English one, if Russian one is unavailable, and, if applicable, its id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P710').\n",
+ "**Model Output**: one or several of the [30 relations](#6.2-Relations-used-in-Russian-model) found between the given entities; a Russian relation name (e.g. \"участник\") or an English one, if Russian one is unavailable, and, if applicable, its id in [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) (e.g. 'P710').\n",
"\n",
+ "## 4.2 Predict using CLI\n",
+ "\n",
+ "You can also get predictions in an interactive mode through CLI."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! python -m deeppavlov interact re_docred [-d]\n",
+ "! python -m deeppavlov interact re_rured [-d]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`-d` is an optional download key (alternative to `download=True` in Python code). It is used to download the pre-trained model along with embeddings and all other files needed to run the model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
"# 5. Customize the model\n",
"\n",
"## 5.1 Description of config parameters\n",
diff --git a/docs/features/models/spelling_correction.ipynb b/docs/features/models/spelling_correction.ipynb
index b675940d89..7ebcc7e971 100644
--- a/docs/features/models/spelling_correction.ipynb
+++ b/docs/features/models/spelling_correction.ipynb
@@ -22,7 +22,7 @@
"\n",
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
"\n",
- "5. [Customize the model](#4.-Customize-the-model)\n",
+ "5. [Customize the model](#5.-Customize-the-model)\n",
"\n",
" 5.1. [Training configuration](#5.1-Training-configuration)\n",
"\n",
diff --git a/docs/features/models/syntax_parser.ipynb b/docs/features/models/syntax_parser.ipynb
index abdea2d627..5654593f4d 100644
--- a/docs/features/models/syntax_parser.ipynb
+++ b/docs/features/models/syntax_parser.ipynb
@@ -22,7 +22,7 @@
"\n",
" 4.2. [Predict using CLI](#4.2-Predict-using-CLI)\n",
"\n",
- "5. [Customize the model](#4.-Customize-the-model)\n",
+ "5. [Customize the model](#5.-Customize-the-model)\n",
"\n",
"# 1. Introduction to the task\n",
"\n",
diff --git a/docs/intro/installation.rst b/docs/intro/installation.rst
index 6c8c51a4c9..58eb3264d7 100644
--- a/docs/intro/installation.rst
+++ b/docs/intro/installation.rst
@@ -1,7 +1,7 @@
Installation
============
-DeepPavlov supports **Linux**, **Windows 10+** (through WSL/WSL2), **MacOS** (Big Sur+) platforms, **Python 3.6-3.10**.
+DeepPavlov supports **Linux**, **Windows 10+** (through WSL/WSL2), **macOS** (Big Sur+) platforms, **Python 3.6-3.11**.
Depending on the model used, you may need from 4 to 16 GB RAM.
Install with pip
diff --git a/docs/intro/quick_start.rst b/docs/intro/quick_start.rst
index 66626b4ffd..680377302e 100644
--- a/docs/intro/quick_start.rst
+++ b/docs/intro/quick_start.rst
@@ -2,7 +2,7 @@ QuickStart
------------
First, follow instructions on :doc:`Installation page `
-to install ``deeppavlov`` package for Python 3.6/3.7/3.8/3.9/3.10.
+to install ``deeppavlov`` package for Python 3.6-3.11.
DeepPavlov contains a bunch of great pre-trained NLP models. Each model is
determined by its config file. List of models is available on
diff --git a/requirements.txt b/requirements.txt
index fa4928622b..48e4c2fbb8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,8 @@ prometheus-client>=0.13.0,<=1.16.0
pydantic<2
pybind11==2.10.3
requests>=2.19.0,<3.0.0
-scikit-learn>=0.24,<1.1.0
+scikit-learn>=0.24,<1.1.0;python_version<="3.10"
+scikit-learn==1.4.0;python_version=="3.11.*"
tqdm>=4.42.0,<4.65.0
uvicorn>=0.13.0,<0.19.0
wheel
diff --git a/setup.py b/setup.py
index db912de26c..875335e992 100644
--- a/setup.py
+++ b/setup.py
@@ -68,11 +68,17 @@ def readme():
             'pexpect'
         ],
         'docs': [
-            'sphinx==3.5.4;python_version<"3.10"',
-            'sphinx==4.5.0;python_version>="3.10"',
-            'sphinx_rtd_theme==0.5.2',
-            'docutils<0.17,>=0.12',
-            'nbsphinx==0.8.4',
+            'sphinx==3.5.4;python_version<="3.7"',
+            'sphinx==5.0.0;python_version=="3.8"',
+            'sphinx==5.0.0;python_version=="3.9"',
+            'sphinx==5.0.0;python_version=="3.10"',
+            'sphinx==7.2.*;python_version=="3.11.*"',
+            'sphinx_rtd_theme==0.5.2;python_version<="3.10"',
+            'sphinx_rtd_theme==2.0.0;python_version=="3.11.*"',
+            'docutils<0.17,>=0.12;python_version<="3.10"',
+            'docutils==0.20.1;python_version=="3.11.*"',
+            'nbsphinx==0.8.4;python_version<="3.10"',
+            'nbsphinx==0.9.3;python_version=="3.11.*"',
             'ipykernel==5.5.4',
             'jinja2<=3.0.3',
             'sphinx-copybutton==0.5.0',
diff --git a/utils/Docker/docker-compose.yml b/utils/Docker/docker-compose.yml
index 4a0dadd436..4d434d9767 100644
--- a/utils/Docker/docker-compose.yml
+++ b/utils/Docker/docker-compose.yml
@@ -65,3 +65,16 @@ services:
       - CUDA_VISIBLE_DEVICES=$TEST_GPU_0
       - PYTEST_ARGS=$PYTEST_ARGS
       - DP_PYTEST_NO_CACHE=True
+  py311:
+    build:
+      context: ../../
+      dockerfile: utils/Docker/Dockerfile
+      args:
+        - EPOCH=$EPOCH
+        - PYTHON_VERSION=3.11.6
+        - BASE_IMAGE=nvidia/cuda:11.5.2-cudnn8-runtime-ubuntu20.04
+    user: '${UID}:${GID}'
+    environment:
+      - CUDA_VISIBLE_DEVICES=$TEST_GPU_1
+      - PYTEST_ARGS=$PYTEST_ARGS
+      - DP_PYTEST_NO_CACHE=True