Skip to content

Commit

Permalink
Release 1.0.2
Browse files Browse the repository at this point in the history
  • Loading branch information
IgnatovFedor authored Jan 10, 2023
2 parents 49e74be + 9ff98b6 commit a1e9477
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 31 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ from deeppavlov import evaluate_model
model = evaluate_model(<config_path>, install=True, download=True)
```

DeepPavlov also [allows](https://docs.deeppavlov.ai/en/master/intro/python.html) you to build a model from components for
inference using Python.

## License

DeepPavlov is Apache 2.0 - licensed.
2 changes: 1 addition & 1 deletion deeppavlov/_meta.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '1.0.1'
__version__ = '1.0.2'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Welcome to DeepPavlov's documentation!
QuickStart <intro/quick_start>
General concepts <intro/overview>
Configuration file <intro/configuration>
Python pipelines <intro/python.ipynb>
Models overview <features/overview>


Expand Down
141 changes: 141 additions & 0 deletions docs/intro/python.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6d5cd16b",
"metadata": {},
"source": [
"#### Python pipelines"
]
},
{
"cell_type": "markdown",
"id": "da10fd80",
"metadata": {},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deeppavlov/DeepPavlov/blob/master/docs/intro/python.ipynb)"
]
},
{
"cell_type": "markdown",
"id": "d55ebe35",
"metadata": {},
"source": [
"Models can be built and used directly from Python, without .json configuration files.\n",
"\n",
"The code below is an alternative to building the [insults_kaggle_bert](https://github.com/deeppavlov/DeepPavlov/blob/master/deeppavlov/configs/classifiers/insults_kaggle_bert.json) model and using it with\n",
"\n",
"```python\n",
"from deeppavlov import build_model\n",
"\n",
"model = build_model('insults_kaggle_bert', download=True)\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "fa1db63b",
"metadata": {},
"source": [
"First, define variables for the model components and download the model data."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d6671e2",
"metadata": {},
"outputs": [],
"source": [
"from deeppavlov.core.commands.utils import expand_path\n",
"from deeppavlov.download import download_resource\n",
"\n",
"\n",
"classifiers_path = expand_path('~/.deeppavlov/models/classifiers')\n",
"model_path = classifiers_path / 'insults_kaggle_torch_bert'\n",
"transformer_name = 'bert-base-uncased'\n",
"\n",
"download_resource(\n",
" 'http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v5.tar.gz',\n",
" {classifiers_path}\n",
")\n"
]
},
{
"cell_type": "markdown",
"id": "332d644e",
"metadata": {},
"source": [
"Then, initialize the model components."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "809c31ad",
"metadata": {},
"outputs": [],
"source": [
"from deeppavlov.core.data.simple_vocab import SimpleVocabulary\n",
"from deeppavlov.models.classifiers.proba2labels import Proba2Labels\n",
"from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor\n",
"from deeppavlov.models.torch_bert.torch_transformers_classifier import TorchTransformersClassifierModel\n",
"\n",
"\n",
"preprocessor = TorchTransformersPreprocessor(\n",
" vocab_file=transformer_name,\n",
" max_seq_length=64\n",
")\n",
"\n",
"classes_vocab = SimpleVocabulary(\n",
" load_path=model_path/'classes.dict',\n",
" save_path=model_path/'classes.dict'\n",
")\n",
"\n",
"classifier = TorchTransformersClassifierModel(\n",
" n_classes=classes_vocab.len,\n",
" return_probas=True,\n",
" pretrained_bert=transformer_name,\n",
" save_path=model_path/'model',\n",
" optimizer_parameters={'lr': 1e-05}\n",
")\n",
"\n",
"proba2labels = Proba2Labels(max_proba=True)"
]
},
{
"cell_type": "markdown",
"id": "87e8ec20",
"metadata": {},
"source": [
"Finally, create the model from the components. ``Element`` is a wrapper for a component: it receives the component and the names of its incoming and outgoing arguments. ``Model`` combines ``Element``s into a pipeline."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "acfe29de",
"metadata": {},
"outputs": [],
"source": [
"from deeppavlov import Element, Model\n",
"\n",
"model = Model(\n",
" x=['x'],\n",
" out=['y_pred_labels'],\n",
" pipe=[\n",
" Element(component=preprocessor, x=['x'], out=['bert_features']),\n",
" Element(component=classifier, x=['bert_features'], out=['y_pred_probas']),\n",
" Element(component=proba2labels, x=['y_pred_probas'], out=['y_pred_ids']),\n",
" Element(component=classes_vocab, x=['y_pred_ids'], out=['y_pred_labels'])\n",
" ]\n",
")\n",
"\n",
"model(['you are stupid', 'you are smart'])"
]
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ aio-pika>=3.2.2,<6.9.0
fastapi>=0.47.0,<0.78.0
filelock>=3.0.0,<3.8.0
nltk>=3.2.5,<3.8.0
numpy
numpy<1.24
overrides==4.1.2
pandas>=1.0.0,<1.5.0
prometheus-client>=0.13.0,<0.15.0
Expand Down
60 changes: 31 additions & 29 deletions utils/prepare/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
# limitations under the License.

import argparse
import os
import shutil
import pathlib
import tarfile
from pathlib import Path

Expand All @@ -23,45 +22,48 @@
from hashes import main


def upload(config_in_file):
def upload(config_in_file: str, tar_name: str, tar_output_dir: Path):
if not tar_output_dir.exists():
raise RuntimeError(f'A folder {tar_output_dir} does not exist')

print(f'Config: {config_in_file}')
if not Path(config_in_file).exists():
raise RuntimeError(f'A config {config_in_file} does not exist')

print(config_in_file)
config_in = parse_config(config_in_file)
config_in_file = find_config(config_in_file)

model_path = Path(config_in['metadata']['variables']['MODEL_PATH']).expanduser()
models_path = Path(config_in['metadata']['variables']['MODELS_PATH']).expanduser()
model_name, class_name = config_in_file.stem, config_in_file.parent.name

if str(model_name) not in str(model_path):
raise(f'{model_name} is not the path of the {model_path}')

arcname = str(model_path).split("models/")[1]
tar_path = models_path/model_name
tmp_folder = f'/tmp/'
tmp_tar = tmp_folder + f'{model_name}.tar.gz'

print("model_path", model_path)
print("class_name", class_name)
print("model_name", model_name)

print("Start tarring")
archive = tarfile.open(tmp_tar, "w|gz")
archive.add(model_path, arcname=arcname)
archive.close()
if tar_name is None:
tar_name = f'{model_name}'
print(f'tar_name set to {tar_name}')

full_tar_name = tar_output_dir / f'{tar_name}.tar.gz'
if Path(full_tar_name).exists():
raise RuntimeError(f'An archive {Path(full_tar_name)} already exists')

print(f'model_path: {model_path}')
print(f'class_name: {class_name}')
print(f'model_name: {model_name}')
print(f'Start tarring to {full_tar_name}')
with tarfile.open(str(full_tar_name), "w|gz") as archive:
archive.add(model_path, arcname=pathlib.os.sep)

print("Stop tarring")
print(f'Tar archive: {Path(full_tar_name)} has been created')

print("Calculating hash")
main(tmp_tar)

print("tmp_tar", tmp_tar)
command = f'scp -r {tmp_folder}{model_name}* share.ipavlov.mipt.ru:/home/export/v1/{class_name}'
donwload_url = f'http://files.deeppavlov.ai/v1/{class_name}/{model_name}.tar.gz'
print(command, donwload_url, sep='\n')
main(full_tar_name)


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("config_in", help="path to a config", type=str)
parser.add_argument('-c', '--config_in', help='path to a config', type=str)
parser.add_argument('-n', '--tar_name', help='name of the tar archive (without tar.gz extension)',
default=None, required=False, type=str)
parser.add_argument('-o', '--tar_output_dir', help='dir to save a tar archive', default='./',
required=False, type=Path)
args = parser.parse_args()
upload(args.config_in)
upload(args.config_in, args.tar_name, args.tar_output_dir)

0 comments on commit a1e9477

Please sign in to comment.