forked from PyThaiNLP/pythainlp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup.py
133 lines (116 loc) · 3.44 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: utf-8 -*-
from setuptools import find_packages, setup
# Long description for the package, passed to setup() as `long_description`
# with content type "text/markdown".  Blank lines separate the Markdown
# blocks: without them a renderer folds consecutive lines into a single
# paragraph (e.g. the logo image and the introduction, or the three
# sign-off lines at the end).
readme = """
![PyThaiNLP Logo](https://avatars0.githubusercontent.com/u/32934255?s=200&v=4)

PyThaiNLP is a Python library for Thai natural language processing.
The library provides functions like word tokenization, part-of-speech tagging,
transliteration, soundex generation, and spell checking.

## Install

For stable version:

```sh
pip install pythainlp
```

For development version:

```sh
pip install --upgrade --pre pythainlp
```

Some functionalities, like named-entity recognition, require extra packages.
See https://github.com/PyThaiNLP/pythainlp for installation options.

Made with ❤️

PyThaiNLP Team

"We build Thai NLP"
"""
# Mandatory runtime dependencies; handed to setup() as `install_requires`,
# so they are installed with every installation of the package.
# Keep the entries in alphabetical order.
requirements = [
    "dill>=0.3.0",
    "nltk>=3.3",
    "requests>=2.22.0",
    "tinydb>=3.0",
    "tqdm>=4.1",
]
# Optional dependency groups; handed to setup() as `extras_require`, so each
# key can be requested at install time, e.g. `pip install pythainlp[ner]`.
extras = {
    "attacut": ["attacut>=1.0.6"],
    "benchmarks": ["numpy>=1.16", "pandas>=0.24"],
    "icu": ["pyicu>=2.3"],
    "ipa": ["epitran>=1.1"],
    "ml": ["numpy>=1.16", "torch>=1.0.0"],
    "ner": ["sklearn-crfsuite>=0.3.6"],
    "ssg": ["ssg>=0.0.6"],
    "thai2fit": ["emoji>=0.5.1", "gensim>=3.2.0", "numpy>=1.16"],
    "thai2rom": ["torch>=1.0.0", "numpy>=1.16"],
}

# "full" installs every optional dependency.  It is derived from the groups
# above instead of being maintained by hand, because the hand-written list
# had drifted (it pinned attacut>=1.0.4 while the "attacut" extra requires
# >=1.0.6).  The set removes requirements shared by several groups and
# sorted() keeps the resulting list deterministic.
extras["full"] = sorted(
    {requirement for group in extras.values() for requirement in group}
)
# Package definition.  Keyword arguments are grouped by purpose; their order
# has no effect on the resulting distribution.
setup(
    # --- Project identity and descriptive metadata ---
    name="pythainlp",
    version="2.1",
    description="Thai Natural Language Processing library",
    long_description=readme,
    long_description_content_type="text/markdown",
    author="PyThaiNLP",
    author_email="wannaphong@kkumail.com",
    url="https://github.com/PyThaiNLP/pythainlp",
    license="Apache Software License 2.0",
    keywords=[
        "pythainlp",
        "NLP",
        "natural language processing",
        "text analytics",
        "ThaiNLP",
        "text processing",
        "localization",
    ],
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Programming Language :: Python :: 3",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
        "Natural Language :: Thai",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Text Processing",
        "Topic :: Text Processing :: General",
        "Topic :: Text Processing :: Linguistic",
    ],
    # --- Dependencies ---
    python_requires=">=3.6",
    install_requires=requirements,
    extras_require=extras,
    # --- Package contents ---
    packages=find_packages(),
    # Non-Python data files bundled inside the pythainlp.corpus package.
    package_data={
        "pythainlp.corpus": [
            "corpus_license.md",
            "countries_th.txt",
            "negations_th.txt",
            "orchid_pos_th.json",
            "orchid_pt_tagger.dill",
            "person_names_female_th.txt",
            "person_names_male_th.txt",
            "stopwords_th.txt",
            "syllables_th.txt",
            "tha-wn.db",
            "thailand_provinces_th.txt",
            "tnc_freq.txt",
            "ttc_freq.txt",
            "ud_thai_pud_pt_tagger.dill",
            "ud_thai_pud_unigram_tagger.dill",
            "words_th_thai2fit_201810.txt",
            "words_th.txt",
        ],
    },
    include_package_data=True,
    zip_safe=False,
    # Executable scripts copied from the repository's bin/ directory.
    scripts=[
        "bin/pythainlp",
        "bin/word-tokenization-benchmark",
    ],
    # --- Testing ---
    test_suite="tests",
)
# TODO: Check extras and decide to download additional data, like model files