-
Notifications
You must be signed in to change notification settings - Fork 20
/
pyproject.toml
121 lines (111 loc) · 3.84 KB
/
pyproject.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
[build-system]
requires = ["setuptools>=40.8.0", "setuptools-scm>=8.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "fms_dgt"
dynamic = ["version"]
description = "A scalable framework for synthetic data generation"
license = { "text" = "Apache-2.0" }
readme = "README.md"
classifiers = [
"License :: OSI Approved :: Apache License",
]
requires-python = ">=3.10"
dependencies = [
"accelerate>=0.26.0",
"datasets>=2.16.0",
"jsonlines",
"peft>=0.5.0",
"pybind11>=2.6.2",
"pytablewriter",
"rouge-score>=0.0.4",
"scikit-learn>=0.24.1",
"sqlitedict",
"torch>=2.3",
"tqdm-multiprocess",
"transformers>=4.1",
"tabulate>=0.9",
"zstandard",
"dill",
"word2number",
"more_itertools",
"GitPython",
"Jinja2",
"openapi-schema-validator",
"fastparquet>=2024.5.0",
"psutil",
"uvloop",
"ray[default]",
"python-dotenv",
]
[tool.setuptools.packages.find]
include = ["fms_dgt*"]
# required to include yaml files in pip installation
[tool.setuptools.package-data]
fms_dgt = ["**/*.yaml", "tasks/**/*"]
[project.scripts]
fms_dgt = "fms_dgt.__main__:main"
[project.urls]
Homepage = "https://github.com/foundation-model-stack/fms-dgt"
Repository = "https://github.com/foundation-model-stack/fms-dgt"
[project.optional-dependencies]
genai = ["ibm-generative-ai"]
watsonx = ["ibm-watsonx-ai"]
gptq = ["auto-gptq[triton]>=0.6.0"]
openai = ["openai>=1.3.9", "tiktoken"]
sentencepiece = ["sentencepiece>=0.1.98"]
sql = ["pydantic>=2.4.2", "pydantic-settings>=2.0.3", "pyyaml>=6.0.1", "sqlglot==23.11.2"]
testing = ["pytest", "pytest-cov", "pytest-xdist"]
star = ["fms-hf-tuning", "fms-hf-tuning[dev]", "fms-hf-tuning[flash-attn]", "fms-hf-tuning[fms-accel]"]
vllm = ["vllm"]
dpk = [
"data_prep_toolkit @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=data-processing-lib/python",
"data_prep_toolkit_ray @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=data-processing-lib/ray",
"dpk_doc_id_transform_python @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=transforms/universal/doc_id/python",
"dpk_doc_id_transform_ray @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=transforms/universal/doc_id/ray",
"dpk_ededup_transform_python @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=transforms/universal/ededup/python",
"dpk_ededup_transform_ray @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=transforms/universal/ededup/ray",
"dpk_fdedup_transform_ray @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=transforms/universal/fdedup/ray",
"dpk_code_quality_transform_python @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=transforms/code/code_quality/python",
"dpk_doc_quality_transform_python @ git+https://github.com/IBM/data-prep-kit.git#subdirectory=transforms/language/doc_quality/python",
]
datatrove = ["datatrove[all]"]
dev-test = [
# TODO: Add packages when tests are built out
# "pytest>=6",
# "pytest-cov>=2.10.1",
# "pytest-timeout>=2.1.0,<3",
]
dev-fmt = [
"pre-commit>=3.0.4,<4.0",
"ruff==0.4.1",
]
dev-build = [
"setuptools>=60",
"setuptools-scm>=8.0",
]
# NOTE: This is "all" from the user and dev perspective
all-dev = [
"fms_dgt[all, dev-test, dev-fmt, dev-build]"
]
all = [
"fms_dgt[genai]",
"fms_dgt[openai]",
"fms_dgt[sentencepiece]",
"fms_dgt[sql]",
"fms_dgt[dpk]",
# Disable as its Linux CUDA specific
# Use can set with the [vllm] target
# "fms_dgt[vllm]",
]
[tool.ruff]
line-length = 100
target-version = "py38"
ignore = ["I001"] # Import block is un-sorted or un-formatted (clash with isort instead)
[tool.ruff.lint]
select = ["I"]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = [
"F401", # imported but unused
"F403" # unable to detect undefined names
]