From 3cc7a64d2b5c1b60891cab5f77974ccfb8e7f816 Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Sat, 11 May 2024 17:08:30 +0300 Subject: [PATCH] refactor!: compile each language individually --- .editorconfig | 2 +- .github/CONTRIBUTING.md | 8 +- .github/workflows/release.yml | 8 -- .github/workflows/tests.yml | 14 ++- MANIFEST.in | 5 +- README.rst | 3 + languages.json | 50 ++++++++ setup.py | 123 +++++++++++++++----- tests/test_tree_sitter_languages.py | 4 +- tree_sitter_languages/__init__.py | 13 ++- tree_sitter_languages/_language/__init__.py | 1 + tree_sitter_languages/language.c | 39 +++++++ tree_sitter_languages/languages.c | 48 -------- tree_sitter_languages/languages.h | 18 --- tree_sitter_languages/repos/.gitignore | 1 - tree_sitter_languages/repos/clone.sh | 26 ----- tree_sitter_languages/repos/repos.txt | 14 --- 17 files changed, 212 insertions(+), 165 deletions(-) create mode 100644 languages.json create mode 100644 tree_sitter_languages/_language/__init__.py create mode 100644 tree_sitter_languages/language.c delete mode 100644 tree_sitter_languages/languages.c delete mode 100644 tree_sitter_languages/languages.h delete mode 100644 tree_sitter_languages/repos/.gitignore delete mode 100755 tree_sitter_languages/repos/clone.sh delete mode 100644 tree_sitter_languages/repos/repos.txt diff --git a/.editorconfig b/.editorconfig index fb99a82..63862c3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -13,5 +13,5 @@ indent_size = 3 [*.md] indent_size = 2 -[*.yml] +[*.{yml,json}] indent_size = 2 diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 6636c22..61696ac 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -2,9 +2,7 @@ ## Adding a new language -- Add the language repo to _README.rst_ (along with its license) and - _tree_sitter_languages/repos/repos.txt_. -- Add the language name to _tests/test_tree_sitter_languages.py_ (sorted). -- Add `TS_LANGUAGE_INIT(name)` and `TS_LANGUAGE_METHOD(name),` to - _tree_sitter_languages/languages.c_ (sorted). +- Add the language data to _languages.json_. +- Add the language repo and license to _README.rst_. +- Add the language name to _tests/test_tree_sitter_languages.py_. - Submit a pull request. diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c63a07b..de984ff 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,10 +12,6 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Clone repos - working-directory: tree_sitter_languages/repos - shell: bash - run: ./clone.sh - name: Set up Python uses: actions/setup-python@v5 with: @@ -41,10 +37,6 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Clone repos - working-directory: tree_sitter_languages/repos - shell: bash - run: ./clone.sh - name: Set up Python uses: actions/setup-python@v5 with: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2c1ad84..fbfa674 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,10 +4,14 @@ on: push: branches: ["*"] paths: + - setup.py + - languages.json - tests/ - tree_sitter_languages/ pull_request: paths: + - setup.py + - languages.json - tests/ - tree_sitter_languages/ @@ -19,16 +23,16 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Clone repos - working-directory: tree_sitter_languages/repos - shell: bash - run: ./clone.sh - name: Set up Python uses: actions/setup-python@v5 with: cache: pip python-version: "3.9" + - name: Set up tree-sitter CLI + uses: tree-sitter/setup-action/cli/@v1 - name: Install package run: pip install -v -e . + env: + TS_REGENERATE: "1" - name: Run tests - run: python -munittest discover tests + run: python -munittest discover -v tests diff --git a/MANIFEST.in b/MANIFEST.in index 1634c62..c3530c9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ -include tree_sitter_languages/languages.h +include languages.json -prune tree_sitter_languages/repos -recursive-include tree_sitter_languages/repos src/**/*.[ch] +prune vendor diff --git a/README.rst b/README.rst index 36f5d64..b003ba8 100644 --- a/README.rst +++ b/README.rst @@ -17,6 +17,9 @@ Install pip install tree-sitter-languages + +**Note:** building from source requires ``git`` and a C compiler. + Usage ===== diff --git a/languages.json b/languages.json new file mode 100644 index 0000000..9df4845 --- /dev/null +++ b/languages.json @@ -0,0 +1,50 @@ +{ + "dot": { + "repo": "https://github.com/rydesun/tree-sitter-dot" + }, + "elisp": { + "repo": "https://github.com/Wilfred/tree-sitter-elisp" + }, + "elm": { + "repo": "https://github.com/elm-tooling/tree-sitter-elm" + }, + "fixed_form_fortran": { + "repo": "https://github.com/ZedThree/tree-sitter-fixed-form-fortran", + "branch": "f77" + }, + "fortran": { + "repo": "https://github.com/stadelmanma/tree-sitter-fortran" + }, + "gomod": { + "repo": "https://github.com/camdencheek/tree-sitter-go-mod" + }, + "hack": { + "repo": "https://github.com/slackhq/tree-sitter-hack" + }, + "hcl": { + "repo": "https://github.com/tree-sitter-grammars/tree-sitter-hcl", + "languages": { + "hcl": "", + "terraform": "dialects/terraform" + } + }, + "kotlin": { + "repo": "https://github.com/fwcd/tree-sitter-kotlin" + }, + "make": { + "repo": "https://github.com/tree-sitter-grammars/tree-sitter-make" + }, + "objc": { + "repo": "https://github.com/tree-sitter-grammars/tree-sitter-objc" + }, + "rst": { + "repo": "https://github.com/stsewd/tree-sitter-rst" + }, + "scala": { + "repo": "https://github.com/tree-sitter/tree-sitter-scala" + }, + "sql": { + "repo": "https://github.com/derekstride/tree-sitter-sql", + "branch": "gh-pages" + } +} diff --git a/setup.py b/setup.py index 57afb4b..bf04999 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,90 @@ -from glob import glob +from json import loads +from pathlib import Path from platform import system +from os import chdir, environ, getcwd +from typing import TypedDict -from setuptools import Extension, setup # type: ignore -from wheel.bdist_wheel import bdist_wheel # type: ignore +from setuptools import Extension, setup +from setuptools.command.build_ext import build_ext +from wheel.bdist_wheel import bdist_wheel + +Language = TypedDict("Language", { + "repo": str, + "branch": str | None, + "languages": dict[str, str] | None +}) + +languages: dict[str, Language] = \ + loads(Path(__file__).with_name("languages.json").read_text()) + +extensions: list[Extension] = [] + +common_source = Path(__file__).parent / "tree_sitter_languages" / "language.c" + +for lang, data in languages.items(): + for name in data.get("languages", {lang: ""}).keys(): + extensions.append( + Extension( + name=f"tree_sitter_languages._language.{name}", + sources=[str(common_source)], + include_dirs=[lang], + define_macros=[ + ("PY_SSIZE_T_CLEAN", None), + ("TREE_SITTER_HIDE_SYMBOLS", None), + ("TS_LANGUAGE_NAME", name), + ], + extra_compile_args=[ + "-std=c11", + "-fvisibility=hidden", + "-Wno-cast-function-type", + "-Wno-unused-but-set-variable", + "-Werror=implicit-function-declaration", + ] if system() != "Windows" else [ + "/std:c11", + "/wd4244", + ], + py_limited_api=True, + optional=True, + ) + ) + + +class BuildExt(build_ext): + def build_extension(self, ext: Extension): + name = ext.include_dirs.pop() + lang = languages[name] + cwd = getcwd() + + if not (dir := Path(cwd) / "vendor" / name).is_dir(): + clone = ["git", "clone", "-q", "--depth=1", "--sparse"] + if branch := lang.get("branch"): + clone.append(f"--branch={branch}") + clone.extend([lang["repo"], dir.relative_to(cwd)]) + self.spawn(clone) + self.spawn([ + "git", "-C", dir.relative_to(cwd), + "sparse-checkout", "set", "--no-cone", "/**/src/**" + ]) + else: + self.spawn([ + "git", "-C", dir.relative_to(cwd), + "pull", "-q", "--depth=1" + ]) + + name = ext.name.split(".")[-1] + path = dir / lang.get("languages", {name: ""})[name] + src = path / "src" + if "TS_REGENERATE" in environ: + chdir(path) + self.spawn([ + "tree-sitter", "generate", + "--no-bindings", "src/grammar.json" + ]) + chdir(cwd) + ext.sources.extend(list(map(str, src.glob("*.c")))) + ext.include_dirs = [str(src)] + + return super().build_extension(ext) class BdistWheel(bdist_wheel): @@ -13,32 +95,15 @@ def get_tag(self): return python, abi, platform -sources = glob('tree_sitter_languages/repos/**/src/*.c', recursive=True) -sources.append("tree_sitter_languages/languages.c") - setup( - packages=["tree_sitter_languages"], - include_package_data=False, - ext_modules=[ - Extension( - name="tree_sitter_languages.languages", - sources=sources, - define_macros=[ - ("PY_SSIZE_T_CLEAN", None), - ("TREE_SITTER_HIDE_SYMBOLS", None), - ], - extra_compile_args=[ - "-std=c11", - "-fvisibility=hidden", - "-Wno-cast-function-type", - "-Wno-unused-but-set-variable", - "-Werror=implicit-function-declaration", - ] if system() != "Windows" else [ - "/std:c11", - "/wd4244", - ], - py_limited_api=True - ) + packages=[ + "tree_sitter_languages", + "tree_sitter_languages._language", ], - cmdclass={"bdist_wheel": BdistWheel}, + include_package_data=False, + ext_modules=extensions, + cmdclass={ + "build_ext": BuildExt, + "bdist_wheel": BdistWheel, + }, ) diff --git a/tests/test_tree_sitter_languages.py b/tests/test_tree_sitter_languages.py index 108c7fc..edec6f3 100644 --- a/tests/test_tree_sitter_languages.py +++ b/tests/test_tree_sitter_languages.py @@ -34,5 +34,5 @@ def test_get_language(self): self.assertIsInstance(get_language(language), Language) def test_invalid_name(self): - self.assertRaises(AttributeError, get_language, "invalid") - self.assertRaises(AttributeError, get_parser, "invalid") + self.assertRaises(LookupError, get_language, "invalid") + self.assertRaises(LookupError, get_parser, "invalid") diff --git a/tree_sitter_languages/__init__.py b/tree_sitter_languages/__init__.py index 5f83eb2..bbc6118 100644 --- a/tree_sitter_languages/__init__.py +++ b/tree_sitter_languages/__init__.py @@ -1,15 +1,18 @@ """Tree-sitter languages""" -from tree_sitter import Language, Parser +from importlib import import_module as _import -from . import languages +from tree_sitter import Language, Parser def get_language(name: str) -> Language: """Get the language with the given name.""" - if not hasattr(languages, name): - raise AttributeError(f"Language not found: {name}") - return Language(getattr(languages, name)()) + try: + module = _import(f"._language.{name}", __package__) + except ModuleNotFoundError: + raise LookupError(f"Language not found: {name}") + else: + return Language(module.language()) def get_parser(language: str) -> Parser: diff --git a/tree_sitter_languages/_language/__init__.py b/tree_sitter_languages/_language/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tree_sitter_languages/_language/__init__.py @@ -0,0 +1 @@ + diff --git a/tree_sitter_languages/language.c b/tree_sitter_languages/language.c new file mode 100644 index 0000000..6a69e89 --- /dev/null +++ b/tree_sitter_languages/language.c @@ -0,0 +1,39 @@ +#include + +typedef struct TSLanguage TSLanguage; + +#ifndef TS_LANGUAGE_NAME +#error TS_LANGUAGE_NAME must be defined +#endif + +#define _str(s) #s +#define str(s) _str(s) +#define _cat(a, b) a##b +#define cat(a, b) _cat(a, b) + +#define TS_LANGUAGE_FUNC cat(tree_sitter_, TS_LANGUAGE_NAME) +#define TS_LANGUAGE_METHOD cat(TS_LANGUAGE_NAME, _language) +#define TS_LANGUAGE_MODULE cat(PyInit_, TS_LANGUAGE_NAME) + +TSLanguage *TS_LANGUAGE_FUNC(void); + +static PyObject* TS_LANGUAGE_METHOD(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) { + return PyLong_FromVoidPtr(TS_LANGUAGE_FUNC()); +} + +static PyMethodDef methods[] = { + {"language", TS_LANGUAGE_METHOD, METH_NOARGS, NULL}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = str(TS_LANGUAGE_NAME), + .m_doc = NULL, + .m_size = -1, + .m_methods = methods +}; + +PyMODINIT_FUNC TS_LANGUAGE_MODULE(void) { + return PyModule_Create(&module); +} diff --git a/tree_sitter_languages/languages.c b/tree_sitter_languages/languages.c deleted file mode 100644 index 92cacbd..0000000 --- a/tree_sitter_languages/languages.c +++ /dev/null @@ -1,48 +0,0 @@ -#include "./languages.h" - -TS_LANGUAGE_INIT(dot) -TS_LANGUAGE_INIT(elisp) -TS_LANGUAGE_INIT(elm) -TS_LANGUAGE_INIT(fixed_form_fortran) -TS_LANGUAGE_INIT(fortran) -TS_LANGUAGE_INIT(gomod) -TS_LANGUAGE_INIT(hack) -TS_LANGUAGE_INIT(hcl) -TS_LANGUAGE_INIT(kotlin) -TS_LANGUAGE_INIT(make) -TS_LANGUAGE_INIT(objc) -TS_LANGUAGE_INIT(rst) -TS_LANGUAGE_INIT(scala) -TS_LANGUAGE_INIT(sql) -TS_LANGUAGE_INIT(terraform) - -static PyMethodDef methods[] = { - TS_LANGUAGE_METHOD(dot), - TS_LANGUAGE_METHOD(elisp), - TS_LANGUAGE_METHOD(elm), - TS_LANGUAGE_METHOD(fixed_form_fortran), - TS_LANGUAGE_METHOD(fortran), - TS_LANGUAGE_METHOD(gomod), - TS_LANGUAGE_METHOD(hack), - TS_LANGUAGE_METHOD(hcl), - TS_LANGUAGE_METHOD(kotlin), - TS_LANGUAGE_METHOD(make), - TS_LANGUAGE_METHOD(objc), - TS_LANGUAGE_METHOD(rst), - TS_LANGUAGE_METHOD(scala), - TS_LANGUAGE_METHOD(sql), - TS_LANGUAGE_METHOD(terraform), - {NULL, NULL, 0, NULL} -}; - -static struct PyModuleDef module = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "languages", - .m_doc = NULL, - .m_size = -1, - .m_methods = methods -}; - -PyMODINIT_FUNC PyInit_languages(void) { - return PyModule_Create(&module); -} diff --git a/tree_sitter_languages/languages.h b/tree_sitter_languages/languages.h deleted file mode 100644 index 8286273..0000000 --- a/tree_sitter_languages/languages.h +++ /dev/null @@ -1,18 +0,0 @@ -#include - -typedef struct TSLanguage TSLanguage; - -#define _NL - -#define _LANG_FUNC(name) tree_sitter_##name - -#define _METHOD_FUNC(name) languages_##name - -#define TS_LANGUAGE_INIT(name) \ - TSLanguage *_LANG_FUNC(name)(void); \ - \ - static PyObject *_METHOD_FUNC(name)(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) { \ - return PyLong_FromVoidPtr(_LANG_FUNC(name)()); \ - } - -#define TS_LANGUAGE_METHOD(name) { #name, _METHOD_FUNC(name), METH_NOARGS, NULL } diff --git a/tree_sitter_languages/repos/.gitignore b/tree_sitter_languages/repos/.gitignore deleted file mode 100644 index 355164c..0000000 --- a/tree_sitter_languages/repos/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*/ diff --git a/tree_sitter_languages/repos/clone.sh b/tree_sitter_languages/repos/clone.sh deleted file mode 100755 index 8032b6d..0000000 --- a/tree_sitter_languages/repos/clone.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -e - -while read -r repo; do - name="${repo##*/tree-sitter-}" - [[ ! -d "$name" ]] || rm -fr "$name" - args=(--depth=1 --sparse) - if [[ $name == fixed-form-fortran ]]; then - args+=(--branch=f77) - elif [[ $name == sql ]]; then - args+=(--branch=gh-pages) - fi - git clone "${args[@]}" "$repo" "$name" - git -C "$name" sparse-checkout set --no-cone '/**/src/**' - while read -r file; do - printf 'Patching #include in file: %s\n' "$file" >&2 - if [[ ${file%/*} != rst/src/tree_sitter_rst ]]; then - sed -i'' -e 's||"./tree_sitter/parser.h"|' "$file" - else - sed -i'' -e 's||"../tree_sitter/parser.h"|' "$file" - fi - if [[ $file =~ hcl/.+/scanner[.]c ]]; then - printf 'Patching symbols in file: %s\n' "$file" >&2 - sed -i'' -e 's/^String string_new/static inline &/' "$file" - fi - done < <(grep -rl '' --exclude "${0##*/}") -done < repos.txt diff --git a/tree_sitter_languages/repos/repos.txt b/tree_sitter_languages/repos/repos.txt deleted file mode 100644 index d7a2a64..0000000 --- a/tree_sitter_languages/repos/repos.txt +++ /dev/null @@ -1,14 +0,0 @@ -https://github.com/rydesun/tree-sitter-dot -https://github.com/Wilfred/tree-sitter-elisp -https://github.com/elm-tooling/tree-sitter-elm -https://github.com/ZedThree/tree-sitter-fixed-form-fortran -https://github.com/stadelmanma/tree-sitter-fortran -https://github.com/camdencheek/tree-sitter-go-mod -https://github.com/slackhq/tree-sitter-hack -https://github.com/tree-sitter-grammars/tree-sitter-hcl -https://github.com/fwcd/tree-sitter-kotlin -https://github.com/tree-sitter-grammars/tree-sitter-make -https://github.com/tree-sitter-grammars/tree-sitter-objc -https://github.com/stsewd/tree-sitter-rst -https://github.com/tree-sitter/tree-sitter-scala -https://github.com/derekstride/tree-sitter-sql