Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
KOLANICH committed Oct 13, 2023
0 parents commit 836ed45
Show file tree
Hide file tree
Showing 39 changed files with 2,418 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
root = true

[*]
charset = utf-8
indent_style = tab
indent_size = 4
insert_final_newline = true
end_of_line = lf

[*.{yml,yaml,yug}]
indent_style = space
indent_size = 2

[grammars/*.txt]
insert_final_newline = false
1 change: 1 addition & 0 deletions .github/.templateMarker
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
KOLANICH/python_project_boilerplate.py
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
allow:
- dependency-type: "all"
15 changes: 15 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: CI
on:
push:
branches: [master]
pull_request:
branches: [master]

jobs:
build:
runs-on: ubuntu-22.04
steps:
- name: typical python workflow
uses: KOLANICH-GHActions/typical-python-workflow@master
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
__pycache__
*.pyc
*.pyo
*.pgt
*.dot
/*.egg-info
/build
/dist
/.eggs
/tests/grammars
monkeytype.sqlite3
*.srctrlprj
*.srctrldb
*.srctrlbm
55 changes: 55 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest

variables:
DOCKER_DRIVER: overlay2
SAST_ANALYZER_IMAGE_TAG: latest
SAST_DISABLE_DIND: "true"
SAST_CONFIDENCE_LEVEL: 5
CODECLIMATE_VERSION: latest

include:
- template: SAST.gitlab-ci.yml
- template: Code-Quality.gitlab-ci.yml

# Hidden job template (leading dot): builds the wheel and publishes it as an artifact.
.build:
  tags:
    - shared
    - linux
  stage: build
  interruptible: true
  variables:
    GIT_DEPTH: "1"
    PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages

  before_script:
    - export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables`
    # waxeye is built from a fresh clone of its upstream repository and installed as a wheel
    #- git clone --depth=1 --filter=sparse:path=src/python https://github.com/waxeye-org/waxeye.git
    - git clone --depth=1 https://github.com/waxeye-org/waxeye.git
    - cd ./waxeye/src/python
    - python3 ./setup.py bdist_wheel
    - pip3 install --upgrade ./dist/*.whl
    - cd ../../../

  cache:
    paths:
      - /usr/local/site-packages
      - /usr/local/lib/python*/site-packages

  script:
    - python3 setup.py bdist_wheel
    - pip3 install --user --upgrade ./dist/*.whl
    # The test steps are disabled as a whole. `cd ./tests` must stay disabled together with
    # its matching `cd ..`: otherwise the `mkdir`/`mv` below would run inside ./tests, where
    # ./dist does not exist, and the `wheels` artifact would not be found in the project root.
    #- cd ./tests
    #- coverage run -a --branch --source=UniGrammar -m pytest --junitxml=./rspec.xml --forked ./test*.py
    #- coverage report -m || true
    #- coveralls || true
    #- codecov || true
    #- cd ..
    - mkdir wheels
    # The artifact is named after this project (the previous name referred to AptSourcesList).
    - mv ./dist/*.whl ./wheels/UniGrammarRuntime-0.CI-py3-none-any.whl

  artifacts:
    paths:
      - wheels
      - $PYTHONUSERBASE
    reports:
      # NOTE: ./rspec.xml is only produced by the (currently disabled) pytest step above.
      junit: ./rspec.xml
1 change: 1 addition & 0 deletions Code_Of_Conduct.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
No codes of conduct!
6 changes: 6 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include UNLICENSE
include *.md
include tests
global-include .editorconfig
global-include *.pgt
global-include *.pglr
62 changes: 62 additions & 0 deletions ReadMe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
UniGrammarRuntime.py [![Unlicensed work](https://raw.githubusercontent.com/unlicense/unlicense.org/master/static/favicon.png)](https://unlicense.org/)
===================
~~![GitLab Build Status](https://gitlab.com/UniGrammar/UniGrammarRuntime.py/badges/master/pipeline.svg)~~
~~![GitLab Coverage](https://gitlab.com/UniGrammar/UniGrammarRuntime.py/badges/master/coverage.svg)~~
[![Libraries.io Status](https://img.shields.io/librariesio/github/UniGrammar/UniGrammarRuntime.py.svg)](https://libraries.io/github/UniGrammar/UniGrammarRuntime.py)
[![Code style: antiflash](https://img.shields.io/badge/code%20style-antiflash-FFF.svg)](https://codeberg.org/KOLANICH-tools/antiflash.py)

Runtime for UniGrammar-generated wrappers for generated parsers. Generated parsers can be used without wrappers, but wrappers allow them to be used uniformly, swapping the implementation while keeping the interface.

This allows you to
* get rid of hard dependencies on specific libraries, instead any supported parser library can be used, for which a parser is generated;
* benchmark and compare performance of various parsing libraries;
* use the most performant of the available libraries.


How to use
-----------

* Generate or construct manually a `parser bundle`. A parser bundle is an object storing and giving out
* pregenerated parsers for different backends (can be generated separately using `transpile`)
* auxiliary information (can be generated using `gen-aux`):
* production names to capture groups mappings, for the parser generators not supporting capturing;
* production names to booleans mappings, telling if the AST node is a collection, for the parser generators not capable of telling the difference between an iterable and a node in the AST;
* benchmark results
* a wrapper, transforming backend-specific AST into backend-agnostic one
A parser bundle can be constructed from a directory on storage or compiled directly into an object in memory. In either case it can be used by a backend.

* Construct a backend. A backend here is an object
* storing underlying parser objects
* providing necessary functions to be used by a wrapper to transform backend-specific AST into backend-agnostic one.

There are 2 ways to construct a backend:
* You can import the backend manually: `from UniGrammarRuntime.backends.<backend name> import <backend class name>` and construct it: `b = <backend class name>("<your grammar name>", <your bundle>)`.
* Or you can just call a method of the bundle, constructing the needed backend. Pass `None` to select the backend automatically based on benchmarking results.

* Now you can do low-level stuff using backend methods:
* You can parse your grammar into its backend-native format using `b.parse("<your string to parse>")` method.
* You can preprocess the AST generated by `parse` and observe the result, using `preprocessAST`.
* You can check if preprocessed AST nodes represent a collection using `isList` and iterate over them using `iterateList`.
* You can transform terminal nodes into `str`s using `getTextFromToken`.
* You can merge subtrees into a single `str` using `mergeShit`.

This all can be useful if you
* don't want to use a generated wrapper
* are designing a new Template and need the generator to generate custom postprocessing; to do that, you first need to craft the postprocessing manually
* are debugging
* are just playing around

* Now we go a level higher. You can use a wrapper to get a prettified, backend-agnostic, postprocessed AST.
* Import the generated wrapper module.
* manually `import <wrapper module name>`
* Via a backend:
* Then it contains some classes. The class you usually need is aliased to `__MAIN_PARSER__`.
* Construct the wrapper, initializing it with the backend: `w = <wrapper module name>.__MAIN_PARSER__(b)`
* Parse what you need: `ast = w("<your string to parse>")`

Examples
--------

* https://codeberg.org/prebuilder/pyMetakitDefinitionString/src/branch/master/pyMetakitDefinitionString/__init__.py
* https://codeberg.org/KOLANICH-libs/FullingMotorModelDecoder.py/src/branch/master/FullingMotorModelDecoder/__init__.py
* https://codeberg.org/KOLANICH-libs/AptSourcesList.py/src/branch/master/AptSourcesList/__init__.py
24 changes: 24 additions & 0 deletions UNLICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <https://unlicense.org/>
13 changes: 13 additions & 0 deletions UniGrammarRuntime/DSLMetadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import typing
import warnings

from .FormatMetadata import FormatMetadata
from .ToolMetadata import Product


class DSLMetadata(FormatMetadata):
    """`FormatMetadata` extended with a reference to the official library repository of the DSL."""

    __slots__ = ("officialLibraryRepo",)

    def __init__(self, officialLibraryRepo: typing.Optional[str] = None, grammarExtensions: typing.Optional[typing.Union[typing.Tuple[str, str], typing.Tuple[str]]] = None, product: typing.Optional[Product] = None) -> None:
        """Initialize the metadata.

        :param officialLibraryRepo: stored as-is in the attribute of the same name (presumably a repository URL - confirm against callers).
        :param grammarExtensions: forwarded unchanged to `FormatMetadata.__init__`.
        :param product: forwarded unchanged to `FormatMetadata.__init__`.
        """
        super().__init__(grammarExtensions=grammarExtensions, product=product)
        self.officialLibraryRepo = officialLibraryRepo
20 changes: 20 additions & 0 deletions UniGrammarRuntime/FormatMetadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import typing
import warnings

from UniGrammarRuntime.ToolMetadata import Product


class FormatMetadata:
    """Metadata of a grammar file format: the product it belongs to and the extensions of its grammar files."""

    __slots__ = ("product", "grammarExtensions")

    def __init__(self, grammarExtensions: typing.Optional[typing.Union[typing.Tuple[str, str], typing.Tuple[str]]] = None, product: typing.Optional[Product] = None) -> None:
        """Store both arguments unchanged; nothing is validated here.

        :param grammarExtensions: extensions of grammar files; the first one is treated as the main one.
        :param product: object whose `name` is used when no extension is known.
        """
        self.grammarExtensions = grammarExtensions
        self.product = product

    @property
    def mainExtension(self):
        """The first item of `grammarExtensions`.

        When `grammarExtensions` is empty or `None`, a warning is emitted and
        `product.name` is returned instead.
        """
        knownExtensions = self.grammarExtensions
        if knownExtensions:
            return knownExtensions[0]

        # No extension is known for this format: fall back to the name of the product.
        dslName = self.product.name
        warnings.warn(dslName + " has no well-known extension for grammar files. Using DSL name (" + dslName + ") instead of the extension.")
        return dslName
125 changes: 125 additions & 0 deletions UniGrammarRuntime/IParser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import typing
from abc import abstractmethod, ABCMeta
from pathlib import Path

from UniGrammarRuntimeCore.IParser import IParser
from UniGrammarRuntimeCore.IParser import IParserFactory as IParserFactoryCore
from UniGrammarRuntimeCore.IParser import IParserFactoryFromPrecompiled as IParserFactoryFromPrecompiledCore
from UniGrammarRuntimeCore.IParser import IParserFactoryFromSource as IParserFactoryFromSourceCore

from .FormatMetadata import FormatMetadata
from .ToolMetadata import Product

# pylint:disable=too-few-public-methods


class IParserFactoryMeta(ABCMeta):
    """Metaclass of parser factories that back-fills `FORMAT.product` from `META.product`.

    When a class body defines both `FORMAT` and `META` (neither being `None`)
    and `FORMAT.product` is `None`, `FORMAT.product` is set to `META.product`
    before the class is created. Only attributes declared directly in the class
    body are inspected; inherited `FORMAT` / `META` are not looked up.
    """

    __slots__ = ()

    def __new__(cls: typing.Type["IParserFactoryMeta"], className: str, parents: typing.Tuple[typing.Type, ...], attrs: typing.Dict[str, typing.Any], **kwargs: typing.Any) -> "IParserFactoryMeta":  # pylint:disable=arguments-differ
        """Create the class, completing its `FORMAT` metadata first.

        :param className: name of the class being created.
        :param parents: base classes of the class being created.
        :param attrs: namespace of the class body.
        :param kwargs: class keyword arguments (`class X(Base, key=value)`); forwarded
            untouched so that `__init_subclass__` hooks of the bases keep working.
        :returns: the newly created class.
        """
        fmt = attrs.get("FORMAT")
        meta = attrs.get("META")
        # Note: the FORMAT object itself is mutated, so the change is visible to everyone sharing it.
        if fmt is not None and meta is not None and fmt.product is None:
            fmt.product = meta.product

        return super().__new__(cls, className, parents, attrs, **kwargs)


class IParserFactory(IParserFactoryCore, metaclass=IParserFactoryMeta):
    """Abstract parser factory that can additionally create parsers from a grammar bundle."""

    __slots__ = ()

    FORMAT = None  # type: FormatMetadata

    @abstractmethod
    def fromBundle(self, grammarResources: "InMemoryGrammarResources"):
        """Create an executor from the files within the bundle.

        Concrete factories must override this method; this stub only raises.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError()


class IParserFactoryFromSource(IParserFactoryFromSourceCore, metaclass=IParserFactoryMeta):  # pylint:disable=abstract-method
    """Parser factory for backends whose parsers are created from the source text of a grammar."""

    __slots__ = ()

    FORMAT = None  # type: FormatMetadata

    def fromBundle(self, grammarResources: "InMemoryGrammarResources") -> IParser:
        """Create a parser from the grammar source text stored in the bundle."""
        # These parsers cannot be precompiled, so their internal representation is the source text itself.
        grammarText = self.getSource(grammarResources)
        return self.fromInternal(grammarText)

    @classmethod
    def _getExt(cls):
        """Return the extension of grammar files: `FORMAT.mainExtension`, or the product name when `FORMAT` is not set."""
        fmt = cls.FORMAT
        if fmt is None:
            return cls.META.product.name
        return fmt.mainExtension

    def getSource(self, grammarResources: "InMemoryGrammarResources") -> str:
        """Must return source code of the grammar in its DSL.

        The text is looked up in `backendsTextData` of the parent of the resources,
        keyed by the pair (product name, "<grammar name>.<extension>").
        """
        factoryClass = self.__class__
        textData = grammarResources.parent.backendsTextData
        productName = factoryClass.META.product.name
        fileName = grammarResources.name + "." + factoryClass._getExt()
        return textData[productName, fileName]


class IParserFactoryFromPrecompiled(IParserFactoryFromPrecompiledCore):  # pylint:disable=abstract-method
    """Parser factory for backends whose parsers are stored in the bundle as pregenerated Python code."""

    __slots__ = ()

    # Precompiled parsers are Python modules, hence the fixed "py" extension.
    FORMAT = FormatMetadata(
        grammarExtensions=("py",),
        product=Product(
            name="python",
            website="https://docs.python.org/3/tutorial/index.html",
        ),
    )

    def fromBundle(self, grammarResources: "InMemoryGrammarResources") -> IParser:
        """Compile the stored code of the grammar, call the resulting constructor and wrap its result into a parser."""
        storedCode = self.getSource(grammarResources)
        parserCtor = self.compile(storedCode, grammarResources.name)
        return self.fromInternal(parserCtor())

    def getSource(self, grammarResources: "InMemoryGrammarResources") -> "ast.Module":
        """Return the entry stored for the grammar in `backendsPythonAST` of the parent of the resources.

        The entry is keyed by the pair (product name, grammar name).
        """
        pythonASTs = grammarResources.parent.backendsPythonAST
        productName = self.__class__.META.product.name
        return pythonASTs[productName, grammarResources.name]


class IParserFactoryFromPrecompiledOrSource(IParserFactoryFromSourceCore):
    """Hybrid between `IParserFactoryFromPrecompiled` and `IParserFactoryFromSource`:
    tries to find and use a precompiled file first,
    if there is none, tries to find and use the source.

    Subclasses set `PRECOMPILED` and `SOURCE` to the factory classes to delegate to;
    both are instantiated lazily on first use.
    """

    PRECOMPILED = None
    SOURCE = None

    __slots__ = ("_precompiled", "_source")

    def __init__(self):
        # Both delegate factories are created on demand by the properties below.
        self._precompiled = self._source = None
        super().__init__()

    @property
    def precompiled(self) -> IParserFactoryFromPrecompiled:
        """Instance of `PRECOMPILED`, created on first access and cached."""
        if self._precompiled is None:
            self._precompiled = self.__class__.PRECOMPILED()
        return self._precompiled

    @property
    def source(self) -> IParserFactoryFromSource:
        """Instance of `SOURCE`, created on first access and cached."""
        if self._source is None:
            self._source = self.__class__.SOURCE()
        return self._source

    def fromBundle(self, grammarResources: "InMemoryGrammarResources"):
        """Try the precompiled factory first; when it raises `FileNotFoundError`, fall back to the source factory."""
        try:
            return self.precompiled.fromBundle(grammarResources)
        except FileNotFoundError:
            return self.source.fromBundle(grammarResources)

    def compileStr(self, grammarText: str, target: typing.Any = None, fileName: typing.Optional[typing.Union[Path, str]] = None):
        """Proxies to the factory defined by `SOURCE`"""
        sourceFactory = self.source
        return sourceFactory.compileStr(grammarText, target, fileName)

    def compileFile(self, grammarFile: Path, target: typing.Any = None):
        """Proxies to the factory defined by `SOURCE`"""
        sourceFactory = self.source
        return sourceFactory.compileFile(grammarFile, target)
Loading

0 comments on commit 836ed45

Please sign in to comment.