Skip to content

Commit

Permalink
Merge pull request #27950 from arusahni/feat/chroma-patcher
Browse files Browse the repository at this point in the history
Introduce Chroma syntax-generation script
  • Loading branch information
arusahni committed Jun 29, 2024
2 parents 1b314b7 + f6c17dc commit 77ee7e7
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 0 deletions.
15 changes: 15 additions & 0 deletions bin/gen-chroma-syntax
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
#
# gen-chroma-syntax -- regenerates a Materialize-dialect Chroma syntax file
# using the currently-checked out Materialize keywords
#
# All command-line arguments are forwarded unchanged to the
# materialize.cli.gen-chroma-syntax Python module.

# Replace this shell with the `pyactivate` wrapper located next to this script
# (presumably it sets up the repo's Python environment -- confirm in pyactivate).
exec "$(dirname "$0")"/pyactivate -m materialize.cli.gen-chroma-syntax "$@"
12 changes: 12 additions & 0 deletions doc/developer/chroma-syntax-generation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Generating new Chroma syntax highlights

Chroma is the syntax highlighter used by Hugo, the static site generator that powers Materialize's docs. We have upstreamed a Materialize lexer (a slightly modified version of Chroma's PostgreSQL lexer). When new keywords are added to Materialize, we should upstream an updated lexer.

## Generating a new lexer definition

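1. Fork the Chroma repo and clone it locally as a sibling of the `materialize` repo (the script looks for it at `../chroma` by default; pass `--chroma-dir` to point it at a different checkout).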
2. From the root directory of the `materialize` repo, run the generate script:
```shell
./bin/gen-chroma-syntax
```
3. In the Chroma repo, commit the changes to the Materialize dialect file and submit them as a PR.
81 changes: 81 additions & 0 deletions misc/python/materialize/cli/gen-chroma-syntax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

"""Regenerates a Materialize-dialect Chroma syntax file using the local Materialize keywords"""

import argparse
import xml.etree.ElementTree as ET
from pathlib import Path

from materialize import MZ_ROOT

# Values written into the lexer's <config> element by set_config().
# A string value overwrites the text of the existing child element with that
# tag; a list value replaces all existing children with that tag by one new
# child per list item.
CONFIG_FIELDS = {
    "name": "Materialize SQL dialect",
    "alias": ["materialize", "mzsql"],
    "mime_type": "text/x-materializesql",
}


def keyword_pattern():
    """Build the regex pattern that matches every Materialize SQL keyword.

    Reads ``src/sql-lexer/src/keywords.txt`` under ``MZ_ROOT``, ignores blank
    lines and ``#`` comment lines, upper-cases each remaining keyword and
    joins them into one alternation.

    Returns:
        A pattern string of the form ``(KW1|KW2|...)\\b``.

    Raises:
        RuntimeError: If the keywords file contains no keywords; the
            resulting pattern would otherwise match the empty string.
    """
    keywords_file = MZ_ROOT / "src/sql-lexer/src/keywords.txt"
    keywords = []
    for raw_line in keywords_file.read_text(encoding="utf-8").splitlines():
        # Strip before inspecting the line so surrounding whitespace never
        # leaks into the regex and indented comments are still skipped.
        keyword = raw_line.strip()
        if not keyword or keyword.startswith("#"):
            continue
        keywords.append(keyword.upper())
    if not keywords:
        raise RuntimeError(f"No keywords found in {keywords_file}")
    return f"({'|'.join(keywords)})\\b"


def set_keywords(root: ET.Element) -> None:
    """Overwrite the lexer's keyword rule with the current keyword pattern.

    Finds the first ``<rule>`` that has a ``<token type="Keyword">`` child
    and sets its ``pattern`` attribute to the value of ``keyword_pattern()``.

    Args:
        root: Root element of the lexer definition; modified in place.

    Raises:
        RuntimeError: If no rule with a ``Keyword`` token exists.
    """
    rule = root.find(".//rule/token[@type='Keyword']/..")
    # Compare with None explicitly: an Element's truth value reflects whether
    # it has children (and is deprecated), not whether find() matched.
    if rule is None:
        raise RuntimeError("No keyword rule found")
    rule.set("pattern", keyword_pattern())


def set_config(root: ET.Element) -> None:
    """Apply ``CONFIG_FIELDS`` to the lexer's ``<config>`` element.

    For a list value, every existing child with that tag is removed and one
    new child per item is appended to the end of ``<config>``. For any other
    value, the text of the existing child with that tag is overwritten.

    Args:
        root: Root element of the lexer definition; modified in place.

    Raises:
        RuntimeError: If there is no ``<config>`` element, or a non-list
            field has no existing child element to update.
    """
    config = root.find("config")
    # Compare with None explicitly: an Element with no children is falsy, so
    # `not config` would reject an existing but empty <config/>.
    if config is None:
        raise RuntimeError("No config found")
    for field_name, field_value in CONFIG_FIELDS.items():
        if isinstance(field_value, list):
            # findall() returns a list, so removing while looping is safe.
            for element in config.findall(field_name):
                config.remove(element)
            for item in field_value:
                ET.SubElement(config, field_name).text = item
        else:
            field = config.find(field_name)
            if field is None:
                raise RuntimeError(f"No such config field: '{field_name}'")
            field.text = field_value


def main() -> None:
    """Generate the Materialize lexer file inside a Chroma checkout.

    Parses ``lexers/embedded/postgresql_sql_dialect.xml`` under the directory
    given by ``--chroma-dir`` (default ``../chroma``, resolved against the
    current working directory), rewrites its keyword rule and config fields,
    and writes the result to ``materialize_sql_dialect.xml`` in the same
    directory.

    Raises:
        RuntimeError: If the parsed document has no root element, or if
            ``set_keywords`` / ``set_config`` cannot find what they update.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--chroma-dir",
        default="../chroma",
        help="path to a local checkout of the Chroma repository",
    )
    args = parser.parse_args()
    lexer_dir = Path(args.chroma_dir) / "lexers" / "embedded"
    tree = ET.parse(lexer_dir / "postgresql_sql_dialect.xml")
    root = tree.getroot()
    # Compare with None explicitly: Element truthiness depends on whether the
    # element has children, and is deprecated.
    if root is None:
        raise RuntimeError("Could not find root element")
    set_keywords(root)
    set_config(root)
    # Re-indent so the elements added by set_config line up with the rest.
    ET.indent(root, " ")
    tree.write(lexer_dir / "materialize_sql_dialect.xml", encoding="unicode")


if __name__ == "__main__":
    main()

0 comments on commit 77ee7e7

Please sign in to comment.