Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the Materialize lexer #978

Merged
merged 3 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ for that setup the `chroma` executable can be just symlinked to `~/.lessfilter`.
If you edit some lexers and want to try it, open a shell in `cmd/chromad` and run:

```shell
go run .
go run . --csrf-key=securekey
```

A Link will be printed. Open it in your Browser. Now you can test on the Playground with your local changes.
Expand Down
154 changes: 77 additions & 77 deletions lexers/embedded/materialize_sql_dialect.xml
Original file line number Diff line number Diff line change
@@ -1,154 +1,154 @@
<lexer>
<config>
<name>Materialize SQL dialect</name>
<alias>materialize</alias>
<alias>mzsql</alias>
<mime_type>text/x-materializesql</mime_type>
<case_insensitive>true</case_insensitive>
<not_multiline>true</not_multiline>
<alias>materialize</alias>
<alias>mzsql</alias>
</config>
<rules>
<state name="root">
<rule pattern="\s+">
<token type="Text"/>
<token type="Text" />
</rule>
<rule pattern="--.*\n?">
<token type="CommentSingle"/>
<token type="CommentSingle" />
</rule>
<rule pattern="/\*">
<token type="CommentMultiline"/>
<push state="multiline-comments"/>
<token type="CommentMultiline" />
<push state="multiline-comments" />
</rule>
<rule pattern="(bigint|bigserial|bit|bit\s+varying|bool|boolean|box|bytea|char|character|character\s+varying|cidr|circle|date|decimal|double\s+precision|float4|float8|inet|int|int2|int4|int8|integer|interval|json|jsonb|line|lseg|macaddr|money|numeric|path|pg_lsn|point|polygon|real|serial|serial2|serial4|serial8|smallint|smallserial|text|time|timestamp|timestamptz|timetz|tsquery|tsvector|txid_snapshot|uuid|varbit|varchar|with\s+time\s+zone|without\s+time\s+zone|xml|anyarray|anyelement|anyenum|anynonarray|anyrange|cstring|fdw_handler|internal|language_handler|opaque|record|void)\b">
<token type="NameBuiltin"/>
<token type="NameBuiltin" />
</rule>
<rule pattern="(?s)(DO)(\s+)(?:(LANGUAGE)?(\s+)(&#39;?)(\w+)?(&#39;?)(\s+))?(\$)([^$]*)(\$)(.*?)(\$)(\10)(\$)">
<rule pattern="(?s)(DO)(\s+)(?:(LANGUAGE)?(\s+)('?)(\w+)?('?)(\s+))?(\$)([^$]*)(\$)(.*?)(\$)(\10)(\$)">
<usingbygroup>
<sublexer_name_group>6</sublexer_name_group>
<code_group>12</code_group>
<emitters>
<token type="Keyword"/>
<token type="Text"/>
<token type="Keyword"/>
<token type="Text"/>
<token type="LiteralStringSingle"/>
<token type="LiteralStringSingle"/>
<token type="LiteralStringSingle"/>
<token type="Text"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="Keyword" />
<token type="Text" />
<token type="Keyword" />
<token type="Text" />
<token type="LiteralStringSingle" />
<token type="LiteralStringSingle" />
<token type="LiteralStringSingle" />
<token type="Text" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
</emitters>
</usingbygroup>
</rule>
<rule pattern="(ACCESS|ACKS|ADD|ADDRESSES|AGGREGATE|ALL|ALTER|AND|ANY|ARN|ARRANGEMENT|ARRAY|AS|ASC|ASSERT|AT|AUCTION|AUTHORITY|AVAILABILITY|AVRO|AWS|BEGIN|BETWEEN|BIGINT|BILLED|BODY|BOOLEAN|BOTH|BPCHAR|BROKEN|BROKER|BROKERS|BY|BYTES|CARDINALITY|CASCADE|CASE|CAST|CERTIFICATE|CHAIN|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CLIENT|CLOSE|CLUSTER|CLUSTERS|COALESCE|COLLATE|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPACTION|COMPRESSION|COMPUTE|COMPUTECTL|CONFLUENT|CONNECTION|CONNECTIONS|CONSTRAINT|COPY|COUNT|COUNTER|CREATE|CREATECLUSTER|CREATEDB|CREATEROLE|CROSS|CSV|CURRENT|CURSOR|DATABASE|DATABASES|DATUMS|DAY|DAYS|DEALLOCATE|DEBEZIUM|DEBUG|DEBUGGING|DEC|DECIMAL|DECLARE|DECORRELATED|DEFAULT|DEFAULTS|DELETE|DELIMITED|DELIMITER|DESC|DETAILS|DISCARD|DISK|DISTINCT|DOC|DOT|DOUBLE|DROP|EFFORT|ELEMENT|ELSE|ENABLE|END|ENDPOINT|ENFORCED|ENVELOPE|ERROR|ESCAPE|EXCEPT|EXECUTE|EXISTS|EXPECTED|EXPLAIN|EXPOSE|EXTRACT|FACTOR|FALSE|FETCH|FIELDS|FILTER|FIRST|FLOAT|FOLLOWING|FOR|FOREIGN|FORMAT|FORWARD|FROM|FULL|FULLNAME|FUNCTION|GENERATOR|GRANT|GREATEST|GROUP|GROUPS|HAVING|HEADER|HEADERS|HOLD|HOST|HOUR|HOURS|ID|IDEMPOTENCE|IDLE|IF|IGNORE|ILIKE|IN|INCLUDE|INDEX|INDEXES|INFO|INHERIT|INLINE|INNER|INPUT|INSERT|INSPECT|INT|INTEGER|INTERNAL|INTERSECT|INTERVAL|INTO|INTROSPECTION|IS|ISNULL|ISOLATION|JOIN|JSON|KAFKA|KEY|KEYS|LAST|LATERAL|LATEST|LEADING|LEAST|LEFT|LEVEL|LIKE|LIMIT|LIST|LOAD|LOCAL|LOG|LOGICAL|LOGIN|MANAGED|MAP|MARKETING|MATERIALIZE|MATERIALIZED|MAX|MECHANISMS|MEMBERSHIP|MERGE|MESSAGE|METADATA|MINUTE|MINUTES|MODE|MONTH|MONTHS|MS|MUTUALLY|NAME|NAMES|NATURAL|NEXT|NO|NOCREATECLUSTER|NOCREATEDB|NOCREATEROLE|NOINHERIT|NOLOGIN|NONE|NOSUPERUSER|NOT|NOTICE|NULL|NULLIF|NULLS|OBJECTS|OF|OFFSET|ON|ONLY|OPERATOR|OPTIMIZED|OPTIMIZER|OPTIONS|OR|ORDER|ORDINALITY|OUTER|OVER|OWNED|OWNER|PARTITION|PASSWORD|PHYSICAL|PLAN|PLANS|PORT|POSITION|POSTGRES|PRECEDING|PRECISION|PREFIX|PREPARE|PRIMARY|PRIVATELINK|PRIVILEGES|PROGRESS|PROTOBUF|PROTOCOL|PUBLICATION|QUERY|QUOTE|RAISE|RANGE|RAW|READ|REAL|REASSIGN|RECURSION|RECURSIVE|REFERENCES|REFRESH|REGEX|REGION|REGISTRY|RENAME|REPEATABLE|REPLACE|REPLICA|REPLICAS|REPLICATION|RESET|RESPECT|RESTRICT|RETENTION|RETURN|RETURNING|REVOKE|RIGHT|ROLE|ROLES|ROLLBACK|ROTATE|ROW|ROWS|SASL|SCALE|SCHEMA|SCHEMAS|SCRIPT|SECOND|SECONDS|SECRET|SECRETS|SECURITY|SEED|SELECT|SEQUENCES|SERIALIZABLE|SERVICE|SESSION|SET|SHARD|SHOW|SINK|SINKS|SIZE|SMALLINT|SNAPSHOT|SOME|SOURCE|SOURCES|SSH|SSL|START|STDIN|STDOUT|STORAGE|STORAGECTL|STRATEGY|STRICT|STRING|SUBSCRIBE|SUBSOURCE|SUBSOURCES|SUBSTRING|SUPERUSER|SWAP|SYSTEM|TABLE|TABLES|TAIL|TEMP|TEMPORARY|TEST|TEXT|THEN|TICK|TIES|TIME|TIMELINE|TIMEOUT|TIMESTAMP|TIMESTAMPTZ|TO|TOKEN|TOPIC|TPCH|TRACE|TRAILING|TRANSACTION|TRIM|TRUE|TUNNEL|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNION|UNIQUE|UNKNOWN|UP|UPDATE|UPSERT|URL|USAGE|USER|USERNAME|USERS|USING|VALIDATE|VALUE|VALUES|VARCHAR|VARYING|VIEW|VIEWS|WARNING|WEBHOOK|WHEN|WHERE|WINDOW|WIRE|WITH|WITHIN|WITHOUT|WORK|WORKERS|WRITE|YEAR|YEARS|ZONE|ZONES)\b">
<token type="Keyword"/>
<rule pattern="(ACCESS|ADD|ADDRESSES|AGGREGATE|ALIGNED|ALL|ALTER|ANALYSIS|AND|ANY|ARITY|ARN|ARRANGEMENT|ARRAY|AS|ASC|ASSERT|ASSUME|AT|AUCTION|AUTHORITY|AVAILABILITY|AVRO|AWS|BATCH|BEGIN|BETWEEN|BIGINT|BILLED|BODY|BOOLEAN|BOTH|BPCHAR|BROKEN|BROKER|BROKERS|BY|BYTES|CARDINALITY|CASCADE|CASE|CAST|CERTIFICATE|CHAIN|CHAINS|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CLIENT|CLOSE|CLUSTER|CLUSTERS|COALESCE|COLLATE|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPACTION|COMPATIBILITY|COMPRESSION|COMPUTE|COMPUTECTL|CONFIG|CONFLUENT|CONNECTION|CONNECTIONS|CONSTRAINT|COPY|COUNT|COUNTER|CREATE|CREATECLUSTER|CREATEDB|CREATEROLE|CREATION|CROSS|CSV|CURRENT|CURSOR|DATABASE|DATABASES|DATUMS|DAY|DAYS|DEALLOCATE|DEBEZIUM|DEBUG|DEBUGGING|DEC|DECIMAL|DECLARE|DECODING|DECORRELATED|DEFAULT|DEFAULTS|DELETE|DELIMITED|DELIMITER|DELTA|DESC|DETAILS|DISCARD|DISK|DISTINCT|DOC|DOT|DOUBLE|DROP|EAGER|ELEMENT|ELSE|ENABLE|END|ENDPOINT|ENFORCED|ENVELOPE|ERROR|ERRORS|ESCAPE|ESTIMATE|EVERY|EXCEPT|EXECUTE|EXISTS|EXPECTED|EXPLAIN|EXPOSE|EXPRESSIONS|EXTERNAL|EXTRACT|FACTOR|FALSE|FAST|FEATURES|FETCH|FIELDS|FILE|FILTER|FIRST|FIXPOINT|FLOAT|FOLLOWING|FOR|FOREIGN|FORMAT|FORWARD|FROM|FULL|FULLNAME|FUNCTION|GENERATOR|GRANT|GREATEST|GROUP|GROUPS|HAVING|HEADER|HEADERS|HISTORY|HOLD|HOST|HOUR|HOURS|HUMANIZED|ID|IDENTIFIERS|IDS|IF|IGNORE|ILIKE|IMPLEMENTATIONS|IMPORTED|IN|INCLUDE|INDEX|INDEXES|INFO|INHERIT|INLINE|INNER|INPUT|INSERT|INSIGHTS|INSPECT|INT|INTEGER|INTERNAL|INTERSECT|INTERVAL|INTO|INTROSPECTION|IS|ISNULL|ISOLATION|JOIN|JOINS|JSON|KAFKA|KEY|KEYS|LAST|LATERAL|LATEST|LEADING|LEAST|LEFT|LEGACY|LETREC|LEVEL|LIKE|LIMIT|LINEAR|LIST|LOAD|LOCAL|LOCALLY|LOG|LOGICAL|LOGIN|LOWERING|MANAGED|MANUAL|MAP|MARKETING|MATERIALIZE|MATERIALIZED|MAX|MECHANISMS|MEMBERSHIP|MESSAGE|METADATA|MINUTE|MINUTES|MODE|MONTH|MONTHS|MUTUALLY|MYSQL|NAME|NAMES|NATURAL|NEGATIVE|NEW|NEXT|NO|NOCREATECLUSTER|NOCREATEDB|NOCREATEROLE|NODE|NOINHERIT|NOLOGIN|NON|NONE|NOSUPERUSER|NOT|NOTICE|NOTICES|NULL|NULLIF|NULLS|OBJECTS|OF|OFFSET|ON|ONLY|OPERATOR|OPTIMIZED|OPTIMIZER|OPTIONS|OR|ORDER|ORDINALITY|OUTER|OVER|OWNED|OWNER|PARTITION|PARTITIONS|PASSWORD|PATH|PHYSICAL|PLAN|PLANS|PORT|POSITION|POSTGRES|PRECEDING|PRECISION|PREFIX|PREPARE|PRIMARY|PRIVATELINK|PRIVILEGES|PROGRESS|PROTOBUF|PROTOCOL|PUBLICATION|PUSHDOWN|QUERY|QUOTE|RAISE|RANGE|RATE|RAW|READ|REAL|REASSIGN|RECURSION|RECURSIVE|REDACTED|REFERENCE|REFERENCES|REFRESH|REGEX|REGION|REGISTRY|REHYDRATION|RENAME|REOPTIMIZE|REPEATABLE|REPLACE|REPLAN|REPLICA|REPLICAS|REPLICATION|RESET|RESPECT|RESTRICT|RETAIN|RETURN|RETURNING|REVOKE|RIGHT|ROLE|ROLES|ROLLBACK|ROTATE|ROUNDS|ROW|ROWS|SASL|SCALE|SCHEDULE|SCHEMA|SCHEMAS|SECOND|SECONDS|SECRET|SECRETS|SECURITY|SEED|SELECT|SEQUENCES|SERIALIZABLE|SERVICE|SESSION|SET|SHARD|SHOW|SINK|SINKS|SIZE|SMALLINT|SNAPSHOT|SOME|SOURCE|SOURCES|SSH|SSL|START|STDIN|STDOUT|STORAGE|STORAGECTL|STRATEGY|STRICT|STRING|STRONG|SUBSCRIBE|SUBSOURCE|SUBSOURCES|SUBSTRING|SUBTREE|SUPERUSER|SWAP|SYNTAX|SYSTEM|TABLE|TABLES|TAIL|TEMP|TEMPORARY|TEXT|THEN|TICK|TIES|TIME|TIMELINE|TIMEOUT|TIMESTAMP|TIMESTAMPTZ|TIMING|TO|TOKEN|TOPIC|TPCH|TRACE|TRAILING|TRANSACTION|TRANSACTIONAL|TRIM|TRUE|TUNNEL|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNION|UNIQUE|UNKNOWN|UP|UPDATE|UPSERT|URL|USAGE|USER|USERNAME|USERS|USING|VALIDATE|VALUE|VALUES|VARCHAR|VARIADIC|VARYING|VERSION|VIEW|VIEWS|WARNING|WEBHOOK|WHEN|WHERE|WINDOW|WIRE|WITH|WITHIN|WITHOUT|WORK|WORKERS|WRITE|YEAR|YEARS|ZONE|ZONES)\b">
<token type="Keyword" />
</rule>
<rule pattern="[+*/&lt;&gt;=~!@#%^&amp;|`?-]+">
<token type="Operator"/>
<token type="Operator" />
</rule>
<rule pattern="::">
<token type="Operator"/>
<token type="Operator" />
</rule>
<rule pattern="\$\d+">
<token type="NameVariable"/>
<token type="NameVariable" />
</rule>
<rule pattern="([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?">
<token type="LiteralNumberFloat"/>
<token type="LiteralNumberFloat" />
</rule>
<rule pattern="[0-9]+">
<token type="LiteralNumberInteger"/>
<token type="LiteralNumberInteger" />
</rule>
<rule pattern="((?:E|U&amp;)?)(&#39;)">
<rule pattern="((?:E|U&amp;)?)(')">
<bygroups>
<token type="LiteralStringAffix"/>
<token type="LiteralStringSingle"/>
<token type="LiteralStringAffix" />
<token type="LiteralStringSingle" />
</bygroups>
<push state="string"/>
<push state="string" />
</rule>
<rule pattern="((?:U&amp;)?)(&#34;)">
<rule pattern="((?:U&amp;)?)(&quot;)">
<bygroups>
<token type="LiteralStringAffix"/>
<token type="LiteralStringName"/>
<token type="LiteralStringAffix" />
<token type="LiteralStringName" />
</bygroups>
<push state="quoted-ident"/>
<push state="quoted-ident" />
</rule>
<rule pattern="(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)(\s+)(LANGUAGE)?(\s+)(&#39;?)(\w+)?(&#39;?)">
<rule pattern="(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)(\s+)(LANGUAGE)?(\s+)('?)(\w+)?('?)">
<usingbygroup>
<sublexer_name_group>12</sublexer_name_group>
<code_group>4</code_group>
<emitters>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc"/>
<token type="Text"/>
<token type="Keyword"/>
<token type="Text"/>
<token type="LiteralStringSingle"/>
<token type="LiteralStringSingle"/>
<token type="LiteralStringSingle"/>
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="LiteralStringHeredoc" />
<token type="Text" />
<token type="Keyword" />
<token type="Text" />
<token type="LiteralStringSingle" />
<token type="LiteralStringSingle" />
<token type="LiteralStringSingle" />
</emitters>
</usingbygroup>
</rule>
<rule pattern="(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)">
<token type="LiteralStringHeredoc"/>
<token type="LiteralStringHeredoc" />
</rule>
<rule pattern="[a-z_]\w*">
<token type="Name"/>
<token type="Name" />
</rule>
<rule pattern=":([&#39;&#34;]?)[a-z]\w*\b\1">
<token type="NameVariable"/>
<rule pattern=":(['&quot;]?)[a-z]\w*\b\1">
<token type="NameVariable" />
</rule>
<rule pattern="[;:()\[\]{},.]">
<token type="Punctuation"/>
<token type="Punctuation" />
</rule>
</state>
<state name="multiline-comments">
<rule pattern="/\*">
<token type="CommentMultiline"/>
<push state="multiline-comments"/>
<token type="CommentMultiline" />
<push state="multiline-comments" />
</rule>
<rule pattern="\*/">
<token type="CommentMultiline"/>
<pop depth="1"/>
<token type="CommentMultiline" />
<pop depth="1" />
</rule>
<rule pattern="[^/*]+">
<token type="CommentMultiline"/>
<token type="CommentMultiline" />
</rule>
<rule pattern="[/*]">
<token type="CommentMultiline"/>
<token type="CommentMultiline" />
</rule>
</state>
<state name="string">
<rule pattern="[^&#39;]+">
<token type="LiteralStringSingle"/>
<rule pattern="[^']+">
<token type="LiteralStringSingle" />
</rule>
<rule pattern="&#39;&#39;">
<token type="LiteralStringSingle"/>
<rule pattern="''">
<token type="LiteralStringSingle" />
</rule>
<rule pattern="&#39;">
<token type="LiteralStringSingle"/>
<pop depth="1"/>
<rule pattern="'">
<token type="LiteralStringSingle" />
<pop depth="1" />
</rule>
</state>
<state name="quoted-ident">
<rule pattern="[^&#34;]+">
<token type="LiteralStringName"/>
<rule pattern="[^&quot;]+">
<token type="LiteralStringName" />
</rule>
<rule pattern="&#34;&#34;">
<token type="LiteralStringName"/>
<rule pattern="&quot;&quot;">
<token type="LiteralStringName" />
</rule>
<rule pattern="&#34;">
<token type="LiteralStringName"/>
<pop depth="1"/>
<rule pattern="&quot;">
<token type="LiteralStringName" />
<pop depth="1" />
</rule>
</state>
</rules>
Expand Down
49 changes: 49 additions & 0 deletions lexers/testdata/materialize.actual
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
-- basic statements

CREATE VIEW my_typed_source AS
SELECT
(data->>'field1')::boolean AS field_1,
(data->>'field2')::int AS field_2,
(data->>'field3')::float AS field_3
FROM my_jsonb_source;

WITH
regional_sales (region, total_sales) AS (
SELECT region, sum(amount)
FROM orders
GROUP BY region
),
top_regions AS (
SELECT region
FROM regional_sales
ORDER BY total_sales DESC
LIMIT 5
)
SELECT region,
product,
SUM(quantity) AS product_units,
SUM(amount) AS product_sales
FROM orders
WHERE region IN (SELECT region FROM top_regions)
GROUP BY region, product;

-- sources

CREATE SOURCE webhook_with_basic_auth
FROM WEBHOOK
BODY FORMAT JSON
CHECK (
WITH (
HEADERS,
BODY AS request_body,
SECRET basic_hook_auth AS validation_secret
)
-- The constant_time_eq validation function **does not support** fully
-- qualified secret names. We recommend always aliasing the secret name
-- for ease of use.
constant_time_eq(headers->'authorization', validation_secret)
);

CREATE SOURCE mz_source
FROM MYSQL CONNECTION mysql_connection
FOR TABLES (schema1.table_1 AS s1_table_1, schema2.table_1 AS s2_table_1);
Loading