Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
104938: pgrepl: implement the parser for the PostgreSQL replication protocol r=ZhouXing19 a=otan

This commit implements a parser for the [PG replication protocol syntax](https://www.postgresql.org/docs/current/protocol-replication.html). The grammar is derived from the `.y` file in PG itself. We had to invent some new lexing protocols, as the lexing mechanism differs from that of standard plpgsql/pgsql.

This is in preparation for supporting pglogical within CRDB.

Epic: https://cockroachlabs.atlassian.net/browse/CRDB-26486
Release note: None

Co-authored-by: Oliver Tan <otan@cockroachlabs.com>
  • Loading branch information
craig[bot] and otan committed Jun 19, 2023
2 parents 0ccc55e + 66c354e commit 9633594
Show file tree
Hide file tree
Showing 39 changed files with 2,048 additions and 3 deletions.
32 changes: 31 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ SQLPARSER_TARGETS = \
pkg/sql/lexbase/tokens.go \
pkg/sql/lexbase/keywords.go \
pkg/sql/lexbase/reserved_keywords.go \
pkg/sql/pgrepl/pgreplparser/pgrepl.go \
pkg/sql/plpgsql/parser/plpgsql.go \
pkg/sql/plpgsql/parser/lexbase/tokens.go \
pkg/sql/plpgsql/parser/lexbase/keywords.go \
Expand Down Expand Up @@ -1516,6 +1517,14 @@ pkg/sql/plpgsql/parser/gen/plpgsql.go.tmp: pkg/sql/plpgsql/parser/gen/plpgsql-ge
echo "$$ret"; exit 1; \
fi

# Generate the raw goyacc parser for the pgrepl grammar.
#
# goyacc is run from inside the gen directory with parser prefix "pgrepl",
# reading pgrepl-gen.y and writing pgrepl.go.tmp. Whatever goyacc prints is
# captured in the shell variable "ret"; if that text matches ".*conflicts",
# it is echoed and the recipe exits with status 1, so a grammar that reports
# conflicts fails the build. The target is declared .SECONDARY.
.SECONDARY: pkg/sql/pgrepl/pgreplparser/gen/pgrepl.go.tmp
pkg/sql/pgrepl/pgreplparser/gen/pgrepl.go.tmp: pkg/sql/pgrepl/pgreplparser/gen/pgrepl-gen.y bin/.bootstrap
set -euo pipefail; \
ret=$$(cd pkg/sql/pgrepl/pgreplparser/gen && goyacc -p pgrepl -o pgrepl.go.tmp pgrepl-gen.y); \
if expr "$$ret" : ".*conflicts" >/dev/null; then \
echo "$$ret"; exit 1; \
fi

pkg/sql/plpgsql/parser/lexbase/tokens.go: pkg/sql/plpgsql/parser/gen/plpgsql.go.tmp
(echo "// Code generated by make. DO NOT EDIT."; \
echo "// GENERATED FILE DO NOT EDIT"; \
Expand All @@ -1534,6 +1543,13 @@ pkg/sql/plpgsql/parser/plpgsql.go: pkg/sql/plpgsql/parser/gen/plpgsql.go.tmp | b
mv -f $@.tmp $@
goimports -w $@

# Produce the final pgrepl.go from the goyacc output.
#
# Two "generated file" header lines are emitted, followed by the contents of
# the normal prerequisites ($^; bin/.bootstrap is order-only and therefore not
# part of $^). The result is written to $@.tmp first and then moved onto $@,
# after which goimports rewrites the file in place.
pkg/sql/pgrepl/pgreplparser/pgrepl.go: pkg/sql/pgrepl/pgreplparser/gen/pgrepl.go.tmp | bin/.bootstrap
(echo "// Code generated by goyacc. DO NOT EDIT."; \
echo "// GENERATED FILE DO NOT EDIT"; \
cat $^) > $@.tmp || rm $@.tmp
mv -f $@.tmp $@
goimports -w $@

# This modifies the grammar to:
# - improve the types used by the generated parser for non-terminals
# - expand the help rules.
Expand Down Expand Up @@ -1586,6 +1602,20 @@ pkg/sql/plpgsql/parser/gen/plpgsql-gen.y: pkg/sql/plpgsql/parser/plpgsql.y
mv -f $@.tmp $@
rm pkg/sql/plpgsql/parser/gen/types_regex.tmp


# Derive the grammar that goyacc actually consumes (pgrepl-gen.y) from the
# hand-written pgrepl.y.
#
# Steps performed by the recipe:
#   1. Create the gen directory.
#   2. From every line of pgrepl.y matching "func.*pgreplSymUnion", take the
#      second-to-last field (presumably the Go type name in the accessor's
#      signature -- confirm against pgrepl.y), escape regex metacharacters in
#      it, and turn it into a sed substitution that rewrites
#      "type <T>" / "token <T>" into "type <union> /* <T> */" (likewise for
#      token). The substitutions are collected in gen/types_regex.tmp.
#   3. Apply that sed script to pgrepl.y, then strip "//" comments, "/* */"
#      comments and trailing spaces, writing the result to $@.tmp.
#   4. Move $@.tmp onto $@ and delete gen/types_regex.tmp.
# The target is declared .SECONDARY.
.SECONDARY: pkg/sql/pgrepl/pgreplparser/gen/pgrepl-gen.y
pkg/sql/pgrepl/pgreplparser/gen/pgrepl-gen.y: pkg/sql/pgrepl/pgreplparser/pgrepl.y
mkdir -p pkg/sql/pgrepl/pgreplparser/gen
set -euo pipefail; \
awk '/func.*pgreplSymUnion/ {print $$(NF - 1)}' pkg/sql/pgrepl/pgreplparser/pgrepl.y | \
sed -e 's/[]\/$$*.^|[]/\\&/g' | \
sed -e "s/^/s_(type|token) <(/" | \
awk '{print $$0")>_\\1 <union> /* <\\2> */_"}' > pkg/sql/pgrepl/pgreplparser/gen/types_regex.tmp; \
sed -E -f pkg/sql/pgrepl/pgreplparser/gen/types_regex.tmp < pkg/sql/pgrepl/pgreplparser/pgrepl.y | \
sed -Ee 's,//.*$$,,g;s,/[*]([^*]|[*][^/])*[*]/, ,g;s/ +$$//g' > $@.tmp || rm $@.tmp
mv -f $@.tmp $@
rm pkg/sql/pgrepl/pgreplparser/gen/types_regex.tmp

pkg/sql/plpgsql/parser/lexbase/keywords.go: pkg/sql/plpgsql/parser/plpgsql.y pkg/sql/lexbase/allkeywords/main.go | bin/.bootstrap
$(GO) run -tags all-keywords pkg/sql/lexbase/allkeywords/main.go < $< > $@.tmp || rm $@.tmp
mv -f $@.tmp $@
Expand Down Expand Up @@ -1766,7 +1796,7 @@ cleanshort:
-$(GO) clean $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' -i github.com/cockroachdb/cockroach...
$(FIND_RELEVANT) -type f -name '*.test' -exec rm {} +
for f in cockroach*; do if [ -f "$$f" ]; then rm "$$f"; fi; done
rm -rf pkg/sql/parser/gen pkg/sql/plpgsql/parser/gen
rm -rf pkg/sql/parser/gen pkg/sql/plpgsql/parser/gen pkg/sql/pgrepl/pgreplparser/gen

.PHONY: clean
clean: ## Like cleanshort, but also includes C++ artifacts, Bazel artifacts, and the go build cache.
Expand Down
5 changes: 5 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ ALL_TESTS = [
"//pkg/sql/opt:opt_test",
"//pkg/sql/parser:parser_disallowed_imports_test",
"//pkg/sql/parser:parser_test",
"//pkg/sql/pgrepl/pgreplparser:pgreplparser_test",
"//pkg/sql/pgwire/hba:hba_test",
"//pkg/sql/pgwire/identmap:identmap_test",
"//pkg/sql/pgwire/pgerror:pgerror_test",
Expand Down Expand Up @@ -1858,6 +1859,10 @@ GO_TARGETS = [
"//pkg/sql/parser/statements:statements",
"//pkg/sql/parser:parser",
"//pkg/sql/parser:parser_test",
"//pkg/sql/pgrepl/lsn:lsn",
"//pkg/sql/pgrepl/pgreplparser:pgreplparser",
"//pkg/sql/pgrepl/pgreplparser:pgreplparser_test",
"//pkg/sql/pgrepl/pgrepltree:pgrepltree",
"//pkg/sql/pgwire/hba:hba",
"//pkg/sql/pgwire/hba:hba_test",
"//pkg/sql/pgwire/identmap:identmap",
Expand Down
1 change: 1 addition & 0 deletions pkg/gen/misc.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ MISC_SRCS = [
"//pkg/roachprod/vm/aws:terraform/main.tf",
"//pkg/spanconfig/spanconfigstore:entry_interval_btree.go",
"//pkg/spanconfig/spanconfigstore:entry_interval_btree_test.go",
"//pkg/sql/pgrepl/pgreplparser:pgrepl.go",
"//pkg/sql/plpgsql/parser/lexbase:keywords.go",
"//pkg/sql/plpgsql/parser/lexbase:tokens.go",
"//pkg/sql/plpgsql/parser:plpgsql.go",
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/lexbase/sql-gen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ GENYACC=$LANG-gen.y
awk '{print $0")>_\\1 <union> /* <\\2> */_"}' > types_regex.tmp

sed -E -f types_regex.tmp < $1 | \
if [ $LANG != plpgsql ]; then \
if [ $LANG != plpgsql ] && [ $LANG != pgrepl ]; then \
awk -f $3 | \
sed -Ee 's,//.*$$,,g;s,/[*]([^*]|[*][^/])*[*]/, ,g;s/ +$$//g' > $GENYACC
else
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/pgrepl/lsn/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

# Go library target for the lsn package; its only source file is lsn.go and
# it is visible to every package in the workspace.
go_library(
name = "lsn",
srcs = ["lsn.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/pgrepl/lsn",
visibility = ["//visibility:public"],
)
28 changes: 28 additions & 0 deletions pkg/sql/pgrepl/lsn/lsn.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Package lsn contains logic for handling the pg_lsn type.
package lsn

import (
	"fmt"
	"strconv"
	"strings"
)

// LSN is a log sequence number (the pg_lsn type) held as one 64-bit value.
// Its textual form is two hexadecimal numbers separated by a slash: the upper
// 32 bits followed by the lower 32 bits.
type LSN uint64

// String renders the LSN as "HI/LO", where HI is the upper 32 bits and LO the
// lower 32 bits, each in uppercase hexadecimal without leading zeros
// (for example "16/B374D848").
func (lsn LSN) String() string {
	return fmt.Sprintf("%X/%X", uint32(lsn>>32), uint32(lsn))
}

// ParseLSN parses the textual "HI/LO" form of an LSN, where HI and LO are
// hexadecimal numbers (either letter case) that each fit in 32 bits.
//
// The whole string must match that form. Input with surrounding or embedded
// whitespace, a sign, a "0x" prefix, an empty half, a second '/', or any other
// trailing characters is rejected with an error. The previous fmt.Sscanf-based
// implementation stopped reading after the second number and skipped leading
// whitespace, so malformed strings such as "1/2 junk" or "1/2/3" were
// silently accepted.
func ParseLSN(str string) (LSN, error) {
	sep := strings.IndexByte(str, '/')
	if sep < 0 {
		return 0, fmt.Errorf("invalid LSN %q: missing '/' separator", str)
	}
	hi, err := parseLSNHalf(str[:sep])
	if err != nil {
		return 0, fmt.Errorf("invalid LSN %q: %w", str, err)
	}
	// Everything after the first '/' must be the low half; a second '/' or
	// any other stray character makes ParseUint fail.
	lo, err := parseLSNHalf(str[sep+1:])
	if err != nil {
		return 0, fmt.Errorf("invalid LSN %q: %w", str, err)
	}
	return (LSN(hi) << 32) | LSN(lo), nil
}

// parseLSNHalf parses one side of an LSN: a non-empty run of hexadecimal
// digits whose value fits in 32 bits.
func parseLSNHalf(part string) (uint32, error) {
	// With an explicit base of 16, ParseUint accepts neither a sign, a "0x"
	// prefix, underscores nor whitespace, and reports out-of-range values.
	v, err := strconv.ParseUint(part, 16, 32)
	if err != nil {
		return 0, err
	}
	return uint32(v), nil
}
6 changes: 6 additions & 0 deletions pkg/sql/pgrepl/pgreplparser/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Do not add environment-specific entries here (see the top-level .gitignore
# for reasoning and alternatives).

pgrepl.go
y.output
gen
78 changes: 78 additions & 0 deletions pkg/sql/pgrepl/pgreplparser/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

# Wraps the shared sql-gen.sh script from //pkg/sql/lexbase as an executable
# target so the genrule below can invoke it.
sh_binary(
name = "pgrepl-gen",
srcs = ["//pkg/sql/lexbase:sql-gen.sh"],
)

# Define the target to auto-generate pgrepl.go from the grammar file.
#
# The command sets GOPATH to a non-existent path and then runs the :pgrepl-gen
# script with these arguments, in order: the location of pgrepl.y, the literal
# "pgrepl", an empty string, the location of the pgrepl.go output, the goyacc
# binary, and the goimports binary.
genrule(
name = "pgrepl-goyacc",
srcs = [
"pgrepl.y",
],
outs = ["pgrepl.go"],
cmd = """
export GOPATH=/nonexist-gopath
$(location :pgrepl-gen) $(location pgrepl.y) pgrepl ""\
$(location pgrepl.go) $(location @org_golang_x_tools//cmd/goyacc) \
$(location @com_github_cockroachdb_gostdlib//x/tools/cmd/goimports) \
""",
exec_tools = [
":pgrepl-gen",
"@com_github_cockroachdb_gostdlib//x/tools/cmd/goimports",
"@org_golang_x_tools//cmd/goyacc",
],
visibility = ["//visibility:public"],
)

# Go library for the pgreplparser package. Among its sources, pgrepl.go is the
# file produced by the :pgrepl-goyacc genrule (it is that rule's only output);
# the remaining sources are listed by name. The redact dependency carries a
# "# keep" marker.
go_library(
name = "pgreplparser",
srcs = [
"lexer.go",
"parser.go",
"pgrepl.go",
"pgreplparser.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/pgrepl/pgreplparser",
visibility = ["//visibility:public"],
deps = [
"//pkg/sql/lexbase",
"//pkg/sql/parser",
"//pkg/sql/pgrepl/lsn",
"//pkg/sql/pgrepl/pgrepltree",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/sem/tree",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_redact//:redact", # keep
],
)

# Make the grammar source pgrepl.y referenceable from other packages.
exports_files(
[
"pgrepl.y",
],
visibility = ["//visibility:public"],
)

# Tests for the pgreplparser package. The test sources are compiled together
# with the :pgreplparser library (embed), every file under testdata/ is made
# available as test data, and the test binary is run with a 295s timeout flag.
go_test(
name = "pgreplparser_test",
srcs = [
"lexer_test.go",
"parser_test.go",
],
args = ["-test.timeout=295s"],
data = glob(["testdata/**"]),
embed = [":pgreplparser"],
deps = [
"//pkg/sql/pgrepl/lsn",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/sem/tree",
"//pkg/testutils/datapathutils",
"@com_github_cockroachdb_datadriven//:datadriven",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
],
)
Loading

0 comments on commit 9633594

Please sign in to comment.