-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add TypeParser based on Flex and Bison
- Loading branch information
1 parent
7cb7b2d
commit 260f66a
Showing
8 changed files
with
574 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Build the unit tests in tests/ only when VELOX_BUILD_TESTING is set.
if(${VELOX_BUILD_TESTING}) | ||
add_subdirectory(tests) | ||
endif() | ||
|
||
# Generate the Bison parser from TypeParser.yy. Both the generated source
# (TypeParser.yy.cc) and the generated header (DEFINES_FILE, TypeParser.yy.h)
# are written to the build directory.
bison_target( | ||
TypeParserParser TypeParser.yy ${CMAKE_CURRENT_BINARY_DIR}/TypeParser.yy.cc | ||
DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/TypeParser.yy.h) | ||
|
||
# Generate the Flex scanner from TypeParser.ll into Scanner.cpp.
# NOTE(review): per the Flex manual, -Cf selects full (uncompressed) tables and
# --prefix replaces the default "yy" symbol prefix with "veloxtp" — confirm
# this is the intent.
flex_target( | ||
TypeParserScanner TypeParser.ll ${CMAKE_CURRENT_BINARY_DIR}/Scanner.cpp | ||
COMPILE_FLAGS "-Cf --prefix=veloxtp") | ||
|
||
# Make the scanner target depend on the parser target so the generated parser
# header exists before the scanner is compiled.
add_flex_bison_dependency(TypeParserScanner TypeParserParser) | ||
|
||
# The scanner includes the generated header as
# "velox/type/type_parser/TypeParser.yy.h", so the build tree root has to be on
# the include path, along with the directory providing FlexLexer.h.
include_directories(${PROJECT_BINARY_DIR}) | ||
include_directories(${FLEX_INCLUDE_DIRS}) | ||
# Library made of the generated parser and scanner sources plus the
# hand-written headers.
add_library( | ||
velox_type_parser ${BISON_TypeParserParser_OUTPUTS} | ||
${FLEX_TypeParserScanner_OUTPUTS} Scanner.h TypeParser.h) | ||
target_link_libraries(velox_type_parser velox_common_base) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <cmath> | ||
#include <iostream> | ||
#include <sstream> | ||
#include <string> | ||
#include <unordered_map> | ||
|
||
#include "velox/common/base/Exceptions.h" | ||
#include "velox/type/Type.h" | ||
|
||
namespace facebook::velox::type { | ||
|
||
class Scanner : public yyFlexLexer { | ||
public: | ||
Scanner( | ||
std::istream& arg_yyin, | ||
std::ostream& arg_yyout, | ||
TypePtr& outputType, | ||
const std::string& input) | ||
: yyFlexLexer(&arg_yyin, &arg_yyout), | ||
outputType_(outputType), | ||
input_(input){}; | ||
int lex(Parser::semantic_type* yylval); | ||
|
||
void setType(TypePtr type) { | ||
outputType_ = std::move(type); | ||
} | ||
|
||
// Store input to print it as part of the error message. | ||
std::string input() { | ||
return input_; | ||
} | ||
|
||
private: | ||
TypePtr& outputType_; | ||
std::string input_; | ||
}; | ||
|
||
} // namespace facebook::velox::type |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <string> | ||
#include "velox/type/Type.h" | ||
|
||
namespace facebook::velox { | ||
/// Parses a textual type signature into a Velox type.
/// @param typeText The signature text to parse.
/// @return The parsed type.
/// NOTE(review): the definition in TypeParser.ll reports failures through
/// VELOX_CHECK / VELOX_FAIL — confirm the exact exception type callers should
/// expect.
TypePtr parseTypeSignature(const std::string& typeText); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
%{ | ||
#include <vector> | ||
#include <memory> | ||
|
||
#include "velox/type/type_parser/TypeParser.yy.h" // @manual | ||
#include "velox/type/type_parser/Scanner.h" | ||
#define YY_DECL int facebook::velox::type::Scanner::lex(facebook::velox::type::Parser::semantic_type *yylval) | ||
%} | ||
|
||
%option c++ noyywrap noyylineno nodefault caseless | ||
|
||
/* Single-letter character classes matching either case of a letter. Inside a
 * bracket expression '|' is a literal character, not alternation, so the
 * classes list only the two letter forms. No rule in this file references
 * these definitions; keyword matching relies on the 'caseless' option. */
A [Aa]
B [Bb]
C [Cc]
D [Dd]
E [Ee]
F [Ff]
G [Gg]
H [Hh]
I [Ii]
J [Jj]
K [Kk]
L [Ll]
M [Mm]
O [Oo]
P [Pp]
R [Rr]
S [Ss]
T [Tt]
U [Uu]
W [Ww]
X [Xx]
Y [Yy]
Z [Zz]
|
||
/* Named patterns used by the rules section.
 *   WORD             run of letters, digits and underscores ('*' also admits
 *                    the empty string).
 *   QUOTED_ID        a ' or " character, then letters, digits, whitespace or
 *                    underscores, then a ' or " character; the opening and
 *                    closing quote characters are not required to match.
 *   NUMBER           one or more decimal digits.
 *   ROW              ROW or STRUCT.
 *   VARIABLE         VARCHAR or VARBINARY.
 *   TYPE_WITH_SPACES the listed multi-word type names, with exactly one space
 *                    between words.
 */
WORD ([[:alpha:][:alnum:]_]*) | ||
QUOTED_ID (['"'][[:alnum:][:space:]_]*['"']) | ||
NUMBER ([[:digit:]]+) | ||
ROW (ROW|STRUCT) | ||
VARIABLE (VARCHAR|VARBINARY) | ||
TYPE_WITH_SPACES ((DOUBLE[ ]PRECISION)|(TIME[ ]WITH[ ]TIME[ ]ZONE)|(TIMESTAMP[ ]WITH[ ]TIME[ ]ZONE)|(INTERVAL[ ]YEAR[ ]TO[ ]MONTH)|(INTERVAL[ ]DAY[ ]TO[ ]SECOND)) | ||
|
||
%% | ||
|
||
  /* Token rules. Punctuation and the ARRAY / MAP / FUNCTION / DECIMAL / ROW
   * keywords return bare tokens; VARIABLE, NUMBER, WORD, TYPE_WITH_SPACES and
   * QUOTED_ID also store a semantic value (the matched text, or the parsed
   * integer for NUMBER) in yylval.
   * Per the Flex manual the longest match wins and ties go to the earlier
   * rule, which is why the keyword rules are listed before WORD.
   * NUMBER is converted with strtoll() without an overflow check.
   * The final '.' rule silently discards any single character that no other
   * rule matched.
   */
"(" return Parser::token::LPAREN; | ||
")" return Parser::token::RPAREN; | ||
"," return Parser::token::COMMA; | ||
(ARRAY) return Parser::token::ARRAY; | ||
(MAP) return Parser::token::MAP; | ||
(FUNCTION) return Parser::token::FUNCTION; | ||
(DECIMAL) return Parser::token::DECIMAL; | ||
{ROW} return Parser::token::ROW; | ||
{VARIABLE} yylval->build<std::string>(YYText()); return Parser::token::VARIABLE; | ||
{NUMBER} yylval->build<long long>(strtoll(YYText(), nullptr, 10)); return Parser::token::NUMBER; | ||
{WORD} yylval->build<std::string>(YYText()); return Parser::token::WORD; | ||
{TYPE_WITH_SPACES} yylval->build<std::string>(YYText()); return Parser::token::TYPE_WITH_SPACES; | ||
{QUOTED_ID} yylval->build<std::string>(YYText()); return Parser::token::QUOTED_ID; | ||
<<EOF>> return Parser::token::YYEOF; | ||
. /* no action on unmatched input */ | ||
|
||
%% | ||
|
||
// Definition of the base-class yylex(). YY_DECL above routes the generated
// scanner body into Scanner::lex() instead, so this function is never the real
// entry point and always throws std::runtime_error if called.
// NOTE(review): presumably this exists only so that yyFlexLexer::yylex() has a
// definition at link time — confirm.
int yyFlexLexer::yylex() { | ||
throw std::runtime_error("Bad call to yyFlexLexer::yylex()"); | ||
} | ||
|
||
#include "velox/type/type_parser/TypeParser.h" | ||
|
||
/// Parses 'typeText' into a type by running the generated Parser over a
/// Scanner that reads from an in-memory stream. Fails via VELOX_CHECK if the
/// parser finishes without producing a type.
facebook::velox::TypePtr facebook::velox::parseTypeSignature(
    const std::string& typeText) {
  using facebook::velox::type::Parser;
  using facebook::velox::type::Scanner;

  // The grammar actions fill 'result' through the reference held by the lexer.
  facebook::velox::TypePtr result;
  std::istringstream source(typeText);
  Scanner lexer{source, std::cerr, result, typeText};
  Parser grammar{&lexer};
  grammar.parse();

  VELOX_CHECK(result, "Failed to parse type [{}]", typeText);
  return result;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
%{ | ||
#include <FlexLexer.h> | ||
#include "velox/common/base/Exceptions.h" | ||
#include "velox/type/Type.h" | ||
#include "velox/functions/prestosql/types/HyperLogLogType.h" | ||
#include "velox/functions/prestosql/types/JsonType.h" | ||
#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" | ||
|
||
%} | ||
%require "3.0.4" | ||
%language "C++" | ||
|
||
%define parser_class_name {Parser} | ||
%define api.namespace {facebook::velox::type} | ||
%define api.value.type variant | ||
%parse-param {Scanner* scanner} | ||
%define parse.error verbose | ||
|
||
%code requires
{
  // Bison copies this block into the generated parser header, so it must be
  // self-contained: include the standard headers RowArguments needs instead of
  // relying on whatever the including file happened to include first.
  #include <memory>
  #include <string>
  #include <vector>

  namespace facebook::velox::type {
    class Scanner;
  } // namespace facebook::velox::type
  namespace facebook::velox {
    class Type;
  } // namespace facebook::velox

  /// Accumulates the fields of a ROW type while it is being parsed. 'names'
  /// and 'types' are parallel vectors: entry i of each describes field i, and
  /// an unnamed field is recorded with an empty name.
  struct RowArguments {
    std::vector<std::string> names;
    std::vector<std::shared_ptr<const facebook::velox::Type>> types;
  };
} // %code requires
|
||
%code | ||
{ | ||
#include <velox/type/type_parser/Scanner.h> | ||
#define yylex(x) scanner->lex(x) | ||
using namespace facebook::velox; | ||
/// Resolves a scalar type name produced by the lexer to a Velox type.
///
/// Matching is case-insensitive: the name is upper-cased first. UNKNOWN and
/// the TIMESTAMP_WITH_TIME_ZONE / HYPERLOGLOG / JSON types (matched against
/// their toString() names) are handled explicitly; "INT" and
/// "DOUBLE PRECISION" are rewritten to "INTEGER" and "DOUBLE"; every other
/// name is looked up with hasType() / getType().
///
/// @param type Type name as written in the signature.
/// @return The matching type.
/// Fails via VELOX_FAIL when hasType() does not recognize the name.
TypePtr typeFromString(const std::string& type) {
  std::string upper = type;
  // ::toupper has undefined behavior for negative char values, so route every
  // character through unsigned char before converting.
  std::transform(
      upper.begin(), upper.end(), upper.begin(), [](unsigned char ch) {
        return static_cast<char>(::toupper(ch));
      });

  if (upper == "UNKNOWN") {
    return UNKNOWN();
  }
  if (upper == TIMESTAMP_WITH_TIME_ZONE()->toString()) {
    return TIMESTAMP_WITH_TIME_ZONE();
  }
  if (upper == HYPERLOGLOG()->toString()) {
    return HYPERLOGLOG();
  }
  if (upper == JSON()->toString()) {
    return JSON();
  }

  // Aliases that are rewritten to the name used for the lookup below.
  if (upper == "INT") {
    upper = "INTEGER";
  } else if (upper == "DOUBLE PRECISION") {
    upper = "DOUBLE";
  }

  if (!hasType(upper)) {
    // Report the name as the caller wrote it, not the upper-cased form.
    VELOX_FAIL("Failed to parse type [{}]", type);
  }
  return getType(upper, {});
}
} | ||
|
||
// Tokens without a semantic value: punctuation and structural keywords.
%token LPAREN RPAREN COMMA ARRAY MAP ROW FUNCTION DECIMAL | ||
// Tokens carrying the matched text.
%token <std::string> WORD VARIABLE QUOTED_ID TYPE_WITH_SPACES | ||
// Token carrying an integer value.
%token <long long> NUMBER | ||
// End-of-input token, bound to token number 0.
%token YYEOF 0 | ||
|
||
// Semantic value types of the non-terminals.
%nterm <std::shared_ptr<const Type>> type array_type map_type variable_type | ||
%nterm <std::pair<std::string, std::shared_ptr<const Type>>> named_type | ||
%nterm <std::shared_ptr<const Type>> row_type function_type decimal_type simple_type | ||
%nterm <std::string> identifier | ||
%nterm <std::vector<std::shared_ptr<const Type>>> type_list | ||
%nterm <RowArguments> type_list_opt_names | ||
|
||
%% | ||
|
||
// Start symbol. Hands the parsed type to the scanner; for a named type only
// the type is kept and the name is dropped.
type_spec : named_type { scanner->setType($1.second); } | ||
| type { scanner->setType($1); } | ||
| error { yyerrok; } | ||
; | ||
|
||
// An identifier followed by a type, kept as a (name, type) pair.
named_type : identifier type { $$ = std::make_pair($1, $2); } | ||
; | ||
|
||
// Any supported type form; the value is passed through unchanged.
type : array_type { $$ = $1; } | ||
| map_type { $$ = $1; } | ||
| row_type { $$ = $1; } | ||
| simple_type { $$ = $1; } | ||
| function_type { $$ = $1; } | ||
| variable_type { $$ = $1; } | ||
| decimal_type { $$ = $1; } | ||
; | ||
|
||
// A type named by a single token; resolved by typeFromString().
simple_type : WORD { $$ = typeFromString($1); } | ||
| TYPE_WITH_SPACES { $$ = typeFromString($1); } | ||
; | ||
|
||
// VARCHAR / VARBINARY with an optional length. The length is accepted but not
// used: only the type name is passed to typeFromString().
variable_type : VARIABLE LPAREN NUMBER RPAREN { $$ = typeFromString($1); } | ||
| VARIABLE { $$ = typeFromString($1); } | ||
; | ||
|
||
// ARRAY(element type).
array_type : ARRAY LPAREN type RPAREN { $$ = ARRAY($3); } | ||
; | ||
|
||
// DECIMAL(n, m). NOTE(review): presumably precision and scale; the long long
// values are passed to DECIMAL() without a range check here — confirm.
decimal_type : DECIMAL LPAREN NUMBER COMMA NUMBER RPAREN { $$ = DECIMAL($3, $5); } | ||
; | ||
|
||
// Comma-separated list of one or more types.
type_list : type { $$.push_back($1); } | ||
| type_list COMMA type { $1.push_back($3); $$ = std::move($1); } | ||
; | ||
|
||
// Comma-separated list of one or more types, each optionally preceded by a
// name. An entry without a name is recorded with an empty name.
type_list_opt_names : type { $$.names.push_back(""); $$.types.push_back($1); } | ||
| named_type { $$.names.push_back($1.first); $$.types.push_back($1.second); } | ||
| type_list_opt_names COMMA type { $1.names.push_back(""); $1.types.push_back($3); | ||
$$.names = std::move($1.names); $$.types = std::move($1.types); } | ||
| type_list_opt_names COMMA named_type { $1.names.push_back($3.first); $1.types.push_back($3.second); | ||
$$.names = std::move($1.names); $$.types = std::move($1.types); } | ||
; | ||
|
||
// ROW(...) or STRUCT(...) built from the collected field names and types.
row_type : ROW LPAREN type_list_opt_names RPAREN { $$ = ROW(std::move($3.names), std::move($3.types)); } | ||
; | ||
|
||
// MAP(key type, value type).
map_type : MAP LPAREN type COMMA type RPAREN { $$ = MAP($3, $5); } | ||
; | ||
|
||
// FUNCTION(T1, ..., Tn, R): the last type in the list is taken as the return
// type and the preceding ones are passed as the argument types. The grammar
// guarantees type_list holds at least one type, so back() is always valid.
function_type : FUNCTION LPAREN type_list RPAREN { auto returnType = $3.back(); $3.pop_back();
                                                   $$ = FUNCTION(std::move($3), returnType); }
              ;
|
||
// A field name: either a quoted identifier or a bare word. The QUOTED_ID
// pattern always includes an opening and a closing quote character, so
// stripping the first and last character is safe.
identifier : QUOTED_ID { $1.erase(0, 1); $1.pop_back(); $$ = $1; } // Remove the quotes. | ||
| WORD { $$ = $1; } | ||
; | ||
|
||
%% | ||
|
||
void facebook::velox::type::Parser::error(const std::string& msg) { | ||
VELOX_FAIL("Failed to parse type [{}]", scanner->input()); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Unit-test executable for the type parser.
add_executable(velox_type_parser_test TypeParserTest.cpp) | ||
|
||
# Register the executable with CTest under the same name.
add_test(NAME velox_type_parser_test COMMAND velox_type_parser_test) | ||
|
||
# Link against the parser library, the Velox type library and GoogleTest /
# GoogleMock (gtest_main supplies main()).
target_link_libraries(velox_type_parser_test velox_type_parser velox_type gtest | ||
gtest_main gmock) |
Oops, something went wrong.