Skip to content

Commit

Permalink
Add TypeParser based on Flex and Bison
Browse files Browse the repository at this point in the history
  • Loading branch information
majetideepak committed Nov 15, 2023
1 parent 7cb7b2d commit 260f66a
Show file tree
Hide file tree
Showing 8 changed files with 574 additions and 0 deletions.
1 change: 1 addition & 0 deletions velox/type/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ if(${VELOX_BUILD_TESTING})
endif()

add_subdirectory(tz)
add_subdirectory(type_parser)
add_subdirectory(fbhive)

add_library(
Expand Down
34 changes: 34 additions & 0 deletions velox/type/type_parser/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build the unit tests in tests/ only when testing is enabled.
if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
endif()

# Generate the C++ parser source and its header from the Bison grammar.
bison_target(
TypeParserParser TypeParser.yy ${CMAKE_CURRENT_BINARY_DIR}/TypeParser.yy.cc
DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/TypeParser.yy.h)

# Generate the scanner from the Flex specification. "-Cf" selects full
# (uncompressed) scanner tables; "--prefix=veloxtp" replaces the default "yy"
# prefix of the generated symbols.
flex_target(
TypeParserScanner TypeParser.ll ${CMAKE_CURRENT_BINARY_DIR}/Scanner.cpp
COMPILE_FLAGS "-Cf --prefix=veloxtp")

# The generated scanner includes the parser's generated header, so the parser
# must be generated first.
add_flex_bison_dependency(TypeParserScanner TypeParserParser)

# Presumably makes the generated TypeParser.yy.h reachable through its
# "velox/type/type_parser/..." include path; FLEX_INCLUDE_DIRS provides
# FlexLexer.h.
include_directories(${PROJECT_BINARY_DIR})
include_directories(${FLEX_INCLUDE_DIRS})
# Library made of the generated parser and scanner sources plus the
# hand-written headers.
add_library(
velox_type_parser ${BISON_TypeParserParser_OUTPUTS}
${FLEX_TypeParserScanner_OUTPUTS} Scanner.h TypeParser.h)
target_link_libraries(velox_type_parser velox_common_base)
55 changes: 55 additions & 0 deletions velox/type/type_parser/Scanner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cmath>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

#include "velox/common/base/Exceptions.h"
#include "velox/type/Type.h"

namespace facebook::velox::type {

/// Lexer for type signatures, layered on top of the flex C++ lexer class and
/// driven by the Bison-generated Parser.
///
/// NOTE: this header uses yyFlexLexer and Parser without including their
/// declarations itself; includers are expected to bring in FlexLexer.h and
/// the generated parser header first.
class Scanner : public yyFlexLexer {
 public:
  /// @param arg_yyin Stream the type text is read from.
  /// @param arg_yyout Stream handed to the underlying flex lexer as its
  /// output stream.
  /// @param outputType Receives the parsed type through setType(). Held by
  /// reference, so it must outlive this scanner.
  /// @param input Copy of the text being parsed, kept for error messages.
  Scanner(
      std::istream& arg_yyin,
      std::ostream& arg_yyout,
      TypePtr& outputType,
      const std::string& input)
      : yyFlexLexer(&arg_yyin, &arg_yyout),
        outputType_(outputType),
        input_(input) {}

  /// Returns the next token and stores its semantic value in 'yylval'.
  /// The body is supplied by the flex-generated scanner.
  int lex(Parser::semantic_type* yylval);

  /// Publishes the parse result into the caller-provided output type.
  void setType(TypePtr type) {
    outputType_ = std::move(type);
  }

  /// Returns the original input so that it can be printed as part of an
  /// error message. The reference is valid for the lifetime of the scanner.
  const std::string& input() const {
    return input_;
  }

 private:
  TypePtr& outputType_;
  std::string input_;
};

} // namespace facebook::velox::type
24 changes: 24 additions & 0 deletions velox/type/type_parser/TypeParser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <string>
#include "velox/type/Type.h"

namespace facebook::velox {
/// Parses the textual type signature 'typeText' (for example a scalar type
/// name or a nested ARRAY/MAP/ROW expression) and returns the corresponding
/// type. Reports a failure that mentions the offending text when 'typeText'
/// cannot be parsed.
TypePtr parseTypeSignature(const std::string& typeText);
}
78 changes: 78 additions & 0 deletions velox/type/type_parser/TypeParser.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
%{
#include <vector>
#include <memory>

#include "velox/type/type_parser/TypeParser.yy.h" // @manual
#include "velox/type/type_parser/Scanner.h"
#define YY_DECL int facebook::velox::type::Scanner::lex(facebook::velox::type::Parser::semantic_type *yylval)
%}

%option c++ noyywrap noyylineno nodefault caseless

/*
 * Single-letter patterns matching a letter in either case.
 * Note: inside a bracket expression '|' is a literal character, not an
 * alternation operator, so the two cases are simply listed next to each
 * other.
 */
A [Aa]
B [Bb]
C [Cc]
D [Dd]
E [Ee]
F [Ff]
G [Gg]
H [Hh]
I [Ii]
J [Jj]
K [Kk]
L [Ll]
M [Mm]
O [Oo]
P [Pp]
R [Rr]
S [Ss]
T [Tt]
U [Uu]
W [Ww]
X [Xx]
Y [Yy]
Z [Zz]

/*
 * Named patterns used by the rules below. Matching is case-insensitive
 * because of the 'caseless' option.
 *
 * WORD             - run of letters, digits and underscores.
 * QUOTED_ID        - letters, digits, whitespace and underscores enclosed in
 *                    single or double quotes; the opening and closing quote
 *                    characters are not required to be the same.
 * NUMBER           - one or more decimal digits.
 * ROW              - ROW or its synonym STRUCT.
 * VARIABLE         - the variable-width types VARCHAR and VARBINARY.
 * TYPE_WITH_SPACES - multi-word type names; the words must be separated by
 *                    exactly one space character.
 */
WORD ([[:alpha:][:alnum:]_]*)
QUOTED_ID (['"'][[:alnum:][:space:]_]*['"'])
NUMBER ([[:digit:]]+)
ROW (ROW|STRUCT)
VARIABLE (VARCHAR|VARBINARY)
TYPE_WITH_SPACES ((DOUBLE[ ]PRECISION)|(TIME[ ]WITH[ ]TIME[ ]ZONE)|(TIMESTAMP[ ]WITH[ ]TIME[ ]ZONE)|(INTERVAL[ ]YEAR[ ]TO[ ]MONTH)|(INTERVAL[ ]DAY[ ]TO[ ]SECOND))

/*
 * Scanner rules.
 *
 * Flex picks the longest match and, among matches of equal length, the rule
 * listed first. Keywords are therefore listed before WORD, while a
 * multi-word name still wins over WORD because it is longer.
 * Tokens that carry a value store the matched text (or the parsed number)
 * in yylval before returning.
 * The final '.' rule consumes any single character (other than a newline)
 * that no other rule matched, without producing a token; there is no
 * dedicated whitespace rule, so this is also what skips the spaces between
 * tokens.
 */
%%

"(" return Parser::token::LPAREN;
")" return Parser::token::RPAREN;
"," return Parser::token::COMMA;
(ARRAY) return Parser::token::ARRAY;
(MAP) return Parser::token::MAP;
(FUNCTION) return Parser::token::FUNCTION;
(DECIMAL) return Parser::token::DECIMAL;
{ROW} return Parser::token::ROW;
{VARIABLE} yylval->build<std::string>(YYText()); return Parser::token::VARIABLE;
{NUMBER} yylval->build<long long>(strtoll(YYText(), nullptr, 10)); return Parser::token::NUMBER;
{WORD} yylval->build<std::string>(YYText()); return Parser::token::WORD;
{TYPE_WITH_SPACES} yylval->build<std::string>(YYText()); return Parser::token::TYPE_WITH_SPACES;
{QUOTED_ID} yylval->build<std::string>(YYText()); return Parser::token::QUOTED_ID;
<<EOF>> return Parser::token::YYEOF;
. /* no action on unmatched input */

%%

// Definition of the base-class scanning entry point. Scanning is performed
// by Scanner::lex() (see YY_DECL), so reaching this function indicates a
// programming error and is reported by throwing.
int yyFlexLexer::yylex() {
  static constexpr const char* kBadCallMessage =
      "Bad call to yyFlexLexer::yylex()";
  throw std::runtime_error(kBadCallMessage);
}

#include "velox/type/type_parser/TypeParser.h"

// Parses 'typeText' into a type: feeds the text to a Scanner, runs the
// generated Parser over it and returns the type the parser published through
// the scanner. Fails if no type was produced.
facebook::velox::TypePtr facebook::velox::parseTypeSignature(const std::string& typeText)
{
  namespace parser_ns = facebook::velox::type;

  std::istringstream textStream(typeText);
  facebook::velox::TypePtr parsedType;

  // The scanner writes the result into 'parsedType' and keeps a copy of the
  // text for error reporting.
  parser_ns::Scanner lexer{textStream, std::cerr, parsedType, typeText};
  parser_ns::Parser grammar{&lexer};
  grammar.parse();

  VELOX_CHECK(parsedType, "Failed to parse type [{}]", typeText);
  return parsedType;
}
139 changes: 139 additions & 0 deletions velox/type/type_parser/TypeParser.yy
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
%{
#include <FlexLexer.h>
#include "velox/common/base/Exceptions.h"
#include "velox/type/Type.h"
#include "velox/functions/prestosql/types/HyperLogLogType.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"

%}
%require "3.0.4"
%language "C++"

%define parser_class_name {Parser}
%define api.namespace {facebook::velox::type}
%define api.value.type variant
%parse-param {Scanner* scanner}
%define parse.error verbose

%code requires
{
// Standard headers for the types used in the semantic values declared in
// this grammar, so that the generated parser header does not depend on what
// its includer happened to include first.
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace facebook::velox::type {
class Scanner;
} // namespace facebook::velox::type
namespace facebook::velox {
class Type;
} // namespace facebook::velox
// Field names and field types collected while parsing the argument list of a
// ROW type. The two vectors are filled in parallel; an unnamed field is
// recorded with an empty name.
struct RowArguments {
  std::vector<std::string> names;
  std::vector<std::shared_ptr<const facebook::velox::Type>> types;
};
} // %code requires

%code
{
#include <velox/type/type_parser/Scanner.h>
#define yylex(x) scanner->lex(x)
using namespace facebook::velox;
// Maps a type name (matched case-insensitively) to the corresponding type.
//
// UNKNOWN and the names reported by TIMESTAMP_WITH_TIME_ZONE(), HYPERLOGLOG()
// and JSON() are handled explicitly. "INT" and "DOUBLE PRECISION" are treated
// as aliases of "INTEGER" and "DOUBLE". Every other name is resolved through
// hasType()/getType(); an unknown name fails with a message that quotes the
// original spelling.
TypePtr typeFromString(const std::string& type) {
  auto upper = type;
  // Convert through unsigned char: calling toupper() with a negative char
  // value is undefined behavior.
  std::transform(
      upper.begin(), upper.end(), upper.begin(), [](unsigned char c) {
        return static_cast<char>(::toupper(c));
      });

  if (upper == "UNKNOWN") {
    return UNKNOWN();
  }
  if (upper == TIMESTAMP_WITH_TIME_ZONE()->toString()) {
    return TIMESTAMP_WITH_TIME_ZONE();
  }
  if (upper == HYPERLOGLOG()->toString()) {
    return HYPERLOGLOG();
  }
  if (upper == JSON()->toString()) {
    return JSON();
  }

  // Normalize aliases before looking the name up.
  if (upper == "INT") {
    upper = "INTEGER";
  } else if (upper == "DOUBLE PRECISION") {
    upper = "DOUBLE";
  }

  if (!hasType(upper)) {
    VELOX_FAIL("Failed to parse type [{}]", type);
  }
  return getType(upper, {});
}
}

// Terminals without a semantic value: punctuation and keywords.
%token LPAREN RPAREN COMMA ARRAY MAP ROW FUNCTION DECIMAL
// Terminals whose semantic value is the matched text.
%token <std::string> WORD VARIABLE QUOTED_ID TYPE_WITH_SPACES
// Terminal whose semantic value is the parsed integer.
%token <long long> NUMBER
// End-of-input marker, bound to token number 0.
%token YYEOF 0

// Semantic value types of the non-terminals. A named_type is a
// (name, type) pair; type_list_opt_names carries parallel name/type vectors.
%nterm <std::shared_ptr<const Type>> type array_type map_type variable_type
%nterm <std::pair<std::string, std::shared_ptr<const Type>>> named_type
%nterm <std::shared_ptr<const Type>> row_type function_type decimal_type simple_type
%nterm <std::string> identifier
%nterm <std::vector<std::shared_ptr<const Type>>> type_list
%nterm <RowArguments> type_list_opt_names

%%

// Start symbol: a complete type signature. The resulting type is handed to
// the scanner; for a named type only the type is kept and the name is
// dropped. The last alternative is the error-recovery rule.
type_spec : named_type { scanner->setType($1.second); }
| type { scanner->setType($1); }
| error { yyerrok; }
;

// A type preceded by a field name, produced as a (name, type) pair.
named_type : identifier type { $$ = std::make_pair($1, $2); }
;

// Any supported type expression.
type : array_type { $$ = $1; }
| map_type { $$ = $1; }
| row_type { $$ = $1; }
| simple_type { $$ = $1; }
| function_type { $$ = $1; }
| variable_type { $$ = $1; }
| decimal_type { $$ = $1; }
;

// A type given by name only; the name is resolved by typeFromString().
simple_type : WORD { $$ = typeFromString($1); }
| TYPE_WITH_SPACES { $$ = typeFromString($1); }
;

// VARCHAR / VARBINARY with an optional length argument. The length is
// accepted by the grammar but not used when building the type.
variable_type : VARIABLE LPAREN NUMBER RPAREN { $$ = typeFromString($1); }
| VARIABLE { $$ = typeFromString($1); }
;

// ARRAY(elementType)
array_type : ARRAY LPAREN type RPAREN { $$ = ARRAY($3); }
;

// DECIMAL(a, b): both numbers are passed to DECIMAL() in the order written.
decimal_type : DECIMAL LPAREN NUMBER COMMA NUMBER RPAREN { $$ = DECIMAL($3, $5); }
;

// Non-empty, comma-separated list of types.
type_list : type { $$.push_back($1); }
| type_list COMMA type { $1.push_back($3); $$ = std::move($1); }
;

// Non-empty, comma-separated list of entries that are each either a plain
// type or a named type. Entries without a name are recorded with the empty
// string as their name so that names and types stay aligned.
type_list_opt_names : type { $$.names.push_back(""); $$.types.push_back($1); }
| named_type { $$.names.push_back($1.first); $$.types.push_back($1.second); }
| type_list_opt_names COMMA type { $1.names.push_back(""); $1.types.push_back($3);
$$.names = std::move($1.names); $$.types = std::move($1.types); }
| type_list_opt_names COMMA named_type { $1.names.push_back($3.first); $1.types.push_back($3.second);
$$.names = std::move($1.names); $$.types = std::move($1.types); }
;

// ROW(...) or STRUCT(...) with optionally named fields.
row_type : ROW LPAREN type_list_opt_names RPAREN { $$ = ROW(std::move($3.names), std::move($3.types)); }
;

// MAP(keyType, valueType)
map_type : MAP LPAREN type COMMA type RPAREN { $$ = MAP($3, $5); }
;

// FUNCTION(type1, ..., typeN): the last type in the list is the return type
// and the preceding ones are the argument types. The list is never empty, so
// taking back() is safe.
function_type : FUNCTION LPAREN type_list RPAREN { auto returnType = $3.back(); $3.pop_back();
$$ = FUNCTION(std::move($3), returnType); }
;

// A field name: either a quoted identifier, whose first and last characters
// (the quotes) are stripped, or a bare word taken as is.
identifier : QUOTED_ID { $1.erase(0, 1); $1.pop_back(); $$ = $1; } // Remove the quotes.
| WORD { $$ = $1; }
;

%%

// Error callback of the generated parser. Fails with a message that quotes
// the text being parsed, followed by the parser's own description of the
// problem ('msg', which is verbose because of '%define parse.error verbose').
void facebook::velox::type::Parser::error(const std::string& msg) {
  VELOX_FAIL("Failed to parse type [{}]. {}", scanner->input(), msg);
}
20 changes: 20 additions & 0 deletions velox/type/type_parser/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Unit test executable for the type parser.
add_executable(velox_type_parser_test TypeParserTest.cpp)

# Register the executable with CTest under the same name.
add_test(NAME velox_type_parser_test COMMAND velox_type_parser_test)

# Link against the parser under test, the type library and GoogleTest/GoogleMock.
target_link_libraries(velox_type_parser_test velox_type_parser velox_type gtest
gtest_main gmock)
Loading

0 comments on commit 260f66a

Please sign in to comment.