Skip to content

Commit

Permalink
Add 3 '*' operators: one prefix, one infix, and one postfix, per #523. (
Browse files Browse the repository at this point in the history
#582)

The presence or absence of whitespace is used to determine which
operator is in use, following the rules described in #520.

Support for prefix * dereference operator follows #523.

Co-authored-by: Geoff Romer <gromer@google.com>
  • Loading branch information
zygoloid and geoffromer authored Jun 22, 2021
1 parent d6b47ba commit 89e2111
Show file tree
Hide file tree
Showing 12 changed files with 179 additions and 21 deletions.
1 change: 1 addition & 0 deletions executable_semantics/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ EXAMPLES = [
"pattern_init",
"pattern_variable_fail",
"record1",
"star",
"struct1",
"struct2",
"struct3",
Expand Down
9 changes: 6 additions & 3 deletions executable_semantics/ast/expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,15 +245,18 @@ auto Expression::MakeIndex(int line_num, const Expression* exp,

static void PrintOp(Operator op) {
switch (op) {
case Operator::Neg:
std::cout << "-";
break;
case Operator::Add:
std::cout << "+";
break;
case Operator::Neg:
case Operator::Sub:
std::cout << "-";
break;
case Operator::Mul:
case Operator::Deref:
case Operator::Ptr:
std::cout << "*";
break;
case Operator::Not:
std::cout << "not";
break;
Expand Down
3 changes: 3 additions & 0 deletions executable_semantics/ast/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ enum class ExpressionKind {
// The primitive operators that can appear in a primitive-operator expression.
// Three distinct operators share the `*` spelling (Mul, Deref, Ptr); which one
// is meant is decided by the lexer/parser, not by the printed symbol.
enum class Operator {
Add,    // Binary `+`; both operands are type-checked as Int.
And,    // Binary `and`.
Deref,  // Prefix `*`; operand must have a pointer type, result is the pointee.
Eq,     // Binary `==`; operand types must match, result is Bool.
Mul,    // Binary `*`; both operands are type-checked as Int.
Neg,    // Unary `-`; operand is type-checked as Int.
Not,    // Unary `not`.
Or,     // Binary `or`.
Sub,    // Binary `-`; both operands are type-checked as Int.
Ptr,    // Postfix `*`; operand must be a type, result is the pointer type.
};

struct Expression;
Expand Down
8 changes: 8 additions & 0 deletions executable_semantics/interpreter/interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ auto EvalPrim(Operator op, const std::vector<const Value*>& args, int line_num)
case Operator::Sub:
return Value::MakeIntVal(ValToInt(args[0], line_num) -
ValToInt(args[1], line_num));
case Operator::Mul:
return Value::MakeIntVal(ValToInt(args[0], line_num) *
ValToInt(args[1], line_num));
case Operator::Not:
return Value::MakeBoolVal(!ValToBool(args[0], line_num));
case Operator::And:
Expand All @@ -291,6 +294,11 @@ auto EvalPrim(Operator op, const std::vector<const Value*>& args, int line_num)
ValToBool(args[1], line_num));
case Operator::Eq:
return Value::MakeBoolVal(ValueEqual(args[0], args[1], line_num));
case Operator::Ptr:
return Value::MakePtrTypeVal(args[0]);
case Operator::Deref:
std::cerr << line_num << ": dereference not implemented yet\n";
exit(-1);
}
}

Expand Down
34 changes: 33 additions & 1 deletion executable_semantics/interpreter/typecheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ void ExpectType(int line_num, const std::string& context, const Value* expected,
}
}

// Verifies that `actual` is a pointer type value.
//
// If `actual->tag` is `ValKind::PointerTV` this returns normally. Otherwise it
// writes a type-error diagnostic (prefixed with `line_num` and naming
// `context`) followed by the printed form of `actual` to stderr, and then
// terminates the process via `exit(-1)`.
void ExpectPointerType(int line_num, const std::string& context,
                       const Value* actual) {
  // Guard clause: nothing to report when the value already is a pointer type.
  if (actual->tag == ValKind::PointerTV) {
    return;
  }

  std::cerr << line_num << ": type error in " << context << std::endl
            << "expected a pointer type\n"
            << "actual: ";
  PrintValue(actual, std::cerr);
  std::cerr << std::endl;
  exit(-1);
}

// Writes `s` to stderr exactly as given; no newline or prefix is added.
void PrintErrorString(const std::string& s) {
  std::cerr << s;
}

void PrintTypeEnv(TypeEnv types, std::ostream& out) {
Expand Down Expand Up @@ -71,6 +83,9 @@ auto ReifyType(const Value* t, int line_num) -> const Expression* {
return Expression::MakeVar(0, *t->GetStructType().name);
case ValKind::ChoiceTV:
return Expression::MakeVar(0, *t->GetChoiceType().name);
case ValKind::PointerTV:
return Expression::MakeUnOp(
0, Operator::Ptr, ReifyType(t->GetPointerType().type, line_num));
default:
std::cerr << line_num << ": expected a type, not ";
PrintValue(t, std::cerr);
Expand Down Expand Up @@ -317,10 +332,21 @@ auto TypeCheckExp(const Expression* e, TypeEnv types, Env values,
ExpectType(e->line_num, "negation", Value::MakeIntTypeVal(), ts[0]);
return TCResult(new_e, Value::MakeIntTypeVal(), new_types);
case Operator::Add:
ExpectType(e->line_num, "addition(1)", Value::MakeIntTypeVal(),
ts[0]);
ExpectType(e->line_num, "addition(2)", Value::MakeIntTypeVal(),
ts[1]);
return TCResult(new_e, Value::MakeIntTypeVal(), new_types);
case Operator::Sub:
ExpectType(e->line_num, "subtraction(1)", Value::MakeIntTypeVal(),
ts[0]);
ExpectType(e->line_num, "substration(2)", Value::MakeIntTypeVal(),
ExpectType(e->line_num, "subtraction(2)", Value::MakeIntTypeVal(),
ts[1]);
return TCResult(new_e, Value::MakeIntTypeVal(), new_types);
case Operator::Mul:
ExpectType(e->line_num, "multiplication(1)", Value::MakeIntTypeVal(),
ts[0]);
ExpectType(e->line_num, "multiplication(2)", Value::MakeIntTypeVal(),
ts[1]);
return TCResult(new_e, Value::MakeIntTypeVal(), new_types);
case Operator::And:
Expand All @@ -337,6 +363,12 @@ auto TypeCheckExp(const Expression* e, TypeEnv types, Env values,
case Operator::Eq:
ExpectType(e->line_num, "==", ts[0], ts[1]);
return TCResult(new_e, Value::MakeBoolTypeVal(), new_types);
case Operator::Deref:
ExpectPointerType(e->line_num, "*", ts[0]);
return TCResult(new_e, ts[0]->GetPointerType().type, new_types);
case Operator::Ptr:
ExpectType(e->line_num, "*", Value::MakeTypeTypeVal(), ts[0]);
return TCResult(new_e, Value::MakeTypeTypeVal(), new_types);
}
break;
}
Expand Down
3 changes: 1 addition & 2 deletions executable_semantics/interpreter/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,9 +357,8 @@ auto PrintValue(const Value* val, std::ostream& out) -> void {
out << "Continuation";
break;
case ValKind::PointerTV:
out << "Ptr(";
PrintValue(val->GetPointerType().type, out);
out << ")";
out << "*";
break;
case ValKind::FunctionTV:
out << "fn ";
Expand Down
70 changes: 63 additions & 7 deletions executable_semantics/syntax/lexer.lpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
%option yylineno

/* Lexing a token immediately after consuming some whitespace. */
%s AFTER_WHITESPACE
/* Lexing a token immediately after consuming an operand-ending token:
* a closing bracket, identifier, or literal.
*/
%s AFTER_OPERAND

AND "and"
ARROW "->"
AUTO "auto"
Expand Down Expand Up @@ -53,13 +60,21 @@ AWAIT "__await"
identifier [A-Za-z_][A-Za-z0-9_]*
integer_literal [0-9]+
horizontal_whitespace [ \t\r]
whitespace [ \t\r\n]
operand_start [(A-Za-z0-9_"]

%{
// This macro is expanded to run each time a token is recognized.
// This macro is expanded immediately before each action specified below.
//
// Advances the current token position by yyleng columns without changing
// the line number.
# define YY_USER_ACTION context.current_token_position.columns(yyleng);
// the line number, and takes us out of the after-whitespace / after-operand
// state.
# define YY_USER_ACTION \
context.current_token_position.columns(yyleng); \
if (YY_START == AFTER_WHITESPACE || \
YY_START == AFTER_OPERAND) { \
BEGIN(INITIAL); \
}
%}

%%
Expand Down Expand Up @@ -105,27 +120,66 @@ horizontal_whitespace [ \t\r]
"=" return yy::parser::make_EQUAL(context.current_token_position);
"-" return yy::parser::make_MINUS(context.current_token_position);
"+" return yy::parser::make_PLUS(context.current_token_position);
"*" return yy::parser::make_STAR(context.current_token_position);
"/" return yy::parser::make_SLASH(context.current_token_position);
"(" return yy::parser::make_LEFT_PARENTHESIS(context.current_token_position);
")" return yy::parser::make_RIGHT_PARENTHESIS(context.current_token_position);
")" { BEGIN(AFTER_OPERAND); return yy::parser::make_RIGHT_PARENTHESIS(context.current_token_position); }
"{" return yy::parser::make_LEFT_CURLY_BRACE(context.current_token_position);
"}" return yy::parser::make_RIGHT_CURLY_BRACE(context.current_token_position);
"}" { BEGIN(AFTER_OPERAND); return yy::parser::make_RIGHT_CURLY_BRACE(context.current_token_position); }
"[" return yy::parser::make_LEFT_SQUARE_BRACKET(context.current_token_position);
"]" return yy::parser::make_RIGHT_SQUARE_BRACKET(context.current_token_position);
"]" { BEGIN(AFTER_OPERAND); return yy::parser::make_RIGHT_SQUARE_BRACKET(context.current_token_position); }
"." return yy::parser::make_PERIOD(context.current_token_position);
"," return yy::parser::make_COMMA(context.current_token_position);
";" return yy::parser::make_SEMICOLON(context.current_token_position);
":" return yy::parser::make_COLON(context.current_token_position);

/*
For a `*` operator, we look at whitespace and local context to determine the
arity and fixity. There are two ways to write a binary operator:

1) Whitespace on both sides.
2) Whitespace on neither side, and the previous token is considered to be
the end of an operand, and the next token is considered to be the start
of an operand.

Otherwise, the operator is unary, but we also check for whitespace to help
the parser enforce the rule that whitespace is not permitted between the
operator and its operand, leading to three more cases:

3) Whitespace before (but implicitly not after, because that would give a
longer match and hit case 1): this can only be a prefix operator.
4) Whitespace after and not before: this can only be a postfix operator.
5) No whitespace on either side (otherwise the longest match would take us
to case 4): this is a unary operator and could be either prefix or
postfix.
*/
<AFTER_WHITESPACE>"*"{whitespace}+ /*case 1*/ {
BEGIN(AFTER_WHITESPACE);
return yy::parser::make_BINARY_STAR(context.current_token_position);
}
<AFTER_OPERAND>"*"/{operand_start} /*case 2*/ {
return yy::parser::make_BINARY_STAR(context.current_token_position);
}
<AFTER_WHITESPACE>"*" /*case 3*/ {
return yy::parser::make_PREFIX_STAR(context.current_token_position);
}
<INITIAL,AFTER_OPERAND>"*"{whitespace}+ /*case 4*/ {
BEGIN(AFTER_WHITESPACE);
return yy::parser::make_POSTFIX_STAR(context.current_token_position);
}
<INITIAL,AFTER_OPERAND>"*" /*case 5*/ {
return yy::parser::make_UNARY_STAR(context.current_token_position);
}

{identifier} {
BEGIN(AFTER_OPERAND);
int n = strlen(yytext);
auto r = reinterpret_cast<char*>(malloc((n + 1) * sizeof(char)));
strncpy(r, yytext, n + 1);
return yy::parser::make_identifier(r, context.current_token_position);
}

{integer_literal} {
BEGIN(AFTER_OPERAND);
auto r = atof(yytext);
return yy::parser::make_integer_literal(r, context.current_token_position);
}
Expand All @@ -140,13 +194,15 @@ horizontal_whitespace [ \t\r]
{horizontal_whitespace}+ {
// Make the span empty by setting start to end.
context.current_token_position.step();
BEGIN(AFTER_WHITESPACE);
}

\n+ {
// Advance end by yyleng lines, resetting the column to zero.
context.current_token_position.lines(yyleng);
// Make the span empty by setting start to end.
context.current_token_position.step();
BEGIN(AFTER_WHITESPACE);
}

. {
Expand Down
37 changes: 32 additions & 5 deletions executable_semantics/syntax/parser.ypp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ void yy::parser::error(
%token TYPE
%token FN
%token FNTY
%token ARROW
%token ARROW "->"
%token FNARROW "-> in return type"
%token VAR
%token EQUAL_EQUAL
%token IF
Expand All @@ -142,14 +143,20 @@ void yy::parser::error(
%token CHOICE
%token MATCH
%token CASE
%token DBLARROW
%token DBLARROW "=>"
%token DEFAULT
%token AUTO
%token
EQUAL "="
MINUS "-"
PLUS "+"
STAR "*"
// The lexer determines the arity and fixity of each `*` based on whitespace
// and adjacent tokens. UNARY_STAR indicates that the operator is unary but
// could be either prefix or postfix.
UNARY_STAR "unary *"
PREFIX_STAR "prefix *"
POSTFIX_STAR "postfix *"
BINARY_STAR "binary *"
SLASH "/"
LEFT_PARENTHESIS "("
RIGHT_PARENTHESIS ")"
Expand All @@ -163,12 +170,22 @@ void yy::parser::error(
COLON ":"
;

%precedence FNARROW
%precedence "{" "}"
%precedence ":" "," DBLARROW
%left OR AND
%nonassoc EQUAL_EQUAL
%left "+" "-"
%precedence NOT UNARY_MINUS
%left BINARY_STAR
%precedence NOT UNARY_MINUS PREFIX_STAR
// We need to give the `UNARY_STAR` token a precedence, rather than overriding
// the precedence of the `expression UNARY_STAR` rule below, because bison
// compares the precedence of the final token (for a shift) to the precedence
// of the other rule (for a reduce) when attempting to resolve a shift-reduce
// conflict. See https://stackoverflow.com/a/26188429/1041090. When UNARY_STAR
// is the final token of a rule, it must be a postfix usage, so we give it the
// same precedence as POSTFIX_STAR.
%precedence POSTFIX_STAR UNARY_STAR
%left "." ARROW
%precedence "(" ")" "[" "]"

Expand Down Expand Up @@ -214,6 +231,8 @@ expression:
{ $$ = Carbon::Expression::MakeBinOp(yylineno, Carbon::Operator::Add, $1, $3); }
| expression "-" expression
{ $$ = Carbon::Expression::MakeBinOp(yylineno, Carbon::Operator::Sub, $1, $3); }
| expression BINARY_STAR expression
{ $$ = Carbon::Expression::MakeBinOp(yylineno, Carbon::Operator::Mul, $1, $3); }
| expression AND expression
{ $$ = Carbon::Expression::MakeBinOp(yylineno, Carbon::Operator::And, $1, $3); }
| expression OR expression
Expand All @@ -222,8 +241,16 @@ expression:
{ $$ = Carbon::Expression::MakeUnOp(yylineno, Carbon::Operator::Not, $2); }
| "-" expression %prec UNARY_MINUS
{ $$ = Carbon::Expression::MakeUnOp(yylineno, Carbon::Operator::Neg, $2); }
| PREFIX_STAR expression
{ $$ = Carbon::Expression::MakeUnOp(yylineno, Carbon::Operator::Deref, $2); }
| UNARY_STAR expression %prec PREFIX_STAR
{ $$ = Carbon::Expression::MakeUnOp(yylineno, Carbon::Operator::Deref, $2); }
| expression tuple
{ $$ = Carbon::Expression::MakeCall(yylineno, $1, $2); }
| expression POSTFIX_STAR
{ $$ = Carbon::Expression::MakeUnOp(yylineno, Carbon::Operator::Ptr, $1); }
| expression UNARY_STAR
{ $$ = Carbon::Expression::MakeUnOp(yylineno, Carbon::Operator::Ptr, $1); }
| FNTY tuple return_type
{ $$ = Carbon::Expression::MakeFunType(yylineno, $2, $3); }
;
Expand Down Expand Up @@ -324,7 +351,7 @@ statement_list:
return_type:
// Empty
{ $$ = Carbon::Expression::MakeUnit(yylineno); }
| ARROW expression
| ARROW expression %prec FNARROW
{ $$ = $2; }
;
function_definition:
Expand Down
16 changes: 16 additions & 0 deletions executable_semantics/testdata/star.carbon
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Never actually called, so this only tests typechecking and semantic analysis.
// Exercises each way the lexer classifies a `*`:
//   - `Int*,` and `Int***)`: no whitespace on either side and not followed by
//     the start of an operand, so each `*` is a unary star that the parser
//     accepts as postfix (pointer type).
//   - `Int* {`: whitespace after but not before, so postfix.
//   - `n * *p`: the first `*` has whitespace on both sides (binary); the
//     second has whitespace before only (prefix dereference).
//   - `a*n` and `b*(`: no whitespace, previous token ends an operand and the
//     next character starts one, so binary.
//   - `(*p)` and the second `*` of `**q`: no whitespace on either side, so a
//     unary star that the parser accepts as prefix (dereference).
//   - `*p = ...` and the first `*` of `**q`: whitespace before only, so prefix.
fn F(n: Int, p: Int*, q: Int***) -> Int* {
var a: Int = n * *p;
var b: Int = a*n;
*p = b*(*p);
**q = p;
return **q;
}

// Entry point. Returns 0 without calling F; the expected output recorded in
// star.golden is `result: 0`.
fn main() -> Int {
return 0;
}
1 change: 1 addition & 0 deletions executable_semantics/testdata/star.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
result: 0
6 changes: 6 additions & 0 deletions toolchain/parser/parse_tree_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ TEST_F(ParseTreeTest, OperatorFixity) {
" var t: Type = Int*;\n"
" t = t**;\n"
" n = n * n;\n"
" n = n * *p;\n"
" n = n*n;\n"
" G(Int*, n * n);\n"
"}");
Expand Down Expand Up @@ -577,6 +578,11 @@ TEST_F(ParseTreeTest, OperatorFixity) {
MatchNameReference("n"), "=",
MatchInfixOperator(MatchNameReference("n"), "*",
MatchNameReference("n")))),
MatchExpressionStatement(MatchInfixOperator(
MatchNameReference("n"), "=",
MatchInfixOperator(
MatchNameReference("n"), "*",
MatchPrefixOperator("*", MatchNameReference("p"))))),
MatchExpressionStatement(MatchInfixOperator(
MatchNameReference("n"), "=",
MatchInfixOperator(MatchNameReference("n"), "*",
Expand Down
Loading

0 comments on commit 89e2111

Please sign in to comment.