Skip to content

Commit

Permalink
WIP: Generate an IR.
Browse files Browse the repository at this point in the history
The compiler has come a long way, but we still can't do int64, because
it requires a non-cell storage size. There's no (sane) way to express
conversions between int32 and int64 within the AST, because we have no
uniform way of inserting conversion nodes. This is already a deep
problem that has been hacked around for operator overloads and property
accessors, and it doesn't scale.

The solution is obvious: transform the AST into an IR. That's what we
should have done from the beginning but didn't. Unfortunately it
requires a *lot* of refactoring and a ton of boilerplate. So far, I have
most of the boilerplate done, but the refactoring is only halfway there.
CodeGenerator has not been ported to the IR yet.

Once this gigantic patch is done, we'll have the following changes:

- `struct value` will be eliminated, and good riddance.
- The AST will be immutable after parsing.
- The semantic analysis phase will output a new IR tree.
- CodeGenerator will generate code from the IR instead of the AST. Since
  the IR is a transformation of the AST, I'm expecting minimal changes to
  the end result.
- functag_t will be replaced by FunctionType.

V2: CG-IR can now assemble trivial programs.
V3: CG-IR supports basic calls; 341 test failures.
V4: CG-IR supports binary ops; 333 test failures.
V5: CG-IR supports do-while and if; 329 test failures.
V6: CG-IR supports args, local incdec, switch; 319 test failures.
V7: Dropped IncDecOp in favor of Store/BinaryOp primitives.
    Added global variable support.
V8: Added support for heap scopes. 294 test failures.
V9: Added support for Load(IndexOp) and arrays as arguments. 290 test
    failures.
  • Loading branch information
dvander committed Aug 15, 2024
1 parent fd82555 commit 978a233
Show file tree
Hide file tree
Showing 34 changed files with 2,843 additions and 2,062 deletions.
1 change: 1 addition & 0 deletions compiler/AMBuilder
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module.sources += [
'data-queue.cpp',
'errors.cpp',
'expressions.cpp',
'ir.cpp',
'lexer.cpp',
'main.cpp',
'name-resolution.cpp',
Expand Down
28 changes: 14 additions & 14 deletions compiler/array-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ bool Semantics::CheckArrayDeclaration(VarDeclBase* decl) {
class CompoundEmitter final
{
public:
CompoundEmitter(Type* type, Expr* init)
CompoundEmitter(QualType type, Expr* init)
: type_(type),
init_(init),
pending_zeroes_(0)
Expand Down Expand Up @@ -852,11 +852,11 @@ class CompoundEmitter final
size_t AddString(StringExpr* expr);
void AddInlineArray(LayoutFieldDecl* field, ArrayExpr* expr);
void AddInlineEnumStruct(EnumStructDecl* es, ArrayExpr* expr);
void EmitPadding(size_t rank_size, Type* type, size_t emitted, bool ellipses,
void EmitPadding(size_t rank_size, QualType type, size_t emitted, bool ellipses,
const ke::Maybe<cell> prev1, const ke::Maybe<cell> prev2);

private:
Type* type_;
QualType type_;
Expr* init_;
tr::vector<cell> iv_;
tr::vector<cell> data_;
Expand Down Expand Up @@ -948,7 +948,7 @@ cell CompoundEmitter::Emit(ArrayType* rank, Expr* init) {
// This only works because enum structs are flattened and don't support
// internal IVs. No plans to change this as it would greatly increase
// complexity unless we radically changed arrays.
EmitPadding(rank->size(), rank->inner(), emitted, ellipses, prev1, prev2);
EmitPadding(rank->size(), QualType(rank->inner()), emitted, ellipses, prev1, prev2);

return (start * sizeof(cell)) | kDataFlag;
}
Expand All @@ -967,7 +967,7 @@ void CompoundEmitter::AddInlineEnumStruct(EnumStructDecl* es, ArrayExpr* array)
assert(field);

auto rank_type = field->type()->to<ArrayType>();
EmitPadding(rank_type->size(), rank_type->inner(), emitted, false, {}, {});
EmitPadding(rank_type->size(), QualType(rank_type->inner()), emitted, false, {}, {});
} else if (ArrayExpr* expr = item->as<ArrayExpr>()) {
// Subarrays can only appear in an enum struct. Normal 2D cases
// would flow through the check at the start of this function.
Expand All @@ -994,12 +994,12 @@ void CompoundEmitter::AddInlineArray(LayoutFieldDecl* field, ArrayExpr* array) {
}

auto rank_size = field->type()->to<ArrayType>()->size();
EmitPadding(rank_size, field->type(), array->exprs().size(),
EmitPadding(rank_size, QualType(field->type()), array->exprs().size(),
array->ellipses(), prev1, prev2);
}

void
CompoundEmitter::EmitPadding(size_t rank_size, Type* type, size_t emitted, bool ellipses,
CompoundEmitter::EmitPadding(size_t rank_size, QualType type, size_t emitted, bool ellipses,
const ke::Maybe<cell> prev1, const ke::Maybe<cell> prev2)
{
// Pad remainder to zeroes if the array was explicitly sized.
Expand Down Expand Up @@ -1053,20 +1053,20 @@ CompoundEmitter::add_data(cell value)
data_.emplace_back(value);
}

void BuildCompoundInitializer(Type* type, Expr* init, ArrayData* array) {
void BuildCompoundInitializer(QualType type, Expr* init, ArrayData* array,
std::optional<cell_t> base_address)
{
CompoundEmitter emitter(type, init);
emitter.Emit();

array->iv = std::move(emitter.iv());
array->data = std::move(emitter.data());
array->zeroes = emitter.pending_zeroes();
}

void BuildCompoundInitializer(VarDeclBase* decl, ArrayData* array, cell base_address) {
BuildCompoundInitializer(decl->type(), decl->init_rhs(), array);

for (auto& v : array->iv)
v += base_address;
if (base_address) {
for (auto& v : array->iv)
v += *base_address;
}
}

} // namespace cc
Expand Down
4 changes: 2 additions & 2 deletions compiler/array-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ bool ResolveArrayType(Semantics* sema, const token_pos_t& pos, typeinfo_t* type,
// Perform type and size checks of an array and its initializer if present.
bool CheckArrayInitialization(Semantics* sema, const typeinfo_t& type, Expr* init);

void BuildCompoundInitializer(VarDeclBase* decl, ArrayData* array, cell_t base_addr);
void BuildCompoundInitializer(Type* type, Expr* init, ArrayData* array);
void BuildCompoundInitializer(QualType type, Expr* init, ArrayData* array,
std::optional<cell_t> base_address = {});

cell_t CalcArraySize(Type* type);

Expand Down
102 changes: 43 additions & 59 deletions compiler/assembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ struct function_entry {
function_entry(const function_entry& other) = delete;
function_entry& operator =(const function_entry& other) = delete;

FunctionDecl* decl = nullptr;
ir::Function* fun;
std::string name;
};

Expand Down Expand Up @@ -123,7 +123,7 @@ class RttiBuilder
RttiBuilder(CompileContext& cc, CodeGenerator& cg, SmxNameTable* names);

void finish(SmxBuilder& builder);
void add_method(FunctionDecl* fun);
void add_method(ir::Function* fun);
void add_native(FunctionDecl* sym);

private:
Expand Down Expand Up @@ -360,17 +360,19 @@ RttiBuilder::add_debug_var(SmxRttiTable<smx_rtti_debug_var>* table, DebugString&
var.type_id = type_id;
}

void RttiBuilder::add_method(FunctionDecl* fun) {
void RttiBuilder::add_method(ir::Function* fun) {
assert(fun->is_live());

uint32_t index = methods_->count();
smx_rtti_method& method = methods_->add();
method.name = names_->add(fun->name());
method.pcode_start = fun->cg()->label.offset();
method.pcode_end = fun->cg()->pcode_end;
method.signature = encode_signature(fun->canonical());

if (!fun->cg()->dbgstrs)
method.name = names_->add(fun->decl()->name());
method.pcode_start = fun->label().offset();
method.pcode_end = fun->pcode_end();
method.signature = encode_signature(fun->decl()->canonical());

(void)index;
#if 0
if (!fun->dbgstrs)
return;

smx_rtti_debug_method debug;
Expand All @@ -392,6 +394,7 @@ void RttiBuilder::add_method(FunctionDecl* fun) {
// Only add a method table entry if we actually had locals.
if (debug.first_local != dbg_locals_->count())
dbg_methods_->add(debug);
#endif
}

void RttiBuilder::add_native(FunctionDecl* fun) {
Expand Down Expand Up @@ -721,59 +724,44 @@ Assembler::Assemble(SmxByteBuffer* buffer)
std::vector<function_entry> functions;
std::unordered_set<Decl*> symbols;

// Sort globals.
std::vector<Decl*> global_symbols;
cc_.globals()->ForEachSymbol([&](Decl* decl) -> void {
global_symbols.push_back(decl);
auto mod = cg_.mod();

// This is only to assert that we embedded pointers properly in the assembly buffer.
symbols.emplace(decl);
// Sort globals.
std::sort(mod->functions().begin(), mod->functions().end(),
[](const ir::Function* a, const ir::Function* b) {
return a->decl()->name()->str() < b->decl()->name()->str();
});
for (const auto& decl : cc_.functions()) {
if (symbols.count(decl))
continue;
if (decl->canonical() != decl)

for (const auto& fun : mod->functions()) {
auto decl = fun->decl();

if (decl->is_native() || !fun->body())
continue;
global_symbols.push_back(decl);
symbols.emplace(decl);
}

std::sort(global_symbols.begin(), global_symbols.end(),
[](const Decl* a, const Decl *b) -> bool {
return a->name()->str() < b->name()->str();
});
function_entry entry;
entry.fun = fun;
if (decl->is_public()) {
entry.name = decl->name()->str();
} else {
// Create a private name.
entry.name = ke::StringPrintf(".%d.%s", fun->label().offset(), decl->name()->chars());
}

functions.emplace_back(std::move(entry));
}

#if 0
// Build the easy symbol tables.
for (const auto& decl : global_symbols) {
if (auto fun = decl->as<FunctionDecl>()) {
if (fun->is_native())
continue;

if (!fun->body())
continue;
if (!fun->is_live())
continue;
if (fun->canonical() != fun)
continue;

function_entry entry;
entry.decl = fun;
if (fun->is_public()) {
entry.name = fun->name()->str();
} else {
// Create a private name.
entry.name = ke::StringPrintf(".%d.%s", fun->cg()->label.offset(), fun->name()->chars());
}

functions.emplace_back(std::move(entry));
} else if (auto var = decl->as<VarDecl>()) {
if (auto var = decl->as<VarDecl>()) {
if (var->is_public() || (var->is_used() && !var->as<ConstDecl>())) {
sp_file_pubvars_t& pubvar = pubvars->add();
pubvar.address = var->addr();
pubvar.name = names->add(var->name());
}
}
}
#endif

// The public list must be sorted.
std::sort(functions.begin(), functions.end(),
Expand All @@ -783,31 +771,27 @@ Assembler::Assemble(SmxByteBuffer* buffer)
for (size_t i = 0; i < functions.size(); i++) {
function_entry& f = functions[i];

assert(f.decl->cg()->label.offset() > 0);
assert(f.decl->impl());
assert(f.decl->cg()->pcode_end > f.decl->cg()->label.offset());

sp_file_publics_t& pubfunc = publics->add();
pubfunc.address = f.decl->cg()->label.offset();
pubfunc.address = f.fun->label().offset();
pubfunc.name = names->add(*cc_.atoms(), f.name.c_str());

auto id = (uint32_t(i) << 1) | 1;
if (!Label::ValueFits(id))
report(421);
cg_.LinkPublicFunction(f.decl, id);
cg_.LinkPublicFunction(f.fun, id);

rtti.add_method(f.decl);
rtti.add_method(f.fun);
}

// Populate the native table.
for (size_t i = 0; i < cg_.native_list().size(); i++) {
FunctionDecl* sym = cg_.native_list()[i];
assert(size_t(sym->cg()->label.offset()) == i);
ir::Function* sym = cg_.native_list()[i];
assert(size_t(sym->label().offset()) == i);

sp_file_natives_t& entry = natives->add();
entry.name = names->add(sym->name());
entry.name = names->add(sym->decl()->name());

rtti.add_native(sym);
rtti.add_native(sym->decl());
}

// Set up the code section.
Expand Down
1 change: 1 addition & 0 deletions compiler/assembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "libsmx/data-pool.h"
#include "libsmx/smx-builder.h"
#include "libsmx/smx-encoding.h"
#include "ir.h"
#include "sc.h"
#include "shared/byte-buffer.h"
#include "shared/string-pool.h"
Expand Down
56 changes: 56 additions & 0 deletions compiler/ast-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
// 3. This notice may not be removed or altered from any source distribution.
#pragma once

#include <stdint.h>

#define AST_STMT_TYPE_LIST(FOR_EACH) \
FOR_EACH(StmtList) \
FOR_EACH(BlockStmt) \
Expand Down Expand Up @@ -79,6 +81,50 @@
FOR_EACH(StructExpr) \
FOR_EACH(StructInitFieldExpr)

// X-macro listing every IR node kind. FOR_EACH is invoked once per name, in
// the order written here; the list is grouped into declarations, statements,
// and values. It is expanded below to produce the enumerators of IrKind, so
// the order of entries determines the enumerator values.
//
// NOTE(review): IncDecOp is still listed although the introducing commit's
// message says it was dropped in favor of Store/BinaryOp primitives -- confirm
// whether this entry is stale.
#define IR_NODE_TYPE_LIST(FOR_EACH) \
/* Decls */ \
FOR_EACH(Function) \
FOR_EACH(Variable) \
FOR_EACH(Argument) \
/* Statements */ \
FOR_EACH(Return) \
FOR_EACH(ValueInsn) \
FOR_EACH(Exit) \
FOR_EACH(Break) \
FOR_EACH(Continue) \
FOR_EACH(Assert) \
FOR_EACH(If) \
FOR_EACH(DoWhile) \
FOR_EACH(Delete) \
FOR_EACH(ForLoop) \
FOR_EACH(Switch) \
FOR_EACH(FunctionDef) \
/* Values */ \
FOR_EACH(ConstVal) \
FOR_EACH(CharArrayLiteral) \
FOR_EACH(VariableRef) \
FOR_EACH(TypeRef) \
FOR_EACH(FunctionRef) \
FOR_EACH(IndexOp) \
FOR_EACH(Load) \
FOR_EACH(TernaryOp) \
FOR_EACH(BinaryOp) \
FOR_EACH(Array) \
FOR_EACH(CommaOp) \
FOR_EACH(CallOp) \
FOR_EACH(TempRef) \
FOR_EACH(PropertyRef) \
FOR_EACH(FieldRef) \
FOR_EACH(UnaryOp) \
FOR_EACH(CallUserOp) \
FOR_EACH(IncDecOp) \
FOR_EACH(Store) \
FOR_EACH(ThisRef) \
FOR_EACH(TempAddr)

namespace sp {
namespace cc {

enum class ExprKind : uint8_t
{
#define _(Name) Name,
Expand All @@ -92,3 +138,13 @@ enum class StmtKind : uint8_t
AST_STMT_TYPE_LIST(_)
#undef _
};

// One enumerator per entry of IR_NODE_TYPE_LIST, in the order the list
// declares them. The underlying type is uint8_t.
enum class IrKind : uint8_t
{
// `_` is a temporary helper that turns each list entry into an enumerator;
// it is undefined again immediately after the expansion.
#define _(Name) Name,
IR_NODE_TYPE_LIST(_)
#undef _
};

} // namespace cc
} // namespace sp
Loading

0 comments on commit 978a233

Please sign in to comment.