diff --git a/README.md b/README.md index 9da05e3..0be327b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[//]: # (TODO: Update README) + # Lang Compiler (Name TBD) This project is an LLVM-based compiler for a custom programming language called "Lang". diff --git a/build.sh b/build.sh index e21339c..ed4553c 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,7 @@ #!/bin/bash -# Default values for variables +set -e + LLVM_DIR="/usr/lib/llvm-14" BUILD_DIR="build" BUILD_TYPE="Debug" @@ -9,7 +10,6 @@ CXX_COMPILER="clang++" CCACHE="ccache" GENERATOR="Ninja" -# Function to display usage information usage() { echo "Usage: $0 [options]" echo "Commands:" @@ -28,7 +28,6 @@ usage() { exit 1 } -# Parse command-line options while [[ $# -gt 0 ]]; do case $1 in --build-dir) @@ -65,12 +64,10 @@ while [[ $# -gt 0 ]]; do esac done -# Check if a command is provided if [ $# -eq 0 ]; then usage fi -# Execute the appropriate command case "$1" in configure) mkdir -p "$BUILD_DIR" diff --git a/include/ADT/List.h b/include/ADT/NonOwningList.h similarity index 81% rename from include/ADT/List.h rename to include/ADT/NonOwningList.h index 30db2ed..09bb58e 100644 --- a/include/ADT/List.h +++ b/include/ADT/NonOwningList.h @@ -1,21 +1,10 @@ -#ifndef LANG_LIST_H -#define LANG_LIST_H +#ifndef LANG_NON_OWNING_LIST_H +#define LANG_NON_OWNING_LIST_H #include #include -/// @brief A singly linked list. -/// @tparam T The type of the elements in the list. -/// -/// @note This is a raw implementation of a singly linked list. It is meant to -/// replace allocating alternatives like std::vector and std::list. This way, -/// the caller can decide how to allocate the nodes, and the list will not -/// delete them on destruction. For now this is used in the AST representation -/// since the whole AST is allocated in one contiguous block of memory with no -/// RAII semantics. 
This discards having nodes that are not trivially -/// destructible, as would happen if std::vector or std::list were used in nodes -/// with a variable number of children (e.g. BlockStmtAST). -template class List { +template class NonOwningList { public: struct Node { T data; @@ -98,14 +87,14 @@ template class List { const Node *cur; }; - List() noexcept : head(nullptr), tail(nullptr), size_(0) {} - ~List() = default; + NonOwningList() noexcept : head(nullptr), tail(nullptr), size_(0) {} + ~NonOwningList() = default; - List(List &&) = delete; - List &operator=(List &&) = delete; + NonOwningList(NonOwningList &&) = delete; + NonOwningList &operator=(NonOwningList &&) = delete; - List(const List &) = default; - List &operator=(const List &) = default; + NonOwningList(const NonOwningList &) = default; + NonOwningList &operator=(const NonOwningList &) = default; [[nodiscard]] bool empty() const noexcept { return size_ == 0; } [[nodiscard]] std::size_t size() const noexcept { return size_; } @@ -190,4 +179,4 @@ template class List { std::size_t size_; }; -#endif // LANG_LIST_H +#endif // LANG_NON_OWNING_LIST_H diff --git a/include/AST/AST.h b/include/AST/AST.h index 08a557e..c7560aa 100644 --- a/include/AST/AST.h +++ b/include/AST/AST.h @@ -1,9 +1,9 @@ #ifndef LANG_AST_H #define LANG_AST_H -#include "ADT/List.h" +#include "ADT/NonOwningList.h" -#include "Sema/Type.h" +#include "Typing/Type.h" #include #include @@ -94,8 +94,8 @@ struct DeclAST { struct ModuleAST { std::string_view ident; - List decls; - ModuleAST(std::string_view ident, List decls) + NonOwningList decls; + ModuleAST(std::string_view ident, NonOwningList decls) : ident(ident), decls(decls) {} }; @@ -182,8 +182,8 @@ struct AssignStmtAST : public StmtAST { }; struct BlockStmtAST : public StmtAST { - List stmts; - BlockStmtAST(std::string_view span, List stmts) + NonOwningList stmts; + BlockStmtAST(std::string_view span, NonOwningList stmts) : StmtAST(StmtASTKind::Block, span), stmts(stmts) {} }; @@ -207,11 
+207,12 @@ struct WhileStmtAST : public StmtAST { /// === Declarations === struct FunctionDeclAST : public DeclAST { - List params; + NonOwningList params; Type *retType; BlockStmtAST *body; - FunctionDeclAST(std::string_view ident, List params, - Type *retType, BlockStmtAST *body) + FunctionDeclAST(std::string_view ident, + NonOwningList params, Type *retType, + BlockStmtAST *body) : DeclAST(DeclASTKind::Function, ident), params(params), retType(retType), body(body) {} }; diff --git a/include/Analysis/CFA.h b/include/Analysis/CFA.h new file mode 100644 index 0000000..ec9af8b --- /dev/null +++ b/include/Analysis/CFA.h @@ -0,0 +1,75 @@ +#ifndef LANG_CONTROL_FLOW_ANALYZER +#define LANG_CONTROL_FLOW_ANALYZER + +#include "Support/Reporting.h" + +#include "AST/ASTVisitor.h" + +#include + +namespace lang { + +enum class CFAErrorKind { + EarlyReturnStmt, + InvalidBreakStmt, +}; + +struct CFAError { + CFAErrorKind kind; + std::string_view span; + CFAError(CFAErrorKind kind, std::string_view span) + : kind(kind), span(span) {} + TextError toTextError() const; + JSONError toJSONError() const; +}; + +struct CFAResult { + std::vector errors; +}; + +class CFA : public MutableASTVisitor { + friend class ASTVisitor; + + public: + CFAResult analyzeModuleAST(ModuleAST &module); + + private: + std::stack breakableStack; + std::vector errors; + + void visit(FunctionDeclAST &node); + + void visit(ExprStmtAST &node) {} + + void visit(BreakStmtAST &node); + + void visit(ReturnStmtAST &node) {} + + void visit(LocalStmtAST &node) {} + + void visit(AssignStmtAST &node) {} + + void visit(BlockStmtAST &node); + + void visit(IfStmtAST &node); + + void visit(WhileStmtAST &node); + + void visit(IdentifierExprAST &node) {} + + void visit(NumberExprAST &node) {} + + void visit(UnaryExprAST &node) {} + + void visit(BinaryExprAST &node) {} + + void visit(CallExprAST &node) {} + + void visit(IndexExprAST &node) {} + + void visit(GroupedExprAST &node) {} +}; + +} // namespace lang + +#endif // 
LANG_CONTROL_FLOW_ANALYZER diff --git a/include/Sema/Resolver.h b/include/Analysis/Resolver.h similarity index 87% rename from include/Sema/Resolver.h rename to include/Analysis/Resolver.h index c864ab0..0483612 100644 --- a/include/Sema/Resolver.h +++ b/include/Analysis/Resolver.h @@ -3,18 +3,14 @@ #include "Support/Reporting.h" -#include "AST/AST.h" #include "AST/ASTVisitor.h" -#include #include -#include namespace lang { enum class ResolveErrorKind { - InvalidBreakStmt, - UnresolvedIdentifier, + UnknownIdentifier, }; struct ResolveError { @@ -40,7 +36,6 @@ class Resolver : public MutableASTVisitor { private: bool deepResolution; - std::stack breakableStack; std::unordered_map functionsMap; std::vector> localsMap; std::vector errors; @@ -51,7 +46,7 @@ class Resolver : public MutableASTVisitor { void visit(ExprStmtAST &node); - void visit(BreakStmtAST &node); + void visit(BreakStmtAST &node) {} void visit(ReturnStmtAST &node); @@ -67,7 +62,7 @@ class Resolver : public MutableASTVisitor { void visit(IdentifierExprAST &node); - void visit(NumberExprAST &node); + void visit(NumberExprAST &node) {} void visit(UnaryExprAST &node); diff --git a/include/Sema/Sema.h b/include/Analysis/TypeChecker.h similarity index 61% rename from include/Sema/Sema.h rename to include/Analysis/TypeChecker.h index 0bcc124..dbd1905 100644 --- a/include/Sema/Sema.h +++ b/include/Analysis/TypeChecker.h @@ -1,50 +1,52 @@ -#ifndef LANG_SEMA_H -#define LANG_SEMA_H +#ifndef LANG_TYPE_CHECKER_H +#define LANG_TYPE_CHECKER_H #include "Support/Reporting.h" #include "AST/AST.h" #include "AST/ASTVisitor.h" +#include "Typing/TypeContext.h" + #include namespace lang { -enum class SemaErrorKind { +enum class TypeCheckerErrorKind { InvalidReturn, InvalidAssignment, InvalidBinaryOperation, }; -struct SemaError { - SemaErrorKind kind; +struct TypeCheckerError { + TypeCheckerErrorKind kind; std::string_view span; TextError toTextError() const; JSONError toJSONError() const; }; -struct SemaResult { - 
std::vector errors; +struct TypeCheckerResult { + std::vector errors; }; -class Sema : public MutableASTVisitor { - friend class ASTVisitor; +class TypeChecker : public MutableASTVisitor { + friend class ASTVisitor; public: - Sema(TypeContext &typeCtx) : typeCtx(typeCtx), currentFunction(nullptr) {} + TypeChecker(TypeContext &typeCtx) : typeCtx(typeCtx), currentFunction(nullptr) {} - SemaResult analyzeModuleAST(ModuleAST &module); + TypeCheckerResult analyzeModuleAST(ModuleAST &module); private: TypeContext &typeCtx; FunctionDeclAST *currentFunction; - std::vector errors; + std::vector errors; void visit(FunctionDeclAST &node); void visit(ExprStmtAST &node); - void visit(BreakStmtAST &node); + void visit(BreakStmtAST &node) {} void visit(ReturnStmtAST &node); @@ -75,4 +77,4 @@ class Sema : public MutableASTVisitor { } // namespace lang -#endif // LANG_SEMA_H +#endif // LANG_TYPE_CHECKER_H diff --git a/include/Parse/Parser.h b/include/Parse/Parser.h index b3a3190..6d5dcda 100644 --- a/include/Parse/Parser.h +++ b/include/Parse/Parser.h @@ -9,7 +9,7 @@ #include "AST/AST.h" -#include "Sema/Type.h" +#include "Typing/TypeContext.h" #include @@ -18,7 +18,7 @@ namespace lang { enum class ParseErrorKind { UnexpectedEOF, UnexpectedToken, - ExpectedTypeAnnotation, + ExpectedType, ExpectedPrimaryExpression, }; diff --git a/include/Support/Debug.h b/include/Support/Debug.h index 3f43c62..8a427c4 100644 --- a/include/Support/Debug.h +++ b/include/Support/Debug.h @@ -12,9 +12,9 @@ #ifndef NDEBUG #define DEBUG(...) \ do { \ - fprintf(stderr, "[%s:%d] ", __FILE__, __LINE__); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ + fprintf(stdout, "[%s:%d] ", __FILE__, __LINE__); \ + fprintf(stdout, __VA_ARGS__); \ + fprintf(stdout, "\n"); \ } while (0) #else #define DEBUG(...)
diff --git a/include/Support/Reporting.h b/include/Support/Reporting.h index 3bfec83..91d2602 100644 --- a/include/Support/Reporting.h +++ b/include/Support/Reporting.h @@ -23,22 +23,23 @@ struct TextError { std::string label; }; -void reportTextError(const SourceFile &file, const TextError &error, - unsigned lineNoWidthHint = 0); +void reportTextError(llvm::raw_ostream &os, const SourceFile &file, + const TextError &error, unsigned lineNoWidthHint = 0); struct JSONError { std::string_view span; std::string_view title; }; -void reportJSONError(const SourceFile &file, const JSONError &error); +void reportJSONError(llvm::raw_ostream &os, const SourceFile &file, + const JSONError &error); /// @brief Reports a vector of errors in batch in plain text /// @pre errors only contains errors from the same file /// @pre errors is sorted in the order of appearence within the file template void reportTextErrors( - const SourceFile &file, const std::vector &errors, + llvm::raw_ostream &os, const SourceFile &file, const std::vector &errors, std::size_t maxErrors = std::numeric_limits::max()) { maxErrors = std::min(errors.size(), maxErrors); if (maxErrors == 0) { @@ -52,9 +53,9 @@ void reportTextErrors( const std::size_t lastButOne = maxErrors - 1; for (std::size_t i = 0; i < maxErrors; ++i) { const auto &error = errors[i]; - lang::reportTextError(file, error.toTextError(), lineNoMaxWidth); + lang::reportTextError(os, file, error.toTextError(), lineNoMaxWidth); if (i < lastButOne) { - llvm::errs() << lineNoSpacesBody << "|\n"; + os << lineNoSpacesBody << "|\n"; } } } @@ -62,7 +63,7 @@ void reportTextErrors( /// @brief Reports a vector of errors in batch in JSON format template void reportJSONErrors( - const SourceFile &file, const std::vector &errors, + llvm::raw_ostream &os, const SourceFile &file, const std::vector &errors, std::size_t maxErrors = std::numeric_limits::max()) { maxErrors = std::min(errors.size(), maxErrors); if (maxErrors == 0) { @@ -70,9 +71,16 @@ void 
reportJSONErrors( } - llvm::errs() << "[\n"; - for (const auto &error : errors) { - llvm::errs() << " "; - lang::reportJSONError(file, error.toJSONError()); + os << "[\n"; + const std::size_t lastButOne = maxErrors - 1; + for (std::size_t i = 0; i < maxErrors; ++i) { + const auto &error = errors[i]; + + os << " "; + lang::reportJSONError(os, file, error.toJSONError()); + + if (i < lastButOne) { + os << ",\n"; + } } - llvm::errs() << "]\n"; + os << "]\n"; } diff --git a/include/Typing/Type.h b/include/Typing/Type.h new file mode 100644 index 0000000..1fcaed1 --- /dev/null +++ b/include/Typing/Type.h @@ -0,0 +1,33 @@ +#ifndef LANG_TYPE_H +#define LANG_TYPE_H + +#include + +namespace lang { + +enum class TypeKind { + Void, + Number, + Pointer, + Function, +}; + +struct Type { + TypeKind kind; + explicit Type(TypeKind kind) : kind(kind) {} + std::string toString() const; + template T& as() { return static_cast(this); } + template const T& as() { return static_cast(this); } +}; + +struct PointerType : public Type { + PointerType() : Type(TypeKind::Pointer) {} +}; + +struct FunctionType : public Type { + FunctionType() : Type(TypeKind::Function) {} +}; + +} // namespace lang + +#endif // LANG_TYPE_H diff --git a/include/Sema/Type.h b/include/Typing/TypeContext.h similarity index 52% rename from include/Sema/Type.h rename to include/Typing/TypeContext.h index 57992cc..aa0bc18 100644 --- a/include/Sema/Type.h +++ b/include/Typing/TypeContext.h @@ -1,27 +1,12 @@ -#ifndef LANG_TYPE_H -#define LANG_TYPE_H +#ifndef LANG_TYPE_CONTEXT +#define LANG_TYPE_CONTEXT #include "Alloc/Arena.h" -#include +#include "Type.h" namespace lang { -enum class TypeKind { - Void, - Number, - Pointer, - Function, -}; - -struct Type { - TypeKind kind; - explicit Type(TypeKind kind) : kind(kind) {} - std::string toString() const; - template T& as() { return static_cast(this); } - template const T& as() { return static_cast(this); } -}; - class TypeContext { public: explicit TypeContext(Arena &arena) : arena(&arena) { @@ -40,21 +25,12
@@ class TypeContext { private: Arena *arena; - Type *tyVoid; Type *tyNumber; // TODO: declare hashCons(Type &type); }; -struct PointerType : public Type { - PointerType() : Type(TypeKind::Pointer) {} -}; - -struct FunctionType : public Type { - FunctionType() : Type(TypeKind::Function) {} -}; - } // namespace lang -#endif // LANG_TYPE_H +#endif // LANG_TYPE_CONTEXT diff --git a/samples/12.lang b/samples/12.lang deleted file mode 100644 index e69de29..0000000 diff --git a/samples/error/1.lang b/samples/error/1.lang new file mode 100644 index 0000000..61bf014 --- /dev/null +++ b/samples/error/1.lang @@ -0,0 +1,4 @@ +fn main() : void { + return; + let a = 1; +} diff --git a/samples/11.lang b/samples/error/2.lang similarity index 100% rename from samples/11.lang rename to samples/error/2.lang diff --git a/samples/test.py b/samples/test.py new file mode 100644 index 0000000..038e71f --- /dev/null +++ b/samples/test.py @@ -0,0 +1,160 @@ +import sys +import subprocess + + +def test_compiler_valid(compiler: str, files: list[str]) -> None: + """ + Test the compiler with the given files. + + Args: + compiler (str): The compiler to test + files (list[str]): A list of file names + """ + + for file in files: + res = subprocess.run( + [compiler, file], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + # Assert that the compiler returned successfully + if res.returncode != 0: + print(f"Error: {file} did not return successfully") + print(res.stderr) + sys.exit(1) + + # Assert stderr is empty + if res.stderr: + print(f"Error: {file} has non-empty stderr") + print(res.stderr) + sys.exit(1) + + # Assert that the compiler emits the correct output + print(f"Test passed: {file}") + + +def test_compiler_error(compiler: str, files: list[tuple[str, str, str]]) -> None: + """ + Test the compiler's error handling by checking if the compiler emits the correct error + for the given files. 
+ + Args: + compiler (str): The compiler to test + files (list[tuple[str, str, str]]): A list of tuples containing the file name, the expected error id and the location + """ + import json + + for file, error_id, location in files: + res = subprocess.run( + [compiler, "--error-format=json", file], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + # Assert that the compiler returned an error + if res.returncode == 0: + print(f"Error: {file} did not return an error") + sys.exit(1) + + # Assert stderr is not empty + if not res.stderr: + print(f"Error: {file} has empty stderr") + sys.exit(1) + + # Assert stderr is a valid JSON + error_json = None + try: + error_json = json.loads(res.stderr) + except json.JSONDecodeError: + print(f"Error: {file} has invalid JSON in stderr") + print(res.stderr) + sys.exit(1) + + # Check the length of the errors is 1 + if len(error_json) != 1: + print(f"Error: {file} reported {len(error_json)} errors, expected exactly 1") + sys.exit(1) + + # Check the error id + if error_json[0]["id"] != error_id: + print( + f"Error: {file} has incorrect error id: got {error_json[0]['id']}, expected {error_id}" + ) + sys.exit(1) + + # Check the error location + if error_json[0]["loc"] != location: + print( + f"Error: {file} has incorrect error location: got {error_json[0]['loc']}, expected {location}" + ) + sys.exit(1) + + # Assert that the correct error is emitted + print(f"Test passed: {file}") + + +def main(): + import argparse + + FILE_EXT = "lang" + VALID_PATH = "samples/valid" + ERROR_PATH = "samples/error" + + # Parse the command line arguments + + parser = argparse.ArgumentParser( + description="A testing script for compiler's error handling" + ) + + parser.add_argument("compiler", help="The compiler to test") + + # TODO: Actually implement this + parser.add_argument( + "-p", + "--pipeline", + help="The pipeline to test", + choices=["lex", "syn", "output", "all"], + default="all", + ) + + # Parse command line arguments + + args = parser.parse_args() + +
# Test the compiler with the valid files + + valid_test_set = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + valid_files = map( + lambda file: f"{VALID_PATH}/{file}.{FILE_EXT}", + valid_test_set, + ) + + print("\nTesting the compiler with the valid files") + test_compiler_valid(args.compiler, valid_files) + + # Test the compiler with the error files + + error_test_set = [ + ("1", "cfa-early-return-stmt", "2:5"), + ("2", "cfa-invalid-break-stmt", "2:5"), + ] + + error_files = map( + lambda file: ( + f"{ERROR_PATH}/{file[0]}.{FILE_EXT}", + file[1], + f"{ERROR_PATH}/{file[0]}.{FILE_EXT}:{file[2]}", + ), + error_test_set, + ) + + print("\nTesting the compiler with the error files") + test_compiler_error(args.compiler, error_files) + + +if __name__ == "__main__": + main() diff --git a/samples/test.sh b/samples/test.sh deleted file mode 100755 index ed668d6..0000000 --- a/samples/test.sh +++ /dev/null @@ -1,9 +0,0 @@ -BOLD=$(tput bold) -UNDERLINE=$(tput smul) -RESET=$(tput sgr0) -set -e -./build.sh build -for file in ./samples/*.lang; do - echo -e "\n${BOLD}${UNDERLINE}Compiling $file${RESET}" - ./build/src/compiler $file --emit=ast $@ -done diff --git a/samples/1.lang b/samples/valid/1.lang similarity index 100% rename from samples/1.lang rename to samples/valid/1.lang diff --git a/samples/10.lang b/samples/valid/10.lang similarity index 100% rename from samples/10.lang rename to samples/valid/10.lang diff --git a/samples/2.lang b/samples/valid/2.lang similarity index 100% rename from samples/2.lang rename to samples/valid/2.lang diff --git a/samples/3.lang b/samples/valid/3.lang similarity index 100% rename from samples/3.lang rename to samples/valid/3.lang diff --git a/samples/4.lang b/samples/valid/4.lang similarity index 100% rename from samples/4.lang rename to samples/valid/4.lang diff --git a/samples/5.lang b/samples/valid/5.lang similarity index 100% rename from samples/5.lang rename to samples/valid/5.lang diff --git a/samples/6.lang 
b/samples/valid/6.lang similarity index 100% rename from samples/6.lang rename to samples/valid/6.lang diff --git a/samples/7.lang b/samples/valid/7.lang similarity index 100% rename from samples/7.lang rename to samples/valid/7.lang diff --git a/samples/8.lang b/samples/valid/8.lang similarity index 100% rename from samples/8.lang rename to samples/valid/8.lang diff --git a/samples/9.lang b/samples/valid/9.lang similarity index 100% rename from samples/9.lang rename to samples/valid/9.lang diff --git a/src/Analysis/CFA.cpp b/src/Analysis/CFA.cpp new file mode 100644 index 0000000..66b491f --- /dev/null +++ b/src/Analysis/CFA.cpp @@ -0,0 +1,76 @@ +#include "Analysis/CFA.h" + +namespace lang { + +TextError CFAError::toTextError() const { + switch (kind) { + case CFAErrorKind::EarlyReturnStmt: + return {span, "Early return statement", + "Code after return statement will never be executed"}; + case CFAErrorKind::InvalidBreakStmt: + return {span, "Invalid break statement", + "Break statement outside loop"}; + } + return {span, "Unknown control flow analysis error title", + "Unknown control flow analysis error label"}; +} + +JSONError CFAError::toJSONError() const { + switch (kind) { + case CFAErrorKind::EarlyReturnStmt: + return {span, "cfa-early-return-stmt"}; + case CFAErrorKind::InvalidBreakStmt: + return {span, "cfa-invalid-break-stmt"}; + } + return {span, "cfa-unknown-error"}; +} + +CFAResult CFA::analyzeModuleAST(ModuleAST &module) { + for (auto *decl : module.decls) { + ASTVisitor::visit(*decl); + } + return {std::move(errors)}; +} + +void CFA::visit(FunctionDeclAST &node) { visit(*node.body); } + +void CFA::visit(BreakStmtAST &node) { + if (breakableStack.empty()) { + errors.push_back({CFAErrorKind::InvalidBreakStmt, node.span}); + return; + } + + node.stmt = breakableStack.top(); +} + +void CFA::visit(BlockStmtAST &node) { + std::size_t i = 0; + for (auto *stmt : node.stmts) { + ASTVisitor::visit(*stmt); + + ++i; + if (stmt->kind == StmtASTKind::Break || +
stmt->kind == StmtASTKind::Return) { + if (i != node.stmts.size()) { + errors.push_back({CFAErrorKind::EarlyReturnStmt, stmt->span}); + break; + } + } + } +} + +void CFA::visit(IfStmtAST &node) { + ASTVisitor::visit(*node.thenBranch); + if (node.elseBranch != nullptr) { + ASTVisitor::visit(*node.elseBranch); + } +} + +void CFA::visit(WhileStmtAST &node) { + ASTVisitor::visit(*node.cond); + breakableStack.push(&node); + visit(*node.body); + breakableStack.pop(); +} + +} // namespace lang diff --git a/src/Sema/Resolver.cpp b/src/Analysis/Resolver.cpp similarity index 75% rename from src/Sema/Resolver.cpp rename to src/Analysis/Resolver.cpp index 5daed83..b8d806a 100644 --- a/src/Sema/Resolver.cpp +++ b/src/Analysis/Resolver.cpp @@ -1,5 +1,4 @@ -#include "Sema/Resolver.h" -#include "AST/AST.h" +#include "Analysis/Resolver.h" #include @@ -7,20 +6,17 @@ namespace lang { TextError ResolveError::toTextError() const { switch (kind) { - case ResolveErrorKind::InvalidBreakStmt: - return {span, "Invalid break statement", "Does not break anything"}; - case ResolveErrorKind::UnresolvedIdentifier: - return {span, "Unresolved identifier", "Cannot be resolved"}; + case ResolveErrorKind::UnknownIdentifier: + return {span, "Unknown identifier", "Unknown identifier"}; } - return {span, "Unknown resolve error title", "Unknown resolve error label"}; + return {span, "Unknown resolution error title", + "Unknown resolve error label"}; } JSONError ResolveError::toJSONError() const { switch (kind) { - case ResolveErrorKind::InvalidBreakStmt: - return {span, "resolve-invalid-break-stmt"}; - case ResolveErrorKind::UnresolvedIdentifier: - return {span, "resolve-unresolved-identifier"}; + case ResolveErrorKind::UnknownIdentifier: + return {span, "resolve-unknown-identifier"}; } return {span, "resolve-unknown-error"}; } @@ -62,15 +58,6 @@ void Resolver::visit(FunctionDeclAST &node) { void Resolver::visit(ExprStmtAST &node) { ASTVisitor::visit(*node.expr); } -void Resolver::visit(BreakStmtAST 
&node) { - if (breakableStack.empty()) { - errors.push_back({ResolveErrorKind::InvalidBreakStmt, node.span}); - return; - } - - node.stmt = breakableStack.top(); -} - void Resolver::visit(ReturnStmtAST &node) { if (node.expr != nullptr) { ASTVisitor::visit(*node.expr); @@ -108,9 +95,7 @@ void Resolver::visit(IfStmtAST &node) { void Resolver::visit(WhileStmtAST &node) { ASTVisitor::visit(*node.cond); - breakableStack.push(&node); visit(*node.body); - breakableStack.pop(); } void Resolver::visit(IdentifierExprAST &node) { @@ -122,15 +107,13 @@ void Resolver::visit(IdentifierExprAST &node) { const auto it = functionsMap.find(node.span); if (it == functionsMap.end()) { - errors.push_back({ResolveErrorKind::UnresolvedIdentifier, node.span}); + errors.push_back({ResolveErrorKind::UnknownIdentifier, node.span}); return; } node.decl = it->second; } -void Resolver::visit(NumberExprAST &node) { /* no-op */ } - void Resolver::visit(UnaryExprAST &node) { ASTVisitor::visit(*node.expr); } void Resolver::visit(BinaryExprAST &node) { diff --git a/src/Sema/Sema.cpp b/src/Analysis/TypeChecker.cpp similarity index 55% rename from src/Sema/Sema.cpp rename to src/Analysis/TypeChecker.cpp index 849e2c1..2d36b11 100644 --- a/src/Sema/Sema.cpp +++ b/src/Analysis/TypeChecker.cpp @@ -1,4 +1,4 @@ -#include "Sema/Sema.h" +#include "Analysis/TypeChecker.h" namespace { @@ -12,67 +12,65 @@ template Overloaded(Ts...) 
-> Overloaded; namespace lang { -TextError SemaError::toTextError() const { +TextError TypeCheckerError::toTextError() const { switch (kind) { - case SemaErrorKind::InvalidReturn: + case TypeCheckerErrorKind::InvalidReturn: return {span, "Invalid return statement", "Return type mismatch"}; - case SemaErrorKind::InvalidAssignment: + case TypeCheckerErrorKind::InvalidAssignment: return {span, "Invalid assignment", "Type mismatch"}; - case SemaErrorKind::InvalidBinaryOperation: + case TypeCheckerErrorKind::InvalidBinaryOperation: return {span, "Invalid binary operation", "Type mismatch"}; } - return {span, "Unknown sema error title", "Unknown sema error label"}; + return {span, "Unknown type checking error title", "Unknown type checking error label"}; } -JSONError SemaError::toJSONError() const { +JSONError TypeCheckerError::toJSONError() const { switch (kind) { - case SemaErrorKind::InvalidReturn: - return {span, "sema-invalid-return"}; - case SemaErrorKind::InvalidAssignment: - return {span, "sema-invalid-assignment"}; - case SemaErrorKind::InvalidBinaryOperation: - return {span, "sema-invalid-binary-operation"}; + case TypeCheckerErrorKind::InvalidReturn: + return {span, "type-check-invalid-return"}; + case TypeCheckerErrorKind::InvalidAssignment: + return {span, "type-check-invalid-assignment"}; + case TypeCheckerErrorKind::InvalidBinaryOperation: + return {span, "type-check-invalid-binary-operation"}; } - return {span, "sema-unknown-error"}; + return {span, "type-check-unknown-error"}; } -SemaResult Sema::analyzeModuleAST(ModuleAST &module) { +TypeCheckerResult TypeChecker::analyzeModuleAST(ModuleAST &module) { for (auto *decl : module.decls) { ASTVisitor::visit(*decl); } return {std::move(errors)}; } -void Sema::visit(FunctionDeclAST &node) { +void TypeChecker::visit(FunctionDeclAST &node) { currentFunction = &node; ASTVisitor::visit(*node.body); } -void Sema::visit(ExprStmtAST &node) { ASTVisitor::visit(*node.expr); } +void TypeChecker::visit(ExprStmtAST &node) 
{ ASTVisitor::visit(*node.expr); } -void Sema::visit(BreakStmtAST &node) { /* no-op */ } - -void Sema::visit(ReturnStmtAST &node) { +void TypeChecker::visit(ReturnStmtAST &node) { if (node.expr != nullptr) { ASTVisitor::visit(*node.expr); } if (currentFunction->retType == nullptr) { if (node.expr != nullptr) { - errors.push_back({SemaErrorKind::InvalidReturn, node.span}); + errors.push_back({TypeCheckerErrorKind::InvalidReturn, node.span}); } } else { if (node.expr == nullptr) { - errors.push_back({SemaErrorKind::InvalidReturn, node.span}); + errors.push_back({TypeCheckerErrorKind::InvalidReturn, node.span}); } else { if (node.expr->type != currentFunction->retType) { - errors.push_back({SemaErrorKind::InvalidReturn, node.span}); + errors.push_back({TypeCheckerErrorKind::InvalidReturn, node.span}); } } } } -void Sema::visit(LocalStmtAST &node) { +void TypeChecker::visit(LocalStmtAST &node) { if (node.init != nullptr) { ASTVisitor::visit(*node.init); @@ -80,32 +78,32 @@ void Sema::visit(LocalStmtAST &node) { node.type = node.init->type; } else { if (node.type != node.init->type) { - errors.push_back({SemaErrorKind::InvalidAssignment, node.span}); + errors.push_back({TypeCheckerErrorKind::InvalidAssignment, node.span}); } } } else { if (node.type == nullptr) { - errors.push_back({SemaErrorKind::InvalidAssignment, node.span}); + errors.push_back({TypeCheckerErrorKind::InvalidAssignment, node.span}); } } } -void Sema::visit(AssignStmtAST &node) { +void TypeChecker::visit(AssignStmtAST &node) { ASTVisitor::visit(*node.lhs); ASTVisitor::visit(*node.rhs); if (node.lhs->type != node.rhs->type) { - errors.push_back({SemaErrorKind::InvalidAssignment, node.span}); + errors.push_back({TypeCheckerErrorKind::InvalidAssignment, node.span}); } } -void Sema::visit(BlockStmtAST &node) { +void TypeChecker::visit(BlockStmtAST &node) { for (auto *stmt : node.stmts) { ASTVisitor::visit(*stmt); } } -void Sema::visit(IfStmtAST &node) { +void TypeChecker::visit(IfStmtAST &node) { 
ASTVisitor::visit(*node.cond); ASTVisitor::visit(*node.thenBranch); @@ -114,12 +112,12 @@ void Sema::visit(IfStmtAST &node) { } } -void Sema::visit(WhileStmtAST &node) { +void TypeChecker::visit(WhileStmtAST &node) { ASTVisitor::visit(*node.cond); ASTVisitor::visit(*node.body); } -void Sema::visit(IdentifierExprAST &node) { +void TypeChecker::visit(IdentifierExprAST &node) { std::visit( Overloaded{ [&](const std::monostate) {}, @@ -130,26 +128,26 @@ void Sema::visit(IdentifierExprAST &node) { node.decl); } -void Sema::visit(NumberExprAST &node) { node.type = typeCtx.getTypeNumber(); } +void TypeChecker::visit(NumberExprAST &node) { node.type = typeCtx.getTypeNumber(); } -void Sema::visit(UnaryExprAST &node) { +void TypeChecker::visit(UnaryExprAST &node) { ASTVisitor::visit(*node.expr); node.type = node.expr->type; } -void Sema::visit(BinaryExprAST &node) { +void TypeChecker::visit(BinaryExprAST &node) { ASTVisitor::visit(*node.lhs); ASTVisitor::visit(*node.rhs); if (node.lhs->type != node.rhs->type) { - errors.push_back({SemaErrorKind::InvalidBinaryOperation, node.span}); + errors.push_back({TypeCheckerErrorKind::InvalidBinaryOperation, node.span}); } node.type = node.lhs->type; } -void Sema::visit(CallExprAST &node) { +void TypeChecker::visit(CallExprAST &node) { ASTVisitor::visit(*node.callee); ASTVisitor::visit(*node.arg); @@ -160,14 +158,14 @@ void Sema::visit(CallExprAST &node) { node.type = node.callee->type; } -void Sema::visit(IndexExprAST &node) { +void TypeChecker::visit(IndexExprAST &node) { ASTVisitor::visit(*node.base); ASTVisitor::visit(*node.index); node.type = node.base->type; } -void Sema::visit(GroupedExprAST &node) { +void TypeChecker::visit(GroupedExprAST &node) { ASTVisitor::visit(*node.expr); node.type = node.expr->type; diff --git a/src/Lex/Lexer.cpp b/src/Lex/Lexer.cpp index 68fcf0f..a5c7555 100644 --- a/src/Lex/Lexer.cpp +++ b/src/Lex/Lexer.cpp @@ -35,7 +35,7 @@ TextError LexError::toTextError() const { case LexErrorKind::InvalidCharacter: 
return {span, "Invalid character", "Invalid character"}; } - return {span, "Unknown lex error title", "Unknown lex error label"}; + return {span, "Unknown lexing error title", "Unknown lex error label"}; } JSONError LexError::toJSONError() const { diff --git a/src/Parse/Parser.cpp b/src/Parse/Parser.cpp index 043f34b..41b7002 100644 --- a/src/Parse/Parser.cpp +++ b/src/Parse/Parser.cpp @@ -72,13 +72,13 @@ TextError ParseError::toTextError() const { case ParseErrorKind::UnexpectedToken: return {span, "Unexpected token", "Expected " + tokenKindToString(expected) + " instead"}; - case ParseErrorKind::ExpectedTypeAnnotation: - return {span, "Unexpected token", "Expected a type annotation instead"}; + case ParseErrorKind::ExpectedType: + return {span, "Unexpected token", "Expected a type instead"}; case ParseErrorKind::ExpectedPrimaryExpression: return {span, "Unexpected token", "Expected a primary expression instead"}; } - return {span, "Unknown parse error title", "Unknown parse error label"}; + return {span, "Unknown parsing error title", "Unknown parse error label"}; } JSONError ParseError::toJSONError() const { @@ -87,10 +87,10 @@ JSONError ParseError::toJSONError() const { return {span, "parse-unexpected-eof"}; case ParseErrorKind::UnexpectedToken: return {span, "parse-unexpeected-token"}; - case ParseErrorKind::ExpectedTypeAnnotation: - return {span, "parse-type-annotation"}; + case ParseErrorKind::ExpectedType: + return {span, "parse-expected-type"}; case ParseErrorKind::ExpectedPrimaryExpression: - return {span, "parse-unexpected-primary-expr"}; + return {span, "parse-expected-primary-expr"}; } return {span, "parser-unknown-error"}; } @@ -144,7 +144,7 @@ void Parser::sync(const std::unordered_set &syncSet) { } ParseResult Parser::parseModuleAST() { - List decls; + NonOwningList decls; DeclAST *decl = nullptr; const Token *tok = peek(); @@ -181,7 +181,7 @@ FunctionDeclAST *Parser::parseFunctionDeclAST() { EXPECT(TokenKind::LParen); - List params; + NonOwningList 
params; LocalStmtAST *param = nullptr; const Token *tok = peek(); @@ -224,7 +224,7 @@ Type *Parser::parseTypeAnnotation() { case TokenKind::KwNumber: return typeCtx->getTypeNumber(); default: - errors.emplace_back(ParseErrorKind::ExpectedTypeAnnotation, tok->span, + errors.emplace_back(ParseErrorKind::ExpectedType, tok->span, TokenKind::Amp); } @@ -235,7 +235,7 @@ BlockStmtAST *Parser::parseBlockStmtAST() { const Token *lBrace = expect(TokenKind::LBrace); RETURN_IF_NULL(lBrace); - List stmts; + NonOwningList stmts; StmtAST *stmt = nullptr; const Token *tok = peek(); diff --git a/src/Support/Reporting.cpp b/src/Support/Reporting.cpp index c3788b8..af91397 100644 --- a/src/Support/Reporting.cpp +++ b/src/Support/Reporting.cpp @@ -3,8 +3,8 @@ namespace lang { // NOLINTNEXTLINE -void reportTextError(const SourceFile &file, const TextError &error, - unsigned lineNoWidthHint) { +void reportTextError(llvm::raw_ostream &os, const SourceFile &file, + const TextError &error, unsigned lineNoWidthHint) { assert(!error.span.empty() && "span cannot be empty"); const SourceLocation loc = file.getLocation(error.span); const unsigned lineNoWidth = getNumDigits(loc.line); @@ -14,21 +14,20 @@ void reportTextError(const SourceFile &file, const TextError &error, const std::string lineNoSpacesLine(lineNoMaxWidth - lineNoWidth, ' '); const std::string labelSpaces(loc.column - 1, ' '); const std::string labelTildes(error.span.size() - 1, '~'); - llvm::errs() << lineNoSpacesTitle << "--> Error at " << loc.filename << ':' - << loc.line << ':' << loc.column << ": " << error.title - << '\n'; - llvm::errs() << lineNoSpacesBody << "|\n"; - llvm::errs() << ' ' << lineNoSpacesLine << loc.line << " | " << loc.lineText - << '\n'; - llvm::errs() << lineNoSpacesBody << "| " << labelSpaces << '^' - << labelTildes << ' ' << error.label << '\n'; + os << lineNoSpacesTitle << "--> Error at " << loc.filename << ':' + << loc.line << ':' << loc.column << ": " << error.title << '\n'; + os << lineNoSpacesBody 
<< "|\n"; + os << ' ' << lineNoSpacesLine << loc.line << " | " << loc.lineText << '\n'; + os << lineNoSpacesBody << "| " << labelSpaces << '^' << labelTildes << ' ' + << error.label << '\n'; } -void reportJSONError(const SourceFile &file, const JSONError &error) { +void reportJSONError(llvm::raw_ostream &os, const SourceFile &file, + const JSONError &error) { assert(!error.span.empty() && "span cannot be empty"); const SourceLocation loc = file.getLocation(error.span); - llvm::errs() << "{ \"id\" : \"" << error.title << "\", \"location\" : \"" << loc.filename << ':' - << loc.line << ':' << loc.column << "\" },\n"; + os << "{ \"id\" : \"" << error.title << "\", \"loc\" : \"" << loc.filename + << ':' << loc.line << ':' << loc.column << "\" }\n"; } } // namespace lang diff --git a/src/Sema/Type.cpp b/src/Typing/Type.cpp similarity index 93% rename from src/Sema/Type.cpp rename to src/Typing/Type.cpp index 7b0374a..deab9a8 100644 --- a/src/Sema/Type.cpp +++ b/src/Typing/Type.cpp @@ -1,4 +1,4 @@ -#include "Sema/Type.h" +#include "Typing/Type.h" namespace lang { diff --git a/src/main.cpp b/src/main.cpp index 071e515..b632276 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,8 +9,9 @@ #include "Parse/Parser.h" -#include "Sema/Resolver.h" -#include "Sema/Sema.h" +#include "Analysis/CFA.h" +#include "Analysis/Resolver.h" +#include "Analysis/TypeChecker.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" @@ -68,15 +69,15 @@ const llvm::cl::opt compilerEmitAction( template void reportErrors( - CompilerErrorFormat format, const lang::SourceFile &file, - const std::vector &errors, + llvm::raw_ostream &os, CompilerErrorFormat format, + const lang::SourceFile &file, const std::vector &errors, std::size_t maxErrors = std::numeric_limits::max()) { switch (format) { case CompilerErrorFormat::Text: - lang::reportTextErrors(file, errors, maxErrors); + lang::reportTextErrors(os, file, errors, maxErrors); break; case CompilerErrorFormat::JSON: - 
lang::reportJSONErrors(file, errors, maxErrors); + lang::reportJSONErrors(os, file, errors, maxErrors); break; } } @@ -90,7 +91,8 @@ int main(int argc, char **argv) { const auto file = llvm::MemoryBuffer::getFile(inputFilename); if (!file) { - llvm::errs() << "Error: " << file.getError().message() << '\n'; + llvm::errs() << "Error: while opening file " << inputFilename << ": " + << file.getError().message() << '\n'; return EXIT_FAILURE; } @@ -111,7 +113,8 @@ int main(int argc, char **argv) { } if (!lexResult.errors.empty()) { - reportErrors(compilerErrorFormat, source, lexResult.errors); + reportErrors(llvm::errs(), compilerErrorFormat, source, + lexResult.errors); return EXIT_FAILURE; } @@ -126,7 +129,7 @@ int main(int argc, char **argv) { lang::Parser parser(arena, typeCtx, lexResult.tokens); const auto parseResult = parser.parseModuleAST(); - DEBUG("%lu allocations in %lu bytes", arena.totalAllocations(), + DEBUG("%lu alloc() with %lu bytes", arena.totalAllocations(), arena.totalAllocated()); if (compilerEmitAction == CompilerEmitAction::Src) { @@ -139,7 +142,8 @@ int main(int argc, char **argv) { } if (!parseResult.errors.empty()) { - reportErrors(compilerErrorFormat, source, parseResult.errors); + reportErrors(llvm::errs(), compilerErrorFormat, source, + parseResult.errors); return EXIT_FAILURE; } @@ -149,6 +153,15 @@ int main(int argc, char **argv) { lang::ModuleAST *module = parseResult.module; + lang::CFA controlFlowAnalyzer; + const auto cfaResult = controlFlowAnalyzer.analyzeModuleAST(*module); + + if (!cfaResult.errors.empty()) { + reportErrors(llvm::errs(), compilerErrorFormat, source, + cfaResult.errors); + return EXIT_FAILURE; + } + lang::Resolver resolver; const auto resolveResult = resolver.resolveModuleAST(*module); @@ -157,19 +170,21 @@ int main(int argc, char **argv) { } if (!resolveResult.errors.empty()) { - reportErrors(compilerErrorFormat, source, resolveResult.errors); + reportErrors(llvm::errs(), compilerErrorFormat, source, + 
resolveResult.errors); return EXIT_FAILURE; } - lang::Sema sema(typeCtx); - const auto semaResult = sema.analyzeModuleAST(*module); + lang::TypeChecker typeChecker(typeCtx); + const auto typeCheckerResult = typeChecker.analyzeModuleAST(*module); if (compilerEmitAction == CompilerEmitAction::AST) { astPrinter.visit(*module); } if (!resolveResult.errors.empty()) { - reportErrors(compilerErrorFormat, source, semaResult.errors); + reportErrors(llvm::errs(), compilerErrorFormat, source, + typeCheckerResult.errors); return EXIT_FAILURE; } }