Merge pull request #13 from mhhollomon/refactor

Add semantic actions for rules
mhhollomon · Oct 7, 2019 · bef62cf · bef62cf
2 parents f4048b9 + fdd0fe1
commit bef62cf
Show file tree

Hide file tree

Showing 92 changed files with 9,941 additions and 20,924 deletions.
diff --git a/.cirrus.yml b/.cirrus.yml
@@ -1,5 +1,5 @@
 container:
-    image: mhhollomon/yalr-ci:debian
+    image: mhhollomon/yalr-ci:arch
     memory: 8G
 
 task:
@@ -9,7 +9,7 @@ task:
     CXX: clang++
   build_script:
     - ${CXX} --version
-    - ./scripts/build.sh release
+    - ./scripts/build.sh ci clang
 
 task:
   name: g++-build-release
@@ -18,4 +18,4 @@ task:
     CXX: g++
   build_script:
     - ${CXX} --version
-    - ./scripts/build.sh release
+    - ./scripts/build.sh ci gnu
diff --git a/.ctags b/.ctags
@@ -0,0 +1,3 @@
+--recurse=yes
+--exclude=build/*
+--exclude=extern/*
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 build
 build-*
+tags
+*.sublime-workspace
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,80 @@
+# yalr
+cmake_minimum_required(VERSION 3.13)
+
+project(Yalr VERSION 0.03
+             DESCRIPTION "Yet another LR Parser Generator"
+             LANGUAGES CXX)
+
+#
+# Make sure we use -std=c++17 or higher
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+#
+# Set the default build type to Release (if the user doesn't specify)
+#
+set(default_build_type "Release")
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+  message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
+  set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
+      STRING "Choose the type of build." FORCE)
+  # Set the possible values of build type for cmake-gui
+  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
+      "Debug" "Release" "Developer" "CI")
+endif()
+
+
+add_compile_options("$<$<CONFIG:Developer>:-g;-O1;-Wall;-pedantic>")
+add_compile_options("$<$<CONFIG:CI>:-O1;-Wall;-pedantic>")
+add_compile_options("$<$<CONFIG:Release>:-O3>")
+
+add_subdirectory(extern)
+
+add_subdirectory(src)
+
+##
+## yalr application
+##
+add_executable(yalr)
+
+target_sources(yalr
+    PRIVATE
+        "src/main.cpp"
+    )
+
+target_link_libraries(yalr
+    PRIVATE
+        lib-include
+        translate_objlib
+        analyzer_objlib
+        parser_objlib
+        tablegen_objlib
+        codegen_objlib
+        errorinfo_objlib
+        sourcetext_objlib
+    )
+
+#
+# Uses the yalr executable
+#
+add_subdirectory(examples)
+
+#
+# Testing
+#
+enable_testing()
+add_subdirectory(test)
+
+#
+# ctags (Developer builds only): index the source tree into ${CMAKE_BINARY_DIR}/tags
+#
+if(CMAKE_BUILD_TYPE STREQUAL "Developer")
+    add_custom_target(ctags
+        COMMAND ctags --options=${CMAKE_HOME_DIRECTORY}/.ctags -f ${CMAKE_BINARY_DIR}/tags .
+        WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}"
+        BYPRODUCTS tags
+        VERBATIM
+        COMMENT "Generating ctags"
+        )
+endif()
diff --git a/Dockerfile b/Dockerfile
diff --git a/README.md b/README.md
@@ -1,39 +1,33 @@
 # Yet Another LR Parser Generator
 [![Github Releases](https://img.shields.io/github/release/mhhollomon/yalr.svg)](https://github.com/mhhollomon/yalr/releases)
 [![Build Status](https://api.cirrus-ci.com/github/mhhollomon/yalr.svg)](https://cirrus-ci.com/github/mhhollomon/yalr)
-[![Github Issues](https://img.shields.io/github/issues/mhhollomon/yalr.svg)](http://github.com/pantor/mhhollomon/yalr)
+[![Github Issues](https://img.shields.io/github/issues/mhhollomon/yalr.svg)](http://github.com/mhhollomon/yalr)
 [![GitHub License](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/mhhollomon/yalr/master/LICENSE)
 
-C++ will be generated.
-
-Targetting LALR at first but GLR as the end goal.
-
-The generated parser will use [recursive ascent](https://en.wikipedia.org/wiki/Recursive_ascent_parser) (_is this possible for GLR with its split stack?_)
-
 ## Status
 
-Yalr currently generates a *language recognizer* - that is, the generated code
-will simply give a yes/no answer to the question "Does the input string match
-the grammar?"
+Yalr is fully functional - if bare-bones. See the [Calculator
+example](examples/calculator.yalr)
 
-Both a lexer and a parser are generated.
+## Overview
 
-The next goal is to "tidy up" - add more unit tests, make the parser a
-bit better about error reporting, etc.
+Yalr is yet another LR (actually SLR) compiler generator.
+
+The design goal was to create a generator that produces a single file, making
+it easy to integrate into a build system. The code generated is C++17.
 
 ## Building
 
-Yalr requires boost, meson, ninja, and a c++17 compliant compiler. Assuming you have those, then building is:
+Yalr requires CMake, make, and a C++17 compliant compiler. Assuming you have those, then building is:
 ```bash
 git clone https://github.com/mhhollomon/yalr
 cd yalr
 mkdir build
-cd build
-meson ../src
-ninja
+cmake -B build
+cmake --build build
 ```
 
-The new executable will be in the build directory.
+The new `yalr` executable will be in the build directory.
 
 ## Running
 
@@ -56,7 +50,7 @@ yalr --help
 yalr -o foo.hpp my_grammar.yalr
 
 # Instead of outputting the parser,
-# translate the grammer for use on grammophone
+# translate the grammar for use on grammophone
 # (see references)
 yalr -t grammophone my_grammar.yalr
 ```
@@ -70,7 +64,8 @@ Keywords are reserved and may not be used as the name of a terminal or rule.
 
 The [example
 directory](https://github.com/mhhollomon/yalr/tree/master/examples) contains
-some example grammars including a grammar for the yalr grammar itself.
+some example grammars including a (probably out-of-date) grammar for the yalr
+grammar itself.
 
 ### Parser Class Name
 
@@ -102,8 +97,6 @@ This statement may only appear once in the file.
 
 ### Terminals
 
-All terminals must be explicitly declared.
-
 There are two types of terminals - "parser" terminals and "lexer" terminals.
 
 #### Parser Terminals
@@ -128,7 +121,7 @@ The pattern can be specified two different ways.
 
 1. As a single-quote delimited string.
 Patterns in this format are matched in the lexer as simple string compares.
-The pattern can be used as an alias for the term in rules.
+The pattern can be used for the term in rules.
 
 2. std::regex regular expression.
 The starting delimiter is the literal `r:`. The pattern extends to the next
@@ -151,7 +144,7 @@ is given, then the normal terminating semi-colon is not required.
 term <int> INTEGER r:[-+]?[0-9]+ <%{ return std::stoi(lexeme); }%>
 ```
 
-The action should be though of as the body of a lambda that returns the
+The action should be thought of as the body of a lambda that returns the
 semantic value to be passed back to the parser. The identifier `lexeme` is
 available. It is a string that contains the text that was matched by the term's
 pattern. If you wish to simply return the string (e.g. for an Identifier term)
@@ -185,19 +178,34 @@ rule Foo { => WS ; }
 
 
 ### Non-terminals
+
 Rules are declared with the `rule` keyword.
 Each alternate is introduced with `=>` and terminated with a semicolon.
 
-One rule must be marked as the starting or "goal" rule, by preceeding it with the `goal` keyword.
+One rule must be marked as the starting or "goal" rule, by preceding it with
+the `goal` keyword.
+
+An alias may be given to each symbol in the alternate. The value of that symbol
+will then be available in the action block.
+
+A terminal whose pattern is a single-quoted string may be referenced either by
+the name given it, or by the pattern itself (complete with the quotes).
+
+If a single-quoted string is used in a rule, but no terminal has been defined
+with that string, then one is automatically created. While this can be very
+convenient, it does not allow you to assign a type or an action/value to the
+terminal. But for common structural lexemes (like semi-colon and the like),
+this may actually be quite helpful. This can also make the rules a bit easier
+to read since they will read more like the string they would match.
 
 ```
 rule MyRule {
   => MYTERM MyRule ;
   => ;  /* an empty alternative */
 }
 
-/* you can use single quoted patterns as aliases */
-term SEMI ';' ;
+/* you can use single quoted patterns directly in the rule */
+/* The system will define a terminal for ';' for us */
 term INT  'int';
 rule A {
     => 'int' ID ';' ;
@@ -210,6 +218,11 @@ rule Compact { => A B ; => C Compact ; }
 goal rule Program {
   => Program Statement ;
 }
+
+// symbol aliases
+term <int> NUM r:[-+]\d+ <%{ return std::stoi(lexeme); }%>
+term ADD 'add' ;
+rule <int> RPN_ADD { => 'add' left:NUM right:NUM <%{ return left + right; }%> }
 ```
 
 ## Generated Code
@@ -246,6 +259,7 @@ int main() {
 - [Elkhound](http://scottmcpeak.com/elkhound/sources/elkhound/index.html)
 - [Lemon](http://www.hwaci.com/sw/lemon/)
 - [Boost::Spirit::X3](https://www.boost.org/doc/libs/develop/libs/spirit/doc/x3/html/index.html)
+- [ANTLR](https://www.antlr.org/)
 - [Grammophone](http://mdaines.github.io/grammophone/) - explore grammars.
 - [LR on Wikipedia](https://en.wikipedia.org/wiki/LR_parser)
 - [GLR on Wikipedia](https://en.wikipedia.org/wiki/GLR_parser)
@@ -258,9 +272,8 @@ int main() {
     parsing](https://webhome.cs.uvic.ca/~nigelh/Publications/rad.pdf)
 
 ## Technologies
-- [Meson](https://mesonbuild.com/) for build configuration.
-- [Ninja](https://ninja-build.org/) for building.
-- [Catch2](https://github.com/catchorg/Catch2) for unit testing.
+- [CMake](https://cmake.org/) for build configuration.
+- [doctest](https://github.com/onqtam/doctest) for unit testing.
 - [cxxopts](https://github.com/jarro2783/cxxopts) for command line handling.
 - [inja](https://github.com/pantor/inja) to help with code generation.
 

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,5 +1,12 @@
 ## On Master
 
+### Functional Changes
+- Add the ability to name the items in a rule alternative and use those names in rule actions.
+
+### non-functional changes
+- Moved to CMake for build. The meson system is currently broken.
+- Refactored the code some more.
+
 ## Release v0.0.2
 
 ### Functional Changes

diff --git a/docker/Dockerfile.alpine b/docker/Dockerfile.alpine
@@ -0,0 +1,5 @@
+FROM alpine:edge
+RUN apk update && apk add g++ meson ninja clang cmake make
+RUN rm -rf /etc/apk/cache
+CMD [ "/usr/bin/env", "sh" ]
+
diff --git a/docker/Dockerfile.arch b/docker/Dockerfile.arch
@@ -0,0 +1,7 @@
+FROM archlinux/base:latest
+# I would prefer to use `pacman -Scc`
+# but pacman doesn't give you a way to force the deletion without
+# interacting with the prompt.
+RUN pacman -Syu --noconfirm && pacman -S --noconfirm gcc clang cmake make && \
+    rm -rf  /var/cache/pacman/pkg/* && rm -rf /var/lib/pacman/*
+
diff --git a/docker/Dockerfile.debian b/docker/Dockerfile.debian
@@ -0,0 +1,2 @@
+FROM debian:unstable-slim
+RUN apt-get update && apt-get install -qq --assume-yes g++ meson ninja-build clang clang-tidy cmake && apt-get clean
diff --git a/docker/Dockerfile.test b/docker/Dockerfile.test
@@ -0,0 +1,4 @@
+FROM mhhollomon/yalr-ci:alpine
+COPY . /tmp/ci-build
+WORKDIR /tmp/ci-build
+CMD ["./scripts/build.sh", "ci"]
diff --git a/docs/Class_Diagram.xml b/docs/Class_Diagram.xml
diff --git a/docs/template_data.md b/docs/template_data.md
@@ -0,0 +1,57 @@
+# JSON Data structure for the code template
+
+This document describes the layout of the JSON data used in generating the code.
+
+## Overview
+
+The [inja](https://pantor.github.io/inja/) templating engine is used to render the final code. Inja uses JSON data - using the [nlohmann/json](https://github.com/nlohmann/json) library - to hold the values that will be inserted into the template.
+
+The main template resides in src/include/template.hpp.
+
+## Data Layout
+
+### Global
+
+- **namespace** : (scalar) The namespace to wrap the lexer and parser in.
+- **parserclass** : (scalar) The name to give the parser class
+- **lexerclass**  : (scalar) The name to give the lexer class
+
+### Lexer related data
+
+- **enums** : (array) the list of tokens
+    - **name**  : (scalar) the name of the enum entity
+    - **value** : (scalar) the value to give the entity
+- **types** : (array) list of type names. Used to create the values variant.
+- **semantic_actions** : (array) data related to terminal's actions.
+    - **token** : (scalar) Token that owns the action.
+    - **block** : (scalar) Actual code for the action.
+    - **type**  : (scalar) Type of the expected returned value.
+- **patterns** : (array) Data surrounding the matching logic for terms and skips.
+    - **matcher** : (scalar) The type of matcher - string or regex.
+    - **pattern** : (scalar) The actual thing to match.
+    - **token**   : (scalar) The token that owns the match.
+
+### Parser related data
+
+- **states** : (array) The data for each state
+  - **id** : (scalar) The numeric id of the state.
+  - **actions** : (array) the actions for the state.
+      - **type**         : (scalar) type of action, reduce, shift, accept
+      - **prodid**       : (r, scalar) Numeric id of the production.
+      - **production**   : (r, scalar) string describing the prod for a reduce
+      - **count**        : (r, scalar) Number of items in the production
+      - **returnlevels** : (r, scalar) Number of levels to skip in the return
+      - **symbol**       : (r, scalar) Name of Token for the rule
+      - **valuetype**    : (r, scalar) Type string (e.g. 'int', etc).
+      - **hasvaluetype** : (r, scalar) Boolean - is the valuetype not 'void'
+      - **hassemaction** : (r, scalar) Boolean - prod has a semantic action.
+      - **newstateid**   : (s, scalar) For shift, the number of next state to enter
+  - **gotos** : (array) The gotos for the state
+      - **symbol**  : (scalar) The token name of the look ahead symbol
+      - **stateid** : (scalar) The numeric id of the new state.
+- **reducefuncs** : (array) The data for each reduce function
+    - **prodid**     : (scalar) Numeric id of the production.
+    - **itemtypes**  : (array) List of item types for this production.
+    - **block**      : (scalar) actual code for the action.
+    - **production** : (scalar) string describing the prod for this reduce
+    - **rule_type**  : (scalar) type (eg. int, void) of the rule for this production.