Implement RV32 instructions (#28)

* README.md: Fix call to kriscv run * Check output of run_pattern for better error messages * Fix >>aWord and >>lWord to account for >>Int requiring infinite sign extension to properly do an arithmetic shift * Sign extend the LUI immediate for when XLEN > 32 * Update CHECK_HALT rules to only apply when we haven't halted already * Add macros for simple tests * Refactor simple tests to have a YAML assertion file instead of the full configuration output * Fold addi-overflow.S into addi.S * Implement SLTI and SLTIU * Implement ANDI, ORI, and XORI * Implement SLLI, SRLI, and SRAI * Implement AUIPC and ADD * Implement SUB, SLT, SLTU, AND, OR, and XOR * Implement SLL, SRL, and SRA * Implement JAL * Implement JALR * Implement BEQ, BNE, BLT, BGE, BLTU, and BGEU * Implement LB, LH, LW, LBU, LHU, SB, SH, and SW * Implement FENCE, FENCE.TSO, ECALL, and EBREAK * Refactor the halting mechanism to make it possible to cleanly halt at any point, as will be necessary for, e.g., illegal instruction exceptions. * Pull the PC increment out into a separate pipeline step * Use a branchPC function for branch instrs rather than two separate rules * Set Version: 0.1.20 * Add comments explaining why the arithmetic >>Int is used to implement the logical >>lWord --------- Co-authored-by: devops <devops@runtimeverification.com>
runtimeverification · Aug 1, 2024 · 606f33b · 606f33b
1 parent 280f3d0
commit 606f33b
Show file tree

Hide file tree

Showing 77 changed files with 973 additions and 175 deletions.
diff --git a/Makefile b/Makefile
@@ -44,6 +44,9 @@ test-unit: poetry-install
 test-integration: tests/riscv-arch-test-compiled poetry-install
 	$(POETRY_RUN) pytest src/tests/integration --maxfail=1 --verbose --durations=0 --numprocesses=4 --dist=worksteal $(TEST_ARGS)
 
+test-simple: poetry-install
+	$(POETRY_RUN) pytest src/tests/integration/test_integration.py --maxfail=1 --verbose --durations=0 --numprocesses=4 --dist=worksteal $(TEST_ARGS)
+
 RISCOF_DIRS = $(shell find src/tests/integration/riscof -type d)
 RISCOF_FILES = $(shell find src/tests/integration/riscof -type f)
 

diff --git a/README.md b/README.md
@@ -25,7 +25,7 @@ make kdist-build
 ## Usage
 Execute a compiled RISC-V ELF file with the following command:
 ```bash
-poetry -C kriscv run test.elf
+poetry -C kriscv run kriscv run test.elf
 ```
 The output shows the final K configuration, including the state of memory, all registers, and any encountered errors. Execution can also be halted at a particular global symbol by providing the `--end-symbol` flag.
 

diff --git a/package/version b/package/version
@@ -1 +1 @@
-0.1.19
+0.1.20
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "kriscv"
-version = "0.1.19"
+version = "0.1.20"
 description = "K tooling for the RISC-V architecture"
 authors = [
     "Runtime Verification, Inc. <contact@runtimeverification.com>",

diff --git a/src/kriscv/kdist/riscv-semantics/riscv.md b/src/kriscv/kdist/riscv-semantics/riscv.md
diff --git a/src/kriscv/kdist/riscv-semantics/word.md b/src/kriscv/kdist/riscv-semantics/word.md
@@ -89,14 +89,21 @@ is used to zero-out all but the least-significant `XLEN`-bits in case of overflo
   syntax Word ::= Word "<<Word" Int [function]
   rule W(I1) <<Word I2 => chop(I1 <<Int I2)
 ```
-For right shifts, we provide both arithmetic and logical variants
+For right shifts, we provide both arithmetic and logical variants.
+
+Counterintuitively, we use the arithmetic right shift operator `>>Int` for `Int` to implement the logical right shift operator `>>lWord` for `Word`. Indeed, note the following:
+- `Int` values are infinitely sign-extended two's complement integers.
+- `>>Int` arithmetically shifts by padding with the infinitely repeated sign bit.
+- For an `I:Int` underlying `W(I):Word`, only the least-significant `XLEN`-bits of `I` are populated, so the infinitely repeated sign bit is always `0`.
+
+That is, for any `I:Int` underlying some `W(I):Word`, applying `>>Int` will always pad with `0`s, correctly implementing a logical right shift.
 ```k
-  syntax Word ::= Word ">>aWord" Int [function]
-  rule W(I1) >>aWord I2 => W(I1 >>Int I2)
+  syntax Word ::= Word ">>lWord" Int [function]
+  rule W(I1) >>lWord I2 => W(I1 >>Int I2)
 ```
-Because K does not provide a logical right shift for `Int`, we implement it as an arithmetic shift followed by zeroing-out all the sign bits.
+To actually perform an arithmetic shift over `Word`, we first convert to an infinitely sign-extended `Int` of equal value using `Word2SInt`, ensuring `>>Int` will pad with `1`s for a negative `Word`. The final result will still be infinitely sign-extended, so we must `chop` it back to a `Word`.
 ```k
-  syntax Word ::= Word ">>lWord" Int [function]
-  rule W(I1) >>lWord I2 => W((I1 >>Int I2) &Int (2 ^Int (XLEN -Int I2) -Int 1))
+  syntax Word ::= Word ">>aWord" Int [function]
+  rule W1 >>aWord I2 => chop(Word2SInt(W1) >>Int I2)
 endmodule
 ```
diff --git a/src/kriscv/term_manip.py b/src/kriscv/term_manip.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from pyk.kore.match import kore_int, match_app, match_symbol
+from pyk.kore.syntax import App
+
+if TYPE_CHECKING:
+    from pyk.kore.syntax import Pattern
+
+
+def kore_word(word: Pattern) -> int:
+    return kore_int(match_app(word, 'LblW').args[0])
+
+
+def strip_inj(pattern: Pattern) -> Pattern:
+    if isinstance(pattern, App) and pattern.symbol == 'inj':
+        return pattern.args[0]
+    return pattern
+
+
+def match_map(pattern: Pattern) -> tuple[tuple[Pattern, Pattern], ...]:
+    # Same as match_map from pyk.kore.match, but not using LeftAssoc and stripping injections
+    stop_symbol = "Lbl'Stop'Map"
+    cons_symbol = "Lbl'Unds'Map'Unds'"
+    item_symbol = "Lbl'UndsPipe'-'-GT-Unds'"
+
+    app = match_app(pattern)
+    if app.symbol == stop_symbol:
+        return ()
+
+    if app.symbol == item_symbol:
+        return ((strip_inj(app.args[0]), strip_inj(app.args[1])),)
+
+    match_symbol(app.symbol, cons_symbol)
+    return match_map(app.args[0]) + match_map(app.args[1])
diff --git a/src/kriscv/tools.py b/src/kriscv/tools.py
@@ -3,11 +3,14 @@
 from typing import TYPE_CHECKING
 
 from elftools.elf.elffile import ELFFile  # type: ignore
-from pyk.kast.inner import Subst
+from pyk.kast.inner import KSort, Subst
+from pyk.kast.manip import split_config_from
+from pyk.kore.match import kore_int
 from pyk.ktool.krun import KRun
 from pyk.prelude.k import GENERATED_TOP_CELL
 
 from kriscv import elf_parser, term_builder
+from kriscv.term_manip import kore_word, match_map
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -47,7 +50,7 @@ def init_config(self, config_vars: dict[str, KInner]) -> KInner:
 
     def run_config(self, config: KInner) -> KInner:
         config_kore = self.krun.kast_to_kore(config, sort=GENERATED_TOP_CELL)
-        final_config_kore = self.krun.run_pattern(config_kore)
+        final_config_kore = self.krun.run_pattern(config_kore, check=True)
         return self.krun.kore_to_kast(final_config_kore)
 
     def run_elf(self, elf_file: Path, *, end_symbol: str | None = None) -> KInner:
@@ -70,3 +73,21 @@ def run_elf(self, elf_file: Path, *, end_symbol: str | None = None) -> KInner:
                 '$HALT': halt_cond,
             }
             return self.run_config(self.init_config(config_vars))
+
+    def get_registers(self, config: KInner) -> dict[int, int]:
+        _, cells = split_config_from(config)
+        regs_kore = self.krun.kast_to_kore(cells['REGS_CELL'], sort=KSort('Map'))
+        regs = {}
+        for reg, val in match_map(regs_kore):
+            regs[kore_int(reg)] = kore_word(val)
+        if 0 not in regs:
+            regs[0] = 0
+        return regs
+
+    def get_memory(self, config: KInner) -> dict[int, int]:
+        _, cells = split_config_from(config)
+        mem_kore = self.krun.kast_to_kore(cells['MEM_CELL'], sort=KSort('Map'))
+        mem = {}
+        for addr, val in match_map(mem_kore):
+            mem[kore_word(addr)] = kore_int(val)
+        return mem
diff --git a/src/tests/conftest.py b/src/tests/conftest.py
@@ -14,10 +14,10 @@
 
 def pytest_addoption(parser: Parser) -> None:
     parser.addoption(
-        '--update-expected-output',
+        '--save-final-config',
         action='store_true',
         default=False,
-        help='Write expected output files for simple tests',
+        help='Save the final configuration for each simple test',
     )
     parser.addoption(
         '--temp-dir',
@@ -27,8 +27,8 @@ def pytest_addoption(parser: Parser) -> None:
 
 
 @pytest.fixture
-def update_expected_output(request: FixtureRequest) -> bool:
-    return request.config.getoption('--update-expected-output')
+def save_final_config(request: FixtureRequest) -> bool:
+    return request.config.getoption('--save-final-config')
 
 
 @pytest.fixture

diff --git a/src/tests/integration/test_integration.py b/src/tests/integration/test_integration.py
@@ -5,7 +5,10 @@
 from typing import TYPE_CHECKING
 
 import pytest
+import yaml
+from elftools.elf.elffile import ELFFile  # type: ignore
 
+from kriscv import elf_parser
 from kriscv.build import semantics
 
 from ..utils import TESTS_DIR
@@ -18,26 +21,116 @@
 SIMPLE_TESTS: Final = tuple(asm_file for asm_file in SIMPLE_DIR.rglob('*.S'))
 
 
-def _test_simple(elf_file: Path, expected_file: Path, update: bool) -> None:
+def _as_unsigned(val: int, bits: int) -> int | None:
+    if 0 <= val and val < 2**bits:
+        return val
+    if -(2 ** (bits - 1)) <= val and val < 0:
+        return val + 2**bits
+    return None
+
+
+def _check_regs_entry(assert_file: Path, registers: dict[int, int], reg: int, val: int) -> None:
+    if not (0 <= reg and reg < 16):
+        raise AssertionError(
+            f"{assert_file}: Invalid register {reg} found in 'regs'. Registers must be in the range [0, 15]."
+        )
+    expect = _as_unsigned(val, 32)
+    if expect is None:
+        val_str = f'{val} (0x{val:08X})' if val >= 0 else f'{val}'
+        raise AssertionError(
+            f'{assert_file}: expected value {val_str} for x{reg} is not a 32-bit integer. Value must be in the range [-2147483648, 2147483647] or [0, 4294967295] ([0, 0xFFFFFFFF]).'
+        )
+    err_msg = f'Expected x{reg} to contain {val} (0x{expect:08X}), but '
+    if reg not in registers:
+        raise AssertionError(err_msg + f'x{reg} was never written to.')
+    actual = registers[reg]
+    if actual != expect:
+        raise AssertionError(err_msg + f'found {actual} (0x{actual:08X}).')
+
+
+def _check_mem_entry(assert_file: Path, mem_symbol: int, memory: dict[int, int], offset: int, val: int) -> None:
+    addr = mem_symbol + offset
+    mem_str = f'{mem_symbol} (0x{mem_symbol:08X})'
+    offset_str = f'{offset} (0x{offset:08X})'
+    addr_str = f'{addr} (0x{addr:08X})'
+    if not (0 <= addr and addr < 2**32):
+        raise AssertionError(
+            f"{assert_file}: Invalid memory offset {offset_str} found in 'mem'. Symbol '_mem' is at address {mem_str}, so the absolute address {addr_str} corresponding to this offset is outside the allowed range [0, 4294967295] ([0, 0xFFFFFFFF])."
+        )
+    expect = _as_unsigned(val, 8)
+    if expect is None:
+        val_str = f'{val} (0x{val:02X})' if val >= 0 else f'{val}'
+        raise AssertionError(
+            f'{assert_file}: expected value {val_str} for memory offset {offset_str} is not an 8-bit integer. Value must be in the range [-128, 127] or [0, 255] ([0, 0xFF]).'
+        )
+    err_msg = (
+        f'Expected memory offset {offset_str} to contain {val} (0x{expect:02X}), but '
+        + '{reason}'
+        + f". Symbol '_mem' is at address {mem_str}, so this offset corresponds to absolute address {addr_str}."
+    )
+    if addr not in memory:
+        raise AssertionError(err_msg.format(reason='this location was never written to'))
+    actual = memory[addr]
+    if actual != expect:
+        raise AssertionError(err_msg.format(reason=f'found {actual} (0x{actual:02X})'))
+
+
+def _test_simple(elf_file: Path, assert_file: Path, final_config_output: Path | None) -> None:
     tools = semantics()
-    actual = tools.kprint.pretty_print(tools.run_elf(elf_file, end_symbol='_halt'), sort_collections=True)
+    final_config = tools.run_elf(elf_file, end_symbol='_halt')
+
+    if final_config_output is not None:
+        final_config_output.touch()
+        pretty_config = tools.kprint.pretty_print(final_config, sort_collections=True)
+        final_config_output.write_text(pretty_config)
 
-    if update:
-        expected_file.touch()
-        expected_file.write_text(actual)
+    registers = tools.get_registers(final_config)
+    memory = tools.get_memory(final_config)
+
+    assert_file = assert_file.resolve(strict=True)
+    asserts = yaml.safe_load(assert_file.read_bytes())
+
+    if asserts is None:
+        return
+    if not (set(asserts.keys()) <= {'regs', 'mem'}):
+        raise AssertionError(f"{assert_file}: Unexpected keys {asserts.keys()}. Expected only 'regs' or 'mem'.")
+
+    for reg, val in asserts.get('regs', {}).items():
+        if not isinstance(reg, int):
+            raise AssertionError(f"{assert_file}: Unexpected key {reg} in 'regs'. Expected an integer register number.")
+        if not isinstance(val, int):
+            raise AssertionError(f"{assert_file}: Unexpected value {val} in 'regs'. Expected a 32-bit integer.")
+        _check_regs_entry(assert_file, registers, reg, val)
+
+    if 'mem' not in asserts:
         return
 
-    expected_file = expected_file.resolve(strict=True)
-    expected = expected_file.read_text()
-    assert actual == expected
+    with open(elf_file, 'rb') as f:
+        mem_symbol_vals = elf_parser.read_symbol(ELFFile(f), '_mem')
+
+    if len(mem_symbol_vals) == 0:
+        raise AssertionError(
+            f"{assert_file}: Found 'mem' assertion entry, but test file does not contain '_mem' symbol."
+        )
+    if len(mem_symbol_vals) > 1:
+        raise AssertionError(
+            f"{assert_file}: Found 'mem' assertion entry, but test file contains multiple instances of '_mem' symbol."
+        )
+    mem_symbol = mem_symbol_vals[0]
+    for addr, val in asserts.get('mem', {}).items():
+        if not isinstance(addr, int):
+            raise AssertionError(f"{assert_file}: Unexpected key {reg} in 'mem'. Expected an integer address.")
+        if not isinstance(val, int):
+            raise AssertionError(f"{assert_file}: Unexpected value {val} in 'mem'. Expected an 8-bit integer.")
+        _check_mem_entry(assert_file, mem_symbol, memory, addr, val)
 
 
 @pytest.mark.parametrize(
     'asm_file',
     SIMPLE_TESTS,
     ids=[str(test.relative_to(SIMPLE_DIR)) for test in SIMPLE_TESTS],
 )
-def test_simple(asm_file: Path, update_expected_output: bool, temp_dir: Path) -> None:
+def test_simple(asm_file: Path, save_final_config: bool, temp_dir: Path) -> None:
     elf_file = Path(temp_dir) / (asm_file.stem + '.elf')
     compile_cmd = [
         'riscv64-unknown-elf-gcc',
@@ -50,11 +143,13 @@ def test_simple(asm_file: Path, update_expected_output: bool, temp_dir: Path) ->
         '-mlittle-endian',
         '-Xassembler',
         '-mno-arch-attr',
+        f'-I {SIMPLE_DIR}',
         str(asm_file),
         '-o',
         str(elf_file),
     ]
     subprocess.run(compile_cmd, check=True)
     assert elf_file.exists()
-    expected_file = Path(str(asm_file) + '.out')
-    _test_simple(elf_file, expected_file, update_expected_output)
+    assert_file = Path(str(asm_file) + '.assert')
+    final_config_output = (Path(temp_dir) / (asm_file.name + '.out')) if save_final_config else None
+    _test_simple(elf_file, assert_file, final_config_output)
diff --git a/tests/README.md b/tests/README.md
@@ -6,10 +6,10 @@ This directory contains the input RISC-V files for various tests.
 ### failing.txt
 A list of tests that are known to fail under the current implementation. Each line consists of a single path to a failing test file. Paths are given relative to the `riscv-semantics/tests` directory.
 
-### riscv-arch-test / riscv-arch-test-compiled
-The official RISC-V Architectural Test Suite. These are exercised in `src/tests/integration/test_conformance.py` as part of `make test_integration`.
+### riscv-arch-test(-compiled)
+The official RISC-V Architectural Test Suite. These are exercised in `src/tests/integration/test_conformance.py` as part of `make test-integration`.
 
-The test source files can be found under `riscv-arch-test/riscv-test-suite` and compiled by calling `make tests/riscv-arch-test-compiled` from the repository root. This will populate the `riscv-arch-test-compiled` directory with the following contents:
+The test source files can be found under `riscv-arch-test/riscv-test-suite` and are compiled by calling `make tests/riscv-arch-test-compiled` from the repository root. This will populate the `riscv-arch-test-compiled` directory with the following contents:
 - `database.yaml`
   - A database of all tests in the suite with associated information pulled from `RVTEST` macros
 - `test_list.yaml`
@@ -24,12 +24,26 @@ The test source files can be found under `riscv-arch-test/riscv-test-suite` and
   - The actual compiled test files for input to `kriscv`
   - The directory structure mirrors `riscv-arch-test/riscv-test-suite`, but with each `<test_name>.S` test file replaced by a directory of the same name containing:
       - `<test_name>.s`, the pre-linking RISC-V ASM
-	  - `<test_name>.elf`, the fully compiled object file
-	  - `<test_name>.disass`, the disassembly of the compiled object file
+      - `<test_name>.elf`, the fully compiled object file
+      - `<test_name>.disass`, the disassembly of the compiled object file
 
 ### simple
-A suite of handwritten RISC-V tests. Inputs are RISC-V ASM files `test.S` which are compiled to ELF with
+A suite of handwritten RISC-V tests. These are exercised in `src/tests/integration/test_integration.py` as part of `make test-simple` or `make test-integration`.
+
+Each test consists of an input RISC-V ASM file `test.S` and a corresponding assertion file `test.S.assert`.
+
+The input is compiled with
 ```
 riscv64-unknown-elf-gcc -nostdlib -nostartfiles -static -march=rv32e -mabi=ilp32e -mno-relax -mlittle-endian -Xassembler -mno-arch-attr test.S
 ```
-Each input file must define global symbols `_start` and `_halt`. The test is executed with the PC initially set to `_start`, and will halt when the PC reaches `_halt`. The final KAST configuration is compared against `test.S.out`.
+and must define global symbols `_start` and `_halt`. See the convenience macros defined in `simple.h`.
+
+The assertion file must be in YAML format and may contain the following entries:
+- `regs`, a dictionary of entries `i: val` asserting that register `xi` has value `val`
+  - `i` must be in the range `[0, 15]`
+  - `val` must be a 32-bit integer, either signed or unsigned
+- `mem`, a dictionary of entries `offset: val` asserting that memory address `_mem + offset` has value `val`
+  - `offset` must be an unsigned integer
+  - `val` must be an 8-bit integer, either signed or unsigned
+
+The test is executed with the PC initially set to `_start` and will halt when the PC reaches `_halt`. The final configuration is then checked against each of entries in the assertion file.
diff --git a/tests/simple/add.S b/tests/simple/add.S
@@ -0,0 +1,14 @@
+#include "simple.h"
+
+START_TEXT
+	li x1, 0xDEADBEEF // x1 = 0xDEADBEEF
+	add x2, x1, x0    // x2 = 0xDEADBEEF
+
+	xori x3, x1, -1   // x3 = ~x2
+	addi x3, x3, 1    // x3 = ~x2 + 1 = -x2
+	li x4, 0xFFFFFFFF // x4 = 0xFFFFFFFF
+	add x4, x4, x3    // x4 = 0xFFFFFFFF - x2
+	addi x4, x4, 1    // x4 = (0xFFFFFFFF - x2) + 1
+
+	add x5, x2, x4 // x5 = 0xFFFFFFFF + 1 = 0
+END_TEXT
diff --git a/tests/simple/add.S.assert b/tests/simple/add.S.assert
@@ -0,0 +1 @@
+regs: {2: 0xDEADBEEF, 5: 0}
diff --git a/tests/simple/addi-overflow.S b/tests/simple/addi-overflow.S