From 97192b05dfd35e6739a70074183cb9c59869b205 Mon Sep 17 00:00:00 2001
From: Rot127 <unisono@quyllur.org>
Date: Fri, 26 Apr 2024 06:41:57 -0500
Subject: [PATCH] Add new generator for MC test translation.

---
 .github/workflows/auto-sync.yml               |   1 +
 suite/MC/README                               |  12 +-
 suite/MC/Update.py                            | 304 ---------------
 suite/auto-sync/src/autosync/MCUpdater.py     | 352 ++++++++++++++++++
 .../autosync/Tests/MCUpdaterTests/test_a.txt  |  71 ++++
 .../Tests/MCUpdaterTests/test_a.txt.cs        |  22 ++
 .../autosync/Tests/MCUpdaterTests/test_b.txt  |   0
 .../autosync/Tests/MCUpdaterTests/test_c.txt  |   0
 .../src/autosync/Tests/test_mcupdater.py      |  31 ++
 suite/auto-sync/src/autosync/path_vars.json   |   5 +-
 10 files changed, 483 insertions(+), 315 deletions(-)
 delete mode 100755 suite/MC/Update.py
 create mode 100755 suite/auto-sync/src/autosync/MCUpdater.py
 create mode 100644 suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt
 create mode 100644 suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt.cs
 create mode 100644 suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_b.txt
 create mode 100644 suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_c.txt
 create mode 100644 suite/auto-sync/src/autosync/Tests/test_mcupdater.py
diff --git a/.github/workflows/auto-sync.yml b/.github/workflows/auto-sync.yml
index f8b45287de..c97636db70 100644
--- a/.github/workflows/auto-sync.yml
+++ b/.github/workflows/auto-sync.yml
@@ -64,6 +64,7 @@ jobs:
       - name: Test Header patcher
         run: |
           python -m unittest src/autosync/Tests/test_header_patcher.py
+          python -m unittest src/autosync/Tests/test_mcupdater.py
 
       - name: Differ - Test save file is up-to-date
         run: |
diff --git a/suite/MC/README b/suite/MC/README
index 7722a3544c..6f2555e04f 100644
--- a/suite/MC/README
+++ b/suite/MC/README
@@ -2,7 +2,7 @@
 
 Input files used to test instructions of architectures and modes.
 
-The test cases are taken from `llvm/test/MC`. Note that the LLVM tests
+The test cases are taken from `llvm/test/MC`. Note that most of the LLVM tests
 are for **encoding** of instructions (`asm_string -> bytes`).
 
 We test the decoding (`bytes -> asm_string`).
@@ -15,15 +15,7 @@ against `llvm-objdump`.
 
 ### Update test files
 
-Run the update script with the paths to LLVM's `MC` test directories
-
-```
-cd capstone/suite/MC
-./Update.py -a <ARCH> -d llvm-project/llvm/test/MC
-./Update.py -a <ARCH> -d llvm-project/llvm/test/MC/Disassembler
-```
-
-You can update more fine grained. Check the help to get the possible options.
+See `suite/auto-sync/README.md` for how to update the test files.
 
 ### Test file formatting
 
diff --git a/suite/MC/Update.py b/suite/MC/Update.py
deleted file mode 100755
index 1e4851254a..0000000000
--- a/suite/MC/Update.py
+++ /dev/null
@@ -1,304 +0,0 @@
-#!/usr/bin/env python3
-
-from pathlib import Path
-
-import argparse
-import re
-import os
-
-
-def cwd():
-    """Return current working directory."""
-    return os.path.dirname(os.path.realpath(__file__))
-
-
-def fatal_error(msg: str) -> None:
-    """Prints an error message and exists with error code 1."""
-    print(f"[x] {msg}")
-    exit(1)
-
-
-def warn(msg: str) -> None:
-    """Prints a warning message."""
-    print(f"[!] {msg}")
-
-
-def info(msg: str) -> None:
-    """Prints an info message."""
-    print(f"[*] {msg}")
-
-
-def check_paths(llvm_dir: Path, arch: str) -> None:
-    """Checks all relevant directories for errors and if they exist."""
-    if not llvm_dir.exists():
-        fatal_error(f"{llvm_dir} does not exist.")
-
-    if not llvm_dir.is_dir():
-        fatal_error(f"{llvm_dir} is not a directory.")
-
-    out_dir: Path = Path(cwd()).joinpath(arch)
-    if not out_dir.exists():
-        fatal_error(f"Output directory {out_dir} does not exist.")
-
-    if not out_dir.is_dir():
-        fatal_error(f"Output directory {out_dir} is not a directory.")
-
-    arch_dir = llvm_dir.joinpath(arch)
-    if not arch_dir.exists():
-        fatal_error(f"Test file directory {arch_dir} does not exist.")
-
-    if not arch_dir.is_dir():
-        fatal_error(f"Test file directory {arch_dir} is not a directory.")
-
-
-def get_included_files(
-    arch_dir: Path,
-    out_path: Path,
-    included_files: set[str],
-    excluded_files: set[str] = None,
-) -> list[tuple[Path, Path]]:
-    """
-    Generates the file list to update. Only the files listed
-    via command line arguments are added.
-    """
-    files = list()
-    file: Path
-    for file in arch_dir.iterdir():
-        stem = file.stem
-        if stem not in included_files:
-            continue
-        if excluded_files and stem in excluded_files:
-            included_files.remove(stem)
-            continue
-
-        included_files.remove(stem)
-        files.append((file, out_path.joinpath(file.name + ".cs")))
-
-    if len(included_files) != 0:
-        warn(
-            f"Could not find {', '.join(included_files)} in the LLVM test files."
-        )
-
-    return files
-
-
-def get_all_files(
-    arch_dir: Path,
-    out_path: Path,
-    excluded_files: set[str] = None,
-) -> list[tuple[Path, Path]]:
-    """
-    Generates the file list to update. All files of an
-    architecture are added.
-    """
-    files = list()
-    file: Path
-    for file in arch_dir.iterdir():
-        stem = file.stem
-        if excluded_files and stem in excluded_files:
-            continue
-
-        files.append((file, out_path.joinpath(file.name + ".cs")))
-    return files
-
-
-def get_file_list(
-    llvm_dir: Path,
-    arch: str = None,
-    excluded_files: set[str] = None,
-    included_files: set[str] = None,
-) -> list[tuple[Path, Path]]:
-    """
-    Generates a list of files to update.
-    The list contains tuples of the form: (llvm_file_path, cs_file_path)
-    """
-
-    out_dir: Path = Path(cwd()).joinpath(arch)
-    arch_dir = llvm_dir.joinpath(arch)
-
-    if included_files and len(included_files) != 0:
-        return get_included_files(
-            arch_dir, out_dir, included_files, excluded_files
-        )
-    return get_all_files(arch_dir, out_dir, excluded_files)
-
-
-def create_new_test_file(arch: str, cs_file: Path) -> str:
-    """
-    Creates a new test files and asks for the tesst parameter for it.
-    :return: The test parameter string.
-    """
-    info(f"Add new test file: {cs_file}")
-    info("You need to provide the test parameters for it.")
-    test_parameters = f"# CS_ARCH_{arch.upper()}, "
-    test_parameters += input(
-        "\nAdd architecture mode of tests"
-        "(CS_MODE_THUMB, CS_MODE_BIG_ENDIAN, ...)\n"
-        "> "
-    )
-    test_parameters += ", "
-    test_parameters += input(
-        "\nAdd disassembly options for this test file"
-        "(CS_OPT_SYNTAX_NOREGNAME, CS_OPT_SYNTAX_ATT, None, ...)\n"
-        "> "
-    )
-    test_parameters += "\n"
-    cs_file.touch()
-    return test_parameters
-
-
-def get_test_parameters(cs_file: Path) -> str:
-    """
-    Extracts the test parameters string from
-    an existing Capstone test file.
-    """
-    with open(cs_file) as f:
-        line = f.readline()
-
-    # Check for "# CS_ARCH_<ARCH>, CS_MODE_<MODE>, ..." lines
-    regex = r"#\s*CS_ARCH_.+,\s*CS_MODE_.+,\s*.+"
-    if not re.search(regex, line):
-        fatal_error(
-            f"The first line in {cs_file} is not "
-            f"the test parameter line.\nLine: {line}"
-        )
-    return line
-
-
-def decimal_to_hex_fix(asm: str) -> str:
-    """
-    Replaces every immediate number in the asm string with its hex form.
-    If it is larger than the hex threshold.
-    """
-    # Defined in utils.h
-    hex_threshold = 9
-    matches = re.findall(r"([#\s]-?\d+)", asm)
-    if not matches:
-        return asm
-
-    for m in matches:
-        num = int(m[1:])
-        neg_num = num < 0
-        sign = ""
-        if neg_num:
-            num = num * -1
-            sign = "-"
-        if num < hex_threshold:
-            continue
-        prefix = m[0]
-        asm = re.sub(m, rf"{prefix}{sign}{hex(num)}", asm)
-    return asm
-
-
-def extract_tests(llvm_file: Path) -> str:
-    """
-    Extracts all compatible test cases in the given llvm_file
-    and returns them as string.
-    """
-    hex_encoding = r"(0x[a-fA-F0-9][a-fA-F0-9],?\s*)+"
-    asm_regex = r"(.*)"
-
-    test_case_patterns = [
-        rf"#?\s*@?\s*CHECK:\s+{asm_regex}\s+@\s+encoding:\s+\[({hex_encoding})\]",
-    ]
-
-    result = ""
-
-    if llvm_file.is_dir():
-        return result
-
-    f = open(llvm_file)
-    for line in f.readlines():
-        match = list()
-        for regex in test_case_patterns:
-            match: list = re.findall(regex, line)
-            if match:
-                break
-        if not match:
-            continue
-        match = match[0]
-        asm = re.sub(r"\s+", " ", match[0])
-        asm = asm.strip(" ")
-        asm = decimal_to_hex_fix(asm)
-        hexbytes = re.sub(r"\s", "", match[1])
-        result += f"{hexbytes} = {asm}\n"
-    f.close()
-    return result
-
-
-def update(
-    llvm_dir: Path,
-    arch: str,
-    excluded_files: set[str] = None,
-    included_files: set[str] = None,
-) -> None:
-    """
-    Updates all regression test files for Capstone.
-    """
-
-    check_paths(llvm_dir, arch)
-
-    files: list[tuple[Path, Path]] = get_file_list(
-        llvm_dir, arch, excluded_files, included_files
-    )
-
-    for file in files:
-        llvm_file = file[0]
-        cs_file = file[1]
-
-        cs_tests = extract_tests(llvm_file)
-        if cs_tests == "":
-            continue
-
-        if not cs_file.exists():
-            test_parameters = create_new_test_file(arch, cs_file)
-        else:
-            test_parameters = get_test_parameters(cs_file)
-
-        with open(cs_file, "w") as f:
-            f.write(test_parameters)
-            f.write(cs_tests)
-    info("Update done")
-
-
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        prog="Test file updater",
-        description="Synchronizes test files with LLVM",
-    )
-    parser.add_argument(
-        "-d",
-        dest="llvm_dir",
-        help="Path to the LLVM MC Disassembler test files.",
-        required=True,
-        type=Path,
-    )
-    parser.add_argument(
-        "-a",
-        dest="arch",
-        help="Name of architecture to update.",
-        choices=["ARM"],
-        required=True,
-    )
-    parser.add_argument(
-        "-e",
-        dest="excluded_files",
-        metavar="filename",
-        nargs="+",
-        help="File names to exclude from update (without file extension).",
-        type=list,
-    )
-    parser.add_argument(
-        "-f",
-        dest="included_files",
-        metavar="filename",
-        nargs="+",
-        help="Specific list of file names to update (without file extension).",
-    )
-    arguments = parser.parse_args()
-    return arguments
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    update(args.llvm_dir, args.arch, args.excluded_files, args.included_files)
diff --git a/suite/auto-sync/src/autosync/MCUpdater.py b/suite/auto-sync/src/autosync/MCUpdater.py
new file mode 100755
index 0000000000..7f0949d682
--- /dev/null
+++ b/suite/auto-sync/src/autosync/MCUpdater.py
@@ -0,0 +1,352 @@
+#!/usr/bin/env python3
+# Copyright © 2024 Rot127 <unisono@quyllur.org>
+# SPDX-License-Identifier: BSD-3
+import argparse
+import re
+from enum import Enum
+from pathlib import Path
+
+from autosync.Helper import get_path
+
+# The `CHECK:` / `CHECK-NEXT:` prefix of a test line, after a `#` or `//` comment marker.
+CHECK = r"((#|//)\s*CHECK(-NEXT)?:)"
+ASM = r"(?P<asm_text>[^/@]+)"  # The asm text: a run of characters without `/` or `@`.
+ENC = r"(\[?(?P<enc_bytes>(0x[a-fA-F0-9]{2}[, ]?)+)\]?)"
+match_patterns = {
+    # Skip commented encodings with only CHECK (or similar) in front, and invalid-encoding warnings.
+    "skip_pattern": rf"(^((#|//)\s*[-A-Z0-9]+):\s*{ENC}\s*$)|"
+    f"(warning: invalid instruction encoding)",
+    # The encoding bytes pattern is in every file the same.
+    # But the disassembler and assembler tests prefix them differently.
+    # This is only the pattern for the encoding bytes. Without any prefix.
+    #
+    # The bytes are encoded with `0x` prefix and every byte is separated with a `,` or ` `.
+    # Optionally, they are enclosed in `[0x10,...]` brackets.
+    # E.g.: `[0x01,0xaa,0xb1,0x81]` or `0x01,0xaa,0xb1,0x81`.
+    # In the disassembler tests they don't have any prefix.
+    # In assembler tests they might have different prefixes like `CHECK-ENCODING`
+    # The matched bytes can be accessed from the group "enc_bytes"
+    "enc_bytes": ENC,
+    # Encodings in disassembly tests can have several prefixes
+    "enc_prefix_disas":
+    # no prefix at all, only optional whitespace (this alternative is not anchored to the line start)
+    r"((\s*)|"
+    # a `CHECK: ...` prefix with the encoding after the asm text, introduced by `encoding:`.
+    rf"({CHECK}.+encoding:\s+))",
+    # The asm checking line for `MC/Disassembler/*` tests follows the pattern:
+    # `# CHECK: <asm-text>`
+    # Usually multiple 'CHECK' come before or after the encoding bytes.
+    # Meaning: first comes a block of `# CHECK: ...` and afterwards for every `# CHECK: ...`
+    # line the encoding bytes.
+    # And vice versa, with the encoding bytes first and afterwards the asm text checks.
+    # The matched asm text can be accessed from the group "asm_text"
+    "asm_check": rf"{CHECK}\s+{ASM}(\s*(#|//)\s+encoding:\s+{ENC})?",
+    # Single line disassembly test of the form `<encoding bytes> # <asm text>`
+    "single_line_disas": rf"^{ENC}\s+#\s+{ASM}",
+    # The RUN line, with the command to run the test file, contains sometimes the `mattr` flags.
+    # These are relevant, because they enable or disable features we might need to
+    # respect in our tests as well.
+    # The matched `mattr` cmd line option (if any) can be accessed from the group `mattr`
+    "run_line": r"RUN:.*(?P<mattr>mattr=[^ ]+).+",
+}
+
+
+class Test:
+    """A test case: encoding bytes and their asm text; either may still be None."""
+
+    def __init__(self, encoding: str | None, asm_text: str | None):
+        self.encoding: str | None = encoding
+        self.asm_text: str | None = asm_text
+
+    def __str__(self) -> str:
+        # Pure formatting: byte separators are emitted as parsed and `self` is not modified.
+        return f"{self.encoding.strip('[]')} == {self.asm_text}"
+
+    def test_complete(self) -> bool:
+        return self.encoding is not None and self.asm_text is not None
+
+    def add_missing(self, encoding: str | None, asm_text: str | None):
+        """Fill in the part(s) still unset; raise ValueError if a needed part is not given."""
+        if encoding is None and asm_text is None:
+            raise ValueError("One of the arguments must be set.")
+        if not (self.encoding or encoding):
+            raise ValueError("Test still needs the encoding but it is None.")
+        self.encoding = self.encoding or encoding
+        if not (self.asm_text or asm_text):
+            raise ValueError("Test still needs the asm_text but it is None.")
+        self.asm_text = self.asm_text or asm_text
+
+
+class TestManager:
+    """Class to manage incomplete tests. It automatically assigns the encoding and asm text
+    to the correct Test objects it holds.
+    It assumes that incomplete tests (only encoding OR the asm_text is given)
+    are all given in the same order.
+    Meaning: The first test without any asm_text but the encoding, is the same test
+    which is later given with only the asm_text but without encoding.
+
+    E.g.:
+        Order in which tests must be given to this Manager:
+
+        Test 1 -> (<encoding>, None)
+        Test 2 -> (<encoding>, None)
+        Test 3 -> (<encoding>, None)
+        ...
+
+        Test 1 -> (None, <asm_text>)
+        Test 2 -> (None, <asm_text>)
+        Test 3 -> (None, <asm_text>)
+        ...
+    """
+
+    class AddingState(Enum):
+        ENCODING = 0  # The pending tests were started with only an encoding.
+        ASM_TEXT = 1  # The pending tests were started with only an asm text.
+        UNSET = 2  # No incomplete tests are pending.
+
+    def __init__(self):
+        # True once the missing halves arrive: each new input then completes the oldest pending test.
+        self.switched = False
+        self.state = self.AddingState.UNSET
+        # List of all tests which still miss a part.
+        self.incomplete_tests: list[Test] = list()
+        # Tests which are complete
+        self.completed: list[Test] = list()
+
+    def add_test(self, encoding: str | None, asm_text: str | None):
+        if encoding is not None and asm_text is not None:
+            # A complete test may only be added while no incomplete tests are pending.
+            if not (
+                self.state == self.AddingState.UNSET and len(self.incomplete_tests) == 0
+            ):
+                raise ValueError(
+                    "If a complete test is added, all other tests need to be done."
+                )
+            self.state = self.AddingState.UNSET
+            self.completed.append(Test(encoding, asm_text))
+            return
+
+        if self.state == self.AddingState.UNSET:
+            assert len(self.incomplete_tests) == 0
+            # Add the first incomplete test
+            self.state = (
+                self.AddingState.ENCODING
+                if encoding is not None
+                else self.AddingState.ASM_TEXT
+            )
+
+        # The part collected so far is absent in this input: start completing the pending tests.
+        if (self.state == self.AddingState.ENCODING and encoding is None) or (
+            self.state == self.AddingState.ASM_TEXT and asm_text is None
+        ):
+            self.switched = True
+
+        if self.switched:
+            test = self.incomplete_tests.pop(0)  # Oldest pending test first.
+            test.add_missing(encoding, asm_text)
+            self.completed.append(test)
+        else:
+            self.incomplete_tests.append(Test(encoding, asm_text))
+
+        # Lastly check if we can reset.
+        if len(self.incomplete_tests) == 0:
+            # All tests are completed. Reset
+            self.state = self.AddingState.UNSET
+            self.switched = False
+
+    def check_all_complete(self) -> bool:
+        if len(self.incomplete_tests) != 0:
+            print(f"[!] We have {len(self.incomplete_tests)} incomplete tests.")
+            return False
+        return True
+
+    def get_completed(self) -> list[Test]:
+        return self.completed
+
+    def get_stats(self) -> str:
+        return (
+            f"completed: {len(self.completed)} incomplete: {len(self.incomplete_tests)}"
+        )
+
+    def get_num_completed(self) -> int:
+        return len(self.completed)
+
+    def get_num_incomplete(self) -> int:
+        return len(self.incomplete_tests)
+
+
+class TestFile:
+    """The tests parsed from one MC test file and the `mattr` flags of its RUN lines."""
+
+    def __init__(
+        self, arch: str, filename: str, manager: TestManager, mattrs: list[str] | None
+    ):
+        self.arch = arch
+        self.filename = filename
+        self.manager = manager
+        self.mattrs: list[str] | None = mattrs
+        self.test_files: list[TestFile] = list()
+        self.tests: list[Test] = list()  # Empty until add_tests() replaces it.
+
+    def add_mattr(self, mattr: str):
+        if not self.mattrs:
+            self.mattrs = list()
+        if mattr not in self.mattrs:
+            self.mattrs.append(mattr)
+
+    def add_tests(self, tests: list[Test]):
+        self.tests = tests
+
+    def get_cs_testfile_content(self) -> str:
+        return "".join(f"{test}\n" for test in self.tests)  # One test per line.
+
+
+class MCUpdater:
+    """Parses the LLVM MC test files of an architecture into `TestFile` objects."""
+
+    def __init__(
+        self,
+        arch: str,
+        mc_dir: Path,
+        excluded: list[str] | None,
+        included: list[str] | None,
+    ):
+        self.arch = arch
+        self.mc_dir = mc_dir
+        self.excluded = excluded if excluded else list()
+        self.included = included if included else list()
+        self.test_files: dict[str, TestFile] = dict()  # File name -> parsed file.
+
+    def parse_file(self, filepath: Path) -> TestFile:
+        """Parse a MC test file and return it as an object with all tests found.
+        If it couldn't parse the file cleanly, it prints errors but returns it anyways.
+        """
+        with open(filepath) as f:
+            lines = f.readlines()
+
+        test_file = TestFile(self.arch, filepath.name, TestManager(), None)
+        manager = test_file.manager
+        for line in lines:
+            if mattr := self.get_mattr(line):
+                test_file.add_mattr(mattr)
+                continue
+            encoding, asm_text = self.get_enc_asm(line)
+            if not encoding and not asm_text:
+                continue
+            manager.add_test(encoding, asm_text)
+
+        manager.check_all_complete()
+        test_file.add_tests(manager.get_completed())
+        print(f"[*] Parsed {manager.get_num_completed()} tests:\t{filepath.name}")
+        return test_file
+
+    @staticmethod
+    def get_mattr(line: str) -> str | None:
+        """Return the `mattr=...` option of a RUN line, or None if there is none."""
+        match = re.search(match_patterns["run_line"], line)
+        return match.group("mattr") if match else None
+
+    @staticmethod
+    def get_enc_asm(line: str) -> tuple[str | None, str | None]:
+        """Extract `(encoding, asm_text)` from a line; parts not found are None."""
+        enc: str | None = None
+        asm_text: str | None = None
+        if re.search(match_patterns["skip_pattern"], line):
+            return None, None
+        # Check for single line tests
+        single_match = re.search(match_patterns["single_line_disas"], line)
+        if single_match:
+            return (
+                single_match.group("enc_bytes"),
+                single_match.group("asm_text").strip(),
+            )
+
+        asm_match = re.search(match_patterns["asm_check"], line)
+        if asm_match:
+            asm_text = asm_match.group("asm_text")
+            if asm_match.group("enc_bytes"):
+                # Single line test
+                enc = asm_match.group("enc_bytes")
+            if asm_text:
+                asm_text = asm_text.strip()
+            # A single line test. Return the result
+            if asm_text and enc:
+                return enc, asm_text
+
+        # Check if the line contains at least encoding bytes
+        pattern = rf"{match_patterns['enc_prefix_disas']}{match_patterns['enc_bytes']}"
+        enc_match = re.search(pattern, line)
+        if enc_match:
+            enc = enc_match.group("enc_bytes")
+
+        return enc, asm_text
+
+    def gen_tests_in_dir(self, curr_dir: Path):
+        """Parse all files below `curr_dir` (recursively) which pass the name filters."""
+        for file in curr_dir.iterdir():
+            if file.is_dir():
+                self.gen_tests_in_dir(file)
+                continue
+            if self.included and not any(
+                re.search(x, file.name) is not None for x in self.included
+            ):
+                continue  # An include list is given and the file matches none of its patterns.
+            if any(re.search(x, file.name) is not None for x in self.excluded):
+                continue
+            self.test_files[file.name] = self.parse_file(file)  # `file` already includes curr_dir.
+
+    def gen_all(self):
+        """Parse the disassembler and assembler tests; ValueError if a directory is missing."""
+        assembly_tests = self.mc_dir.joinpath(f"{self.arch}")
+        disas_tests = self.mc_dir.joinpath(f"Disassembler/{self.arch}")
+        for tests_dir in (disas_tests, assembly_tests):
+            if not tests_dir.is_dir():  # Also False if the path does not exist.
+                raise ValueError(
+                    f"'{tests_dir}' does not exist or is not a directory. Cannot generate tests from there."
+                )
+        self.gen_tests_in_dir(disas_tests)
+        self.gen_tests_in_dir(assembly_tests)
+
+
+def parse_args() -> argparse.Namespace:
+    """Define the command line options of the updater and parse the given arguments."""
+    cli = argparse.ArgumentParser(
+        description="Synchronizes test files with LLVM",
+        prog="Test file updater",
+    )
+    cli.add_argument(
+        "-d",
+        type=Path,
+        dest="mc_dir",
+        help=f"Path to the LLVM MC test files. Default: {get_path('{LLVM_MC_TEST_DIR}')}",
+        default=get_path("{LLVM_MC_TEST_DIR}"),
+    )
+    cli.add_argument(
+        "-a",
+        required=True,
+        choices=["ARM", "PowerPC", "AArch64"],
+        dest="arch",
+        help="Name of architecture to update.",
+    )
+    cli.add_argument(
+        "-e",
+        nargs="+",
+        metavar="filename",
+        dest="excluded_files",
+        help="File names to exclude from update (can be a regex pattern).",
+    )
+    cli.add_argument(
+        "-i",
+        nargs="+",
+        metavar="filename",
+        dest="included_files",
+        help="Specific list of file names to update (can be a regex pattern).",
+    )
+    return cli.parse_args()
+
+
+if __name__ == "__main__":
+    # Script entry point: read the command line options and parse all MC test files.
+    cli = parse_args()
+    updater = MCUpdater(cli.arch, cli.mc_dir, cli.excluded_files, cli.included_files)
+    updater.gen_all()
diff --git a/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt b/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt
new file mode 100644
index 0000000000..39aad0469c
--- /dev/null
+++ b/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt
@@ -0,0 +1,71 @@
+# Test simple disassembly decoding tests
+
+# The RUN line parsing
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -mattr=+crc -disassemble < %s | FileCheck %s
+
+
+[0x00,0x0a,0x31,0xd5]
+# CHECK:   mrs x0, TRCRSR
+
+[0x80,0x08,0x31,0xd5]
+# CHECK:   mrs x0, TRCEXTINSELR
+
+[0x80,0x09,0x31,0xd5]
+# CHECK:   mrs x0, TRCEXTINSELR1
+
+# Now a block of instruction tests
+
+[0x41,0x01,0x00,0x19]
+[0x41,0x01,0x10,0x19]
+[0x62,0xf1,0x0f,0x19]
+[0xe3,0xd3,0x1f,0x19]
+
+#CHECK:       stlurb    w1, [x10]
+#CHECK-NEXT:  stlurb  w1, [x10, #-256]
+#CHECK-NEXT:  stlurb  w2, [x11, #255]
+#CHECK-NEXT:  stlurb  w3, [sp, #-3]
+
+# Now the other way around defined
+
+# CHECK: crc32b  w5, w7, w20
+//CHECK: crc32h  w28, wzr, w30
+# CHECK: crc32w  w0, w1, w2
+// CHECK: crc32x  w7, w9, x20
+# CHECK: crc32cb w9, w5, w4
+#CHECK: crc32ch w13, w17, w25
+# CHECK: crc32cw wzr, w3, w5
+# CHECK: crc32cx w18, w16, xzr
+0xe5 0x40 0xd4 0x1a
+0xfc 0x47 0xde 0x1a
+0x20 0x48 0xc2 0x1a
+0x27 0x4d 0xd4 0x9a
+0xa9 0x50 0xc4 0x1a
+0x2d 0x56 0xd9 0x1a
+0x7f 0x58 0xc5 0x1a
+0x12 0x5e 0xdf 0x9a
+
+# Now one line tests
+
+# CHECK-NEXT:  mrs     x0, AMCG1IDR_EL0        // encoding: [0xc0,0xd2,0x3b,0xd5]
+// CHECK-NEXT:  msr     AMEVCNTVOFF00_EL2, x0   // encoding: [0x00,0xd8,0x1c,0xd5]
+// CHECK:  msr     AMEVCNTVOFF00_EL2, x0   // encoding: [0x00,0xd8,0x1c,0xd5]
+
+# Annoying case. The last CHECK: <encoding> should not be matched.
+
+[0x20,0x84,0xc2,0x6e] # sqrdmlah  v0.2d, v1.2d, v2.2d
+[0x20,0x8c,0xc2,0x6e] # sqrdmlsh  v0.2d, v1.2d, v2.2d
+# CHECK: warning: invalid instruction encoding
+# CHECK: [0x20,0x84,0x02,0x2e]
+
+
+[0x62,0xfc,0x44,0x2e]
+[0x62,0xfc,0x44,0x6e]
+# Don't parse this:
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xfc,0x44,0x2e]
+# NOBF16: warning: invalid instruction encoding
+# NOBF16-NEXT: [0x62,0xfc,0x44,0x6e]
+# But this please. It belongs to the encoding above
+# CHECK: bfdot   v2.2s, v3.4h, v4.4h
+# CHECK: bfdot   v2.4s, v3.8h, v4.8h
diff --git a/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt.cs b/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt.cs
new file mode 100644
index 0000000000..a66b596e3c
--- /dev/null
+++ b/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_a.txt.cs
@@ -0,0 +1,22 @@
+0x00,0x0a,0x31,0xd5 == mrs x0, TRCRSR
+0x80,0x08,0x31,0xd5 == mrs x0, TRCEXTINSELR
+0x80,0x09,0x31,0xd5 == mrs x0, TRCEXTINSELR1
+0x41,0x01,0x00,0x19 == stlurb    w1, [x10]
+0x41,0x01,0x10,0x19 == stlurb  w1, [x10, #-256]
+0x62,0xf1,0x0f,0x19 == stlurb  w2, [x11, #255]
+0xe3,0xd3,0x1f,0x19 == stlurb  w3, [sp, #-3]
+0xe5 0x40 0xd4 0x1a == crc32b  w5, w7, w20
+0xfc 0x47 0xde 0x1a == crc32h  w28, wzr, w30
+0x20 0x48 0xc2 0x1a == crc32w  w0, w1, w2
+0x27 0x4d 0xd4 0x9a == crc32x  w7, w9, x20
+0xa9 0x50 0xc4 0x1a == crc32cb w9, w5, w4
+0x2d 0x56 0xd9 0x1a == crc32ch w13, w17, w25
+0x7f 0x58 0xc5 0x1a == crc32cw wzr, w3, w5
+0x12 0x5e 0xdf 0x9a == crc32cx w18, w16, xzr
+0xc0,0xd2,0x3b,0xd5 == mrs     x0, AMCG1IDR_EL0
+0x00,0xd8,0x1c,0xd5 == msr     AMEVCNTVOFF00_EL2, x0
+0x00,0xd8,0x1c,0xd5 == msr     AMEVCNTVOFF00_EL2, x0
+0x20,0x84,0xc2,0x6e == sqrdmlah  v0.2d, v1.2d, v2.2d
+0x20,0x8c,0xc2,0x6e == sqrdmlsh  v0.2d, v1.2d, v2.2d
+0x62,0xfc,0x44,0x2e == bfdot   v2.2s, v3.4h, v4.4h
+0x62,0xfc,0x44,0x6e == bfdot   v2.4s, v3.8h, v4.8h
diff --git a/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_b.txt b/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_b.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_c.txt b/suite/auto-sync/src/autosync/Tests/MCUpdaterTests/test_c.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/suite/auto-sync/src/autosync/Tests/test_mcupdater.py b/suite/auto-sync/src/autosync/Tests/test_mcupdater.py
new file mode 100644
index 0000000000..3b524c6aeb
--- /dev/null
+++ b/suite/auto-sync/src/autosync/Tests/test_mcupdater.py
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: 2024 Rot127 <unisono@quyllur.org>
+# SPDX-License-Identifier: BSD-3
+
+import unittest
+
+from autosync.Helper import get_path
+from autosync.MCUpdater import MCUpdater
+
+
+class TestHeaderPatcher(unittest.TestCase):
+    """Runs the MCUpdater over the files in the MCUpdater test directory."""
+
+    @classmethod
+    def setUpClass(cls):
+        test_dir = get_path("{MCUPDATER_TEST_DIR}")
+        # The `.cs` files hold the expected output, they are not parser input.
+        cls.updater = MCUpdater("ARCH", test_dir, [r".*\.cs"], list())
+
+    def test_parsing(self):
+        updater = self.updater
+        updater.gen_tests_in_dir(updater.mc_dir)
+        self.assertEqual(len(updater.test_files), 3)
+
+        parsed = updater.test_files["test_a.txt"]
+        self.assertListEqual(parsed.mattrs, ["mattr=+v8.1a", "mattr=+crc"])
+        self.assertEqual(len(parsed.tests), 22)
+        self.assertEqual(parsed.manager.get_num_incomplete(), 0)
+
+        expected_file = get_path("{MCUPDATER_TEST_DIR}").joinpath("test_a.txt.cs")
+        with open(expected_file) as f:
+            self.assertEqual(f.read(), parsed.get_cs_testfile_content())
diff --git a/suite/auto-sync/src/autosync/path_vars.json b/suite/auto-sync/src/autosync/path_vars.json
index 498f1e4518..2820c7b367 100644
--- a/suite/auto-sync/src/autosync/path_vars.json
+++ b/suite/auto-sync/src/autosync/path_vars.json
@@ -2,6 +2,7 @@
   "paths": {
     "{LLVM_ROOT}": "{AUTO_SYNC_ROOT}/vendor/llvm_root/",
     "{LLVM_TARGET_DIR}": "{LLVM_ROOT}/llvm/lib/Target/",
+    "{LLVM_MC_TEST_DIR}": "{LLVM_ROOT}/llvm/test/MC/",
     "{LLVM_TBLGEN_BIN}": "{LLVM_ROOT}/build/bin/llvm-tblgen",
     "{LLVM_INCLUDE_DIR}": "{LLVM_ROOT}/llvm/include/",
     "{VENDOR_DIR}": "{AUTO_SYNC_ROOT}/vendor/",
@@ -26,7 +27,9 @@
     "{DIFFER_TEST_CONFIG_FILE}": "{DIFFER_TEST_DIR}/test_arch_config.json",
     "{DIFFER_TEST_OLD_SRC_DIR}": "{DIFFER_TEST_DIR}/old_src/",
     "{DIFFER_TEST_NEW_SRC_DIR}": "{DIFFER_TEST_DIR}/new_src/",
-    "{DIFFER_TEST_PERSISTENCE_FILE}": "{DIFFER_TEST_DIR}/test_saved_patches.json"
+    "{DIFFER_TEST_PERSISTENCE_FILE}": "{DIFFER_TEST_DIR}/test_saved_patches.json",
+    "{AUTO_SYNC_TEST_DIR}": "{AUTO_SYNC_SRC}/Tests/",
+    "{MCUPDATER_TEST_DIR}": "{AUTO_SYNC_TEST_DIR}/MCUpdaterTests/"
   },
   "create_during_runtime": [
     "{BUILD_DIR}",