ucb-bar · soohyuk-cho · Oct 10, 2024 · Oct 23, 2024 · abejgonzalez · Oct 11, 2024
diff --git a/software/firemarshal b/software/firemarshal
diff --git a/sparsity-testing-scripts/create_mem_bin.py b/sparsity-testing-scripts/create_mem_bin.py
@@ -0,0 +1,19 @@
+# create_mem_bin.py
+import struct
+
+def create_mem_bin(filename, total_size=0x10000000, pattern=0xDEADBEEF):
+    """Write a memory image: a 16-byte marker followed by zero padding.
+
+    The file starts with four little-endian 32-bit copies of ``pattern`` and
+    is zero-filled so that it is exactly ``total_size`` bytes long.
+
+    Args:
+        filename: Path of the image to create (overwritten if it exists).
+        total_size: Exact size of the resulting file in bytes.
+        pattern: Unsigned 32-bit value repeated at the start of the image.
+
+    Raises:
+        ValueError: If ``total_size`` is negative.
+        struct.error: If ``pattern`` does not fit in an unsigned 32-bit word.
+    """
+    if total_size < 0:
+        raise ValueError(f"total_size must be non-negative, got {total_size}")
+    # Clip the marker so the file never exceeds total_size, even when
+    # total_size is smaller than the 16-byte marker itself.
+    header = (struct.pack('<I', pattern) * 4)[:total_size]  # Little endian
+    chunk_size = 4096  # Write in chunks to handle large sizes
+    zero_chunk = bytes(chunk_size)
+    with open(filename, 'wb') as f:
+        f.write(header)
+        # Write the remaining bytes as zeros
+        remaining = total_size - len(header)
+        while remaining > 0:
+            write_size = min(chunk_size, remaining)
+            f.write(zero_chunk[:write_size])
+            remaining -= write_size
+
+# Script entry point: write an image with the default size and pattern to
+# 'mem.bin' (a relative path, so it lands in the current working directory).
+if __name__ == '__main__':
+    create_mem_bin('mem.bin')
diff --git a/sparsity-testing-scripts/create_mem_regions.py b/sparsity-testing-scripts/create_mem_regions.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+import sys
+import struct
+import os
+
+def parse_regions(regions_file):
+    """
+    Parses the regions file and returns a list of (start, size) tuples.
+
+    Each data line holds exactly two whitespace-separated fields: the start
+    address in hexadecimal and the size in decimal. Blank lines and lines
+    whose first non-blank character is '#' are ignored. Malformed lines are
+    reported with a warning on stdout and skipped.
+    """
+    regions = []
+    with open(regions_file, 'r') as f:
+        for raw_line in f:
+            line = raw_line.strip()
+            # Strip before testing for '#' so indented comments are skipped
+            # instead of being parsed as data.
+            if not line or line.startswith('#'):
+                continue
+            parts = line.split()
+            if len(parts) != 2:
+                print(f"Warning: Invalid line format: {line}")
+                continue
+            start_str, size_str = parts
+            try:
+                start = int(start_str, 16)
+                size = int(size_str, 10)
+            except ValueError:
+                # The size field is decimal, so don't call it "hexadecimal"
+                print(f"Warning: Invalid number in line: {line}")
+                continue
+            regions.append((start, size))
+    return regions
+
+def read_elf_header(f):
+    """
+    Reads the ELF file header from the start of 'f' and returns its fields
+    as a dictionary keyed by the standard ELF field names.
+
+    The header is decoded with the 64-bit little-endian layout only; the
+    caller is responsible for checking e_ident (magic, class) afterwards.
+    Prints an error and exits with status 1 if the file is too short to
+    contain a complete header.
+    """
+    # ELF header for 64-bit little endian
+    elf_header_struct = struct.Struct('<16sHHIQQQIHHHHHH')
+    field_names = (
+        'e_ident', 'e_type', 'e_machine', 'e_version', 'e_entry',
+        'e_phoff', 'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize',
+        'e_phnum', 'e_shentsize', 'e_shnum', 'e_shstrndx',
+    )
+    f.seek(0)
+    elf_header_data = f.read(elf_header_struct.size)
+    # A truncated file would otherwise surface as a raw struct.error
+    # traceback; report it the same way incomplete program headers are.
+    if len(elf_header_data) < elf_header_struct.size:
+        print("Error: Incomplete ELF header.")
+        sys.exit(1)
+    return dict(zip(field_names, elf_header_struct.unpack(elf_header_data)))
+
+def read_program_headers(f, elf_header):
+    """
+    Decodes the program header table described by 'elf_header'.
+
+    Seeks to e_phoff and reads e_phnum entries of e_phentsize bytes each,
+    decoding the leading 56 bytes of every entry with the 64-bit
+    little-endian layout. Returns one dictionary per entry; prints an error
+    and exits with status 1 when an entry is shorter than that layout.
+    """
+    entry_layout = struct.Struct('<IIQQQQQQ')  # For 64-bit ELF
+    keys = (
+        'p_type', 'p_flags', 'p_offset', 'p_vaddr',
+        'p_paddr', 'p_filesz', 'p_memsz', 'p_align',
+    )
+    f.seek(elf_header['e_phoff'])
+    table = []
+    for _ in range(elf_header['e_phnum']):
+        raw = f.read(elf_header['e_phentsize'])
+        if len(raw) < entry_layout.size:
+            print("Error: Incomplete program header.")
+            sys.exit(1)
+        # Entries may be larger than the layout; only the leading part is decoded
+        values = entry_layout.unpack(raw[:entry_layout.size])
+        table.append(dict(zip(keys, values)))
+    return table
+
+def extract_data(f, program_headers, start_va, size):
+    """
+    Extracts 'size' bytes of memory contents starting at virtual address
+    'start_va' from the PT_LOAD segments of the ELF file 'f'.
+
+    Only the file-backed part of a segment (the first p_filesz bytes) is read
+    from the file. Addresses covered by no segment, or lying in the
+    zero-initialised tail of a segment (between p_filesz and p_memsz), are
+    returned as zero bytes.
+
+    Returns a bytearray of exactly 'size' bytes (empty when size <= 0).
+    """
+    if size <= 0:
+        return bytearray()
+    # Pre-sized, zero-filled buffer: everything not backed by file data stays 0
+    data = bytearray(size)
+    end_va = start_va + size
+    for ph in program_headers:
+        if ph['p_type'] != 1:  # PT_LOAD
+            continue
+        seg_start = ph['p_vaddr']
+        # Bytes past p_filesz are not stored in the file; reading them would
+        # pick up whatever happens to follow the segment on disk.
+        seg_file_end = seg_start + min(ph['p_filesz'], ph['p_memsz'])
+        overlap_start = max(start_va, seg_start)
+        overlap_end = min(end_va, seg_file_end)
+        if overlap_start >= overlap_end:
+            continue
+        overlap_size = overlap_end - overlap_start
+        # Translate the virtual address into a file offset and read
+        f.seek(ph['p_offset'] + (overlap_start - seg_start))
+        chunk = f.read(overlap_size)
+        if len(chunk) < overlap_size:
+            # Truncated file: the missing bytes stay zero in the buffer
+            print(f"Warning: Could not read enough data for VA 0x{overlap_start:X}")
+        # Same-length slice assignment keeps the buffer at exactly 'size' bytes
+        region_offset = overlap_start - start_va
+        data[region_offset:region_offset + len(chunk)] = chunk
+    return data
+
+def create_binary_file(data, output_bin):
+    """
+    Dumps 'data' verbatim into the file 'output_bin', replacing any
+    existing content.
+    """
+    with open(output_bin, mode='wb') as sink:
+        sink.write(data)
+
+def create_assembly_file(symbol_name, section_name, data_bin, output_asm):
+    """
+    Writes 'output_asm': an assembly source that declares section
+    'section_name' (flags "aw", @progbits), exports 'symbol_name' at its
+    start and pulls in the contents of 'data_bin' with .incbin.
+    """
+    listing = (
+        f"/* {output_asm} - Auto-generated Assembly File */\n\n"
+        f"    .section {section_name}, \"aw\", @progbits\n"
+        f"    .global {symbol_name}\n"
+        f"{symbol_name}:\n"
+        f"    .incbin \"{data_bin}\"\n\n"
+    )
+    with open(output_asm, 'w') as asm_out:
+        asm_out.write(listing)
+
+def assemble_section(asm_file, obj_file):
+    """
+    Assembles 'asm_file' into the object file 'obj_file' by running
+    riscv64-unknown-elf-as.
+
+    Prints an error and exits with status 1 if the assembler reports a
+    failure or cannot be started at all (for example, when it is not on PATH).
+    """
+    import subprocess
+    cmd = ['riscv64-unknown-elf-as', '-o', obj_file, asm_file]
+    try:
+        subprocess.check_call(cmd)
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Assembly failed for {asm_file}: {e}")
+        sys.exit(1)
+    except OSError as e:
+        # Raised when the assembler binary itself cannot be executed;
+        # without this the user only sees a raw traceback.
+        print(f"Error: Could not run {cmd[0]}: {e}")
+        sys.exit(1)
+
+def main():
+    """
+    Command-line entry point: extracts every region listed in a regions file
+    from a 64-bit ELF image and turns each one into an object file.
+
+    Expects exactly two arguments: the ELF path and the regions file path.
+    For region index N it writes data_mem{N}.bin, data_mem{N}.S and
+    data_mem{N}.o using relative paths (the current working directory).
+    Exits with status 1 on a usage error, a missing input file, an empty
+    region list, or an input that is not a 64-bit ELF.
+    """
+    if len(sys.argv) != 3:
+        # Report the script's real name instead of a stale hard-coded one
+        script_name = os.path.basename(sys.argv[0]) if sys.argv else "create_mem_regions.py"
+        print(f"Usage: python3 {script_name} <mem.elf> <regions.txt>")
+        sys.exit(1)
+
+    mem_elf = sys.argv[1]
+    regions_file = sys.argv[2]
+
+    # Check if mem.elf exists
+    if not os.path.isfile(mem_elf):
+        print(f"Error: File '{mem_elf}' does not exist.")
+        sys.exit(1)
+
+    # Check if regions.txt exists
+    if not os.path.isfile(regions_file):
+        print(f"Error: File '{regions_file}' does not exist.")
+        sys.exit(1)
+
+    regions = parse_regions(regions_file)
+    if not regions:
+        print("Error: No valid regions found in regions.txt.")
+        sys.exit(1)
+
+    # Open mem.elf
+    with open(mem_elf, 'rb') as f:
+        elf_header = read_elf_header(f)
+        # Verify ELF Magic Number
+        if elf_header['e_ident'][:4] != b'\x7fELF':
+            print("Error: Not a valid ELF file.")
+            sys.exit(1)
+        # Verify 64-bit ELF (EI_CLASS byte equals 2)
+        if elf_header['e_ident'][4] != 2:
+            print("Error: Only 64-bit ELF files are supported.")
+            sys.exit(1)
+        # Parse program headers
+        program_headers = read_program_headers(f, elf_header)
+
+        for idx, (start, size) in enumerate(regions):
+            data = extract_data(f, program_headers, start, size)
+
+            # Create binary file
+            data_bin = f"data_mem{idx}.bin"
+            create_binary_file(data, data_bin)
+
+            # Create assembly file; the symbol deliberately has no leading
+            # '.', unlike the section name
+            section_name = f".data_mem{idx}"
+            symbol_name = f"data_mem{idx}"
+            asm_file = f"data_mem{idx}.S"
+            create_assembly_file(symbol_name, section_name, data_bin, asm_file)
+
+            # Assemble into .o file
+            obj_file = f"data_mem{idx}.o"
+            assemble_section(asm_file, obj_file)
+
+    print("All memory regions processed successfully.")
+
+# Run main() only when executed as a script, not when imported as a module.
+if __name__ == "__main__":
+    main()
diff --git a/sparsity-testing-scripts/final_out_mem.elf b/sparsity-testing-scripts/final_out_mem.elf
diff --git a/sparsity-testing-scripts/final_program.elf b/sparsity-testing-scripts/final_program.elf
diff --git a/sparsity-testing-scripts/generate_sparse_elf.sh b/sparsity-testing-scripts/generate_sparse_elf.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+# generate_sparse_elf.sh
+
+# Description:
+# This script takes an input ELF file and generates a final sparse ELF file.
+# It automates the scanning of memory regions, generation of the linker script,
+# extraction of data sections, assembly, and linking, including handling
+# 'tohost' and 'fromhost' symbols.
+
+# Usage:
+#   ./generate_sparse_elf.sh <input_elf> <output_elf>
+
+# Check for correct number of arguments
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <input_elf> <output_elf>" >&2
+    exit 1
+fi
+
+# Input and output ELF files
+INPUT_ELF="$1"
+OUTPUT_ELF="$2"
+
+# Fail early with a clear message instead of letting a later step trip
+# over a missing input file
+if [ ! -f "$INPUT_ELF" ]; then
+    echo "Error: Input ELF '$INPUT_ELF' does not exist." >&2
+    exit 1
+fi
+
+# Temporary and intermediate files directory (relative to the current directory)
+WORK_DIR="sparse_elf_workdir"
+if ! mkdir -p "$WORK_DIR"; then
+    echo "Error: Could not create work directory '$WORK_DIR'." >&2
+    exit 1
+fi
+
+# Paths to scripts (assuming they are in the same directory)
+SCRIPT_DIR="$(dirname "$0")"
+EXTRACT_REGIONS_SCRIPT="$SCRIPT_DIR/create_mem_regions.py"
+GENERATE_LINKER_SCRIPT="$SCRIPT_DIR/linker_script_gen.py"
+MEMORY_SCANNER_CPP="$SCRIPT_DIR/memory_region_scanner.cpp"
+MEMORY_SCANNER_EXEC="$WORK_DIR/memory_region_scanner"
+LINKER_SCRIPT="$WORK_DIR/sparse_mem.ld"
+REGIONS_FILE="$WORK_DIR/regions.txt"
+
+# Toolchain prefix (adjust if necessary)
+RISCV_PREFIX="riscv64-unknown-elf-"
+AS="${RISCV_PREFIX}as"
+LD="${RISCV_PREFIX}ld"
+NM="${RISCV_PREFIX}nm"
+OBJCOPY="${RISCV_PREFIX}objcopy"
+
+# Step 1: Build the memory region scanner from source (links against libelf)
+echo "Compiling memory region scanner..."
+if ! g++ -o "$MEMORY_SCANNER_EXEC" "$MEMORY_SCANNER_CPP" -lelf; then
+    echo "Error: Failed to compile memory_region_scanner.cpp"
+    exit 1
+fi
+
+# Step 2: Run the scanner over the input ELF to produce the regions file
+echo "Scanning $INPUT_ELF to generate memory regions..."
+CHUNK_SIZE=1024  # Passed to the scanner as its chunk size; tune as needed
+if ! "$MEMORY_SCANNER_EXEC" "$INPUT_ELF" "$CHUNK_SIZE" "$REGIONS_FILE"; then
+    echo "Error: Failed to scan memory regions."
+    exit 1
+fi
+
+echo "Memory regions written to $REGIONS_FILE"
+
+# Step 3: Turn the regions file into a linker script
+echo "Generating linker script..."
+if ! python3 "$GENERATE_LINKER_SCRIPT" "$REGIONS_FILE" "$LINKER_SCRIPT"; then
+    echo "Error: Failed to generate linker script."
+    exit 1
+fi
+
+# Step 4: Pull each region's bytes out of the ELF and wrap them in assembly files
+echo "Extracting data sections and creating assembly files..."
+if ! python3 "$EXTRACT_REGIONS_SCRIPT" "$INPUT_ELF" "$REGIONS_FILE"; then
+    echo "Error: Failed to extract data sections."
+    exit 1
+fi
+
+# Step 5: Assemble the assembly files into object files
+echo "Assembling data section assembly files..."
+
+# Collect all .S files from the current directory. An unmatched glob is left
+# as the literal pattern, so confirm that a real file was found.
+ASM_FILES=(data_mem*.S)
+if [ ! -e "${ASM_FILES[0]}" ]; then
+    echo "Error: No data_mem*.S files were generated."
+    exit 1
+fi
+
+for ASM_FILE in "${ASM_FILES[@]}"; do
+    OBJ_FILE="${ASM_FILE%.S}.o"
+    "$AS" -o "$OBJ_FILE" "$ASM_FILE"
+    if [ $? -ne 0 ]; then
+        echo "Error: Assembly failed for $ASM_FILE"
+        exit 1
+    fi
+done
+
+# Move generated files to work directory; the link step looks for the
+# objects there, so a failed move must stop the script.
+if ! mv data_mem*.bin data_mem*.S data_mem*.o "$WORK_DIR/"; then
+    echo "Error: Failed to move generated files to $WORK_DIR"
+    exit 1
+fi
+
+# Step 6: Find 'tohost' and 'fromhost' symbols in the input ELF
+echo "Finding 'tohost' and 'fromhost' symbols in $INPUT_ELF..."
+# Accept only three-field nm lines (address, type, name) whose name matches
+# exactly, and stop at the first hit. Undefined symbols have no address
+# field, so they can no longer yield "U" as the address, and duplicate
+# matches cannot produce a multi-line value.
+TOHOST_ADDR=$("$NM" "$INPUT_ELF" | awk 'NF == 3 && $3 == "tohost" {print $1; exit}')
+FROMHOST_ADDR=$("$NM" "$INPUT_ELF" | awk 'NF == 3 && $3 == "fromhost" {print $1; exit}')
+
+if [ -z "$TOHOST_ADDR" ] || [ -z "$FROMHOST_ADDR" ]; then
+    echo "Error: 'tohost' or 'fromhost' symbols not found in $INPUT_ELF"
+    exit 1
+fi
+
+echo "tohost address: 0x$TOHOST_ADDR"
+echo "fromhost address: 0x$FROMHOST_ADDR"
+
+# Step 7: Link every region object into the output ELF
+echo "Linking object files to create $OUTPUT_ELF..."
+
+# Region objects produced by the earlier steps
+OBJ_FILES=("$WORK_DIR"/data_mem*.o)
+
+# Linker invocation: generated script, tohost/fromhost pinned to the addresses
+# found in the input ELF, then all region objects
+LINKER_CMD=(
+    "$LD" -T "$LINKER_SCRIPT"
+    "--defsym" "tohost=0x$TOHOST_ADDR"
+    "--defsym" "fromhost=0x$FROMHOST_ADDR"
+    -o "$OUTPUT_ELF"
+    "${OBJ_FILES[@]}"
+)
+
+# To link a main program object as well, append it to the command:
+# MAIN_OBJ="main.o"
+# LINKER_CMD+=("$MAIN_OBJ")
+
+# Run the linker and stop on failure
+if ! "${LINKER_CMD[@]}"; then
+    echo "Error: Linking failed."
+    exit 1
+fi
+
+echo "Final sparse ELF file created: $OUTPUT_ELF"
+
+# Optional cleanup: uncomment to delete the intermediate files
+# rm -rf "$WORK_DIR"
+
+exit 0