Skip to content

Commit

Permalink
Remove the hard coded page tables
Browse files Browse the repository at this point in the history
Currently the guest VM starts with a hardcoded page table to jump
to 64-bit long mode, then sets up another one in the low memory area,
and we accept a 2MiB page for the low memory area. However, the Linux
kernel may not like it if the memory between 640KiB and 1MiB has
already been accepted.

In this CR we use the 640KiB TEMP_MEM section in the low memory
area for the page tables. The hypervisor adds it and the guest can use
it without doing a tdcall, so we can dynamically set up the page
tables in the .bss section.

Change-Id: I7dfc18304e91fbbb7849d2bd2fbc1784ba5bae91
  • Loading branch information
dingelish committed Aug 6, 2024
1 parent 3953b08 commit 74ada5b
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 76 deletions.
34 changes: 0 additions & 34 deletions stage0_bin_tdx/layout.ld
Original file line number Diff line number Diff line change
Expand Up @@ -246,40 +246,6 @@ SECTIONS {

ASSERT(. == TOP - BFV_SIZE + 120 + 24 + 4, "wrong BFV header size")

/* The ROM area is read-only, so the initial page tables are hard-coded here
 * and used to switch to 64-bit long mode. After entering 64-bit long mode we
 * are able to make tdcall to accept memory for the ram_low section and create
 * another set of page tables there.
 *
 * Layout: four consecutive 4KiB tables (PML4, PDPT, PD_0, PD_3), each
 * zero-filled except for the entries written explicitly below.
 * Flag bits used in the entries:
 *   0x23 = PAGE_ACCESSED | PAGE_READ_WRITE | PAGE_PRESENT
 *   0xA3 = HUGE_PAGE | PAGE_ACCESSED | PAGE_READ_WRITE | PAGE_PRESENT
 *   0xA1 = HUGE_PAGE | PAGE_ACCESSED | PAGE_PRESENT (not writable) */
.page_tables ALIGN(4K): AT (TOP - BFV_SIZE + 0x1000) {
bios_pml4 = .; /* PML4 base; this is the value loaded into CR3 */
QUAD(ADDR(.page_tables) + 0x1000 + 0x23) /* PML4[0] -> bios_pdpt, covers 0..512GiB */
FILL(0) /* every other PML4 entry is zero (not present) */
. = bios_pml4 + 0x1000;

bios_pdpt = .;
QUAD(ADDR(.page_tables) + 0x2000 + 0x23) /* PDPT[0] -> bios_pd_0, covers 0..1GiB */
QUAD(0) /* PDPT[1]: 1..2GiB, not mapped */
QUAD(0) /* PDPT[2]: 2..3GiB, not mapped */
QUAD(ADDR(.page_tables) + 0x3000 + 0x23) /* PDPT[3] -> bios_pd_3, covers 3..4GiB */
FILL(0) /* remaining PDPT entries are zero (not present) */
. = bios_pdpt + 0x1000;

bios_pd_0 = .;
QUAD(0x0 + 0xA3) /* PD_0[0]: 0..2MiB, HUGE_PAGE | PAGE_ACCESSED | PAGE_READ_WRITE | PAGE_PRESENT */
FILL(0) /* remaining PD_0 entries are zero (not present) */
. = bios_pd_0 + 0x1000;

bios_pd_3 = .;
FILL(0) /* PD_3[0..510] are zero (not present) */
. = bios_pd_3 + 0xFF8; /* skip to the last 8-byte entry, index 511 */
QUAD(0xFFE000A1) /* PD_3[511]: 4GiB-2MiB..4GiB, HUGE_PAGE | PAGE_ACCESSED | PAGE_PRESENT */
} > bios
/* Sanity checks: each table must sit at its expected 4KiB offset, because the
 * entries above encode those offsets as literal constants. */
ASSERT((bios_pml4 == ADDR(.page_tables)), "wrong pml4 address")
ASSERT((bios_pdpt == ADDR(.page_tables) + 0x1000), "wrong pdpt address")
ASSERT((bios_pd_0 == ADDR(.page_tables) + 0x2000), "wrong pd_0 address")
ASSERT((bios_pd_3 == ADDR(.page_tables) + 0x3000), "wrong pd_3 address")
ASSERT((. == ADDR(.page_tables) + 0x4000), "wrong page table size")

.rodata : {
/* Include large section (.lrodata) to support large code model.
* See <https://lld.llvm.org/ELF/large_sections.html>.
Expand Down
80 changes: 38 additions & 42 deletions stage0_bin_tdx/src/asm/tdx.s
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,44 @@ _tdx_32bit_long_mode_start:
# Enable PAE by setting bit 5 of the value that is written to CR4.
# NOTE(review): EAX is presumably loaded from CR4 just above this hunk — confirm.
bts $0x05, %eax # PAE
movl %eax, %cr4

# Load CR3 with bios_pml4, the PML4 that the linker script lays out.
movl $bios_pml4, %ecx
movl %ecx, %cr3
# Clear BSS: base address goes to EDI, value (0) goes to EAX,
# count goes into ECX. The page tables built below are located in BSS.
# `rep stosb` stores one byte per iteration, so bss_size is used as a byte
# count here.
# NOTE(review): relies on the direction flag being clear — not established in
# the visible code; confirm it is cleared earlier.
movl $bss_start, %edi
movl $bss_size, %ecx
xorl %eax, %eax
rep stosb

# Build a minimal identity-mapped page table hierarchy in the freshly zeroed
# tables. {pml4}, {pdpt}, {pd_0} and {pd_3} are template placeholders that are
# substituted with the table addresses when this file is assembled.
# Every store below writes only the low 32 bits of an 8-byte entry; the high
# 32 bits are left as the zeros written by the BSS clear above.

# Set the first entry of PML4 to point to PDPT (0..512GiB).
movl ${pdpt}, %esi
orl $3, %esi # esi |= 3 (PRESENT and WRITABLE)
movl %esi, ({pml4}) # set low dword of PML4[0]

# Set the first entry of PDPT to point to PD_0 (0..1GiB).
movl ${pd_0}, %esi
orl $3, %esi # esi |= 3 (PRESENT and WRITABLE)
movl %esi, ({pdpt}) # set low dword of PDPT[0]

# Set the fourth entry of PDPT to point to PD_3 (3..4GiB).
movl ${pdpt}, %eax
movl ${pd_3}, %esi
orl $3, %esi # esi |= 3 (PRESENT and WRITABLE)
movl %esi, 24(%eax) # set low dword of PDPT[3]; 24 = 3 entries * 8 bytes

# Set the first entry of PD_0 to point to an identity-mapped 2MiB huge page (0..2MiB).
movl $0x83, %esi # esi = 0x0 | 0x83 (PRESENT and WRITABLE and HUGE_PAGE)
movl %esi, ({pd_0}) # set low dword of PD_0[0]

# Set the last entry of PD_3 to point to an identity-mapped 2MiB huge page ((4GiB-2MiB)..4GiB).
# This is where the firmware ROM image is mapped, so we don't make it writable.
movl ${pd_3}, %eax
movl $0xFFE00000, %esi # address of 4GiB-2MiB
orl $0x81, %esi # esi |= 0x81 (PRESENT and HUGE_PAGE)
movl %esi, 0xFF8(%eax) # set low dword of PD_3[511]; 0xFF8 = 511 entries * 8 bytes

# Point CR3 at the PML4 that was just built, so the writable tables are used
# from here on.
movl ${pml4}, %eax
movl %eax, %cr3

# In a TDX VM, IA32_EFER msr is set by tdx module.
# No need for rdmsr/wrmsr
Expand Down Expand Up @@ -80,50 +115,11 @@ _tdx_64bit_start:
# Copy data_size doublewords with `rep movsd` (one dword per iteration).
# NOTE(review): the source (RSI) and destination (RDI) are set up above this
# hunk and are not visible here.
movl $data_size, %ecx
rep movsd

# Clear BSS: base address goes to RDI, value (0) goes to RAX,
# count goes into RCX.
# NOTE(review): `rep stosq` stores 8 bytes per iteration, so bss_size is used
# as a quadword count here, whereas the 32-bit startup path in this file uses
# the same symbol as a byte count with `rep stosb` — confirm the intended unit.
movq $bss_start, %rdi
movq $bss_size, %rcx
xorq %rax, %rax
rep stosq

# Set up the stack. Stack now is in ram_low.
# Writing ESP zero-extends into RSP, so stack_start must fit in 32 bits.
movl $stack_start, %esp
push $0 # NOTE(review): presumably a null return address / frame terminator — confirm

# Write a marker value to TEST_DATA.
# NOTE(review): purpose of the marker is not visible here — confirm.
movl $0xdeadbeaf, (TEST_DATA)

# Build a minimal identity-mapped page table hierarchy. {pml4}, {pdpt}, {pd_0}
# and {pd_3} are template placeholders that are substituted with the table
# addresses when this file is assembled. Every store below writes only the low
# 32 bits of an 8-byte entry; the high 32 bits keep whatever the tables
# already held (zero, if the tables live in the BSS range cleared above).

# Set the first entry of PML4 to point to PDPT (0..512GiB).
movl ${pdpt}, %esi
orl $3, %esi # esi |= 3 (PRESENT and WRITABLE)
movl %esi, ({pml4}) # set low dword of PML4[0]

# Set the first entry of PDPT to point to PD_0 (0..1GiB).
movl ${pd_0}, %esi
orl $3, %esi # esi |= 3 (PRESENT and WRITABLE)
movl %esi, ({pdpt}) # set low dword of PDPT[0]

# Set the fourth entry of PDPT to point to PD_3 (3..4GiB).
movl ${pdpt}, %eax
movl ${pd_3}, %esi
orl $3, %esi # esi |= 3 (PRESENT and WRITABLE)
movl %esi, 24(%eax) # set low dword of PDPT[3]; 24 = 3 entries * 8 bytes

# Set the first entry of PD_0 to point to an identity-mapped 2MiB huge page (0..2MiB).
movl $0x83, %esi # esi = 0x0 | 0x83 (PRESENT and WRITABLE and HUGE_PAGE)
movl %esi, ({pd_0}) # set low dword of PD_0[0]

# Set the last entry of PD_3 to point to an identity-mapped 2MiB huge page ((4GiB-2MiB)..4GiB).
# This is where the firmware ROM image is mapped, so we don't make it writable.
movl ${pd_3}, %eax
movl $0xFFE00000, %esi # address of 4GiB-2MiB
orl $0x81, %esi # esi |= 0x81 (PRESENT and HUGE_PAGE)
movl %esi, 0xFF8(%eax) # set low dword of PD_3[511]; 0xFF8 = 511 entries * 8 bytes

# Point CR3 at the PML4 that was just built, so the writable tables are used
# from here on. The movl already zero-extends into RAX, so the preceding xorq
# is redundant but harmless.
xorq %rax, %rax
movl ${pml4}, %eax
movq %rax, %cr3

# ...and jump to Rust code.
jmp rust64_start

0 comments on commit 74ada5b

Please sign in to comment.