From b5727a4b731f1e842bc4ed3bc40c384dd9833545 Mon Sep 17 00:00:00 2001 From: Per Lundberg Date: Mon, 8 Oct 2018 23:15:59 +0300 Subject: [PATCH] Bug fix: thread_create should use PL3 stack for thread entry point (#122) It would previously use the PL0 stack for the newly created thread. This commit fixes this, so that the PL3 stack (which is much less limited in size) is used instead. The use of the PL0 stack was a bug I introduced while fixing #21 via pull request #30, but since it was three years ago I can honestly not say with certainty whether this was "by design" or "by accident". Either way, doing this fix is much better than (ab)using the PL0 stack since the latter is limited in size to a single 4 KiB page => way too little for certain recursive programs and larger programs in general. (This change was a drive-by change while I was researching the root cause for some issues mentioned in #120) --- libraries/system/system_calls.h | 1 + storm/generic/system_call.c | 4 +- storm/include/storm/generic/system_call.h | 3 +- storm/include/storm/generic/thread.h | 2 +- storm/x86/system_calls-auto.c | 2 +- storm/x86/system_calls.rb | 4 +- storm/x86/thread.c | 73 +++++++++++++++-------- storm/x86/wrapper.c | 69 ++++++++++----------- 8 files changed, 91 insertions(+), 67 deletions(-) diff --git a/libraries/system/system_calls.h b/libraries/system/system_calls.h index 15196e20..28587190 100644 --- a/libraries/system/system_calls.h +++ b/libraries/system/system_calls.h @@ -223,6 +223,7 @@ static inline return_type system_call_thread_create(thread_entry_point_type *thr asm volatile("pushl %1\n" "pushl %2\n" + "pushl %%esp\n" // Need to be last (i.e. first) because %1 and %2 will be %esp-relative references. "lcall %3, $0" : "=a" (return_value) : "ri" (argument), diff --git a/storm/generic/system_call.c b/storm/generic/system_call.c index c6b1dc3f..9f4f69fa 100644 --- a/storm/generic/system_call.c +++ b/storm/generic/system_call.c @@ -37,9 +37,9 @@ return_type system_call_thread_name_set(char *name) return STORM_RETURN_SUCCESS; } -return_type system_call_thread_create(void *(*start_routine) (void *), void *argument) +return_type system_call_thread_create(uint32_t current_thread_esp, void *(*start_routine) (void *), void *argument) { - return thread_create(start_routine, argument); + return thread_create(current_thread_esp, start_routine, argument); } return_type system_call_thread_control(thread_id_type thread_id, unsigned int class, unsigned int parameter) diff --git a/storm/include/storm/generic/system_call.h b/storm/include/storm/generic/system_call.h index e4bb9c2c..40632db0 100644 --- a/storm/include/storm/generic/system_call.h +++ b/storm/include/storm/generic/system_call.h @@ -48,6 +48,7 @@ extern return_type system_call_service_get(const char *protocol_name, service_pa extern return_type system_call_service_protocol_get_amount(unsigned int *number_of_protocols); extern return_type system_call_service_protocol_get(unsigned int *maximum_protocols, service_protocol_type *protocol_info); extern return_type system_call_thread_control(thread_id_type thread_id, unsigned int class, unsigned int parameter); -extern return_type system_call_thread_create(void *(*start_routine) (void *), void *argument); +extern return_type system_call_thread_create(uint32_t current_thread_esp, void *(*start_routine) (void *), + void *argument); extern return_type system_call_thread_name_set(char *name); extern return_type system_call_timer_read(time_type *timer); diff --git a/storm/include/storm/generic/thread.h b/storm/include/storm/generic/thread.h index 4247c6e7..e43e6d15 100644 --- a/storm/include/storm/generic/thread.h +++ b/storm/include/storm/generic/thread.h @@ -24,7 +24,7 @@ extern tss_list_type *idle_tss_node; extern void thread_init(void); extern thread_id_type thread_get_free_id(void); extern return_type thread_control(thread_id_type thread_id, unsigned int class, unsigned int parameter); -extern return_type thread_create(void *(*start_routine)(void *), void *argument); +extern return_type thread_create(uint32_t current_thread_esp, void *(*start_routine)(void *), void *argument); extern storm_tss_type *thread_get_tss(thread_id_type thread_id); extern tss_list_type *thread_link(storm_tss_type *tss); extern void thread_unlink(thread_id_type thread_id); diff --git a/storm/x86/system_calls-auto.c b/storm/x86/system_calls-auto.c index 5aee4e6c..ba6fb9ce 100644 --- a/storm/x86/system_calls-auto.c +++ b/storm/x86/system_calls-auto.c @@ -34,7 +34,7 @@ const system_call_type system_call[] = { SYSTEM_CALL_PROCESS_CREATE, wrapper_process_create, 1 }, { SYSTEM_CALL_PROCESS_NAME_SET, wrapper_process_name_set, 1 }, { SYSTEM_CALL_PROCESS_PARENT_UNBLOCK, wrapper_process_parent_unblock, 0 }, - { SYSTEM_CALL_THREAD_CREATE, wrapper_thread_create, 2 }, + { SYSTEM_CALL_THREAD_CREATE, wrapper_thread_create, 3 }, { SYSTEM_CALL_THREAD_CONTROL, wrapper_thread_control, 3 }, { SYSTEM_CALL_THREAD_NAME_SET, wrapper_thread_name_set, 1 }, { SYSTEM_CALL_TIMER_READ, wrapper_timer_read, 1 }, diff --git a/storm/x86/system_calls.rb b/storm/x86/system_calls.rb index 7ce11357..e9440a8c 100755 --- a/storm/x86/system_calls.rb +++ b/storm/x86/system_calls.rb @@ -49,7 +49,7 @@ 'process_name_set', 1, 'process_parent_unblock', 0, - 'thread_create', 2, + 'thread_create', 3, # 2 ordinary arguments and one "extra" argument for old PL3 ESP value. 'thread_control', 3, 'thread_name_set', 1, @@ -93,7 +93,7 @@ def create_wrapper_c(system_calls) file.puts %( // Restore the stack after the function call "addl \$4 * #{num_parameters}, %esp\\n"\ - ) +) end file.puts %[ diff --git a/storm/x86/thread.c b/storm/x86/thread.c index a536d072..645a1895 100644 --- a/storm/x86/thread.c +++ b/storm/x86/thread.c @@ -210,12 +210,12 @@ thread_id_type thread_get_free_id(void) return thread_id; } -// Create a thread under the current cluster. Returns STORM_RETURN_THREAD_ORIGINAL for calling thread and +// Create a thread under the current cluster. Returns STORM_RETURN_THREAD_ORIGINAL for the calling thread and // STORM_RETURN_THREAD_NEW for new thread. // FIXME: Some regions used by the kernel for temporary mappings need to be unique (or mutexed) to each thread under // a cluster. Discuss how this is best done! Right now, we lock everything, which is sub-optimal. -return_type thread_create(void *(*start_routine) (void *), void *argument) +return_type thread_create(uint32_t current_thread_esp, void *(*start_routine) (void *), void *argument) { storm_tss_type *new_tss; @@ -271,46 +271,67 @@ return_type thread_create(void *(*start_routine) (void *), void *argument) GET_PAGE_NUMBER(BASE_PROCESS_PAGE_DIRECTORY), page_directory_physical_page, 1, PAGE_KERNEL); - // The "magical" mapping previously created by memory_virtual_create_page_tables_mapping also needs to be adjusted. - // Otherwise we will update the wrong thread's addressing space when mapping memory for the new thread. + // The "magical" mapping previously created by memory_virtual_create_page_tables_mapping also + // needs to be adjusted. Otherwise we will update the wrong thread's addressing space when + // mapping memory for the new thread. memory_virtual_create_page_tables_mapping(new_page_directory, page_directory_physical_page); // FIXME: Map into all sister threads address spaces when creating a new page table for a thread. - // Start by creating a PL0 stack. Remember that the lowest page of the stack area is the PL0 stack. - // FIXME: Check return value. + // Start by creating a PL0 stack, and map it into our current context so we can clear it. + // FIXME: Check return values; all of these can fail. memory_physical_allocate(&stack_physical_page, 1, "Thread PL0 stack."); memory_virtual_map(GET_PAGE_NUMBER(BASE_PROCESS_CREATE), stack_physical_page, 1, PAGE_KERNEL); - memory_copy((uint8_t *) BASE_PROCESS_CREATE, (uint8_t *) BASE_PROCESS_STACK, SIZE_PAGE * 1); - memory_virtual_map_other(new_tss, GET_PAGE_NUMBER(BASE_PROCESS_STACK), stack_physical_page, 1, PAGE_KERNEL); + memory_set_uint8_t((uint8_t *) BASE_PROCESS_CREATE, 0, SIZE_PAGE); + + // Make the PL0 stack accessible in the new thread's context. Its virtual memory address range + // is 0xFC000000-0xFC000FF. + memory_virtual_map_other(new_tss, GET_PAGE_NUMBER(BASE_PROCESS_STACK), stack_physical_page, 1, + PAGE_KERNEL); + + // PL0 stack is now set up. We now need to set up the PL3 stack also, which is slightly more complicated. + // All the content of the current thread's stack gets copied to the stack for the new thread, so that stack + // variables set in the mother thread will have expected values in the child thread also. + + // FIXME: Check return values. + memory_physical_allocate(&stack_physical_page, current_tss->stack_pages, "Thread PL3 stack."); + memory_virtual_map(GET_PAGE_NUMBER(BASE_PROCESS_CREATE), stack_physical_page, current_tss->stack_pages, + PAGE_KERNEL); + memory_copy((uint8_t *) BASE_PROCESS_CREATE, + (uint8_t *) ((MAX_PAGES - current_tss->stack_pages) * SIZE_PAGE), + current_tss->stack_pages * SIZE_PAGE); + memory_virtual_map_other(new_tss, MAX_PAGES - current_tss->stack_pages, stack_physical_page, + current_tss->stack_pages, PAGE_WRITABLE | PAGE_NON_PRIVILEGED); - new_tss->esp = cpu_get_esp(); + new_tss->esp = current_thread_esp; - // This stuff needs to run while the PL0 stack is being mapped, since ESP will (because of technical reasons ;) - // currently point into the PL0 stack. This happens because we are currently running kernel code, so it's not really that - // weird after all. - uint32_t new_stack_in_current_address_space = BASE_PROCESS_CREATE + (new_tss->esp - BASE_PROCESS_STACK); + // 0xFFF = 4095, so we get the offset of the stack pointer within its current page. + int offset = (new_tss->esp & 0xFFF); + + if (offset < 8) + { + // FIXME: This algorithm has a serious flaw. If the current_thread_esp has LESS than 8 bytes left until + // it reaches the next page boundary, the stack operations below will fail. It is indeed an edge case, + // but clearly something that's very likely to happen at some point. We need to special case it here, + // or re-think the algorithm in some other way. + DEBUG_HALT("Kernel bug encountered. %d is less than 8.", offset); + } + + // Place the thread's optional parameter into its stack. We utilize the fact here that the last stack page + // will always be mapped at BASE_PROCESS_CREATE, since x86 stacks grow downwards => "the last shall be + // first." + uint32_t new_stack_in_current_address_space = BASE_PROCESS_CREATE + offset; new_tss->esp -= 4; new_stack_in_current_address_space -= 4; *(void **)new_stack_in_current_address_space = argument; - // FIXME: Make the return address here be to a thread_exit() method or similar. As it is now, returning from a - // thread will trigger a page fault. + // FIXME: Make the return address here be to a thread_exit() method or similar. As it is now, + // returning from a thread will trigger a page fault. It's not great, but it's a lot better + // than leaving it as a wild pointer into an undefined location in memory. new_tss->esp -= 4; new_stack_in_current_address_space -= 4; *(void **)new_stack_in_current_address_space = NULL; - // Phew... Finished setting up a PL0 stack. Lets take a deep breath and do the same for the PL3 stack, which is - // slightly more complicated. - - // FIXME: Check return value. - memory_physical_allocate(&stack_physical_page, current_tss->stack_pages, "Thread PL3 stack."); - memory_virtual_map(GET_PAGE_NUMBER(BASE_PROCESS_CREATE), stack_physical_page, current_tss->stack_pages, PAGE_KERNEL); - memory_copy((uint8_t *) BASE_PROCESS_CREATE, (uint8_t *) ((MAX_PAGES - current_tss->stack_pages) * SIZE_PAGE), - current_tss->stack_pages * SIZE_PAGE); - memory_virtual_map_other(new_tss, MAX_PAGES - current_tss->stack_pages, stack_physical_page, current_tss->stack_pages, - PAGE_WRITABLE | PAGE_NON_PRIVILEGED); - new_tss->ss = new_tss->ss0; new_tss->cs = SELECTOR_KERNEL_CODE; new_tss->eflags = THREAD_NEW_EFLAGS; diff --git a/storm/x86/wrapper.c b/storm/x86/wrapper.c index c319a2ab..b443f450 100644 --- a/storm/x86/wrapper.c +++ b/storm/x86/wrapper.c @@ -36,7 +36,7 @@ void wrapper_kernelfs_entry_read(void) "call system_call_kernelfs_entry_read\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -69,7 +69,7 @@ void wrapper_mailbox_create(void) "call system_call_mailbox_create\n" // Restore the stack after the function call - "addl $4 * 5, %esp\n" + "addl $4 * 5, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -98,7 +98,7 @@ void wrapper_mailbox_destroy(void) "call system_call_mailbox_destroy\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -127,7 +127,7 @@ void wrapper_mailbox_flush(void) "call system_call_mailbox_flush\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -157,7 +157,7 @@ void wrapper_mailbox_send(void) "call system_call_mailbox_send\n" // Restore the stack after the function call - "addl $4 * 2, %esp\n" + "addl $4 * 2, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -187,7 +187,7 @@ void wrapper_mailbox_receive(void) "call system_call_mailbox_receive\n" // Restore the stack after the function call - "addl $4 * 2, %esp\n" + "addl $4 * 2, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -218,7 +218,7 @@ void wrapper_service_create(void) "call system_call_service_create\n" // Restore the stack after the function call - "addl $4 * 3, %esp\n" + "addl $4 * 3, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -247,7 +247,7 @@ void wrapper_service_destroy(void) "call system_call_service_destroy\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -278,7 +278,7 @@ void wrapper_service_get(void) "call system_call_service_get\n" // Restore the stack after the function call - "addl $4 * 3, %esp\n" + "addl $4 * 3, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -308,7 +308,7 @@ void wrapper_service_protocol_get(void) "call system_call_service_protocol_get\n" // Restore the stack after the function call - "addl $4 * 2, %esp\n" + "addl $4 * 2, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -337,7 +337,7 @@ void wrapper_service_protocol_get_amount(void) "call system_call_service_protocol_get_amount\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -370,7 +370,7 @@ void wrapper_dma_transfer(void) "call system_call_dma_transfer\n" // Restore the stack after the function call - "addl $4 * 5, %esp\n" + "addl $4 * 5, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -399,7 +399,7 @@ void wrapper_dma_transfer_cancel(void) "call system_call_dma_transfer_cancel\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -429,7 +429,7 @@ void wrapper_dma_register(void) "call system_call_dma_register\n" // Restore the stack after the function call - "addl $4 * 2, %esp\n" + "addl $4 * 2, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -458,7 +458,7 @@ void wrapper_dma_unregister(void) "call system_call_dma_unregister\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -488,7 +488,7 @@ void wrapper_irq_register(void) "call system_call_irq_register\n" // Restore the stack after the function call - "addl $4 * 2, %esp\n" + "addl $4 * 2, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -517,7 +517,7 @@ void wrapper_irq_unregister(void) "call system_call_irq_unregister\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -546,7 +546,7 @@ void wrapper_irq_wait(void) "call system_call_irq_wait\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -575,7 +575,7 @@ void wrapper_irq_acknowledge(void) "call system_call_irq_acknowledge\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -606,7 +606,7 @@ void wrapper_memory_allocate(void) "call system_call_memory_allocate\n" // Restore the stack after the function call - "addl $4 * 3, %esp\n" + "addl $4 * 3, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -635,7 +635,7 @@ void wrapper_memory_deallocate(void) "call system_call_memory_deallocate\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -666,7 +666,7 @@ void wrapper_memory_reserve(void) "call system_call_memory_reserve\n" // Restore the stack after the function call - "addl $4 * 3, %esp\n" + "addl $4 * 3, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -696,7 +696,7 @@ void wrapper_memory_get_physical_address(void) "call system_call_memory_get_physical_address\n" // Restore the stack after the function call - "addl $4 * 2, %esp\n" + "addl $4 * 2, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -727,7 +727,7 @@ void wrapper_port_range_register(void) "call system_call_port_range_register\n" // Restore the stack after the function call - "addl $4 * 3, %esp\n" + "addl $4 * 3, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -756,7 +756,7 @@ void wrapper_port_range_unregister(void) "call system_call_port_range_unregister\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -785,7 +785,7 @@ void wrapper_process_create(void) "call system_call_process_create\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -814,7 +814,7 @@ void wrapper_process_name_set(void) "call system_call_process_name_set\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -860,13 +860,14 @@ void wrapper_thread_create(void) // Push all arguments. This approach pretty smart; it utilizes the fact that the stack grows downwards // so the "next parameter to push" is always in the same memory location. :) - "pushl 32 + 4 + 2 * 4(%esp)\n" - "pushl 32 + 4 + 2 * 4(%esp)\n" + "pushl 32 + 4 + 3 * 4(%esp)\n" + "pushl 32 + 4 + 3 * 4(%esp)\n" + "pushl 32 + 4 + 3 * 4(%esp)\n" "call system_call_thread_create\n" // Restore the stack after the function call - "addl $4 * 2, %esp\n" + "addl $4 * 3, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -881,7 +882,7 @@ void wrapper_thread_create(void) // Adjust the stack for the fact that EAX isn't being popped. "addl $4, %esp\n" - "lret $4 * 2\n"); + "lret $4 * 3\n"); } void wrapper_thread_control(void) @@ -897,7 +898,7 @@ void wrapper_thread_control(void) "call system_call_thread_control\n" // Restore the stack after the function call - "addl $4 * 3, %esp\n" + "addl $4 * 3, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -926,7 +927,7 @@ void wrapper_thread_name_set(void) "call system_call_thread_name_set\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n" @@ -955,7 +956,7 @@ void wrapper_timer_read(void) "call system_call_timer_read\n" // Restore the stack after the function call - "addl $4 * 1, %esp\n" + "addl $4 * 1, %esp\n" // Simulate a popa, without overwriting EAX (since it contains the return value from the system call). "popl %edi\n"