From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_x86_64.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 9e02d2cb9d0..8bc39f35f05 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -1723,6 +1723,10 @@ __ASM_GLOBAL_FUNC( call_user_mode_callback, "leaq 0x10(%rbp),%rax\n\t" "movq %rax,0xa8(%rsp)\n\t" /* frame->syscall_cfa */ "movq 0x378(%r13),%r14\n\t" /* thread_data->syscall_frame */ + "movq 0x78(%r14),%rax\n\t" /* prev_frame->cs */ + "movq %rax,0x78(%rsp)\n\t" /* frame->cs */ + "movq 0x90(%r14),%rax\n\t" /* prev_frame->ss */ + "movq %rax,0x90(%rsp)\n\t" /* frame->ss */ "movq %r14,0xa0(%rsp)\n\t" /* frame->prev_frame */ "movq %rsp,0x378(%r13)\n\t" /* thread_data->syscall_frame */ "testl $1,0x380(%r13)\n\t" /* thread_data->syscall_trace */
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 4 ++++ dlls/ntdll/unix/signal_x86_64.c | 4 ++++ 2 files changed, 8 insertions(+)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index 92a01bb37cb..d628d44272d 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -2700,6 +2700,10 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, "movl $0,(%ecx)\n\t" /* frame->restore_flags */ "popl 0x08(%ecx)\n\t" /* frame->eip */ __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") + "pushfl\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") + "popl 0x04(%ecx)\n\t" /* frame->eflags */ + __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") __ASM_CFI_REG_IS_AT1(eip, ecx, 0x08) __ASM_GLOBL(__ASM_NAME("__wine_unix_call_dispatcher_prolog_end")) "\n\t" "leal 0x10(%esp),%edx\n\t" diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 8bc39f35f05..6564074893a 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -3140,6 +3140,10 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, "movq %gs:0x378,%rcx\n\t" /* thread_data->syscall_frame */ "popq 0x70(%rcx)\n\t" /* frame->rip */ __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") + "pushfq\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t") + "popq 0x80(%rcx)\n\t" /* frame->eflags */ + __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") __ASM_CFI_REG_IS_AT2(rip, rcx, 0xf0,0x00) "movl $0,0xb4(%rcx)\n\t" /* frame->restore_flags */ __ASM_LOCAL_LABEL("__wine_unix_call_dispatcher_prolog_end") ":\n\t"
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_x86_64.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 6564074893a..2cd1c8a3444 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -439,6 +439,7 @@ C_ASSERT( offsetof(struct callback_stack_layout, machine_frame) == 0x30 ); C_ASSERT( sizeof(struct callback_stack_layout) == 0x58 );
#define RESTORE_FLAGS_INSTRUMENTATION CONTEXT_i386 +#define RESTORE_FLAGS_INVALID_FPSTATE CONTEXT_ARM
struct syscall_frame { @@ -2069,6 +2070,7 @@ static BOOL handle_syscall_trap( ucontext_t *sigcontext, siginfo_t *siginfo ) extern const void *__wine_syscall_dispatcher_prolog_end_ptr;
RIP_sig( sigcontext ) = (ULONG64)__wine_syscall_dispatcher_prolog_end_ptr; + frame->restore_flags = CONTEXT_CONTROL; } else if ((void *)RIP_sig( sigcontext ) == __wine_unix_call_dispatcher) { @@ -2076,6 +2078,7 @@ static BOOL handle_syscall_trap( ucontext_t *sigcontext, siginfo_t *siginfo )
RIP_sig( sigcontext ) = (ULONG64)__wine_unix_call_dispatcher_prolog_end_ptr; R10_sig( sigcontext ) = RCX_sig( sigcontext ); + frame->restore_flags = CONTEXT_CONTROL | RESTORE_FLAGS_INVALID_FPSTATE; } else if (siginfo->si_code == 4 /* TRAP_HWBKPT */ && is_inside_syscall( RSP_sig(sigcontext) )) { @@ -2089,7 +2092,6 @@ static BOOL handle_syscall_trap( ucontext_t *sigcontext, siginfo_t *siginfo )
frame->rip = *(ULONG64 *)RSP_sig( sigcontext ); frame->eflags = EFL_sig(sigcontext); - frame->restore_flags = CONTEXT_CONTROL; if (instrumentation_callback) frame->restore_flags |= RESTORE_FLAGS_INSTRUMENTATION;
RCX_sig( sigcontext ) = (ULONG64)frame; @@ -2430,6 +2432,24 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) return; } context->c.ContextFlags = CONTEXT_FULL | CONTEXT_SEGMENTS | CONTEXT_EXCEPTION_REQUEST; + if (frame->restore_flags & RESTORE_FLAGS_INVALID_FPSTATE) + { + /* frame FP state is not fully filled, fill in the missing state from the current Unix side context. */ + frame->restore_flags &= ~RESTORE_FLAGS_INVALID_FPSTATE; + memset( &frame->xstate, 0, sizeof(frame->xstate) ); + if (user_shared_data->XState.CompactionEnabled) + frame->xstate.CompactionMask = 0x8000000000000000 | user_shared_data->XState.EnabledFeatures; + if (FPU_sig(ucontext)) + { + XSAVE_FORMAT xsave; + + xsave = *FPU_sig(ucontext); + memcpy( &xsave.XmmRegisters[6], &frame->xsave.XmmRegisters[6], 10 * sizeof(*xsave.XmmRegisters) ); + xsave.MxCsr = frame->xsave.MxCsr; + frame->xsave = xsave; + frame->xstate.Mask = XSTATE_MASK_LEGACY; + } + } NtGetContextThread( GetCurrentThread(), &context->c ); if (xstate_extended_features) { @@ -3145,7 +3165,7 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, "popq 0x80(%rcx)\n\t" /* frame->eflags */ __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") __ASM_CFI_REG_IS_AT2(rip, rcx, 0xf0,0x00) - "movl $0,0xb4(%rcx)\n\t" /* frame->restore_flags */ + "movl $0x200000,0xb4(%rcx)\n\t" /* frame->restore_flags <- RESTORE_FLAGS_INVALID_FPSTATE */ __ASM_LOCAL_LABEL("__wine_unix_call_dispatcher_prolog_end") ":\n\t" "movq %rbx,0x08(%rcx)\n\t" __ASM_CFI_REG_IS_AT1(rbx, rcx, 0x08)
From: Paul Gofman pgofman@codeweavers.com
--- dlls/ntdll/unix/signal_i386.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c index d628d44272d..d1c61d5685c 100644 --- a/dlls/ntdll/unix/signal_i386.c +++ b/dlls/ntdll/unix/signal_i386.c @@ -505,6 +505,8 @@ struct syscall_frame
C_ASSERT( sizeof(struct syscall_frame) == 0x280 );
+#define RESTORE_FLAGS_INVALID_FPSTATE 0x00008000 + struct x86_thread_data { UINT fs; /* 1d4 TEB selector */ @@ -1859,12 +1861,14 @@ static BOOL handle_syscall_trap( ucontext_t *sigcontext, siginfo_t *siginfo ) extern void __wine_syscall_dispatcher_prolog_end(void);
EIP_sig( sigcontext ) = (ULONG)__wine_syscall_dispatcher_prolog_end; + frame->restore_flags = LOWORD(CONTEXT_CONTROL); } else if ((void *)EIP_sig( sigcontext ) == __wine_unix_call_dispatcher) { extern void __wine_unix_call_dispatcher_prolog_end(void);
EIP_sig( sigcontext ) = (ULONG)__wine_unix_call_dispatcher_prolog_end; + frame->restore_flags = LOWORD(CONTEXT_CONTROL | RESTORE_FLAGS_INVALID_FPSTATE); } else if (siginfo->si_code == 4 /* TRAP_HWBKPT */ && is_inside_syscall( ESP_sig(sigcontext) )) { @@ -1877,7 +1881,6 @@ static BOOL handle_syscall_trap( ucontext_t *sigcontext, siginfo_t *siginfo )
frame->eip = *(ULONG *)ESP_sig( sigcontext ); frame->eflags = EFL_sig(sigcontext); - frame->restore_flags = LOWORD(CONTEXT_CONTROL);
ECX_sig( sigcontext ) = (ULONG)frame; ESP_sig( sigcontext ) += sizeof(ULONG); @@ -2135,6 +2138,20 @@ static void usr1_handler( int signal, siginfo_t *siginfo, void *sigcontext ) return; } context->c.ContextFlags = CONTEXT_FULL | CONTEXT_EXCEPTION_REQUEST; + if (frame->restore_flags & RESTORE_FLAGS_INVALID_FPSTATE) + { + /* frame FP state is not fully filled, fill in the missing state from the current Unix side context. */ + frame->restore_flags &= ~RESTORE_FLAGS_INVALID_FPSTATE; + memset( &frame->xstate, 0, sizeof(frame->xstate) ); + if (user_shared_data->XState.CompactionEnabled) + frame->xstate.CompactionMask = 0x8000000000000000 | user_shared_data->XState.EnabledFeatures; + if (FPUX_sig(ucontext)) + { + frame->u.xsave = *FPUX_sig(ucontext); + frame->xstate.Mask = XSTATE_MASK_LEGACY; + } + else if (FPU_sig(ucontext)) frame->u.fsave = *FPU_sig(ucontext); + } NtGetContextThread( GetCurrentThread(), &context->c ); if (xstate_extended_features) { @@ -2697,7 +2714,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher_return, */ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, "movl %fs:0x218,%ecx\n\t" /* thread_data->syscall_frame */ - "movl $0,(%ecx)\n\t" /* frame->restore_flags */ + "movl $0x8000,(%ecx)\n\t" /* frame->restore_flags <- RESTORE_FLAGS_INVALID_FPSTATE */ "popl 0x08(%ecx)\n\t" /* frame->eip */ __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t") "pushfl\n\t" @@ -2738,7 +2755,7 @@ __ASM_GLOBAL_FUNC( __wine_unix_call_dispatcher, __ASM_CFI(".cfi_offset %edi,-20\n\t") "call *(%eax,%edx,4)\n\t" "leal 16(%esp),%esp\n\t" - "testl $0xffff,(%esp)\n\t" /* frame->restore_flags */ + "testl $0x7fff,(%esp)\n\t" /* frame->restore_flags */ "jnz " __ASM_LOCAL_LABEL("__wine_syscall_dispatcher_return") "\n\t" "movl 0x08(%esp),%ecx\n\t" /* frame->eip */ /* switch to user stack */
I was reproducing the crash loop randomly on start of Summoners War: RUSH. The core problem is not particularly specific to this game, the game just hits conditions which make reproduction likely.
The problems happen when usr1_handler is invoked while in Unix called dispatcher. The faults happen if unix_syscall_dispatcher was called on nested syscall frame inside user mode callback and there was no normal syscall in this callback execution. Those conditions are more likely met with various logging enabled as that results in many Unix calls for debug output. xsave and xstate area are not properly initialized in this case. usr1_handler gets the context from invalid syscall frame data and then sets it back, bow triggering the restore of it. That first crashes on xrstor in wine_syscall_dispatcher_return. When FP and xstate data are fixed up it then may crash on 'iretq' due to invalid segment registers (on x64, on i386 those are saved in unix_call_dispatcher) and / or eflags.
When these specific conditions are not met, the crash doesn't happen because the float save, xstate and eflags are left initialized after previous normal system call to the valid state. However, there is still a problem: if, e. g, the app changes x87 FP mode between normal syscall and Unix call, and USR1 happens inside the Unix call, that will restore the stale state from the last syscall.
The general idea is to detect that we have invalid state usr1_handler and fix up the missing parts from the current Unix-side context. That should work because our general assumption is that if some context part is not saved in unix_call_dispatcher it is either volatile or is not supposed to be changed by the Unix side (otherwise we would have issues by normal return from unix_call_dispatcher without usr1 involved). I added eflags save to unix_call_dispatcher instead of fixing up from Unix contents because there are non-volatile parts (e. g., direction flag), which, however, may be temporarily changed during execution. Looks like all the other non-volatile state is already saved.
Well, the assumption that Unix side doesn't change persistent FPU state is not strictly correct of course, it can rightfully switch and restore FPU flags. Strictly correct would be to save all the relevant FPU state in wine_unix_call_dispatcher but avoiding that was one of the most important optimizations which triggered the introduction of the separate Unix call dispatcher and there is probably no other feasible way to know the correct FP words. But using x87 FPU on the Unix side and switching it state is probably exotic these days, Linux i386 uses SSE now, so I guess avoiding potential loss of PE-side FPU state is more important.
Yet I think I'd need to at least reset x87 FP stack when fixing it up in usr1_handler, I will update the patch with that.