Fix sysenter and syscall features.

These features were not configured properly and had a race condition
when re-enabling interrupts before a sysexit/sysret.

Configuration: The BSP executes tss_init on behalf of the APs. Thus any
MSR writes performed in tss_init ended up in the BSP's own MSRs, leading
to:
	* The BSP not using the correct kernel stack.
	* The APs using an undefined instruction pointer, stack pointer and
code segment upon entering the kernel.
Fix: Let each core write to its own MSRs.

Race condition: When restoring a process' EFLAGS before a sysexit/sysret,
interrupts could be enabled while the core was still in the kernel. If
an interrupt was triggered at this point, it would lead to a deadlock on
the BKL down the line.
Fix: Clear IF in the EFLAGS value being restored and execute sti right
before sysexit/sysret (sti only takes effect after the next instruction,
so no interrupt can be delivered while still in the kernel).
This commit is contained in:
Justinien Bouron 2019-03-10 04:35:03 -07:00
parent f9accfb8ff
commit b1b2735c60
6 changed files with 46 additions and 28 deletions

View File

@ -218,6 +218,9 @@ static void ap_finish_booting(void)
/* inform the world of our presence. */ /* inform the world of our presence. */
ap_cpu_ready = cpu; ap_cpu_ready = cpu;
/* Set up sysenter/syscall. */
setup_sysenter_syscall();
/* /*
* Finish processor initialisation. CPUs must be excluded from running. * Finish processor initialisation. CPUs must be excluded from running.
* lapic timer calibration locks and unlocks the BKL because of the * lapic timer calibration locks and unlocks the BKL because of the
@ -298,6 +301,7 @@ void smp_init (void)
ioapic_enabled = 0; ioapic_enabled = 0;
tss_init_all(); tss_init_all();
setup_sysenter_syscall();
/* /*
* we still run on the boot stack and we cannot use cpuid as its value * we still run on the boot stack and we cannot use cpuid as its value

View File

@ -249,6 +249,11 @@ reg_t read_ebp(void);
*/ */
int tss_init(unsigned cpu, void * kernel_stack); int tss_init(unsigned cpu, void * kernel_stack);
/*
* Set up MSRs for sysenter or syscall for the current cpu.
* Assumes that tss_init has been called before for that cpu ! */
void setup_sysenter_syscall(void);
void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type); void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type);
void __copy_msg_from_user_end(void); void __copy_msg_from_user_end(void);

View File

@ -16,6 +16,7 @@
*/ */
#define smp_single_cpu_fallback() do { \ #define smp_single_cpu_fallback() do { \
tss_init(0, get_k_stack_top(0)); \ tss_init(0, get_k_stack_top(0)); \
setup_sysenter_syscall(); \
bsp_cpu_id = 0; \ bsp_cpu_id = 0; \
ncpus = 1; \ ncpus = 1; \
bsp_finish_booting(); \ bsp_finish_booting(); \

View File

@ -405,6 +405,7 @@ ENTRY(restore_user_context_sysenter)
/* restore PSW */ /* restore PSW */
movl PSWREG(%ebp), %edi /* load desired PSW to EDI */ movl PSWREG(%ebp), %edi /* load desired PSW to EDI */
andl $0xfffffdff, %edi /* Clear IF to avoid receiving int. */
push %edi push %edi
popf popf
@ -421,6 +422,7 @@ ENTRY(restore_user_context_syscall)
/* restore PSW (before we switch to user stack!) */ /* restore PSW (before we switch to user stack!) */
movl PSWREG(%ebp), %edi /* load desired PSW to EDI */ movl PSWREG(%ebp), %edi /* load desired PSW to EDI */
andl $0xfffffdff, %edi /* Clear IF to avoid receiving int. */
push %edi push %edi
popf popf
@ -429,6 +431,7 @@ ENTRY(restore_user_context_syscall)
mov AXREG(%ebp), %eax /* trap return value */ mov AXREG(%ebp), %eax /* trap return value */
mov BXREG(%ebp), %ebx /* secondary return value */ mov BXREG(%ebp), %ebx /* secondary return value */
sti
sysret /* jump to EIP in user */ sysret /* jump to EIP in user */
ENTRY(restore_user_context_int) ENTRY(restore_user_context_int)

View File

@ -151,35 +151,10 @@ static struct gate_table_s gate_table_exceptions[] = {
{ NULL, 0, 0} { NULL, 0, 0}
}; };
int tss_init(unsigned cpu, void * kernel_stack) void setup_sysenter_syscall(void)
{ {
struct tss_s * t = &tss[cpu]; const int cpu = cpuid;
int index = TSS_INDEX(cpu); struct tss_s const *const t = &tss[cpu];
struct segdesc_s *tssgdt;
tssgdt = &gdt[index];
init_param_dataseg(tssgdt, (phys_bytes) t,
sizeof(struct tss_s), INTR_PRIVILEGE);
tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;
/* Build TSS. */
memset(t, 0, sizeof(*t));
t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
t->cs = KERN_CS_SELECTOR;
t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */
/*
* make space for process pointer and cpu id and point to the first
* usable word
*/
k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
/*
* set the cpu id at the top of the stack so we know on which cpu is
* this stack in use when we trap to kernel
*/
*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
/* Set up Intel SYSENTER support if available. */ /* Set up Intel SYSENTER support if available. */
if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) { if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR); ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
@ -211,7 +186,36 @@ int tss_init(unsigned cpu, void * kernel_stack)
set_star_cpu(7); set_star_cpu(7);
assert(CONFIG_MAX_CPUS <= 8); assert(CONFIG_MAX_CPUS <= 8);
} }
}
int tss_init(unsigned cpu, void * kernel_stack)
{
struct tss_s * t = &tss[cpu];
int index = TSS_INDEX(cpu);
struct segdesc_s *tssgdt;
tssgdt = &gdt[index];
init_param_dataseg(tssgdt, (phys_bytes) t,
sizeof(struct tss_s), INTR_PRIVILEGE);
tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;
/* Build TSS. */
memset(t, 0, sizeof(*t));
t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
t->cs = KERN_CS_SELECTOR;
t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */
/*
* make space for process pointer and cpu id and point to the first
* usable word
*/
k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
/*
* set the cpu id at the top of the stack so we know on which cpu is
* this stack in use when we trap to kernel
*/
*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
return SEG_SELECTOR(index); return SEG_SELECTOR(index);
} }

View File

@ -42,6 +42,7 @@ void bsp_finish_booting(void)
sprofiling = 0; /* we're not profiling until instructed to */ sprofiling = 0; /* we're not profiling until instructed to */
#endif /* SPROFILE */ #endif /* SPROFILE */
setup_sysenter_syscall();
cpu_identify(); cpu_identify();
vm_running = 0; vm_running = 0;