From f9accfb8ffbeca410ad6bea3858879ffe18ec75e Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 04:23:14 -0700 Subject: [PATCH 1/8] Fix compilation failure for CONFIG_SMP=y builds. --- minix/include/minix/bitmap.h | 1 + minix/include/minix/driver.h | 2 +- minix/include/minix/drivers.h | 2 +- minix/kernel/spinlock.h | 2 +- minix/kernel/type.h | 2 ++ 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/minix/include/minix/bitmap.h b/minix/include/minix/bitmap.h index 2c675e3d5..0d68f757d 100644 --- a/minix/include/minix/bitmap.h +++ b/minix/include/minix/bitmap.h @@ -19,6 +19,7 @@ #if defined(CONFIG_SMP) && defined(__GNUC__) #ifndef __ASSEMBLY__ +typedef unsigned int bitchunk_t; static inline void bits_fill(bitchunk_t * chunks, unsigned bits) { unsigned c, cnt; diff --git a/minix/include/minix/driver.h b/minix/include/minix/driver.h index 2995fef45..f752b6ccc 100644 --- a/minix/include/minix/driver.h +++ b/minix/include/minix/driver.h @@ -9,6 +9,7 @@ /* The following are so basic, all the *.c files get them automatically. */ #include /* MUST be first */ +#include #include #include #include @@ -19,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/minix/include/minix/drivers.h b/minix/include/minix/drivers.h index da94fef14..92be5752e 100644 --- a/minix/include/minix/drivers.h +++ b/minix/include/minix/drivers.h @@ -11,6 +11,7 @@ /* The following are so basic, all the *.c files get them automatically. 
*/ #include /* MUST be first */ +#include #include #include #include @@ -34,7 +35,6 @@ #include #include -#include #include #include #include diff --git a/minix/kernel/spinlock.h b/minix/kernel/spinlock.h index 7cfee2d9e..335c67f61 100644 --- a/minix/kernel/spinlock.h +++ b/minix/kernel/spinlock.h @@ -1,7 +1,7 @@ #ifndef __SPINLOCK_H__ #define __SPINLOCK_H__ -#include "kernel/kernel.h" +#include typedef struct spinlock { atomic_t val; diff --git a/minix/kernel/type.h b/minix/kernel/type.h index 4c45bcf65..17aed29f1 100644 --- a/minix/kernel/type.h +++ b/minix/kernel/type.h @@ -5,6 +5,8 @@ #include #include +typedef uint32_t bitchunk_t; + /* Process table and system property related types. */ typedef int proc_nr_t; /* process table entry number */ typedef short sys_id_t; /* system process index */ From b1b2735c60fd9e84d11d55f2234c4a27bf8a1e19 Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 04:35:03 -0700 Subject: [PATCH 2/8] Fix sysenter and syscall features. Those features were not configured properly and had a race condition when re-enabling interrupts before a sysexit/sysret. Configuration: The BSP executes tss_init on behalf of the APs. Thus any writes to the MSRs in tss_init would end up in the BSP's MSRs, leading to: * BSP not using the correct kernel stack. * APs using undefined instruction/stack pointer and code segment upon entering the kernel. Fix: Let each core write to its own MSRs. Race condition: When restoring a process' EFLAGS before a sysexit/sysret, interrupts could be enabled while the core was still in the kernel. If an interrupt is triggered at this point, it would lead to a deadlock on the BKL down the line. Fix: Disable IF in saved EFLAGS and use sti before sysexit/sysret. 
--- minix/kernel/arch/i386/arch_smp.c | 4 ++ minix/kernel/arch/i386/include/arch_proto.h | 5 ++ minix/kernel/arch/i386/include/arch_smp.h | 1 + minix/kernel/arch/i386/mpx.S | 3 ++ minix/kernel/arch/i386/protect.c | 60 +++++++++++---------- minix/kernel/main.c | 1 + 6 files changed, 46 insertions(+), 28 deletions(-) diff --git a/minix/kernel/arch/i386/arch_smp.c b/minix/kernel/arch/i386/arch_smp.c index 8998b8904..7a7458160 100644 --- a/minix/kernel/arch/i386/arch_smp.c +++ b/minix/kernel/arch/i386/arch_smp.c @@ -218,6 +218,9 @@ static void ap_finish_booting(void) /* inform the world of our presence. */ ap_cpu_ready = cpu; + /* Set up sysenter/syscall. */ + setup_sysenter_syscall(); + /* * Finish processor initialisation. CPUs must be excluded from running. * lapic timer calibration locks and unlocks the BKL because of the @@ -298,6 +301,7 @@ void smp_init (void) ioapic_enabled = 0; tss_init_all(); + setup_sysenter_syscall(); /* * we still run on the boot stack and we cannot use cpuid as its value diff --git a/minix/kernel/arch/i386/include/arch_proto.h b/minix/kernel/arch/i386/include/arch_proto.h index 3aa879096..6679cb5a5 100644 --- a/minix/kernel/arch/i386/include/arch_proto.h +++ b/minix/kernel/arch/i386/include/arch_proto.h @@ -249,6 +249,11 @@ reg_t read_ebp(void); */ int tss_init(unsigned cpu, void * kernel_stack); +/* + * Set up MSRs for sysenter or syscall for the current cpu. + * Assumes that tss_init has been called before for that cpu ! 
*/ +void setup_sysenter_syscall(void); + void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type); void __copy_msg_from_user_end(void); diff --git a/minix/kernel/arch/i386/include/arch_smp.h b/minix/kernel/arch/i386/include/arch_smp.h index 3a013a982..6b79bdd5d 100644 --- a/minix/kernel/arch/i386/include/arch_smp.h +++ b/minix/kernel/arch/i386/include/arch_smp.h @@ -16,6 +16,7 @@ */ #define smp_single_cpu_fallback() do { \ tss_init(0, get_k_stack_top(0)); \ + setup_sysenter_syscall(); \ bsp_cpu_id = 0; \ ncpus = 1; \ bsp_finish_booting(); \ diff --git a/minix/kernel/arch/i386/mpx.S b/minix/kernel/arch/i386/mpx.S index fa38697e5..6fbb72855 100644 --- a/minix/kernel/arch/i386/mpx.S +++ b/minix/kernel/arch/i386/mpx.S @@ -405,6 +405,7 @@ ENTRY(restore_user_context_sysenter) /* restore PSW */ movl PSWREG(%ebp), %edi /* load desired PSW to EDI */ + andl $0xfffffdff, %edi /* Clear IF to avoid receiving int. */ push %edi popf @@ -421,6 +422,7 @@ ENTRY(restore_user_context_syscall) /* restore PSW (before we switch to user stack!) */ movl PSWREG(%ebp), %edi /* load desired PSW to EDI */ + andl $0xfffffdff, %edi /* Clear IF to avoid receiving int. 
*/ push %edi popf @@ -429,6 +431,7 @@ ENTRY(restore_user_context_syscall) mov AXREG(%ebp), %eax /* trap return value */ mov BXREG(%ebp), %ebx /* secondary return value */ + sti sysret /* jump to EIP in user */ ENTRY(restore_user_context_int) diff --git a/minix/kernel/arch/i386/protect.c b/minix/kernel/arch/i386/protect.c index ca9a9fd98..0307915db 100644 --- a/minix/kernel/arch/i386/protect.c +++ b/minix/kernel/arch/i386/protect.c @@ -151,35 +151,10 @@ static struct gate_table_s gate_table_exceptions[] = { { NULL, 0, 0} }; -int tss_init(unsigned cpu, void * kernel_stack) +void setup_sysenter_syscall(void) { - struct tss_s * t = &tss[cpu]; - int index = TSS_INDEX(cpu); - struct segdesc_s *tssgdt; - - tssgdt = &gdt[index]; - - init_param_dataseg(tssgdt, (phys_bytes) t, - sizeof(struct tss_s), INTR_PRIVILEGE); - tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; - - /* Build TSS. */ - memset(t, 0, sizeof(*t)); - t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR; - t->cs = KERN_CS_SELECTOR; - t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */ - - /* - * make space for process pointer and cpu id and point to the first - * usable word - */ - k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED; - /* - * set the cpu id at the top of the stack so we know on which cpu is - * this stack in use when we trap to kernel - */ - *((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu; - + const int cpu = cpuid; + struct tss_s const *const t = &tss[cpu]; /* Set up Intel SYSENTER support if available. 
*/ if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) { ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR); @@ -211,7 +186,36 @@ int tss_init(unsigned cpu, void * kernel_stack) set_star_cpu(7); assert(CONFIG_MAX_CPUS <= 8); } +} +int tss_init(unsigned cpu, void * kernel_stack) +{ + struct tss_s * t = &tss[cpu]; + int index = TSS_INDEX(cpu); + struct segdesc_s *tssgdt; + + tssgdt = &gdt[index]; + + init_param_dataseg(tssgdt, (phys_bytes) t, + sizeof(struct tss_s), INTR_PRIVILEGE); + tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; + + /* Build TSS. */ + memset(t, 0, sizeof(*t)); + t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR; + t->cs = KERN_CS_SELECTOR; + t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */ + + /* + * make space for process pointer and cpu id and point to the first + * usable word + */ + k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED; + /* + * set the cpu id at the top of the stack so we know on which cpu is + * this stack in use when we trap to kernel + */ + *((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu; return SEG_SELECTOR(index); } diff --git a/minix/kernel/main.c b/minix/kernel/main.c index 3198aba73..1f5f3e51f 100644 --- a/minix/kernel/main.c +++ b/minix/kernel/main.c @@ -42,6 +42,7 @@ void bsp_finish_booting(void) sprofiling = 0; /* we're not profiling until instructed to */ #endif /* SPROFILE */ + setup_sysenter_syscall(); cpu_identify(); vm_running = 0; From f3787e99034ed633a5ff85c19447d311941d48b6 Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 04:48:09 -0700 Subject: [PATCH 3/8] Fix division by 0 in get_cpu_ticks. 
--- minix/kernel/arch/i386/arch_clock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/minix/kernel/arch/i386/arch_clock.c b/minix/kernel/arch/i386/arch_clock.c index 8573e42f7..54959324c 100644 --- a/minix/kernel/arch/i386/arch_clock.c +++ b/minix/kernel/arch/i386/arch_clock.c @@ -434,7 +434,11 @@ get_cpu_ticks(unsigned int cpu, uint64_t ticks[CPUSTATES]) { int i; - /* TODO: make this inter-CPU safe! */ - for (i = 0; i < CPUSTATES; i++) - ticks[i] = tsc_per_state[cpu][i] / tsc_per_tick[cpu]; + for (i = 0; i < CPUSTATES; i++) { + /* Avoid divide by 0. */ + if (tsc_per_tick[cpu]) + ticks[i] = tsc_per_state[cpu][i] / tsc_per_tick[cpu]; + else + ticks[i] = 0; + } } From 2846b0ab97acf39fc3ed1121a218c6080c5d47be Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 04:58:01 -0700 Subject: [PATCH 4/8] Fix overflow in LAPIC timer frequency calibration. --- minix/kernel/arch/i386/apic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/minix/kernel/arch/i386/apic.c b/minix/kernel/arch/i386/apic.c index 39346b679..c4fa74c1e 100644 --- a/minix/kernel/arch/i386/apic.c +++ b/minix/kernel/arch/i386/apic.c @@ -442,7 +442,8 @@ static int spurious_irq_handler(irq_hook_t * UNUSED(hook)) static void apic_calibrate_clocks(unsigned cpu) { - u32_t lvtt, val, lapic_delta; + u32_t lvtt, val; + u64_t lapic_delta; u64_t tsc_delta; u64_t cpu_freq; @@ -514,7 +515,8 @@ static void apic_calibrate_clocks(unsigned cpu) lapic_delta = lapic_tctr0 - lapic_tctr1; tsc_delta = tsc1 - tsc0; - lapic_bus_freq[cpuid] = system_hz * lapic_delta / (PROBE_TICKS - 1); + lapic_bus_freq[cpuid] = make64(system_hz,0) * lapic_delta / + make64(PROBE_TICKS-1,0); BOOT_VERBOSE(printf("APIC bus freq %u MHz\n", lapic_bus_freq[cpuid] / 1000000)); cpu_freq = (tsc_delta / (PROBE_TICKS - 1)) * make64(system_hz, 0); From 7d8d1b64b99d65810418065be27b20baedae0934 Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 05:04:59 -0700 Subject: 
[PATCH 5/8] Fix scheduler's load tracking. Add load balancer. The load tracking in the scheduler had a bug where the BSP would always have the lowest load on the system leading to a large imbalance. Also add a simple periodic load balancer. --- minix/servers/sched/schedule.c | 70 ++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/minix/servers/sched/schedule.c b/minix/servers/sched/schedule.c index 76f2e3f19..9e41343c3 100644 --- a/minix/servers/sched/schedule.c +++ b/minix/servers/sched/schedule.c @@ -29,11 +29,6 @@ static int schedule_process(struct schedproc * rmp, unsigned flags); SCHEDULE_CHANGE_CPU \ ) -#define schedule_process_local(p) \ - schedule_process(p, SCHEDULE_CHANGE_PRIO | SCHEDULE_CHANGE_QUANTUM) -#define schedule_process_migrate(p) \ - schedule_process(p, SCHEDULE_CHANGE_CPU) - #define CPU_DEAD -1 #define cpu_is_available(c) (cpu_proc[c] >= 0) @@ -45,38 +40,41 @@ static int schedule_process(struct schedproc * rmp, unsigned flags); static unsigned cpu_proc[CONFIG_MAX_CPUS]; -static void pick_cpu(struct schedproc * proc) +static void allow_all_cpus(struct schedproc *proc) +{ + int i; + for(i=0;icpu_mask[i]); + } +} + +static int next_cpu = 0; +static int pick_cpu(struct schedproc * proc) { #ifdef CONFIG_SMP unsigned cpu, c; unsigned cpu_load = (unsigned) -1; - if (machine.processors_count == 1) { - proc->cpu = machine.bsp_id; - return; - } - - /* schedule sysytem processes only on the boot cpu */ - if (is_system_proc(proc)) { - proc->cpu = machine.bsp_id; - return; - } + if (machine.processors_count == 1) + return machine.bsp_id; /* if no other cpu available, try BSP */ cpu = machine.bsp_id; + cpu_load = cpu_proc[machine.bsp_id]; for (c = 0; c < machine.processors_count; c++) { /* skip dead cpus */ if (!cpu_is_available(c)) continue; + if(!GET_BIT(proc->cpu_mask,c)) + continue; if (c != machine.bsp_id && cpu_load > cpu_proc[c]) { cpu_load = cpu_proc[c]; cpu = c; } } - proc->cpu = cpu; - cpu_proc[cpu]++; + 
return cpu; #else - proc->cpu = 0; + return 0; #endif } @@ -100,7 +98,8 @@ int do_noquantum(message *m_ptr) rmp->priority += 1; /* lower priority */ } - if ((rv = schedule_process_local(rmp)) != OK) { + rv = schedule_process(rmp,SCHEDULE_CHANGE_PRIO|SCHEDULE_CHANGE_QUANTUM); + if (rv != OK) { return rv; } return OK; @@ -126,10 +125,8 @@ int do_stop_scheduling(message *m_ptr) } rmp = &schedproc[proc_nr_n]; -#ifdef CONFIG_SMP - cpu_proc[rmp->cpu]--; -#endif rmp->flags = 0; /*&= ~IN_USE;*/ + cpu_proc[rmp->cpu]--; return OK; } @@ -141,6 +138,7 @@ int do_start_scheduling(message *m_ptr) { register struct schedproc *rmp; int rv, proc_nr_n, parent_nr_n; + int cpu_chosen; /* we can handle two kinds of messages here */ assert(m_ptr->m_type == SCHEDULING_START || @@ -165,6 +163,9 @@ int do_start_scheduling(message *m_ptr) return EINVAL; } + allow_all_cpus(rmp); + + cpu_chosen = 0; /* Inherit current priority and time slice from parent. Since there * is currently only one scheduler scheduling the whole system, this * value is local and we assert that the parent endpoint is valid */ @@ -183,6 +184,7 @@ int do_start_scheduling(message *m_ptr) #ifdef CONFIG_SMP rmp->cpu = machine.bsp_id; /* FIXME set the cpu mask */ + cpu_chosen = 1; #endif } @@ -223,12 +225,11 @@ int do_start_scheduling(message *m_ptr) rmp->flags = IN_USE; /* Schedule the process, giving it some quantum */ - pick_cpu(rmp); - while ((rv = schedule_process(rmp, SCHEDULE_CHANGE_ALL)) == EBADCPU) { - /* don't try this CPU ever again */ - cpu_proc[rmp->cpu] = CPU_DEAD; - pick_cpu(rmp); - } + if(!cpu_chosen) + rmp->cpu = pick_cpu(rmp); + cpu_proc[rmp->cpu]++; + if((rv = schedule_process(rmp, SCHEDULE_CHANGE_ALL)) == EBADCPU) + panic("EBADCPU\n"); if (rv != OK) { printf("Sched: Error while scheduling process, kernel replied %d\n", @@ -281,7 +282,8 @@ int do_nice(message *m_ptr) /* Update the proc entry and reschedule the process */ rmp->max_priority = rmp->priority = new_q; - if ((rv = schedule_process_local(rmp)) != 
OK) { + rv = schedule_process(rmp,SCHEDULE_CHANGE_PRIO|SCHEDULE_CHANGE_QUANTUM); + if (rv != OK) { /* Something went wrong when rescheduling the process, roll * back the changes to proc struct */ rmp->priority = old_q; @@ -299,8 +301,6 @@ static int schedule_process(struct schedproc * rmp, unsigned flags) int err; int new_prio, new_quantum, new_cpu, niced; - pick_cpu(rmp); - if (flags & SCHEDULE_CHANGE_PRIO) new_prio = rmp->priority; else @@ -359,7 +359,11 @@ void balance_queues(void) if (rmp->flags & IN_USE) { if (rmp->priority > rmp->max_priority) { rmp->priority -= 1; /* increase priority */ - schedule_process_local(rmp); + /* Balance the load, select new cpu. */ + cpu_proc[rmp->cpu]--; + rmp->cpu = pick_cpu(rmp); + cpu_proc[rmp->cpu]++; + schedule_process(rmp,SCHEDULE_CHANGE_ALL); } } } From 81ce1c3df2e3e27d87ca054dfaf2294f23d670d9 Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 05:10:06 -0700 Subject: [PATCH 6/8] Fix VFS panic "process has two calls" when closing a socket. --- minix/lib/libc/sys/close.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/minix/lib/libc/sys/close.c b/minix/lib/libc/sys/close.c index ac8c7f54c..a0c942aa8 100644 --- a/minix/lib/libc/sys/close.c +++ b/minix/lib/libc/sys/close.c @@ -12,7 +12,12 @@ close(int fd) memset(&m, 0, sizeof(m)); m.m_lc_vfs_close.fd = fd; - m.m_lc_vfs_close.nblock = 0; + + // When closing a socket VFS would output an error message: + // "vfs(1): panic: process has two calls (105, 131)". + // The fix is to make close non-blocking by default. + // There's probably a better way to do this TODO. + m.m_lc_vfs_close.nblock = 1; return _syscall(VFS_PROC_NR, VFS_CLOSE, &m); } From dd0c1095a142263b31eafad5cc3b7d852c465484 Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 05:16:22 -0700 Subject: [PATCH 7/8] Fix issue where Application Processors would sometimes use the physical address of LAPIC registers without translating it to virtual ones first. 
That would happen if VM had changed the page tables before the APs configured their LAPIC. --- minix/kernel/arch/i386/memory.c | 2 -- minix/kernel/main.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/minix/kernel/arch/i386/memory.c b/minix/kernel/arch/i386/memory.c index 8450aa344..f70f7d98f 100644 --- a/minix/kernel/arch/i386/memory.c +++ b/minix/kernel/arch/i386/memory.c @@ -964,8 +964,6 @@ int arch_enable_paging(struct proc * caller) } #if CONFIG_SMP barrier(); - - wait_for_APs_to_finish_booting(); #endif #endif diff --git a/minix/kernel/main.c b/minix/kernel/main.c index 1f5f3e51f..50b053810 100644 --- a/minix/kernel/main.c +++ b/minix/kernel/main.c @@ -104,6 +104,9 @@ void bsp_finish_booting(void) /* Kernel may no longer use bits of memory as VM will be running soon */ kernel_may_alloc = 0; +#ifdef CONFIG_SMP + wait_for_APs_to_finish_booting(); +#endif switch_to_user(); NOT_REACHABLE; From 663ee11368e2f448e844fbb25b8e58ee6faad229 Mon Sep 17 00:00:00 2001 From: Justinien Bouron Date: Sun, 10 Mar 2019 05:22:08 -0700 Subject: [PATCH 8/8] Fix intermittent failures in lin_lin_copy and verify_grant. Sometimes, the srcproc or dstproc in lin_lin_copy would have the RTS_VMINHIBIT flag set, leading to an assertion error. In case this happens, defer the kernel call instead of panicking. Verify_grant would sometimes page-fault when accessing an entry. Defer the kernel call when that happens. 
--- minix/kernel/arch/i386/do_sdevio.c | 4 +++- minix/kernel/arch/i386/memory.c | 12 ++++++++++-- minix/kernel/proto.h | 2 +- minix/kernel/system/do_safecopy.c | 29 +++++++++++++++++++++------- minix/kernel/system/do_safememset.c | 5 +++-- minix/kernel/system/do_umap_remote.c | 7 +++++-- minix/kernel/system/do_vumap.c | 2 +- 7 files changed, 45 insertions(+), 16 deletions(-) diff --git a/minix/kernel/arch/i386/do_sdevio.c b/minix/kernel/arch/i386/do_sdevio.c index add9f2adf..9f4c94f30 100644 --- a/minix/kernel/arch/i386/do_sdevio.c +++ b/minix/kernel/arch/i386/do_sdevio.c @@ -13,6 +13,7 @@ #include "kernel/system.h" #include #include +#include "kernel/vm.h" #include "arch_proto.h" @@ -67,12 +68,13 @@ int do_sdevio(struct proc * caller, message *m_ptr) /* Check for 'safe' variants. */ if((m_ptr->m_lsys_krn_sys_sdevio.request & _DIO_SAFEMASK) == _DIO_SAFE) { /* Map grant address to physical address. */ - if((r=verify_grant(proc_nr_e, caller->p_endpoint, + if((r=verify_grant(caller,proc_nr_e, caller->p_endpoint, m_ptr->m_lsys_krn_sys_sdevio.vec_addr, count, req_dir == _DIO_INPUT ? 
CPF_WRITE : CPF_READ, m_ptr->m_lsys_krn_sys_sdevio.offset, &newoffset, &newep, NULL)) != OK) { if(r == ENOTREADY) return r; + if(r == VMSUSPEND) return r; printf("do_sdevio: verify_grant failed\n"); return EPERM; } diff --git a/minix/kernel/arch/i386/memory.c b/minix/kernel/arch/i386/memory.c index f70f7d98f..ae520bc93 100644 --- a/minix/kernel/arch/i386/memory.c +++ b/minix/kernel/arch/i386/memory.c @@ -164,8 +164,16 @@ static int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr, if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE)); assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE)); assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v); - if(srcproc) assert(!RTS_ISSET(srcproc, RTS_VMINHIBIT)); - if(dstproc) assert(!RTS_ISSET(dstproc, RTS_VMINHIBIT)); + if(srcproc&&RTS_ISSET(srcproc, RTS_VMINHIBIT)) { + /* If the src is marked with an unstable memory space, then + * suspend as if a page fault occured. */ + return EFAULT_SRC; + } + if(dstproc&&RTS_ISSET(dstproc, RTS_VMINHIBIT)) { + /* If the dst is marked with an unstable memory space, then + * suspend as if a page fault occured. 
*/ + return EFAULT_DST; + } while(bytes > 0) { phys_bytes srcptr, dstptr; diff --git a/minix/kernel/proto.h b/minix/kernel/proto.h index f8b8dbed1..e640547fc 100644 --- a/minix/kernel/proto.h +++ b/minix/kernel/proto.h @@ -157,7 +157,7 @@ void hook_ipc_clear(struct proc *proc); /* system/do_safecopy.c */ struct cp_sfinfo; /* external callers may only provide NULL */ -int verify_grant(endpoint_t, endpoint_t, cp_grant_id_t, vir_bytes, int, +int verify_grant(struct proc*,endpoint_t, endpoint_t, cp_grant_id_t, vir_bytes, int, vir_bytes, vir_bytes *, endpoint_t *, struct cp_sfinfo *); /* system/do_diagctl.c */ diff --git a/minix/kernel/system/do_safecopy.c b/minix/kernel/system/do_safecopy.c index 83ad0ad4e..f0d5679ac 100644 --- a/minix/kernel/system/do_safecopy.c +++ b/minix/kernel/system/do_safecopy.c @@ -39,6 +39,7 @@ struct cp_sfinfo { /* information for handling soft faults */ * verify_grant * *===========================================================================*/ int verify_grant( + struct proc *caller, /* The caller */ endpoint_t granter, /* copyee */ endpoint_t grantee, /* copyer */ cp_grant_id_t grant, /* grant id */ @@ -118,12 +119,25 @@ int verify_grant( * has (presumably) set an invalid grant table entry by * returning EPERM, just like with an invalid grant id. */ - if(data_copy(granter, priv(granter_proc)->s_grant_table + - sizeof(g) * grant_idx, - KERNEL, (vir_bytes) &g, sizeof(g)) != OK) { - printf( - "verify_grant: grant verify: data_copy failed\n"); - return EPERM; + const vir_bytes entry_addr = priv(granter_proc)->s_grant_table+sizeof(g)*grant_idx; + const int copy_res =data_copy_vmcheck(caller,granter,entry_addr, + KERNEL, (vir_bytes) &g, sizeof(g)); + if(copy_res!=OK) { + /* The copy failed, it may be because we had a page + * fault from the source. In this case, the caller has + * been suspended already, but we need to propagate the + * VMSUSPEND to the upper level (until kernel_call_finish + * to make it happen. 
*/ + if(copy_res==VMSUSPEND) { + /* Propagate the VMSUSPEND. */ + return VMSUSPEND; + } else { + /* The reason is not a pagefault in the source + * , in this case report the error. */ + panic( + "verify_grant: grant verify: data_copy failed\n"); + return EPERM; + } } /* Check validity: flags and sequence number. */ @@ -302,9 +316,10 @@ static int safecopy( } /* Verify permission exists. */ - if((r=verify_grant(granter, grantee, grantid, bytes, access, + if((r=verify_grant(caller,granter, grantee, grantid, bytes, access, g_offset, &v_offset, &new_granter, &sfinfo)) != OK) { if(r == ENOTREADY) return r; + if(r == VMSUSPEND) return r; printf( "grant %d verify to copy %d->%d by %d failed: err %d\n", grantid, *src, *dst, grantee, r); diff --git a/minix/kernel/system/do_safememset.c b/minix/kernel/system/do_safememset.c index 9b356051a..de9134b04 100644 --- a/minix/kernel/system/do_safememset.c +++ b/minix/kernel/system/do_safememset.c @@ -13,6 +13,7 @@ #include #include "kernel/system.h" +#include "kernel/vm.h" /*===========================================================================* * do_safememset * @@ -45,9 +46,9 @@ int do_safememset(struct proc *caller, message *m_ptr) { } /* Verify permission exists, memset always requires CPF_WRITE */ - r = verify_grant(dst_endpt, caller_endpt, grantid, len, CPF_WRITE, + r = verify_grant(caller,dst_endpt, caller_endpt, grantid, len, CPF_WRITE, g_offset, &v_offset, &new_granter, NULL); - + if(r==VMSUSPEND) return r; if (r != OK) { printf("safememset: grant %d verify failed %d", grantid, r); return r; diff --git a/minix/kernel/system/do_umap_remote.c b/minix/kernel/system/do_umap_remote.c index 8ebe78d28..14a110957 100644 --- a/minix/kernel/system/do_umap_remote.c +++ b/minix/kernel/system/do_umap_remote.c @@ -11,6 +11,7 @@ */ #include "kernel/system.h" +#include "kernel/vm.h" #include @@ -63,8 +64,10 @@ int do_umap_remote(struct proc * caller, message * m_ptr) int new_proc_nr; cp_grant_id_t grant = (cp_grant_id_t) offset; - 
if(verify_grant(targetpr->p_endpoint, grantee, grant, count, - 0, 0, &newoffset, &newep, NULL) != OK) { + const int vres =verify_grant(caller,targetpr->p_endpoint, grantee, grant, count, + 0, 0, &newoffset, &newep, NULL); + if(vres==VMSUSPEND) return vres; + if(vres!=OK) { printf("SYSTEM: do_umap: verify_grant in %s, grant %d, bytes 0x%lx, failed, caller %s\n", targetpr->p_name, offset, count, caller->p_name); proc_stacktrace(caller); return EFAULT; diff --git a/minix/kernel/system/do_vumap.c b/minix/kernel/system/do_vumap.c index 47bc35edb..d5c3b7edb 100644 --- a/minix/kernel/system/do_vumap.c +++ b/minix/kernel/system/do_vumap.c @@ -77,7 +77,7 @@ int do_vumap(struct proc *caller, message *m_ptr) size -= offset; if (source != SELF) { - r = verify_grant(source, endpt, vvec[i].vv_grant, size, access, + r = verify_grant(caller,source, endpt, vvec[i].vv_grant, size, access, offset, &vir_addr, &granter, NULL); if (r != OK) return r;