[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH gnumach 1/2] xxx sysenter prototype
From: |
Justus Winter |
Subject: |
[PATCH gnumach 1/2] xxx sysenter prototype |
Date: |
Sat, 2 May 2015 18:49:09 +0200 |
---
i386/Makefrag.am | 2 +
i386/i386/gdt.c | 17 +++++
i386/i386/gdt.h | 7 +-
i386/i386/locore.S | 177 ++++++++++++++++++++++++++++++++++++++++++++++++
i386/i386/pcb.c | 24 +++----
i386/i386/syscall.c | 103 ++++++++++++++++++++++++++++
i386/i386/syscall.h | 7 ++
i386/i386/tss.h | 1 +
i386/i386at/conf.c | 8 +++
i386/i386at/model_dep.c | 2 +
10 files changed, 335 insertions(+), 13 deletions(-)
create mode 100644 i386/i386/syscall.c
create mode 100644 i386/i386/syscall.h
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 4dd6a9f..f59ac29 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -147,6 +147,8 @@ libkernel_a_SOURCES += \
i386/i386/setjmp.h \
i386/i386/spl.S \
i386/i386/spl.h \
+ i386/i386/syscall.c \
+ i386/i386/syscall.h \
i386/i386/task.h \
i386/i386/thread.h \
i386/i386/time_stamp.h \
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index c895eb3..0f9d0e3 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -57,6 +57,23 @@ gdt_init(void)
LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
ACC_PL_K|ACC_DATA_W, SZ_32);
+ fill_gdt_descriptor(KERNEL_ENTER_CS,
+ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
+ ACC_PL_K|ACC_CODE_R, SZ_32);
+ fill_gdt_descriptor(KERNEL_ENTER_DS,
+ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
+ ACC_PL_K|ACC_DATA_W, SZ_32);
+ fill_gdt_descriptor(USER_EXIT_CS,
+ VM_MIN_ADDRESS,
+ VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
+ /* XXX LINEAR_... */
+ ACC_PL_U|ACC_CODE_R, SZ_32);
+ fill_gdt_descriptor(USER_EXIT_DS,
+ VM_MIN_ADDRESS,
+ VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
+ ACC_PL_U|ACC_DATA_W, SZ_32);
#ifndef MACH_PV_DESCRIPTORS
fill_gdt_descriptor(LINEAR_DS,
0,
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index d865640..37ca6f5 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -55,7 +55,12 @@
#define USER_GDT 0x48 /* user-defined GDT entries */
#define USER_GDT_SLOTS 2
-#define GDTSZ (USER_GDT/8 + USER_GDT_SLOTS)
+#define KERNEL_ENTER_CS (0x58 | SEL_PL_K) /* kernel code */
+#define KERNEL_ENTER_DS (0x60 | SEL_PL_K) /* kernel data */
+#define USER_EXIT_CS (0x68 | SEL_PL_U) /* user code */
+#define USER_EXIT_DS (0x70 | SEL_PL_U) /* user data */
+
+#define GDTSZ (USER_EXIT_DS/8 + 1)
extern struct real_descriptor gdt[GDTSZ];
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index cfda86f..aa13c6b 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -592,6 +592,7 @@ ENTRY(thread_syscall_return)
or $(KERNEL_STACK_SIZE-1),%ecx
movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */
movl %eax,R_EAX(%esp) /* save return value */
+ /* XXX make this work with sysenter */
jmp _return_from_trap
ENTRY(call_continuation)
@@ -978,6 +979,18 @@ ttd_from_iret_i: /* on interrupt stack */
#endif /* MACH_TTD */
+/* User stub for calling the kernel using the trap gate; syscall_init copies the bytes between the _start and _end labels into the stub page. */
+ .globl user_trapgate_stub_start
+user_trapgate_stub_start:
+ popl %ecx /* Pop return address into %ecx. */
+ popl %eax /* Pop syscall number into %eax. */
+ pushl %ecx /* Push back return address (now in the slot the syscall number occupied). */
+ lcall $7, $0 /* Far call through selector 7, offset 0. */
+ subl $4, %esp /* Undo the net 4-byte pop above so %esp is back at its entry value.  NOTE(review): the ret below then reads the slot the return address was first popped from - presumably still intact; confirm. */
+ ret
+ .globl user_trapgate_stub_end
+user_trapgate_stub_end:
+
/*
* System call enters through a call gate. Flags are not saved -
* we must shuffle stack to look like trap save area.
@@ -1171,6 +1184,170 @@ syscall_addr:
jmp _take_trap /* treat as a trap */
+/* User stub for calling the kernel using the sysenter instruction; syscall_init copies the bytes between the _start and _end labels into the stub page. */
+ .globl user_sysenter_stub_start
+user_sysenter_stub_start:
+ push %ebp /* Set up a frame so the caller's arguments can be addressed off %ebp. */
+ mov %esp, %ebp
+ pushf /* Save flags and the registers that are reused below to pass values to the kernel. */
+ push %ebx
+ push %esi
+ push %edi /* xxxmore callee-saved registers? (five 4-byte pushes so far = 20 bytes, the value of SE_USER_SKIP) */
+ mov 8(%ebp), %eax /* Move syscall number into %eax. */
+ mov 12(%ebp), %ebx /* Move first argument into %ebx. */
+ mov 16(%ebp), %ecx /* Move second argument into %ecx. */
+ mov 20(%ebp), %edx /* Move third argument into %edx. */
+ mov 24(%ebp), %esi /* Move fourth argument into %esi. */
+ call get_ip /* Push the address of get_ip; presumably so the stub works at whatever address it is mapped. */
+get_ip: pop %edi /* load current ip */
+ add $8, %edi /* userspace return address; NOTE(review): assumes the instructions from get_ip through sysenter encode to exactly 8 bytes, so %edi lands on the pop after sysenter - confirm */
+ movl %esp, %ebp /* userspace stack pointer */
+ sysenter
+ pop %edi /* Intended resume point (see the add $8 above): undo the pushes in reverse order. */
+ pop %esi
+ pop %ebx
+ popf
+ pop %ebp
+ ret
+ .globl user_sysenter_stub_end
+user_sysenter_stub_end:
+
+/*
+ * SYSENTER entry point.
+ *
+ * Calling convention:
+ * %eax - syscall number
+ * %ebx - syscall argument 1
+ * %ecx - syscall argument 2
+ * %edx - syscall argument 3
+ * %esi - syscall argument 4
+ * %edi - userspace return address
+ * %ebp - userspace stack pointer
+ *
+ * Kernel Stack layout:
+ * kernel stack base -> EAX
+ * ECX
+ * EDX
+ * EBX
+ * ESP
+ * EBP
+ * ESI
+ * EDI
+ * XXX: make this look like a trap save area to make thread_syscall_return work
+ */
+/* Offsets from %ebp, which sysenter_entry points at the pusha save area (%edi is pushed last, hence offset 0). */
+#define SE_EAX (4 * 7)
+#define SE_ECX (4 * 6)
+#define SE_EDX (4 * 5)
+#define SE_EBX (4 * 4)
+#define SE_ESX (4 * 3) /* NOTE(review): this slot is ESP in the layout comment above, so the name was presumably meant to be SE_ESP; not referenced in this patch */
+#define SE_EBP (4 * 2)
+#define SE_ESI (4 * 1)
+#define SE_EDI (4 * 0)
+#define SE_STACK_POINTER SE_EBP /* the user stub passes its stack pointer in %ebp */
+#define SE_RETURN_ADDRESS SE_EDI /* the user stub passes its return address in %edi */
+
+#define SE_USER_SKIP 20 /* skip past the scratchpad: the 20 bytes (five 4-byte pushes) the user sysenter stub leaves on its stack before sysenter */
+
+ENTRY(sysenter_entry)
+ pusha /* save all registers */
+ mov %esp, %ebp /* to access the sysenter stack via the SE_* offsets */
+ cld /* clear direction flag */
+
+ pushl %ds /* save the segment registers */
+ pushl %es
+ pushl %fs
+ pushl %gs
+
+ mov %ss,%cx /* switch to kernel data segment */
+ mov %cx,%ds
+ mov %cx,%es
+ mov %cx,%fs
+ mov %cx,%gs
+
+ CPU_NUMBER(%edx)
+ movl CX(EXT(kernel_stack),%edx),%ebx
+ /* get current kernel stack */
+ xchgl %ebx, %esp /* switch stacks; %ebx now holds the sysenter sp */
+ pushl %ebx /* save sysenter sp */
+ movl %esp,%ebx /* save kernel sp for argument
+ unwinding */
+
+ negl %eax /* get system call number */
+ /* xxx sysenter_mach_call_range; NOTE(review): mach_call_range is not defined in this hunk - presumably the trap-gate path's handler; confirm it copes with this stack layout */
+ jl mach_call_range /* out of range if it was positive */
+ cmpl EXT(mach_trap_count),%eax /* check system call table bounds */
+ /* xxx sysenter_mach_call_range */
+ jg mach_call_range /* error if out of range */
+
+ shll $4,%eax /* manual indexing: scale the index by 16 */
+ movl EXT(mach_trap_table)(%eax),%ecx
+ /* get number of arguments */
+
+ cmp $4, %ecx /* dispatch on the argument count: 0-4 come from saved registers, more from the user stack */
+ ja se_args_5plus
+ je se_args_4
+ cmp $2, %ecx
+ ja se_args_3
+ je se_args_2
+ cmp $1, %ecx
+ je se_args_1
+ jmp se_args_0
+
+se_args_5plus:
+
+ sub $4, %ecx /* skip the four first arguments */
+ movl SE_STACK_POINTER(%ebp), %esi
+ /* get user stack pointer */
+ lea (4 /* skip user return address */\
+ +4 /* point past last argument; NOTE(review): numerically this 4 matches the syscall-number slot the user stub leaves above its return address */\
+ +16 /* skip register arguments */\
+ +SE_USER_SKIP)(%esi,%ecx,4),%esi
+ /* and skip past the userspace
+ local storage */
+
+ movl $USER_DS,%edx /* use user data segment for accesses */
+ mov %dx,%fs
+
+0: subl $4,%esi /* walk down from past the last argument so they are pushed last-to-first */
+ RECOVER(sysenter_mach_call_addr_push)
+ pushl %fs:(%esi) /* push argument on stack */
+ loop 0b /* loop for all arguments */
+
+se_args_4: /* each label falls through to the next, pushing the remaining register arguments */
+ push SE_ESI(%ebp) /* push fourth argument */
+se_args_3:
+ push SE_EDX(%ebp) /* push third argument */
+se_args_2:
+ push SE_ECX(%ebp) /* push second argument */
+se_args_1:
+ push SE_EBX(%ebp) /* push first argument */
+se_args_0:
+ sti /* xxx: sti/cli where ? */
+ call *EXT(mach_trap_table)+4(%eax)
+ /* call procedure */
+ cli /* xxx: sti/cli where ? */
+ movl %ebx, %esp /* clean parameters from stack */
+return_from_sysenter:
+ /* xxx: process ast */
+ popl %esp /* restore sysenter sp */
+ popl %gs /* restore segment registers */
+ popl %fs
+ popl %es
+ popl %ds
+
+ movl SE_RETURN_ADDRESS(%ebp), %edx /* sysexit resumes user code at the address in %edx */
+ movl SE_STACK_POINTER(%ebp), %ecx /* sysexit loads the user %esp from %ecx */
+ sti /* xxx: sti/cli where ? */
+ sysexit
+
+sysenter_mach_call_addr_push:
+ movl %ebx,%esp /* clean parameters from stack */
+ /* xxx signal page-fault */
+ jmp sysenter_mach_call_addr_push /* NOTE(review): jumps back to its own label, so control reaching here spins forever; presumably a placeholder until the fault is signalled */
+#undef SE_STACK_POINTER
+#undef SE_RETURN_ADDRESS
+
.data
DATA(cpu_features)
.long 0
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index e8040c8..2da3804 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -391,12 +391,12 @@ void pcb_init(thread_t thread)
* Guarantee that the bootstrapped thread will be in user
* mode.
*/
- pcb->iss.cs = USER_CS;
- pcb->iss.ss = USER_DS;
- pcb->iss.ds = USER_DS;
- pcb->iss.es = USER_DS;
- pcb->iss.fs = USER_DS;
- pcb->iss.gs = USER_DS;
+ pcb->iss.cs = USER_EXIT_CS;
+ pcb->iss.ss = USER_EXIT_DS;
+ pcb->iss.ds = USER_EXIT_DS;
+ pcb->iss.es = USER_EXIT_DS;
+ pcb->iss.fs = USER_EXIT_DS;
+ pcb->iss.gs = USER_EXIT_DS;
pcb->iss.efl = EFL_USER_SET;
thread->pcb = pcb;
@@ -524,12 +524,12 @@ kern_return_t thread_setstatus(
* 386 mode. Set segment registers for flat
* 32-bit address space.
*/
- saved_state->cs = USER_CS;
- saved_state->ss = USER_DS;
- saved_state->ds = USER_DS;
- saved_state->es = USER_DS;
- saved_state->fs = USER_DS;
- saved_state->gs = USER_DS;
+ saved_state->cs = USER_EXIT_CS;
+ saved_state->ss = USER_EXIT_DS;
+ saved_state->ds = USER_EXIT_DS;
+ saved_state->es = USER_EXIT_DS;
+ saved_state->fs = USER_EXIT_DS;
+ saved_state->gs = USER_EXIT_DS;
}
else {
/*
diff --git a/i386/i386/syscall.c b/i386/i386/syscall.c
new file mode 100644
index 0000000..e9b17d0
--- /dev/null
+++ b/i386/i386/syscall.c
@@ -0,0 +1,103 @@
+#include <mach/vm_param.h>
+#include <mach/vm_prot.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <string.h>
+#include <kern/debug.h>
+
+#include <machine/tss.h>
+#include <i386/i386/ktss.h>
+#include <i386/i386/gdt.h>
+#include <i386/i386/locore.h>
+
+#include "syscall.h"
+
+#include <kern/printf.h> // xxx
+
+static vm_offset_t msyscall = 0;	/* kernel address of the stub page, set by syscall_init */
+/* Labels from locore.S bracketing the user stubs; only their addresses are used. */
+extern void user_trapgate_stub_start(void);
+extern void user_trapgate_stub_end(void);
+
+extern void user_sysenter_stub_start(void);
+extern void user_sysenter_stub_end(void);
+
+/* Set up the page holding the user-mode system call stub: allocate and
+   zero one wired kernel page, copy in the SYSENTER stub when the CPU
+   reports SEP and the trap-gate stub otherwise, then do per-CPU setup. */
+void
+syscall_init(void)
+{
+	vm_offset_t stub_begin, stub_limit;
+
+	if (kmem_alloc_wired(kernel_map, &msyscall, PAGE_SIZE) != KERN_SUCCESS)
+		panic("syscall_init");
+	memset((void *) msyscall, 0, PAGE_SIZE);
+
+	if (! CPU_HAS_FEATURE (CPU_FEATURE_SEP)) {
+		printf ("syscall: using trap gate\n");
+		stub_begin = (vm_offset_t) user_trapgate_stub_start;
+		stub_limit = (vm_offset_t) user_trapgate_stub_end;
+	} else {
+		printf ("syscall: using SYSENTER/SYSEXIT\n");
+		stub_begin = (vm_offset_t) user_sysenter_stub_start;
+		stub_limit = (vm_offset_t) user_sysenter_stub_end;
+	}
+
+	/* Copy exactly the bytes between the chosen start and end labels. */
+	memcpy((void *) msyscall, (const void *) stub_begin,
+	       (size_t) (stub_limit - stub_begin));
+
+	syscall_init_cpu();
+}
+
+static void
+wrmsr(unsigned int msr, unsigned long long val)
+{
+	/* WRMSR: index in %ecx, value in %edx:%eax ("A" pairs only on i386). */
+	__asm__ __volatile__("wrmsr" : /* no outputs */
+		: "c" (msr), "a" ((unsigned) val), "d" ((unsigned) (val >> 32)));
+}
+
+#define MSR_IA32_SYSENTER_CS 0x00000174
+#define MSR_IA32_SYSENTER_ESP 0x00000175
+#define MSR_IA32_SYSENTER_EIP 0x00000176
+
+extern void sysenter_entry(void);
+
+void
+syscall_init_cpu(void)
+{
+	/* Program the SYSENTER MSRs (code segment, stack top, entry point) when
+	   the CPU has SEP; otherwise do nothing.  NOTE(review): always uses the
+	   global ktss; curr_ktss (cpu_number ()) was left commented out. */
+	if (CPU_HAS_FEATURE (CPU_FEATURE_SEP)) {
+		const unsigned long stack_top = (unsigned long) ktss.sysenter_stack
+						+ sizeof ktss.sysenter_stack;
+		wrmsr(MSR_IA32_SYSENTER_CS, KERNEL_ENTER_CS);
+		wrmsr(MSR_IA32_SYSENTER_ESP, stack_top);
+		wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry);
+	}
+}
+
+int
+syscall_open(dev_t dev, int flag, io_req_t ior)
+{
+ return 0; /* open entry of the "syscall" device: ignores all arguments and always returns 0 */
+}
+
+void
+syscall_close(dev_t dev, int flag)
+{
+ return; /* close entry of the "syscall" device: ignores its arguments and does nothing */
+}
+
+int
+syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot)
+{
+	/* Writable mappings are refused with -1.  Otherwise return i386_btop of
+	   what pmap_extract reports for msyscall; DEV and OFF are not consulted. */
+	if (!(prot & VM_PROT_WRITE))
+		return i386_btop(pmap_extract(pmap_kernel(), msyscall));
+	return -1;
+}
diff --git a/i386/i386/syscall.h b/i386/i386/syscall.h
new file mode 100644
index 0000000..de9670c
--- /dev/null
+++ b/i386/i386/syscall.h
@@ -0,0 +1,7 @@
+/* XXX prototype header for i386/i386/syscall.c: no include guard yet, and dev_t, io_req_t, vm_offset_t and vm_prot_t must already be declared by the includer. */
+
+void syscall_init(void); /* allocate the stub page, install the user stub, then call syscall_init_cpu */
+void syscall_init_cpu(void); /* program the SYSENTER MSRs when the CPU has SEP */
+int syscall_open(dev_t dev, int flag, io_req_t ior); /* device open entry; always returns 0 */
+void syscall_close(dev_t dev, int flag); /* device close entry; does nothing */
+int syscall_mmap(dev_t dev, vm_offset_t off, vm_prot_t prot); /* device mmap entry; -1 if VM_PROT_WRITE is requested */
diff --git a/i386/i386/tss.h b/i386/i386/tss.h
index ff25f21..8c939c7 100644
--- a/i386/i386/tss.h
+++ b/i386/i386/tss.h
@@ -76,6 +76,7 @@ struct task_tss
struct i386_tss tss;
unsigned char iopb[IOPB_BYTES];
unsigned char barrier;
+ unsigned long sysenter_stack[64]; /* xxx */
};
diff --git a/i386/i386at/conf.c b/i386/i386at/conf.c
index ab4f680..d7f9e6f 100644
--- a/i386/i386at/conf.c
+++ b/i386/i386at/conf.c
@@ -68,6 +68,9 @@
#define hypcnname "hyp"
#endif /* MACH_HYP */
+#include <i386/syscall.h>
+#define syscall_name "syscall"
+
/*
* List of devices - console must be at slot 0
*/
@@ -143,6 +146,11 @@ struct dev_ops dev_name_list[] =
nodev },
#endif /* MACH_HYP */
+ { syscall_name, syscall_open, syscall_close, nulldev_read,
+ nulldev_write, nulldev_getstat, nulldev_setstat,
+ syscall_mmap,
+ nodev, nulldev, nulldev_portdeath, 0,
+ nodev },
};
int dev_name_count = sizeof(dev_name_list)/sizeof(dev_name_list[0]);
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index bc34c9b..210e54d 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -63,6 +63,7 @@
#include <i386/proc_reg.h>
#include <i386/locore.h>
#include <i386/model_dep.h>
+#include <i386/syscall.h>
#include <i386at/autoconf.h>
#include <i386at/idt.h>
#include <i386at/int_init.h>
@@ -197,6 +198,7 @@ void machine_init(void)
*/
pmap_unmap_page_zero();
#endif
+ syscall_init();
}
/* Conserve power on processor CPU. */
--
2.1.4