[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC PATCH gnumach] percpu area using gs segment
From: Damien Zammit
Subject: [RFC PATCH gnumach] percpu area using gs segment
Date: Sat, 26 Aug 2023 06:48:52 +0000
This speeds up SMP again by storing each CPU's struct processor
in a per-cpu area, which avoids an expensive cpu_number() call on every
current_processor(), and by reading the CPU number from a fixed
offset in the per-cpu area. It still needs work for 64-bit and for
converting the other per-cpu arrays.
---
i386/Makefrag.am | 2 ++
i386/i386/cpu_number.c | 8 ++++-
i386/i386/cpu_number.h | 1 +
i386/i386/gdt.c | 9 +++++-
i386/i386/gdt.h | 11 ++++++-
i386/i386/i386asm.sym | 7 ----
i386/i386/locore.S | 13 +++++---
i386/i386/mp_desc.c | 4 ++-
i386/i386/percpu.c | 30 ++++++++++++++++++
i386/i386/percpu.h | 72 ++++++++++++++++++++++++++++++++++++++++++
kern/processor.c | 7 ++--
kern/processor.h | 18 ++++-------
12 files changed, 150 insertions(+), 32 deletions(-)
create mode 100644 i386/i386/percpu.c
create mode 100644 i386/i386/percpu.h
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 274e8695..c1724cea 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -108,6 +108,8 @@ libkernel_a_SOURCES += \
i386/i386/irq.c \
i386/i386/irq.h \
i386/i386/msr.h \
+ i386/i386/percpu.c \
+ i386/i386/percpu.h \
i386/i386/pit.c \
i386/i386/pit.h
diff --git a/i386/i386/cpu_number.c b/i386/i386/cpu_number.c
index ef19e11f..241015b5 100644
--- a/i386/i386/cpu_number.c
+++ b/i386/i386/cpu_number.c
@@ -20,11 +20,17 @@
#include <i386/smp.h>
#include <i386/cpu.h>
#include <i386/mp_desc.h>
+#include <i386/percpu.h>
#include <kern/printf.h>
#if NCPUS > 1
-int cpu_number(void)
+int cpu_number_slow(void)
{
return cpu_id_lut[apic_get_current_cpu()];
}
+
+int cpu_number(void)
+{
+ return *((int *)percpu_ptr(int, cpu_id)); /* reads the cpu_id member of the struct percpu located via %gs (see percpu_ptr) */
+}
#endif
diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
index 479a847a..098696a3 100644
--- a/i386/i386/cpu_number.h
+++ b/i386/i386/cpu_number.h
@@ -65,6 +65,7 @@
#ifndef __ASSEMBLER__
#include "kern/cpu_number.h"
+int cpu_number_slow(void);
int cpu_number(void);
#endif
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index ddda603b..e335de50 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -35,6 +35,7 @@
#include <kern/assert.h>
#include <intel/pmap.h>
+#include <machine/percpu.h>
#include "vm_param.h"
#include "seg.h"
@@ -73,6 +74,11 @@ gdt_fill(struct real_descriptor *mygdt)
0xffffffff,
ACC_PL_K|ACC_DATA_W, SZ_32);
#endif /* MACH_PV_DESCRIPTORS */
+ vm_offset_t thiscpu = kvtolin(&percpu_array[cpu_number_slow()]);
+ _fill_gdt_descriptor(mygdt, PERCPU_DS,
+ thiscpu,
+ thiscpu + sizeof(struct percpu) - 1,
+ ACC_PL_K|ACC_DATA_W, SZ_32);
#endif
#ifdef MACH_PV_DESCRIPTORS
@@ -119,8 +125,9 @@ reload_segs(void)
"movw %w1,%%ds\n"
"movw %w1,%%es\n"
+ "movw %w3,%%gs\n"
"movw %w1,%%ss\n"
- : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0));
+ : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0), "r" (PERCPU_DS));
#endif
}
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index 5def73cb..d5d78d43 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -77,12 +77,20 @@
/* 0x58 used by user TSS in 64bit mode */
+#ifdef __x86_64__
+/* XXX */
+#else
+#define PERCPU_DS 0x68 /* per-cpu data mapping */
+#endif
+
#ifdef __x86_64__
#define GDTSZ sel_idx(0x60)
#else
-#define GDTSZ sel_idx(0x58)
+#define GDTSZ sel_idx(0x70)
#endif
+#ifndef __ASSEMBLER__
+
extern struct real_descriptor gdt[GDTSZ];
/* Fill a segment descriptor in the GDT. */
@@ -117,4 +125,5 @@ extern struct real_descriptor gdt[GDTSZ];
extern void gdt_init(void);
extern void ap_gdt_init(int cpu);
+#endif /* __ASSEMBLER__ */
#endif /* _I386_GDT_ */
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index 436e296a..832c7041 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -154,17 +154,10 @@ expr NPTES
PTES_PER_PAGE
expr INTEL_PTE_VALID|INTEL_PTE_WRITE INTEL_PTE_KERNEL
expr IDTSZ
-expr GDTSZ
-expr LDTSZ
expr KERNEL_RING
-
expr KERNEL_CS
expr KERNEL_DS
-expr KERNEL_TSS
-#ifndef MACH_PV_DESCRIPTORS
-expr KERNEL_LDT
-#endif /* MACH_PV_DESCRIPTORS */
expr (VM_MIN_KERNEL_ADDRESS>>PDESHIFT)*sizeof(pt_entry_t) KERNELBASEPDE
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index 55aa9d60..463cce55 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -33,6 +33,7 @@
#include <i386/proc_reg.h>
#include <i386/trap.h>
#include <i386/seg.h>
+#include <i386/gdt.h>
#include <i386/ldt.h>
#include <i386/i386asm.h>
#include <i386/cpu_number.h>
@@ -468,7 +469,8 @@ trap_push_segs:
mov %ax,%ds /* (same as kernel stack segment) */
mov %ax,%es
mov %ax,%fs
- mov %ax,%gs
+ mov $(PERCPU_DS),%ax
+ movw %ax,%gs
trap_set_segs:
cld /* clear direction flag */
@@ -686,7 +688,8 @@ ENTRY(all_intrs)
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
- mov %dx,%gs
+ mov $(PERCPU_DS),%dx
+ movw %dx,%gs
CPU_NUMBER(%edx)
@@ -792,7 +795,8 @@ ast_from_interrupt:
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
- mov %dx,%gs
+ mov $(PERCPU_DS),%dx
+ movw %dx,%gs
CPU_NUMBER(%edx)
TIME_TRAP_UENTRY
@@ -1051,7 +1055,8 @@ syscall_entry_2:
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
- mov %dx,%gs
+ mov $(PERCPU_DS),%dx
+ movw %dx,%gs
/*
* Shuffle eflags,eip,cs into proper places
diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
index f1a1f989..465ffacc 100644
--- a/i386/i386/mp_desc.c
+++ b/i386/i386/mp_desc.c
@@ -143,6 +143,8 @@ mp_desc_init(int mycpu)
struct mp_desc_table *mpt;
vm_offset_t mem;
+ init_percpu(mycpu);
+
if (mycpu == 0) {
/*
* Master CPU uses the tables built at boot time.
@@ -275,7 +277,7 @@ cpu_setup(int cpu)
void
cpu_ap_main()
{
- int cpu = cpu_number();
+ int cpu = cpu_number_slow();
do {
cpu_pause();
diff --git a/i386/i386/percpu.c b/i386/i386/percpu.c
new file mode 100644
index 00000000..0bc8b234
--- /dev/null
+++ b/i386/i386/percpu.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <i386/smp.h>
+#include <i386/apic.h>
+#include <i386/percpu.h>
+
+struct percpu percpu_array[NCPUS] __aligned(0x8000);
+
+void init_percpu(int cpu)
+{
+ /* Fill slot `cpu` of percpu_array; percpu_ptr() later reads `self` back through %gs:0. */
+ int apic_id = apic_get_current_cpu();
+ percpu_array[cpu].self = &percpu_array[cpu];
+ percpu_array[cpu].apic_id = apic_id;
+ percpu_array[cpu].cpu_id = cpu; /* the slot index is the cpu id; do not round-trip through cpu_id_lut */
+}
diff --git a/i386/i386/percpu.h b/i386/i386/percpu.h
new file mode 100644
index 00000000..b22d512c
--- /dev/null
+++ b/i386/i386/percpu.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PERCPU_H_
+#define _PERCPU_H_
+
+#include <kern/ast.h>
+#include <kern/processor.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <i386/mp_desc.h>
+#include <i386/spl.h>
+#include <intel/pmap.h>
+#include <ipc/ipc_kmsg.h>
+
+#define percpu_assign(stm, val) /* store val into member stm of the %gs-based struct percpu */ \
+ asm volatile("mov %0, %%gs:%c1" /* %c1 prints the offset as a bare constant displacement */ \
+ : : "r" (val), "i" (__builtin_offsetof(struct percpu, stm)) : "memory");
+
+#define percpu_ptr(typ, stm) /* yields a typ * to member stm of the struct percpu addressed by %gs */ \
+MACRO_BEGIN \
+ typ *ptr_ = (typ *)__builtin_offsetof(struct percpu, stm); \
+ /* add the word stored at %gs:0 (struct percpu's first member, `self`) to the member offset */ \
+ asm("add %%gs:0, %0" \
+ : "+r" (ptr_) \
+ : ); \
+ /* result of the macro; assumes MACRO_BEGIN/MACRO_END form a statement expression - TODO confirm */ \
+ ptr_; \
+MACRO_END
+
+struct percpu {
+ struct percpu *self; /* own address, set by init_percpu(); percpu_ptr() reads it at %gs:0, so it must stay the first member */
+ struct processor processor; /* what processor_ptr(i) and current_processor() point at */
+/* Disabled for now (commit message: other percpu arrays are still to be replaced):
+ struct machine_slot machine_slot;
+ struct mp_desc_table mp_desc_table;
+ thread_t active_thread;
+ vm_offset_t active_stack;
+ vm_offset_t int_stack_top;
+ vm_offset_t int_stack_base;
+ ast_t need_ast;
+ ipc_kmsg_t ipc_kmsg_cache;
+ pmap_update_list cpu_update_list;
+ spl_t saved_ipl;
+ spl_t curr_ipl;
+ timer_data_t kernel_timer;
+ timer_t current_timer;
+ unsigned long in_interrupt;
+*/
+ int apic_id; /* apic_get_current_cpu() as sampled in init_percpu() */
+ int cpu_id; /* value returned by cpu_number() */
+};
+
+extern struct percpu percpu_array[NCPUS];
+
+void init_percpu(int cpu);
+
+#endif /* _PERCPU_H_ */
diff --git a/kern/processor.c b/kern/processor.c
index 2cd6d46c..76735381 100644
--- a/kern/processor.c
+++ b/kern/processor.c
@@ -60,14 +60,12 @@ struct kmem_cache pset_cache;
int master_cpu;
struct processor_set default_pset;
-struct processor processor_array[NCPUS];
queue_head_t all_psets;
int all_psets_count;
def_simple_lock_data(, all_psets_lock);
processor_t master_processor;
-processor_t processor_ptr[NCPUS];
/*
* Bootstrap the processor/pset system so the scheduler can run.
@@ -81,10 +79,9 @@ void pset_sys_bootstrap(void)
for (i = 0; i < NCPUS; i++) {
/*
* Initialize processor data structures.
- * Note that cpu_to_processor(i) is processor_ptr[i].
+ * Note that cpu_to_processor is processor_ptr.
*/
- processor_ptr[i] = &processor_array[i];
- processor_init(processor_ptr[i], i);
+ processor_init(processor_ptr(i), i);
}
master_processor = cpu_to_processor(master_cpu);
queue_init(&all_psets);
diff --git a/kern/processor.h b/kern/processor.h
index 17b784a3..d83cdf3c 100644
--- a/kern/processor.h
+++ b/kern/processor.h
@@ -112,6 +112,8 @@ struct processor {
typedef struct processor Processor;
extern struct processor processor_array[NCPUS];
+#include <machine/percpu.h>
+
/*
* Chain of all processor sets.
*/
@@ -195,23 +197,15 @@ extern processor_t master_processor;
#define PROCESSOR_ASSIGN 4 /* Assignment is changing */
#define PROCESSOR_SHUTDOWN 5 /* Being shutdown */
-/*
- * Use processor ptr array to find current processor's data structure.
- * This replaces a multiplication (index into processor_array) with
- * an array lookup and a memory reference. It also allows us to save
- * space if processor numbering gets too sparse.
- */
-
-extern processor_t processor_ptr[NCPUS];
-
-#define cpu_to_processor(i) (processor_ptr[i])
+#define processor_ptr(i) (&percpu_array[i].processor)
+#define cpu_to_processor processor_ptr
-#define current_processor() (processor_ptr[cpu_number()])
+#define current_processor() (percpu_ptr(struct processor, processor))
#define current_processor_set() (current_processor()->processor_set)
/* Compatibility -- will go away */
-#define cpu_state(slot_num) (processor_ptr[slot_num]->state)
+#define cpu_state(slot_num) (processor_ptr(slot_num)->state)
#define cpu_idle(slot_num) (cpu_state(slot_num) == PROCESSOR_IDLE)
/* Useful lock macros */
--
2.40.1
- [RFC PATCH gnumach] percpu area using gs segment,
Damien Zammit <=