2007-01-05 Samuel Thibault Add MMX FPU registers save/restore support. This needs 16-byte-aligned structures. * i386/include/mach/i386/fp_reg.h (struct i386_fp_regs): Invert array indices. (struct i386_xfp_save): New structure. (FP_387X): New macro. * i386/i386/thread.h (struct i386_fpsave_state): Add xfp_save_state member, keep existing fp_save_state and fp_regs members in an unnamed union member. Move fp_valid member to the end of the structure. * i386/i386/fpu.h (fxsave, fxrstor): New macros. (fpu_save_context): Use fxsave() when FPU is FXSR-capable. * i386/i386/fpu.c: Include an additional header [header name lost in extraction]. (fp_save, fp_load): Add declaration. (init_fpu): Add FXSR-capable FPU detection. (fpu_module_init): Request 16-byte alignment to zinit() for i386_fpsave_state structures. (fpu_set_state): Abort when FPU is FXSR-capable (not implemented yet). (fpu_get_state): Convert FPU state when FPU is FXSR-capable. (fp_save): Use fxsave() when FPU is FXSR-capable. (fp_load): Use fxrstor() when FPU is FXSR-capable. (fp_state_alloc): Add FXSR-aware initialization. 
Index: i386/include/mach/i386/fp_reg.h
===================================================================
RCS file: /cvsroot/hurd/gnumach/i386/include/mach/i386/fp_reg.h,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 fp_reg.h
--- i386/include/mach/i386/fp_reg.h	25 Feb 1997 21:27:00 -0000	1.1.1.1
+++ i386/include/mach/i386/fp_reg.h	4 Jan 2007 23:55:56 -0000
@@ -46,10 +46,30 @@ struct i386_fp_save {
 };
 
 struct i386_fp_regs {
-	unsigned short	fp_reg_word[5][8];
+	unsigned short	fp_reg_word[8][5];
 				/* space for 8 80-bit FP registers */
 };
 
+struct i386_xfp_save {
+	unsigned short	fp_control;	/* control */
+	unsigned short	fp_status;	/* status */
+	unsigned short	fp_tag;		/* register tags */
+	unsigned short	fp_opcode;	/* opcode of failed instruction */
+	unsigned int	fp_eip;		/* eip at failed instruction */
+	unsigned short	fp_cs;		/* cs at failed instruction */
+	unsigned short	fp_unused_1;
+	unsigned int	fp_dp;		/* data address */
+	unsigned short	fp_ds;		/* data segment */
+	unsigned short	fp_unused_2;
+	unsigned int	fp_mxcsr;	/* MXCSR */
+	unsigned int	fp_mxcsr_mask;	/* MXCSR_MASK */
+	unsigned char	fp_reg_word[8][16];
+				/* space for 8 128-bit FP registers */
+	unsigned char	fp_xreg_word[8][16];
+				/* space for 8 128-bit XMM registers */
+	unsigned int	padding[56];
+} __attribute__((aligned(16)));
+
 /*
  * Control register
  */
@@ -104,5 +124,6 @@ struct i386_fp_regs {
 #define	FP_SOFT		1	/* software FP emulator */
 #define	FP_287		2	/* 80287 */
 #define	FP_387		3	/* 80387 or 80486 */
+#define	FP_387X		4	/* FXSAVE/RSTOR-capable */
 
 #endif	/* _MACH_I386_FP_REG_H_ */
Index: i386/i386/thread.h
===================================================================
RCS file: /cvsroot/hurd/gnumach/i386/i386/thread.h,v
retrieving revision 1.3.2.1
diff -u -p -r1.3.2.1 thread.h
--- i386/i386/thread.h	5 Nov 2006 20:39:24 -0000	1.3.2.1
+++ i386/i386/thread.h	4 Jan 2007 23:55:56 -0000
@@ -112,9 +112,14 @@ struct i386_kernel_state {
  */
 
 struct i386_fpsave_state {
+	union {
+		struct {
+			struct i386_fp_save	fp_save_state;
+			struct i386_fp_regs	fp_regs;
+		};
+		struct i386_xfp_save	xfp_save_state;
+	};
 	boolean_t		fp_valid;
-	struct i386_fp_save	fp_save_state;
-	struct i386_fp_regs	fp_regs;
 };
 
 /*
Index: i386/i386/fpu.h
===================================================================
RCS file: /cvsroot/hurd/gnumach/i386/i386/fpu.h,v
retrieving revision 1.1.1.1.4.3
diff -u -p -r1.1.1.1.4.3 fpu.h
--- i386/i386/fpu.h	12 Dec 2006 23:30:49 -0000	1.1.1.1.4.3
+++ i386/i386/fpu.h	4 Jan 2007 23:55:56 -0000
@@ -66,6 +66,12 @@
 #define frstor(state) \
 	asm volatile("frstor %0" : : "m" (state))
 
+#define fxsave(state) \
+	asm volatile("fxsave %0" : "=m" (*state))
+
+#define fxrstor(state) \
+	asm volatile("fxrstor %0" : : "m" (state))
+
 #define fwait() \
 	asm("fwait");
 
@@ -85,7 +91,10 @@
 	if (ifps != 0 && !ifps->fp_valid) { \
 	    /* registers are in FPU - save to memory */ \
 	    ifps->fp_valid = TRUE; \
-	    fnsave(&ifps->fp_save_state); \
+	    if (fp_kind == FP_387X) /* FXSR-capable: save into the xfp_save_state union member */ \
+		fxsave(&ifps->xfp_save_state); \
+	    else \
+		fnsave(&ifps->fp_save_state); \
 	    set_ts(); \
 	} \
     }
Index: i386/i386/fpu.c
===================================================================
RCS file: /cvsroot/hurd/gnumach/i386/i386/fpu.c,v
retrieving revision 1.2.4.10
diff -u -p -r1.2.4.10 fpu.c
--- i386/i386/fpu.c	4 Jan 2007 23:51:02 -0000	1.2.4.10
+++ i386/i386/fpu.c	4 Jan 2007 23:55:56 -0000
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include
 #include "cpu_number.h"
 
 #if 0
@@ -64,6 +65,9 @@ extern void i386_exception();
 int		fp_kind = FP_387;	/* 80387 present */
 zone_t		ifps_zone;		/* zone for FPU save area */
 
+void fp_save(thread_t thread);
+void fp_load(thread_t thread);
+
 #if	NCPUS == 1
 volatile thread_t	fp_thread = THREAD_NULL;
 				/* thread whose state is in FPU */
@@ -130,7 +134,12 @@ init_fpu()
 		/*
 		 * We have a 387.
 		 */
-		fp_kind = FP_387;
+		if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
+		    fp_kind = FP_387X;
+		    printf("Enabling FXSR\n");
+		    set_cr4(get_cr4() | CR4_OSFXSR);
+		} else
+		    fp_kind = FP_387;
 	    }
 	    /*
 	     * Trap wait instructions.  Turn off FPU for now.
@@ -152,7 +161,7 @@ init_fpu()
 void
 fpu_module_init()
 {
-	ifps_zone = zinit(sizeof(struct i386_fpsave_state), 0,
+	ifps_zone = zinit(sizeof(struct i386_fpsave_state), 16,
 			  THREAD_MAX * sizeof(struct i386_fpsave_state),
 			  THREAD_CHUNK * sizeof(struct i386_fpsave_state),
 			  0, "i386 fpsave state");
@@ -202,6 +211,10 @@ ASSERT_IPL(SPL0);
 	if (fp_kind == FP_NO)
 	    return KERN_FAILURE;
 
+	if (fp_kind == FP_387X)
+	    /* TODO */
+	    return KERN_FAILURE;
+
 #if	NCPUS == 1
 
 	/*
@@ -339,15 +352,29 @@ ASSERT_IPL(SPL0);
 	     */
 	    memset(user_fp_state, 0, sizeof(struct i386_fp_save));
 
-	    user_fp_state->fp_control = ifps->fp_save_state.fp_control;
-	    user_fp_state->fp_status = ifps->fp_save_state.fp_status;
-	    user_fp_state->fp_tag = ifps->fp_save_state.fp_tag;
-	    user_fp_state->fp_eip = ifps->fp_save_state.fp_eip;
-	    user_fp_state->fp_cs = ifps->fp_save_state.fp_cs;
-	    user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode;
-	    user_fp_state->fp_dp = ifps->fp_save_state.fp_dp;
-	    user_fp_state->fp_ds = ifps->fp_save_state.fp_ds;
-	    *user_fp_regs = ifps->fp_regs;
+	    if (fp_kind == FP_387X) {
+		int i;
+		user_fp_state->fp_control = ifps->xfp_save_state.fp_control;
+		user_fp_state->fp_status = ifps->xfp_save_state.fp_status;
+		user_fp_state->fp_tag = ifps->xfp_save_state.fp_tag; /* TODO: convert */
+		user_fp_state->fp_eip = ifps->xfp_save_state.fp_eip;
+		user_fp_state->fp_cs = ifps->xfp_save_state.fp_cs;
+		user_fp_state->fp_opcode = ifps->xfp_save_state.fp_opcode;
+		user_fp_state->fp_dp = ifps->xfp_save_state.fp_dp;
+		user_fp_state->fp_ds = ifps->xfp_save_state.fp_ds;
+		for (i=0; i<8; i++)	/* copy one register per iteration: 10 bytes out of each 16-byte slot */
+		    memcpy(&user_fp_regs->fp_reg_word[i], &ifps->xfp_save_state.fp_reg_word[i], sizeof(user_fp_regs->fp_reg_word[i]));
+	    } else {
+		user_fp_state->fp_control = ifps->fp_save_state.fp_control;
+		user_fp_state->fp_status = ifps->fp_save_state.fp_status;
+		user_fp_state->fp_tag = ifps->fp_save_state.fp_tag;
+		user_fp_state->fp_eip = ifps->fp_save_state.fp_eip;
+		user_fp_state->fp_cs = ifps->fp_save_state.fp_cs;
+		user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode;
+		user_fp_state->fp_dp = ifps->fp_save_state.fp_dp;
+		user_fp_state->fp_ds = ifps->fp_save_state.fp_ds;
+		*user_fp_regs = ifps->fp_regs;
+	    }
 	}
 	simple_unlock(&pcb->lock);
 
@@ -553,7 +580,10 @@ fp_save(thread)
 	if (ifps != 0 && !ifps->fp_valid) {
 	    /* registers are in FPU */
 	    ifps->fp_valid = TRUE;
-	    fnsave(&ifps->fp_save_state);
+	    if (fp_kind == FP_387X)
+		fxsave(&ifps->xfp_save_state);
+	    else
+		fnsave(&ifps->fp_save_state);
 	}
 }
 
@@ -601,7 +631,10 @@ ASSERT_IPL(SPL0);
 	    printf("fp_load: invalid FPU state!\n");
 	    fninit ();
 	} else {
-	    frstor(ifps->fp_save_state);
+	    if (fp_kind == FP_387X)
+		fxrstor(ifps->xfp_save_state);
+	    else
+		frstor(ifps->fp_save_state);
 	}
 	ifps->fp_valid = FALSE;	/* in FPU */
 }
@@ -623,11 +656,22 @@ fp_state_alloc()
 	pcb->ims.ifps = ifps;
 
 	ifps->fp_valid = TRUE;
-	ifps->fp_save_state.fp_control = (0x037f
-			& ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
-			| (FPC_PC_53|FPC_IC_AFF);
-	ifps->fp_save_state.fp_status = 0;
-	ifps->fp_save_state.fp_tag = 0xffff;	/* all empty */
+
+	if (fp_kind == FP_387X) {
+		ifps->xfp_save_state.fp_control = (0x037f
+				& ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
+				| (FPC_PC_53|FPC_IC_AFF);
+		ifps->xfp_save_state.fp_status = 0;
+		ifps->xfp_save_state.fp_tag = 0xffff;	/* all empty */
+		if (CPU_HAS_FEATURE(CPU_FEATURE_SSE))
+			ifps->xfp_save_state.fp_mxcsr = 0x1f80;
+	} else {
+		ifps->fp_save_state.fp_control = (0x037f
+				& ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
+				| (FPC_PC_53|FPC_IC_AFF);
+		ifps->fp_save_state.fp_status = 0;
+		ifps->fp_save_state.fp_tag = 0xffff;	/* all empty */
+	}
 }
 
 #if	AT386