Align the PowerPC registers struct manually, i.e. don't depend on non-portable
compiler extensions (e.g. GCC __attribute__((aligned(N)))).
This commit is contained in:
gbeauche 2005-12-06 22:25:13 +00:00
parent d8aa8a7459
commit dd2b9a95d5
6 changed files with 166 additions and 126 deletions

View File

@ -177,50 +177,8 @@ public:
// Make sure the SIGSEGV handler can access CPU registers
friend sigsegv_return_t sigsegv_handler(sigsegv_address_t, sigsegv_address_t);
// Memory allocator returning areas aligned on 16-byte boundaries
void *operator new(size_t size);
void operator delete(void *p);
};
// Memory allocator returning sheepshaver_cpu objects aligned on 16-byte boundaries
// FORMAT: [ alignment ] magic identifier, offset to malloc'ed data, sheepshaver_cpu data
void *sheepshaver_cpu::operator new(size_t size)
{
const int ALIGN = 16;
// Allocate enough space for sheepshaver_cpu data + signature + align pad
uint8 *ptr = (uint8 *)malloc(size + ALIGN * 2);
if (ptr == NULL)
throw std::bad_alloc();
// Align memory
int ofs = 0;
while ((((uintptr)ptr) % ALIGN) != 0)
ofs++, ptr++;
// Insert signature and offset
struct aligned_block_t {
uint32 pad[(ALIGN - 8) / 4];
uint32 signature;
uint32 offset;
uint8 data[sizeof(sheepshaver_cpu)];
};
aligned_block_t *blk = (aligned_block_t *)ptr;
blk->signature = FOURCC('S','C','P','U');
blk->offset = ofs + (&blk->data[0] - (uint8 *)blk);
assert((((uintptr)&blk->data) % ALIGN) == 0);
return &blk->data[0];
}
// Deallocation function matching sheepshaver_cpu::operator new.
// Reads the two 32-bit words stored directly in front of the object: the
// magic identifier (blk[-2]) and the byte offset back to the pointer that
// malloc() returned (blk[-1]), then frees that original pointer.
void sheepshaver_cpu::operator delete(void *p)
{
	// A deallocation function may be handed a null pointer; there is no
	// header in front of it to inspect and nothing to free
	if (p == NULL)
		return;
	uint32 *blk = (uint32 *)p;
	// Catch pointers that did not come from the matching operator new
	assert(blk[-2] == FOURCC('S','C','P','U'));
	void *ptr = (void *)(((uintptr)p) - blk[-1]);
	free(ptr);
}
sheepshaver_cpu::sheepshaver_cpu()
: powerpc_cpu(enable_jit_p())
{

View File

@ -20,6 +20,7 @@
#include "sysdeps.h"
#include <stdlib.h>
#include <assert.h>
#include "vm_alloc.h"
#include "cpu/vm.hpp"
#include "cpu/ppc/ppc-cpu.hpp"
@ -119,6 +120,7 @@ uint32 powerpc_registers::reserve_data = 0;
void powerpc_cpu::init_registers()
{
assert((((uintptr)&vr(0)) % 16) == 0);
for (int i = 0; i < 32; i++) {
gpr(i) = 0;
fpr(i) = 0;
@ -299,6 +301,44 @@ void powerpc_cpu::initialize()
#endif
}
// Memory allocator returning powerpc_cpu objects aligned on 16-byte boundaries
// FORMAT: [ alignment ] magic identifier, offset to malloc'ed data, powerpc_cpu data
void *powerpc_cpu::operator new(size_t size)
{
const int ALIGN = 16;
// Allocate enough space for powerpc_cpu data + signature + align pad
uint8 *ptr = (uint8 *)malloc(size + ALIGN * 2);
if (ptr == NULL)
throw std::bad_alloc();
// Align memory
int ofs = 0;
while ((((uintptr)ptr) % ALIGN) != 0)
ofs++, ptr++;
// Insert signature and offset
struct aligned_block_t {
uint32 pad[(ALIGN - 8) / 4];
uint32 signature;
uint32 offset;
uint8 data[sizeof(powerpc_cpu)];
};
aligned_block_t *blk = (aligned_block_t *)ptr;
blk->signature = 0x53435055; /* 'SCPU' */
blk->offset = ofs + (&blk->data[0] - (uint8 *)blk);
assert((((uintptr)&blk->data) % ALIGN) == 0);
return &blk->data[0];
}
// Deallocation function matching powerpc_cpu::operator new.
// Reads the two 32-bit words stored directly in front of the object: the
// magic identifier (blk[-2]) and the byte offset back to the pointer that
// malloc() returned (blk[-1]), then frees that original pointer.
void powerpc_cpu::operator delete(void *p)
{
	// A deallocation function may be handed a null pointer; there is no
	// header in front of it to inspect and nothing to free
	if (p == NULL)
		return;
	uint32 *blk = (uint32 *)p;
	// Catch pointers that did not come from the matching operator new
	assert(blk[-2] == 0x53435055); /* 'SCPU' */
	void *ptr = (void *)(((uintptr)p) - blk[-1]);
	free(ptr);
}
#ifdef SHEEPSHAVER
powerpc_cpu::powerpc_cpu(bool do_use_jit)
: use_jit(do_use_jit)
@ -468,9 +508,9 @@ bool powerpc_cpu::check_spcflags()
if (!processing_interrupt) {
processing_interrupt = true;
powerpc_registers r;
powerpc_registers::interrupt_copy(r, regs);
powerpc_registers::interrupt_copy(r, regs());
HandleInterrupt(&r);
powerpc_registers::interrupt_copy(regs, r);
powerpc_registers::interrupt_copy(regs(), r);
processing_interrupt = false;
}
}

View File

@ -38,7 +38,22 @@ class powerpc_cpu
: public basic_cpu
#endif
{
// NOTE: the PowerPC registers structure shall sit on a 16-byte boundary
// so that native code can use the AltiVec registers with aligned
// load/stores.
//
// (offsetof(powerpc_cpu, regs) % 16) == 0 cannot be assumed since extra
// data could be inserted prior to regs, e.g. a pointer to the vtable.
// The storage therefore carries 16 spare bytes and the registers are
// always reached through regs_ptr() / regs().
struct {
	powerpc_registers regs;
	uint8 pad[16];
} _regs;

// The adjustment is derived from the offset relative to 'this' (not from
// the absolute address) as this could make it simplified at compile-time.
// The result lies a multiple of 16 bytes past 'this'.
powerpc_registers *regs_ptr() const
{
	char * const base = (char *)&_regs.regs;
	return (powerpc_registers *)(base + (16 - ((base - (char *)this) % 16)));
}

// Read-only and mutable views of the adjusted register file
powerpc_registers const & regs() const
{
	return *regs_ptr();
}
powerpc_registers & regs()
{
	return *regs_ptr();
}
#if PPC_PROFILE_REGS_USE
// Registers use statistics
@ -59,30 +74,30 @@ private:
protected:
powerpc_spcflags & spcflags() { return regs.spcflags; }
powerpc_spcflags const & spcflags() const { return regs.spcflags; }
powerpc_cr_register & cr() { return regs.cr; }
powerpc_cr_register const & cr() const { return regs.cr; }
powerpc_xer_register & xer() { return regs.xer; }
powerpc_xer_register const & xer() const { return regs.xer; }
powerpc_vscr & vscr() { return regs.vscr; }
powerpc_vscr const & vscr() const { return regs.vscr; }
powerpc_spcflags & spcflags() { return regs().spcflags; }
powerpc_spcflags const & spcflags() const { return regs().spcflags; }
powerpc_cr_register & cr() { return regs().cr; }
powerpc_cr_register const & cr() const { return regs().cr; }
powerpc_xer_register & xer() { return regs().xer; }
powerpc_xer_register const & xer() const { return regs().xer; }
powerpc_vscr & vscr() { return regs().vscr; }
powerpc_vscr const & vscr() const { return regs().vscr; }
uint32 vrsave() const { return regs.vrsave; }
uint32 & vrsave() { return regs.vrsave; }
double fp_result() const { return regs.fp_result.d; }
double & fp_result() { return regs.fp_result.d; }
uint64 fp_result_dw() const { return regs.fp_result.j; }
uint64 & fp_result_dw() { return regs.fp_result.j; }
uint32 vrsave() const { return regs().vrsave; }
uint32 & vrsave() { return regs().vrsave; }
double fp_result() const { return regs().fp_result.d; }
double & fp_result() { return regs().fp_result.d; }
uint64 fp_result_dw() const { return regs().fp_result.j; }
uint64 & fp_result_dw() { return regs().fp_result.j; }
uint32 & fpscr() { return regs.fpscr; }
uint32 fpscr() const { return regs.fpscr; }
uint32 & lr() { return regs.lr; }
uint32 lr() const { return regs.lr; }
uint32 & ctr() { return regs.ctr; }
uint32 ctr() const { return regs.ctr; }
uint32 & pc() { return regs.pc; }
uint32 pc() const { return regs.pc; }
uint32 & fpscr() { return regs().fpscr; }
uint32 fpscr() const { return regs().fpscr; }
uint32 & lr() { return regs().lr; }
uint32 lr() const { return regs().lr; }
uint32 & ctr() { return regs().ctr; }
uint32 ctr() const { return regs().ctr; }
uint32 & pc() { return regs().pc; }
uint32 pc() const { return regs().pc; }
void increment_pc(int o) { pc() += o; }
friend class pc_operand;
@ -94,14 +109,14 @@ protected:
public:
uint32 & gpr(int i) { log_reg(i); return regs.gpr[i]; }
uint32 gpr(int i) const { log_reg(i); return regs.gpr[i]; }
double & fpr(int i) { return regs.fpr[i].d; }
double fpr(int i) const { return regs.fpr[i].d; }
uint64 & fpr_dw(int i) { return regs.fpr[i].j; }
uint64 fpr_dw(int i) const { return regs.fpr[i].j; }
powerpc_vr & vr(int i) { return regs.vr[i]; }
powerpc_vr const & vr(int i) const { return regs.vr[i]; }
uint32 & gpr(int i) { log_reg(i); return regs().gpr[i]; }
uint32 gpr(int i) const { log_reg(i); return regs().gpr[i]; }
double & fpr(int i) { return regs().fpr[i].d; }
double fpr(int i) const { return regs().fpr[i].d; }
uint64 & fpr_dw(int i) { return regs().fpr[i].j; }
uint64 fpr_dw(int i) const { return regs().fpr[i].j; }
powerpc_vr & vr(int i) { return regs().vr[i]; }
powerpc_vr const & vr(int i) const { return regs().vr[i]; }
protected:
@ -258,6 +273,10 @@ public:
#endif
~powerpc_cpu();
// Specialised memory allocation (needs to be 16-byte aligned)
void *operator new(size_t size);
void operator delete(void *p);
// Handle flight recorder
#if PPC_FLIGHT_RECORDER
bool is_logging() const { return logging; }

View File

@ -735,10 +735,10 @@ template< class RA >
void powerpc_cpu::execute_lwarx(uint32 opcode)
{
const uint32 ea = RA::get(this, opcode) + operand_RB::get(this, opcode);
regs.reserve_valid = 1;
regs.reserve_addr = ea;
regs.reserve_data = vm_read_memory_4(ea);
operand_RD::set(this, opcode, regs.reserve_data);
regs().reserve_valid = 1;
regs().reserve_addr = ea;
regs().reserve_data = vm_read_memory_4(ea);
operand_RD::set(this, opcode, regs().reserve_data);
increment_pc(4);
}
@ -747,13 +747,13 @@ void powerpc_cpu::execute_stwcx(uint32 opcode)
{
const uint32 ea = RA::get(this, opcode) + operand_RB::get(this, opcode);
cr().clear(0);
if (regs.reserve_valid) {
if (regs.reserve_addr == ea /* physical_addr(EA) */
&& /* HACK */ regs.reserve_data == vm_read_memory_4(ea)) {
if (regs().reserve_valid) {
if (regs().reserve_addr == ea /* physical_addr(EA) */
&& /* HACK */ regs().reserve_data == vm_read_memory_4(ea)) {
vm_write_memory_4(ea, operand_RS::get(this, opcode));
cr().set(0, standalone_CR_EQ_field::mask());
}
regs.reserve_valid = 0;
regs().reserve_valid = 0;
}
cr().set_so(0, xer().get_so());
increment_pc(4);

View File

@ -210,13 +210,7 @@ union powerpc_vr
uint32 w[4];
uint64 j[2];
float f[4];
}
#if defined(__GNUC__)
// 16-byte alignment is required for SIMD optimizations operating on
// 128-bit aligned registers (e.g. SSE).
__attribute__((aligned(16)))
#endif
;
};
/**
@ -253,8 +247,8 @@ struct powerpc_registers
powerpc_fpr fpr[32]; // Floating-Point Registers
powerpc_fpr fp_result; // Floating-Point result
powerpc_cr_register cr; // Condition Register
uint32 fpscr; // Floating-Point Status and Control Register
powerpc_xer_register xer; // XER Register (SPR 1)
uint32 fpscr; // Floating-Point Status and Control Register
uint32 lr; // Link Register (SPR 8)
uint32 ctr; // Count Register (SPR 9)
uint32 pc; // Program Counter
@ -262,6 +256,9 @@ struct powerpc_registers
static uint32 reserve_valid;
static uint32 reserve_addr;
static uint32 reserve_data;
#define PPC_SZ(T) sizeof(powerpc_##T)
uint8 _pad[16 - ((PPC_SZ(fpr) + PPC_SZ(cr_register) + PPC_SZ(xer_register) + PPC_SZ(spcflags)) % 16)];
#undef PPC_SZ
powerpc_vr vr[32]; // Vector Registers
powerpc_vscr vscr; // Vector Status and Control Register
uint32 vrsave; // AltiVec Save Register

View File

@ -104,14 +104,6 @@ typedef uintptr_t uintptr;
#define TEST_VMX_ARITH 1
#if defined __GNUC__
#define ALIGNED(N) __attribute__((aligned(N)))
#else
#if TEST_VMX_OPS
#error "AltiVec testing requires the align attribute"
#endif
#endif
// Partial PowerPC runtime assembler from GNU lightning
#undef _I
#define _I(X) ((uint32)(X))
@ -506,7 +498,25 @@ typedef bit_field< 2, 2 > XER_CA_field;
static bool has_altivec = true;
// A 128-bit AltiVec register
typedef uint8 vector_t[16] ALIGNED(16);
typedef uint8 vector_t[16];
class aligned_vector_t {
struct {
vector_t v;
uint8 pad[16];
} vs;
public:
aligned_vector_t()
{ memset(addr(), 0, sizeof(vector_t)); }
aligned_vector_t(vector_t const & vi)
{ memcpy(addr(), &vi, sizeof(vector_t)); }
vector_t *addr() const
{ return (vector_t *)((char *)&vs.v + (16 - (((char *)&vs.v - (char *)this) % 16))); }
vector_t const & value() const
{ return *addr(); }
vector_t & value()
{ return *addr(); }
};
union vector_helper_t {
vector_t v;
@ -721,7 +731,7 @@ private:
struct vector_value_t {
char type;
vector_t v ALIGNED(16);
vector_t v;
};
static const uint32 reg_values[];
@ -1611,24 +1621,25 @@ void powerpc_test_cpu::test_one_vector(uint32 *code, vector_test_t const & vt, u
#endif
// Invoke emulated code
static vector_t emul_vD;
memset(&emul_vD, 0, sizeof(emul_vD));
static vector_helper_t emul_vSCR;
memset(&emul_vSCR, 0, sizeof(emul_vSCR));
emul_vSCR.w[3] = 0;
static aligned_vector_t emul_vD;
memset(emul_vD.addr(), 0, sizeof(vector_t));
static aligned_vector_t emul_vSCR;
memset(emul_vSCR.addr(), 0, sizeof(vector_t));
emul_set_cr(init_cr);
set_gpr(RD, (uintptr)&emul_vD);
set_gpr(RD, (uintptr)emul_vD.addr());
set_gpr(RA, (uintptr)rAp);
set_gpr(RB, (uintptr)rBp);
set_gpr(RC, (uintptr)rCp);
set_gpr(VSCR, (uintptr)emul_vSCR.b);
set_gpr(VSCR, (uintptr)emul_vSCR.addr());
execute(code);
vector_helper_t emul_vSCR_helper;
memcpy(&emul_vSCR_helper, emul_vSCR.addr(), sizeof(vector_t));
const uint32 emul_cr = emul_get_cr();
const uint32 emul_vscr = ntohl(emul_vSCR.w[3]);
const uint32 emul_vscr = ntohl(emul_vSCR_helper.w[3]);
++tests;
bool ok = vector_equals(vt.type, native_vD, emul_vD)
bool ok = vector_equals(vt.type, native_vD, emul_vD.value())
&& native_cr == emul_cr
&& native_vscr == emul_vscr;
@ -1676,7 +1687,7 @@ void powerpc_test_cpu::test_one_vector(uint32 *code, vector_test_t const & vt, u
print_vector(native_vD, vt.type);
printf("\n");
printf("vD.E = ");
print_vector(emul_vD, vt.type);
print_vector(emul_vD.value(), vt.type);
printf("\n");
printf("CR.N = %08x ; VSCR.N = %08x\n", native_cr, native_vscr);
printf("CR.E = %08x ; VSCR.E = %08x\n", emul_cr, emul_vscr);
@ -1764,7 +1775,6 @@ void powerpc_test_cpu::test_vector_load(void)
}
}
assert(i_opcode != -1);
assert(((uintptr)&vector_values[0].v) % 16 == 0);
const int n_elements = sizeof(tests) / sizeof(tests[0]);
for (int i = 0; i < n_elements; i++) {
@ -1778,18 +1788,19 @@ void powerpc_test_cpu::test_vector_load(void)
printf("Testing %s\n", vt.name);
const int n_vector_values = sizeof(vector_values)/sizeof(vector_values[0]);
for (int j = 0; j < n_vector_values; j++) {
aligned_vector_t av(vector_values[j].v);
switch (vt.type) {
case 'b':
for (int k = 0; k < 16; k++)
test_one_vector(code, vt, ((uint8 *)&vector_values[j].v) + 1 * k);
test_one_vector(code, vt, ((uint8 *)av.addr()) + 1 * k);
break;
case 'h':
for (int k = 0; k < 8; k++)
test_one_vector(code, vt, ((uint8 *)&vector_values[j].v) + 2 * k);
test_one_vector(code, vt, ((uint8 *)av.addr()) + 2 * k);
break;
case 'w':
for (int k = 0; k < 4; k++)
test_one_vector(code, vt, ((uint8 *)&vector_values[j].v) + 4 * k);
test_one_vector(code, vt, ((uint8 *)av.addr()) + 4 * k);
break;
}
}
@ -2012,29 +2023,40 @@ void powerpc_test_cpu::test_vector_arith(void)
printf("Testing %s\n", vt.name);
if (vt.operands[1] == vA && vt.operands[2] == vB && vt.operands[3] == vC) {
for (int i = 0; i < n_vector_values; i++)
for (int j = 0; j < n_vector_values; j++)
for (int k = 0; k < n_vector_values; k++)
test_one_vector(code, vt, &vvp[i].v, &vvp[j].v, &vvp[k].v);
for (int i = 0; i < n_vector_values; i++) {
aligned_vector_t avi(vvp[i].v);
for (int j = 0; j < n_vector_values; j++) {
aligned_vector_t avj(vvp[j].v);
for (int k = 0; k < n_vector_values; k++) {
aligned_vector_t avk(vvp[k].v);
test_one_vector(code, vt, avi.addr(), avj.addr(), avk.addr());
}
}
}
}
else if (vt.operands[1] == vA && vt.operands[2] == vB && vt.operands[3] == vN) {
for (int i = 0; i < 16; i++) {
vSH_field::insert(vt.opcode, i);
code[i_opcode] = vt.opcode;
flush_icache_range(code, sizeof(code));
for (int j = 0; j < n_vector_values; j++)
aligned_vector_t avi(vvp[i].v);
for (int j = 0; j < n_vector_values; j++) {
aligned_vector_t avj(vvp[j].v);
for (int k = 0; k < n_vector_values; k++)
test_one_vector(code, vt, &vvp[i].v, &vvp[j].v);
test_one_vector(code, vt, avi.addr(), avj.addr());
}
}
}
else if (vt.operands[1] == vA && vt.operands[2] == vB) {
for (int i = 0; i < n_vector_values; i++) {
aligned_vector_t avi(vvp[i].v);
for (int j = 0; j < n_vector_values; j++) {
if (op_type == 'B') {
if (!vector_all_eq('b', vvp[j].v))
continue;
}
test_one_vector(code, vt, &vvp[i].v, &vvp[j].v);
aligned_vector_t avj(vvp[j].v);
test_one_vector(code, vt, avi.addr(), avj.addr());
}
}
}
@ -2043,8 +2065,10 @@ void powerpc_test_cpu::test_vector_arith(void)
rA_field::insert(vt.opcode, i);
code[i_opcode] = vt.opcode;
flush_icache_range(code, sizeof(code));
for (int j = 0; j < n_vector_values; j++)
test_one_vector(code, vt, NULL, &vvp[j].v);
for (int j = 0; j < n_vector_values; j++) {
aligned_vector_t avj(vvp[j].v);
test_one_vector(code, vt, NULL, avj.addr());
}
}
}
else if (vt.operands[1] == vI) {
@ -2056,8 +2080,10 @@ void powerpc_test_cpu::test_vector_arith(void)
}
}
else if (vt.operands[1] == __ && vt.operands[2] == vB) {
for (int i = 0; i < n_vector_values; i++)
test_one_vector(code, vt, NULL, &vvp[i].v);
for (int i = 0; i < n_vector_values; i++) {
aligned_vector_t avi(vvp[i].v);
test_one_vector(code, vt, NULL, avi.addr());
}
}
else {
printf("ERROR: unhandled test case\n");