mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-10-25 10:27:04 +00:00
Add X86 code emitter support for AVX-encoded MRMDestReg instructions.
Previously we weren't skipping the VVVV-encoded register. Based on a patch by Michael Liao.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177221 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -674,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
|
|||||||
// MRMDestReg instructions forms:
|
// MRMDestReg instructions forms:
|
||||||
// dst(ModR/M), src(ModR/M)
|
// dst(ModR/M), src(ModR/M)
|
||||||
// dst(ModR/M), src(ModR/M), imm8
|
// dst(ModR/M), src(ModR/M), imm8
|
||||||
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
|
// dst(ModR/M), src1(VEX_4V), src2(ModR/M)
|
||||||
|
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
|
||||||
VEX_B = 0x0;
|
VEX_B = 0x0;
|
||||||
if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
|
CurOp++;
|
||||||
|
|
||||||
|
if (HasVEX_4V)
|
||||||
|
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
|
||||||
|
|
||||||
|
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
|
||||||
VEX_R = 0x0;
|
VEX_R = 0x0;
|
||||||
break;
|
break;
|
||||||
case X86II::MRM0r: case X86II::MRM1r:
|
case X86II::MRM0r: case X86II::MRM1r:
|
||||||
@@ -1046,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|||||||
|
|
||||||
case X86II::MRMDestReg:
|
case X86II::MRMDestReg:
|
||||||
EmitByte(BaseOpcode, CurByte, OS);
|
EmitByte(BaseOpcode, CurByte, OS);
|
||||||
|
SrcRegNum = CurOp + 1;
|
||||||
|
|
||||||
|
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
|
||||||
|
++SrcRegNum;
|
||||||
|
|
||||||
EmitRegModRMByte(MI.getOperand(CurOp),
|
EmitRegModRMByte(MI.getOperand(CurOp),
|
||||||
GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
|
GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
|
||||||
CurOp += 2;
|
CurOp = SrcRegNum + 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case X86II::MRMDestMem:
|
case X86II::MRMDestMem:
|
||||||
|
|||||||
@@ -1047,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
|
|||||||
// MRMDestReg instructions forms:
|
// MRMDestReg instructions forms:
|
||||||
// dst(ModR/M), src(ModR/M)
|
// dst(ModR/M), src(ModR/M)
|
||||||
// dst(ModR/M), src(ModR/M), imm8
|
// dst(ModR/M), src(ModR/M), imm8
|
||||||
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
|
// dst(ModR/M), src1(VEX_4V), src2(ModR/M)
|
||||||
|
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
|
||||||
VEX_B = 0x0;
|
VEX_B = 0x0;
|
||||||
if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
|
CurOp++;
|
||||||
|
|
||||||
|
if (HasVEX_4V)
|
||||||
|
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
|
||||||
|
|
||||||
|
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
|
||||||
VEX_R = 0x0;
|
VEX_R = 0x0;
|
||||||
break;
|
break;
|
||||||
case X86II::MRM0r: case X86II::MRM1r:
|
case X86II::MRM0r: case X86II::MRM1r:
|
||||||
@@ -1284,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
|
|||||||
|
|
||||||
case X86II::MRMDestReg: {
|
case X86II::MRMDestReg: {
|
||||||
MCE.emitByte(BaseOpcode);
|
MCE.emitByte(BaseOpcode);
|
||||||
|
|
||||||
|
unsigned SrcRegNum = CurOp+1;
|
||||||
|
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
|
||||||
|
SrcRegNum++;
|
||||||
|
|
||||||
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
|
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
|
||||||
getX86RegNum(MI.getOperand(CurOp+1).getReg()));
|
getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
|
||||||
CurOp += 2;
|
CurOp = SrcRegNum + 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case X86II::MRMDestMem: {
|
case X86II::MRMDestMem: {
|
||||||
|
|||||||
@@ -417,35 +417,44 @@ ReSimplify:
|
|||||||
case X86::VMOVDQAYrr:
|
case X86::VMOVDQAYrr:
|
||||||
case X86::VMOVDQUrr:
|
case X86::VMOVDQUrr:
|
||||||
case X86::VMOVDQUYrr:
|
case X86::VMOVDQUYrr:
|
||||||
case X86::VMOVSDrr:
|
|
||||||
case X86::VMOVSSrr:
|
|
||||||
case X86::VMOVUPDrr:
|
case X86::VMOVUPDrr:
|
||||||
case X86::VMOVUPDYrr:
|
case X86::VMOVUPDYrr:
|
||||||
case X86::VMOVUPSrr:
|
case X86::VMOVUPSrr:
|
||||||
case X86::VMOVUPSYrr: {
|
case X86::VMOVUPSYrr: {
|
||||||
if (X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
|
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
|
||||||
!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()))
|
X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
|
||||||
break;
|
unsigned NewOpc;
|
||||||
|
switch (OutMI.getOpcode()) {
|
||||||
unsigned NewOpc;
|
default: llvm_unreachable("Invalid opcode");
|
||||||
switch (OutMI.getOpcode()) {
|
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
|
||||||
default: llvm_unreachable("Invalid opcode");
|
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
|
||||||
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
|
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
|
||||||
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
|
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
|
||||||
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
|
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
|
||||||
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
|
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
|
||||||
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
|
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
|
||||||
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
|
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
|
||||||
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
|
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
|
||||||
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
|
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
|
||||||
case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
|
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
|
||||||
case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
|
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
|
||||||
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
|
}
|
||||||
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
|
OutMI.setOpcode(NewOpc);
|
||||||
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
|
}
|
||||||
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
|
break;
|
||||||
|
}
|
||||||
|
case X86::VMOVSDrr:
|
||||||
|
case X86::VMOVSSrr: {
|
||||||
|
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
|
||||||
|
X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
|
||||||
|
unsigned NewOpc;
|
||||||
|
switch (OutMI.getOpcode()) {
|
||||||
|
default: llvm_unreachable("Invalid opcode");
|
||||||
|
case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
|
||||||
|
case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
|
||||||
|
}
|
||||||
|
OutMI.setOpcode(NewOpc);
|
||||||
}
|
}
|
||||||
OutMI.setOpcode(NewOpc);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
31
test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
Normal file
31
test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx-i -show-mc-encoding
|
||||||
|
|
||||||
|
; ModuleID = 'bugpoint-reduced-simplified.bc'
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-macosx10.8.0"
|
||||||
|
|
||||||
|
@b = external global [8 x float], align 32
|
||||||
|
@e = external global [8 x float], align 16
|
||||||
|
|
||||||
|
define void @main() #0 {
|
||||||
|
entry:
|
||||||
|
%0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0
|
||||||
|
%bitcast.i = extractelement <8 x float> %0, i32 0
|
||||||
|
%vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
|
||||||
|
%vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
|
||||||
|
%vecinit3.i.i = insertelement <4 x float> %vecinit2.i.i, float 0.000000e+00, i32 2
|
||||||
|
%vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3
|
||||||
|
%1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2
|
||||||
|
%vecext.i.i = extractelement <4 x float> %1, i32 0
|
||||||
|
store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
|
||||||
|
|
||||||
|
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||||
|
attributes #1 = { nounwind readnone }
|
||||||
|
attributes #2 = { nounwind }
|
||||||
|
|
||||||
|
!0 = metadata !{metadata !"omnipotent char", metadata !1}
|
||||||
|
!1 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||||
Reference in New Issue
Block a user