R600/SI: Use S_BFE_I64 for 64-bit sext_inreg
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222012 91177308-0d34-0410-b5e6-96231b3b80d8
parent 5c76b3d03e
commit 8fd3b90c3f
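Editor's note: the tests below materialize a 64-bit sext_inreg with a shl/ashr pair, which this patch now selects to a single s_bfe_i64. As a reading aid only (an illustrative sketch, not part of the commit; the function name is invented, and the usual arithmetic right shift on signed integers is assumed), the node's semantics in plain C++:

#include <cstdint>

// Sign-extend the low N bits of X into all 64 bits, N in {1, 8, 16, 32}.
// Mirrors the tests' "%shl = shl i64 %c, 64-N" / "%ashr = ashr i64 %shl, 64-N".
int64_t signExtendInReg64(int64_t X, unsigned N) {
  uint64_t Shifted = static_cast<uint64_t>(X) << (64 - N); // shl i64 %c, 64-N
  return static_cast<int64_t>(Shifted) >> (64 - N);        // ashr i64 %shl, 64-N
}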
lib/Target/R600/SIISelLowering.cpp
@@ -136,8 +136,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
 
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom);
-
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
lib/Target/R600/SIInstrInfo.cpp
@@ -1901,8 +1901,13 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       Inst->eraseFromParent();
      continue;
 
+    case AMDGPU::S_BFE_I64: {
+      splitScalar64BitBFE(Worklist, Inst);
+      Inst->eraseFromParent();
+      continue;
+    }
+
     case AMDGPU::S_BFE_U64:
-    case AMDGPU::S_BFE_I64:
     case AMDGPU::S_BFM_B64:
       llvm_unreachable("Moving this op to VALU not implemented");
     }
@@ -2167,6 +2172,65 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
   Worklist.push_back(Second);
 }
 
+void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+                                      MachineInstr *Inst) const {
+  MachineBasicBlock &MBB = *Inst->getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  MachineBasicBlock::iterator MII = Inst;
+  DebugLoc DL = Inst->getDebugLoc();
+
+  MachineOperand &Dest = Inst->getOperand(0);
+  uint32_t Imm = Inst->getOperand(2).getImm();
+  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+
+  // Only sext_inreg cases handled.
+  assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
+         BitWidth <= 32 &&
+         Offset == 0 &&
+         "Not implemented");
+
+  if (BitWidth < 32) {
+    unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+    BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
+      .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
+      .addImm(0)
+      .addImm(BitWidth);
+
+    BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
+      .addImm(31)
+      .addReg(MidRegLo);
+
+    BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+      .addReg(MidRegLo)
+      .addImm(AMDGPU::sub0)
+      .addReg(MidRegHi)
+      .addImm(AMDGPU::sub1);
+
+    MRI.replaceRegWith(Dest.getReg(), ResultReg);
+    return;
+  }
+
+  MachineOperand &Src = Inst->getOperand(1);
+  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
+    .addImm(31)
+    .addReg(Src.getReg(), 0, AMDGPU::sub0);
+
+  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+    .addReg(Src.getReg(), 0, AMDGPU::sub0)
+    .addImm(AMDGPU::sub0)
+    .addReg(TmpReg)
+    .addImm(AMDGPU::sub1);
+
+  MRI.replaceRegWith(Dest.getReg(), ResultReg);
+}
+
 void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
                                         MachineInstr *Inst) const {
   // Add the implicit and explicit register definitions.
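Editor's note: the two expansion paths above rebuild the 64-bit result from 32-bit VALU ops. A minimal C++ reference model of what they compute (a sketch for illustration only; the function name sextInReg64 is invented here, offset == 0 is taken as given per the assert, and an arithmetic right shift on signed int is assumed):

#include <cstdint>

int64_t sextInReg64(uint64_t Src, uint32_t BitWidth) {
  int32_t Lo = static_cast<int32_t>(Src);            // sub0 of the source
  if (BitWidth < 32) {
    // What V_BFE_I32 lo, 0, BitWidth computes: sign-extend the low BitWidth bits.
    uint32_t Up = static_cast<uint32_t>(Lo) << (32 - BitWidth);
    Lo = static_cast<int32_t>(Up) >> (32 - BitWidth);
  }                                                  // BitWidth == 32: low dword used unchanged
  int32_t Hi = Lo >> 31;                             // V_ASHRREV_I32 31, lo
  return (static_cast<int64_t>(Hi) << 32) |          // REG_SEQUENCE sub1:sub0
         static_cast<uint32_t>(Lo);
}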
lib/Target/R600/SIInstrInfo.h
@@ -52,6 +52,8 @@ private:
 
   void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
                             MachineInstr *Inst) const;
+  void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+                           MachineInstr *Inst) const;
 
   void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
 
lib/Target/R600/SIInstrInfo.td
@@ -350,6 +350,11 @@ class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
   opName#" $dst, $src0, $src1", pattern
 >;
 
+class SOP2_64_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+  op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+  opName#" $dst, $src0, $src1", pattern
+>;
+
 class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
   op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
   opName#" $dst, $src0, $src1", pattern
lib/Target/R600/SIInstructions.td
@@ -297,7 +297,7 @@ def S_MUL_I32 : SOP2_32 <0x00000026, "s_mul_i32",
 def S_BFE_U32 : SOP2_32 <0x00000027, "s_bfe_u32", []>;
 def S_BFE_I32 : SOP2_32 <0x00000028, "s_bfe_i32", []>;
 def S_BFE_U64 : SOP2_64 <0x00000029, "s_bfe_u64", []>;
-def S_BFE_I64 : SOP2_64 <0x0000002a, "s_bfe_i64", []>;
+def S_BFE_I64 : SOP2_64_32 <0x0000002a, "s_bfe_i64", []>;
 //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "s_cbranch_g_fork", []>;
 def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "s_absdiff_i32", []>;
 
@@ -2972,30 +2972,25 @@ defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
 def : Pat<(i32 (sext_inreg i32:$src, i1)),
           (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
 
-// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
-// might not be worth the effort, and will need to expand to shifts when
-// fixing SGPR copies.
-
+// Handle sext_inreg in i64
 def : Pat <
   (i64 (sext_inreg i64:$src, i1)),
-  (REG_SEQUENCE SReg_64,
-    (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0, // 0 | 1 << 16
-    (S_MOV_B32 -1), sub1)
+  (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
 >;
 
 def : Pat <
   (i64 (sext_inreg i64:$src, i8)),
-  (REG_SEQUENCE SReg_64,
-    (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0,
-    (S_MOV_B32 -1), sub1)
+  (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
 >;
 
 def : Pat <
   (i64 (sext_inreg i64:$src, i16)),
-  (REG_SEQUENCE SReg_64,
-    (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0,
-    (S_MOV_B32 -1), sub1)
+  (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
 >;
 
+def : Pat <
+  (i64 (sext_inreg i64:$src, i32)),
+  (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
+>;
+
 class ZExt_i64_i32_Pat <SDNode ext> : Pat <
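Editor's note: the hex operands in the patterns above are S_BFE source-1 immediates, packing the offset in bits [5:0] and the width in bits [22:16], exactly the layout splitScalar64BitBFE decodes. A small compile-time check of the four literals (the helper encodeBFEImm is hypothetical, written only to document the encoding):

#include <cstdint>

// Hypothetical helper, not part of the patch: offset in [5:0], width in [22:16].
constexpr uint32_t encodeBFEImm(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | (Width << 16);
}

static_assert(encodeBFEImm(0, 1)  == 0x10000,  "i1 in i64");
static_assert(encodeBFEImm(0, 8)  == 0x80000,  "i8 in i64");
static_assert(encodeBFEImm(0, 16) == 0x100000, "i16 in i64");
static_assert(encodeBFEImm(0, 32) == 0x200000, "i32 in i64");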
test/CodeGen/R600/sext-in-reg.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
 
 ; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
@@ -75,12 +76,13 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
 }
 
 ; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
-; SI: s_mov_b32 {{s[0-9]+}}, -1
-; SI: s_add_i32 [[VAL:s[0-9]+]],
-; SI: s_bfe_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
-; SI: buffer_store_dwordx2
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 63
   %ashr = ashr i64 %shl, 63
   store i64 %ashr, i64 addrspace(1)* %out, align 8
@@ -88,15 +90,16 @@ define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 }
 
 ; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
-; SI: s_mov_b32 {{s[0-9]+}}, -1
-; SI: s_add_i32 [[VAL:s[0-9]+]],
-; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: buffer_store_dwordx2
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: LSHL
+; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
 ; EG: ASHR [[RES_HI]]
 ; EG-NOT: BFE_INT
 ; EG: LSHR
@@ -104,7 +107,7 @@ define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 ;; TODO Check address computation, using | with variables in {{}} does not work,
 ;; also the _LO/_HI order might be different
 define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 56
   %ashr = ashr i64 %shl, 56
   store i64 %ashr, i64 addrspace(1)* %out, align 8
@@ -112,15 +115,16 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 }
 
 ; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
-; SI: s_mov_b32 {{s[0-9]+}}, -1
-; SI: s_add_i32 [[VAL:s[0-9]+]],
-; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: buffer_store_dwordx2
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: LSHL
+; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
 ; EG: ASHR [[RES_HI]]
 ; EG-NOT: BFE_INT
 ; EG: LSHR
@@ -128,7 +132,7 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 ;; TODO Check address computation, using | with variables in {{}} does not work,
 ;; also the _LO/_HI order might be different
 define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 48
   %ashr = ashr i64 %shl, 48
   store i64 %ashr, i64 addrspace(1)* %out, align 8
@@ -136,24 +140,24 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 }
 
 ; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
-; SI: s_load_dword
-; SI: s_load_dword
-; SI: s_add_i32 [[ADD:s[0-9]+]],
-; SI: s_ashr_i32 s{{[0-9]+}}, [[ADD]], 31
-; SI: buffer_store_dwordx2
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
 ; EG-NOT: BFE_INT
-; EG: ADD_INT {{\*?}} [[RES_LO]]
+
 ; EG: ASHR [[RES_HI]]
-; EG: ADD_INT
+
 ; EG: LSHR
 ; EG: LSHR
 ;; TODO Check address computation, using | with variables in {{}} does not work,
 ;; also the _LO/_HI order might be different
 define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 32
   %ashr = ashr i64 %shl, 32
   store i64 %ashr, i64 addrspace(1)* %out, align 8
@@ -175,6 +179,89 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 ;   ret void
 ; }
 
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %bptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 63
+  %ashr = ashr i64 %shl, 63
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %bptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 56
+  %ashr = ashr i64 %shl, 56
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %bptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 48
+  %ashr = ashr i64 %shl, 48
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
+define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %bptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 32
+  %ashr = ashr i64 %shl, 32
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_lshl_b32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6