PTX: Fix detection of stack load/store vs. global load/store, and fix the printing of local offsets

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140547 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2011-09-26 18:57:22 +00:00
parent 439780eeae
commit 63602ed876
4 changed files with 77 additions and 50 deletions

View File

@ -299,10 +299,12 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
if (FrameInfo->getObjectSize(i) > 0) {
std::string def = "\t.local .align ";
def += utostr(FrameInfo->getObjectAlignment(i));
def += " .b";
def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
def += " .b8";
def += " __local";
def += utostr(i);
def += "[";
def += utostr(FrameInfo->getObjectSize(i)); // Array length of the .b8 declaration: object size in bytes
def += "]";
def += ";";
OutStreamer.EmitRawText(Twine(def));
}
@ -465,6 +467,11 @@ void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
// Print a local (stack-slot) memory operand as "__local<N>", where <N> is the
// immediate held in operand opNum. When the operand that follows (opNum+1) is
// an immediate with a nonzero value, "+" and that operand are appended.
// Modifier is not used by this printer.
void PTXAsmPrinter::printLocalOperand(const MachineInstr *MI, int opNum,
                                      raw_ostream &OS, const char *Modifier) {
  const int offsetIdx = opNum + 1;

  OS << "__local";
  OS << MI->getOperand(opNum).getImm();

  // Nothing more to print unless an immediate, nonzero offset follows.
  if (!MI->getOperand(offsetIdx).isImm() ||
      MI->getOperand(offsetIdx).getImm() == 0)
    return;

  OS << "+";
  printOperand(MI, offsetIdx, OS);
}
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {

View File

@ -213,14 +213,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
SDValue &Offset) {
if (Addr.getOpcode() != ISD::ADD) {
// FrameIndex addresses are handled separately
//errs() << "SelectADDRri: ";
//Addr.getNode()->dumpr();
if (isa<FrameIndexSDNode>(Addr)) {
//errs() << "Failure\n";
return false;
}
if (CurDAG->isBaseWithConstantOffset(Addr)) {
Base = Addr.getOperand(0);
if (isa<FrameIndexSDNode>(Base)) {
//errs() << "Failure\n";
return false;
}
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
//errs() << "Success\n";
return true;
}
/*if (Addr.getNumOperands() == 1) {
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
errs() << "Success\n";
return true;
}*/
//errs() << "SelectADDRri fails on: ";
//Addr.getNode()->dumpr();
if (isImm(Addr)) {
//errs() << "Failure\n";
return false;
}
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
//errs() << "Success\n";
return true;
/*if (Addr.getOpcode() != ISD::ADD) {
// let SelectADDRii handle the [imm] case
if (isImm(Addr))
return false;
// it is [reg]
assert(Addr.getValueType().isSimple() && "Type must be simple");
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
@ -242,7 +282,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
}
// neither [reg+imm] nor [imm+reg]
return false;
return false;*/
}
// Match memory operand of the form [imm+imm] and [imm]
@ -269,35 +309,30 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
SDValue &Offset) {
if (Addr.getOpcode() != ISD::ADD) {
// let SelectADDRii handle the [imm] case
if (isImm(Addr))
return false;
// it is [reg]
assert(Addr.getValueType().isSimple() && "Type must be simple");
//errs() << "SelectADDRlocal: ";
//Addr.getNode()->dumpr();
if (isa<FrameIndexSDNode>(Addr)) {
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
//errs() << "Success\n";
return true;
}
if (Addr.getNumOperands() < 2)
return false;
// let SelectADDRii handle the [imm+imm] case
if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
return false;
// try [reg+imm] and [imm+reg]
for (int i = 0; i < 2; i ++)
if (SelectImm(Addr.getOperand(1-i), Offset)) {
Base = Addr.getOperand(i);
return true;
if (CurDAG->isBaseWithConstantOffset(Addr)) {
Base = Addr.getOperand(0);
if (!isa<FrameIndexSDNode>(Base)) {
//errs() << "Failure\n";
return false;
}
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
//errs() << "Offset: ";
//Offset.getNode()->dumpr();
//errs() << "Success\n";
return true;
}
// neither [reg+imm] nor [imm+reg]
//errs() << "Failure\n";
return false;
}

View File

@ -24,9 +24,7 @@ def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const Value *Src;
const PointerType *PT;
const SDValue &MemOp = N->getOperand(1);
if ((MemOp.getOpcode() != ISD::FrameIndex) &&
(Src = cast<LoadSDNode>(N)->getSrcValue()) &&
if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTX::GLOBAL;
return false;
@ -41,11 +39,6 @@ def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return false;
}]>;
def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const SDValue &MemOp = N->getOperand(1);
return MemOp.getOpcode() == ISD::FrameIndex;
}]>;
def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const Value *Src;
const PointerType *PT;
@ -59,20 +52,12 @@ def store_global
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
const Value *Src;
const PointerType *PT;
const SDValue &MemOp = N->getOperand(2);
if ((MemOp.getOpcode() != ISD::FrameIndex) &&
(Src = cast<StoreSDNode>(N)->getSrcValue()) &&
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTX::GLOBAL;
return false;
}]>;
def store_local
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
const SDValue &MemOp = N->getOperand(2);
return MemOp.getOpcode() == ISD::FrameIndex;
}]>;
def store_shared
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
const Value *Src;
@ -221,16 +206,16 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (load_local ADDRlocal32:$a))]>;
[(set RC:$d, (load_global ADDRlocal32:$a))]>;
def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (load_local ADDRlocal64:$a))]>;
[(set RC:$d, (load_global ADDRlocal64:$a))]>;
def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
[(store_local RC:$d, ADDRlocal32:$a)]>;
[(store_global RC:$d, ADDRlocal32:$a)]>;
def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
[(store_local RC:$d, ADDRlocal64:$a)]>;
[(store_global RC:$d, ADDRlocal64:$a)]>;
}
multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
define ptx_device float @stack1(float %a) {
; CHECK: .local .align 4 .b32 __local0;
; CHECK: .local .align 4 .b8 __local0[4];
%a.2 = alloca float, align 4
; CHECK: st.local.f32 [__local0], %f0
store float %a, float* %a.2
@ -10,7 +10,7 @@ define ptx_device float @stack1(float %a) {
}
define ptx_device float @stack1_align8(float %a) {
; CHECK: .local .align 8 .b32 __local0;
; CHECK: .local .align 8 .b8 __local0[4];
%a.2 = alloca float, align 8
; CHECK: st.local.f32 [__local0], %f0
store float %a, float* %a.2