Fixed a bug in printing "cmp" pseudo ops.

> This IR code
> %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 14)
> fails with assertion:
>
> llc: X86ATTInstPrinter.cpp:62: void llvm::X86ATTInstPrinter::printSSECC(const llvm::MCInst*, unsigned int, llvm::raw_ostream&): Assertion `0 && "Invalid ssecc argument!"' failed.
> 0  llc             0x0000000001355803
> 1  llc             0x0000000001355dc9
> 2  libpthread.so.0 0x00007f79a30575d0
> 3  libc.so.6       0x00007f79a23a1945 gsignal + 53
> 4  libc.so.6       0x00007f79a23a2f21 abort + 385
> 5  libc.so.6       0x00007f79a239a810 __assert_fail + 240
> 6  llc             0x00000000011858d5 llvm::X86ATTInstPrinter::printSSECC(llvm::MCInst const*, unsigned int, llvm::raw_ostream&) + 119

I added the full testing for all possible pseudo-ops of cmp.
I extended X86AsmPrinter.cpp and X86IntelInstPrinter.cpp.

You'l also see lines alignments (unrelated to this fix) in X86IselLowering.cpp from my previous check-in.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150068 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2012-02-08 08:37:26 +00:00
parent bc0e4bf754
commit f602040c49
5 changed files with 183 additions and 41 deletions

View File

@ -60,14 +60,38 @@ void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
default: llvm_unreachable("Invalid ssecc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
case 3: O << "unord"; break;
case 4: O << "neq"; break;
case 5: O << "nlt"; break;
case 6: O << "nle"; break;
case 7: O << "ord"; break;
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
case 3: O << "unord"; break;
case 4: O << "neq"; break;
case 5: O << "nlt"; break;
case 6: O << "nle"; break;
case 7: O << "ord"; break;
case 8: O << "eq_uq"; break;
case 9: O << "nge"; break;
case 0xa: O << "ngt"; break;
case 0xb: O << "false"; break;
case 0xc: O << "neq_oq"; break;
case 0xd: O << "ge"; break;
case 0xe: O << "gt"; break;
case 0xf: O << "true"; break;
case 0x10: O << "eq_os"; break;
case 0x11: O << "lt_oq"; break;
case 0x12: O << "le_oq"; break;
case 0x13: O << "unord_s"; break;
case 0x14: O << "neq_us"; break;
case 0x15: O << "nlt_uq"; break;
case 0x16: O << "nle_uq"; break;
case 0x17: O << "ord_s"; break;
case 0x18: O << "eq_us"; break;
case 0x19: O << "nge_uq"; break;
case 0x1a: O << "ngt_uq"; break;
case 0x1b: O << "false_os"; break;
case 0x1c: O << "neq_os"; break;
case 0x1d: O << "ge_oq"; break;
case 0x1e: O << "gt_oq"; break;
case 0x1f: O << "true_us"; break;
}
}

View File

@ -50,14 +50,39 @@ void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
default: llvm_unreachable("Invalid ssecc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
case 3: O << "unord"; break;
case 4: O << "neq"; break;
case 5: O << "nlt"; break;
case 6: O << "nle"; break;
case 7: O << "ord"; break;
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
case 3: O << "unord"; break;
case 4: O << "neq"; break;
case 5: O << "nlt"; break;
case 6: O << "nle"; break;
case 7: O << "ord"; break;
case 8: O << "eq_uq"; break;
case 9: O << "nge"; break;
case 0xa: O << "ngt"; break;
case 0xb: O << "false"; break;
case 0xc: O << "neq_oq"; break;
case 0xd: O << "ge"; break;
case 0xe: O << "gt"; break;
case 0xf: O << "true"; break;
case 0x10: O << "eq_os"; break;
case 0x11: O << "lt_oq"; break;
case 0x12: O << "le_oq"; break;
case 0x13: O << "unord_s"; break;
case 0x14: O << "neq_us"; break;
case 0x15: O << "nlt_uq"; break;
case 0x16: O << "nle_uq"; break;
case 0x17: O << "ord_s"; break;
case 0x18: O << "eq_us"; break;
case 0x19: O << "nge_uq"; break;
case 0x1a: O << "ngt_uq"; break;
case 0x1b: O << "false_os"; break;
case 0x1c: O << "neq_os"; break;
case 0x1d: O << "ge_oq"; break;
case 0x1e: O << "gt_oq"; break;
case 0x1f: O << "true_us"; break;
}
}

View File

@ -266,14 +266,38 @@ void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
unsigned char value = MI->getOperand(Op).getImm();
assert(value <= 7 && "Invalid ssecc argument!");
switch (value) {
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
case 3: O << "unord"; break;
case 4: O << "neq"; break;
case 5: O << "nlt"; break;
case 6: O << "nle"; break;
case 7: O << "ord"; break;
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
case 3: O << "unord"; break;
case 4: O << "neq"; break;
case 5: O << "nlt"; break;
case 6: O << "nle"; break;
case 7: O << "ord"; break;
case 8: O << "eq_uq"; break;
case 9: O << "nge"; break;
case 0xa: O << "ngt"; break;
case 0xb: O << "false"; break;
case 0xc: O << "neq_oq"; break;
case 0xd: O << "ge"; break;
case 0xe: O << "gt"; break;
case 0xf: O << "true"; break;
case 0x10: O << "eq_os"; break;
case 0x11: O << "lt_oq"; break;
case 0x12: O << "le_oq"; break;
case 0x13: O << "unord_s"; break;
case 0x14: O << "neq_us"; break;
case 0x15: O << "nlt_uq"; break;
case 0x16: O << "nle_uq"; break;
case 0x17: O << "ord_s"; break;
case 0x18: O << "eq_us"; break;
case 0x19: O << "nge_uq"; break;
case 0x1a: O << "ngt_uq"; break;
case 0x1b: O << "false_os"; break;
case 0x1c: O << "neq_os"; break;
case 0x1d: O << "ge_oq"; break;
case 0x1e: O << "gt_oq"; break;
case 0x1f: O << "true_us"; break;
}
}

View File

@ -14597,41 +14597,42 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget->hasAVX()) return SDValue();
if (!Subtarget->hasAVX())
return SDValue();
// Optimize vectors in AVX mode
// Sign extend v8i16 to v8i32 and
// v4i32 to v4i64
//
// Divide input vector into two parts
// for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
// concat the vectors to original VT
// Optimize vectors in AVX mode
// Sign extend v8i16 to v8i32 and
// v4i32 to v4i64
//
// Divide input vector into two parts
// for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
// concat the vectors to original VT
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
if (((VT == MVT::v4i64) && (OpVT == MVT::v4i32)) ||
((VT == MVT::v8i32) && (OpVT == MVT::v8i16))) {
if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
(VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
unsigned NumElems = OpVT.getVectorNumElements();
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i=0; i< NumElems/2; i++) ShufMask1[i] = i;
for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
ShufMask1.data());
ShufMask1.data());
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i=0; i< NumElems/2; i++) ShufMask2[i] = i+NumElems/2;
for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2;
SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
ShufMask2.data());
ShufMask2.data());
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
VT.getVectorNumElements()/2);
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);

View File

@ -1766,6 +1766,74 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
%res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vcmpeqps
%a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
; CHECK: vcmpltps
%a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
; CHECK: vcmpleps
%a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
; CHECK: vcmpunordps
%a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
; CHECK: vcmpneqps
%a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
; CHECK: vcmpnltps
%a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
; CHECK: vcmpnleps
%a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
; CHECK: vcmpordps
%a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
; CHECK: vcmpeq_uqps
%a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
; CHECK: vcmpngeps
%a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
; CHECK: vcmpngtps
%a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
; CHECK: vcmpfalseps
%a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
; CHECK: vcmpneq_oqps
%a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
; CHECK: vcmpgeps
%a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
; CHECK: vcmpgtps
%a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
; CHECK: vcmptrueps
%a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
; CHECK: vcmpeq_osps
%a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
; CHECK: vcmplt_oqps
%a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
; CHECK: vcmple_oqps
%a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
; CHECK: vcmpunord_sps
%a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
; CHECK: vcmpneq_usps
%a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
; CHECK: vcmpnlt_uqps
%a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
; CHECK: vcmpnle_uqps
%a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
; CHECK: vcmpord_sps
%a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
; CHECK: vcmpeq_usps
%a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
; CHECK: vcmpnge_uqps
%a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
; CHECK: vcmpngt_uqps
%a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
; CHECK: vcmpfalse_osps
%a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
; CHECK: vcmpneq_osps
%a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
; CHECK: vcmpge_oqps
%a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
; CHECK: vcmpgt_oqps
%a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
; CHECK: vcmptrue_usps
%res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone