diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b71eeca36d6..962977f6520 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -562,7 +562,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
     { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
 
-    // BMI/BMI2 foldable instructions
+    // BMI/BMI2/LZCNT/POPCNT foldable instructions
+    { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
+    { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+    { X86::BLSI32rr, X86::BLSI32rm, 0 },
+    { X86::BLSI64rr, X86::BLSI64rm, 0 },
+    { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
+    { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
+    { X86::BLSR32rr, X86::BLSR32rm, 0 },
+    { X86::BLSR64rr, X86::BLSR64rm, 0 },
+    { X86::BZHI32rr, X86::BZHI32rm, 0 },
+    { X86::BZHI64rr, X86::BZHI64rm, 0 },
+    { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
+    { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
+    { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
+    { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
+    { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
+    { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
     { X86::RORX32ri, X86::RORX32mi, 0 },
     { X86::RORX64ri, X86::RORX64mi, 0 },
     { X86::SARX32rr, X86::SARX32rm, 0 },
@@ -571,6 +587,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::SHRX64rr, X86::SHRX64rm, 0 },
     { X86::SHLX32rr, X86::SHLX32rm, 0 },
     { X86::SHLX64rr, X86::SHLX64rm, 0 },
+    { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
+    { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
+    { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1156,8 +1175,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
 
     // BMI/BMI2 foldable instructions
+    { X86::ANDN32rr, X86::ANDN32rm, 0 },
+    { X86::ANDN64rr, X86::ANDN64rm, 0 },
     { X86::MULX32rr, X86::MULX32rm, 0 },
     { X86::MULX64rr, X86::MULX64rm, 0 },
+    { X86::PDEP32rr, X86::PDEP32rm, 0 },
+    { X86::PDEP64rr, X86::PDEP64rm, 0 },
+    { X86::PEXT32rr, X86::PEXT32rm, 0 },
+    { X86::PEXT64rr, X86::PEXT64rm, 0 },
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 43c47c0fa8a..b89e648c52d 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -26,6 +26,14 @@ define i32 @t3(i32 %x) nounwind {
 ; CHECK: tzcntl
 }
 
+define i32 @tzcnt32_load(i32* %x) nounwind {
+  %x1 = load i32* %x
+  %tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false )
+  ret i32 %tmp
+; CHECK: tzcnt32_load:
+; CHECK: tzcntl ({{.*}})
+}
+
 define i64 @t4(i64 %x) nounwind {
   %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
   ret i64 %tmp
@@ -69,6 +77,15 @@ define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
 ; CHECK: andnl
 }
 
+define i32 @andn32_load(i32 %x, i32* %y) nounwind readnone {
+  %y1 = load i32* %y
+  %tmp1 = xor i32 %x, -1
+  %tmp2 = and i32 %y1, %tmp1
+  ret i32 %tmp2
+; CHECK: andn32_load:
+; CHECK: andnl ({{.*}})
+}
+
 define i64 @andn64(i64 %x, i64 %y) nounwind readnone {
   %tmp1 = xor i64 %x, -1
   %tmp2 = and i64 %tmp1, %y
@@ -84,6 +101,14 @@ define i32 @bextr32(i32 %x, i32 %y) nounwind readnone {
 ; CHECK: bextrl
 }
 
+define i32 @bextr32_load(i32* %x, i32 %y) nounwind readnone {
+  %x1 = load i32* %x
+  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
+  ret i32 %tmp
+; CHECK: bextr32_load:
+; CHECK: bextrl {{.*}}, ({{.*}}), {{.*}}
+}
+
 declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
 
 define i64 @bextr64(i64 %x, i64 %y) nounwind readnone {
@@ -102,6 +127,14 @@ define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
 ; CHECK: bzhil
 }
 
+define i32 @bzhi32_load(i32* %x, i32 %y) nounwind readnone {
+  %x1 = load i32* %x
+  %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
+  ret i32 %tmp
+; CHECK: bzhi32_load:
+; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}}
+}
+
 declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
 
 define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone {
@@ -121,6 +154,15 @@ define i32 @blsi32(i32 %x) nounwind readnone {
 ; CHECK: blsil
 }
 
+define i32 @blsi32_load(i32* %x) nounwind readnone {
+  %x1 = load i32* %x
+  %tmp = sub i32 0, %x1
+  %tmp2 = and i32 %x1, %tmp
+  ret i32 %tmp2
+; CHECK: blsi32_load:
+; CHECK: blsil ({{.*}})
+}
+
 define i64 @blsi64(i64 %x) nounwind readnone {
   %tmp = sub i64 0, %x
   %tmp2 = and i64 %tmp, %x
@@ -137,6 +179,15 @@ define i32 @blsmsk32(i32 %x) nounwind readnone {
 ; CHECK: blsmskl
 }
 
+define i32 @blsmsk32_load(i32* %x) nounwind readnone {
+  %x1 = load i32* %x
+  %tmp = sub i32 %x1, 1
+  %tmp2 = xor i32 %x1, %tmp
+  ret i32 %tmp2
+; CHECK: blsmsk32_load:
+; CHECK: blsmskl ({{.*}})
+}
+
 define i64 @blsmsk64(i64 %x) nounwind readnone {
   %tmp = sub i64 %x, 1
   %tmp2 = xor i64 %tmp, %x
@@ -153,6 +204,15 @@ define i32 @blsr32(i32 %x) nounwind readnone {
 ; CHECK: blsrl
 }
 
+define i32 @blsr32_load(i32* %x) nounwind readnone {
+  %x1 = load i32* %x
+  %tmp = sub i32 %x1, 1
+  %tmp2 = and i32 %x1, %tmp
+  ret i32 %tmp2
+; CHECK: blsr32_load:
+; CHECK: blsrl ({{.*}})
+}
+
 define i64 @blsr64(i64 %x) nounwind readnone {
   %tmp = sub i64 %x, 1
   %tmp2 = and i64 %tmp, %x
@@ -168,6 +228,14 @@ define i32 @pdep32(i32 %x, i32 %y) nounwind readnone {
 ; CHECK: pdepl
 }
 
+define i32 @pdep32_load(i32 %x, i32* %y) nounwind readnone {
+  %y1 = load i32* %y
+  %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
+  ret i32 %tmp
+; CHECK: pdep32_load:
+; CHECK: pdepl ({{.*}})
+}
+
 declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
 
 define i64 @pdep64(i64 %x, i64 %y) nounwind readnone {
@@ -186,6 +254,14 @@ define i32 @pext32(i32 %x, i32 %y) nounwind readnone {
 ; CHECK: pextl
 }
 
+define i32 @pext32_load(i32 %x, i32* %y) nounwind readnone {
+  %y1 = load i32* %y
+  %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
+  ret i32 %tmp
+; CHECK: pext32_load:
+; CHECK: pextl ({{.*}})
+}
+
 declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
 
 define i64 @pext64(i64 %x, i64 %y) nounwind readnone {
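
Note: each OpTbl1/OpTbl2 entry above pairs a register-form opcode with its memory-form twin, so the generic folding logic in X86InstrInfo can rewrite, say, TZCNT32rr into TZCNT32rm when the source operand comes from a plain load or a spill reload; that fold is exactly what the new *_load tests check. The third field carries flags: 0 means no alignment constraint and folding allowed in both directions, unlike neighbouring entries marked TB_NO_REVERSE (no memory-to-register unfold) or TB_ALIGN_32 (32-byte aligned operand required). As a sketch of what a 64-bit counterpart could look like, here is a hypothetical test that is not part of this patch; it assumes bmi.ll's existing RUN line enables the BMI feature and reuses the @llvm.cttz.i64 declaration already in that file:

; Hypothetical 64-bit analogue of @tzcnt32_load (not in this patch);
; the TZCNT64rr -> TZCNT64rm table entry above makes this fold legal.
define i64 @tzcnt64_load(i64* %x) nounwind {
  %x1 = load i64* %x
  %tmp = tail call i64 @llvm.cttz.i64(i64 %x1, i1 false)
  ret i64 %tmp
; CHECK: tzcnt64_load:
; CHECK: tzcntq ({{.*}})
}

The same shape generalizes to each of the other new rr-to-rm table entries.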