diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 805d3667d15..afba3a06670 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1008,6 +1008,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // SSE4A let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty], []>; + def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], []>; + + def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_i8_ty, llvm_i8_ty], []>; + def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_sse4a_movnt_ss : GCCBuiltin<"__builtin_ia32_movntss">, Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty], []>; def int_x86_sse4a_movnt_sd : GCCBuiltin<"__builtin_ia32_movntsd">, diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 24ac52d8e42..f79073ff588 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1150,8 +1150,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } // If there is a remaining operand, it must be a trailing immediate. Emit it - // according to the right size for the instruction. - if (CurOp != NumOps) { + // according to the right size for the instruction. Some instructions + // (SSE4a extrq and insertq) have two trailing immediates. + while (CurOp != NumOps && NumOps - CurOp <= 2) { // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte. if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a518997b786..e9545cdbe67 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7268,6 +7268,31 @@ defm : pclmul_alias<"lqlq", 0x00>; //===----------------------------------------------------------------------===// let Predicates = [HasSSE4A] in { + +let Constraints = "$src = $dst" in { +def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst), + (ins VR128:$src, i8imm:$len, i8imm:$idx), + "extrq\t{$idx, $len, $src|$src, $len, $idx}", + [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, + imm:$idx))]>, TB, OpSize; +def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src, VR128:$mask), + "extrq\t{$mask, $src|$src, $mask}", + [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, + VR128:$mask))]>, TB, OpSize; + +def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx), + "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", + [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src, + VR128:$src2, imm:$len, imm:$idx))]>, XD; +def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src, VR128:$mask), + "insertq\t{$mask, $src|$src, $mask}", + [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, + VR128:$mask))]>, XD; +} + def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), "movntss\t{$src, $dst|$dst, $src}", [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS; diff --git a/test/CodeGen/X86/sse4a.ll b/test/CodeGen/X86/sse4a.ll index 14c0fb3df21..076e2133649 100644 --- a/test/CodeGen/X86/sse4a.ll +++ b/test/CodeGen/X86/sse4a.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4a | FileCheck %s define void @test1(i8* %p, <4 x float> %a) nounwind optsize ssp { +; CHECK: test1: ; CHECK: movntss -entry: tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) nounwind ret void } @@ -10,10 +10,47 @@ entry: declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>) define void @test2(i8* %p, <2 x double> %a) nounwind optsize ssp { +; CHECK: test2: ; CHECK: movntsd -entry: tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) nounwind ret void } declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>) + +define <2 x i64> @test3(<2 x i64> %x) nounwind uwtable ssp { +; CHECK: test3: +; CHECK: extrq + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) + ret <2 x i64> %1 +} + +declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind + +define <2 x i64> @test4(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp { +; CHECK: test4: +; CHECK: extrq + %1 = bitcast <2 x i64> %y to <16 x i8> + %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind + ret <2 x i64> %2 +} + +declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind + +define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp { +; CHECK: test5: +; CHECK: insertq + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6) + ret <2 x i64> %1 +} + +declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind + +define <2 x i64> @test6(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp { +; CHECK: test6: +; CHECK: insertq + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind + ret <2 x i64> %1 +} + +declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt index 739fa6a843f..3ec55f9689f 100644 --- a/test/MC/Disassembler/X86/x86-32.txt +++ b/test/MC/Disassembler/X86/x86-32.txt @@ -612,3 +612,21 @@ # CHECK: shrxl %esi, %ebx, %edx 0xc4 0xe2 0x0b 0xf7 0xd3 + +# CHECK: extrq $2, $3, %xmm0 +0x66 0x0f 0x78 0xc0 0x03 0x02 + +# CHECK: extrq %xmm1, %xmm0 +0x66 0x0f 0x79 0xc1 + +# CHECK: insertq $6, $5, %xmm1, %xmm0 +0xf2 0x0f 0x78 0xc1 0x05 0x06 + +# CHECK: insertq %xmm1, %xmm0 +0xf2 0x0f 0x79 0xc1 + +# CHECK: movntsd %xmm0, (%edi) +0xf2 0x0f 0x2b 0x07 + +# CHECK: movntss %xmm0, (%edi) +0xf3 0x0f 0x2b 0x07 diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt index f4b8f46fa2e..df449a403b8 100644 --- a/test/MC/Disassembler/X86/x86-64.txt +++ b/test/MC/Disassembler/X86/x86-64.txt @@ -61,3 +61,21 @@ # CHECK: cmpordsd 0xf2 0x0f 0xc2 0xc7 0x07 + +# CHECK: extrq $2, $3, %xmm0 +0x66 0x0f 0x78 0xc0 0x03 0x02 + +# CHECK: extrq %xmm1, %xmm0 +0x66 0x0f 0x79 0xc1 + +# CHECK: insertq $6, $5, %xmm1, %xmm0 +0xf2 0x0f 0x78 0xc1 0x05 0x06 + +# CHECK: insertq %xmm1, %xmm0 +0xf2 0x0f 0x79 0xc1 + +# CHECK: movntsd %xmm0, (%rdi) +0xf2 0x0f 0x2b 0x07 + +# CHECK: movntss %xmm0, (%rdi) +0xf3 0x0f 0x2b 0x07 diff --git a/test/MC/X86/x86_64-sse4a.s b/test/MC/X86/x86_64-sse4a.s new file mode 100644 index 00000000000..e5ed69e5b2c --- /dev/null +++ b/test/MC/X86/x86_64-sse4a.s @@ -0,0 +1,25 @@ +# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +extrq $2, $3, %xmm0 +# CHECK: extrq $2, $3, %xmm0 +# CHECK: encoding: [0x66,0x0f,0x78,0xc0,0x03,0x02] + +extrq %xmm1, %xmm0 +# CHECK: extrq %xmm1, %xmm0 +# CHECK: encoding: [0x66,0x0f,0x79,0xc1] + +insertq $6, $5, %xmm1, %xmm0 +# CHECK: insertq $6, $5, %xmm1, %xmm0 +# CHECK: encoding: [0xf2,0x0f,0x78,0xc1,0x05,0x06] + +insertq %xmm1, %xmm0 +# CHECK: insertq %xmm1, %xmm0 +# CHECK: encoding: [0xf2,0x0f,0x79,0xc1] + +movntsd %xmm0, (%rdi) +# CHECK: movntsd %xmm0, (%rdi) +# CHECK: encoding: [0xf2,0x0f,0x2b,0x07] + +movntss %xmm0, (%rdi) +# CHECK: movntss %xmm0, (%rdi) +# CHECK: encoding: [0xf3,0x0f,0x2b,0x07] diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 6a01cce637e..afb25be7ff8 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -690,12 +690,13 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { // Operand 2 is a register operand in the R/M field. // - In AVX, there is a register operand in the VEX.vvvv field here - // Operand 3 (optional) is an immediate. + // Operand 4 (optional) is an immediate. if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix) assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 && "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); else - assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && + assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 4 && "Unexpected number of operands for MRMSrcRegFrm"); HANDLE_OPERAND(roRegister) @@ -716,6 +717,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { if (!HasMemOp4Prefix) HANDLE_OPTIONAL(immediate) HANDLE_OPTIONAL(immediate) // above might be a register in 7:4 + HANDLE_OPTIONAL(immediate) break; case X86Local::MRMSrcMem: // Operand 1 is a register operand in the Reg/Opcode field. @@ -759,16 +761,18 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { case X86Local::MRM7r: // Operand 1 is a register operand in the R/M field. // Operand 2 (optional) is an immediate or relocation. + // Operand 3 (optional) is an immediate. if (HasVEX_4VPrefix) assert(numPhysicalOperands <= 3 && "Unexpected number of operands for MRMnRFrm with VEX_4V"); else - assert(numPhysicalOperands <= 2 && + assert(numPhysicalOperands <= 3 && "Unexpected number of operands for MRMnRFrm"); if (HasVEX_4VPrefix) HANDLE_OPERAND(vvvvRegister) HANDLE_OPTIONAL(rmRegister) HANDLE_OPTIONAL(relocation) + HANDLE_OPTIONAL(immediate) break; case X86Local::MRM0m: case X86Local::MRM1m: