mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Remove the pmulld intrinsic and autoupdate it as a vector multiply.
Rewrite the pmulld patterns, and make sure that they fold loads of arguments into the instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99910 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -810,9 +810,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.". | ||||
|   def int_x86_sse41_pmuldq          : GCCBuiltin<"__builtin_ia32_pmuldq128">, | ||||
|               Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], | ||||
|                         [IntrNoMem, Commutative]>; | ||||
|   def int_x86_sse41_pmulld          : GCCBuiltin<"__builtin_ia32_pmulld128">, | ||||
|               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], | ||||
|                         [IntrNoMem, Commutative]>; | ||||
| } | ||||
|  | ||||
| // Vector extract | ||||
|   | ||||
| @@ -597,7 +597,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) | ||||
|     { X86::PMULHUWrr,       X86::PMULHUWrm, 16 }, | ||||
|     { X86::PMULHWrr,        X86::PMULHWrm, 16 }, | ||||
|     { X86::PMULLDrr,        X86::PMULLDrm, 16 }, | ||||
|     { X86::PMULLDrr_int,    X86::PMULLDrm_int, 16 }, | ||||
|     { X86::PMULLWrr,        X86::PMULLWrm, 16 }, | ||||
|     { X86::PMULUDQrr,       X86::PMULUDQrm, 16 }, | ||||
|     { X86::PORrr,           X86::PORrm, 16 }, | ||||
|   | ||||
| @@ -3448,8 +3448,28 @@ let Constraints = "$src1 = $dst" in { | ||||
|                        OpSize; | ||||
|   } | ||||
| } | ||||
| defm PMULLD       : SS41I_binop_patint<0x40, "pmulld", v4i32, mul, | ||||
|                                        int_x86_sse41_pmulld, 1>; | ||||
|  | ||||
| /// SS48I_binop_rm - Simple SSE41 binary operator. | ||||
| /// | ||||
| /// Defines two instructions for opcode `opc` with mnemonic `OpcodeStr`: | ||||
| ///   rr - register/register form, matched from (OpNode $src1, $src2). | ||||
| ///   rm - register/memory form, matched when $src2 is a memopv2i64 load | ||||
| ///        bitconverted to OpVT, so the load is folded into the instruction. | ||||
| /// Both forms tie $src1 to $dst. `Commutable` sets isCommutable on the rr | ||||
| /// form only. | ||||
| let Constraints = "$src1 = $dst" in { | ||||
| multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, | ||||
|                         ValueType OpVT, bit Commutable = 0> { | ||||
|   def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),  | ||||
|                                  (ins VR128:$src1, VR128:$src2), | ||||
|                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), | ||||
|                [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, | ||||
|                OpSize { | ||||
|     let isCommutable = Commutable; | ||||
|   } | ||||
|   // The loaded operand is bitconverted to OpVT (rather than through the | ||||
|   // v4i32-only bc_v4i32 fragment) so the pattern follows the multiclass's | ||||
|   // type parameter; for OpVT = v4i32 this is the same pattern as before. | ||||
|   def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),  | ||||
|                                  (ins VR128:$src1, i128mem:$src2), | ||||
|                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), | ||||
|                [(set VR128:$dst, (OpVT (OpNode VR128:$src1, | ||||
|                                   (OpVT (bitconvert | ||||
|                                          (memopv2i64 addr:$src2))))))]>, | ||||
|                OpSize; | ||||
| } | ||||
| } | ||||
|  | ||||
| defm PMULLD         : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>; | ||||
|  | ||||
| /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate | ||||
| let Constraints = "$src1 = $dst" in { | ||||
|   | ||||
| @@ -225,7 +225,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { | ||||
|       // Calls to these intrinsics are transformed into ShuffleVector's. | ||||
|       NewFn = 0; | ||||
|       return true; | ||||
|     } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) { | ||||
|       // Calls to these intrinsics are transformed into vector multiplies. | ||||
|       NewFn = 0; | ||||
|       return true; | ||||
|     } | ||||
|      | ||||
|  | ||||
|     break; | ||||
|   } | ||||
| @@ -355,6 +360,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { | ||||
|        | ||||
|       //  Clean up the old call now that it has been completely upgraded. | ||||
|       CI->eraseFromParent(); | ||||
|     } else if (F->getName() == "llvm.x86.sse41.pmulld") { | ||||
|       // Upgrade this set of intrinsics into vector multiplies. | ||||
|       Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1), | ||||
|                                                    CI->getOperand(2), | ||||
|                                                    CI->getName(), | ||||
|                                                    CI); | ||||
|       // Fix up all the uses with our new multiply. | ||||
|       if (!CI->use_empty()) | ||||
|         CI->replaceAllUsesWith(Mul); | ||||
|          | ||||
|       // Remove the old intrinsic call now that it has been replaced by the multiply. | ||||
|       CI->eraseFromParent(); | ||||
|     } else { | ||||
|       llvm_unreachable("Unknown function for CallInst upgrade."); | ||||
|     } | ||||
|   | ||||
							
								
								
									
										2
									
								
								test/Bitcode/sse41_pmulld.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								test/Bitcode/sse41_pmulld.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| ; Check that calls to the llvm.x86.sse41.pmulld intrinsic in the bitcode file | ||||
| ; are auto-upgraded: the intrinsic name must be gone and a mul must appear. | ||||
| ; RUN: llvm-dis < %s.bc | not grep {@llvm\\.x86\\.sse41\\.pmulld} | ||||
| ; RUN: llvm-dis < %s.bc | grep mul | ||||
							
								
								
									
										
											BIN
										
									
								
								test/Bitcode/sse41_pmulld.ll.bc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								test/Bitcode/sse41_pmulld.ll.bc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -1,6 +1,6 @@ | ||||
| ; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t | ||||
| ; RUN: grep pmul %t | count 12 | ||||
| ; RUN: grep mov %t | count 12 | ||||
| ; RUN: grep mov %t | count 11 | ||||
|  | ||||
| define <4 x i32> @a(<4 x i32> %i) nounwind  { | ||||
|         %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > | ||||
|   | ||||
							
								
								
									
										16
									
								
								test/CodeGen/X86/pmulld.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								test/CodeGen/X86/pmulld.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| ; RUN: llc < %s -march=x86-64 -mattr=+sse41 -asm-verbose=0 | FileCheck %s | ||||
|  | ||||
| ; Register/register case: the <4 x i32> multiply of the two arguments must | ||||
| ; produce pmulld on the line immediately following the function label. | ||||
| define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { | ||||
| ; CHECK: test1: | ||||
| ; CHECK-NEXT: pmulld | ||||
|   %C = mul <4 x i32> %A, %B | ||||
|   ret <4 x i32> %C | ||||
| } | ||||
|  | ||||
| ; Register/memory case: %B is loaded from %Bp before the multiply. CHECK-NEXT | ||||
| ; requires pmulld on the line immediately following the label, which leaves | ||||
| ; no room for a separate load instruction, i.e. the load is expected to be | ||||
| ; folded into pmulld's memory operand. | ||||
| define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind { | ||||
| ; CHECK: test1a: | ||||
| ; CHECK-NEXT: pmulld | ||||
|   %B = load <4 x i32>* %Bp | ||||
|   %C = mul <4 x i32> %A, %B | ||||
|   ret <4 x i32> %C | ||||
| } | ||||
		Reference in New Issue
	
	Block a user