mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Rename fma4 intrinsics to just fma since they are now used for both FMA4 and FMA3. Autoupgrade support coming in a separate commit.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157898 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -1759,137 +1759,137 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.". | ||||
| } | ||||
|  | ||||
| //===----------------------------------------------------------------------===// | ||||
| // FMA4 | ||||
| // FMA3 and FMA4 | ||||
|  | ||||
| let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.". | ||||
|   def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">, | ||||
|   def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">, | ||||
|   def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">, | ||||
|   def int_x86_fma_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">, | ||||
|   def int_x86_fma_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">, | ||||
|   def int_x86_fma_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">, | ||||
|               Intrinsic<[llvm_v8f32_ty], | ||||
|                         [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">, | ||||
|   def int_x86_fma_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">, | ||||
|               Intrinsic<[llvm_v4f64_ty], | ||||
|                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">, | ||||
|   def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">, | ||||
|   def int_x86_fma_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">, | ||||
|   def int_x86_fma_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">, | ||||
|   def int_x86_fma_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">, | ||||
|   def int_x86_fma_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">, | ||||
|               Intrinsic<[llvm_v8f32_ty], | ||||
|                         [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">, | ||||
|   def int_x86_fma_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">, | ||||
|               Intrinsic<[llvm_v4f64_ty], | ||||
|                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">, | ||||
|   def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">, | ||||
|   def int_x86_fma_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">, | ||||
|   def int_x86_fma_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">, | ||||
|   def int_x86_fma_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">, | ||||
|   def int_x86_fma_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">, | ||||
|               Intrinsic<[llvm_v8f32_ty], | ||||
|                         [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">, | ||||
|   def int_x86_fma_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">, | ||||
|               Intrinsic<[llvm_v4f64_ty], | ||||
|                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">, | ||||
|   def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">, | ||||
|   def int_x86_fma_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">, | ||||
|   def int_x86_fma_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">, | ||||
|   def int_x86_fma_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">, | ||||
|   def int_x86_fma_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">, | ||||
|               Intrinsic<[llvm_v8f32_ty], | ||||
|                         [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">, | ||||
|   def int_x86_fma_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">, | ||||
|               Intrinsic<[llvm_v4f64_ty], | ||||
|                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">, | ||||
|   def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">, | ||||
|   def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmaddsub_ps_256 : | ||||
|   def int_x86_fma_vfmaddsub_ps_256 : | ||||
|                GCCBuiltin<"__builtin_ia32_vfmaddsubps256">, | ||||
|               Intrinsic<[llvm_v8f32_ty], | ||||
|                         [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmaddsub_pd_256 : | ||||
|   def int_x86_fma_vfmaddsub_pd_256 : | ||||
|               GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">, | ||||
|               Intrinsic<[llvm_v4f64_ty], | ||||
|                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">, | ||||
|   def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">, | ||||
|               Intrinsic<[llvm_v4f32_ty], | ||||
|                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">, | ||||
|   def int_x86_fma_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">, | ||||
|               Intrinsic<[llvm_v2f64_ty], | ||||
|                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsubadd_ps_256 : | ||||
|   def int_x86_fma_vfmsubadd_ps_256 : | ||||
|               GCCBuiltin<"__builtin_ia32_vfmsubaddps256">, | ||||
|               Intrinsic<[llvm_v8f32_ty], | ||||
|                         [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], | ||||
|                         [IntrNoMem]>; | ||||
|   def int_x86_fma4_vfmsubadd_pd_256 : | ||||
|   def int_x86_fma_vfmsubadd_pd_256 : | ||||
|               GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">, | ||||
|               Intrinsic<[llvm_v4f64_ty], | ||||
|                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], | ||||
|   | ||||
| @@ -79,40 +79,40 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, | ||||
| // Fused Multiply-Add | ||||
| let ExeDomain = SSEPackedSingle in { | ||||
|   defm VFMADDPS    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32, | ||||
|     memopv8f32, int_x86_fma4_vfmadd_ps, int_x86_fma4_vfmadd_ps_256>; | ||||
|     memopv8f32, int_x86_fma_vfmadd_ps, int_x86_fma_vfmadd_ps_256>; | ||||
|   defm VFMSUBPS    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps",  memopv4f32, | ||||
|     memopv8f32, int_x86_fma4_vfmsub_ps, int_x86_fma4_vfmsub_ps_256>; | ||||
|     memopv8f32, int_x86_fma_vfmsub_ps, int_x86_fma_vfmsub_ps_256>; | ||||
|   defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", | ||||
|     memopv4f32, memopv8f32, int_x86_fma4_vfmaddsub_ps, | ||||
|     int_x86_fma4_vfmaddsub_ps_256>; | ||||
|     memopv4f32, memopv8f32, int_x86_fma_vfmaddsub_ps, | ||||
|     int_x86_fma_vfmaddsub_ps_256>; | ||||
|   // VFMSUBADDPS: both the 128-bit and the 256-bit form must select on the | ||||
|   // vfmsubadd intrinsics (the 256-bit one previously named vfmaddsub). | ||||
|   defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", | ||||
|     memopv4f32, memopv8f32, int_x86_fma4_vfmsubadd_ps, | ||||
|     int_x86_fma4_vfmaddsub_ps_256>; | ||||
|     memopv4f32, memopv8f32, int_x86_fma_vfmsubadd_ps, | ||||
|     int_x86_fma_vfmsubadd_ps_256>; | ||||
| } | ||||
|  | ||||
| let ExeDomain = SSEPackedDouble in { | ||||
|   defm VFMADDPD    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64, | ||||
|     memopv4f64, int_x86_fma4_vfmadd_pd, int_x86_fma4_vfmadd_pd_256>, VEX_W; | ||||
|     memopv4f64, int_x86_fma_vfmadd_pd, int_x86_fma_vfmadd_pd_256>, VEX_W; | ||||
|   defm VFMSUBPD    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64, | ||||
|     memopv4f64, int_x86_fma4_vfmsub_pd, int_x86_fma4_vfmsub_pd_256>, VEX_W; | ||||
|     memopv4f64, int_x86_fma_vfmsub_pd, int_x86_fma_vfmsub_pd_256>, VEX_W; | ||||
|   defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", memopv2f64, | ||||
|     memopv4f64, int_x86_fma4_vfmaddsub_pd, int_x86_fma4_vfmaddsub_pd_256>, VEX_W; | ||||
|     memopv4f64, int_x86_fma_vfmaddsub_pd, int_x86_fma_vfmaddsub_pd_256>, VEX_W; | ||||
|   defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", memopv2f64, | ||||
|     memopv4f64, int_x86_fma4_vfmsubadd_pd, int_x86_fma4_vfmsubadd_pd_256>, VEX_W; | ||||
|     memopv4f64, int_x86_fma_vfmsubadd_pd, int_x86_fma_vfmsubadd_pd_256>, VEX_W; | ||||
| } | ||||
|  | ||||
| // Fused Negative Multiply-Add | ||||
| let ExeDomain = SSEPackedSingle in { | ||||
|   defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps",  memopv4f32, | ||||
|     memopv8f32, int_x86_fma4_vfnmadd_ps, int_x86_fma4_vfnmadd_ps_256>; | ||||
|     memopv8f32, int_x86_fma_vfnmadd_ps, int_x86_fma_vfnmadd_ps_256>; | ||||
|   defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps",  memopv4f32, | ||||
|     memopv8f32, int_x86_fma4_vfnmsub_ps, int_x86_fma4_vfnmsub_ps_256>; | ||||
|     memopv8f32, int_x86_fma_vfnmsub_ps, int_x86_fma_vfnmsub_ps_256>; | ||||
| } | ||||
| let ExeDomain = SSEPackedDouble in { | ||||
|   defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64, | ||||
|     memopv4f64, int_x86_fma4_vfnmadd_pd, int_x86_fma4_vfnmadd_pd_256>, VEX_W; | ||||
|     memopv4f64, int_x86_fma_vfnmadd_pd, int_x86_fma_vfnmadd_pd_256>, VEX_W; | ||||
|   defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", memopv2f64, | ||||
|     memopv4f64, int_x86_fma4_vfnmsub_pd, int_x86_fma4_vfnmsub_pd_256>, VEX_W; | ||||
|     memopv4f64, int_x86_fma_vfnmsub_pd, int_x86_fma_vfnmsub_pd_256>, VEX_W; | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -160,15 +160,15 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, | ||||
|                                   sse_load_f64, IntF64>; | ||||
| } | ||||
|  | ||||
| defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma4_vfmadd_ss, | ||||
|                           int_x86_fma4_vfmadd_sd>, VEX_LIG; | ||||
| defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma4_vfmsub_ss, | ||||
|                           int_x86_fma4_vfmsub_sd>, VEX_LIG; | ||||
| defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, | ||||
|                           int_x86_fma_vfmadd_sd>, VEX_LIG; | ||||
| defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, | ||||
|                           int_x86_fma_vfmsub_sd>, VEX_LIG; | ||||
|  | ||||
| defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma4_vfnmadd_ss, | ||||
|                            int_x86_fma4_vfnmadd_sd>, VEX_LIG; | ||||
| defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma4_vfnmsub_ss, | ||||
|                            int_x86_fma4_vfnmsub_sd>, VEX_LIG; | ||||
| defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, | ||||
|                            int_x86_fma_vfnmadd_sd>, VEX_LIG; | ||||
| defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, | ||||
|                            int_x86_fma_vfnmsub_sd>, VEX_LIG; | ||||
|  | ||||
|  | ||||
| //===----------------------------------------------------------------------===// | ||||
| @@ -259,44 +259,44 @@ let isCodeGenOnly = 1 in { | ||||
| let Predicates = [HasFMA4] in { | ||||
|  | ||||
| defm VFMADDSS4    : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32, | ||||
|                           int_x86_fma4_vfmadd_ss>; | ||||
|                           int_x86_fma_vfmadd_ss>; | ||||
| defm VFMADDSD4    : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64, | ||||
|                           int_x86_fma4_vfmadd_sd>; | ||||
| defm VFMADDPS4    : fma4p<0x68, "vfmaddps", int_x86_fma4_vfmadd_ps, | ||||
|                           int_x86_fma4_vfmadd_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMADDPD4    : fma4p<0x69, "vfmaddpd", int_x86_fma4_vfmadd_pd, | ||||
|                           int_x86_fma4_vfmadd_pd_256, memopv2f64, memopv4f64>; | ||||
|                           int_x86_fma_vfmadd_sd>; | ||||
| defm VFMADDPS4    : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps, | ||||
|                           int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMADDPD4    : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd, | ||||
|                           int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>; | ||||
| defm VFMSUBSS4    : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32, | ||||
|                           int_x86_fma4_vfmsub_ss>; | ||||
|                           int_x86_fma_vfmsub_ss>; | ||||
| defm VFMSUBSD4    : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64, | ||||
|                           int_x86_fma4_vfmsub_sd>; | ||||
| defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps", int_x86_fma4_vfmsub_ps, | ||||
|                           int_x86_fma4_vfmsub_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd", int_x86_fma4_vfmsub_pd, | ||||
|                           int_x86_fma4_vfmsub_pd_256, memopv2f64, memopv4f64>; | ||||
|                           int_x86_fma_vfmsub_sd>; | ||||
| defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps, | ||||
|                           int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd, | ||||
|                           int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>; | ||||
| defm VFNMADDSS4   : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32, | ||||
|                           int_x86_fma4_vfnmadd_ss>; | ||||
|                           int_x86_fma_vfnmadd_ss>; | ||||
| defm VFNMADDSD4   : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64, | ||||
|                           int_x86_fma4_vfnmadd_sd>; | ||||
| defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps", int_x86_fma4_vfnmadd_ps, | ||||
|                           int_x86_fma4_vfnmadd_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd", int_x86_fma4_vfnmadd_pd, | ||||
|                           int_x86_fma4_vfnmadd_pd_256, memopv2f64, memopv4f64>; | ||||
|                           int_x86_fma_vfnmadd_sd>; | ||||
| defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps, | ||||
|                           int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd, | ||||
|                           int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>; | ||||
| defm VFNMSUBSS4   : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32, | ||||
|                           int_x86_fma4_vfnmsub_ss>; | ||||
|                           int_x86_fma_vfnmsub_ss>; | ||||
| defm VFNMSUBSD4   : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64, | ||||
|                           int_x86_fma4_vfnmsub_sd>; | ||||
| defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps", int_x86_fma4_vfnmsub_ps, | ||||
|                           int_x86_fma4_vfnmsub_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd", int_x86_fma4_vfnmsub_pd, | ||||
|                           int_x86_fma4_vfnmsub_pd_256, memopv2f64, memopv4f64>; | ||||
| defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma4_vfmaddsub_ps, | ||||
|                          int_x86_fma4_vfmaddsub_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma4_vfmaddsub_pd, | ||||
|                          int_x86_fma4_vfmaddsub_pd_256, memopv2f64, memopv4f64>; | ||||
| defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma4_vfmsubadd_ps, | ||||
|                          int_x86_fma4_vfmsubadd_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma4_vfmsubadd_pd, | ||||
|                          int_x86_fma4_vfmsubadd_pd_256, memopv2f64, memopv4f64>; | ||||
|                           int_x86_fma_vfnmsub_sd>; | ||||
| defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps, | ||||
|                           int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd, | ||||
|                           int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>; | ||||
| defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps, | ||||
|                           int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd, | ||||
|                           int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>; | ||||
| defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps, | ||||
|                           int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>; | ||||
| defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd, | ||||
|                           int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>; | ||||
| } // HasFMA4 | ||||
|  | ||||
|   | ||||
| @@ -2,131 +2,131 @@ | ||||
|  | ||||
| define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fmadd132ss %xmm | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fmadd132ps | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { | ||||
|   ; CHECK: fmadd132ps {{.*\(%r.*}}, %ymm | ||||
|   %res = call <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind | ||||
|   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind | ||||
|   ret <8 x float> %res | ||||
| } | ||||
| declare <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone | ||||
| declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone | ||||
|  | ||||
| define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fnmadd132ss %xmm | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fnmadd132ps | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { | ||||
|   ; CHECK: fnmadd132ps {{.*\(%r.*}}, %ymm | ||||
|   %res = call <8 x float> @llvm.x86.fma4.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind | ||||
|   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind | ||||
|   ret <8 x float> %res | ||||
| } | ||||
| declare <8 x float> @llvm.x86.fma4.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone | ||||
| declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone | ||||
|  | ||||
|  | ||||
| define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fmsub132ss | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fmsub132ps | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fnmsub132ss | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
|   ; CHECK: fnmsub132ps | ||||
|   %res = call <4 x float> @llvm.x86.fma4.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind | ||||
|   ret <4 x float> %res | ||||
| } | ||||
| declare <4 x float> @llvm.x86.fma4.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
| declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone | ||||
|  | ||||
| ;;;; | ||||
|  | ||||
| define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fmadd132sd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|  | ||||
| define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fmadd132pd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|  | ||||
| define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fnmadd132sd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|  | ||||
| define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fnmadd132pd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|  | ||||
|  | ||||
|  | ||||
| define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fmsub132sd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|  | ||||
| define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fmsub132pd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|  | ||||
| define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fnmsub132sd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|  | ||||
| define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
|   ; CHECK: fnmsub132pd | ||||
|   %res = call <2 x double> @llvm.x86.fma4.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind | ||||
|   ret <2 x double> %res | ||||
| } | ||||
| declare <2 x double> @llvm.x86.fma4.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
| declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone | ||||
|   | ||||
| @@ -1,295 +1,295 @@ | ||||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s | ||||
|  | ||||
| ; VFMADD | ||||
| define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmaddss | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) { | ||||
|   ; CHECK: vfmaddss (%{{.*}}) | ||||
|   %x = load float *%a2 | ||||
|   %y = insertelement <4 x float> undef, float %x, i32 0 | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmaddss %{{.*}}, (%{{.*}}) | ||||
|   %x = load float *%a1 | ||||
|   %y = insertelement <4 x float> undef, float %x, i32 0 | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmaddsd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) { | ||||
|   ; CHECK: vfmaddsd (%{{.*}}) | ||||
|   %x = load double *%a2 | ||||
|   %y = insertelement <2 x double> undef, double %x, i32 0 | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmaddsd %{{.*}}, (%{{.*}}) | ||||
|   %x = load double *%a1 | ||||
|   %y = insertelement <2 x double> undef, double %x, i32 0 | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmaddps | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) { | ||||
|   ; CHECK: vfmaddps (%{{.*}}) | ||||
|   %x = load <4 x float>* %a2 | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmaddps %{{.*}}, (%{{.*}}) | ||||
|   %x = load <4 x float>* %a1 | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmaddpd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) { | ||||
|   ; CHECK: vfmaddpd (%{{.*}}) | ||||
|   %x = load <2 x double>* %a2 | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmaddpd %{{.*}}, (%{{.*}}) | ||||
|   %x = load <2 x double>* %a1 | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
| define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
|   ; CHECK: vfmaddps | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 8 x float > %res | ||||
| } | ||||
| declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
| declare < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
|  | ||||
| define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
| define < 4 x double > @test_x86_fma_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
|   ; CHECK: vfmaddpd | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x double > %res | ||||
| } | ||||
| declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
| declare < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
|  | ||||
| ; VFMSUB | ||||
| define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmsubss | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmsubsd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmsubps | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmsubpd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
| define < 8 x float > @test_x86_fma_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
|   ; CHECK: vfmsubps | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 8 x float > %res | ||||
| } | ||||
| declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
| declare < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
|  | ||||
| define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
| define < 4 x double > @test_x86_fma_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
|   ; CHECK: vfmsubpd | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x double > %res | ||||
| } | ||||
| declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
| declare < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
|  | ||||
| ; VFNMADD | ||||
| define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfnmaddss | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfnmaddsd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfnmaddps | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfnmaddpd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
| define < 8 x float > @test_x86_fma_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
|   ; CHECK: vfnmaddps | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 8 x float > %res | ||||
| } | ||||
| declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
| declare < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
|  | ||||
| define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
| define < 4 x double > @test_x86_fma_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
|   ; CHECK: vfnmaddpd | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x double > %res | ||||
| } | ||||
| declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
| declare < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
|  | ||||
| ; VFNMSUB | ||||
| define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfnmsubss | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfnmsubsd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfnmsubps | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfnmsubpd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
| define < 8 x float > @test_x86_fma_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
|   ; CHECK: vfnmsubps | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 8 x float > %res | ||||
| } | ||||
| declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
| declare < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
|  | ||||
| define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
| define < 4 x double > @test_x86_fma_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
|   ; CHECK: vfnmsubpd | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x double > %res | ||||
| } | ||||
| declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
| declare < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
|  | ||||
| ; VFMADDSUB | ||||
| define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmaddsubps | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmaddsubpd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
| define < 8 x float > @test_x86_fma_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
|   ; CHECK: vfmaddsubps | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 8 x float > %res | ||||
| } | ||||
| declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
| declare < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
|  | ||||
| define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
| define < 4 x double > @test_x86_fma_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
|   ; CHECK: vfmaddsubpd | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x double > %res | ||||
| } | ||||
| declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
| declare < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
|  | ||||
| ; VFMSUBADD | ||||
| define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
| define < 4 x float > @test_x86_fma_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) { | ||||
|   ; CHECK: vfmsubaddps | ||||
|   %res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x float > %res | ||||
| } | ||||
| declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
| declare < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone | ||||
|  | ||||
| define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
| define < 2 x double > @test_x86_fma_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) { | ||||
|   ; CHECK: vfmsubaddpd | ||||
|   %res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 2 x double > %res | ||||
| } | ||||
| declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
| declare < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone | ||||
|  | ||||
| define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
| define < 8 x float > @test_x86_fma_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) { | ||||
|   ; CHECK: vfmsubaddps | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1] | ||||
|   ret < 8 x float > %res | ||||
| } | ||||
| declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
| declare < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone | ||||
|  | ||||
| define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
| define < 4 x double > @test_x86_fma_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) { | ||||
|   ; CHECK: vfmsubaddpd | ||||
|   ; CHECK: ymm | ||||
|   %res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   %res = call < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1] | ||||
|   ret < 4 x double > %res | ||||
| } | ||||
| declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
| declare < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone | ||||
|   | ||||
		Reference in New Issue
	
	Block a user