Enable execution dependency fix pass for YMM registers when AVX2 is enabled. Add AVX2 logical operations to list of replaceable instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144179 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-15 19:24:33 +00:00 · 2011-11-09 09:37:21 +00:00
parent 94d80da4a0
commit b80ada98c5
3 changed files with 58 additions and 4 deletions
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3367,6 +3367,14 @@ static const unsigned ReplaceableInstrs[][3] = {
  { X86::VMOVUPSYmr,   X86::VMOVUPDYmr,   X86::VMOVDQUYmr  },
  { X86::VMOVUPSYrm,   X86::VMOVUPDYrm,   X86::VMOVDQUYrm  },
  { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr },
+  { X86::VANDNPSYrm,   X86::VANDNPDYrm,   X86::VPANDNYrm   },
+  { X86::VANDNPSYrr,   X86::VANDNPDYrr,   X86::VPANDNYrr   },
+  { X86::VANDPSYrm,    X86::VANDPDYrm,    X86::VPANDYrm    },
+  { X86::VANDPSYrr,    X86::VANDPDYrr,    X86::VPANDYrr    },
+  { X86::VORPSYrm,     X86::VORPDYrm,     X86::VPORYrm     },
+  { X86::VORPSYrr,     X86::VORPDYrr,     X86::VPORYrr     },
+  { X86::VXORPSYrm,    X86::VXORPDYrm,    X86::VPXORYrm    },
+  { X86::VXORPSYrr,    X86::VXORPDYrr,    X86::VPXORYrr    },
 };

 // FIXME: Some shuffle and unpack instructions have equivalents in different
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -135,10 +135,18 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
 bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
                                      CodeGenOpt::Level OptLevel) {
  bool ShouldPrint = false;
-  if (OptLevel != CodeGenOpt::None &&
-      (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
-    PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
-    ShouldPrint = true;
+  if (OptLevel != CodeGenOpt::None) {
+    if (Subtarget.hasXMMInt()) {
+      PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+      ShouldPrint = true;
+    }
+    if (Subtarget.hasAVX2()) {
+      // FIXME this should be turned on for just AVX, but the pass doesn't check
+      // that instructions are valid before replacing them and there are AVX2
+      // integer instructions in the table.
+      PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass));
+      ShouldPrint = true;
+    }
  }

  if (Subtarget.hasAVX() && UseVZeroUpper) {
--- a/test/CodeGen/X86/avx2-logic.ll
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpandn  %ymm
+define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %x = and <4 x i64> %a, %y
+  ret <4 x i64> %x
+}
+
+; CHECK: vpand %ymm
+define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = and <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpor %ymm
+define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = or <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpxor %ymm
+define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = xor <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}