[X86] Add more details in the comments of X86TargetLowering::getScalingFactorCost.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207432 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-21 09:40:22 +00:00 · 2014-04-28 18:39:57 +00:00 · 2014-04-28 18:39:57 +00:00 · aec1f2c2f5
commit aec1f2c2f5
parent 4e0cc51d79
1 changed files with 16 additions and 2 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -20833,8 +20833,22 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
                                            Type *Ty) const {
  // Scaling factors are not free at all.
  // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
-  // will take 2 allocations instead of 1 for plain addressing mode,
-  // i.e. inst (reg1).
+  // will take 2 allocations in the out of order engine instead of 1
+  // for plain addressing mode, i.e. inst (reg1).
+  // E.g.,
+  // vaddps (%rsi,%rdx), %ymm0, %ymm1
+  // Requires two allocations (one for the load, one for the computation)
+  // whereas:
+  // vaddps (%rsi), %ymm0, %ymm1
+  // Requires just 1 allocation, i.e., freeing allocations for other operations
+  // and having fewer micro operations to execute.
+  //
+  // For some X86 architectures, this is even worse because for instance for
+  // stores, the complex addressing mode forces the instruction to use the
+  // "load" ports instead of the dedicated "store" port.
+  // E.g., on Haswell:
+  // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
+  // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
  if (isLegalAddressingMode(AM, Ty))
    // Scale represents reg2 * scale, thus account for 1
    // as soon as we use a second register.