mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	Use vld1/vst1 to load/store f64 if alignment is < 4 and the target allows unaligned access. rdar://12091029
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161962 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		@@ -8808,6 +8808,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
 | 
				
			|||||||
  case MVT::i16:
 | 
					  case MVT::i16:
 | 
				
			||||||
  case MVT::i32:
 | 
					  case MVT::i32:
 | 
				
			||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
 | 
					  case MVT::f64:
 | 
				
			||||||
 | 
					    return Subtarget->hasNEON();
 | 
				
			||||||
  // FIXME: VLD1 etc with standard alignment is legal.
 | 
					  // FIXME: VLD1 etc with standard alignment is legal.
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -242,6 +242,9 @@ def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
 | 
				
			|||||||
def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "
 | 
					def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "
 | 
				
			||||||
                                 "Subtarget->isTargetDarwin()">;
 | 
					                                 "Subtarget->isTargetDarwin()">;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def IsLE             : Predicate<"TLI.isLittleEndian()">;
 | 
				
			||||||
 | 
					def IsBE             : Predicate<"TLI.isBigEndian()">;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
// ARM Flag Definitions.
 | 
					// ARM Flag Definitions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> {
 | 
				
			|||||||
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
 | 
					  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<LoadSDNode>(N)->getAlignment() == 2;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
 | 
				
			||||||
 | 
					                                 (store node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->getAlignment() == 2;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<LoadSDNode>(N)->getAlignment() == 1;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
 | 
				
			||||||
 | 
					                             (store node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->getAlignment() == 1;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<LoadSDNode>(N)->getAlignment() < 4;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
 | 
				
			||||||
 | 
					                                    (store node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->getAlignment() < 4;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
// NEON-specific DAG Nodes.
 | 
					// NEON-specific DAG Nodes.
 | 
				
			||||||
@@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 | 
					} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Use vld1/vst1 for unaligned f64 load / store
 | 
				
			||||||
 | 
					def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
 | 
				
			||||||
 | 
					          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
 | 
				
			||||||
 | 
					def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
 | 
				
			||||||
 | 
					          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
 | 
				
			||||||
 | 
					def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
 | 
				
			||||||
 | 
					          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
 | 
				
			||||||
 | 
					def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
 | 
				
			||||||
 | 
					          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
 | 
				
			||||||
 | 
					def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
 | 
				
			||||||
 | 
					          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
 | 
				
			||||||
 | 
					def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
 | 
				
			||||||
 | 
					          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
// NEON pattern fragments
 | 
					// NEON pattern fragments
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>,
 | 
				
			|||||||
  let ParserMatchClass = FPImmOperand;
 | 
					  let ParserMatchClass = FPImmOperand;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<LoadSDNode>(N)->getAlignment() >= 4;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
 | 
				
			||||||
 | 
					                             (store node:$val, node:$ptr), [{
 | 
				
			||||||
 | 
					  return cast<StoreSDNode>(N)->getAlignment() >= 4;
 | 
				
			||||||
 | 
					}]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// The VCVT to/from fixed-point instructions encode the 'fbits' operand
 | 
					// The VCVT to/from fixed-point instructions encode the 'fbits' operand
 | 
				
			||||||
// (the number of fixed bits) differently than it appears in the assembly
 | 
					// (the number of fixed bits) differently than it appears in the assembly
 | 
				
			||||||
// source. It's encoded as "Size - fbits" where Size is the size of the
 | 
					// source. It's encoded as "Size - fbits" where Size is the size of the
 | 
				
			||||||
@@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
 | 
					def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
 | 
				
			||||||
                 IIC_fpLoad64, "vldr", "\t$Dd, $addr",
 | 
					                 IIC_fpLoad64, "vldr", "\t$Dd, $addr",
 | 
				
			||||||
                 [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
 | 
					                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
 | 
					def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
 | 
				
			||||||
                 IIC_fpLoad32, "vldr", "\t$Sd, $addr",
 | 
					                 IIC_fpLoad32, "vldr", "\t$Sd, $addr",
 | 
				
			||||||
@@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
 | 
					def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
 | 
				
			||||||
                 IIC_fpStore64, "vstr", "\t$Dd, $addr",
 | 
					                 IIC_fpStore64, "vstr", "\t$Dd, $addr",
 | 
				
			||||||
                 [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
 | 
					                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
 | 
					def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
 | 
				
			||||||
                 IIC_fpStore32, "vstr", "\t$Sd, $addr",
 | 
					                 IIC_fpStore32, "vstr", "\t$Sd, $addr",
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,25 +1,25 @@
 | 
				
			|||||||
; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
 | 
					; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
 | 
				
			||||||
; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
 | 
					; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
 | 
				
			||||||
; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
 | 
					; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED
 | 
				
			||||||
; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
; rdar://7113725
 | 
					; rdar://7113725
 | 
				
			||||||
 | 
					; rdar://12091029
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 | 
					define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
; GENERIC: t:
 | 
					; EXPANDED: t:
 | 
				
			||||||
; GENERIC: ldrb [[R2:r[0-9]+]]
 | 
					; EXPANDED: ldrb [[R2:r[0-9]+]]
 | 
				
			||||||
; GENERIC: ldrb [[R3:r[0-9]+]]
 | 
					; EXPANDED: ldrb [[R3:r[0-9]+]]
 | 
				
			||||||
; GENERIC: ldrb [[R12:r[0-9]+]]
 | 
					; EXPANDED: ldrb [[R12:r[0-9]+]]
 | 
				
			||||||
; GENERIC: ldrb [[R1:r[0-9]+]]
 | 
					; EXPANDED: ldrb [[R1:r[0-9]+]]
 | 
				
			||||||
; GENERIC: strb [[R1]]
 | 
					; EXPANDED: strb [[R1]]
 | 
				
			||||||
; GENERIC: strb [[R12]]
 | 
					; EXPANDED: strb [[R12]]
 | 
				
			||||||
; GENERIC: strb [[R3]]
 | 
					; EXPANDED: strb [[R3]]
 | 
				
			||||||
; GENERIC: strb [[R2]]
 | 
					; EXPANDED: strb [[R2]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; DARWIN_V6: t:
 | 
					; UNALIGNED: t:
 | 
				
			||||||
; DARWIN_V6: ldr r1
 | 
					; UNALIGNED: ldr r1
 | 
				
			||||||
; DARWIN_V6: str r1
 | 
					; UNALIGNED: str r1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  %__src1.i = bitcast i8* %b to i32*              ; <i32*> [#uses=1]
 | 
					  %__src1.i = bitcast i8* %b to i32*              ; <i32*> [#uses=1]
 | 
				
			||||||
  %__dest2.i = bitcast i8* %a to i32*             ; <i32*> [#uses=1]
 | 
					  %__dest2.i = bitcast i8* %a to i32*             ; <i32*> [#uses=1]
 | 
				
			||||||
@@ -27,3 +27,35 @@ entry:
 | 
				
			|||||||
  store i32 %tmp.i, i32* %__dest2.i, align 1
 | 
					  store i32 %tmp.i, i32* %__dest2.i, align 1
 | 
				
			||||||
  ret void
 | 
					  ret void
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define void @hword(double* %a, double* %b) nounwind {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					; EXPANDED: hword:
 | 
				
			||||||
 | 
					; EXPANDED-NOT: vld1
 | 
				
			||||||
 | 
					; EXPANDED: ldrh
 | 
				
			||||||
 | 
					; EXPANDED-NOT: vst1
 | 
				
			||||||
 | 
					; EXPANDED: strh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; UNALIGNED: hword:
 | 
				
			||||||
 | 
					; UNALIGNED: vld1.16
 | 
				
			||||||
 | 
					; UNALIGNED: vst1.16
 | 
				
			||||||
 | 
					  %tmp = load double* %a, align 2
 | 
				
			||||||
 | 
					  store double %tmp, double* %b, align 2
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define void @byte(double* %a, double* %b) nounwind {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					; EXPANDED: byte:
 | 
				
			||||||
 | 
					; EXPANDED-NOT: vld1
 | 
				
			||||||
 | 
					; EXPANDED: ldrb
 | 
				
			||||||
 | 
					; EXPANDED-NOT: vst1
 | 
				
			||||||
 | 
					; EXPANDED: strb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; UNALIGNED: byte:
 | 
				
			||||||
 | 
					; UNALIGNED: vld1.8
 | 
				
			||||||
 | 
					; UNALIGNED: vst1.8
 | 
				
			||||||
 | 
					  %tmp = load double* %a, align 1
 | 
				
			||||||
 | 
					  store double %tmp, double* %b, align 1
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user