Reapply address space patch after fixing an issue in MemCopyOptimizer.

Added support for address spaces and added a isVolatile field to memcpy, memmove, and memset, e.g., llvm.memcpy.i32(i8*, i8*, i32, i32) -> llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100304 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 17:39:16 +00:00 · 2010-04-04 03:10:48 +00:00 · 2010-04-04 03:10:48 +00:00 · 20adc9dc46
commit 20adc9dc46
parent 0c08d09204
28 changed files with 317 additions and 145 deletions
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@ -532,17 +532,17 @@ public:
  SDValue getStackArgumentTokenFactor(SDValue Chain);

  SDValue getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
-                    SDValue Size, unsigned Align, bool AlwaysInline,
+                    SDValue Size, unsigned Align, bool isVol, bool AlwaysInline,
                    const Value *DstSV, uint64_t DstSVOff,
                    const Value *SrcSV, uint64_t SrcSVOff);

  SDValue getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
-                     SDValue Size, unsigned Align,
+                     SDValue Size, unsigned Align, bool isVol,
                     const Value *DstSV, uint64_t DstOSVff,
                     const Value *SrcSV, uint64_t SrcSVOff);

  SDValue getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
-                    SDValue Size, unsigned Align,
+                    SDValue Size, unsigned Align, bool isVol,
                    const Value *DstSV, uint64_t DstSVOff);

  /// getSetCC - Helper function to make it easier to build SetCC's if you just
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@ -133,6 +133,13 @@ namespace llvm {
      return getAlignmentCst()->getZExtValue();
    }

+    ConstantInt *getVolatileCst() const {
+      return cast<ConstantInt>(const_cast<Value*>(getOperand(5)));
+    }
+    bool isVolatile() const {
+      return getVolatileCst()->getZExtValue() != 0;
+    }
+
    /// getDest - This is just like getRawDest, but it strips off any cast
    /// instructions that feed it, giving the original input.  The returned
    /// value is guaranteed to be a pointer.
@ -155,7 +162,11 @@ namespace llvm {
    void setAlignment(Constant* A) {
      setOperand(4, A);
    }
-    
+
+    void setVolatile(Constant* V) {
+      setOperand(5, V);
+    }
+
    const Type *getAlignmentType() const {
      return getOperand(4)->getType();
    }
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@ -224,16 +224,16 @@ def int_stackprotector : Intrinsic<[],
 //

 def int_memcpy  : Intrinsic<[],
-                             [llvm_ptr_ty, llvm_ptr_ty, llvm_anyint_ty,
-                              llvm_i32_ty],
+                             [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
+                              llvm_i32_ty, llvm_i1_ty],
                            [IntrWriteArgMem, NoCapture<0>, NoCapture<1>]>;
 def int_memmove : Intrinsic<[],
-                            [llvm_ptr_ty, llvm_ptr_ty, llvm_anyint_ty,
-                             llvm_i32_ty],
+                            [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
+                             llvm_i32_ty, llvm_i1_ty],
                            [IntrWriteArgMem, NoCapture<0>, NoCapture<1>]>;
 def int_memset  : Intrinsic<[],
-                            [llvm_ptr_ty, llvm_i8_ty, llvm_anyint_ty,
-                             llvm_i32_ty],
+                            [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
+                             llvm_i32_ty, llvm_i1_ty],
                            [IntrWriteArgMem, NoCapture<0>]>;

 // These functions do not actually read memory, but they are sensitive to the
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@ -917,6 +917,11 @@ public:
    Value *Args[] = { Arg1, Arg2, Arg3, Arg4 };
    return Insert(CallInst::Create(Callee, Args, Args+4), Name);
  }
+  CallInst *CreateCall5(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
+                        Value *Arg4, Value *Arg5, const Twine &Name = "") {
+    Value *Args[] = { Arg1, Arg2, Arg3, Arg4, Arg5 };
+    return Insert(CallInst::Create(Callee, Args, Args+5), Name);
+  }

  template<typename InputIterator>
  CallInst *CreateCall(Value *Callee, InputIterator ArgBegin,
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -1191,7 +1191,7 @@ public:
  EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                          SDValue Chain,
                          SDValue Op1, SDValue Op2,
-                          SDValue Op3, unsigned Align,
+                          SDValue Op3, unsigned Align, bool isVolatile,
                          bool AlwaysInline,
                          const Value *DstSV, uint64_t DstOff,
                          const Value *SrcSV, uint64_t SrcOff) {
@ -1208,7 +1208,7 @@ public:
  EmitTargetCodeForMemmove(SelectionDAG &DAG, DebugLoc dl,
                           SDValue Chain,
                           SDValue Op1, SDValue Op2,
-                           SDValue Op3, unsigned Align,
+                           SDValue Op3, unsigned Align, bool isVolatile,
                           const Value *DstSV, uint64_t DstOff,
                           const Value *SrcSV, uint64_t SrcOff) {
    return SDValue();
@ -1224,7 +1224,7 @@ public:
  EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                          SDValue Chain,
                          SDValue Op1, SDValue Op2,
-                          SDValue Op3, unsigned Align,
+                          SDValue Op3, unsigned Align, bool isVolatile,
                          const Value *DstSV, uint64_t DstOff) {
    return SDValue();
  }
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@ -46,8 +46,8 @@ namespace llvm {
  
  /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This
  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
-                    unsigned Align, IRBuilder<> &B, const TargetData *TD);
+  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
+                    bool isVolatile, IRBuilder<> &B, const TargetData *TD);

  /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
  /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
@ -57,8 +57,8 @@ namespace llvm {

  /// EmitMemMove - Emit a call to the memmove function to the builder.  This
  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-  Value *EmitMemMove(Value *Dst, Value *Src, Value *Len,
-		                 unsigned Align, IRBuilder<> &B, const TargetData *TD);
+  Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
+                     bool isVolatile, IRBuilder<> &B, const TargetData *TD);

  /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
  /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
@ -70,8 +70,8 @@ namespace llvm {
                    const TargetData *TD);

  /// EmitMemSet - Emit a call to the memset function
-  Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B,
-                    const TargetData *TD);
+  Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
+                    IRBuilder<> &B, const TargetData *TD);

  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
  /// (e.g.  'floor').  This function is known to take a single of type matching
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -3263,7 +3263,8 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
 static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
                                       SDValue Chain, SDValue Dst,
                                       SDValue Src, uint64_t Size,
-                                       unsigned Align, bool AlwaysInline,
+                                       unsigned Align, bool isVol,
+                                       bool AlwaysInline,
                                       const Value *DstSV, uint64_t DstSVOff,
                                       const Value *SrcSV, uint64_t SrcSVOff) {
  // Turn a memcpy of undef to nop.
@ -3322,7 +3323,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
      Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
      Store = DAG.getStore(Chain, dl, Value,
                           getMemBasePlusOffset(Dst, DstOff, DAG),
-                           DstSV, DstSVOff + DstOff, false, false, Align);
+                           DstSV, DstSVOff + DstOff, isVol, false, Align);
    } else {
      // The type might not be legal for the target.  This should only happen
      // if the type is smaller than a legal type, as on PPC, so the right
@ -3333,11 +3334,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
      assert(NVT.bitsGE(VT));
      Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                             getMemBasePlusOffset(Src, SrcOff, DAG),
-                             SrcSV, SrcSVOff + SrcOff, VT, false, false,
+                             SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
                             MinAlign(SrcAlign, SrcOff));
      Store = DAG.getTruncStore(Chain, dl, Value,
                                getMemBasePlusOffset(Dst, DstOff, DAG),
-                                DstSV, DstSVOff + DstOff, VT, false, false,
+                                DstSV, DstSVOff + DstOff, VT, isVol, false,
                                Align);
    }
    OutChains.push_back(Store);
@ -3352,7 +3353,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
                                        SDValue Chain, SDValue Dst,
                                        SDValue Src, uint64_t Size,
-                                        unsigned Align,bool AlwaysInline,
+                                        unsigned Align,  bool isVol,
+                                        bool AlwaysInline,
                                        const Value *DstSV, uint64_t DstSVOff,
                                        const Value *SrcSV, uint64_t SrcSVOff) {
  // Turn a memmove of undef to nop.
@ -3403,7 +3405,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

    Value = DAG.getLoad(VT, dl, Chain,
                        getMemBasePlusOffset(Src, SrcOff, DAG),
-                        SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign);
+                        SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
    LoadValues.push_back(Value);
    LoadChains.push_back(Value.getValue(1));
    SrcOff += VTSize;
@ -3418,7 +3420,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

    Store = DAG.getStore(Chain, dl, LoadValues[i],
                         getMemBasePlusOffset(Dst, DstOff, DAG),
-                         DstSV, DstSVOff + DstOff, false, false, Align);
+                         DstSV, DstSVOff + DstOff, isVol, false, Align);
    OutChains.push_back(Store);
    DstOff += VTSize;
  }
@ -3430,7 +3432,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
                               SDValue Chain, SDValue Dst,
                               SDValue Src, uint64_t Size,
-                               unsigned Align,
+                               unsigned Align, bool isVol,
                               const Value *DstSV, uint64_t DstSVOff) {
  // Turn a memset of undef to nop.
  if (Src.getOpcode() == ISD::UNDEF)
@ -3472,7 +3474,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
    SDValue Value = getMemsetValue(Src, VT, DAG, dl);
    SDValue Store = DAG.getStore(Chain, dl, Value,
                                 getMemBasePlusOffset(Dst, DstOff, DAG),
-                                 DstSV, DstSVOff + DstOff, false, false, 0);
+                                 DstSV, DstSVOff + DstOff, isVol, false, 0);
    OutChains.push_back(Store);
    DstOff += VTSize;
  }
@ -3483,7 +3485,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,

 SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
                                SDValue Src, SDValue Size,
-                                unsigned Align, bool AlwaysInline,
+                                unsigned Align, bool isVol, bool AlwaysInline,
                                const Value *DstSV, uint64_t DstSVOff,
                                const Value *SrcSV, uint64_t SrcSVOff) {

@ -3497,7 +3499,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,

    SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
                                             ConstantSize->getZExtValue(),Align,
-                                       false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+                                isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
    if (Result.getNode())
      return Result;
  }
@ -3506,7 +3508,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
  // code. If the target chooses to do this, this is the next best.
  SDValue Result =
    TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
-                                AlwaysInline,
+                                isVol, AlwaysInline,
                                DstSV, DstSVOff, SrcSV, SrcSVOff);
  if (Result.getNode())
    return Result;
@ -3516,11 +3518,12 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
  if (AlwaysInline) {
    assert(ConstantSize && "AlwaysInline requires a constant size!");
    return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
-                                   ConstantSize->getZExtValue(), Align, true,
-                                   DstSV, DstSVOff, SrcSV, SrcSVOff);
+                                   ConstantSize->getZExtValue(), Align, isVol,
+                                   true, DstSV, DstSVOff, SrcSV, SrcSVOff);
  }

  // Emit a library call.
+  assert(!isVol && "library memcpy does not support volatile");
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
@ -3541,7 +3544,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,

 SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
                                 SDValue Src, SDValue Size,
-                                 unsigned Align,
+                                 unsigned Align, bool isVol,
                                 const Value *DstSV, uint64_t DstSVOff,
                                 const Value *SrcSV, uint64_t SrcSVOff) {

@ -3555,8 +3558,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,

    SDValue Result =
      getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
-                               ConstantSize->getZExtValue(),
-                               Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+                               ConstantSize->getZExtValue(), Align, isVol,
+                               false, DstSV, DstSVOff, SrcSV, SrcSVOff);
    if (Result.getNode())
      return Result;
  }
@ -3564,12 +3567,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
  // Then check to see if we should lower the memmove with target-specific
  // code. If the target chooses to do this, this is the next best.
  SDValue Result =
-    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align,
+    TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                 DstSV, DstSVOff, SrcSV, SrcSVOff);
  if (Result.getNode())
    return Result;

  // Emit a library call.
+  assert(!isVol && "library memmove does not support volatile");
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
@ -3590,7 +3594,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,

 SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
                                SDValue Src, SDValue Size,
-                                unsigned Align,
+                                unsigned Align, bool isVol,
                                const Value *DstSV, uint64_t DstSVOff) {

  // Check to see if we should lower the memset to stores first.
@ -3601,9 +3605,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
    if (ConstantSize->isNullValue())
      return Chain;

-    SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
-                                     ConstantSize->getZExtValue(),
-                                     Align, DstSV, DstSVOff);
+    SDValue Result =
+      getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+                      Align, isVol, DstSV, DstSVOff);
+
    if (Result.getNode())
      return Result;
  }
@ -3611,12 +3616,13 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
  // Then check to see if we should lower the memset with target-specific
  // code. If the target chooses to do this, this is the next best.
  SDValue Result =
-    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
                                DstSV, DstSVOff);
  if (Result.getNode())
    return Result;

  // Emit a library call.
+  assert(!isVol && "library memset does not support volatile");
  const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -3731,28 +3731,50 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
  case Intrinsic::longjmp:
    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
  case Intrinsic::memcpy: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
    SDValue Op1 = getValue(I.getOperand(1));
    SDValue Op2 = getValue(I.getOperand(2));
    SDValue Op3 = getValue(I.getOperand(3));
    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+    bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
                              I.getOperand(1), 0, I.getOperand(2), 0));
    return 0;
  }
  case Intrinsic::memset: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
    SDValue Op1 = getValue(I.getOperand(1));
    SDValue Op2 = getValue(I.getOperand(2));
    SDValue Op3 = getValue(I.getOperand(3));
    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
+    bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
                              I.getOperand(1), 0));
    return 0;
  }
  case Intrinsic::memmove: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
    SDValue Op1 = getValue(I.getOperand(1));
    SDValue Op2 = getValue(I.getOperand(2));
    SDValue Op3 = getValue(I.getOperand(3));
    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+    bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();

    // If the source and destination are known to not be aliases, we can
    // lower memmove as memcpy.
@ -3761,12 +3783,12 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
      Size = C->getZExtValue();
    if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
        AliasAnalysis::NoAlias) {
-      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
-                                I.getOperand(1), 0, I.getOperand(2), 0));
+      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 
+                                false, I.getOperand(1), 0, I.getOperand(2), 0));
      return 0;
    }

-    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
+    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
                               I.getOperand(1), 0, I.getOperand(2), 0));
    return 0;
  }
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -861,7 +861,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       /*AlwaysInline=*/false, NULL, 0, NULL, 0);
+                       /*isVolatile=*/false, /*AlwaysInline=*/false,
+                       NULL, 0, NULL, 0);
 }

 /// LowerMemOpCallTo - Store the argument to the stack.
@ -2053,7 +2054,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                           SDValue Chain,
                                           SDValue Dst, SDValue Src,
                                           SDValue Size, unsigned Align,
-                                           bool AlwaysInline,
+                                           bool isVolatile, bool AlwaysInline,
                                         const Value *DstSV, uint64_t DstSVOff,
                                         const Value *SrcSV, uint64_t SrcSVOff){
  // Do repeated 4-byte loads and stores. To be improved.
@ -2089,7 +2090,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
      Loads[i] = DAG.getLoad(VT, dl, Chain,
                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                         DAG.getConstant(SrcOff, MVT::i32)),
-                             SrcSV, SrcSVOff + SrcOff, false, false, 0);
+                             SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
      TFOps[i] = Loads[i].getValue(1);
      SrcOff += VTSize;
    }
@ -2100,7 +2101,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                              DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                          DAG.getConstant(DstOff, MVT::i32)),
-                              DstSV, DstSVOff + DstOff, false, false, 0);
+                              DstSV, DstSVOff + DstOff, isVolatile, false, 0);
      DstOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@ -305,7 +305,7 @@ namespace llvm {
                                      SDValue Chain,
                                      SDValue Dst, SDValue Src,
                                      SDValue Size, unsigned Align,
-                                      bool AlwaysInline,
+                                      bool isVolatile, bool AlwaysInline,
                                      const Value *DstSV, uint64_t DstSVOff,
                                      const Value *SrcSV, uint64_t SrcSVOff);
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -2392,7 +2392,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       false, NULL, 0, NULL, 0);
+                       false, false, NULL, 0, NULL, 0);
 }

 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -1434,7 +1434,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          DebugLoc dl) {
  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
+                       /*isVolatile*/false, /*AlwaysInline=*/true,
+                       NULL, 0, NULL, 0);
 }

 /// IsTailCallConvention - Return true if the calling convention is one that
@ -6548,6 +6549,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                           SDValue Chain,
                                           SDValue Dst, SDValue Src,
                                           SDValue Size, unsigned Align,
+                                           bool isVolatile,
                                           const Value *DstSV,
                                           uint64_t DstSVOff) {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@ -6676,7 +6678,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                      DAG.getConstant(Offset, AddrVT)),
                          Src,
                          DAG.getConstant(BytesLeft, SizeVT),
-                          Align, DstSV, DstSVOff + Offset);
+                          Align, isVolatile, DstSV, DstSVOff + Offset);
  }

  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@ -6687,7 +6689,7 @@ SDValue
 X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                      SDValue Chain, SDValue Dst, SDValue Src,
                                      SDValue Size, unsigned Align,
-                                      bool AlwaysInline,
+                                      bool isVolatile, bool AlwaysInline,
                                      const Value *DstSV, uint64_t DstSVOff,
                                      const Value *SrcSV, uint64_t SrcSVOff) {
  // This requires the copy size to be a constant, preferrably
@ -6746,7 +6748,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
-                                    Align, AlwaysInline,
+                                    Align, isVolatile, AlwaysInline,
                                    DstSV, DstSVOff + Offset,
                                    SrcSV, SrcSVOff + Offset));
  }
@ -6829,8 +6831,8 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();

  return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
-                       DAG.getIntPtrConstant(24), 8, false,
-                       DstSV, 0, SrcSV, 0);
+                       DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
+                       false, DstSV, 0, SrcSV, 0);
 }

 SDValue
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -741,12 +741,13 @@ namespace llvm {
                                    SDValue Chain,
                                    SDValue Dst, SDValue Src,
                                    SDValue Size, unsigned Align,
+                                    bool isVolatile,
                                    const Value *DstSV, uint64_t DstSVOff);
    SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                    SDValue Chain,
                                    SDValue Dst, SDValue Src,
                                    SDValue Size, unsigned Align,
-                                    bool AlwaysInline,
+                                    bool isVolatile, bool AlwaysInline,
                                    const Value *DstSV, uint64_t DstSVOff,
                                    const Value *SrcSV, uint64_t SrcSVOff);
    
@ -756,7 +757,7 @@ namespace llvm {
    /// block, the number of args, and whether or not the second arg is
    /// in memory or not.
    MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
-				unsigned argNum, bool inMem) const;
+                                unsigned argNum, bool inMem) const;

    /// Utility function to emit atomic bitwise operations (and, or, xor).
    /// It takes the bitwise instruction to expand, the associated machine basic
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@ -1443,7 +1443,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
        return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
                              LD->getBasePtr(),
                              DAG.getConstant(StoreBits/8, MVT::i32),
-                              Alignment, ST->getSrcValue(),
+                              Alignment, false, ST->getSrcValue(),
                              ST->getSrcValueOffset(), LD->getSrcValue(),
                              LD->getSrcValueOffset());
      }
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@ -136,8 +136,14 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
    return 0;  // If not 1/2/4/8 bytes, exit.
  
  // Use an integer load+store unless we can find something better.
-  Type *NewPtrTy =
-            PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));
+  unsigned SrcAddrSp =
+    cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace();
+  unsigned DstAddrSp =
+    cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace();
+
+  const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
+  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
+  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
  
  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
@ -167,8 +173,10 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
          break;
      }
      
-      if (SrcETy->isSingleValueType())
-        NewPtrTy = PointerType::getUnqual(SrcETy);
+      if (SrcETy->isSingleValueType()) {
+        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
+        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+      }
    }
  }
  
@ -178,11 +186,12 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);
  
-  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
-  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
-  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy);
+  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy);
+  Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
  InsertNewInstBefore(L, *MI);
-  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
+  InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
+                      *MI);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
@ -275,10 +284,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
-          const Type *Tys[1];
-          Tys[0] = CI.getOperand(3)->getType();
+          const Type *Tys[3] = { CI.getOperand(1)->getType(),
+                                 CI.getOperand(2)->getType(),
+                                 CI.getOperand(3)->getType() };
          CI.setOperand(0, 
-                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
+                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
          Changed = true;
        }
    }
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@ -413,7 +413,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);
  
-  Function *MemSetF = 0;
  
  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
@ -433,29 +432,40 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
    // memset block.  This ensure that the memset is dominated by any addressing
    // instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;
-  
-    if (MemSetF == 0) {
-      const Type *Ty = Type::getInt64Ty(Context);
-      MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
-    }
-    
+
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;
-  
+
+    // Determine alignment
+    unsigned Alignment = Range.Alignment;
+    if (Alignment == 0) {
+      const Type *EltType = 
+         cast<PointerType>(StartPtr->getType())->getElementType();
+      Alignment = TD->getABITypeAlignment(EltType);
+    }
+
    // Cast the start ptr to be i8* as memset requires.
-    const Type *i8Ptr = Type::getInt8PtrTy(Context);
-    if (StartPtr->getType() != i8Ptr)
+    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
+    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
+                                                  StartPTy->getAddressSpace());
+    if (StartPTy!= i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                 InsertPt);
-  
+
    Value *Ops[] = {
      StartPtr, ByteVal,   // Start, value
      // size
      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
      // align
-      ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
+      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
+      // volatile
+      ConstantInt::get(Type::getInt1Ty(Context), 0),
    };
-    Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
+    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+
+    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+
+    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
    DEBUG(dbgs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i];
@ -680,16 +690,19 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
    return false;
  
  // If all checks passed, then we can transform these memcpy's
-  const Type *Ty = M->getLength()->getType();
+  const Type *ArgTys[3] = { M->getRawDest()->getType(),
+                            MDep->getRawSource()->getType(),
+                            M->getLength()->getType() };
  Function *MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
-                                 M->getIntrinsicID(), &Ty, 1);
+                                 M->getIntrinsicID(), ArgTys, 3);
    
-  Value *Args[4] = {
-    M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+  Value *Args[5] = {
+    M->getRawDest(), MDep->getRawSource(), M->getLength(),
+    M->getAlignmentCst(), M->getVolatileCst()
  };
  
-  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
  
  
  // If C and M don't interfere, then this is a valid transformation.  If they
@ -728,8 +741,10 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
  
  // If not, then we know we can transform this.
  Module *Mod = M->getParent()->getParent()->getParent();
-  const Type *Ty = M->getLength()->getType();
-  M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
+  const Type *ArgTys[3] = { M->getRawDest()->getType(),
+                            M->getRawSource()->getType(),
+                            M->getLength()->getType() };
+  M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));

  // MemDep may have over conservative information about this instruction, just
  // conservatively flush it from the cache.
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@ -858,8 +858,17 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
    
    // Cast the other pointer (if we have one) to BytePtrTy. 
-    if (OtherElt && OtherElt->getType() != BytePtrTy)
-      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI);
+    if (OtherElt && OtherElt->getType() != BytePtrTy) {
+      // Preserve address space of OtherElt
+      const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
+      const PointerType* PTy = cast<PointerType>(BytePtrTy);
+      if (OtherPTy->getElementType() != PTy->getElementType()) {
+        Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
+                                             OtherPTy->getAddressSpace());
+        OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
+                                   OtherElt->getNameStr(), MI);
+      }
+    }
    
    unsigned EltSize = TD->getTypeAllocSize(EltTy);
    
@ -870,17 +879,28 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
        SROADest ? OtherElt : EltPtr,  // Src ptr
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        // Align
-        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
+        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
+        MI->getVolatileCst()
      };
-      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+      // In case we fold the address space overloaded memcpy of A to B
+      // with memcpy of B to C, change the function to be a memcpy of A to C.
+      const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
+                            Ops[2]->getType() };
+      Module *M = MI->getParent()->getParent()->getParent();
+      TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
+      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
    } else {
      assert(isa<MemSetInst>(MI));
      Value *Ops[] = {
        EltPtr, MI->getOperand(2),  // Dest, Value,
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
-        Zero  // Align
+        Zero,  // Align
+        ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
      };
-      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+      const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+      Module *M = MI->getParent()->getParent()->getParent();
+      TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
    }
  }
  DeadInsts.push_back(MI);
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@ -142,7 +142,8 @@ struct StrCatOpt : public LibCallOptimization {
    // We have enough information to now generate the memcpy call to do the
    // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
    EmitMemCpy(CpyDst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
+                                1, false, B, TD);
  }
 };

@ -383,7 +384,8 @@ struct StrCpyOpt : public LibCallOptimization {
                    CI->getOperand(3), B, TD);
    else
      EmitMemCpy(Dst, Src,
-                 ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
+                 ConstantInt::get(TD->getIntPtrType(*Context), Len),
+                                  1, false, B, TD);
    return Dst;
  }
 };
@ -411,8 +413,8 @@ struct StrNCpyOpt : public LibCallOptimization {

    if (SrcLen == 0) {
      // strncpy(x, "", y) -> memset(x, '\0', y, 1)
-      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
-		             B, TD);
+      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
+                 LenOp, false, B, TD);
      return Dst;
    }

@ -432,7 +434,8 @@ struct StrNCpyOpt : public LibCallOptimization {

    // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
    EmitMemCpy(Dst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len),
+                                1, false, B, TD);

    return Dst;
  }
@ -593,7 +596,7 @@ struct MemCpyOpt : public LibCallOptimization {

    // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
    EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
-               CI->getOperand(3), 1, B, TD);
+               CI->getOperand(3), 1, false, B, TD);
    return CI->getOperand(1);
  }
 };
@ -615,7 +618,7 @@ struct MemMoveOpt : public LibCallOptimization {

    // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
    EmitMemMove(CI->getOperand(1), CI->getOperand(2),
-                CI->getOperand(3), 1, B, TD);
+                CI->getOperand(3), 1, false, B, TD);
    return CI->getOperand(1);
  }
 };
@ -637,8 +640,8 @@ struct MemSetOpt : public LibCallOptimization {

    // memset(p, v, n) -> llvm.memset(p, v, n, 1)
    Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
-				 false);
-    EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B, TD);
+                                 false);
+    EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), false, B, TD);
    return CI->getOperand(1);
  }
 };
@ -999,7 +1002,7 @@ struct SPrintFOpt : public LibCallOptimization {
      // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
                 ConstantInt::get(TD->getIntPtrType(*Context),
-                 FormatStr.size()+1), 1, B, TD);
+                 FormatStr.size()+1), 1, false, B, TD);
      return ConstantInt::get(CI->getType(), FormatStr.size());
    }

@ -1013,11 +1016,11 @@ struct SPrintFOpt : public LibCallOptimization {
      // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
      if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
      Value *V = B.CreateTrunc(CI->getOperand(3),
-			       Type::getInt8Ty(*Context), "char");
+                               Type::getInt8Ty(*Context), "char");
      Value *Ptr = CastToCStr(CI->getOperand(1), B);
      B.CreateStore(V, Ptr);
      Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
-			"nul");
+                        "nul");
      B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);

      return ConstantInt::get(CI->getType(), 1);
@ -1034,7 +1037,7 @@ struct SPrintFOpt : public LibCallOptimization {
      Value *IncLen = B.CreateAdd(Len,
                                  ConstantInt::get(Len->getType(), 1),
                                  "leninc");
-      EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD);
+      EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD);

      // The sprintf result is the unincremented number of bytes in the string.
      return B.CreateIntCast(Len, CI->getType(), false);
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@ -109,15 +109,16 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,

 /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This always
 /// expects that Len has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
-                        unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
+                        bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  const Type *Ty = Len->getType();
-  Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
+  const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
+  Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
  Dst = CastToCStr(Dst, B);
  Src = CastToCStr(Src, B);
-  return B.CreateCall4(MemCpy, Dst, Src, Len,
-                       ConstantInt::get(B.getInt32Ty(), Align));
+  return B.CreateCall5(MemCpy, Dst, Src, Len,
+                       ConstantInt::get(B.getInt32Ty(), Align),
+                       ConstantInt::get(B.getInt1Ty(), isVolatile));
 }

 /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
@ -146,16 +147,18 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,

 /// EmitMemMove - Emit a call to the memmove function to the builder.  This
 /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len,
-                         unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
+                         bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
  Module *M = B.GetInsertBlock()->getParent()->getParent();
  LLVMContext &Context = B.GetInsertBlock()->getContext();
-  const Type *Ty = TD->getIntPtrType(Context);
-  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+  const Type *ArgTys[3] = { Dst->getType(), Src->getType(),
+                            TD->getIntPtrType(Context) };
+  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3);
  Dst = CastToCStr(Dst, B);
  Src = CastToCStr(Src, B);
  Value *A = ConstantInt::get(B.getInt32Ty(), Align);
-  return B.CreateCall4(MemMove, Dst, Src, Len, A);
+  Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
+  return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol);
 }

 /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
@ -206,15 +209,15 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
 }

 /// EmitMemSet - Emit a call to the memset function
-Value *llvm::EmitMemSet(Value *Dst, Value *Val,
-                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
+                        IRBuilder<> &B, const TargetData *TD) {
 Module *M = B.GetInsertBlock()->getParent()->getParent();
 Intrinsic::ID IID = Intrinsic::memset;
- const Type *Tys[1];
- Tys[0] = Len->getType();
- Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
+ const Type *Tys[2] = { Dst->getType(), Len->getType() };
+ Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2);
 Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
- return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
+ Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
+ return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol);
 }

 /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
@ -381,7 +384,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
  if (Name == "__memcpy_chk") {
    if (isFoldable(4, 3, false)) {
      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
-                 1, B, TD);
+                 1, false, B, TD);
      replaceCall(CI->getOperand(1));
      return true;
    }
@ -396,7 +399,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
  if (Name == "__memmove_chk") {
    if (isFoldable(4, 3, false)) {
      EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
-                  1, B, TD);
+                  1, false, B, TD);
      replaceCall(CI->getOperand(1));
      return true;
    }
@ -407,7 +410,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
    if (isFoldable(4, 3, false)) {
      Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
                                   false);
-      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B, TD);
+      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), false, B, TD);
      replaceCall(CI->getOperand(1));
      return true;
    }
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@ -297,10 +297,10 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
                                          I->getName(), 
                                          &*Caller->begin()->begin());
        // Emit a memcpy.
-        const Type *Tys[] = { Type::getInt64Ty(Context) };
+        const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                       Intrinsic::memcpy, 
-                                                       Tys, 1);
+                                                       Tys, 3);
        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);

@ -309,17 +309,18 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
          Size = ConstantExpr::getSizeOf(AggTy);
        else
          Size = ConstantInt::get(Type::getInt64Ty(Context),
-                                         TD->getTypeStoreSize(AggTy));
+                                  TD->getTypeStoreSize(AggTy));

        // Always generate a memcpy of alignment 1 here because we don't know
        // the alignment of the src pointer.  Other optimizations can infer
        // better alignment.
        Value *CallArgs[] = {
          DestCast, SrcCast, Size,
-          ConstantInt::get(Type::getInt32Ty(Context), 1)
+          ConstantInt::get(Type::getInt32Ty(Context), 1),
+          ConstantInt::get(Type::getInt1Ty(Context), 0)
        };
        CallInst *TheMemCpy =
-          CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
+          CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);

        // If we have a call graph, update it.
        if (CG) {
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@ -145,6 +145,54 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
    }
    break;

+  case 'm': {
+    // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset to the
+    // new format that allows overloading the pointer for different address
+    // space (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16)
+    const char* NewFnName = NULL;
+    if (Name.compare(5,8,"memcpy.i",8) == 0) {
+      if (Name[13] == '8')
+        NewFnName = "llvm.memcpy.p0i8.p0i8.i8";
+      else if (Name.compare(13,2,"16") == 0)
+        NewFnName = "llvm.memcpy.p0i8.p0i8.i16";
+      else if (Name.compare(13,2,"32") == 0)
+        NewFnName = "llvm.memcpy.p0i8.p0i8.i32";
+      else if (Name.compare(13,2,"64") == 0)
+        NewFnName = "llvm.memcpy.p0i8.p0i8.i64";
+    } else if (Name.compare(5,9,"memmove.i",9) == 0) {
+      if (Name[14] == '8')
+        NewFnName = "llvm.memmove.p0i8.p0i8.i8";
+      else if (Name.compare(14,2,"16") == 0)
+        NewFnName = "llvm.memmove.p0i8.p0i8.i16";
+      else if (Name.compare(14,2,"32") == 0)
+        NewFnName = "llvm.memmove.p0i8.p0i8.i32";
+      else if (Name.compare(14,2,"64") == 0)
+        NewFnName = "llvm.memmove.p0i8.p0i8.i64";
+    }
+    else if (Name.compare(5,8,"memset.i",8) == 0) {
+      if (Name[13] == '8')
+        NewFnName = "llvm.memset.p0i8.i8";
+      else if (Name.compare(13,2,"16") == 0)
+        NewFnName = "llvm.memset.p0i8.i16";
+      else if (Name.compare(13,2,"32") == 0)
+        NewFnName = "llvm.memset.p0i8.i32";
+      else if (Name.compare(13,2,"64") == 0)
+        NewFnName = "llvm.memset.p0i8.i64";
+    }
+    if (NewFnName) {
+      const FunctionType *FTy = F->getFunctionType();
+      NewFn = cast<Function>(M->getOrInsertFunction(NewFnName, 
+                                            FTy->getReturnType(),
+                                            FTy->getParamType(0),
+                                            FTy->getParamType(1),
+                                            FTy->getParamType(2),
+                                            FTy->getParamType(3),
+                                            Type::getInt1Ty(F->getContext()),
+                                            (Type *)0));
+      return true;
+    }
+    break;
+  }
  case 'p':
    //  This upgrades the llvm.part.select overloaded intrinsic names to only 
    //  use one type specifier in the name. We only care about the old format
@ -472,6 +520,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
    CI->eraseFromParent();
  }
  break;
+  case Intrinsic::memcpy:
+  case Intrinsic::memmove:
+  case Intrinsic::memset: {
+    // Add isVolatile
+    const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
+    Value *Operands[5] = { CI->getOperand(1), CI->getOperand(2),
+                           CI->getOperand(3), CI->getOperand(4),
+                           llvm::ConstantInt::get(I1Ty, 0) };
+    CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
+                                       CI->getName(), CI);
+    NewCI->setTailCall(CI->isTailCall());
+    NewCI->setCallingConv(CI->getCallingConv());
+    //  Handle any uses of the old CallInst.
+    if (!CI->use_empty())
+      //  Replace all uses of the old call with the new cast which has the 
+      //  correct type.
+      CI->replaceAllUsesWith(NewCI);
+    
+    //  Clean up the old call now that it has been completely upgraded.
+    CI->eraseFromParent();
+    break;
+  }
  }
 }

--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@ -103,7 +103,7 @@ define i32 @test4(i8* %P) {
  ret i32 %sub
 ; CHECK: @test4
 ; CHECK: load i32* @G
-; CHECK: memset.i32
+; CHECK: memset.p0i8.i32
 ; CHECK-NOT: load
 ; CHECK: sub i32 %tmp, %tmp
 }
@ -118,7 +118,7 @@ define i32 @test5(i8* %P, i32 %Len) {
  ret i32 %sub
 ; CHECK: @test5
 ; CHECK: load i32* @G
-; CHECK: memcpy.i32
+; CHECK: memcpy.p0i8.p0i8.i32
 ; CHECK-NOT: load
 ; CHECK: sub i32 %tmp, %tmp
 }
--- a/test/Bitcode/memcpy.ll
+++ b/test/Bitcode/memcpy.ll
@ -8,6 +8,7 @@ entry:
        tail call void @llvm.memcpy.i64( i8* %tmp.1, i8* %tmp.3, i64 100000, i32 1 )
        tail call void @llvm.memset.i32( i8* %tmp.3, i8 14, i32 10000, i32 0 )
        tail call void @llvm.memmove.i32( i8* %tmp.1, i8* %tmp.3, i32 123124, i32 1 )
+        tail call void @llvm.memmove.i64( i8* %tmp.1, i8* %tmp.3, i64 123124, i32 1 )
        ret void
 }

@ -19,3 +20,4 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32)

 declare void @llvm.memmove.i32(i8*, i8*, i32, i32)

+declare void @llvm.memmove.i64(i8*, i8*, i32, i32)
--- a/test/Transforms/InstCombine/memset_chk.ll
+++ b/test/Transforms/InstCombine/memset_chk.ll
@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3

 define i32 @t() nounwind ssp {
 ; CHECK: @t
-; CHECK: @llvm.memset.i64
+; CHECK: @llvm.memset.p0i8.i64
 entry:
  %0 = alloca %struct.data, align 8               ; <%struct.data*> [#uses=1]
  %1 = bitcast %struct.data* %0 to i8*            ; <i8*> [#uses=1]
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@ -113,7 +113,7 @@ entry:
  %1 = bitcast %struct.data* %0 to i8*
  %2 = call i64 @llvm.objectsize.i64(i8* %1, i1 false) nounwind
 ; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memset.i64(i8* %1, i8 0, i64 1824, i32 8)
+; CHECK: @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 1824, i32 8, i1 false)
  %3 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 %2) nounwind
  ret i32 0
 }
@ -128,7 +128,7 @@ entry:
  %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
  %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memcpy.i32(i8* %0, i8* %1, i32 10, i32 1)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
  %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind
  ret void
 }
--- a/test/Transforms/MemCpyOpt/align.ll
+++ b/test/Transforms/MemCpyOpt/align.ll
@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; The resulting memset is only 4-byte aligned, despite containing
 ; a 16-byte alignmed store in the middle.

-; CHECK: call void @llvm.memset.i64(i8* %a01, i8 0, i64 16, i32 4)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %a01, i8 0, i64 16, i32 4, i1 false)

 define void @foo(i32* %p) {
  %a0 = getelementptr i32* %p, i64 0
--- a/test/Transforms/SimplifyLibCalls/StrCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/StrCpy.ll
@ -21,7 +21,7 @@ define i32 @t1() {
  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
  %rslt1 = call i8* @strcpy( i8* %arg1, i8* %arg2 )
-; CHECK: @llvm.memcpy.i32
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
  ret i32 0
 }

--- a/test/Verifier/2006-12-12-IntrinsicDefine.ll
+++ b/test/Verifier/2006-12-12-IntrinsicDefine.ll
@ -1,7 +1,7 @@
 ; RUN: not llvm-as < %s |& grep {llvm intrinsics cannot be defined}
 ; PR1047

-define void @llvm.memcpy.i32(i8*, i8*, i32, i32) {
+define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) {
 entry:
 	ret void
 }