Atomic op support. If any gcc test uses __sync builtins, it might start failing on archs that haven't implemented them yet

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47430 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-06 09:44:39 +00:00 · 2008-02-21 06:45:13 +00:00 · 2008-02-21 06:45:13 +00:00 · ab0b949e0e
commit ab0b949e0e
parent f9853bc0d4
13 changed files with 589 additions and 12 deletions
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@ -206,7 +206,10 @@
      </li>
          <li><a href="#int_atomics">Atomic intrinsics</a>
            <ol>
-              <li><a href="#int_memory_barrier"><tt>llvm.memory_barrier</tt></li>
+              <li><a href="#int_memory_barrier"><tt>llvm.memory_barrier</tt></a></li>
+              <li><a href="#int_atomic_lcs"><tt>llvm.atomic.lcs</tt></a></li>
+              <li><a href="#int_atomic_las"><tt>llvm.atomic.las</tt></a></li>
+              <li><a href="#int_atomic_swap"><tt>llvm.atomic.swap</tt></a></li>
            </ol>
          </li>
      <li><a href="#int_general">General intrinsics</a>
@ -5339,6 +5342,176 @@ i1 &lt;device&gt; )
 </pre>
 </div>

+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="int_atomic_lcs">'<tt>llvm.atomic.lcs.*</tt>' Intrinsic</a>
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<p>
+  This is an overloaded intrinsic. You can use <tt>llvm.atomic.lcs</tt> on any 
+  integer bit width. Not all targets support all bit widths however.</p>
+
+<pre>
+declare i8 @llvm.atomic.lcs.i8( i8* &lt;ptr&gt;, i8 &lt;cmp&gt;, i8 &lt;val&gt; )
+declare i16 @llvm.atomic.lcs.i16( i16* &lt;ptr&gt;, i16 &lt;cmp&gt;, i16 &lt;val&gt; )
+declare i32 @llvm.atomic.lcs.i32( i32* &lt;ptr&gt;, i32 &lt;cmp&gt;, i32 &lt;val&gt; )
+declare i64 @llvm.atomic.lcs.i64( i64* &lt;ptr&gt;, i64 &lt;cmp&gt;, i64 &lt;val&gt; )
+
+</pre>
+<h5>Overview:</h5>
+<p>
+  This loads a value in memory and compares it to a given value. If they are 
+  equal, it stores a new value into the memory.
+</p>
+<h5>Arguments:</h5>
+<p>
+  The <tt>llvm.atomic.lcs</tt> intrinsic takes three arguments. The result as 
+  well as both <tt>cmp</tt> and <tt>val</tt> must be integer values with the 
+  same bit width. The <tt>ptr</tt> argument must be a pointer to a value of 
+  this integer type. While any bit width integer may be used, targets may only 
+  lower representations they support in hardware.
+
+</p>
+<h5>Semantics:</h5>
+<p>
+  This entire intrinsic must be executed atomically. It first loads the value 
+  in memory pointed to by <tt>ptr</tt> and compares it with the value 
+  <tt>cmp</tt>. If they are equal, <tt>val</tt> is stored into the memory. The 
+  loaded value is yielded in all cases. This provides the equivalent of an 
+  atomic compare-and-swap operation within the SSA framework.
+</p>
+<h5>Examples:</h5>
+
+<pre>
+%ptr      = malloc i32
+            store i32 4, %ptr
+
+%val1     = add i32 4, 4
+%result1  = call i32 @llvm.atomic.lcs.i32( i32* %ptr, i32 4, i32 %val1 )
+                                          <i>; yields {i32}:result1 = 4</i>
+%stored1  = icmp eq i32 %result1, 4       <i>; yields {i1}:stored1 = true</i>
+%memval1  = load i32* %ptr                <i>; yields {i32}:memval1 = 8</i>
+
+%val2     = add i32 1, 1
+%result2  = call i32 @llvm.atomic.lcs.i32( i32* %ptr, i32 5, i32 %val2 )
+                                          <i>; yields {i32}:result2 = 8</i>
+%stored2  = icmp eq i32 %result2, 5       <i>; yields {i1}:stored2 = false</i>
+
+%memval2  = load i32* %ptr                <i>; yields {i32}:memval2 = 8</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="int_atomic_swap">'<tt>llvm.atomic.swap.*</tt>' Intrinsic</a>
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+
+<p>
+  This is an overloaded intrinsic. You can use <tt>llvm.atomic.swap</tt> on any 
+  integer bit width. Not all targets support all bit widths however.</p>
+<pre>
+declare i8 @llvm.atomic.swap.i8( i8* &lt;ptr&gt;, i8 &lt;val&gt; )
+declare i16 @llvm.atomic.swap.i16( i16* &lt;ptr&gt;, i16 &lt;val&gt; )
+declare i32 @llvm.atomic.swap.i32( i32* &lt;ptr&gt;, i32 &lt;val&gt; )
+declare i64 @llvm.atomic.swap.i64( i64* &lt;ptr&gt;, i64 &lt;val&gt; )
+
+</pre>
+<h5>Overview:</h5>
+<p>
+  This intrinsic loads the value stored in memory at <tt>ptr</tt> and yields 
+  the value from memory. It then stores the value in <tt>val</tt> in the memory 
+  at <tt>ptr</tt>.
+</p>
+<h5>Arguments:</h5>
+
+<p>
+  The <tt>llvm.atomic.swap</tt> intrinsic takes two arguments. Both the 
+  <tt>val</tt> argument and the result must be integers of the same bit width. 
+  The first argument, <tt>ptr</tt>, must be a pointer to a value of this 
+  integer type. The targets may only lower integer representations they 
+  support.
+</p>
+<h5>Semantics:</h5>
+<p>
+  This intrinsic loads the value pointed to by <tt>ptr</tt>, yields it, and 
+  stores <tt>val</tt> back into <tt>ptr</tt> atomically. This provides the 
+  equivalent of an atomic swap operation within the SSA framework.
+
+</p>
+<h5>Examples:</h5>
+<pre>
+%ptr      = malloc i32
+            store i32 4, %ptr
+
+%val1     = add i32 4, 4
+%result1  = call i32 @llvm.atomic.swap.i32( i32* %ptr, i32 %val1 )
+                                        <i>; yields {i32}:result1 = 4</i>
+%stored1  = icmp eq i32 %result1, 4     <i>; yields {i1}:stored1 = true</i>
+%memval1  = load i32* %ptr              <i>; yields {i32}:memval1 = 8</i>
+
+%val2     = add i32 1, 1
+%result2  = call i32 @llvm.atomic.swap.i32( i32* %ptr, i32 %val2 )
+                                        <i>; yields {i32}:result2 = 8</i>
+
+%stored2  = icmp eq i32 %result2, 8     <i>; yields {i1}:stored2 = true</i>
+%memval2  = load i32* %ptr              <i>; yields {i32}:memval2 = 2</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="int_atomic_las">'<tt>llvm.atomic.las.*</tt>' Intrinsic</a>
+
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<p>
+  This is an overloaded intrinsic. You can use <tt>llvm.atomic.las</tt> on any 
+  integer bit width. Not all targets support all bit widths however.</p>
+<pre>
+declare i8 @llvm.atomic.las.i8( i8* &lt;ptr&gt;, i8 &lt;delta&gt; )
+declare i16 @llvm.atomic.las.i16( i16* &lt;ptr&gt;, i16 &lt;delta&gt; )
+declare i32 @llvm.atomic.las.i32( i32* &lt;ptr&gt;, i32 &lt;delta&gt; )
+declare i64 @llvm.atomic.las.i64( i64* &lt;ptr&gt;, i64 &lt;delta&gt; )
+
+</pre>
+<h5>Overview:</h5>
+<p>
+  This intrinsic adds <tt>delta</tt> to the value stored in memory at 
+  <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.
+</p>
+<h5>Arguments:</h5>
+<p>
+
+  The intrinsic takes two arguments, the first a pointer to an integer value 
+  and the second an integer value. The result is also an integer value. These 
+  integer types can have any bit width, but they must all have the same bit 
+  width. The targets may only lower integer representations they support.
+</p>
+<h5>Semantics:</h5>
+<p>
+  This intrinsic does a series of operations atomically. It first loads the 
+  value stored at <tt>ptr</tt>. It then adds <tt>delta</tt>, stores the result 
+  to <tt>ptr</tt>. It yields the original value stored at <tt>ptr</tt>.
+</p>
+
+<h5>Examples:</h5>
+<pre>
+%ptr      = malloc i32
+        store i32 4, %ptr
+%result1  = call i32 @llvm.atomic.las.i32( i32* %ptr, i32 4 )
+                                <i>; yields {i32}:result1 = 4</i>
+%result2  = call i32 @llvm.atomic.las.i32( i32* %ptr, i32 2 )
+                                <i>; yields {i32}:result2 = 8</i>
+%result3  = call i32 @llvm.atomic.las.i32( i32* %ptr, i32 5 )
+                                <i>; yields {i32}:result3 = 10</i>
+%memval   = load i32* %ptr      <i>; yields {i32}:memval = 15</i>
+</pre>
+</div>
+

 <!-- ======================================================================= -->
 <div class="doc_subsection">
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@ -356,6 +356,16 @@ public:
  SDOperand getVAArg(MVT::ValueType VT, SDOperand Chain, SDOperand Ptr,
                     SDOperand SV);

+  /// getAtomic - Gets a node for an atomic op, produces result and chain, takes
+  /// 3 operands (Ptr, A2, A3) in addition to the input chain.
+  SDOperand getAtomic(unsigned Opcode, SDOperand Chain, SDOperand Ptr, 
+                      SDOperand A2, SDOperand A3, MVT::ValueType VT);
+
+  /// getAtomic - Gets a node for an atomic op, produces result and chain, takes
+  /// 2 operands (Ptr, A2) in addition to the input chain.
+  SDOperand getAtomic(unsigned Opcode, SDOperand Chain, SDOperand Ptr, 
+                      SDOperand A2, MVT::ValueType VT);
+
  /// getLoad - Loads are not normal binary operators: their result type is not
  /// determined by their operands, and they produce a value AND a token chain.
  ///
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@ -441,7 +441,7 @@ namespace ISD {
    // is added / subtracted from the base pointer to form the address (for
    // indexed memory ops).
    LOAD, STORE,
-    
+
    // DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned
    // to a specified boundary.  This node always has two return values: a new
    // stack pointer value and a chain. The first operand is the token chain,
@ -591,12 +591,30 @@ namespace ISD {

    // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load, 
    //                       store-store, device)
-    // This corresponds to the atomic.barrier intrinsic.
+    // This corresponds to the memory.barrier intrinsic.
    // it takes an input chain, 4 operands to specify the type of barrier, an
    // operand specifying if the barrier applies to device and uncached memory
    // and produces an output chain.
    MEMBARRIER,

+    // Val, OUTCHAIN = ATOMIC_LCS(INCHAIN, ptr, cmp, swap)
+    // this corresponds to the atomic.lcs intrinsic.
+    // cmp is compared to *ptr, and if equal, swap is stored in *ptr.
+    // the return is always the original value in *ptr
+    ATOMIC_LCS,
+
+    // Val, OUTCHAIN = ATOMIC_LAS(INCHAIN, ptr, amt)
+    // this corresponds to the atomic.las intrinsic.
+    // *ptr + amt is stored to *ptr atomically.
+    // the return is always the original value in *ptr
+    ATOMIC_LAS,
+
+    // Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt)
+    // this corresponds to the atomic.swap intrinsic.
+    // amt is stored to *ptr atomically.
+    // the return is always the original value in *ptr
+    ATOMIC_SWAP,
+
    // BUILTIN_OP_END - This must be the last enum value in this list.
    BUILTIN_OP_END
  };
@ -1170,6 +1188,33 @@ public:
  SDOperand getValue() const { return Op; }
 };

+class AtomicSDNode : public SDNode {
+  virtual void ANCHOR();  // Out-of-line virtual method to give class a home.
+  SDOperand Ops[4];
+  MVT::ValueType OrigVT;
+public:
+  AtomicSDNode(unsigned Opc, SDVTList VTL, SDOperand Chain, SDOperand X, 
+               SDOperand Y, SDOperand Z, MVT::ValueType VT)
+    : SDNode(Opc, VTL) {
+    Ops[0] = Chain;
+    Ops[1] = X;
+    Ops[2] = Y;
+    Ops[3] = Z;
+    InitOperands(Ops, 4);
+    OrigVT=VT;
+  }
+  AtomicSDNode(unsigned Opc, SDVTList VTL, SDOperand Chain, SDOperand X, 
+               SDOperand Y, MVT::ValueType VT)
+    : SDNode(Opc, VTL) {
+    Ops[0] = Chain;
+    Ops[1] = X;
+    Ops[2] = Y;
+    InitOperands(Ops, 3);
+    OrigVT=VT;
+  }
+  MVT::ValueType getVT() const { return OrigVT; }
+};
+
 class StringSDNode : public SDNode {
  std::string Value;
  virtual void ANCHOR();  // Out-of-line virtual method to give class a home.
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@ -64,7 +64,7 @@ class LLVMPointerType<LLVMType elty>
 class LLVMMatchType<int num>
  : LLVMType<OtherVT>{
  int Number = num;
-} 
+}

 def llvm_void_ty       : LLVMType<isVoid>;
 def llvm_anyint_ty     : LLVMType<iAny>;
@ -267,6 +267,17 @@ def int_init_trampoline : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty,
 def int_memory_barrier : Intrinsic<[llvm_void_ty, llvm_i1_ty, llvm_i1_ty, 
                                       llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>;

+def int_atomic_lcs : Intrinsic<[llvm_anyint_ty, LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>, LLVMMatchType<0>],
+                               [IntrWriteArgMem]>, GCCBuiltin<"__sync_val_compare_and_swap">;
+def int_atomic_las : Intrinsic<[llvm_anyint_ty, LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                               [IntrWriteArgMem]>, GCCBuiltin<"__sync_fetch_and_add">;
+def int_atomic_swap : Intrinsic<[llvm_anyint_ty, LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                               [IntrWriteArgMem]>, GCCBuiltin<"__sync_lock_test_and_set">;
+
+
 //===-------------------------- Other Intrinsics --------------------------===//
 //
 def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -1252,7 +1252,7 @@ private:
  /// TargetDAGCombineArray - Targets can specify ISD nodes that they would
  /// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(),
  /// which sets a bit in this array.
-  unsigned char TargetDAGCombineArray[156/(sizeof(unsigned char)*8)];
+  unsigned char TargetDAGCombineArray[160/(sizeof(unsigned char)*8)];
  
  /// PromoteToType - For operations that must be promoted to a specific type,
  /// this holds the destination type.  This map should be sparse, so don't hold
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -1153,6 +1153,31 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
    break;
  }

+  case ISD::ATOMIC_LCS:
+  case ISD::ATOMIC_LAS:
+  case ISD::ATOMIC_SWAP: {  // All atomic ops: legalize every operand in place.
+    assert(((Node->getNumOperands() == 4 && Node->getOpcode() == ISD::ATOMIC_LCS) ||
+            (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_LAS) ||
+            (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_SWAP)) &&
+           "Invalid Atomic node!");
+    int num = Node->getOpcode() == ISD::ATOMIC_LCS ? 4 : 3;  // LCS has one more operand.
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {  // Ask about this node's own opcode.
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Legal: {
+      SDOperand Ops[4];
+      for (int x = 0; x < num; ++x)
+        Ops[x] = LegalizeOp(Node->getOperand(x));
+      Result = DAG.UpdateNodeOperands(Result, &Ops[0], num);
+      AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+      AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+      return Result.getValue(Op.ResNo);  // Both results were recorded just above.
+      break;
+    }
+    }
+    break;
+  }
+
  case ISD::Constant: {
    ConstantSDNode *CN = cast<ConstantSDNode>(Node);
    unsigned opAction =
@ -4228,6 +4253,27 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
    break;
  }
    
+  case ISD::ATOMIC_LCS: {
+    Tmp2 = PromoteOp(Node->getOperand(2));
+    Tmp3 = PromoteOp(Node->getOperand(3));
+    Result = DAG.getAtomic(Node->getOpcode(), Node->getOperand(0), 
+                           Node->getOperand(1), Tmp2, Tmp3,
+                           cast<AtomicSDNode>(Node)->getVT());
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+  }
+  case ISD::ATOMIC_LAS:
+  case ISD::ATOMIC_SWAP: {
+    Tmp2 = PromoteOp(Node->getOperand(2));
+    Result = DAG.getAtomic(Node->getOpcode(), Node->getOperand(0), 
+                           Node->getOperand(1), Tmp2,
+                           cast<AtomicSDNode>(Node)->getVT());
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+  }
+
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -2427,6 +2427,43 @@ SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest,
  return getNode(ISD::MEMSET, MVT::Other, Ops, 6);
 }

+SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain, 
+                                  SDOperand Ptr, SDOperand A2, 
+                                  SDOperand A3, MVT::ValueType VT) {
+  assert(Opcode == ISD::ATOMIC_LCS && "Invalid Atomic Op");
+  SDVTList VTs = getVTList(A2.getValueType(), MVT::Other);
+  FoldingSetNodeID ID;
+  SDOperand Ops[] = {Chain, Ptr, A2, A3};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+  ID.AddInteger((unsigned int)VT);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode* N = new AtomicSDNode(Opcode, VTs, Chain, Ptr, A2, A3, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain, 
+                                  SDOperand Ptr, SDOperand A2, 
+                                  MVT::ValueType VT) {
+  assert((Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_SWAP)
+         && "Invalid Atomic Op");
+  SDVTList VTs = getVTList(A2.getValueType(), MVT::Other);
+  FoldingSetNodeID ID;
+  SDOperand Ops[] = {Chain, Ptr, A2};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+  ID.AddInteger((unsigned int)VT);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode* N = new AtomicSDNode(Opcode, VTs, Chain, Ptr, A2, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
 SDOperand SelectionDAG::getLoad(MVT::ValueType VT,
                                SDOperand Chain, SDOperand Ptr,
                                const Value *SV, int SVOffset,
@ -3593,6 +3630,7 @@ void CondCodeSDNode::ANCHOR() {}
 void VTSDNode::ANCHOR() {}
 void LoadSDNode::ANCHOR() {}
 void StoreSDNode::ANCHOR() {}
+void AtomicSDNode::ANCHOR() {}

 HandleSDNode::~HandleSDNode() {
  SDVTList VTs = { 0, 0 };
@ -3821,6 +3859,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
    }
   
  case ISD::MEMBARRIER:    return "MemBarrier";
+  case ISD::ATOMIC_LCS:    return "AtomicLCS";
+  case ISD::ATOMIC_LAS:    return "AtomicLAS";
+  case ISD::ATOMIC_SWAP:    return "AtomicSWAP";
  case ISD::PCMARKER:      return "PCMarker";
  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
  case ISD::SRCVALUE:      return "SrcValue";
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@ -221,7 +221,7 @@ static bool isSelector(Instruction *I) {

 /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
 /// PHI nodes or outside of the basic block that defines it, or used by a 
-/// switch instruction, which may expand to multiple basic blocks.
+/// switch or atomic instruction, which may expand to multiple basic blocks.
 static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
  if (isa<PHINode>(I)) return true;
  BasicBlock *BB = I->getParent();
@ -3059,6 +3059,38 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, MVT::Other, &Ops[0], 6));
    return 0;
  }
+  case Intrinsic::atomic_lcs: {
+    SDOperand Root = getRoot();   
+    SDOperand O3 = getValue(I.getOperand(3));
+    SDOperand L = DAG.getAtomic(ISD::ATOMIC_LCS, Root, 
+                                getValue(I.getOperand(1)), 
+                                getValue(I.getOperand(2)),
+                                O3, O3.getValueType());
+    setValue(&I, L);
+    DAG.setRoot(L.getValue(1));
+    return 0;
+  }
+  case Intrinsic::atomic_las: {
+    SDOperand Root = getRoot();   
+    SDOperand O2 = getValue(I.getOperand(2));
+    SDOperand L = DAG.getAtomic(ISD::ATOMIC_LAS, Root, 
+                                getValue(I.getOperand(1)), 
+                                O2, O2.getValueType());
+    setValue(&I, L);
+    DAG.setRoot(L.getValue(1));
+    return 0;
+  }
+  case Intrinsic::atomic_swap: {
+    SDOperand Root = getRoot();   
+    SDOperand O2 = getValue(I.getOperand(2));
+    SDOperand L = DAG.getAtomic(ISD::ATOMIC_SWAP, Root, 
+                                getValue(I.getOperand(1)), 
+                                O2, O2.getValueType());
+    setValue(&I, L);
+    DAG.setRoot(L.getValue(1));
+    return 0;
+  }
+
  }
 }

--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@ -629,3 +629,96 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
  
  return std::vector<unsigned>();
 }
+//===----------------------------------------------------------------------===//
+//  Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                 MachineBasicBlock *BB) {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  assert((MI->getOpcode() == Alpha::CAS32 ||
+          MI->getOpcode() == Alpha::CAS64 ||
+          MI->getOpcode() == Alpha::LAS32 ||
+          MI->getOpcode() == Alpha::LAS64 ||
+          MI->getOpcode() == Alpha::SWAP32 ||
+          MI->getOpcode() == Alpha::SWAP64) &&
+         "Unexpected instr type to insert");
+
+  bool is32 = MI->getOpcode() == Alpha::CAS32 || 
+    MI->getOpcode() == Alpha::LAS32 ||
+    MI->getOpcode() == Alpha::SWAP32;
+  
+  //Load-locked/store-conditional sequences for atomic ops all take the same form:
+  //start:
+  //ll
+  //do stuff (maybe branch to exit)
+  //sc
+  //test sc and maybe branch to start
+  //exit:
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  ilist<MachineBasicBlock>::iterator It = BB;
+  ++It;
+  
+  MachineBasicBlock *thisMBB = BB;
+  MachineBasicBlock *llscMBB = new MachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+
+  for(MachineBasicBlock::succ_iterator i = thisMBB->succ_begin(), 
+        e = thisMBB->succ_end(); i != e; ++i)
+    sinkMBB->addSuccessor(*i);
+  while(!thisMBB->succ_empty())
+    thisMBB->removeSuccessor(thisMBB->succ_begin());
+
+  MachineFunction *F = BB->getParent();
+  F->getBasicBlockList().insert(It, llscMBB);
+  F->getBasicBlockList().insert(It, sinkMBB);
+
+  BuildMI(thisMBB, TII->get(Alpha::BR)).addMBB(llscMBB);
+  
+  unsigned reg_res = MI->getOperand(0).getReg(),
+    reg_ptr = MI->getOperand(1).getReg(),
+    reg_v2 = MI->getOperand(2).getReg(),
+    reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+
+  BuildMI(llscMBB, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L), 
+          reg_res).addImm(0).addReg(reg_ptr);
+  switch (MI->getOpcode()) {
+  case Alpha::CAS32:
+  case Alpha::CAS64: {
+    unsigned reg_cmp 
+      = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+    BuildMI(llscMBB, TII->get(Alpha::CMPEQ), reg_cmp)
+      .addReg(reg_v2).addReg(reg_res);
+    BuildMI(llscMBB, TII->get(Alpha::BEQ))
+      .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
+    BuildMI(llscMBB, TII->get(Alpha::BISr), reg_store)
+      .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
+    break;
+  }
+  case Alpha::LAS32:
+  case Alpha::LAS64: {
+    BuildMI(llscMBB, TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
+      .addReg(reg_res).addReg(reg_v2);
+    break;
+  }
+  case Alpha::SWAP32:
+  case Alpha::SWAP64: {
+    BuildMI(llscMBB, TII->get(Alpha::BISr), reg_store)
+      .addReg(reg_v2).addReg(reg_v2);
+    break;
+  }
+  }
+  BuildMI(llscMBB, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
+    .addReg(reg_store).addImm(0).addReg(reg_ptr);
+  BuildMI(llscMBB, TII->get(Alpha::BEQ))
+    .addImm(0).addReg(reg_store).addMBB(llscMBB);
+  BuildMI(llscMBB, TII->get(Alpha::BR)).addMBB(sinkMBB);
+
+  thisMBB->addSuccessor(llscMBB);
+  llscMBB->addSuccessor(llscMBB);
+  llscMBB->addSuccessor(sinkMBB);
+  delete MI;   // The pseudo instruction is gone now.
+
+  return sinkMBB;
+}
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@ -88,6 +88,9 @@ namespace llvm {
                                        MVT::ValueType VT) const;

    bool hasITOF() { return useITOF; }
+
+    MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                   MachineBasicBlock *BB);
  };
 }

--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@ -167,6 +167,23 @@ def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64
         "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;


+let usesCustomDAGSchedInserter = 1 in {   // Expanded by the scheduler.
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+        [(set GPRC:$dst, (atomic_lcs_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+        [(set GPRC:$dst, (atomic_lcs_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+
+def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+        [(set GPRC:$dst, (atomic_las_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+        [(set GPRC:$dst, (atomic_las_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+
+def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+        [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+        [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+}
+
 //***********************
 //Real instructions
 //***********************
@ -568,6 +585,18 @@ def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
 def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
          (LDQl texternalsym:$ext, GPRC:$RB)>;

+let OutOperandList = (outs GPRC:$RR),
+    InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
+    Constraints = "$RA = $RR",
+    DisableEncoding = "$RR" in {
+def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>; //Store quadword conditional
+def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>; //Store longword conditional
+}
+let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
+def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>; //Load quadword locked
+def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>; //Load sign-extended longword locked
+}
+
 def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
 def MB  : MfcPForm<0x18, 0x4000, "mb",  s_imisc>; //memory barrier
 def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
@ -965,7 +994,6 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
 //S_floating : IEEE Single
 //T_floating : IEEE Double

-
 //Unused instructions
 //Mnemonic Format Opcode Description
 //CALL_PAL Pcd 00 Trap to PALcode
@ -973,12 +1001,8 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
 //EXCB Mfc 18.0400 Exception barrier
 //FETCH Mfc 18.8000 Prefetch data
 //FETCH_M Mfc 18.A000 Prefetch data, modify intent
-//LDL_L Mem 2A Load sign-extended longword locked
-//LDQ_L Mem 2B Load quadword locked
 //LDQ_U Mem 0B Load unaligned quadword
 //MB Mfc 18.4000 Memory barrier
-//STL_C Mem 2E Store longword conditional
-//STQ_C Mem 2F Store quadword conditional
 //STQ_U Mem 0F Store unaligned quadword
 //TRAPB Mfc 18.0000 Trap barrier
 //WH64 Mfc 18.F800 Write hint  64 bytes
--- a/lib/Target/TargetSelectionDAG.td
+++ b/lib/Target/TargetSelectionDAG.td
@ -189,6 +189,12 @@ def STDMemBarrier : SDTypeProfile<0, 5, [
  SDTCisSameAs<0,1>,  SDTCisSameAs<0,2>,  SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
  SDTCisInt<0>
 ]>;
+def STDAtomic3 : SDTypeProfile<1, 3, [
+  SDTCisSameAs<0,2>,  SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def STDAtomic2 : SDTypeProfile<1, 2, [
+  SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1>
+]>;

 class SDCallSeqStart<list<SDTypeConstraint> constraints> :
        SDTypeProfile<0, 1, constraints>;
@ -336,6 +342,13 @@ def trap       : SDNode<"ISD::TRAP"       , SDTNone,
                        [SDNPHasChain, SDNPSideEffect]>;
 def membarrier : SDNode<"ISD::MEMBARRIER"       , STDMemBarrier,
                        [SDNPHasChain, SDNPSideEffect]>;
+// Do not use atomic_* directly, use atomic_*_size (see below)
+def atomic_lcs : SDNode<"ISD::ATOMIC_LCS", STDAtomic3,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_las : SDNode<"ISD::ATOMIC_LAS", STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;

 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
 // and truncst (see below).
@ -722,6 +735,84 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
  return false;
 }]>;

+//Atomic patterns
+def atomic_lcs_8 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp),
+                    (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i8;
+  return false;
+}]>;
+def atomic_lcs_16 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp), 
+                    (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i16;
+  return false;
+}]>;
+def atomic_lcs_32 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp), 
+                    (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i32;
+  return false;
+}]>;
+def atomic_lcs_64 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp), 
+                    (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i64;
+  return false;
+}]>;
+
+def atomic_las_8 : PatFrag<(ops node:$ptr, node:$inc),
+                    (atomic_las node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i8;
+  return false;
+}]>;
+def atomic_las_16 : PatFrag<(ops node:$ptr, node:$inc), 
+                    (atomic_las node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i16;
+  return false;
+}]>;
+def atomic_las_32 : PatFrag<(ops node:$ptr, node:$inc), 
+                    (atomic_las node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i32;
+  return false;
+}]>;
+def atomic_las_64 : PatFrag<(ops node:$ptr, node:$inc), 
+                    (atomic_las node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i64;
+  return false;
+}]>;
+
+def atomic_swap_8 : PatFrag<(ops node:$ptr, node:$inc),
+                    (atomic_swap node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i8;
+  return false;
+}]>;
+def atomic_swap_16 : PatFrag<(ops node:$ptr, node:$inc), 
+                    (atomic_swap node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i16;
+  return false;
+}]>;
+def atomic_swap_32 : PatFrag<(ops node:$ptr, node:$inc), 
+                    (atomic_swap node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i32;
+  return false;
+}]>;
+def atomic_swap_64 : PatFrag<(ops node:$ptr, node:$inc), 
+                    (atomic_swap node:$ptr, node:$inc), [{
+  if (AtomicSDNode* V = dyn_cast<AtomicSDNode>(N))
+        return V->getVT() == MVT::i64;
+  return false;
+}]>;
+
+
+
 // setcc convenience fragments.
 def setoeq : PatFrag<(ops node:$lhs, node:$rhs),
                     (setcc node:$lhs, node:$rhs, SETOEQ)>;
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@ -2535,6 +2535,15 @@ def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),

 }

+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+let Defs = [EAX] in
+def LCMPXCHGL : I<0, Pseudo, (outs GR32:$dst), 
+                  (ins GR32:$ptr, GR32:$cmp, GR32:$swap),
+                  "movl $cmp, %eax ; lock cmpxchgl $swap,($ptr) ; movl %eax, $dst",
+                  [(set GR32:$dst, (atomic_lcs_32 GR32:$ptr, GR32:$cmp, GR32:$swap))]>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
@ -2683,7 +2692,6 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
                     (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
          (SHLD16mrCL addr:$dst, GR16:$src2)>;

-
 //===----------------------------------------------------------------------===//
 // Floating Point Stack Support
 //===----------------------------------------------------------------------===//