From ff03048c1350fcc4fda1ef6d6c57252f3a950854 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 28 Jul 2011 21:48:00 +0000 Subject: [PATCH] LangRef and basic memory-representation/reading/writing for 'cmpxchg' and 'atomicrmw' instructions, which allow representing all the current atomic rmw intrinsics. The allowed operands for these instructions are heavily restricted at the moment; we can probably loosen it a bit, but supporting general first-class types (where it makes sense) might get a bit complicated, given how SelectionDAG works. As an initial cut, these operations do not support specifying an alignment, but it would be possible to add if we think it's useful. Specifying an alignment lower than the natural alignment would be essentially impossible to support on anything other than x86, but specifying a greater alignment would be possible. I can't think of any useful optimizations which would use that information, but maybe someone else has ideas. Optimizer/codegen support coming soon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136404 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.html | 249 ++++++++++++++++- include/llvm-c/Core.h | 6 +- include/llvm/Bitcode/LLVMBitCodes.h | 25 +- include/llvm/Instruction.def | 64 ++--- include/llvm/Instructions.h | 253 ++++++++++++++++++ include/llvm/Support/InstVisitor.h | 2 + lib/AsmParser/LLLexer.cpp | 5 + lib/AsmParser/LLParser.cpp | 97 +++++++ lib/AsmParser/LLParser.h | 2 + lib/AsmParser/LLToken.h | 6 +- lib/Bitcode/Reader/BitcodeReader.cpp | 59 ++++ lib/Bitcode/Writer/BitcodeWriter.cpp | 39 +++ .../SelectionDAG/SelectionDAGBuilder.cpp | 6 + .../SelectionDAG/SelectionDAGBuilder.h | 2 + lib/VMCore/AsmWriter.cpp | 25 ++ lib/VMCore/Instruction.cpp | 2 + lib/VMCore/Instructions.cpp | 111 ++++++++ lib/VMCore/Verifier.cpp | 36 +++ 18 files changed, 946 insertions(+), 43 deletions(-) diff --git a/docs/LangRef.html b/docs/LangRef.html index 40affb7e917..0c07f12ecfb 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -54,6 +54,7 @@
  • Pointer Aliasing Rules
  • Volatile Memory Accesses
  • Memory Model for Concurrent Operations
+ • Atomic Memory Ordering Constraints
  • Type System
@@ -168,10 +169,12 @@
  • Type System @@ -168,10 +169,12 @@
  • Memory Access and Addressing Operations
-    • 'alloca' Instruction
-    • 'load' Instruction
-    • 'store' Instruction
-    • 'fence' Instruction
+    • 'alloca' Instruction
+    • 'load' Instruction
+    • 'store' Instruction
+    • 'fence' Instruction
+    • 'cmpxchg' Instruction
+    • 'atomicrmw' Instruction
     • 'getelementptr' Instruction
@@ -1500,8 +1503,9 @@ that

  • When a synchronizes-with b, includes an edge from a to b. Synchronizes-with pairs are introduced by platform-specific techniques, like pthread locks, thread - creation, thread joining, etc., and by the atomic operations described - in the Atomic intrinsics section.
  • + creation, thread joining, etc., and by atomic instructions. + (See also Atomic Memory Ordering Constraints). +

    Note that program order does not introduce happens-before edges @@ -1536,8 +1540,9 @@ any write to the same byte, except:

    write.
  • Otherwise, if R is atomic, and all the writes Rbyte may see are atomic, it chooses one of the - values written. See the Atomic intrinsics - section for additional guarantees on how the choice is made. + values written. See the Atomic Memory Ordering + Constraints section for additional constraints on how the choice + is made.
  • Otherwise Rbyte returns undef.
  • @@ -1569,6 +1574,82 @@ as if it writes to the relevant surrounding bytes. + +
    + Atomic Memory Ordering Constraints +
    + +
    + +

    Atomic instructions (cmpxchg, +atomicrmw, and +fence) take an ordering parameter +that determines which other atomic instructions on the same address they +synchronize with. These semantics are borrowed from Java and C++0x, +but are somewhat more colloquial. If these descriptions aren't precise enough, +check those specs. fence instructions +treat these orderings somewhat differently since they don't take an address. +See that instruction's documentation for details.

    + + + +
    + +
    unordered
    +
    The set of values that can be read is governed by the happens-before +partial order. A value cannot be read unless some operation wrote it. +This is intended to provide a guarantee strong enough to model Java's +non-volatile shared variables. This ordering cannot be specified for +read-modify-write operations; it is not strong enough to make them atomic +in any interesting way.
    +
    monotonic
    +
    In addition to the guarantees of unordered, there is a single +total order for modifications by monotonic operations on each +address. All modification orders must be compatible with the happens-before +order. There is no guarantee that the modification orders can be combined to +a global total order for the whole program (and this often will not be +possible). The read in an atomic read-modify-write operation +(cmpxchg and +atomicrmw) +reads the value in the modification order immediately before the value it +writes. If one atomic read happens before another atomic read of the same +address, the later read must see the same value or a later value in the +address's modification order. This disallows reordering of +monotonic (or stronger) operations on the same address. If an +address is written monotonically by one thread, and other threads +monotonically read that address repeatedly, the other threads must +eventually see the write. This is intended to model C++'s relaxed atomic +variables.
    +
    acquire
    +
    In addition to the guarantees of monotonic, if this operation +reads a value written by a release atomic operation, it +synchronizes-with that operation.
    +
    release
    +
    In addition to the guarantees of monotonic, +a synchronizes-with edge may be formed by an acquire +operation.
    +
    acq_rel (acquire+release)
    Acts as both an +acquire and release operation on its address.
    +
    seq_cst (sequentially consistent)
    +
    In addition to the guarantees of acq_rel +(acquire for an operation which only reads, release +for an operation which only writes), there is a global total order on all +sequentially-consistent operations on all addresses, which is consistent with +the happens-before partial order and with the modification orders of +all the affected addresses. Each sequentially-consistent read sees the last +preceding write to the same address in this global order. This is intended +to model C++'s sequentially-consistent atomic variables and Java's volatile +shared variables.
    +
    + +

    If an atomic operation is marked singlethread, +it only synchronizes with or participates in modification and seq_cst +total orderings with other operations running in the same thread (for example, +in signal handlers).
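As a rough, non-normative illustration of the intended correspondence with C++0x (this sketch assumes a conforming <atomic> implementation; it is not part of the patch), each std::memory_order below maps to the LLVM ordering named in the comment:

  #include <atomic>

  std::atomic<int> x(0);

  void ordering_examples() {
    x.load(std::memory_order_relaxed);          // ~ LLVM 'monotonic'
    x.load(std::memory_order_acquire);          // ~ 'acquire'
    x.store(1, std::memory_order_release);      // ~ 'release'
    x.fetch_add(1, std::memory_order_acq_rel);  // ~ 'acq_rel'
    x.fetch_add(1, std::memory_order_seq_cst);  // ~ 'seq_cst'
    // 'unordered' has no C++ counterpart; it models Java's non-volatile
    // shared variables.
  }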

    + +
    + @@ -4641,6 +4722,158 @@ thread. (This is useful for interacting with signal handlers.)

    + +
    'cmpxchg' +Instruction
    + +
    + +
    Syntax:
    +
    +  [volatile] cmpxchg <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>                   ; yields {ty}
    +
    + +
    Overview:
    +

    The 'cmpxchg' instruction is used to atomically modify memory. +It loads a value in memory and compares it to a given value. If they are +equal, it stores a new value into the memory.

    + +
    Arguments:
    +

There are three arguments to the 'cmpxchg' instruction: an
+address to operate on, a value to compare to the value currently at that
+address, and a new value to place at that address if the compared values are
+equal. The type of '<cmp>' must be an integer type whose
+bit width is a power of two greater than or equal to eight and less than
+or equal to a target-specific size limit. '<cmp>' and
+'<new>' must have the same type, and the type of
+'<pointer>' must be a pointer to that type. If the
+cmpxchg is marked as volatile, then the
+optimizer is not allowed to modify the number or order of execution of
+this cmpxchg with other volatile operations.

    + + + +

    The ordering argument specifies how this +cmpxchg synchronizes with other atomic operations.

    + +

    The optional "singlethread" argument declares that the +cmpxchg is only atomic with respect to code (usually signal +handlers) running in the same thread as the cmpxchg. Otherwise the +cmpxchg is atomic with respect to all other code in the system.

    + +

    The pointer passed into cmpxchg must have alignment greater than or equal to +the size in memory of the operand. + +

    Semantics:
    +

The contents of memory at the location specified by the
+'<pointer>' operand are read and compared to
+'<cmp>'; if the read value is equal,
+'<new>' is written. The original value at the location
+is returned.

A successful cmpxchg is a read-modify-write instruction for the
+purpose of identifying release sequences. A
+failed cmpxchg is equivalent to an atomic load with an ordering
+parameter determined by dropping any release part of the
+cmpxchg's ordering (for example, a failed acq_rel
+cmpxchg acts as an acquire load, and a failed
+release cmpxchg acts as a monotonic load).

    + + + +
    Example:
    +
    +entry:
    +  %orig = atomic load i32* %ptr unordered                       ; yields {i32}
    +  br label %loop
    +
    +loop:
    +  %cmp = phi i32 [ %orig, %entry ], [%old, %loop]
    +  %squared = mul i32 %cmp, %cmp
+  %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared monotonic                       ; yields {i32}
    +  %success = icmp eq i32 %cmp, %old
    +  br i1 %success, label %done, label %loop
    +
    +done:
    +  ...
    +
    + +
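For readers who think in C++0x atomics, the loop above is roughly equivalent to the following sketch (illustrative only; the function name and the use of std::atomic are assumptions, not part of the patch):

  #include <atomic>

  // Atomically replace *p with the square of its current value, retrying
  // until no other thread writes between our read and our write.
  void square_atomically(std::atomic<int> *p) {
    int cmp = p->load(std::memory_order_relaxed);  // plays the role of %orig
    for (;;) {
      int squared = cmp * cmp;
      // compare_exchange_strong is the cmpxchg: on failure it stores the
      // observed value back into 'cmp', like the phi feeding the next trip.
      if (p->compare_exchange_strong(cmp, squared))
        break;
    }
  }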
    + + +
    'atomicrmw' +Instruction
    + +
    + +
    Syntax:
    +
    +  [volatile] atomicrmw <operation> <ty>* <pointer>, <ty> <value> [singlethread] <ordering>                   ; yields {ty}
    +
    + +
    Overview:
    +

    The 'atomicrmw' instruction is used to atomically modify memory.

    + +
    Arguments:
    +

There are three arguments to the 'atomicrmw' instruction: an
+operation to apply, an address whose value to modify, and an argument to the
+operation. The operation must be one of the following keywords:
+
+  • xchg
+  • add
+  • sub
+  • and
+  • nand
+  • or
+  • xor
+  • max
+  • min
+  • umax
+  • umin

    The type of '<value>' must be an integer type whose +bit width is a power of two greater than or equal to eight and less than +or equal to a target-specific size limit. The type of the +'<pointer>' operand must be a pointer to that type. +If the atomicrmw is marked as volatile, then the +optimizer is not allowed to modify the number or order of execution of this +atomicrmw with other volatile + operations.

    + + + +
    Semantics:
    +

The contents of memory at the location specified by the
+'<pointer>' operand are atomically read, modified, and written
+back. The original value at the location is returned. The modification is
+specified by the operation argument:
+
+  • xchg: *ptr = val
+  • add: *ptr = *ptr + val
+  • sub: *ptr = *ptr - val
+  • and: *ptr = *ptr & val
+  • nand: *ptr = ~(*ptr & val)
+  • or: *ptr = *ptr | val
+  • xor: *ptr = *ptr ^ val
+  • max: *ptr = *ptr > val ? *ptr : val (using a signed comparison)
+  • min: *ptr = *ptr < val ? *ptr : val (using a signed comparison)
+  • umax: *ptr = *ptr > val ? *ptr : val (using an unsigned comparison)
+  • umin: *ptr = *ptr < val ? *ptr : val (using an unsigned comparison)
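Any of these operations could in principle be expressed as a cmpxchg loop; the dedicated instruction lets targets with native read-modify-write support do better. As an illustrative sketch (one possible expansion in terms of C++0x atomics, not necessarily how any backend lowers it), here is nand:

  #include <atomic>

  // Returns 'old', matching atomicrmw semantics; *p becomes ~(old & v).
  int atomic_nand(std::atomic<int> *p, int v) {
    int old = p->load(std::memory_order_relaxed);
    while (!p->compare_exchange_weak(old, ~(old & v))) {
      // 'old' is refreshed with the observed value after each failure.
    }
    return old;
  }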
    Example:
    +
    +  %old = atomicrmw add i32* %ptr, i32 1 acquire                        ; yields {i32}
    +
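The example has roughly the effect of a C++0x fetch_add (illustrative; the wrapper function is an assumption, not part of the patch):

  #include <atomic>

  int fetch_and_increment(std::atomic<int> *ptr) {
    // Same effect as 'atomicrmw add i32* %ptr, i32 1 acquire': atomically
    // add 1 and return the value the location held beforehand.
    return ptr->fetch_add(1, std::memory_order_acquire);
  }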
    + +
    +

    'getelementptr' Instruction diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 74599100e1b..69996074f27 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -187,10 +187,12 @@ typedef enum { /* Atomic operators */ LLVMFence = 55, + LLVMAtomicCmpXchg = 56, + LLVMAtomicRMW = 57, /* Exception Handling Operators */ - LLVMLandingPad = 56, - LLVMResume = 57 + LLVMLandingPad = 58, + LLVMResume = 59 } LLVMOpcode; diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 0f74f633ea3..71512166d4a 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -205,6 +205,23 @@ namespace bitc { BINOP_XOR = 12 }; + /// These are values used in the bitcode files to encode AtomicRMW operations. + /// The values of these enums have no fixed relation to the LLVM IR enum + /// values. Changing these will break compatibility with old files. + enum RMWOperations { + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10 + }; + /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing /// OverflowingBinaryOperator's SubclassOptionalData contents. enum OverflowingBinaryOperatorOptionalFlags { @@ -285,7 +302,13 @@ namespace bitc { FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal] FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope] - FUNC_CODE_INST_LANDINGPAD = 37 // LANDINGPAD: [ty,val,val,num,id0,val0...] + FUNC_CODE_INST_LANDINGPAD = 37, // LANDINGPAD: [ty,val,val,num,id0,val0...] + FUNC_CODE_INST_CMPXCHG = 38, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol, + // ordering, synchscope] + FUNC_CODE_INST_ATOMICRMW = 39 // ATOMICRMW: [ptrty,ptr,val, operation, + // align, vol, + // ordering, synchscope] + }; } // End bitc namespace } // End llvm namespace diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def index a68601fbcdf..d36e4be1d91 100644 --- a/include/llvm/Instruction.def +++ b/include/llvm/Instruction.def @@ -135,43 +135,45 @@ HANDLE_MEMORY_INST(28, Load , LoadInst ) // Memory manipulation instrs HANDLE_MEMORY_INST(29, Store , StoreInst ) HANDLE_MEMORY_INST(30, GetElementPtr, GetElementPtrInst) HANDLE_MEMORY_INST(31, Fence , FenceInst ) - LAST_MEMORY_INST(31) +HANDLE_MEMORY_INST(32, AtomicCmpXchg , AtomicCmpXchgInst ) +HANDLE_MEMORY_INST(33, AtomicRMW , AtomicRMWInst ) + LAST_MEMORY_INST(33) // Cast operators ... // NOTE: The order matters here because CastInst::isEliminableCastPair // NOTE: (see Instructions.cpp) encodes a table based on this ordering. 
- FIRST_CAST_INST(33) -HANDLE_CAST_INST(33, Trunc , TruncInst ) // Truncate integers -HANDLE_CAST_INST(34, ZExt , ZExtInst ) // Zero extend integers -HANDLE_CAST_INST(35, SExt , SExtInst ) // Sign extend integers -HANDLE_CAST_INST(36, FPToUI , FPToUIInst ) // floating point -> UInt -HANDLE_CAST_INST(37, FPToSI , FPToSIInst ) // floating point -> SInt -HANDLE_CAST_INST(38, UIToFP , UIToFPInst ) // UInt -> floating point -HANDLE_CAST_INST(39, SIToFP , SIToFPInst ) // SInt -> floating point -HANDLE_CAST_INST(40, FPTrunc , FPTruncInst ) // Truncate floating point -HANDLE_CAST_INST(41, FPExt , FPExtInst ) // Extend floating point -HANDLE_CAST_INST(42, PtrToInt, PtrToIntInst) // Pointer -> Integer -HANDLE_CAST_INST(43, IntToPtr, IntToPtrInst) // Integer -> Pointer -HANDLE_CAST_INST(44, BitCast , BitCastInst ) // Type cast - LAST_CAST_INST(44) + FIRST_CAST_INST(34) +HANDLE_CAST_INST(34, Trunc , TruncInst ) // Truncate integers +HANDLE_CAST_INST(35, ZExt , ZExtInst ) // Zero extend integers +HANDLE_CAST_INST(36, SExt , SExtInst ) // Sign extend integers +HANDLE_CAST_INST(37, FPToUI , FPToUIInst ) // floating point -> UInt +HANDLE_CAST_INST(38, FPToSI , FPToSIInst ) // floating point -> SInt +HANDLE_CAST_INST(39, UIToFP , UIToFPInst ) // UInt -> floating point +HANDLE_CAST_INST(40, SIToFP , SIToFPInst ) // SInt -> floating point +HANDLE_CAST_INST(41, FPTrunc , FPTruncInst ) // Truncate floating point +HANDLE_CAST_INST(42, FPExt , FPExtInst ) // Extend floating point +HANDLE_CAST_INST(43, PtrToInt, PtrToIntInst) // Pointer -> Integer +HANDLE_CAST_INST(44, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(45, BitCast , BitCastInst ) // Type cast + LAST_CAST_INST(45) // Other operators... - FIRST_OTHER_INST(45) -HANDLE_OTHER_INST(45, ICmp , ICmpInst ) // Integer comparison instruction -HANDLE_OTHER_INST(46, FCmp , FCmpInst ) // Floating point comparison instr. -HANDLE_OTHER_INST(47, PHI , PHINode ) // PHI node instruction -HANDLE_OTHER_INST(48, Call , CallInst ) // Call a function -HANDLE_OTHER_INST(49, Select , SelectInst ) // select instruction -HANDLE_OTHER_INST(50, UserOp1, Instruction) // May be used internally in a pass -HANDLE_OTHER_INST(51, UserOp2, Instruction) // Internal to passes only -HANDLE_OTHER_INST(52, VAArg , VAArgInst ) // vaarg instruction -HANDLE_OTHER_INST(53, ExtractElement, ExtractElementInst)// extract from vector -HANDLE_OTHER_INST(54, InsertElement, InsertElementInst) // insert into vector -HANDLE_OTHER_INST(55, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. -HANDLE_OTHER_INST(56, ExtractValue, ExtractValueInst)// extract from aggregate -HANDLE_OTHER_INST(57, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(58, LandingPad, LandingPadInst) // Landing pad instruction. - LAST_OTHER_INST(58) + FIRST_OTHER_INST(46) +HANDLE_OTHER_INST(46, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(47, FCmp , FCmpInst ) // Floating point comparison instr. 
+HANDLE_OTHER_INST(48, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(49, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(50, Select , SelectInst ) // select instruction +HANDLE_OTHER_INST(51, UserOp1, Instruction) // May be used internally in a pass +HANDLE_OTHER_INST(52, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(53, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(54, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(55, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(56, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(57, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(58, InsertValue, InsertValueInst) // insert into aggregate +HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction. + LAST_OTHER_INST(59) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index 3e250d85274..cb21e6364b4 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -361,6 +361,259 @@ private: } }; +//===----------------------------------------------------------------------===// +// AtomicCmpXchgInst Class +//===----------------------------------------------------------------------===// + +/// AtomicCmpXchgInst - an instruction that atomically checks whether a +/// specified value is in a memory location, and, if it is, stores a new value +/// there. Returns the value that was loaded. +/// +class AtomicCmpXchgInst : public Instruction { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + void Init(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, SynchronizationScope SynchScope); +protected: + virtual AtomicCmpXchgInst *clone_impl() const; +public: + // allocate space for exactly three operands + void *operator new(size_t s) { + return User::operator new(s, 3); + } + AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, SynchronizationScope SynchScope, + Instruction *InsertBefore = 0); + AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd); + + /// isVolatile - Return true if this is a cmpxchg from a volatile memory + /// location. + /// + bool isVolatile() const { + return getSubclassDataFromInstruction() & 1; + } + + /// setVolatile - Specify whether this is a volatile cmpxchg. + /// + void setVolatile(bool V) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) | + (unsigned)V); + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// Set the ordering constraint on this cmpxchg. + void setOrdering(AtomicOrdering Ordering) { + assert(Ordering != NotAtomic && + "CmpXchg instructions can only be atomic."); + setInstructionSubclassData((getSubclassDataFromInstruction() & 3) | + (Ordering << 2)); + } + + /// Specify whether this cmpxchg is atomic and orders other operations with + /// respect to all concurrently executing threads, or only with respect to + /// signal handlers executing in the same thread. + void setSynchScope(SynchronizationScope SynchScope) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) | + (SynchScope << 1)); + } + + /// Returns the ordering constraint on this cmpxchg. 
+  AtomicOrdering getOrdering() const {
+    return AtomicOrdering(getSubclassDataFromInstruction() >> 2);
+  }
+
+  /// Returns whether this cmpxchg is atomic between threads or only within a
+  /// single thread.
+  SynchronizationScope getSynchScope() const {
+    return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1);
+  }
+
+  Value *getPointerOperand() { return getOperand(0); }
+  const Value *getPointerOperand() const { return getOperand(0); }
+  static unsigned getPointerOperandIndex() { return 0U; }
+
+  Value *getCompareOperand() { return getOperand(1); }
+  const Value *getCompareOperand() const { return getOperand(1); }
+
+  Value *getNewValOperand() { return getOperand(2); }
+  const Value *getNewValOperand() const { return getOperand(2); }
+
+  unsigned getPointerAddressSpace() const {
+    return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const AtomicCmpXchgInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::AtomicCmpXchg;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+private:
+  // Shadow Instruction::setInstructionSubclassData with a private forwarding
+  // method so that subclasses cannot accidentally use it.
+  void setInstructionSubclassData(unsigned short D) {
+    Instruction::setInstructionSubclassData(D);
+  }
+};
+
+template <>
+struct OperandTraits<AtomicCmpXchgInst> :
+    public FixedNumOperandTraits<AtomicCmpXchgInst, 3> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value)
+
+//===----------------------------------------------------------------------===//
+//                                AtomicRMWInst Class
+//===----------------------------------------------------------------------===//
+
+/// AtomicRMWInst - an instruction that atomically reads a memory location,
+/// combines it with another value, and then stores the result back.  Returns
+/// the old value.
+///
+class AtomicRMWInst : public Instruction {
+  void *operator new(size_t, unsigned);  // DO NOT IMPLEMENT
+protected:
+  virtual AtomicRMWInst *clone_impl() const;
+public:
+  /// This enumeration lists the possible modifications atomicrmw can make.
+  /// In the descriptions, 'p' is the pointer to the instruction's memory
+  /// location, 'old' is the initial value of *p, and 'v' is the other value
+  /// passed to the instruction.  These instructions always return 'old'.
+  enum BinOp {
+    /// *p = v
+    Xchg,
+    /// *p = old + v
+    Add,
+    /// *p = old - v
+    Sub,
+    /// *p = old & v
+    And,
+    /// *p = ~(old & v)
+    Nand,
+    /// *p = old | v
+    Or,
+    /// *p = old ^ v
+    Xor,
+    /// *p = old >signed v ? old : v
+    Max,
+    /// *p = old <signed v ? old : v
+    Min,
+    /// *p = old >unsigned v ? old : v
+    UMax,
+    /// *p = old <unsigned v ? old : v
+    UMin,
+
+    FIRST_BINOP = Xchg,
+    LAST_BINOP = UMin,
+    BAD_BINOP
+  };
+
+  // allocate space for exactly two operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 2);
+  }
+  AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+                AtomicOrdering Ordering, SynchronizationScope SynchScope,
+                Instruction *InsertBefore = 0);
+  AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+                AtomicOrdering Ordering, SynchronizationScope SynchScope,
+                BasicBlock *InsertAtEnd);
+
+  BinOp getOperation() const {
+    return static_cast<BinOp>(getSubclassDataFromInstruction() >> 5);
+  }
+
+  void setOperation(BinOp Operation) {
+    unsigned short SubclassData = getSubclassDataFromInstruction();
+    setInstructionSubclassData((SubclassData & 31) |
+                               (Operation << 5));
+  }
+
+  /// isVolatile - Return true if this is a RMW on a volatile memory location.
+  ///
+  bool isVolatile() const {
+    return getSubclassDataFromInstruction() & 1;
+  }
+
+  /// setVolatile - Specify whether this is a volatile RMW or not.
+  ///
+  void setVolatile(bool V) {
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+                               (unsigned)V);
+  }
+
+  /// Transparently provide more efficient getOperand methods.
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// Set the ordering constraint on this RMW.
+  void setOrdering(AtomicOrdering Ordering) {
+    assert(Ordering != NotAtomic &&
+           "atomicrmw instructions can only be atomic.");
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~28) |
+                               (Ordering << 2));
+  }
+
+  /// Specify whether this RMW orders other operations with respect to all
+  /// concurrently executing threads, or only with respect to signal handlers
+  /// executing in the same thread.
+  void setSynchScope(SynchronizationScope SynchScope) {
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) |
+                               (SynchScope << 1));
+  }
+
+  /// Returns the ordering constraint on this RMW.
+  AtomicOrdering getOrdering() const {
+    return AtomicOrdering((getSubclassDataFromInstruction() & 28) >> 2);
+  }
+
+  /// Returns whether this RMW is atomic between threads or only within a
+  /// single thread.
+  SynchronizationScope getSynchScope() const {
+    return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1);
+  }
+
+  Value *getPointerOperand() { return getOperand(0); }
+  const Value *getPointerOperand() const { return getOperand(0); }
+  static unsigned getPointerOperandIndex() { return 0U; }
+
+  Value *getValOperand() { return getOperand(1); }
+  const Value *getValOperand() const { return getOperand(1); }
+
+  unsigned getPointerAddressSpace() const {
+    return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const AtomicRMWInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::AtomicRMW;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+private:
+  void Init(BinOp Operation, Value *Ptr, Value *Val,
+            AtomicOrdering Ordering, SynchronizationScope SynchScope);
+  // Shadow Instruction::setInstructionSubclassData with a private forwarding
+  // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) { + Instruction::setInstructionSubclassData(D); + } +}; + +template <> +struct OperandTraits + : public FixedNumOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicRMWInst, Value) + //===----------------------------------------------------------------------===// // GetElementPtrInst Class //===----------------------------------------------------------------------===// diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h index 85e6f62903e..a661c4fac68 100644 --- a/include/llvm/Support/InstVisitor.h +++ b/include/llvm/Support/InstVisitor.h @@ -170,6 +170,8 @@ public: RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(Instruction); } RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); } RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction); } + RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I){ DELEGATE(Instruction); } + RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction); } RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction); } RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction); } RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction); } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 970d7aa7ed1..d16cac1af2f 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -579,6 +579,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); + KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); + KEYWORD(umin); + KEYWORD(x); KEYWORD(blockaddress); @@ -645,6 +648,8 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(alloca, Alloca); INSTKEYWORD(load, Load); INSTKEYWORD(store, Store); + INSTKEYWORD(cmpxchg, AtomicCmpXchg); + INSTKEYWORD(atomicrmw, AtomicRMW); INSTKEYWORD(fence, Fence); INSTKEYWORD(getelementptr, GetElementPtr); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index fe4bb2e6376..f412c1c89aa 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2952,12 +2952,18 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_alloca: return ParseAlloc(Inst, PFS); case lltok::kw_load: return ParseLoad(Inst, PFS, false); case lltok::kw_store: return ParseStore(Inst, PFS, false); + case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS, false); + case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS, false); case lltok::kw_fence: return ParseFence(Inst, PFS); case lltok::kw_volatile: if (EatIfPresent(lltok::kw_load)) return ParseLoad(Inst, PFS, true); else if (EatIfPresent(lltok::kw_store)) return ParseStore(Inst, PFS, true); + else if (EatIfPresent(lltok::kw_cmpxchg)) + return ParseCmpXchg(Inst, PFS, true); + else if (EatIfPresent(lltok::kw_atomicrmw)) + return ParseAtomicRMW(Inst, PFS, true); else return TokError("expected 'load' or 'store'"); case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS); @@ -3725,6 +3731,97 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, return AteExtraComma ? InstExtraComma : InstNormal; } +/// ParseCmpXchg +/// ::= 'volatile'? 'cmpxchg' TypeAndValue ',' TypeAndValue ',' TypeAndValue +/// 'singlethread'? 
AtomicOrdering +int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS, + bool isVolatile) { + Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc; + bool AteExtraComma = false; + AtomicOrdering Ordering = NotAtomic; + SynchronizationScope Scope = CrossThread; + if (ParseTypeAndValue(Ptr, PtrLoc, PFS) || + ParseToken(lltok::comma, "expected ',' after cmpxchg address") || + ParseTypeAndValue(Cmp, CmpLoc, PFS) || + ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") || + ParseTypeAndValue(New, NewLoc, PFS) || + ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + return true; + + if (Ordering == Unordered) + return TokError("cmpxchg cannot be unordered"); + if (!Ptr->getType()->isPointerTy()) + return Error(PtrLoc, "cmpxchg operand must be a pointer"); + if (cast(Ptr->getType())->getElementType() != Cmp->getType()) + return Error(CmpLoc, "compare value and pointer type do not match"); + if (cast(Ptr->getType())->getElementType() != New->getType()) + return Error(NewLoc, "new value and pointer type do not match"); + if (!New->getType()->isIntegerTy()) + return Error(NewLoc, "cmpxchg operand must be an integer"); + unsigned Size = New->getType()->getPrimitiveSizeInBits(); + if (Size < 8 || (Size & (Size - 1))) + return Error(NewLoc, "cmpxchg operand must be power-of-two byte-sized" + " integer"); + + AtomicCmpXchgInst *CXI = + new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, Scope); + CXI->setVolatile(isVolatile); + Inst = CXI; + return AteExtraComma ? InstExtraComma : InstNormal; +} + +/// ParseAtomicRMW +/// ::= 'volatile'? 'atomicrmw' BinOp TypeAndValue ',' TypeAndValue +/// 'singlethread'? AtomicOrdering +int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS, + bool isVolatile) { + Value *Ptr, *Val; LocTy PtrLoc, ValLoc; + bool AteExtraComma = false; + AtomicOrdering Ordering = NotAtomic; + SynchronizationScope Scope = CrossThread; + AtomicRMWInst::BinOp Operation; + switch (Lex.getKind()) { + default: return TokError("expected binary operation in atomicrmw"); + case lltok::kw_xchg: Operation = AtomicRMWInst::Xchg; break; + case lltok::kw_add: Operation = AtomicRMWInst::Add; break; + case lltok::kw_sub: Operation = AtomicRMWInst::Sub; break; + case lltok::kw_and: Operation = AtomicRMWInst::And; break; + case lltok::kw_nand: Operation = AtomicRMWInst::Nand; break; + case lltok::kw_or: Operation = AtomicRMWInst::Or; break; + case lltok::kw_xor: Operation = AtomicRMWInst::Xor; break; + case lltok::kw_max: Operation = AtomicRMWInst::Max; break; + case lltok::kw_min: Operation = AtomicRMWInst::Min; break; + case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break; + case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break; + } + Lex.Lex(); // Eat the operation. 
+ + if (ParseTypeAndValue(Ptr, PtrLoc, PFS) || + ParseToken(lltok::comma, "expected ',' after atomicrmw address") || + ParseTypeAndValue(Val, ValLoc, PFS) || + ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + return true; + + if (Ordering == Unordered) + return TokError("atomicrmw cannot be unordered"); + if (!Ptr->getType()->isPointerTy()) + return Error(PtrLoc, "atomicrmw operand must be a pointer"); + if (cast(Ptr->getType())->getElementType() != Val->getType()) + return Error(ValLoc, "atomicrmw value and pointer type do not match"); + if (!Val->getType()->isIntegerTy()) + return Error(ValLoc, "atomicrmw operand must be an integer"); + unsigned Size = Val->getType()->getPrimitiveSizeInBits(); + if (Size < 8 || (Size & (Size - 1))) + return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized" + " integer"); + + AtomicRMWInst *RMWI = + new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope); + RMWI->setVolatile(isVolatile); + Inst = RMWI; + return AteExtraComma ? InstExtraComma : InstNormal; +} + /// ParseFence /// ::= 'fence' 'singlethread'? AtomicOrdering int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 6d2a929cc43..7fd01b6a214 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -365,6 +365,8 @@ namespace llvm { int ParseAlloc(Instruction *&I, PerFunctionState &PFS); int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS, bool isVolatile); int ParseFence(Instruction *&I, PerFunctionState &PFS); int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index f4c834ac235..a9e79c542cc 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -110,6 +110,9 @@ namespace lltok { kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, kw_ueq, kw_une, + // atomicrmw operations that aren't also instruction keywords. + kw_xchg, kw_nand, kw_max, kw_min, kw_umax, kw_umin, + // Instruction Opcodes (Opcode in UIntVal). 
kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul, kw_udiv, kw_sdiv, kw_fdiv, @@ -126,7 +129,8 @@ namespace lltok { kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, kw_resume, kw_unreachable, - kw_alloca, kw_load, kw_store, kw_fence, kw_getelementptr, + kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw, + kw_getelementptr, kw_extractelement, kw_insertelement, kw_shufflevector, kw_extractvalue, kw_insertvalue, kw_blockaddress, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index bab33ed2576..e0af683a247 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -131,6 +131,23 @@ static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) { } } +static AtomicRMWInst::BinOp GetDecodedRMWOperation(unsigned Val) { + switch (Val) { + default: return AtomicRMWInst::BAD_BINOP; + case bitc::RMW_XCHG: return AtomicRMWInst::Xchg; + case bitc::RMW_ADD: return AtomicRMWInst::Add; + case bitc::RMW_SUB: return AtomicRMWInst::Sub; + case bitc::RMW_AND: return AtomicRMWInst::And; + case bitc::RMW_NAND: return AtomicRMWInst::Nand; + case bitc::RMW_OR: return AtomicRMWInst::Or; + case bitc::RMW_XOR: return AtomicRMWInst::Xor; + case bitc::RMW_MAX: return AtomicRMWInst::Max; + case bitc::RMW_MIN: return AtomicRMWInst::Min; + case bitc::RMW_UMAX: return AtomicRMWInst::UMax; + case bitc::RMW_UMIN: return AtomicRMWInst::UMin; + } +} + static AtomicOrdering GetDecodedOrdering(unsigned Val) { switch (Val) { case bitc::ORDERING_NOTATOMIC: return NotAtomic; @@ -2595,6 +2612,48 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } + case bitc::FUNC_CODE_INST_CMPXCHG: { + // CMPXCHG:[ptrty, ptr, cmp, new, vol, ordering, synchscope] + unsigned OpNum = 0; + Value *Ptr, *Cmp, *New; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || + getValue(Record, OpNum, + cast(Ptr->getType())->getElementType(), Cmp) || + getValue(Record, OpNum, + cast(Ptr->getType())->getElementType(), New) || + OpNum+3 != Record.size()) + return Error("Invalid CMPXCHG record"); + AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]); + if (Ordering == NotAtomic) + return Error("Invalid CMPXCHG record"); + SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]); + I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope); + cast(I)->setVolatile(Record[OpNum]); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_ATOMICRMW: { + // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope] + unsigned OpNum = 0; + Value *Ptr, *Val; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || + getValue(Record, OpNum, + cast(Ptr->getType())->getElementType(), Val) || + OpNum+4 != Record.size()) + return Error("Invalid ATOMICRMW record"); + AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]); + if (Operation < AtomicRMWInst::FIRST_BINOP || + Operation > AtomicRMWInst::LAST_BINOP) + return Error("Invalid ATOMICRMW record"); + AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); + if (Ordering == NotAtomic) + return Error("Invalid ATOMICRMW record"); + SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); + I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); + cast(I)->setVolatile(Record[OpNum+1]); + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] if (2 != Record.size()) return Error("Invalid FENCE record"); diff --git 
a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 87154fc9c6f..8fcaf1111f4 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -101,6 +101,23 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { } } +static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) { + switch (Op) { + default: llvm_unreachable("Unknown RMW operation!"); + case AtomicRMWInst::Xchg: return bitc::RMW_XCHG; + case AtomicRMWInst::Add: return bitc::RMW_ADD; + case AtomicRMWInst::Sub: return bitc::RMW_SUB; + case AtomicRMWInst::And: return bitc::RMW_AND; + case AtomicRMWInst::Nand: return bitc::RMW_NAND; + case AtomicRMWInst::Or: return bitc::RMW_OR; + case AtomicRMWInst::Xor: return bitc::RMW_XOR; + case AtomicRMWInst::Max: return bitc::RMW_MAX; + case AtomicRMWInst::Min: return bitc::RMW_MIN; + case AtomicRMWInst::UMax: return bitc::RMW_UMAX; + case AtomicRMWInst::UMin: return bitc::RMW_UMIN; + } +} + static unsigned GetEncodedOrdering(AtomicOrdering Ordering) { switch (Ordering) { default: llvm_unreachable("Unknown atomic ordering"); @@ -1186,6 +1203,28 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(Log2_32(cast(I).getAlignment())+1); Vals.push_back(cast(I).isVolatile()); break; + case Instruction::AtomicCmpXchg: + Code = bitc::FUNC_CODE_INST_CMPXCHG; + PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr + Vals.push_back(VE.getValueID(I.getOperand(1))); // cmp. + Vals.push_back(VE.getValueID(I.getOperand(2))); // newval. + Vals.push_back(cast(I).isVolatile()); + Vals.push_back(GetEncodedOrdering( + cast(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope( + cast(I).getSynchScope())); + break; + case Instruction::AtomicRMW: + Code = bitc::FUNC_CODE_INST_ATOMICRMW; + PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr + Vals.push_back(VE.getValueID(I.getOperand(1))); // val. 
+ Vals.push_back(GetEncodedRMWOperation( + cast(I).getOperation())); + Vals.push_back(cast(I).isVolatile()); + Vals.push_back(GetEncodedOrdering(cast(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope( + cast(I).getSynchScope())); + break; case Instruction::Fence: Code = bitc::FUNC_CODE_INST_FENCE; Vals.push_back(GetEncodedOrdering(cast(I).getOrdering())); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c5c97904568..6740bacbfff 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3222,6 +3222,12 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } +void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { +} + +void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { +} + void SelectionDAGBuilder::visitFence(const FenceInst &I) { DebugLoc dl = getCurDebugLoc(); SDValue Ops[3]; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 11c4a483849..0360ad28fc1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -506,6 +506,8 @@ private: void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); + void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); + void visitAtomicRMW(const AtomicRMWInst &I); void visitFence(const FenceInst &I); void visitPHI(const PHINode &I); void visitCall(const CallInst &I); diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index e6cd418c321..e3e2484def3 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -658,6 +658,23 @@ static const char *getPredicateText(unsigned predicate) { return pred; } +static void writeAtomicRMWOperation(raw_ostream &Out, + AtomicRMWInst::BinOp Op) { + switch (Op) { + default: Out << " "; break; + case AtomicRMWInst::Xchg: Out << " xchg"; break; + case AtomicRMWInst::Add: Out << " add"; break; + case AtomicRMWInst::Sub: Out << " sub"; break; + case AtomicRMWInst::And: Out << " and"; break; + case AtomicRMWInst::Nand: Out << " nand"; break; + case AtomicRMWInst::Or: Out << " or"; break; + case AtomicRMWInst::Xor: Out << " xor"; break; + case AtomicRMWInst::Max: Out << " max"; break; + case AtomicRMWInst::Min: Out << " min"; break; + case AtomicRMWInst::UMax: Out << " umax"; break; + case AtomicRMWInst::UMin: Out << " umin"; break; + } +} static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { if (const OverflowingBinaryOperator *OBO = @@ -1670,6 +1687,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { if (const CmpInst *CI = dyn_cast(&I)) Out << ' ' << getPredicateText(CI->getPredicate()); + // Print out the atomicrmw operation + if (const AtomicRMWInst *RMWI = dyn_cast(&I)) + writeAtomicRMWOperation(Out, RMWI->getOperation()); + // Print out the type of the operands... const Value *Operand = I.getNumOperands() ? 
I.getOperand(0) : 0; @@ -1936,6 +1957,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << ", align " << cast(I).getAlignment(); } else if (isa(I) && cast(I).getAlignment()) { Out << ", align " << cast(I).getAlignment(); + } else if (const AtomicCmpXchgInst *CXI = dyn_cast(&I)) { + writeAtomic(CXI->getOrdering(), CXI->getSynchScope()); + } else if (const AtomicRMWInst *RMWI = dyn_cast(&I)) { + writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope()); } else if (const FenceInst *FI = dyn_cast(&I)) { writeAtomic(FI->getOrdering(), FI->getSynchScope()); } diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 09d16e7d448..ad433ef22a9 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -128,6 +128,8 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case Alloca: return "alloca"; case Load: return "load"; case Store: return "store"; + case AtomicCmpXchg: return "cmpxchg"; + case AtomicRMW: return "atomicrmw"; case Fence: return "fence"; case GetElementPtr: return "getelementptr"; diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 9fdff0773a6..abee7b741a5 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -1105,6 +1105,101 @@ void StoreInst::setAlignment(unsigned Align) { assert(getAlignment() == Align && "Alignment representation error!"); } +//===----------------------------------------------------------------------===// +// AtomicCmpXchgInst Implementation +//===----------------------------------------------------------------------===// + +void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + Op<0>() = Ptr; + Op<1>() = Cmp; + Op<2>() = NewVal; + setOrdering(Ordering); + setSynchScope(SynchScope); + + assert(getOperand(0) && getOperand(1) && getOperand(2) && + "All operands must be non-null!"); + assert(getOperand(0)->getType()->isPointerTy() && + "Ptr must have pointer type!"); + assert(getOperand(1)->getType() == + cast(getOperand(0)->getType())->getElementType() + && "Ptr must be a pointer to Cmp type!"); + assert(getOperand(2)->getType() == + cast(getOperand(0)->getType())->getElementType() + && "Ptr must be a pointer to NewVal type!"); + assert(Ordering != NotAtomic && + "AtomicCmpXchg instructions must be atomic!"); +} + +AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + Instruction *InsertBefore) + : Instruction(Cmp->getType(), AtomicCmpXchg, + OperandTraits::op_begin(this), + OperandTraits::operands(this), + InsertBefore) { + Init(Ptr, Cmp, NewVal, Ordering, SynchScope); +} + +AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd) + : Instruction(Cmp->getType(), AtomicCmpXchg, + OperandTraits::op_begin(this), + OperandTraits::operands(this), + InsertAtEnd) { + Init(Ptr, Cmp, NewVal, Ordering, SynchScope); +} + +//===----------------------------------------------------------------------===// +// AtomicRMWInst Implementation +//===----------------------------------------------------------------------===// + +void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + Op<0>() = Ptr; + Op<1>() = Val; + setOperation(Operation); + setOrdering(Ordering); + setSynchScope(SynchScope); + + assert(getOperand(0) && 
getOperand(1) && + "All operands must be non-null!"); + assert(getOperand(0)->getType()->isPointerTy() && + "Ptr must have pointer type!"); + assert(getOperand(1)->getType() == + cast(getOperand(0)->getType())->getElementType() + && "Ptr must be a pointer to Val type!"); + assert(Ordering != NotAtomic && + "AtomicRMW instructions must be atomic!"); +} + +AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + Instruction *InsertBefore) + : Instruction(Val->getType(), AtomicRMW, + OperandTraits::op_begin(this), + OperandTraits::operands(this), + InsertBefore) { + Init(Operation, Ptr, Val, Ordering, SynchScope); +} + +AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, + AtomicOrdering Ordering, + SynchronizationScope SynchScope, + BasicBlock *InsertAtEnd) + : Instruction(Val->getType(), AtomicRMW, + OperandTraits::op_begin(this), + OperandTraits::operands(this), + InsertAtEnd) { + Init(Operation, Ptr, Val, Ordering, SynchScope); +} + //===----------------------------------------------------------------------===// // FenceInst Implementation //===----------------------------------------------------------------------===// @@ -3148,6 +3243,22 @@ StoreInst *StoreInst::clone_impl() const { isVolatile(), getAlignment()); } +AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const { + AtomicCmpXchgInst *Result = + new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2), + getOrdering(), getSynchScope()); + Result->setVolatile(isVolatile()); + return Result; +} + +AtomicRMWInst *AtomicRMWInst::clone_impl() const { + AtomicRMWInst *Result = + new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1), + getOrdering(), getSynchScope()); + Result->setVolatile(isVolatile()); + return Result; +} + FenceInst *FenceInst::clone_impl() const { return new FenceInst(getContext(), getOrdering(), getSynchScope()); } diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 9ec2edf3fca..905e9a26233 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -288,6 +288,8 @@ namespace { void visitUserOp1(Instruction &I); void visitUserOp2(Instruction &I) { visitUserOp1(I); } void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI); + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI); + void visitAtomicRMWInst(AtomicRMWInst &RMWI); void visitFenceInst(FenceInst &FI); void visitAllocaInst(AllocaInst &AI); void visitExtractValueInst(ExtractValueInst &EVI); @@ -1327,6 +1329,40 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { visitInstruction(AI); } +void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) { + Assert1(CXI.getOrdering() != NotAtomic, + "cmpxchg instructions must be atomic.", &CXI); + Assert1(CXI.getOrdering() != Unordered, + "cmpxchg instructions cannot be unordered.", &CXI); + PointerType *PTy = dyn_cast(CXI.getOperand(0)->getType()); + Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI); + Type *ElTy = PTy->getElementType(); + Assert2(ElTy == CXI.getOperand(1)->getType(), + "Expected value type does not match pointer operand type!", + &CXI, ElTy); + Assert2(ElTy == CXI.getOperand(2)->getType(), + "Stored value type does not match pointer operand type!", + &CXI, ElTy); + visitInstruction(CXI); +} + +void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { + Assert1(RMWI.getOrdering() != NotAtomic, + "atomicrmw instructions must be atomic.", &RMWI); + Assert1(RMWI.getOrdering() != Unordered, + "atomicrmw instructions cannot be unordered.", &RMWI); 
+ PointerType *PTy = dyn_cast(RMWI.getOperand(0)->getType()); + Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI); + Type *ElTy = PTy->getElementType(); + Assert2(ElTy == RMWI.getOperand(1)->getType(), + "Argument value type does not match pointer operand type!", + &RMWI, ElTy); + Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() && + RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP, + "Invalid binary operation!", &RMWI); + visitInstruction(RMWI); +} + void Verifier::visitFenceInst(FenceInst &FI) { const AtomicOrdering Ordering = FI.getOrdering(); Assert1(Ordering == Acquire || Ordering == Release ||