diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 01cea3b0a78..0dfc3167600 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9067,10 +9067,11 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
 SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
   DebugLoc dl = Op.getDebugLoc();
 
-  if (!Subtarget->hasSSE2()) {
+  // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+  // There isn't any reason to disable it if the target processor supports it.
+  if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
     SDValue Chain = Op.getOperand(0);
-    SDValue Zero = DAG.getConstant(0,
-                                   Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+    SDValue Zero = DAG.getConstant(0, MVT::i32);
     SDValue Ops[] = {
       DAG.getRegister(X86::ESP, MVT::i32), // Base
       DAG.getTargetConstant(1, MVT::i8),   // Scale
diff --git a/test/CodeGen/X86/membarrier.ll b/test/CodeGen/X86/membarrier.ll
new file mode 100644
index 00000000000..42f8ef5ff04
--- /dev/null
+++ b/test/CodeGen/X86/membarrier.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 -mattr=-sse -O0
+; PR9675
+
+define i32 @t() {
+entry:
+  %i = alloca i32, align 4
+  store i32 1, i32* %i, align 4
+  call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+  %0 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %i, i32 1)
+  call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+  ret i32 0
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind