diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index db92a0cdc02..6742a90058a 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -49,7 +49,7 @@ def RetCC_X86_32_C : CallingConv<[
   // weirdly; this is really the sse-regparm calling convention) in which
   // case they use XMM0, otherwise it is the same as the common X86 calling
   // conv.
-  CCIfInReg<CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1]>>>,
+  CCIfInReg<CCIfSubtarget<"hasSSE2()", CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1]>>>>,
   CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
   CCDelegateTo<RetCC_X86Common>
 ]>;
@@ -134,7 +134,8 @@ def CC_X86_64_C : CallingConv<[
 
   // The first 8 FP/Vector arguments are passed in XMM registers.
   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
+            CCIfSubtarget<"hasSSE1()",
+            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
   // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
   // registers on Darwin.
@@ -223,7 +224,8 @@ def CC_X86_64_TailCall : CallingConv<[
 
   // The first 8 FP/Vector arguments are passed in XMM registers.
   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
+            CCIfSubtarget<"hasSSE1()",
+            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
   // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
   // registers on Darwin.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 111269f928c..19d5aedf693 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1031,6 +1031,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
   bool isVarArg = TheCall->isVarArg();
+  bool Is64Bit = Subtarget->is64Bit();
   CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
   CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
 
@@ -1039,7 +1040,14 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     MVT CopyVT = RVLocs[i].getValVT();
-    
+
+    // If this is x86-64, and we disabled SSE, we can't return FP values
+    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+        ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
+      cerr << "SSE register return with SSE disabled\n";
+      exit(1);
+    }
+
     // If this is a call to a function that returns an fp value on the floating
     // point stack, but where we prefer to use the value in xmm registers, copy
     // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
@@ -1382,6 +1390,13 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                      TotalNumXMMRegs);
 
+    assert((Subtarget->hasSSE1() || !NumXMMRegs) &&
+           "SSE register cannot be used when SSE is disabled!");
+    if (!Subtarget->hasSSE1()) {
+      // Kernel mode asks for SSE to be disabled, so don't push them
+      // on the stack.
+      TotalNumXMMRegs = 0;
+    }
     // For X86-64, if there are vararg parameters that are passed via
     // registers, then we must store them to their spots on the stack so they
     // may be loaded by deferencing the result of va_next.
@@ -1675,6 +1690,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
     };
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+    assert((Subtarget->hasSSE1() || !NumXMMRegs)
+           && "SSE registers cannot be used when SSE is disabled");
 
     Chain = DAG.getCopyToReg(Chain, X86::AL,
                              DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 7034c555ccc..ff7f72be134 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -331,7 +331,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
   // are enabled. These are available on all x86-64 CPUs.
   if (Is64Bit) {
     HasX86_64 = true;
-#if 1
+#if 0
     if (X86SSELevel < SSE2)
       X86SSELevel = SSE2;
 #endif
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index 99826dd7159..b12e4137dbd 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -1,7 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
 ; PR3402
-; reverted
-; XFAIL: *
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/X86/nosse-varargs.ll b/test/CodeGen/X86/nosse-varargs.ll
new file mode 100644
index 00000000000..e6da0ab5e37
--- /dev/null
+++ b/test/CodeGen/X86/nosse-varargs.ll
@@ -0,0 +1,46 @@
+; RUN: llvm-as < %s > %t
+; RUN: llc -march=x86-64 -mattr=-sse < %t | not grep xmm
+; RUN: llc -march=x86-64 < %t | grep xmm
+; PR3403
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+ %struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define i32 @foo(float %a, i8* nocapture %fmt, ...) nounwind {
+entry:
+  %ap = alloca [1 x %struct.__va_list_tag], align 8    ; <[1 x %struct.__va_list_tag]*> [#uses=4]
+  %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*    ; <i8*> [#uses=2]
+  call void @llvm.va_start(i8* %ap12)
+  %0 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0    ; <i32*> [#uses=2]
+  %1 = load i32* %0, align 8    ; <i32> [#uses=3]
+  %2 = icmp ult i32 %1, 48    ; <i1> [#uses=1]
+  br i1 %2, label %bb, label %bb3
+
+bb:    ; preds = %entry
+  %3 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 3    ; <i8**> [#uses=1]
+  %4 = load i8** %3, align 8    ; <i8*> [#uses=1]
+  %5 = inttoptr i32 %1 to i8*    ; <i8*> [#uses=1]
+  %6 = ptrtoint i8* %5 to i64    ; <i64> [#uses=1]
+  %ctg2 = getelementptr i8* %4, i64 %6    ; <i8*> [#uses=1]
+  %7 = add i32 %1, 8    ; <i32> [#uses=1]
+  store i32 %7, i32* %0, align 8
+  br label %bb4
+
+bb3:    ; preds = %entry
+  %8 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2    ; <i8**> [#uses=2]
+  %9 = load i8** %8, align 8    ; <i8*> [#uses=2]
+  %10 = getelementptr i8* %9, i64 8    ; <i8*> [#uses=1]
+  store i8* %10, i8** %8, align 8
+  br label %bb4
+
+bb4:    ; preds = %bb3, %bb
+  %addr.0.0 = phi i8* [ %ctg2, %bb ], [ %9, %bb3 ]    ; <i8*> [#uses=1]
+  %11 = bitcast i8* %addr.0.0 to i32*    ; <i32*> [#uses=1]
+  %12 = load i32* %11, align 4    ; <i32> [#uses=1]
+  call void @llvm.va_end(i8* %ap12)
+  ret i32 %12
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
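
Note, not part of the patch: on x86-64 a varargs caller advertises in %al an
upper bound on the number of XMM registers used for arguments, which is why
LowerCALL copies NumXMMRegs into X86::AL; the new asserts check that this
count is zero whenever SSE is disabled. The other case the patch handles is
an f32/f64 call result, which the x86-64 ABI returns in XMM0. A minimal
hypothetical example (not one of the tests added above) that should now hit
the "SSE register return with SSE disabled" diagnostic when compiled with
"llc -march=x86-64 -mattr=-sse", while selecting normally without the flag:

    ; The double return value is assigned to XMM0 by the return calling
    ; convention, which LowerCallResult now rejects when SSE is unavailable.
    declare double @get_double()

    define double @caller() nounwind {
    entry:
      %v = call double @get_double()
      ret double %v
    }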