diff --git a/BasiliskII/src/uae_cpu/fpu/core.h b/BasiliskII/src/uae_cpu/fpu/core.h
index 683a56c3..325dafdb 100644
--- a/BasiliskII/src/uae_cpu/fpu/core.h
+++ b/BasiliskII/src/uae_cpu/fpu/core.h
@@ -31,6 +31,16 @@
 #include "sysdeps.h"
 #include "fpu/types.h"
 
+/* Always use the x87 FPU stack on IA-32.  */
+#if defined(X86_ASSEMBLY)
+#define USE_X87_ASSEMBLY 1
+#endif
+
+/* Only use the x87 FPU on x86-64 if long double precision is requested.  */
+#if defined(X86_64_ASSEMBLY) && USE_LONG_DOUBLE
+#define USE_X87_ASSEMBLY 1
+#endif
+
 /* ========================================================================== */
 /* ========================= FPU CONTEXT DEFINITION ========================= */
 /* ========================================================================== */
@@ -150,8 +160,8 @@ struct fpu_t {
 	/* ---------------------------------------------------------------------- */
 	
 	#if			defined(FPU_X86) \
-			||	(defined(FPU_UAE) && defined(X86_ASSEMBLY)) \
-			||	(defined(FPU_IEEE) && defined(X86_ASSEMBLY))
+			||	(defined(FPU_UAE) && defined(USE_X87_ASSEMBLY)) \
+			||	(defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY))
 	
 	#define		CW_RESET				0x0040	// initial CW value after RESET
 	#define		CW_FINIT				0x037F	// initial CW value after FINIT
diff --git a/BasiliskII/src/uae_cpu/fpu/exceptions.h b/BasiliskII/src/uae_cpu/fpu/exceptions.h
index d9758ac2..9406b279 100644
--- a/BasiliskII/src/uae_cpu/fpu/exceptions.h
+++ b/BasiliskII/src/uae_cpu/fpu/exceptions.h
@@ -50,13 +50,13 @@
 /* -------------------------------------------------------------------------- */
 
 /* Optimized i386 fpu core must use native exceptions */
-#if defined(FPU_X86) && defined(X86_ASSEMBLY)
+#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_EXCEPTIONS
 # define FPU_USE_X86_EXCEPTIONS
 #endif
 
 /* Optimized i386 fpu core must use native accrued exceptions */
-#if defined(FPU_X86) && defined(X86_ASSEMBLY)
+#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_ACCRUED_EXCEPTIONS
 # define FPU_USE_X86_ACCRUED_EXCEPTIONS
 #endif
diff --git a/BasiliskII/src/uae_cpu/fpu/flags.h b/BasiliskII/src/uae_cpu/fpu/flags.h
index 509b0db5..1994ef27 100644
--- a/BasiliskII/src/uae_cpu/fpu/flags.h
+++ b/BasiliskII/src/uae_cpu/fpu/flags.h
@@ -49,13 +49,13 @@
 /* -------------------------------------------------------------------------- */
 
 /* Optimized i386 fpu core must use native flags */
-#if defined(FPU_X86) && defined(X86_ASSEMBLY)
+#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_FLAGS
 # define FPU_USE_X86_FLAGS
 #endif
 
 /* Old UAE FPU core can use native flags */
-#if defined(FPU_UAE) && defined(X86_ASSEMBLY)
+#if defined(FPU_UAE) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_FLAGS
 # define FPU_USE_X86_FLAGS
 #endif
@@ -67,7 +67,7 @@
 #endif
 
 /* JIT Compilation for FPU only works with lazy evaluation of FPU flags */
-#if defined(FPU_IEEE) && defined(X86_ASSEMBLY) && defined(USE_JIT_FPU)
+#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) && defined(USE_JIT_FPU)
 # undef FPU_USE_GENERIC_FLAGS
 # define FPU_USE_LAZY_FLAGS
 #endif
diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp
index 1820fa8b..f4a92153 100644
--- a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp
+++ b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp
@@ -2105,7 +2105,7 @@ PUBLIC void FFPU fpu_init (bool integral_68040)
 #if defined(FPU_USE_X86_ROUNDING)
 	// Initial state after boot, reset and frestore(null frame)
 	x86_control_word = CW_INITIAL;
-#elif defined(__i386__) && defined(X86_ASSEMBLY)
+#elif defined(USE_X87_ASSEMBLY)
 	volatile unsigned short int cw;
 	__asm__ __volatile__("fnstcw %0" : "=m" (cw));
 	cw &= ~0x0300; cw |= 0x0300; // CW_PC_EXTENDED
diff --git a/BasiliskII/src/uae_cpu/fpu/mathlib.cpp b/BasiliskII/src/uae_cpu/fpu/mathlib.cpp
index adb8f5cf..aebb6741 100644
--- a/BasiliskII/src/uae_cpu/fpu/mathlib.cpp
+++ b/BasiliskII/src/uae_cpu/fpu/mathlib.cpp
@@ -38,7 +38,7 @@
 #undef	FPU
 #define	FPU		fpu.
 
-#if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
+#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
 
 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y)
 {
diff --git a/BasiliskII/src/uae_cpu/fpu/mathlib.h b/BasiliskII/src/uae_cpu/fpu/mathlib.h
index 04062a85..2b4438ce 100644
--- a/BasiliskII/src/uae_cpu/fpu/mathlib.h
+++ b/BasiliskII/src/uae_cpu/fpu/mathlib.h
@@ -785,7 +785,7 @@ PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_regis
 # define fp_ceil	ceil
 #endif
 
-#if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
+#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
 // Assembly optimized support functions. Taken from glibc 2.2.2
 
 #undef fp_log
@@ -1105,7 +1105,7 @@ DEFINE_ROUND_FUNC(zero, 0xc00)
 
 DEFINE_ROUND_FUNC(nearest, 0x000)
 
-#endif /* X86_ASSEMBLY */
+#endif /* USE_X87_ASSEMBLY */
 
 #ifndef fp_round_to_minus_infinity
 #define fp_round_to_minus_infinity(x) fp_floor(x)
diff --git a/BasiliskII/src/uae_cpu/fpu/rounding.h b/BasiliskII/src/uae_cpu/fpu/rounding.h
index eda3952f..d1273450 100644
--- a/BasiliskII/src/uae_cpu/fpu/rounding.h
+++ b/BasiliskII/src/uae_cpu/fpu/rounding.h
@@ -50,26 +50,26 @@
 /* -------------------------------------------------------------------------- */
 
 /* Optimized i386 fpu core must use native rounding mode */
-#if defined(FPU_X86) && defined(X86_ASSEMBLY)
+#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_ROUNDING_MODE
 # define FPU_USE_X86_ROUNDING_MODE
 #endif
 
 /* Optimized i386 fpu core must use native rounding precision */
-#if defined(FPU_X86) && defined(X86_ASSEMBLY)
+#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_ROUNDING_PRECISION
 # define FPU_USE_X86_ROUNDING_PRECISION
 #endif
 
 #if 0 // gb-- FIXME: that doesn't work
 /* IEEE-based fpu core can have native rounding mode on i386 */
-#if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
+#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_ROUNDING_MODE
 # define FPU_USE_X86_ROUNDING_MODE
 #endif
 
 /* IEEE-based fpu core can have native rounding precision on i386 */
-#if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
+#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
 # undef FPU_USE_GENERIC_ROUNDING_PRECISION
 # define FPU_USE_X86_ROUNDING_PRECISION
 #endif
diff --git a/BasiliskII/src/uae_cpu/fpu/types.h b/BasiliskII/src/uae_cpu/fpu/types.h
index 2a19250b..e182309f 100644
--- a/BasiliskII/src/uae_cpu/fpu/types.h
+++ b/BasiliskII/src/uae_cpu/fpu/types.h
@@ -133,7 +133,12 @@ typedef long double uae_f64;
 typedef long double uae_f96;
 typedef uae_f96 fpu_register;
 #define USE_LONG_DOUBLE 1
-#elif (SIZEOF_LONG_DOUBLE == 16) && 0
+#elif SIZEOF_LONG_DOUBLE == 16 && defined(__x86_64__)
+/* Long doubles on x86-64 are still handled by the legacy x87 FPU stack.  */
+typedef long double uae_f128;
+typedef uae_f128 fpu_register;
+#define USE_LONG_DOUBLE 1
+#elif 0
 /* Disable for now and probably for good as (i) the emulator
    implementation is not correct, (ii) I don't know of any CPU which
    handles this kind of format *natively* with conformance to IEEE.  */