diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index feade6a56fb..8dca3c1cfb1 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -24,7 +24,7 @@ class Twine; /// Triple - Helper class for working with target triples. /// -/// Target triples are strings in the format of: +/// Target triples are strings in the canonical form: /// ARCHITECTURE-VENDOR-OPERATING_SYSTEM /// or /// ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT @@ -35,20 +35,11 @@ class Twine; /// from the components of the target triple to well known IDs. /// /// At its core the Triple class is designed to be a wrapper for a triple -/// string; it does not normally change or normalize the triple string, instead -/// it provides additional APIs to parse normalized parts out of the triple. +/// string; the constructor does not change or normalize the triple string. +/// Clients that need to handle the non-canonical triples that users often +/// specify should use the normalize method. /// -/// One curiosity this implies is that for some odd triples the results of, -/// e.g., getOSName() can be very different from the result of getOS(). For -/// example, for 'i386-mingw32', getOS() will return MinGW32, but since -/// getOSName() is purely based on the string structure that will return the -/// empty string. -/// -/// Clients should generally avoid using getOSName() and related APIs unless -/// they are familiar with the triple format (this is particularly true when -/// rewriting a triple). -/// -/// See autoconf/config.guess for a glimpse into what they look like in +/// See autoconf/config.guess for a glimpse into what triples look like in /// practice. 
class Triple { public: @@ -117,6 +108,9 @@ private: mutable OSType OS; bool isInitialized() const { return Arch != InvalidArch; } + static ArchType ParseArch(StringRef ArchName); + static VendorType ParseVendor(StringRef VendorName); + static OSType ParseOS(StringRef OSName); void Parse() const; public: @@ -133,6 +127,16 @@ public: Data += OSStr; } + /// @} + /// @name Normalization + /// @{ + + /// normalize - Turn an arbitrary machine specification into the canonical + /// triple form (or something sensible that the Triple class understands if + /// nothing better can reasonably be done). In particular, it handles the + /// common case in which otherwise valid components are in the wrong order. + static std::string normalize(StringRef Str); + /// @} /// @name Typed Component Access /// @{ diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 6a70449b56d..7806aec16c6 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -221,63 +221,111 @@ const char *Triple::getArchNameForAssembler() { // -void Triple::Parse() const { - assert(!isInitialized() && "Invalid parse call."); - - StringRef ArchName = getArchName(); - StringRef VendorName = getVendorName(); - StringRef OSName = getOSName(); - +Triple::ArchType Triple::ParseArch(StringRef ArchName) { if (ArchName.size() == 4 && ArchName[0] == 'i' && ArchName[2] == '8' && ArchName[3] == '6' && ArchName[1] - '3' < 6) // i[3-9]86 - Arch = x86; + return x86; else if (ArchName == "amd64" || ArchName == "x86_64") - Arch = x86_64; + return x86_64; else if (ArchName == "bfin") - Arch = bfin; + return bfin; else if (ArchName == "pic16") - Arch = pic16; + return pic16; else if (ArchName == "powerpc") - Arch = ppc; + return ppc; else if ((ArchName == "powerpc64") || (ArchName == "ppu")) - Arch = ppc64; + return ppc64; else if (ArchName == "mblaze") - Arch = mblaze; + return mblaze; else if (ArchName == "arm" || ArchName.startswith("armv") || ArchName == "xscale") - Arch = arm; + return arm; else if 
(ArchName == "thumb" || ArchName.startswith("thumbv")) - Arch = thumb; + return thumb; else if (ArchName.startswith("alpha")) - Arch = alpha; + return alpha; else if (ArchName == "spu" || ArchName == "cellspu") - Arch = cellspu; + return cellspu; else if (ArchName == "msp430") - Arch = msp430; + return msp430; else if (ArchName == "mips" || ArchName == "mipsallegrex") - Arch = mips; + return mips; else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" || ArchName == "psp") - Arch = mipsel; + return mipsel; else if (ArchName == "sparc") - Arch = sparc; + return sparc; else if (ArchName == "sparcv9") - Arch = sparcv9; + return sparcv9; else if (ArchName == "s390x") - Arch = systemz; + return systemz; else if (ArchName == "tce") - Arch = tce; + return tce; else if (ArchName == "xcore") - Arch = xcore; + return xcore; else - Arch = UnknownArch; + return UnknownArch; +} +Triple::VendorType Triple::ParseVendor(StringRef VendorName) { + if (VendorName == "apple") + return Apple; + else if (VendorName == "pc") + return PC; + else + return UnknownVendor; +} + +Triple::OSType Triple::ParseOS(StringRef OSName) { + if (OSName.startswith("auroraux")) + return AuroraUX; + else if (OSName.startswith("cygwin")) + return Cygwin; + else if (OSName.startswith("darwin")) + return Darwin; + else if (OSName.startswith("dragonfly")) + return DragonFly; + else if (OSName.startswith("freebsd")) + return FreeBSD; + else if (OSName.startswith("linux")) + return Linux; + else if (OSName.startswith("lv2")) + return Lv2; + else if (OSName.startswith("mingw32")) + return MinGW32; + else if (OSName.startswith("mingw64")) + return MinGW64; + else if (OSName.startswith("netbsd")) + return NetBSD; + else if (OSName.startswith("openbsd")) + return OpenBSD; + else if (OSName.startswith("psp")) + return Psp; + else if (OSName.startswith("solaris")) + return Solaris; + else if (OSName.startswith("win32")) + return Win32; + else if (OSName.startswith("haiku")) + return Haiku; + else if 
(OSName.startswith("minix")) + return Minix; + else + return UnknownOS; +} + +void Triple::Parse() const { + assert(!isInitialized() && "Invalid parse call."); + + Arch = ParseArch(getArchName()); + Vendor = ParseVendor(getVendorName()); + OS = ParseOS(getOSName()); // Handle some exceptional cases where the OS / environment components are // stuck into the vendor field. + // TODO: Remove this logic and have places that need it use 'normalize'. if (StringRef(getTriple()).count('-') == 1) { StringRef VendorName = getVendorName(); @@ -291,51 +339,136 @@ void Triple::Parse() const { // the environment. } - if (VendorName == "apple") - Vendor = Apple; - else if (VendorName == "pc") - Vendor = PC; - else - Vendor = UnknownVendor; - - if (OSName.startswith("auroraux")) - OS = AuroraUX; - else if (OSName.startswith("cygwin")) - OS = Cygwin; - else if (OSName.startswith("darwin")) - OS = Darwin; - else if (OSName.startswith("dragonfly")) - OS = DragonFly; - else if (OSName.startswith("freebsd")) - OS = FreeBSD; - else if (OSName.startswith("linux")) - OS = Linux; - else if (OSName.startswith("lv2")) - OS = Lv2; - else if (OSName.startswith("mingw32")) - OS = MinGW32; - else if (OSName.startswith("mingw64")) - OS = MinGW64; - else if (OSName.startswith("netbsd")) - OS = NetBSD; - else if (OSName.startswith("openbsd")) - OS = OpenBSD; - else if (OSName.startswith("psp")) - OS = Psp; - else if (OSName.startswith("solaris")) - OS = Solaris; - else if (OSName.startswith("win32")) - OS = Win32; - else if (OSName.startswith("haiku")) - OS = Haiku; - else if (OSName.startswith("minix")) - OS = Minix; - else - OS = UnknownOS; - assert(isInitialized() && "Failed to initialize!"); } +std::string Triple::normalize(StringRef Str) { + // Parse into components. 
+ SmallVector<StringRef, 4> Components; + for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) { + Last = Str.find('-', First); + Components.push_back(Str.slice(First, Last)); + } + + // If the first component corresponds to a known architecture, preferentially + // use it for the architecture. If the second component corresponds to a + // known vendor, preferentially use it for the vendor, etc. This avoids silly + // component movement when a component parses as (eg) both a valid arch and a + // valid os. + ArchType Arch = UnknownArch; + if (Components.size() > 0) + Arch = ParseArch(Components[0]); + VendorType Vendor = UnknownVendor; + if (Components.size() > 1) + Vendor = ParseVendor(Components[1]); + OSType OS = UnknownOS; + if (Components.size() > 2) + OS = ParseOS(Components[2]); + + // Note which components are already in their final position. These will not + // be moved. + bool Found[3]; + Found[0] = Arch != UnknownArch; + Found[1] = Vendor != UnknownVendor; + Found[2] = OS != UnknownOS; + + // If they are not there already, permute the components into their canonical + // positions by seeing if they parse as a valid architecture, and if so moving + // the component to the architecture position etc. + for (unsigned Pos = 0; Pos != 3; ++Pos) { + if (Found[Pos]) + continue; // Already in the canonical position. + + for (unsigned Idx = 0; Idx != Components.size(); ++Idx) { + // Do not reparse any components that already matched. + if (Idx < 3 && Found[Idx]) + continue; + + // Does this component parse as valid for the target position? 
+ bool Valid = false; + StringRef Comp = Components[Idx]; + switch (Pos) { + default: + assert(false && "unexpected component type!"); + case 0: + Arch = ParseArch(Comp); + Valid = Arch != UnknownArch; + break; + case 1: + Vendor = ParseVendor(Comp); + Valid = Vendor != UnknownVendor; + break; + case 2: + OS = ParseOS(Comp); + Valid = OS != UnknownOS; + break; + } + if (!Valid) + continue; // Nope, try the next component. + + // Move the component to the target position, pushing any non-fixed + // components that are in the way to the right. This tends to give + // good results in the common cases of a forgotten vendor component + // or a wrongly positioned environment. + if (Pos < Idx) { + // Insert left, pushing the existing components to the right. For + // example, a-b-i386 -> i386-a-b when moving i386 to the front. + StringRef CurrentComponent(""); // The empty component. + // Replace the component we are moving with an empty component. + std::swap(CurrentComponent, Components[Idx]); + // Insert the component being moved at Pos, displacing any existing + // components to the right. + for (unsigned i = Pos; !CurrentComponent.empty(); ++i) { + // Skip over any fixed components. + while (i < 3 && Found[i]) ++i; + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. + std::swap(CurrentComponent, Components[i]); + } + } else if (Pos > Idx) { + // Push right by inserting empty components until the component at Idx + // reaches the target position Pos. For example, pc-a -> -pc-a when + // moving pc to the second position. + do { + // Insert one empty component at Idx. + StringRef CurrentComponent(""); // The empty component. + for (unsigned i = Idx; i < Components.size(); ++i) { + // Skip over any fixed components. + while (i < 3 && Found[i]) ++i; + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. 
+ std::swap(CurrentComponent, Components[i]); + // If it was placed on top of an empty component then we are done. + if (CurrentComponent.empty()) + break; + } + // The last component was pushed off the end - append it. + if (!CurrentComponent.empty()) + Components.push_back(CurrentComponent); + + // Advance Idx to the component's new position. + while (++Idx < 3 && Found[Idx]) {} + } while (Idx < Pos); // Add more until the final position is reached. + } + assert(Pos < Components.size() && Components[Pos] == Comp && + "Component moved wrong!"); + Found[Pos] = true; + break; + } + } + + // Special case logic goes here. At this point Arch, Vendor and OS have the + // correct values for the computed components. + + // Stick the corrected components back together to form the normalized string. + std::string Normalized; + for (unsigned i = 0, e = Components.size(); i != e; ++i) { + if (i) Normalized += '-'; + Normalized += Components[i]; + } + return Normalized; +} + StringRef Triple::getArchName() const { return StringRef(Data).split('-').first; // Isolate first component } diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp index 1a9e81a0df7..067f5e5116c 100644 --- a/unittests/ADT/TripleTest.cpp +++ b/unittests/ADT/TripleTest.cpp @@ -92,18 +92,117 @@ TEST(TripleTest, ParsedIDs) { T = Triple("huh"); EXPECT_EQ(Triple::UnknownArch, T.getArch()); +} - // Two exceptional cases. 
+static std::string Join(StringRef A, StringRef B, StringRef C) { + std::string Str = A; Str += '-'; Str += B; Str += '-'; Str += C; + return Str; +} - T = Triple("i386-mingw32"); - EXPECT_EQ(Triple::x86, T.getArch()); - EXPECT_EQ(Triple::PC, T.getVendor()); - EXPECT_EQ(Triple::MinGW32, T.getOS()); +static std::string Join(StringRef A, StringRef B, StringRef C, StringRef D) { + std::string Str = A; Str += '-'; Str += B; Str += '-'; Str += C; Str += '-'; + Str += D; return Str; +} - T = Triple("arm-elf"); - EXPECT_EQ(Triple::arm, T.getArch()); - EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); - EXPECT_EQ(Triple::UnknownOS, T.getOS()); +TEST(TripleTest, Normalization) { + EXPECT_EQ("", Triple::normalize("")); + EXPECT_EQ("-", Triple::normalize("-")); + EXPECT_EQ("--", Triple::normalize("--")); + EXPECT_EQ("---", Triple::normalize("---")); + EXPECT_EQ("----", Triple::normalize("----")); + + EXPECT_EQ("a", Triple::normalize("a")); + EXPECT_EQ("a-b", Triple::normalize("a-b")); + EXPECT_EQ("a-b-c", Triple::normalize("a-b-c")); + EXPECT_EQ("a-b-c-d", Triple::normalize("a-b-c-d")); + + EXPECT_EQ("i386-b-c", Triple::normalize("i386-b-c")); + EXPECT_EQ("i386-a-c", Triple::normalize("a-i386-c")); + EXPECT_EQ("i386-a-b", Triple::normalize("a-b-i386")); + + EXPECT_EQ("a-pc-c", Triple::normalize("a-pc-c")); + EXPECT_EQ("-pc-b-c", Triple::normalize("pc-b-c")); + EXPECT_EQ("a-pc-b", Triple::normalize("a-b-pc")); + + EXPECT_EQ("a-b-linux", Triple::normalize("a-b-linux")); + EXPECT_EQ("--linux-b-c", Triple::normalize("linux-b-c")); + EXPECT_EQ("a--linux-c", Triple::normalize("a-linux-c")); + + EXPECT_EQ("i386-pc-a", Triple::normalize("a-pc-i386")); + EXPECT_EQ("i386-pc-", Triple::normalize("-pc-i386")); + EXPECT_EQ("-pc-linux-c", Triple::normalize("linux-pc-c")); + EXPECT_EQ("-pc-linux", Triple::normalize("linux-pc-")); + + EXPECT_EQ("i386", Triple::normalize("i386")); + EXPECT_EQ("-pc", Triple::normalize("pc")); + EXPECT_EQ("--linux", Triple::normalize("linux")); + + // Check that 
normalizing a permuted set of valid components returns a + // triple with the unpermuted components. + StringRef C[4]; + C[3] = "environment"; + for (int Arch = 1+Triple::UnknownArch; Arch < Triple::InvalidArch; ++Arch) { + C[0] = Triple::getArchTypeName(Triple::ArchType(Arch)); + for (int Vendor = 1+Triple::UnknownVendor; Vendor <= Triple::PC; + ++Vendor) { + C[1] = Triple::getVendorTypeName(Triple::VendorType(Vendor)); + for (int OS = 1+Triple::UnknownOS; OS <= Triple::Minix; ++OS) { + C[2] = Triple::getOSTypeName(Triple::OSType(OS)); + + std::string E = Join(C[0], C[1], C[2]); + std::string F = Join(C[0], C[1], C[2], C[3]); + EXPECT_EQ(E, Triple::normalize(Join(C[0], C[1], C[2]))); + EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[2], C[3]))); + + // If a value has multiple interpretations, then the permutation + // test will inevitably fail. Currently this is only the case for + // "psp" which parses as both an architecture and an O/S. + if (OS == Triple::Psp) + continue; + + EXPECT_EQ(E, Triple::normalize(Join(C[0], C[2], C[1]))); + EXPECT_EQ(E, Triple::normalize(Join(C[1], C[2], C[0]))); + EXPECT_EQ(E, Triple::normalize(Join(C[1], C[0], C[2]))); + EXPECT_EQ(E, Triple::normalize(Join(C[2], C[0], C[1]))); + EXPECT_EQ(E, Triple::normalize(Join(C[2], C[1], C[0]))); + + EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[3], C[2]))); + EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[3], C[1]))); + EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[1], C[3]))); + EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[1], C[2]))); + EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[2], C[1]))); + EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[3], C[0]))); + EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[0], C[3]))); + EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[0], C[2]))); + EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[2], C[0]))); + EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[2], C[3]))); + EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[3], 
C[2]))); + EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[0], C[1]))); + EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[1], C[0]))); + EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[1], C[3]))); + EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[3], C[1]))); + EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[3], C[0]))); + EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[0], C[3]))); + EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[1], C[2]))); + EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[2], C[1]))); + EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[2], C[0]))); + EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[0], C[2]))); + EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[0], C[1]))); + EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[1], C[0]))); + } + } + } + + EXPECT_EQ("a-b-psp", Triple::normalize("a-b-psp")); + EXPECT_EQ("psp-b-c", Triple::normalize("psp-b-c")); + + // Various real-world funky triples. The value returned by GCC's config.sub + // is given in the comment. + EXPECT_EQ("i386--mingw32", Triple::normalize("i386-mingw32")); // i386-pc-mingw32 + EXPECT_EQ("x86_64--linux-gnu", Triple::normalize("x86_64-linux-gnu")); // x86_64-pc-linux-gnu + EXPECT_EQ("i486--linux-gnu", Triple::normalize("i486-linux-gnu")); // i486-pc-linux-gnu + EXPECT_EQ("i386-redhat-linux", Triple::normalize("i386-redhat-linux")); // i386-redhat-linux-gnu + EXPECT_EQ("i686--linux", Triple::normalize("i686-linux")); // i686-pc-linux-gnu } TEST(TripleTest, MutateName) {