diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h new file mode 100644 index 00000000000..1a64d0a7011 --- /dev/null +++ b/include/llvm/ADT/Twine.h @@ -0,0 +1,347 @@ +//===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_TWINE_H +#define LLVM_ADT_TWINE_H + +#include "llvm/ADT/StringRef.h" +#include +#include + +namespace llvm { + template + class SmallVectorImpl; + class StringRef; + class raw_ostream; + + /// Twine - A lightweight data structure for efficiently representing the + /// concatenation of temporary values as strings. + /// + /// A Twine is a kind of rope, it represents a concatenated string using a + /// binary-tree, where the string is the preorder of the nodes. Since the + /// Twine can be efficiently rendered into a buffer when its result is used, + /// it avoids the cost of generating temporary values for intermediate string + /// results -- particularly in cases when the Twine result is never + /// required. By explicitly tracking the type of leaf nodes, we can also avoid + /// the creation of temporary strings for conversions operations (such as + /// appending an integer to a string). + /// + /// A Twine is not intended for use directly and should not be stored, its + /// implementation relies on the ability to store pointers to temporary stack + /// objects which may be deallocated at the end of a statement. Twines should + /// only be used accepted as const references in arguments, when an API wishes + /// to accept possibly-concatenated strings. + /// + /// Twines support a special 'null' value, which always concatenates to form + /// itself, and renders as an empty string. This can be returned from APIs to + /// effectively nullify any concatenations performed on the result. + /// + /// \b Implementation \n + /// + /// Given the nature of a Twine, it is not possible for the Twine's + /// concatenation method to construct interior nodes; the result must be + /// represented inside the returned value. For this reason a Twine object + /// actually holds two values, the left- and right-hand sides of a + /// concatenation. We also have nullary Twine objects, which are effectively + /// sentinel values that represent empty strings. + /// + /// Thus, a Twine can effectively have zero, one, or two children. The \see + /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for + /// testing the number of children. + /// + /// We maintain a number of invariants on Twine objects (FIXME: Why): + /// - Nullary twines are always represented with their Kind on the left-hand + /// side, and the Empty kind on the right-hand side. + /// - Unary twines are always represented with the value on the left-hand + /// side, and the Empty kind on the right-hand side. + /// - If a Twine has another Twine as a child, that child should always be + /// binary (otherwise it could have been folded into the parent). + /// + /// These invariants are check by \see isValid(). + /// + /// \b Efficiency Considerations \n + /// + /// The Twine is designed to yield efficient and small code for common + /// situations. For this reason, the concat() method is inlined so that + /// concatenations of leaf nodes can be optimized into stores directly into a + /// single stack allocated object. + /// + /// In practice, not all compilers can be trusted to optimize concat() fully, + /// so we provide two additional methods (and accompanying operator+ + /// overloads) to guarantee that particularly important cases (cstring plus + /// StringRef) codegen as desired. + class Twine { + /// NodeKind - Represent the type of an argument. + enum NodeKind { + /// An empty string; the result of concatenating anything with it is also + /// empty. + NullKind, + + /// The empty string. + EmptyKind, + + /// A pointer to a C string instance. + CStringKind, + + /// A pointer to an std::string instance. + StdStringKind, + + /// A pointer to a StringRef instance. + StringRefKind, + + /// A pointer to a Twine instance. + TwineKind + }; + + private: + /// LHS - The prefix in the concatenation, which may be uninitialized for + /// Null or Empty kinds. + const void *LHS; + /// RHS - The suffix in the concatenation, which may be uninitialized for + /// Null or Empty kinds. + const void *RHS; + /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). + NodeKind LHSKind : 8; + /// RHSKind - The NodeKind of the left hand side, \see getLHSKind(). + NodeKind RHSKind : 8; + + private: + /// Construct a nullary twine; the kind must be NullKind or EmptyKind. + explicit Twine(NodeKind Kind) + : LHSKind(Kind), RHSKind(EmptyKind) { + assert(isNullary() && "Invalid kind!"); + } + + /// Construct a binary twine. + explicit Twine(const Twine &_LHS, const Twine &_RHS) + : LHS(&_LHS), RHS(&_RHS), LHSKind(TwineKind), RHSKind(TwineKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct a twine from explicit values. + explicit Twine(const void *_LHS, unsigned _LHSKind, + const void *_RHS, unsigned _RHSKind) + : LHS(_LHS), RHS(_RHS), LHSKind(_LHSKind), RHSKind(_RHSKind) { + assert(isValid() && "Invalid twine!"); + } + + /// isNull - Check for the null twine. + bool isNull() const { + return getLHSKind() == NullKind; + } + + /// isEmpty - Check for the empty twine. + bool isEmpty() const { + return getLHSKind() == EmptyKind; + } + + /// isNullary - Check if this is a nullary twine (null or empty). + bool isNullary() const { + return isNull() || isEmpty(); + } + + /// isUnary - Check if this is a unary twine. + bool isUnary() const { + return getRHSKind() == EmptyKind && !isNullary(); + } + + /// isBinary - Check if this is a binary twine. + bool isBinary() const { + return getLHSKind() != NullKind && getRHSKind() != EmptyKind; + } + + /// isValid - Check if this is a valid twine (satisfying the invariants on + /// order and number of arguments). + bool isValid() const { + // Nullary twines always have Empty on the RHS. + if (isNullary() && getRHSKind() != EmptyKind) + return false; + + // Null should never appear on the RHS. + if (getRHSKind() == NullKind) + return false; + + // The RHS cannot be non-empty if the LHS is empty. + if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) + return false; + + // A twine child should always be binary. + if (getLHSKind() == TwineKind && + !static_cast(LHS)->isBinary()) + return false; + if (getRHSKind() == TwineKind && + !static_cast(RHS)->isBinary()) + return false; + + return true; + } + + /// getLHSKind - Get the NodeKind of the left-hand side. + NodeKind getLHSKind() const { return LHSKind; } + + /// getRHSKind - Get the NodeKind of the left-hand side. + NodeKind getRHSKind() const { return RHSKind; } + + /// printOneChild - Print one child from a twine. + void printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const; + + /// printOneChildRepr - Print the representation of one child from a twine. + void printOneChildRepr(raw_ostream &OS, const void *Ptr, + NodeKind Kind) const; + + public: + /// @name Constructors + /// @{ + + /// Construct from an empty string. + /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct from a C string. + /// + /// We take care here to optimize "" into the empty twine -- this will be + /// optimized out for string constants. This allows Twine arguments have + /// default "" values, without introducing unnecessary string constants. + /*implicit*/ Twine(const char *Str) + : RHSKind(EmptyKind) { + if (Str[0] != '\0') { + LHS = Str; + LHSKind = CStringKind; + } else + LHSKind = EmptyKind; + + assert(isValid() && "Invalid twine!"); + } + + /// Construct from an std::string. + /*implicit*/ Twine(const std::string &Str) + : LHS(&Str), LHSKind(StdStringKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct from a StringRef. + /*implicit*/ Twine(const StringRef &Str) + : LHS(&Str), LHSKind(StringRefKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Create a 'null' string, which is an empty string that always + /// concatenates to form another empty string. + static Twine createNull() { + return Twine(NullKind); + } + + // FIXME: Unfortunately, to make sure this is as efficient as possible we + // need extra binary constructors from particular types. We can't rely on + // the compiler to be smart enough to fold operator+()/concat() down to the + // right thing. Yet. + + /// Construct as the concatenation of a C string and a StringRef. + /*implicit*/ Twine(const char *_LHS, const StringRef &_RHS) + : LHS(_LHS), RHS(&_RHS), LHSKind(CStringKind), RHSKind(StringRefKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct as the concatenation of a StringRef and a C string. + /*implicit*/ Twine(const StringRef &_LHS, const char *_RHS) + : LHS(&_LHS), RHS(_RHS), LHSKind(StringRefKind), RHSKind(CStringKind) { + assert(isValid() && "Invalid twine!"); + } + + /// @} + /// @name String Operations + /// @{ + + Twine concat(const Twine &Suffix) const; + + /// @} + /// @name Output & Conversion. + /// @{ + + /// str - Return the twine contents as a std::string. + std::string str() const; + + /// toVector - Write the concatenated string into the given SmallString or + /// SmallVector. + void toVector(SmallVectorImpl &Out) const; + + /// print - Write the concatenated string represented by this twine to the + /// stream \arg OS. + void print(raw_ostream &OS) const; + + /// dump - Dump the concatenated string represented by this twine to stderr. + void dump() const; + + /// print - Write the representation of this twine to the stream \arg OS. + void printRepr(raw_ostream &OS) const; + + /// dumpRepr - Dump the representation of this twine to stderr. + void dumpRepr() const; + + /// @} + }; + + /// @name Twine Inline Implementations + /// @{ + + inline Twine Twine::concat(const Twine &Suffix) const { + // Concatenation with null is null. + if (isNull() || Suffix.isNull()) + return Twine(NullKind); + + // Concatenation with empty yields the other side. + if (isEmpty()) + return Suffix; + if (Suffix.isEmpty()) + return *this; + + // Otherwise we need to create a new node, taking care to fold in unary + // twines. + const void *NewLHS = this, *NewRHS = &Suffix; + NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; + if (isUnary()) { + NewLHS = LHS; + NewLHSKind = getLHSKind(); + } + if (Suffix.isUnary()) { + NewRHS = Suffix.LHS; + NewRHSKind = Suffix.getLHSKind(); + } + + return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); + } + + inline Twine operator+(const Twine &LHS, const Twine &RHS) { + return LHS.concat(RHS); + } + + /// Additional overload to guarantee simplified codegen; this is equivalent to + /// concat(). + + inline Twine operator+(const char *LHS, const StringRef &RHS) { + return Twine(LHS, RHS); + } + + /// Additional overload to guarantee simplified codegen; this is equivalent to + /// concat(). + + inline Twine operator+(const StringRef &LHS, const char *RHS) { + return Twine(LHS, RHS); + } + + inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { + RHS.print(OS); + return OS; + } + + /// @} +} + +#endif diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp new file mode 100644 index 00000000000..4c34d279e8f --- /dev/null +++ b/lib/Support/Twine.cpp @@ -0,0 +1,91 @@ +//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +std::string Twine::str() const { + std::string Res; + raw_string_ostream OS(Res); + print(OS); + return Res; +} + +void Twine::toVector(SmallVectorImpl &Out) const { + raw_svector_ostream OS(Out); + print(OS); +} + +void Twine::printOneChild(raw_ostream &OS, const void *Ptr, + NodeKind Kind) const { + switch (Kind) { + case Twine::NullKind: break; + case Twine::EmptyKind: break; + case Twine::CStringKind: + OS << static_cast(Ptr); + break; + case Twine::StdStringKind: + OS << *static_cast(Ptr); + break; + case Twine::StringRefKind: + OS << *static_cast(Ptr); + break; + case Twine::TwineKind: + static_cast(Ptr)->print(OS); + break; + } +} + +void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr, + NodeKind Kind) const { + switch (Kind) { + case Twine::NullKind: + OS << "null"; break; + case Twine::EmptyKind: + OS << "empty"; break; + case Twine::CStringKind: + OS << "cstring:\"" + << static_cast(Ptr) << "\""; + break; + case Twine::StdStringKind: + OS << "std::string:\"" + << *static_cast(Ptr) << "\""; + break; + case Twine::StringRefKind: + OS << "stringref:\"" + << *static_cast(Ptr) << "\""; + break; + case Twine::TwineKind: + OS << "rope:"; + static_cast(Ptr)->printRepr(OS); + break; + } +} + +void Twine::print(raw_ostream &OS) const { + printOneChild(OS, LHS, getLHSKind()); + printOneChild(OS, RHS, getRHSKind()); +} + +void Twine::printRepr(raw_ostream &OS) const { + OS << "(Twine "; + printOneChildRepr(OS, LHS, getLHSKind()); + OS << " "; + printOneChildRepr(OS, RHS, getRHSKind()); + OS << ")"; +} + +void Twine::dump() const { + print(llvm::errs()); +} + +void Twine::dumpRepr() const { + printRepr(llvm::errs()); +} diff --git a/unittests/ADT/TwineTest.cpp b/unittests/ADT/TwineTest.cpp new file mode 100644 index 00000000000..50fdd2ed314 --- /dev/null +++ b/unittests/ADT/TwineTest.cpp @@ -0,0 +1,63 @@ +//===- TwineTest.cpp - Twine unit tests -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +std::string repr(const Twine &Value) { + std::string res; + llvm::raw_string_ostream OS(res); + Value.printRepr(OS); + return OS.str(); +} + +TEST(TwineTest, Construction) { + EXPECT_EQ("", Twine().str()); + EXPECT_EQ("hi", Twine("hi").str()); + EXPECT_EQ("hi", Twine(std::string("hi")).str()); + EXPECT_EQ("hi", Twine(StringRef("hi")).str()); + EXPECT_EQ("hi", Twine(StringRef(std::string("hi"))).str()); + EXPECT_EQ("hi", Twine(StringRef("hithere", 2)).str()); +} + +TEST(TwineTest, Concat) { + // Check verse repr, since we care about the actual representation not just + // the result. + + // Concat with null. + EXPECT_EQ("(Twine null empty)", + repr(Twine("hi").concat(Twine::createNull()))); + EXPECT_EQ("(Twine null empty)", + repr(Twine::createNull().concat(Twine("hi")))); + + // Concat with empty. + EXPECT_EQ("(Twine cstring:\"hi\" empty)", + repr(Twine("hi").concat(Twine()))); + EXPECT_EQ("(Twine cstring:\"hi\" empty)", + repr(Twine().concat(Twine("hi")))); + + // Concatenation of unary ropes. + EXPECT_EQ("(Twine cstring:\"a\" cstring:\"b\")", + repr(Twine("a").concat(Twine("b")))); + + // Concatenation of other ropes. + EXPECT_EQ("(Twine rope:(Twine cstring:\"a\" cstring:\"b\") cstring:\"c\")", + repr(Twine("a").concat(Twine("b")).concat(Twine("c")))); + EXPECT_EQ("(Twine cstring:\"a\" rope:(Twine cstring:\"b\" cstring:\"c\"))", + repr(Twine("a").concat(Twine("b").concat(Twine("c"))))); +} + + // I suppose linking in the entire code generator to add a unit test to check + // the code size of the concat operation is overkill... :) + +} // end anonymous namespace