Reimplement getToken and SplitString as "StringRef helper functions"

- getToken is modeled after StringRef::split, but it can split on multiple separator chars and skips leading separators.
- SplitString is a StringRef::split variant for more than 2 elements with the same behaviour as getToken.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93161 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-28 04:33:05 +00:00 · 2010-01-11 18:03:24 +00:00 · 2010-01-11 18:03:24 +00:00 · d4f195999a
commit d4f195999a
parent e5dacc55ad
6 changed files with 44 additions and 49 deletions
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@ -20,9 +20,9 @@
 #include <cctype>
 #include <cstdio>
 #include <string>
 #include <vector>
 namespace llvm {
 template<typename T> class SmallVectorImpl;
 /// hexdigit - Return the (uppercase) hexadecimal character for the
 /// given number \arg X (which should be less than 16).
@ -206,16 +206,16 @@ static inline const char* CStrInCStrNoCase(const char *s1, const char *s2) {
 /// leading characters that appear in the Delimiters string, and ending the
 /// token at any of the characters that appear in the Delimiters string.  If
 /// there are no tokens in the source string, an empty string is returned.
-/// The Source source string is updated in place to remove the returned string
+/// The function returns a pair containing the extracted token and the
-/// and any delimiter prefix from it.
+/// remaining tail string.
-std::string getToken(std::string &Source,
+std::pair<StringRef, StringRef> getToken(StringRef Source,
-                     const char *Delimiters = " \t\n\v\f\r");
+                                         StringRef Delimiters = " \t\n\v\f\r");
 /// SplitString - Split up the specified string according to the specified
 /// delimiters, appending the result fragments to the output list.
-void SplitString(const std::string &Source,
+void SplitString(StringRef Source,
-                 std::vector<std::string> &OutFragments,
+                 SmallVectorImpl<StringRef> &OutFragments,
-                 const char *Delimiters = " \t\n\v\f\r");
+                 StringRef Delimiters = " \t\n\v\f\r");
 /// HashString - Hash funtion for strings.
 ///
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@ -11,50 +11,40 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include <cstring>
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 /// getToken - This function extracts one token from source, ignoring any
 /// leading characters that appear in the Delimiters string, and ending the
 /// token at any of the characters that appear in the Delimiters string.  If
 /// there are no tokens in the source string, an empty string is returned.
-/// The Source source string is updated in place to remove the returned string
+/// The function returns a pair containing the extracted token and the
-/// and any delimiter prefix from it.
+/// remaining tail string.
-std::string llvm::getToken(std::string &Source, const char *Delimiters) {
+std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
-  size_t NumDelimiters = std::strlen(Delimiters);
+                                               StringRef Delimiters) {
  // Figure out where the token starts.
-  std::string::size_type Start =
+  StringRef::size_type Start = Source.find_first_not_of(Delimiters);
-    Source.find_first_not_of(Delimiters, 0, NumDelimiters);
+  if (Start == StringRef::npos) Start = Source.size();
  if (Start == std::string::npos) Start = Source.size();
-  // Find the next occurance of the delimiter.
+  // Find the next occurrence of the delimiter.
-  std::string::size_type End =
+  StringRef::size_type End = Source.find_first_of(Delimiters, Start);
-    Source.find_first_of(Delimiters, Start, NumDelimiters);
+  if (End == StringRef::npos) End = Source.size();
  if (End == std::string::npos) End = Source.size();
-  // Create the return token.
+  return std::make_pair(Source.substr(Start, End), Source.substr(End));
  std::string Result = std::string(Source.begin()+Start, Source.begin()+End);
  // Erase the token that we read in.
  Source.erase(Source.begin(), Source.begin()+End);
  return Result;
 }
 /// SplitString - Split up the specified string according to the specified
 /// delimiters, appending the result fragments to the output list.
-void llvm::SplitString(const std::string &Source, 
+void llvm::SplitString(StringRef Source,
-                       std::vector<std::string> &OutFragments,
+                       SmallVectorImpl<StringRef> &OutFragments,
-                       const char *Delimiters) {
+                       StringRef Delimiters) {
-  std::string S = Source;
+  StringRef S2, S;
-  
+  tie(S2, S) = getToken(Source, Delimiters);
  std::string S2 = getToken(S, Delimiters);
  while (!S2.empty()) {
    OutFragments.push_back(S2);
-    S2 = getToken(S, Delimiters);
+    tie(S2, S) = getToken(S, Delimiters);
  }
 }
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -9538,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
  std::string AsmStr = IA->getAsmString();
  // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
-  std::vector<std::string> AsmPieces;
+  SmallVector<StringRef, 4> AsmPieces;
  SplitString(AsmStr, AsmPieces, "\n");  // ; as separator?
  switch (AsmPieces.size()) {
@ -9575,7 +9575,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
        Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
        Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
      // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
-      std::vector<std::string> Words;
+      SmallVector<StringRef, 4> Words;
      SplitString(AsmPieces[0], Words, " \t");
      if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
        Words.clear();
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@ -76,11 +76,12 @@ Module::~Module() {
 /// Target endian information...
 Module::Endianness Module::getEndianness() const {
-  std::string temp = DataLayout;
+  StringRef temp = DataLayout;
  Module::Endianness ret = AnyEndianness;
  while (!temp.empty()) {
-    std::string token = getToken(temp, "-");
+    StringRef token = DataLayout;
    tie(token, temp) = getToken(DataLayout, "-");
    if (token[0] == 'e') {
      ret = LittleEndian;
@ -94,15 +95,17 @@ Module::Endianness Module::getEndianness() const {
 /// Target Pointer Size information...
 Module::PointerSize Module::getPointerSize() const {
-  std::string temp = DataLayout;
+  StringRef temp = DataLayout;
  Module::PointerSize ret = AnyPointerSize;
  while (!temp.empty()) {
-    std::string token = getToken(temp, "-");
+    StringRef token, signalToken;
-    char signal = getToken(token, ":")[0];
+    tie(token, temp) = getToken(temp, "-");
    tie(signalToken, token) = getToken(token, ":");
-    if (signal == 'p') {
+    if (signalToken[0] == 'p') {
-      int size = atoi(getToken(token, ":").c_str());
+      int size = 0;
      getToken(token, ":").first.getAsInteger(10, size);
      if (size == 32)
        ret = Pointer32;
      else if (size == 64)
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@ -443,12 +443,12 @@ bool LTOCodeGenerator::generateAssemblyCode(formatted_raw_ostream& out,
 /// Optimize merged modules using various IPO passes
 void LTOCodeGenerator::setCodeGenDebugOptions(const char* options)
 {
-    std::string ops(options);
+    for (std::pair<StringRef, StringRef> o = getToken(options);
-    for (std::string o = getToken(ops); !o.empty(); o = getToken(ops)) {
+         !o.first.empty(); o = getToken(o.second)) {
        // ParseCommandLineOptions() expects argv[0] to be program name.
        // Lazily add that.
        if ( _codegenOptions.empty() ) 
            _codegenOptions.push_back("libLTO");
-        _codegenOptions.push_back(strdup(o.c_str()));
+        _codegenOptions.push_back(strdup(o.first.str().c_str()));
    }
 }
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@ -14,6 +14,7 @@
 #include "CodeGenInstruction.h"
 #include "Record.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include <set>
 using namespace llvm;
@ -224,7 +225,8 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
  // Parse the DisableEncoding field.
  std::string DisableEncoding = R->getValueAsString("DisableEncoding");
  while (1) {
-    std::string OpName = getToken(DisableEncoding, " ,\t");
+    std::string OpName;
    tie(OpName, DisableEncoding) = getToken(DisableEncoding, " ,\t");
    if (OpName.empty()) break;
    // Figure out which operand this is.