From d4f195999a7774611e5f9e457a86f14d5e257324 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Mon, 11 Jan 2010 18:03:24 +0000
Subject: [PATCH] Reimplement getToken and SplitString as "StringRef helper
 functions"

- getToken is modeled after StringRef::split but it can split on multiple
  separator chars and skips leading separators.
- SplitString is a StringRef::split variant for more than 2 elements with the
  same behaviour as getToken.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93161 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/ADT/StringExtras.h       | 16 +++++-----
 lib/Support/StringExtras.cpp          | 46 +++++++++++----------------
 lib/Target/X86/X86ISelLowering.cpp    |  4 +--
 lib/VMCore/Module.cpp                 | 17 ++++++----
 tools/lto/LTOCodeGenerator.cpp        |  6 ++--
 utils/TableGen/CodeGenInstruction.cpp |  4 ++-
 6 files changed, 44 insertions(+), 49 deletions(-)
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 85936c019d3..f58fe8eade0 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -20,9 +20,9 @@
 #include <cctype>
 #include <cstdio>
 #include <string>
-#include <vector>
 
 namespace llvm {
+template<typename T> class SmallVectorImpl;
 
 /// hexdigit - Return the (uppercase) hexadecimal character for the
 /// given number \arg X (which should be less than 16).
@@ -206,16 +206,16 @@ static inline const char* CStrInCStrNoCase(const char *s1, const char *s2) {
 /// leading characters that appear in the Delimiters string, and ending the
 /// token at any of the characters that appear in the Delimiters string.  If
 /// there are no tokens in the source string, an empty string is returned.
-/// The Source source string is updated in place to remove the returned string
-/// and any delimiter prefix from it.
-std::string getToken(std::string &Source,
-                     const char *Delimiters = " \t\n\v\f\r");
+/// Returns a (token, tail) pair; the tail begins at the delimiter that ended
+/// the token (or is empty when none follows); Source itself is not modified.
+std::pair<StringRef, StringRef> getToken(StringRef Source,
+                                         StringRef Delimiters = " \t\n\v\f\r");
 
 /// SplitString - Split up the specified string according to the specified
 /// delimiters, appending the result fragments to the output list.
-void SplitString(const std::string &Source,
-                 std::vector<std::string> &OutFragments,
-                 const char *Delimiters = " \t\n\v\f\r");
+void SplitString(StringRef Source,
+                 SmallVectorImpl<StringRef> &OutFragments,
+                 StringRef Delimiters = " \t\n\v\f\r");
 
 /// HashString - Hash funtion for strings.
 ///
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index 1b233ab200a..65b41d526f4 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -11,50 +11,40 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include <cstring>
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 
 /// getToken - This function extracts one token from source, ignoring any
 /// leading characters that appear in the Delimiters string, and ending the
 /// token at any of the characters that appear in the Delimiters string.  If
 /// there are no tokens in the source string, an empty string is returned.
-/// The Source source string is updated in place to remove the returned string
-/// and any delimiter prefix from it.
-std::string llvm::getToken(std::string &Source, const char *Delimiters) {
-  size_t NumDelimiters = std::strlen(Delimiters);
-
+/// Returns a (token, tail) pair; the tail begins at the delimiter that ended
+/// the token (or is empty when none follows); Source itself is not modified.
+std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
+                                               StringRef Delimiters) {
   // Figure out where the token starts.
-  std::string::size_type Start =
-    Source.find_first_not_of(Delimiters, 0, NumDelimiters);
-  if (Start == std::string::npos) Start = Source.size();
+  StringRef::size_type Start = Source.find_first_not_of(Delimiters);
+  if (Start == StringRef::npos) Start = Source.size();
 
-  // Find the next occurance of the delimiter.
-  std::string::size_type End =
-    Source.find_first_of(Delimiters, Start, NumDelimiters);
-  if (End == std::string::npos) End = Source.size();
+  // Find the next occurrence of the delimiter.
+  StringRef::size_type End = Source.find_first_of(Delimiters, Start);
+  if (End == StringRef::npos) End = Source.size();
 
-  // Create the return token.
-  std::string Result = std::string(Source.begin()+Start, Source.begin()+End);
-
-  // Erase the token that we read in.
-  Source.erase(Source.begin(), Source.begin()+End);
-
-  return Result;
+  return std::make_pair(Source.slice(Start, End), Source.substr(End));
 }
 
 /// SplitString - Split up the specified string according to the specified
 /// delimiters, appending the result fragments to the output list.
-void llvm::SplitString(const std::string &Source, 
-                       std::vector<std::string> &OutFragments,
-                       const char *Delimiters) {
-  std::string S = Source;
-  
-  std::string S2 = getToken(S, Delimiters);
+void llvm::SplitString(StringRef Source,
+                       SmallVectorImpl<StringRef> &OutFragments,
+                       StringRef Delimiters) {
+  StringRef S2, S;
+  tie(S2, S) = getToken(Source, Delimiters);
   while (!S2.empty()) {
     OutFragments.push_back(S2);
-    S2 = getToken(S, Delimiters);
+    tie(S2, S) = getToken(S, Delimiters);
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ff69066985c..228ec9f2d63 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9538,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
   std::string AsmStr = IA->getAsmString();
 
   // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
-  std::vector<std::string> AsmPieces;
+  SmallVector<StringRef, 4> AsmPieces;
   SplitString(AsmStr, AsmPieces, "\n");  // ; as separator?
 
   switch (AsmPieces.size()) {
@@ -9575,7 +9575,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
         Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
         Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
       // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
-      std::vector<std::string> Words;
+      SmallVector<StringRef, 4> Words;
       SplitString(AsmPieces[0], Words, " \t");
       if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
         Words.clear();
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 03b12528491..510f3d5bd77 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -76,11 +76,12 @@ Module::~Module() {
 
 /// Target endian information...
 Module::Endianness Module::getEndianness() const {
-  std::string temp = DataLayout;
+  StringRef temp = DataLayout;
   Module::Endianness ret = AnyEndianness;
   
   while (!temp.empty()) {
-    std::string token = getToken(temp, "-");
+    StringRef token;
+    tie(token, temp) = getToken(temp, "-");
     
     if (token[0] == 'e') {
       ret = LittleEndian;
@@ -94,15 +95,17 @@ Module::Endianness Module::getEndianness() const {
 
 /// Target Pointer Size information...
 Module::PointerSize Module::getPointerSize() const {
-  std::string temp = DataLayout;
+  StringRef temp = DataLayout;
   Module::PointerSize ret = AnyPointerSize;
   
   while (!temp.empty()) {
-    std::string token = getToken(temp, "-");
-    char signal = getToken(token, ":")[0];
+    StringRef token, signalToken;
+    tie(token, temp) = getToken(temp, "-");
+    tie(signalToken, token) = getToken(token, ":");
     
-    if (signal == 'p') {
-      int size = atoi(getToken(token, ":").c_str());
+    if (signalToken[0] == 'p') {
+      int size = 0;
+      getToken(token, ":").first.getAsInteger(10, size);
       if (size == 32)
         ret = Pointer32;
       else if (size == 64)
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 060fc4fdb05..93eb0a561ea 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -443,12 +443,12 @@ bool LTOCodeGenerator::generateAssemblyCode(formatted_raw_ostream& out,
 /// Optimize merged modules using various IPO passes
 void LTOCodeGenerator::setCodeGenDebugOptions(const char* options)
 {
-    std::string ops(options);
-    for (std::string o = getToken(ops); !o.empty(); o = getToken(ops)) {
+    for (std::pair<StringRef, StringRef> o = getToken(options);
+         !o.first.empty(); o = getToken(o.second)) {
         // ParseCommandLineOptions() expects argv[0] to be program name.
         // Lazily add that.
         if ( _codegenOptions.empty() ) 
             _codegenOptions.push_back("libLTO");
-        _codegenOptions.push_back(strdup(o.c_str()));
+        _codegenOptions.push_back(strdup(o.first.str().c_str()));
     }
 }
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index c69ce962ffc..684431a6f49 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -14,6 +14,7 @@
 #include "CodeGenInstruction.h"
 #include "Record.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
 #include <set>
 using namespace llvm;
 
@@ -224,7 +225,8 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
   // Parse the DisableEncoding field.
   std::string DisableEncoding = R->getValueAsString("DisableEncoding");
   while (1) {
-    std::string OpName = getToken(DisableEncoding, " ,\t");
+    std::string OpName;
+    tie(OpName, DisableEncoding) = getToken(DisableEncoding, " ,\t");
     if (OpName.empty()) break;
 
     // Figure out which operand this is.