From 7e54d5b1562f085c898bf8fcc4ac939ec893444c Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Thu, 31 Dec 2009 04:24:34 +0000 Subject: [PATCH] Document the edit-distance algorithm used in StringRef, switch it over to SmallVector, and add a unit test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92340 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/StringRef.cpp | 19 ++++++++++++++----- unittests/ADT/StringRefTest.cpp | 5 +++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 9084ea6ece0..e4a9984828f 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" -#include <vector> +#include "llvm/ADT/SmallVector.h" using namespace llvm; // MSVC emits references to this into the translation units which reference it. @@ -36,17 +36,26 @@ int StringRef::compare_lower(StringRef RHS) const { return Length < RHS.Length ? -1 : 1; } -/// \brief Compute the edit distance between the two given strings. +// Compute the edit distance between the two given strings. unsigned StringRef::edit_distance(llvm::StringRef Other, bool AllowReplacements) { + // The algorithm implemented below is the "classic" + // dynamic-programming algorithm for computing the Levenshtein + // distance, which is described here: + // + // http://en.wikipedia.org/wiki/Levenshtein_distance + // + // Although the algorithm is typically described using an m x n + // array, only two rows are used at a time, so this implementation + // just keeps two separate vectors for those two rows. 
size_type m = size(); size_type n = Other.size(); - std::vector<unsigned> previous(n+1, 0); - for (std::vector<unsigned>::size_type i = 0; i <= n; ++i) + SmallVector<unsigned, 32> previous(n+1, 0); + for (SmallVector<unsigned, 32>::size_type i = 0; i <= n; ++i) previous[i] = i; - std::vector<unsigned> current(n+1, 0); + SmallVector<unsigned, 32> current(n+1, 0); for (size_type y = 1; y <= m; ++y) { current.assign(n+1, 0); current[0] = y; diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp index 6507c20b2b1..8507efa1865 100644 --- a/unittests/ADT/StringRefTest.cpp +++ b/unittests/ADT/StringRefTest.cpp @@ -247,6 +247,11 @@ TEST(StringRefTest, Count) { EXPECT_EQ(0U, Str.count("zz")); } +TEST(StringRefTest, EditDistance) { + StringRef Str("hello"); + EXPECT_EQ(2, Str.edit_distance("hill")); +} + TEST(StringRefTest, Misc) { std::string Storage; raw_string_ostream OS(Storage);