From 5ff21b411149095abb421c1e1263341360072cec Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 27 Dec 2013 04:28:57 +0000 Subject: [PATCH] Introduce a simple line-by-line iterator type into the Support library. This is an iterator which you can build around a MemoryBuffer. It will iterate through the non-empty, non-comment lines of the buffer as a forward iterator. It should be small and reasonably fast (although it could be made much faster if anyone cares, I don't really...). This will be used to more simply support the text-based sample profile file format, and is largely based on the original patch by Diego. I've re-worked the style of it and separated it from the work of producing a MemoryBuffer from a file which both simplifies the interface and makes it easier to test. The style of the API follows the C++ standard naming conventions to fit in better with iterators in general, much like the Path and FileSystem interfaces follow standard-based naming conventions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198068 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/LineIterator.h | 73 ++++++++++++++++ lib/Support/CMakeLists.txt | 1 + lib/Support/LineIterator.cpp | 68 +++++++++++++++ unittests/Support/CMakeLists.txt | 1 + unittests/Support/LineIteratorTest.cpp | 115 +++++++++++++++++++++++++ 5 files changed, 258 insertions(+) create mode 100644 include/llvm/Support/LineIterator.h create mode 100644 lib/Support/LineIterator.cpp create mode 100644 unittests/Support/LineIteratorTest.cpp diff --git a/include/llvm/Support/LineIterator.h b/include/llvm/Support/LineIterator.h new file mode 100644 index 00000000000..861c19881fe --- /dev/null +++ b/include/llvm/Support/LineIterator.h @@ -0,0 +1,73 @@ +//===- LineIterator.h - Iterator to read a text buffer's lines --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { + +class MemoryBuffer; + +/// \brief A forward iterator which reads non-blank text lines from a buffer. +/// +/// This class provides a forward iterator interface for reading one line at +/// a time from a buffer. When default constructed the iterator will be the +/// "end" iterator. +/// +/// The iterator also is aware of what line number it is currently processing +/// and can strip comment lines given the comment-starting character. +/// +/// Note that this iterator requires the buffer to be nul terminated. +class line_iterator + : public std::iterator { + const MemoryBuffer *Buffer; + char CommentMarker; + + unsigned LineNumber; + StringRef CurrentLine; + +public: + /// \brief Default construct an "end" iterator. + line_iterator() : Buffer(0) {} + + /// \brief Construct a new iterator around some memory buffer. + explicit line_iterator(const MemoryBuffer &Buffer, char CommentMarker = '\0'); + + /// \brief Return true if we've reached EOF or are an "end" iterator. + bool is_at_eof() const { return !Buffer; } + + /// \brief Return true if we're an "end" iterator or have reached EOF. + bool is_at_end() const { return is_at_eof(); } + + /// \brief Return the current line number. May return any number at EOF. + int64_t line_number() const { return LineNumber; } + + /// \brief Advance to the next (non-empty, non-comment) line. + line_iterator &operator++() { + advance(); + return *this; + } + + /// \brief Get the current line as a \c StringRef. + StringRef operator*() const { return CurrentLine; } + + friend bool operator==(const line_iterator &LHS, const line_iterator &RHS) { + return LHS.Buffer == RHS.Buffer && + LHS.CurrentLine.begin() == RHS.CurrentLine.begin(); + } + + friend bool operator!=(const line_iterator &LHS, const line_iterator &RHS) { + return !(LHS == RHS); + } + +private: + /// \brief Advance the iterator to the next line. + void advance(); +}; +} diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 3aecf3ffa4e..cf4edff632e 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_library(LLVMSupport IntrusiveRefCntPtr.cpp IsInf.cpp IsNAN.cpp + LineIterator.cpp Locale.cpp LockFileManager.cpp ManagedStatic.cpp diff --git a/lib/Support/LineIterator.cpp b/lib/Support/LineIterator.cpp new file mode 100644 index 00000000000..c14f96bf714 --- /dev/null +++ b/lib/Support/LineIterator.cpp @@ -0,0 +1,68 @@ +//===- LineIterator.cpp - Implementation of line iteration ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +line_iterator::line_iterator(const MemoryBuffer &Buffer, char CommentMarker) + : Buffer(Buffer.getBufferSize() ? &Buffer : 0), + CommentMarker(CommentMarker), LineNumber(1), + CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : 0, 0) { + // Ensure that if we are constructed on a non-empty memory buffer that it is + // a null terminated buffer. + if (Buffer.getBufferSize()) { + assert(Buffer.getBufferEnd()[0] == '\0'); + advance(); + } +} + +void line_iterator::advance() { + assert(Buffer && "Cannot advance past the end!"); + + const char *Pos = CurrentLine.end(); + assert(Pos == Buffer->getBufferStart() || *Pos == '\n' || *Pos == '\0'); + size_t Length = 0; + + if (CommentMarker == '\0') { + // If we're not stripping comments, this is simpler. + while (Pos[Length] == '\n') + ++Length; + Pos += Length; + LineNumber += Length; + Length = 0; + } else { + // Skip comments and count line numbers, which is a bit more complex. + for (;;) { + if (*Pos == CommentMarker) + do { + ++Pos; + } while (*Pos != '\0' && *Pos != '\n'); + if (*Pos != '\n') + break; + ++Pos; + ++LineNumber; + } + } + + if (*Pos == '\0') { + // We've hit the end of the buffer, reset ourselves to the end state. + Buffer = 0; + CurrentLine = StringRef(); + return; + } + + // Measure the line. + do { + ++Length; + } while (Pos[Length] != '\0' && Pos[Length] != '\n'); + + CurrentLine = StringRef(Pos, Length); +} diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index 0abc2ffe1d9..b3f0622e34b 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_unittest(SupportTests ErrorOrTest.cpp FileOutputBufferTest.cpp LeakDetectorTest.cpp + LineIteratorTest.cpp LockFileManagerTest.cpp ManagedStatic.cpp MathExtrasTest.cpp diff --git a/unittests/Support/LineIteratorTest.cpp b/unittests/Support/LineIteratorTest.cpp new file mode 100644 index 00000000000..d684e25df55 --- /dev/null +++ b/unittests/Support/LineIteratorTest.cpp @@ -0,0 +1,115 @@ +//===- LineIterator.cpp - Unit tests --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LineIterator.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::sys; + +namespace { + +TEST(LineIteratorTest, Basic) { + OwningPtr Buffer(MemoryBuffer::getMemBuffer("line 1\n" + "line 2\n" + "line 3")); + + line_iterator I = line_iterator(*Buffer), E; + + EXPECT_FALSE(I.is_at_eof()); + EXPECT_NE(E, I); + + EXPECT_EQ("line 1", *I); + EXPECT_EQ(1, I.line_number()); + ++I; + EXPECT_EQ("line 2", *I); + EXPECT_EQ(2, I.line_number()); + ++I; + EXPECT_EQ("line 3", *I); + EXPECT_EQ(3, I.line_number()); + ++I; + + EXPECT_TRUE(I.is_at_eof()); + EXPECT_EQ(E, I); +} + +TEST(LineIteratorTest, CommentSkipping) { + OwningPtr Buffer(MemoryBuffer::getMemBuffer("line 1\n" + "line 2\n" + "# Comment 1\n" + "line 4\n" + "# Comment 2")); + + line_iterator I = line_iterator(*Buffer, '#'), E; + + EXPECT_FALSE(I.is_at_eof()); + EXPECT_NE(E, I); + + EXPECT_EQ("line 1", *I); + EXPECT_EQ(1, I.line_number()); + ++I; + EXPECT_EQ("line 2", *I); + EXPECT_EQ(2, I.line_number()); + ++I; + EXPECT_EQ("line 4", *I); + EXPECT_EQ(4, I.line_number()); + ++I; + + EXPECT_TRUE(I.is_at_eof()); + EXPECT_EQ(E, I); +} + +TEST(LineIteratorTest, BlankSkipping) { + OwningPtr Buffer(MemoryBuffer::getMemBuffer("\n\n\n" + "line 1\n" + "\n\n\n" + "line 2\n" + "\n\n\n")); + + line_iterator I = line_iterator(*Buffer), E; + + EXPECT_FALSE(I.is_at_eof()); + EXPECT_NE(E, I); + + EXPECT_EQ("line 1", *I); + EXPECT_EQ(4, I.line_number()); + ++I; + EXPECT_EQ("line 2", *I); + EXPECT_EQ(8, I.line_number()); + ++I; + + EXPECT_TRUE(I.is_at_eof()); + EXPECT_EQ(E, I); +} + +TEST(LineIteratorTest, EmptyBuffers) { + OwningPtr Buffer(MemoryBuffer::getMemBuffer("")); + EXPECT_TRUE(line_iterator(*Buffer).is_at_eof()); + EXPECT_EQ(line_iterator(), line_iterator(*Buffer)); + + Buffer.reset(MemoryBuffer::getMemBuffer("\n\n\n")); + EXPECT_TRUE(line_iterator(*Buffer).is_at_eof()); + EXPECT_EQ(line_iterator(), line_iterator(*Buffer)); + + Buffer.reset(MemoryBuffer::getMemBuffer("# foo\n" + "\n" + "# bar")); + EXPECT_TRUE(line_iterator(*Buffer, '#').is_at_eof()); + EXPECT_EQ(line_iterator(), line_iterator(*Buffer, '#')); + + Buffer.reset(MemoryBuffer::getMemBuffer("\n" + "# baz\n" + "\n")); + EXPECT_TRUE(line_iterator(*Buffer, '#').is_at_eof()); + EXPECT_EQ(line_iterator(), line_iterator(*Buffer, '#')); +} + +} // anonymous namespace