Introduce a simple line-by-line iterator type into the Support library.

This is an iterator which you can build around a MemoryBuffer. It will
iterate through the non-empty, non-comment lines of the buffer as
a forward iterator. It should be small and reasonably fast (although it
could be made much faster if anyone cares, I don't really...).

This will be used to more simply support the text-based sample
profile file format, and is largely based on the original patch by
Diego. I've re-worked the style of it and separated it from the work of
producing a MemoryBuffer from a file which both simplifies the interface
and makes it easier to test.

The style of the API follows the C++ standard naming conventions to fit
in better with iterators in general, much like the Path and FileSystem
interfaces follow standard-based naming conventions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198068 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2013-12-27 04:28:57 +00:00
parent 52b162375a
commit 5ff21b4111
5 changed files with 258 additions and 0 deletions

View File

@ -0,0 +1,73 @@
//===- LineIterator.h - Iterator to read a text buffer's lines --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
#include <iterator>
namespace llvm {
class MemoryBuffer;
/// \brief A forward iterator which reads non-blank text lines from a buffer.
///
/// This class provides a forward iterator interface for reading one line at
/// a time from a buffer. When default constructed the iterator will be the
/// "end" iterator.
///
/// The iterator also is aware of what line number it is currently processing
/// and can strip comment lines given the comment-starting character.
///
/// The iterator stores a pointer to the buffer and hands out \c StringRef
/// values that point into it, so the buffer must outlive the iterator and
/// every line obtained from it.
///
/// Note that this iterator requires the buffer to be nul terminated.
class line_iterator
    : public std::iterator<std::forward_iterator_tag, StringRef, ptrdiff_t> {
  /// The buffer being walked, or null once this is an "end" iterator.
  const MemoryBuffer *Buffer;
  /// Lines starting with this character are skipped; '\0' disables skipping.
  char CommentMarker;
  /// One-based number of the line currently held in \c CurrentLine.
  unsigned LineNumber;
  /// The current line, without its terminating newline.
  StringRef CurrentLine;

public:
  /// \brief Default construct an "end" iterator.
  ///
  /// Every member is given a definite value so that copying an end iterator
  /// or calling \c line_number() on it never reads an indeterminate value.
  line_iterator() : Buffer(0), CommentMarker('\0'), LineNumber(1) {}

  /// \brief Construct a new iterator around some memory buffer.
  ///
  /// \param Buffer The nul terminated buffer to read lines from.
  /// \param CommentMarker If not '\0', lines beginning with this character
  /// are skipped.
  explicit line_iterator(const MemoryBuffer &Buffer, char CommentMarker = '\0');

  /// \brief Return true if we've reached EOF or are an "end" iterator.
  bool is_at_eof() const { return !Buffer; }

  /// \brief Synonym for \c is_at_eof().
  bool is_at_end() const { return is_at_eof(); }

  /// \brief Return the current line number.
  ///
  /// The value is not meaningful once the iterator is at EOF.
  int64_t line_number() const { return LineNumber; }

  /// \brief Advance to the next (non-empty, non-comment) line.
  line_iterator &operator++() {
    advance();
    return *this;
  }

  /// \brief Advance to the next line, returning a copy of the iterator as it
  /// was before the advance.
  line_iterator operator++(int) {
    line_iterator Previous(*this);
    advance();
    return Previous;
  }

  /// \brief Get the current line as a \c StringRef.
  StringRef operator*() const { return CurrentLine; }

  /// \brief Access members of the current line's \c StringRef.
  const StringRef *operator->() const { return &CurrentLine; }

  /// \brief Two iterators are equal when they refer to the same buffer and
  /// their current lines start at the same address.
  friend bool operator==(const line_iterator &LHS, const line_iterator &RHS) {
    return LHS.Buffer == RHS.Buffer &&
           LHS.CurrentLine.begin() == RHS.CurrentLine.begin();
  }

  friend bool operator!=(const line_iterator &LHS, const line_iterator &RHS) {
    return !(LHS == RHS);
  }

private:
  /// \brief Advance the iterator to the next line.
  void advance();
};
}

View File

@ -30,6 +30,7 @@ add_llvm_library(LLVMSupport
IntrusiveRefCntPtr.cpp
IsInf.cpp
IsNAN.cpp
LineIterator.cpp
Locale.cpp
LockFileManager.cpp
ManagedStatic.cpp

View File

@ -0,0 +1,68 @@
//===- LineIterator.cpp - Implementation of line iteration ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
/// Build an iterator over \p Buffer, positioned on its first line that is
/// neither empty nor (when \p CommentMarker is not '\0') a comment. An empty
/// buffer yields the "end" iterator.
line_iterator::line_iterator(const MemoryBuffer &Buffer, char CommentMarker)
    : Buffer(0), CommentMarker(CommentMarker), LineNumber(1), CurrentLine() {
  // Nothing to read: stay in the end state set up by the initializers.
  if (Buffer.getBufferSize() == 0)
    return;

  // A non-empty buffer must be nul terminated because advance() scans for the
  // terminator rather than tracking the buffer's end.
  assert(Buffer.getBufferEnd()[0] == '\0');

  // Start with a zero-length line at the very beginning of the buffer so that
  // advance() begins scanning from the first character.
  this->Buffer = &Buffer;
  CurrentLine = StringRef(Buffer.getBufferStart(), 0);
  advance();
}
/// Move to the next line that is neither empty nor a comment, updating the
/// line number as newlines are consumed. If the nul terminator is reached
/// first, the iterator becomes the "end" iterator.
void line_iterator::advance() {
  assert(Buffer && "Cannot advance past the end!");

  // Resume scanning right after the current line. That is either the start of
  // the buffer (first call) or the newline / terminator that ended the line.
  const char *Cursor = CurrentLine.end();
  assert(Cursor == Buffer->getBufferStart() || *Cursor == '\n' ||
         *Cursor == '\0');

  if (CommentMarker != '\0') {
    // Comment stripping is enabled: consume whole comment lines as well as
    // newlines, counting each newline that is stepped over.
    while (true) {
      if (*Cursor == CommentMarker) {
        // Discard the rest of the comment line, stopping on its newline or on
        // the terminator.
        ++Cursor;
        while (*Cursor != '\0' && *Cursor != '\n')
          ++Cursor;
      }
      if (*Cursor != '\n')
        break;
      ++Cursor;
      ++LineNumber;
    }
  } else {
    // No comment stripping: just step over consecutive newlines.
    for (; *Cursor == '\n'; ++Cursor)
      ++LineNumber;
  }

  if (*Cursor == '\0') {
    // We've hit the end of the buffer, reset ourselves to the end state.
    Buffer = 0;
    CurrentLine = StringRef();
    return;
  }

  // Cursor now sits on the first character of a real line; find where the
  // line stops (newline or terminator, whichever comes first).
  const char *LineEnd = Cursor + 1;
  while (*LineEnd != '\0' && *LineEnd != '\n')
    ++LineEnd;
  CurrentLine = StringRef(Cursor, static_cast<size_t>(LineEnd - Cursor));
}

View File

@ -18,6 +18,7 @@ add_llvm_unittest(SupportTests
ErrorOrTest.cpp
FileOutputBufferTest.cpp
LeakDetectorTest.cpp
LineIteratorTest.cpp
LockFileManagerTest.cpp
ManagedStatic.cpp
MathExtrasTest.cpp

View File

@ -0,0 +1,115 @@
//===- LineIteratorTest.cpp - Unit tests ----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/LineIterator.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "gtest/gtest.h"
using namespace llvm;
using namespace llvm::sys;
namespace {
// Walks three plain lines (the last without a trailing newline) and checks
// the text and line number reported at each step, then the end state.
TEST(LineIteratorTest, Basic) {
  OwningPtr<MemoryBuffer> Buf(MemoryBuffer::getMemBuffer("line 1\n"
                                                         "line 2\n"
                                                         "line 3"));

  line_iterator End;
  line_iterator It(*Buf);

  // A freshly built iterator over non-empty text is not the end iterator.
  EXPECT_FALSE(It.is_at_eof());
  EXPECT_NE(End, It);

  EXPECT_EQ("line 1", *It);
  EXPECT_EQ(1, It.line_number());

  ++It;
  EXPECT_EQ("line 2", *It);
  EXPECT_EQ(2, It.line_number());

  ++It;
  EXPECT_EQ("line 3", *It);
  EXPECT_EQ(3, It.line_number());

  // Stepping past the last line must land on the end iterator.
  ++It;
  EXPECT_TRUE(It.is_at_eof());
  EXPECT_EQ(End, It);
}
// With '#' as the comment marker, comment lines are never yielded but still
// count towards the reported line numbers.
TEST(LineIteratorTest, CommentSkipping) {
  OwningPtr<MemoryBuffer> Buf(MemoryBuffer::getMemBuffer("line 1\n"
                                                         "line 2\n"
                                                         "# Comment 1\n"
                                                         "line 4\n"
                                                         "# Comment 2"));

  line_iterator End;
  line_iterator It(*Buf, '#');

  EXPECT_FALSE(It.is_at_eof());
  EXPECT_NE(End, It);

  EXPECT_EQ("line 1", *It);
  EXPECT_EQ(1, It.line_number());

  ++It;
  EXPECT_EQ("line 2", *It);
  EXPECT_EQ(2, It.line_number());

  // The comment on line 3 is skipped, so the next line reported is line 4.
  ++It;
  EXPECT_EQ("line 4", *It);
  EXPECT_EQ(4, It.line_number());

  // Only a trailing comment remains, so the next step reaches the end.
  ++It;
  EXPECT_TRUE(It.is_at_eof());
  EXPECT_EQ(End, It);
}
// Runs of blank lines before, between and after the real lines are skipped,
// while the line numbers keep counting them.
TEST(LineIteratorTest, BlankSkipping) {
  OwningPtr<MemoryBuffer> Buf(MemoryBuffer::getMemBuffer("\n\n\n"
                                                         "line 1\n"
                                                         "\n\n\n"
                                                         "line 2\n"
                                                         "\n\n\n"));

  line_iterator End;
  line_iterator It(*Buf);

  EXPECT_FALSE(It.is_at_eof());
  EXPECT_NE(End, It);

  // Three leading blank lines put the first real line at number 4.
  EXPECT_EQ("line 1", *It);
  EXPECT_EQ(4, It.line_number());

  // Three more blank lines put the second real line at number 8.
  ++It;
  EXPECT_EQ("line 2", *It);
  EXPECT_EQ(8, It.line_number());

  // Trailing blank lines do not produce another line.
  ++It;
  EXPECT_TRUE(It.is_at_eof());
  EXPECT_EQ(End, It);
}
// Buffers holding no real lines (empty, blank-only, comment-only, or a mix)
// must produce an iterator that is immediately equal to the end iterator.
TEST(LineIteratorTest, EmptyBuffers) {
  const line_iterator End;
  OwningPtr<MemoryBuffer> Buf;

  // A zero-length buffer.
  Buf.reset(MemoryBuffer::getMemBuffer(""));
  EXPECT_TRUE(line_iterator(*Buf).is_at_eof());
  EXPECT_EQ(End, line_iterator(*Buf));

  // Nothing but newlines.
  Buf.reset(MemoryBuffer::getMemBuffer("\n\n\n"));
  EXPECT_TRUE(line_iterator(*Buf).is_at_eof());
  EXPECT_EQ(End, line_iterator(*Buf));

  // Comments separated by a blank line, no trailing newline.
  Buf.reset(MemoryBuffer::getMemBuffer("# foo\n"
                                       "\n"
                                       "# bar"));
  EXPECT_TRUE(line_iterator(*Buf, '#').is_at_eof());
  EXPECT_EQ(End, line_iterator(*Buf, '#'));

  // A comment surrounded by blank lines.
  Buf.reset(MemoryBuffer::getMemBuffer("\n"
                                       "# baz\n"
                                       "\n"));
  EXPECT_TRUE(line_iterator(*Buf, '#').is_at_eof());
  EXPECT_EQ(End, line_iterator(*Buf, '#'));
}
} // anonymous namespace