llvm-6502/include/llvm/Support/UnicodeCharRanges.h
Chandler Carruth 974a445bd9 Re-sort all of the includes with ./utils/sort_includes.py so that
subsequent changes are easier to review. About to fix some layering
issues, and wanted to separate out the necessary churn.

Also comment and sink the include of "Windows.h" in three .inc files to
match the usage in Memory.inc.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198685 91177308-0d34-0410-b5e6-96231b3b80d8
2014-01-07 11:48:04 +00:00

96 lines
3.0 KiB
C++

//===--- UnicodeCharRanges.h - Types and functions for character ranges ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_UNICODECHARRANGES_H
#define LLVM_SUPPORT_UNICODECHARRANGES_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
namespace llvm {
namespace sys {
/// \brief A closed interval [Lower, Upper] of Unicode code points; both end
/// points belong to the interval.
struct UnicodeCharRange {
  /// First code point that is part of the range.
  uint32_t Lower;
  /// Last code point that is part of the range.
  uint32_t Upper;
};
/// \brief Orders a code point before a range: true when \p Value is strictly
/// smaller than the lower bound of \p Range, i.e. it lies below the range.
inline bool operator<(uint32_t Value, UnicodeCharRange Range) {
  return Range.Lower > Value;
}
/// \brief Orders a range before a code point: true when \p Value is strictly
/// greater than the upper bound of \p Range, i.e. the range lies below it.
inline bool operator<(UnicodeCharRange Range, uint32_t Value) {
  return Value > Range.Upper;
}
/// \brief Holds a reference to an ordered array of UnicodeCharRange and
/// provides a quick check of whether a code point is contained in the set
/// represented by this array.
class UnicodeCharSet {
public:
  typedef llvm::ArrayRef<UnicodeCharRange> CharRanges;

  /// \brief Constructs a UnicodeCharSet instance from an array of
  /// UnicodeCharRanges.
  ///
  /// The array referenced by \p Ranges must live at least as long as the
  /// UnicodeCharSet instance and must not change. The array is validated by
  /// the constructor (only when assert is active), so it makes sense to create
  /// as few UnicodeCharSet instances per array of ranges as possible.
  UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {
    assert(rangesAreValid());
  }

  /// \brief Returns true if the character set contains the Unicode code point
  /// \p C.
  ///
  /// Performs a binary search over the ranges; this relies on the ranges being
  /// ordered and non-overlapping, which rangesAreValid() checks.
  bool contains(uint32_t C) const {
    return std::binary_search(Ranges.begin(), Ranges.end(), C);
  }

private:
  /// \brief Returns true if each of the ranges is a proper closed range
  /// [min, max], and if the ranges themselves are ordered and non-overlapping.
  ///
  /// On the first violation found, a description is written to the debug
  /// stream and false is returned.
  bool rangesAreValid() const {
    // Upper bound of the previously visited range; only meaningful once the
    // first range has been processed.
    uint32_t Prev = 0;
    for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
         I != E; ++I) {
      // Every range after the first must start strictly above the upper bound
      // of its predecessor.
      if (I != Ranges.begin() && Prev >= I->Lower) {
        DEBUG(llvm::dbgs() << "Upper bound 0x");
        DEBUG(llvm::dbgs().write_hex(Prev));
        DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x");
        DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n");
        return false;
      }
      // A range must not be inverted. The values are printed in the order the
      // message names them: the upper bound first, then the lower bound.
      if (I->Upper < I->Lower) {
        DEBUG(llvm::dbgs() << "Upper bound 0x");
        DEBUG(llvm::dbgs().write_hex(I->Upper));
        DEBUG(llvm::dbgs() << " should not be less than lower bound 0x");
        DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n");
        return false;
      }
      Prev = I->Upper;
    }
    return true;
  }

  /// Non-owning view of the ordered range array supplied at construction.
  const CharRanges Ranges;
};
} // namespace sys
} // namespace llvm
#endif // LLVM_SUPPORT_UNICODECHARRANGES_H