llvm-6502/include/llvm/Support/UnicodeCharRanges.h
Alexander Kornienko ece0bec0c8 Implemented llvm::sys::locale::columnWidth and isPrint for the case of generic UTF8-capable terminal.
Summary:
This is a second attempt to get this right. After reading the Unicode
Standard, I came up with code that uses definitions of "printable" and
"column width" that are more suitable for terminal output (i.e. fixed-width
fonts and special treatment of many control characters).
The implementation here can probably be used for Windows and MacOS if someone
can test it properly.
The patch addresses PR14910.

Reviewers: jordan_rose, gribozavr

CC: llvm-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D1253

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187837 91177308-0d34-0410-b5e6-96231b3b80d8
2013-08-07 00:07:07 +00:00

99 lines
2.8 KiB
C

//===--- UnicodeCharRanges.h - Types and functions for character ranges ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_UNICODECHARRANGES_H
#define LLVM_SUPPORT_UNICODECHARRANGES_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/raw_ostream.h"
namespace {
/// A closed range of Unicode code points, [Lower, Upper].
///
/// isCharInSet treats both bounds as inclusive, and isValidCharSet rejects
/// any range whose Upper is less than its Lower.
struct UnicodeCharRange {
/// First code point of the range (inclusive).
uint32_t Lower;
/// Last code point of the range (inclusive).
uint32_t Upper;
};
/// A set of Unicode code points, expressed as an llvm::ArrayRef of
/// UnicodeCharRange entries. isCharInSet binary-searches this array, so the
/// ranges are expected to be ordered and non-overlapping (see isValidCharSet).
typedef llvm::ArrayRef<UnicodeCharRange> UnicodeCharSet;
/// Checks that \p CharSet is well-formed for use with isCharInSet.
///
/// Returns true if each of the ranges in \p CharSet is a proper closed range
/// [Lower, Upper] (i.e. Lower <= Upper), and if the ranges themselves are
/// ordered and non-overlapping (each range's Lower is strictly greater than
/// the preceding range's Upper). When a check fails, a description of the
/// offending bounds is written via the DEBUG macro and false is returned.
///
/// When NDEBUG is defined no checking is done and the function always returns
/// true. Otherwise, sets that passed validation are remembered by the address
/// of their first element so that each one is only walked once.
static inline bool isValidCharSet(UnicodeCharSet CharSet) {
#ifndef NDEBUG
  static llvm::SmallPtrSet<const UnicodeCharRange *, 16> Validated;
  static llvm::sys::Mutex ValidationMutex;

  // Check the validation cache. The lock is only held while the cache is
  // touched, not while the ranges are being walked.
  {
    llvm::MutexGuard Guard(ValidationMutex);
    if (Validated.count(CharSet.data()))
      return true;
  }

  // Walk through the ranges. Prev holds the upper bound of the previous
  // range; it is only consulted from the second range onwards.
  uint32_t Prev = 0;
  for (UnicodeCharSet::iterator I = CharSet.begin(), E = CharSet.end();
       I != E; ++I) {
    if (I != CharSet.begin() && Prev >= I->Lower) {
      DEBUG(llvm::dbgs() << "Upper bound 0x");
      DEBUG(llvm::dbgs().write_hex(Prev));
      DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x");
      DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n");
      return false;
    }
    if (I->Upper < I->Lower) {
      // Print each bound next to the label that names it: the upper bound
      // first, then the lower bound it should not be less than.
      DEBUG(llvm::dbgs() << "Upper bound 0x");
      DEBUG(llvm::dbgs().write_hex(I->Upper));
      DEBUG(llvm::dbgs() << " should not be less than lower bound 0x");
      DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n");
      return false;
    }
    Prev = I->Upper;
  }

  // Update the validation cache.
  {
    llvm::MutexGuard Guard(ValidationMutex);
    Validated.insert(CharSet.data());
  }
#endif
  return true;
}
} // namespace
/// Tests whether the Unicode code point \p C is covered by one of the closed
/// ranges in \p CharSet.
///
/// \p CharSet is asserted to satisfy isValidCharSet(). The lookup is a binary
/// search over the range array.
LLVM_READONLY static inline bool isCharInSet(uint32_t C,
                                             UnicodeCharSet CharSet) {
  assert(isValidCharSet(CharSet));

  // Candidate ranges live in the half-open index window [First, Last).
  size_t First = 0;
  size_t Last = CharSet.size();
  while (First != Last) {
    const size_t Probe = (Last + First) / 2;
    const UnicodeCharRange &Range = CharSet[Probe];

    // C lies below this range: discard it and everything after it.
    if (C < Range.Lower) {
      Last = Probe;
      continue;
    }
    // C lies above this range: discard it and everything before it.
    if (C > Range.Upper) {
      First = Probe + 1;
      continue;
    }
    // Lower <= C <= Upper: found a containing range.
    return true;
  }

  // The window is empty; no range contains C.
  return false;
}
#endif // LLVM_SUPPORT_UNICODECHARRANGES_H