diff --git a/include/llvm/Support/PathV2.h b/include/llvm/Support/PathV2.h
index ab9f077349f..9417e4c16bc 100644
--- a/include/llvm/Support/PathV2.h
+++ b/include/llvm/Support/PathV2.h
@@ -24,6 +24,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_SYSTEM_PATHV2_H
+#define LLVM_SYSTEM_PATHV2_H
+
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/DataTypes.h"
@@ -64,13 +67,20 @@ namespace path {
 ///
 class const_iterator {
   StringRef Path; //< The entire path.
-  StringRef Component; //< The current component.
+  StringRef Component; //< The current component. Not necessarily in Path.
+  size_t    Position; //< The iterator's current position within Path.
+
+  // An end iterator has Position = Path.size().
+  friend const_iterator begin(const StringRef &path);
+  friend const_iterator end(const StringRef &path);
 
 public:
   typedef const StringRef value_type;
+  typedef ptrdiff_t difference_type;
   typedef value_type &reference;
   typedef value_type *pointer;
   typedef std::bidirectional_iterator_tag iterator_category;
+
   reference operator*() const;
   pointer operator->() const;
   const_iterator &operator++();    // preincrement
@@ -79,11 +89,18 @@ public:
   const_iterator &operator--(int); // postdecrement
   bool operator==(const const_iterator &RHS) const;
   bool operator!=(const const_iterator &RHS) const;
-
-  const_iterator(); //< Default construct end iterator.
-  const_iterator(const StringRef &path);
 };
 
+/// @brief Get begin iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized with the first component of \a path.
+const_iterator begin(const StringRef &path);
+
+/// @brief Get end iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized to the end of \a path.
+const_iterator end(const StringRef &path);
+
 /// @}
 /// @name Lexical Modifiers
 /// @{
@@ -136,7 +153,10 @@ error_code replace_extension(SmallVectorImpl<char> &path,
 /// @param component The component to be appended to \a path.
 /// @returns errc::success if \a component has been appended to \a path,
 ///          otherwise a platform specific error_code.
-error_code append(SmallVectorImpl<char> &path, const Twine &component);
+error_code append(SmallVectorImpl<char> &path, const Twine &a,
+                                               const Twine &b = "",
+                                               const Twine &c = "",
+                                               const Twine &d = "");
 
 /// @brief Append to path.
 ///
@@ -978,3 +998,5 @@ public:
 } // end namespace fs
 } // end namespace sys
 } // end namespace llvm
+
+#endif
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 105a507abd6..a64a18836b3 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -64,6 +64,7 @@ add_llvm_library(LLVMSupport
   Memory.cpp
   Mutex.cpp
   Path.cpp
+  PathV2.cpp
   Process.cpp
   Program.cpp
   RWMutex.cpp
@@ -79,6 +80,7 @@ add_llvm_library(LLVMSupport
   Unix/Memory.inc
   Unix/Mutex.inc
   Unix/Path.inc
+  Unix/PathV2.inc
   Unix/Process.inc
   Unix/Program.inc
   Unix/RWMutex.inc
@@ -92,6 +94,7 @@ add_llvm_library(LLVMSupport
   Windows/Memory.inc
   Windows/Mutex.inc
   Windows/Path.inc
+  Windows/PathV2.inc
   Windows/Process.inc
   Windows/Program.inc
   Windows/RWMutex.inc
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
new file mode 100644
index 00000000000..115f29447e3
--- /dev/null
+++ b/lib/Support/PathV2.cpp
@@ -0,0 +1,425 @@
+//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the operating system PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cctype>
+
+namespace {
+  using llvm::StringRef;
+
+  /// Returns true if \a value is a directory separator on this platform.
+  bool is_separator(const char value) {
+    switch(value) {
+#ifdef LLVM_ON_WIN32
+    case '\\': // fall through
+#endif
+    case '/': return true;
+    default: return false;
+    }
+  }
+
+#ifdef LLVM_ON_WIN32
+  const StringRef separators = "\\/";
+  const char      prefered_separator = '\\';
+#else
+  const StringRef separators = "/";
+  const char      prefered_separator = '/';
+#endif
+
+  /// Returns true if \a component has the shape of a network root name, i.e.
+  /// it starts with two identical separators and is longer than those two.
+  bool is_net_name(const StringRef &component) {
+    return component.size() > 2 &&
+           is_separator(component[0]) &&
+           component[1] == component[0];
+  }
+
+  /// Returns true if \a component is a drive style root name such as "c:".
+  /// Only Windows has drives, so this is always false elsewhere.
+  bool is_drive_name(const StringRef &component) {
+#ifdef LLVM_ON_WIN32
+    return component.endswith(":");
+#else
+    (void)component;
+    return false;
+#endif
+  }
+
+  /// Returns the first component of \a path. The result is always a prefix of
+  /// \a path, and is empty only when \a path is empty.
+  StringRef find_first_component(const StringRef &path) {
+    // Look for this first component in the following order.
+    // * empty (in this case we return an empty string)
+    // * either C: (Windows only) or {//,\\}net.
+    // * {/,\}
+    // * {file,directory}name, which includes "." and ".."
+
+    if (path.empty())
+      return path;
+
+#ifdef LLVM_ON_WIN32
+    // C:
+    // The cast keeps std::isalpha well defined for bytes >= 0x80.
+    if (path.size() >= 2 &&
+        std::isalpha(static_cast<unsigned char>(path[0])) && path[1] == ':')
+      return StringRef(path.begin(), 2);
+#endif
+
+    // //net
+    if (path.size() > 2 && is_separator(path[0]) && path[1] == path[0] &&
+        !is_separator(path[2])) {
+      // Find the next directory separator.
+      size_t end = path.find_first_of(separators, 2);
+      if (end == StringRef::npos)
+        return path;
+      return StringRef(path.begin(), end);
+    }
+
+    // {/,\}
+    if (is_separator(path[0]))
+      return StringRef(path.begin(), 1);
+
+    // {file,directory}name
+    // Search from the very first character: starting any later would skip the
+    // separator of a one character name such as the "a" in "a/b".
+    size_t end = path.find_first_of(separators);
+    if (end == StringRef::npos)
+      return path;
+    return StringRef(path.begin(), end);
+  }
+}
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+/// Returns an iterator positioned on the first component of \a path.
+const_iterator begin(const StringRef &path) {
+  const_iterator i;
+  i.Path      = path;
+  i.Component = find_first_component(path);
+  i.Position  = 0;
+  return i;
+}
+
+/// Returns the past-the-end iterator of \a path. Its Position is path.size()
+/// and its Component is left empty.
+const_iterator end(const StringRef &path) {
+  const_iterator i;
+  i.Path      = path;
+  i.Position  = path.size();
+  return i;
+}
+
+const_iterator::reference const_iterator::operator*() const {
+  return Component;
+}
+
+const_iterator::pointer const_iterator::operator->() const {
+  return &Component;
+}
+
+/// Advances to the next component. A run of separators is skipped as one, the
+/// separator following a root name is returned as a component of its own, and
+/// a trailing separator is reported as ".".
+const_iterator &const_iterator::operator++() {
+  assert(Position < Path.size() && "Tried to increment past end!");
+
+  // Increment Position to past the current component
+  Position += Component.size();
+
+  // Check for end.
+  if (Position == Path.size()) {
+    Component = StringRef();
+    return *this;
+  }
+
+  // Both POSIX and Windows treat paths that begin with exactly two separators
+  // specially.
+  bool was_net = Component.size() > 2 &&
+    is_separator(Component[0]) &&
+    Component[1] == Component[0] &&
+    !is_separator(Component[2]);
+
+  // Handle separators.
+  if (is_separator(Path[Position])) {
+    // Root dir.
+    if (was_net
+#ifdef LLVM_ON_WIN32
+        // c:/
+        || Component.endswith(":")
+#endif
+        ) {
+      Component = StringRef(Path.begin() + Position, 1);
+      return *this;
+    }
+
+    // Skip extra separators.
+    while (Position != Path.size() &&
+           is_separator(Path[Position])) {
+      ++Position;
+    }
+
+    // Treat trailing '/' as a '.'. Position steps back onto the last
+    // separator so that Position + Component.size() == Path.size(), which
+    // makes the next increment land on the end iterator.
+    if (Position == Path.size()) {
+      --Position;
+      Component = ".";
+      return *this;
+    }
+  }
+
+  // Find next component.
+  size_t end_pos = Path.find_first_of(separators, Position);
+  if (end_pos == StringRef::npos)
+    end_pos = Path.size();
+  Component = StringRef(Path.begin() + Position, end_pos - Position);
+
+  return *this;
+}
+
+/// Two iterators are equal when they refer to the same underlying buffer and
+/// the same Position within it.
+bool const_iterator::operator==(const const_iterator &RHS) const {
+  return Path.begin() == RHS.Path.begin() &&
+         Position == RHS.Position;
+}
+
+bool const_iterator::operator!=(const const_iterator &RHS) const {
+  return !(*this == RHS);
+}
+
+/// Sets \a result to the root name followed by the root directory of \a path
+/// ("c:/", "//net/", "/", ...), or to an empty string when it has neither.
+error_code root_path(const StringRef &path, StringRef &result) {
+  const_iterator b = begin(path),
+                 pos = b,
+                 e = end(path);
+  // Built in a local so that \a path stays intact until the very end, even
+  // when the caller passes one object as both \a path and \a result.
+  StringRef root;
+  if (b != e) {
+    if (is_net_name(*b) || is_drive_name(*b)) {
+      if ((++pos != e) && is_separator((*pos)[0])) {
+        // {C:/,//net/}, so get the first two components.
+        root = StringRef(path.begin(), b->size() + pos->size());
+      } else {
+        // just {C:,//net}, return the first component.
+        root = *b;
+      }
+    } else if (is_separator((*b)[0])) {
+      // POSIX style root directory.
+      root = *b;
+    }
+  }
+
+  result = root;
+  return make_error_code(errc::success);
+}
+
+/// Sets \a result to the root name of \a path ("c:" or "//net"), or to an
+/// empty string when it has none.
+error_code root_name(const StringRef &path, StringRef &result) {
+  const_iterator b = begin(path),
+                 e = end(path);
+  if (b != e && (is_net_name(*b) || is_drive_name(*b))) {
+    // just {C:,//net}, return the first component.
+    result = *b;
+    return make_error_code(errc::success);
+  }
+
+  // No path or no name.
+  result = StringRef();
+  return make_error_code(errc::success);
+}
+
+/// Sets \a result to the root directory of \a path (the separator that
+/// follows the root name, or a leading separator), or to an empty string.
+error_code root_directory(const StringRef &path, StringRef &result) {
+  const_iterator b = begin(path),
+                 pos = b,
+                 e = end(path);
+  if (b != e) {
+    bool has_net = is_net_name(*b);
+    bool has_drive = is_drive_name(*b);
+
+    if ((has_net || has_drive) &&
+        // {C:,//net}, skip to the next component.
+        (++pos != e) && is_separator((*pos)[0])) {
+      result = *pos;
+      return make_error_code(errc::success);
+    }
+
+    // POSIX style root directory.
+    if (!has_net && is_separator((*b)[0])) {
+      result = *b;
+      return make_error_code(errc::success);
+    }
+  }
+
+  // No path or no root.
+  result = StringRef();
+  return make_error_code(errc::success);
+}
+
+/// Sets \a result to true if \a path has a root name.
+error_code has_root_name(const Twine &path, bool &result) {
+  SmallString<128> storage;
+  StringRef p = path.toStringRef(storage);
+  StringRef name;
+
+  if (error_code ec = root_name(p, name)) return ec;
+  result = !name.empty();
+  return make_error_code(errc::success);
+}
+
+/// Sets \a result to true if \a path has a root directory.
+error_code has_root_directory(const Twine &path, bool &result) {
+  SmallString<128> storage;
+  StringRef p = path.toStringRef(storage);
+  StringRef directory;
+
+  if (error_code ec = root_directory(p, directory)) return ec;
+  result = !directory.empty();
+  return make_error_code(errc::success);
+}
+
+/// Sets \a result to the part of \a path that follows its root path.
+error_code relative_path(const StringRef &path, StringRef &result) {
+  StringRef root;
+  if (error_code ec = root_path(path, root)) return ec;
+  result = StringRef(path.begin() + root.size(), path.size() - root.size());
+  return make_error_code(errc::success);
+}
+
+/// Appends the components \a a to \a d, in that order, to \a path. Trivially
+/// empty components are skipped. A separator is inserted in front of a
+/// component only when \a path is not empty and neither the end of \a path
+/// nor the start of the component provides one already.
+error_code append(SmallVectorImpl<char> &path, const Twine &a,
+                                               const Twine &b,
+                                               const Twine &c,
+                                               const Twine &d) {
+  SmallString<32> a_storage;
+  SmallString<32> b_storage;
+  SmallString<32> c_storage;
+  SmallString<32> d_storage;
+
+  SmallVector<StringRef, 4> components;
+  if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage));
+  if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage));
+  if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage));
+  if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
+
+  for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(),
+                                                  e = components.end();
+                                                  i != e; ++i) {
+    bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
+    bool component_has_sep = !i->empty() && is_separator((*i)[0]);
+
+    if (path_has_sep) {
+      // Strip separators from beginning of component.
+      size_t loc = i->find_first_not_of(separators);
+
+      // The component is empty or made of separators only. path already ends
+      // with a separator, so there is nothing left to add.
+      if (loc == StringRef::npos)
+        continue;
+
+      // Append it.
+      path.append(i->begin() + loc, i->end());
+      continue;
+    }
+
+    // Add a separator, except in front of the very first thing put into an
+    // empty path: that would turn a relative path into an absolute one.
+    if (!component_has_sep && !path.empty())
+      path.push_back(prefered_separator);
+
+    path.append(i->begin(), i->end());
+  }
+
+  return make_error_code(errc::success);
+}
+
+/// Makes \a path absolute in place, using the current directory to supply
+/// whichever of root name, root directory and leading directories is missing.
+error_code make_absolute(SmallVectorImpl<char> &path) {
+  StringRef p(path.data(), path.size());
+
+  bool rootName, rootDirectory;
+  if (error_code ec = has_root_name(p, rootName)) return ec;
+  if (error_code ec = has_root_directory(p, rootDirectory)) return ec;
+
+  // Already absolute.
+  if (rootName && rootDirectory)
+    return make_error_code(errc::success);
+
+  // All of the following conditions will need the current directory.
+  SmallString<128> current_dir;
+  if (error_code ec = current_path(current_dir)) return ec;
+
+  // Relative path. Prepend the current directory.
+  if (!rootName && !rootDirectory) {
+    // Append path to the current directory.
+    if (error_code ec = append(current_dir, p)) return ec;
+    // Set path to the result.
+    path.swap(current_dir);
+    return make_error_code(errc::success);
+  }
+
+  if (!rootName && rootDirectory) {
+    StringRef cdrn;
+    if (error_code ec = root_name(current_dir, cdrn)) return ec;
+    SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+    if (error_code ec = append(curDirRootName, p)) return ec;
+    // Set path to the result.
+    path.swap(curDirRootName);
+    return make_error_code(errc::success);
+  }
+
+  if (rootName && !rootDirectory) {
+    StringRef pRootName;
+    StringRef bRootDirectory;
+    StringRef bRelativePath;
+    StringRef pRelativePath;
+    if (error_code ec = root_name(p, pRootName)) return ec;
+    if (error_code ec = root_directory(current_dir, bRootDirectory)) return ec;
+    if (error_code ec = relative_path(current_dir, bRelativePath)) return ec;
+    if (error_code ec = relative_path(p, pRelativePath)) return ec;
+
+    SmallString<128> res;
+    if (error_code ec = append(res, pRootName, bRootDirectory,
+                                    bRelativePath, pRelativePath)) return ec;
+    path.swap(res);
+    return make_error_code(errc::success);
+  }
+
+  llvm_unreachable("All rootName and rootDirectory combinations should have "
+                   "occurred above!");
+}
+
+}
+}
+}
+
+// Include the truly platform-specific parts.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/PathV2.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/PathV2.inc"
+#endif
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
new file mode 100644
index 00000000000..bcd93f046dd
--- /dev/null
+++ b/lib/Support/Unix/PathV2.inc
@@ -0,0 +1,39 @@
+//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+// Stores the current working directory in result, growing it until it fits.
+error_code current_path(SmallVectorImpl<char> &result) {
+  long size = ::pathconf(".", _PC_PATH_MAX); // -1: no limit, or an error.
+  result.reserve(size > 0 ? size + 1 : 1024);
+  while (::getcwd(result.data(), result.capacity()) == 0) {
+    if (errno != ERANGE) return error_code(errno, system_category());
+    result.reserve(result.capacity() * 2); // Too small, retry with more room.
+  }
+  result.set_size(strlen(result.data()));
+  return make_error_code(errc::success);
+}
+
+} // end namespace path
+} // end namespace sys
+} // end namespace llvm
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
new file mode 100644
index 00000000000..9c15e26b79d
--- /dev/null
+++ b/lib/Support/Windows/PathV2.inc
@@ -0,0 +1,74 @@
+//===- llvm/Support/Win32/PathV2.cpp - Windows Path Impl --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//===          is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+/// Stores the current directory, converted from UTF-16 to UTF-8, in \a result.
+error_code current_path(SmallVectorImpl<char> &result) {
+  SmallVector<wchar_t, 128> cur_path;
+  cur_path.reserve(128);
+
+  DWORD len;
+  for (;;) {
+    len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
+
+    // A zero return value indicates a failure other than insufficient space.
+    if (len == 0)
+      return make_error_code(windows_error(::GetLastError()));
+
+    // If there's insufficient space, the len returned is larger than the len
+    // given. Make room for it and ask again.
+    if (len <= cur_path.capacity())
+      break;
+    cur_path.reserve(len);
+  }
+
+  cur_path.set_size(len);
+  // cur_path now holds the current directory in utf-16. Convert to utf-8.
+
+  // Find out how much space we need. Sadly, this function doesn't return the
+  // size needed unless you tell it the result size is 0, which means you
+  // _always_ have to call it twice.
+  len = ::WideCharToMultiByte(CP_UTF8, 0,
+                              cur_path.data(), cur_path.size(),
+                              result.data(), 0,
+                              NULL, NULL);
+
+  if (len == 0)
+    return make_error_code(windows_error(::GetLastError()));
+
+  result.reserve(len);
+  result.set_size(len);
+  // Now do the actual conversion.
+  len = ::WideCharToMultiByte(CP_UTF8, 0,
+                              cur_path.data(), cur_path.size(),
+                              result.data(), result.size(),
+                              NULL, NULL);
+  if (len == 0)
+    return make_error_code(windows_error(::GetLastError()));
+
+  return make_error_code(errc::success);
+}
+
+} // end namespace path
+} // end namespace sys
+} // end namespace llvm
diff --git a/unittests/System/Path.cpp b/unittests/System/Path.cpp
index 2b0817f50c5..daeb0175d25 100644
--- a/unittests/System/Path.cpp
+++ b/unittests/System/Path.cpp
@@ -7,15 +7,92 @@
 //
 //===----------------------------------------------------------------------===//
 
-// For now, just test that the header file parses.
 #include "llvm/Support/PathV2.h"
+#include "llvm/Support/raw_ostream.h"
 
 #include "gtest/gtest.h"
 
+using namespace llvm;
+
 namespace {
 
 TEST(System, Path) {
-  // TODO: Add tests!
+  // Every entry is dumped below; the mix covers POSIX, network and Windows
+  // style spellings regardless of the host platform.
+  SmallVector<StringRef, 40> paths;
+  paths.push_back("");
+  paths.push_back(".");
+  paths.push_back("..");
+  paths.push_back("foo");
+  paths.push_back("/");
+  paths.push_back("/foo");
+  paths.push_back("foo/");
+  paths.push_back("/foo/");
+  paths.push_back("foo/bar");
+  paths.push_back("/foo/bar");
+  paths.push_back("//net");
+  paths.push_back("//net/foo");
+  paths.push_back("///foo///");
+  paths.push_back("///foo///bar");
+  paths.push_back("/.");
+  paths.push_back("./");
+  paths.push_back("/..");
+  paths.push_back("../");
+  paths.push_back("foo/.");
+  paths.push_back("foo/..");
+  paths.push_back("foo/./");
+  paths.push_back("foo/./bar");
+  paths.push_back("foo/..");
+  paths.push_back("foo/../");
+  paths.push_back("foo/../bar");
+  paths.push_back("c:");
+  paths.push_back("c:/");
+  paths.push_back("c:foo");
+  paths.push_back("c:/foo");
+  paths.push_back("c:foo/");
+  paths.push_back("c:/foo/");
+  paths.push_back("c:/foo/bar");
+  paths.push_back("prn:");
+  paths.push_back("c:\\");
+  paths.push_back("c:foo");
+  paths.push_back("c:\\foo");
+  paths.push_back("c:foo\\");
+  paths.push_back("c:\\foo\\");
+  paths.push_back("c:\\foo/");
+  paths.push_back("c:/foo\\bar");
+
+  for (SmallVector<StringRef, 40>::const_iterator i = paths.begin(),
+                                                  e = paths.end();
+                                                  i != e;
+                                                  ++i) {
+    outs() << *i << " =>\n Iteration: [";
+    for (sys::path::const_iterator ci = sys::path::begin(*i),
+                                   ce = sys::path::end(*i);
+                                   ci != ce;
+                                   ++ci) {
+      outs() << *ci << ',';
+    }
+    outs() << "]\n";
+
+    StringRef res;
+    SmallString<16> temp_store;
+    if (error_code ec = sys::path::root_path(*i, res))
+      FAIL() << ec.message();
+    outs() << " root_path: " << res << '\n';
+    if (error_code ec = sys::path::root_name(*i, res))
+      FAIL() << ec.message();
+    outs() << " root_name: " << res << '\n';
+    if (error_code ec = sys::path::root_directory(*i, res))
+      FAIL() << ec.message();
+    outs() << " root_directory: " << res << '\n';
+
+    temp_store = *i;
+    if (error_code ec = sys::path::make_absolute(temp_store))
+      FAIL() << ec.message();
+    outs() << " make_absolute: " << temp_store << '\n';
+
+    outs().flush();
+  }
 }
 
 } // anonymous namespace