Windows: Add support for unicode command lines

Summary:
The MSVCRT deliberately sends main() code-page specific characters.
This isn't too useful to LLVM as we end up converting the arguments to
UTF-16 and subsequently attempt to use the result as, for example, a
file name.  Instead, we need to have the ability to access the Unicode
command line and transform it to UTF-8.

This approach is preferable to using the MSVC-specific wmain() function
as our entry point because:
 - wmain() doesn't work on Cygwin.
 - wmain() only works on MinGW with caveats, and only then on certain versions.
 - We get to keep our entry point as main(). :)

N.B.  This patch also includes fixes to other parts of lib/Support/Windows
so that they can take advantage of the Unicode paths.  For example, clang
spawning clang -cc1 needs to be able to pass it Unicode arguments.

Reviewers: aaron.ballman, Bigcheese, rnk, ruiu

Reviewed By: rnk

CC: llvm-commits, ygao

Differential Revision: http://llvm-reviews.chandlerc.com/D1834

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192069 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
David Majnemer 2013-10-06 20:25:49 +00:00
parent 2def17935c
commit 5a1a1856a4
18 changed files with 181 additions and 88 deletions

View File

@ -1399,6 +1399,7 @@ AC_CHECK_LIB(m,sin)
if test "$llvm_cv_os_type" = "MingW" ; then
AC_CHECK_LIB(imagehlp, main)
AC_CHECK_LIB(psapi, main)
AC_CHECK_LIB(shell32, main)
fi
dnl dlopen() is required for plugin support.

View File

@ -415,6 +415,7 @@ endif ()
if( MINGW )
set(HAVE_LIBIMAGEHLP 1)
set(HAVE_LIBPSAPI 1)
set(HAVE_LIBSHELL32 1)
# TODO: Check existence of libraries.
# include(CheckLibraryExists)
# CHECK_LIBRARY_EXISTS(imagehlp ??? . HAVE_LIBIMAGEHLP)

View File

@ -2,7 +2,7 @@ function(get_system_libs return_var)
# Returns in `return_var' a list of system libraries used by LLVM.
if( NOT MSVC )
if( MINGW )
set(system_libs ${system_libs} imagehlp psapi)
set(system_libs ${system_libs} imagehlp psapi shell32)
elseif( CMAKE_HOST_UNIX )
if( HAVE_LIBRT )
set(system_libs ${system_libs} rt)

View File

@ -128,7 +128,7 @@ macro(add_tablegen target project)
endif()
if( MINGW )
target_link_libraries(${target} imagehlp psapi)
target_link_libraries(${target} imagehlp psapi shell32)
if(CMAKE_SIZEOF_VOID_P MATCHES "8")
set_target_properties(${target} PROPERTIES LINK_FLAGS -Wl,--stack,16777216)
endif(CMAKE_SIZEOF_VOID_P MATCHES "8")

View File

@ -203,6 +203,9 @@
/* Define to 1 if you have the `pthread' library (-lpthread). */
#cmakedefine HAVE_LIBPTHREAD ${HAVE_LIBPTHREAD}
/* Define to 1 if you have the `shell32' library (-lshell32). */
#cmakedefine HAVE_LIBSHELL32 ${HAVE_LIBSHELL32}
/* Define to 1 if you have the `udis86' library (-ludis86). */
#undef HAVE_LIBUDIS86

View File

@ -217,6 +217,9 @@
/* Define to 1 if you have the `pthread' library (-lpthread). */
#undef HAVE_LIBPTHREAD
/* Define to 1 if you have the `shell32' library (-lshell32). */
#undef HAVE_LIBSHELL32
/* Define to 1 if you have the `udis86' library (-ludis86). */
#undef HAVE_LIBUDIS86

View File

@ -25,8 +25,11 @@
#ifndef LLVM_SUPPORT_PROCESS_H
#define LLVM_SUPPORT_PROCESS_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/TimeValue.h"
@ -168,6 +171,14 @@ public:
// string. \arg Name is assumed to be in UTF-8 encoding too.
static Optional<std::string> GetEnv(StringRef name);
/// Fills \p Args with the arguments passed from the operating system to the
/// program. The result is reported through \p Args, not the return value.
///
/// \param Args receives the argument strings; entries are appended to it.
/// \param ArgsFromMain the argument vector that was handed to main(). The
/// Unix implementation copies these pointers into \p Args unchanged; the
/// Windows implementation ignores them and rebuilds the arguments from the
/// Unicode command line, converted to UTF-8.
/// \param ArgAllocator storage for the converted strings on Windows (unused
/// on Unix). Pointers placed in \p Args may point into this allocator, so it
/// presumably has to outlive any use of \p Args -- confirm with callers.
/// \returns error_code::success() on success, otherwise the error that
/// prevented the arguments from being retrieved or converted.
static error_code
GetArgumentVector(SmallVectorImpl<const char *> &Args,
ArrayRef<const char *> ArgsFromMain,
SpecificBumpPtrAllocator<char> &ArgAllocator);
/// This function determines if the standard input is connected directly
/// to a user's input (keyboard probably), rather than coming from a file
/// or pipe.

View File

@ -190,6 +190,14 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
return std::string(Val);
}
/// Unix implementation: the argument vector received by main() is used as-is,
/// so the pointers in ArgsIn are appended to ArgsOut without any conversion.
/// The allocator parameter is not needed here and is left unnamed.
error_code
Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut,
                           ArrayRef<const char *> ArgsIn,
                           SpecificBumpPtrAllocator<char> & /*unused*/) {
  // Only the pointers are copied; the strings themselves are not duplicated.
  ArgsOut.append(ArgsIn.begin(), ArgsIn.end());

  return error_code::success();
}
/// Answers the question for standard input by forwarding the stdin file
/// descriptor to FileDescriptorIsDisplayed.
bool Process::StandardInIsUserInput() {
  const int StdinFD = STDIN_FILENO;
  return FileDescriptorIsDisplayed(StdinFD);
}

View File

@ -83,8 +83,15 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
// This is mostly to ensure that the return value still shows up as "valid".
return DynamicLibrary(&OpenedHandles);
}
SmallVector<wchar_t, MAX_PATH> filenameUnicode;
if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
SetLastError(ec.value());
MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: ");
return DynamicLibrary();
}
HMODULE a_handle = LoadLibrary(filename);
HMODULE a_handle = LoadLibraryW(filenameUnicode.data());
if (a_handle == 0) {
MakeErrMsg(errMsg, std::string(filename) + ": Can't open : ");

View File

@ -128,7 +128,7 @@ retry_random_path:
BYTE val = 0;
if (!::CryptGenRandom(CryptoProvider, 1, &val))
return windows_error(::GetLastError());
random_path_utf16.push_back("0123456789abcdef"[val & 15]);
random_path_utf16.push_back(L"0123456789abcdef"[val & 15]);
}
else
random_path_utf16.push_back(*i);
@ -241,22 +241,23 @@ TimeValue file_status::getLastModificationTime() const {
}
error_code current_path(SmallVectorImpl<char> &result) {
SmallVector<wchar_t, 128> cur_path;
cur_path.reserve(128);
retry_cur_dir:
DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
SmallVector<wchar_t, MAX_PATH> cur_path;
DWORD len = MAX_PATH;
// A zero return value indicates a failure other than insufficient space.
if (len == 0)
return windows_error(::GetLastError());
// If there's insufficient space, the len returned is larger than the len
// given.
if (len > cur_path.capacity()) {
do {
cur_path.reserve(len);
goto retry_cur_dir;
}
len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
// A zero return value indicates a failure other than insufficient space.
if (len == 0)
return windows_error(::GetLastError());
// If there's insufficient space, the len returned is larger than the len
// given.
} while (len > cur_path.capacity());
// On success, GetCurrentDirectoryW returns the number of characters not
// including the null-terminator.
cur_path.set_size(len);
return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
}

View File

@ -11,18 +11,25 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
#include "Windows.h"
#include <direct.h>
#include <io.h>
#include <malloc.h>
#include <psapi.h>
#include <Shellapi.h>
#ifdef __MINGW32__
#if (HAVE_LIBPSAPI != 1)
#error "libpsapi.a should be present"
#endif
#if (HAVE_LIBSHELL32 != 1)
#error "libshell32.a should be present"
#endif
#else
#pragma comment(lib, "psapi.lib")
#pragma comment(lib, "psapi.lib")
#pragma comment(lib, "Shell32.lib")
#endif
//===----------------------------------------------------------------------===//
@ -151,25 +158,58 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
// Environment variable can be encoded in non-UTF8 encoding, and there's no
// way to know what the encoding is. The only reliable way to look up
// multibyte environment variable is to use GetEnvironmentVariableW().
std::vector<wchar_t> Buf(16);
size_t Size = 0;
for (;;) {
Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.size());
if (Size < Buf.size())
break;
SmallVector<wchar_t, MAX_PATH> Buf;
size_t Size = MAX_PATH;
do {
Buf.reserve(Size);
Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.capacity());
if (Size == 0)
return None;
// Try again with larger buffer.
Buf.resize(Size + 1);
}
if (Size == 0)
return None;
} while (Size > Buf.capacity());
Buf.set_size(Size);
// Convert the result from UTF-16 to UTF-8.
SmallVector<char, 128> Res;
SmallVector<char, MAX_PATH> Res;
if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res))
return None;
return std::string(&Res[0]);
}
/// Windows implementation: the narrow argument vector from main() is ignored.
/// The arguments are instead re-parsed from the process's wide command line
/// and each one is converted to UTF-8. Every converted string is copied into
/// memory obtained from ArgAllocator and its pointer is appended to Args.
///
/// If a conversion fails, the loop stops and that error is returned; any
/// arguments converted before the failure have already been appended to Args.
error_code
Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
                           ArrayRef<const char *>,
                           SpecificBumpPtrAllocator<char> &ArgAllocator) {
  int WideArgCount;
  wchar_t **WideArgs = CommandLineToArgvW(GetCommandLineW(), &WideArgCount);
  if (!WideArgs)
    return windows_error(::GetLastError());

  Args.reserve(WideArgCount);

  error_code Status;
  for (int Idx = 0; Idx < WideArgCount; ++Idx) {
    const wchar_t *WideArg = WideArgs[Idx];

    SmallVector<char, MAX_PATH> Utf8Arg;
    Status = windows::UTF16ToUTF8(WideArg, wcslen(WideArg), Utf8Arg);
    if (Status)
      break;

    // One extra byte is copied so the stored string is NUL-terminated.
    // NOTE(review): this reads one char past Utf8Arg.size(); presumably
    // UTF16ToUTF8 leaves a terminator there -- confirm.
    const size_t BytesWithNul = Utf8Arg.size() + 1;
    char *Copy = ArgAllocator.Allocate(BytesWithNul);
    ::memcpy(Copy, Utf8Arg.data(), BytesWithNul);
    Args.push_back(Copy);
  }

  // The array returned by CommandLineToArgvW is released on both the success
  // and the failure path.
  LocalFree(WideArgs);

  return Status ? Status : error_code::success();
}
/// Answers the question for standard input by forwarding file descriptor 0
/// to FileDescriptorIsDisplayed.
bool Process::StandardInIsUserInput() {
  const int StdinFD = 0;
  return FileDescriptorIsDisplayed(StdinFD);
}

View File

@ -42,42 +42,39 @@ std::string sys::FindProgramByName(const std::string &progName) {
// At this point, the file name is valid and does not contain slashes.
// Let Windows search for it.
std::string buffer;
buffer.resize(MAX_PATH);
char *dummy = NULL;
DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
&buffer[0], &dummy);
// See if it wasn't found.
if (len == 0)
SmallVector<wchar_t, MAX_PATH> progNameUnicode;
if (windows::UTF8ToUTF16(progName, progNameUnicode))
return "";
// See if we got the entire path.
if (len < MAX_PATH)
return buffer;
SmallVector<wchar_t, MAX_PATH> buffer;
DWORD len = MAX_PATH;
do {
buffer.reserve(len);
len = ::SearchPathW(NULL, progNameUnicode.data(), L".exe",
buffer.capacity(), buffer.data(), NULL);
// Buffer was too small; grow and retry.
while (true) {
buffer.resize(len+1);
DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, &buffer[0], &dummy);
// It is unlikely the search failed, but it's always possible some file
// was added or removed since the last search, so be paranoid...
if (len2 == 0)
// See if it wasn't found.
if (len == 0)
return "";
else if (len2 <= len)
return buffer;
len = len2;
}
// Buffer was too small; grow and retry.
} while (len > buffer.capacity());
buffer.set_size(len);
SmallVector<char, MAX_PATH> result;
if (windows::UTF16ToUTF8(buffer.begin(), buffer.size(), result))
return "";
return std::string(result.data(), result.size());
}
static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
HANDLE h;
if (path == 0) {
DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
GetCurrentProcess(), &h,
0, TRUE, DUPLICATE_SAME_ACCESS);
if (!DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
GetCurrentProcess(), &h,
0, TRUE, DUPLICATE_SAME_ACCESS))
return INVALID_HANDLE_VALUE;
return h;
}
@ -92,9 +89,13 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
sa.lpSecurityDescriptor = 0;
sa.bInheritHandle = TRUE;
h = CreateFile(fname.c_str(), fd ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, NULL);
SmallVector<wchar_t, 128> fnameUnicode;
if (windows::UTF8ToUTF16(fname, fnameUnicode))
return INVALID_HANDLE_VALUE;
h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, NULL);
if (h == INVALID_HANDLE_VALUE) {
MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
(fd ? "input: " : "output: "));
@ -218,34 +219,28 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
*p = 0;
// The pointer to the environment block for the new process.
OwningArrayPtr<char> envblock;
std::vector<wchar_t> EnvBlock;
if (envp) {
// An environment block consists of a null-terminated block of
// null-terminated strings. Convert the array of environment variables to
// an environment block by concatenating them.
for (unsigned i = 0; envp[i]; ++i) {
SmallVector<wchar_t, MAX_PATH> EnvString;
if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16");
return false;
}
// First, determine the length of the environment block.
len = 0;
for (unsigned i = 0; envp[i]; i++)
len += strlen(envp[i]) + 1;
// Now build the environment block.
envblock.reset(new char[len+1]);
p = envblock.get();
for (unsigned i = 0; envp[i]; i++) {
const char *ev = envp[i];
size_t len = strlen(ev) + 1;
memcpy(p, ev, len);
p += len;
EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end());
EnvBlock.push_back(0);
}
*p = 0;
EnvBlock.push_back(0);
}
// Create a child process.
STARTUPINFO si;
STARTUPINFOW si;
memset(&si, 0, sizeof(si));
si.cb = sizeof(si);
si.hStdInput = INVALID_HANDLE_VALUE;
@ -269,9 +264,14 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
// If stdout and stderr should go to the same place, redirect stderr
// to the handle already open for stdout.
DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
GetCurrentProcess(), &si.hStdError,
0, TRUE, DUPLICATE_SAME_ACCESS);
if (!DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
GetCurrentProcess(), &si.hStdError,
0, TRUE, DUPLICATE_SAME_ACCESS)) {
CloseHandle(si.hStdInput);
CloseHandle(si.hStdOutput);
MakeErrMsg(ErrMsg, "can't dup stderr to stdout");
return false;
}
} else {
// Just redirect stderr
si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
@ -289,9 +289,27 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
fflush(stdout);
fflush(stderr);
std::string ProgramStr = Program;
BOOL rc = CreateProcess(ProgramStr.c_str(), command.get(), NULL, NULL, TRUE,
0, envblock.get(), NULL, &si, &pi);
SmallVector<wchar_t, MAX_PATH> ProgramUtf16;
if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg,
std::string("Unable to convert application name to UTF-16"));
return false;
}
SmallVector<wchar_t, MAX_PATH> CommandUtf16;
if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg,
std::string("Unable to convert command-line to UTF-16"));
return false;
}
BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0,
TRUE, CREATE_UNICODE_ENVIRONMENT,
EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si,
&pi);
DWORD err = GetLastError();
// Regardless of whether the process got created or not, we are done with
@ -304,7 +322,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
if (!rc) {
SetLastError(err);
MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
ProgramStr + "'");
Program.str() + "'");
return false;
}

View File

@ -135,7 +135,7 @@ typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64);
static fpSymFunctionTableAccess64 SymFunctionTableAccess64;
static bool load64BitDebugHelp(void) {
HMODULE hLib = ::LoadLibrary("Dbghelp.dll");
HMODULE hLib = ::LoadLibrary(TEXT("Dbghelp.dll"));
if (hLib) {
StackWalk64 = (fpStackWalk64)
::GetProcAddress(hLib, "StackWalk64");

View File

@ -31,7 +31,6 @@
#include "llvm/Support/system_error.h"
#include <windows.h>
#include <wincrypt.h>
#include <shlobj.h>
#include <cassert>
#include <string>
#include <vector>

View File

@ -1087,6 +1087,7 @@ AC_CHECK_LIB(m,sin)
if test "$llvm_cv_os_type" = "MingW" ; then
AC_CHECK_LIB(imagehlp, main)
AC_CHECK_LIB(psapi, main)
AC_CHECK_LIB(shell32, main)
fi
dnl dlopen() is required for plugin support.

View File

@ -4,7 +4,7 @@ add_llvm_utility(FileCheck
target_link_libraries(FileCheck LLVMSupport)
if( MINGW )
target_link_libraries(FileCheck imagehlp psapi)
target_link_libraries(FileCheck imagehlp psapi shell32)
endif( MINGW )
if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
target_link_libraries(FileCheck pthread)

View File

@ -4,7 +4,7 @@ add_llvm_utility(FileUpdate
target_link_libraries(FileUpdate LLVMSupport)
if( MINGW )
target_link_libraries(FileUpdate imagehlp psapi)
target_link_libraries(FileUpdate imagehlp psapi shell32)
endif( MINGW )
if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
target_link_libraries(FileUpdate pthread)

View File

@ -4,7 +4,7 @@ add_llvm_utility(not
target_link_libraries(not LLVMSupport)
if( MINGW )
target_link_libraries(not imagehlp psapi)
target_link_libraries(not imagehlp psapi shell32)
endif( MINGW )
if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
target_link_libraries(not pthread)