Windows: Add support for unicode command lines

Summary:
The MSVCRT deliberately passes code-page-specific characters to main().
This isn't too useful to LLVM, as we end up converting the arguments to
UTF-16 and subsequently attempting to use the result as, for example, a
file name.  Instead, we need to have the ability to access the Unicode
command line and transform it to UTF-8.

This has the distinct advantage over using the MSVC-specific wmain()
function as our entry point because:
 - wmain() doesn't work on Cygwin.
 - wmain() only works on MinGW with caveats, and only then on certain versions.
 - We get to keep our entry point as main(). :)

N.B.  This patch also includes fixes to other parts of lib/Support/Windows
so that we are able to take advantage of the Unicode paths.
E.g., clang spawning clang -cc1 would want to give it Unicode arguments.

Reviewers: aaron.ballman, Bigcheese, rnk, ruiu

Reviewed By: rnk

CC: llvm-commits, ygao

Differential Revision: http://llvm-reviews.chandlerc.com/D1834

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192069 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
David Majnemer
2013-10-06 20:25:49 +00:00
parent 2def17935c
commit 5a1a1856a4
18 changed files with 181 additions and 88 deletions

View File

@@ -42,42 +42,39 @@ std::string sys::FindProgramByName(const std::string &progName) {
// At this point, the file name is valid and does not contain slashes.
// Let Windows search for it.
std::string buffer;
buffer.resize(MAX_PATH);
char *dummy = NULL;
DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
&buffer[0], &dummy);
// See if it wasn't found.
if (len == 0)
SmallVector<wchar_t, MAX_PATH> progNameUnicode;
if (windows::UTF8ToUTF16(progName, progNameUnicode))
return "";
// See if we got the entire path.
if (len < MAX_PATH)
return buffer;
SmallVector<wchar_t, MAX_PATH> buffer;
DWORD len = MAX_PATH;
do {
buffer.reserve(len);
len = ::SearchPathW(NULL, progNameUnicode.data(), L".exe",
buffer.capacity(), buffer.data(), NULL);
// Buffer was too small; grow and retry.
while (true) {
buffer.resize(len+1);
DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, &buffer[0], &dummy);
// It is unlikely the search failed, but it's always possible some file
// was added or removed since the last search, so be paranoid...
if (len2 == 0)
// See if it wasn't found.
if (len == 0)
return "";
else if (len2 <= len)
return buffer;
len = len2;
}
// Buffer was too small; grow and retry.
} while (len > buffer.capacity());
buffer.set_size(len);
SmallVector<char, MAX_PATH> result;
if (windows::UTF16ToUTF8(buffer.begin(), buffer.size(), result))
return "";
return std::string(result.data(), result.size());
}
static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
HANDLE h;
if (path == 0) {
DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
GetCurrentProcess(), &h,
0, TRUE, DUPLICATE_SAME_ACCESS);
if (!DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
GetCurrentProcess(), &h,
0, TRUE, DUPLICATE_SAME_ACCESS))
return INVALID_HANDLE_VALUE;
return h;
}
@@ -92,9 +89,13 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
sa.lpSecurityDescriptor = 0;
sa.bInheritHandle = TRUE;
h = CreateFile(fname.c_str(), fd ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, NULL);
SmallVector<wchar_t, 128> fnameUnicode;
if (windows::UTF8ToUTF16(fname, fnameUnicode))
return INVALID_HANDLE_VALUE;
h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, NULL);
if (h == INVALID_HANDLE_VALUE) {
MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
(fd ? "input: " : "output: "));
@@ -218,34 +219,28 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
*p = 0;
// The pointer to the environment block for the new process.
OwningArrayPtr<char> envblock;
std::vector<wchar_t> EnvBlock;
if (envp) {
// An environment block consists of a null-terminated block of
// null-terminated strings. Convert the array of environment variables to
// an environment block by concatenating them.
for (unsigned i = 0; envp[i]; ++i) {
SmallVector<wchar_t, MAX_PATH> EnvString;
if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16");
return false;
}
// First, determine the length of the environment block.
len = 0;
for (unsigned i = 0; envp[i]; i++)
len += strlen(envp[i]) + 1;
// Now build the environment block.
envblock.reset(new char[len+1]);
p = envblock.get();
for (unsigned i = 0; envp[i]; i++) {
const char *ev = envp[i];
size_t len = strlen(ev) + 1;
memcpy(p, ev, len);
p += len;
EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end());
EnvBlock.push_back(0);
}
*p = 0;
EnvBlock.push_back(0);
}
// Create a child process.
STARTUPINFO si;
STARTUPINFOW si;
memset(&si, 0, sizeof(si));
si.cb = sizeof(si);
si.hStdInput = INVALID_HANDLE_VALUE;
@@ -269,9 +264,14 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
// If stdout and stderr should go to the same place, redirect stderr
// to the handle already open for stdout.
DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
GetCurrentProcess(), &si.hStdError,
0, TRUE, DUPLICATE_SAME_ACCESS);
if (!DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
GetCurrentProcess(), &si.hStdError,
0, TRUE, DUPLICATE_SAME_ACCESS)) {
CloseHandle(si.hStdInput);
CloseHandle(si.hStdOutput);
MakeErrMsg(ErrMsg, "can't dup stderr to stdout");
return false;
}
} else {
// Just redirect stderr
si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
@@ -289,9 +289,27 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
fflush(stdout);
fflush(stderr);
std::string ProgramStr = Program;
BOOL rc = CreateProcess(ProgramStr.c_str(), command.get(), NULL, NULL, TRUE,
0, envblock.get(), NULL, &si, &pi);
SmallVector<wchar_t, MAX_PATH> ProgramUtf16;
if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg,
std::string("Unable to convert application name to UTF-16"));
return false;
}
SmallVector<wchar_t, MAX_PATH> CommandUtf16;
if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg,
std::string("Unable to convert command-line to UTF-16"));
return false;
}
BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0,
TRUE, CREATE_UNICODE_ENVIRONMENT,
EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si,
&pi);
DWORD err = GetLastError();
// Regardless of whether the process got created or not, we are done with
@@ -304,7 +322,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
if (!rc) {
SetLastError(err);
MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
ProgramStr + "'");
Program.str() + "'");
return false;
}