Windows: Add support for Unicode command lines

Summary:
The MSVCRT deliberately hands main() code-page-specific characters.
This isn't very useful to LLVM: we end up converting the arguments to
UTF-16 and subsequently attempting to use the result as, for example, a
file name.  Instead, we need the ability to access the Unicode command
line and transform it to UTF-8.
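
As a sketch of the intended use (the main() scaffolding here is
illustrative, not part of this patch):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Support/Allocator.h"
  #include "llvm/Support/Process.h"

  int main(int argc_, const char **argv_) {
    llvm::SmallVector<const char *, 256> argv;
    llvm::SpecificBumpPtrAllocator<char> ArgAllocator;
    // On Windows this rebuilds the arguments from GetCommandLineW() and
    // converts them to UTF-8; on other hosts it should simply copy the
    // argv_ it is given.
    if (llvm::error_code ec = llvm::sys::Process::GetArgumentVector(
            argv, llvm::ArrayRef<const char *>(argv_, argc_), ArgAllocator))
      return 1;
    // From here on, argv holds UTF-8 arguments.
    return 0;
  }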

This has distinct advantages over using the MSVC-specific wmain()
function as our entry point (sketched below):
 - wmain() doesn't work on Cygwin.
 - It only works on MinGW with caveats, and even then only on certain
   versions.
 - We get to keep our entry point as main(). :)
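
For contrast, the MSVC-specific alternative we are avoiding would look
like the following sketch (not part of this patch):

  #include <wchar.h>

  // Arguments arrive as UTF-16, but this entry point does not exist on
  // Cygwin and is only partially supported by MinGW.
  int wmain(int argc, wchar_t **argv) {
    return 0;
  }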

N.B.  This patch includes fixes to other parts of lib/Support/Windows
so that we can take advantage of receiving Unicode paths.
E.g., clang spawning clang -cc1 would want to hand it Unicode arguments
(see the sketch below).
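
A minimal sketch of that direction, assuming the windows::UTF8ToUTF16
helper declared in lib/Support/Windows/Windows.h (ArgUtf8 is a
hypothetical UTF-8 argument string):

  llvm::SmallVector<wchar_t, 128> ArgUtf16;
  // Convert the UTF-8 argument back to UTF-16 so that CreateProcessW
  // hands the child the same Unicode text the parent received.
  if (llvm::error_code ec = llvm::sys::windows::UTF8ToUTF16(ArgUtf8, ArgUtf16))
    return ec;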

Reviewers: aaron.ballman, Bigcheese, rnk, ruiu

Reviewed By: rnk

CC: llvm-commits, ygao

Differential Revision: http://llvm-reviews.chandlerc.com/D1834

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192069 91177308-0d34-0410-b5e6-96231b3b80d8
Author: David Majnemer
Date:   2013-10-06 20:25:49 +00:00
Parent: 2def17935c
Commit: 5a1a1856a4

18 changed files with 181 additions and 88 deletions

--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -11,18 +11,25 @@
 //
 //===----------------------------------------------------------------------===//

+#include "llvm/Support/Allocator.h"
+
 #include "Windows.h"
 #include <direct.h>
 #include <io.h>
 #include <malloc.h>
 #include <psapi.h>
+#include <Shellapi.h>

 #ifdef __MINGW32__
  #if (HAVE_LIBPSAPI != 1)
   #error "libpsapi.a should be present"
  #endif
+ #if (HAVE_LIBSHELL32 != 1)
+  #error "libshell32.a should be present"
+ #endif
 #else
- #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "Shell32.lib")
 #endif

 //===----------------------------------------------------------------------===//
@@ -151,25 +158,58 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
   // Environment variable can be encoded in non-UTF8 encoding, and there's no
   // way to know what the encoding is. The only reliable way to look up
   // multibyte environment variable is to use GetEnvironmentVariableW().
-  std::vector<wchar_t> Buf(16);
-  size_t Size = 0;
-  for (;;) {
-    Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.size());
-    if (Size < Buf.size())
-      break;
+  SmallVector<wchar_t, MAX_PATH> Buf;
+  size_t Size = MAX_PATH;
+  do {
+    Buf.reserve(Size);
+    Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.capacity());
+    if (Size == 0)
+      return None;

     // Try again with larger buffer.
-    Buf.resize(Size + 1);
-  }
-  if (Size == 0)
-    return None;
+  } while (Size > Buf.capacity());
+  Buf.set_size(Size);

   // Convert the result from UTF-16 to UTF-8.
-  SmallVector<char, 128> Res;
+  SmallVector<char, MAX_PATH> Res;
   if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res))
     return None;
   return std::string(&Res[0]);
 }

+error_code
+Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
+                           ArrayRef<const char *>,
+                           SpecificBumpPtrAllocator<char> &ArgAllocator) {
+  int NewArgCount;
+  error_code ec;
+
+  wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(),
+                                                    &NewArgCount);
+  if (!UnicodeCommandLine)
+    return windows_error(::GetLastError());
+
+  Args.reserve(NewArgCount);
+
+  for (int i = 0; i < NewArgCount; ++i) {
+    SmallVector<char, MAX_PATH> NewArgString;
+    ec = windows::UTF16ToUTF8(UnicodeCommandLine[i],
+                              wcslen(UnicodeCommandLine[i]),
+                              NewArgString);
+    if (ec)
+      break;
+
+    char *Buffer = ArgAllocator.Allocate(NewArgString.size() + 1);
+    ::memcpy(Buffer, NewArgString.data(), NewArgString.size() + 1);
+    Args.push_back(Buffer);
+  }
+  LocalFree(UnicodeCommandLine);
+  if (ec)
+    return ec;
+
+  return error_code::success();
+}
+
 bool Process::StandardInIsUserInput() {
   return FileDescriptorIsDisplayed(0);
 }
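
For completeness, a sketch of a GetEnv call site (the consume() caller
is hypothetical); because the lookup goes through
GetEnvironmentVariableW, the result is UTF-8 regardless of the active
code page:

  #include "llvm/ADT/Optional.h"
  #include "llvm/Support/Process.h"

  if (llvm::Optional<std::string> Path = llvm::sys::Process::GetEnv("PATH"))
    consume(*Path);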