If we see a UTF-8 BOM sequence at the beginning of a response file, we shall

remove these bytes before parsing.

Phabricator Revision: http://reviews.llvm.org/D7156



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226988 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Yunzhong Gao 2015-01-24 04:23:08 +00:00
parent 7a98df7f74
commit d3198ce4bc
4 changed files with 19 additions and 0 deletions

View File

@ -655,6 +655,13 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
NewArgv.push_back(nullptr);
}
// It is called byte order marker but the UTF-8 BOM is actually not affected
// by the host system's endianness.
static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
return (S.size() >= 3 &&
S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
}
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &NewArgv,
@ -674,6 +681,11 @@ static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
return false;
Str = StringRef(UTF8Buf);
}
// If we see UTF-8 BOM sequence at the beginning of a file, we shall remove
// these bytes before parsing.
// Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
else if (hasUTF8ByteOrderMark(BufRef))
Str = StringRef(BufRef.data() + 3, BufRef.size() - 3);
// Tokenize the contents into NewArgv.
Tokenizer(Str, Saver, NewArgv, MarkEOLs);

View File

@ -0,0 +1 @@
-help

View File

@ -0,0 +1 @@
-help

View File

@ -6,6 +6,11 @@
; RUN: llvm-as @%t.list2 -o %t.bc
; RUN: llvm-nm %t.bc 2>&1 | FileCheck %s
; When the response file begins with a UTF-8 BOM sequence, we shall remove it.
; Neither command below should return a "Could not open input file" error.
; RUN: llvm-as @%S/Inputs/utf8-response > /dev/null
; RUN: llvm-as @%S/Inputs/utf8-bom-response > /dev/null
; CHECK: T foobar
define void @foobar() {