mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-10-11 23:25:15 +00:00
Implement TokenizeWindowsCommandLine.
This is a follow-up patch to r187390 that implements the parser for Windows-style command lines. It should follow the rules described at http://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
Differential Revision: http://llvm-reviews.chandlerc.com/D1235
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187430 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f6de55f5d4
commit
264e92d6db
@ -498,9 +498,109 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
|
|||||||
NewArgv.push_back(Saver.SaveString(Token.c_str()));
|
NewArgv.push_back(Saver.SaveString(Token.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Backslashes are interpreted in a rather complicated way in the Windows-style
|
||||||
|
/// command line, because backslashes are used both to separate path and to
|
||||||
|
/// escape double quote. This method consumes runs of backslashes as well as the
|
||||||
|
/// following double quote if it's escaped.
|
||||||
|
///
|
||||||
|
/// * If an even number of backslashes is followed by a double quote, one
|
||||||
|
/// backslash is output for every pair of backslashes, and the last double
|
||||||
|
/// quote remains unconsumed. The double quote will later be interpreted as
|
||||||
|
/// the start or end of a quoted string in the main loop outside of this
|
||||||
|
/// function.
|
||||||
|
///
|
||||||
|
/// * If an odd number of backslashes is followed by a double quote, one
|
||||||
|
/// backslash is output for every pair of backslashes, and a double quote is
|
||||||
|
/// output for the last pair of backslash-double quote. The double quote is
|
||||||
|
/// consumed in this case.
|
||||||
|
///
|
||||||
|
/// * Otherwise, backslashes are interpreted literally.
|
||||||
|
static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) {
|
||||||
|
size_t E = Src.size();
|
||||||
|
int BackslashCount = 0;
|
||||||
|
// Skip the backslashes.
|
||||||
|
do {
|
||||||
|
++I;
|
||||||
|
++BackslashCount;
|
||||||
|
} while (I != E && Src[I] == '\\');
|
||||||
|
|
||||||
|
bool FollowedByDoubleQuote = (I != E && Src[I] == '"');
|
||||||
|
if (FollowedByDoubleQuote) {
|
||||||
|
Token.append(BackslashCount / 2, '\\');
|
||||||
|
if (BackslashCount % 2 == 0)
|
||||||
|
return I - 1;
|
||||||
|
Token.push_back('"');
|
||||||
|
return I;
|
||||||
|
}
|
||||||
|
Token.append(BackslashCount, '\\');
|
||||||
|
return I - 1;
|
||||||
|
}
|
||||||
|
|
||||||
void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
|
void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
|
||||||
SmallVectorImpl<const char *> &NewArgv) {
|
SmallVectorImpl<const char *> &NewArgv) {
|
||||||
llvm_unreachable("FIXME not implemented");
|
SmallString<128> Token;
|
||||||
|
|
||||||
|
// This is a small state machine to consume characters until it reaches the
|
||||||
|
// end of the source string.
|
||||||
|
enum { INIT, UNQUOTED, QUOTED } State = INIT;
|
||||||
|
for (size_t I = 0, E = Src.size(); I != E; ++I) {
|
||||||
|
// INIT state indicates that the current input index is at the start of
|
||||||
|
// the string or between tokens.
|
||||||
|
if (State == INIT) {
|
||||||
|
if (isWhitespace(Src[I]))
|
||||||
|
continue;
|
||||||
|
if (Src[I] == '"') {
|
||||||
|
State = QUOTED;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (Src[I] == '\\') {
|
||||||
|
I = parseBackslash(Src, I, Token);
|
||||||
|
State = UNQUOTED;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Token.push_back(Src[I]);
|
||||||
|
State = UNQUOTED;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// UNQUOTED state means that it's reading a token not quoted by double
|
||||||
|
// quotes.
|
||||||
|
if (State == UNQUOTED) {
|
||||||
|
// Whitespace means the end of the token.
|
||||||
|
if (isWhitespace(Src[I])) {
|
||||||
|
NewArgv.push_back(Saver.SaveString(Token.c_str()));
|
||||||
|
Token.clear();
|
||||||
|
State = INIT;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (Src[I] == '"') {
|
||||||
|
State = QUOTED;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (Src[I] == '\\') {
|
||||||
|
I = parseBackslash(Src, I, Token);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Token.push_back(Src[I]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// QUOTED state means that it's reading a token quoted by double quotes.
|
||||||
|
if (State == QUOTED) {
|
||||||
|
if (Src[I] == '"') {
|
||||||
|
State = UNQUOTED;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (Src[I] == '\\') {
|
||||||
|
I = parseBackslash(Src, I, Token);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Token.push_back(Src[I]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Append the last token after hitting EOF with no whitespace.
|
||||||
|
if (!Token.empty())
|
||||||
|
NewArgv.push_back(Saver.SaveString(Token.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
|
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
|
||||||
|
@ -125,21 +125,40 @@ class StrDupSaver : public cl::StringSaver {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef void ParserFunction(StringRef Source, llvm::cl::StringSaver &Saver,
|
||||||
|
SmallVectorImpl<const char *> &NewArgv);
|
||||||
|
|
||||||
|
|
||||||
|
void testCommandLineTokenizer(ParserFunction *parse, const char *Input,
|
||||||
|
const char *const Output[], size_t OutputSize) {
|
||||||
|
SmallVector<const char *, 0> Actual;
|
||||||
|
StrDupSaver Saver;
|
||||||
|
parse(Input, Saver, Actual);
|
||||||
|
EXPECT_EQ(OutputSize, Actual.size());
|
||||||
|
for (unsigned I = 0, E = Actual.size(); I != E; ++I) {
|
||||||
|
if (I < OutputSize)
|
||||||
|
EXPECT_STREQ(Output[I], Actual[I]);
|
||||||
|
free(const_cast<char *>(Actual[I]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Checks GNU-style tokenization: a backslash-escaped space, double- and
// single-quoted strings, quotes embedded in the middle of a word, and
// backslashes inside and outside of quotes.
TEST(CommandLineTest, TokenizeGNUCommandLine) {
  const char *CommandLine =
      "foo\\ bar \"foo bar\" \'foo bar\' 'foo\\\\bar' "
      "foo\"bar\"baz C:\\src\\foo.cpp \"C:\\src\\foo.cpp\"";
  const char *const Expected[] = { "foo bar", "foo bar", "foo bar", "foo\\bar",
                                   "foobarbaz", "C:\\src\\foo.cpp",
                                   "C:\\src\\foo.cpp" };
  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, CommandLine, Expected,
                           array_lengthof(Expected));
}
|
// Checks Windows-style tokenization: literal backslashes, even and odd runs
// of backslashes in front of a double quote, quoted strings with embedded
// spaces and escaped quotes, and a token that begins with a backslash.
TEST(CommandLineTest, TokenizeWindowsCommandLine) {
  const char *CommandLine =
      "a\\b c\\\\d e\\\\\"f g\" h\\\"i j\\\\\\\"k \"lmn\" o pqr "
      "\"st \\\"u\" \\v";
  const char *const Expected[] = { "a\\b", "c\\\\d", "e\\f g", "h\"i", "j\\\"k",
                                   "lmn", "o", "pqr", "st \"u", "\\v" };
  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, CommandLine,
                           Expected, array_lengthof(Expected));
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
Loading…
Reference in New Issue
Block a user