diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index d0492fd16cf..a7de64f32e2 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -162,21 +162,43 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { CurPtr += 3; } +// Look ahead to search for first non-hex digit, if it's [hH], then we treat the +// integer as a hexadecimal, possibly with leading zeroes. +static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { + const char *FirstHex = 0; + const char *LookAhead = CurPtr; + while (1) { + if (isdigit(*LookAhead)) { + ++LookAhead; + } else if (isxdigit(*LookAhead)) { + if (!FirstHex) + FirstHex = LookAhead; + ++LookAhead; + } else { + break; + } + } + bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; + CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; + if (isHex) + return 16; + return DefaultRadix; +} + /// LexDigit: First character is [0-9]. /// Local Label: [0-9][:] /// Forward/Backward Label: [0-9][fb] /// Binary integer: 0b[01]+ /// Octal integer: 0[0-7]+ -/// Hex integer: 0x[0-9a-fA-F]+ +/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] /// Decimal integer: [1-9][0-9]* AsmToken AsmLexer::LexDigit() { // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0' || CurPtr[0] == '.') { - while (isdigit(*CurPtr)) - ++CurPtr; - + unsigned Radix = doLookAhead(CurPtr, 10); + bool isHex = Radix == 16; // Check for floating point literals. - if (*CurPtr == '.' || *CurPtr == 'e') { + if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { ++CurPtr; return LexFloatLiteral(); } @@ -184,15 +206,20 @@ AsmToken AsmLexer::LexDigit() { StringRef Result(TokStart, CurPtr - TokStart); long long Value; - if (Result.getAsInteger(10, Value)) { + if (Result.getAsInteger(Radix, Value)) { // Allow positive values that are too large to fit into a signed 64-bit // integer, but that do fit in an unsigned one, we just convert them over. unsigned long long UValue; - if (Result.getAsInteger(10, UValue)) - return ReturnError(TokStart, "invalid decimal number"); + if (Result.getAsInteger(Radix, UValue)) + return ReturnError(TokStart, !isHex ? "invalid decimal number" : + "invalid hexdecimal number"); Value = (long long)UValue; } + // Consume the [bB][hH]. + if (Radix == 2 || Radix == 16) + ++CurPtr; + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); @@ -243,6 +270,10 @@ AsmToken AsmLexer::LexDigit() { if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) return ReturnError(TokStart, "invalid hexadecimal number"); + // Consume the optional [hH]. + if (*CurPtr == 'h' || *CurPtr == 'H') + ++CurPtr; + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); @@ -251,15 +282,17 @@ AsmToken AsmLexer::LexDigit() { (int64_t)Result); } - // Must be an octal number, it starts with 0. - while (*CurPtr >= '0' && *CurPtr <= '9') - ++CurPtr; - - StringRef Result(TokStart, CurPtr - TokStart); + // Either octal or hexidecimal. long long Value; - if (Result.getAsInteger(8, Value)) + unsigned Radix = doLookAhead(CurPtr, 8); + StringRef Result(TokStart, CurPtr - TokStart); + if (Result.getAsInteger(Radix, Value)) return ReturnError(TokStart, "invalid octal number"); + // Consume the [hH]. + if (Radix == 16) + ++CurPtr; + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. SkipIgnoredIntegerSuffix(CurPtr); diff --git a/test/MC/X86/intel-syntax-hex.s b/test/MC/X86/intel-syntax-hex.s new file mode 100644 index 00000000000..35376c611bf --- /dev/null +++ b/test/MC/X86/intel-syntax-hex.s @@ -0,0 +1,26 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s +// rdar://12470373 + +// Checks to make sure we parse the hexidecimal suffix properly. +// CHECK: movl $10, %eax + mov eax, 10 +// CHECK: movl $16, %eax + mov eax, 10h +// CHECK: movl $16, %eax + mov eax, 10H +// CHECK: movl $4294967295, %eax + mov eax, 0ffffffffh +// CHECK: movl $4294967295, %eax + mov eax, 0xffffffff +// CHECK: movl $4294967295, %eax + mov eax, 0xffffffffh +// CHECK: movl $15, %eax + mov eax, 0fh +// CHECK: movl $162, %eax + mov eax, 0a2h +// CHECK: movl $162, %eax + mov eax, 0xa2 +// CHECK: movl $162, %eax + mov eax, 0xa2h +// CHECK: movl $674, %eax + mov eax, 2a2h