From 8915e27704b2afd362a69c6be1111fb06bbcc727 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 12 Feb 2013 18:29:02 +0000 Subject: [PATCH] [ms-inline asm] Add support for lexing binary integers with a [bB] suffix. This is complicated by backward labels (e.g., 0b can be both a backward label and a binary zero). The current implementation assumes [0-9]b is always a label and thus it's possible for 0b and 1b to not be interpreted correctly for ms-style inline assembly. However, this is relatively simple to fix in the inline assembly (i.e., drop the [bB]). This patch also limits backward labels to [0-9]b, so that only 0b and 1b are ambiguous. Part of rdar://12470373 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174983 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 59 +++++++++++++++++++++---------- test/MC/MachO/direction_labels.s | 12 +++---- test/MC/X86/intel-syntax-binary.s | 14 ++++++++ 3 files changed, 60 insertions(+), 25 deletions(-) create mode 100644 test/MC/X86/intel-syntax-binary.s diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 530e94e8d37..8fcc264d688 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -179,26 +179,48 @@ static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { } } bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; - CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; + bool isBinary = LookAhead[-1] == 'b' || LookAhead[-1] == 'B'; + CurPtr = (isBinary || isHex || !FirstHex) ? LookAhead : FirstHex; if (isHex) return 16; + if (isBinary) { + --CurPtr; + return 2; + } return DefaultRadix; } /// LexDigit: First character is [0-9]. 
/// Local Label: [0-9][:] -/// Forward/Backward Label: [0-9][fb] -/// Binary integer: 0b[01]+ +/// Forward/Backward Label: [0-9]+f or [0-9]b +/// Binary integer: 0b[01]+ or [01]+[bB] /// Octal integer: 0[0-7]+ /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] /// Decimal integer: [1-9][0-9]* AsmToken AsmLexer::LexDigit() { + + // Backward Label: [0-9]b + if (*CurPtr == 'b') { + // See if we actually have "0b" as part of something like "jmp 0b\n" + if (!isdigit(CurPtr[1])) { + long long Value; + StringRef Result(TokStart, CurPtr - TokStart); + if (Result.getAsInteger(10, Value)) + return ReturnError(TokStart, "invalid backward label"); + + return AsmToken(AsmToken::Integer, Result, Value); + } + } + + // Binary integer: 1[01]*[bB] // Decimal integer: [1-9][0-9]* + // Hexadecimal integer: [1-9][0-9a-fA-F]*[hH] if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doLookAhead(CurPtr, 10); - bool isHex = Radix == 16; + bool isDecimal = Radix == 10; + // Check for floating point literals. - if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { + if (isDecimal && (*CurPtr == '.' || *CurPtr == 'e')) { ++CurPtr; return LexFloatLiteral(); } @@ -211,7 +233,7 @@ AsmToken AsmLexer::LexDigit() { // integer, but that do fit in an unsigned one, we just convert them over. unsigned long long UValue; if (Result.getAsInteger(Radix, UValue)) - return ReturnError(TokStart, !isHex ? "invalid decimal number" : + return ReturnError(TokStart, isDecimal ?
"invalid decimal number" : "invalid hexdecimal number"); Value = (long long)UValue; } @@ -227,15 +249,9 @@ AsmToken AsmLexer::LexDigit() { return AsmToken(AsmToken::Integer, Result, Value); } + // Binary integer: 0b[01]+ if (*CurPtr == 'b') { - ++CurPtr; - // See if we actually have "0b" as part of something like "jmp 0b\n" - if (!isdigit(CurPtr[0])) { - --CurPtr; - StringRef Result(TokStart, CurPtr - TokStart); - return AsmToken(AsmToken::Integer, Result, 0); - } - const char *NumStart = CurPtr; + const char *NumStart = ++CurPtr; while (CurPtr[0] == '0' || CurPtr[0] == '1') ++CurPtr; @@ -256,6 +272,7 @@ AsmToken AsmLexer::LexDigit() { return AsmToken(AsmToken::Integer, Result, Value); } + // Hex integer: 0x[0-9a-fA-F]+ if (*CurPtr == 'x') { ++CurPtr; const char *NumStart = CurPtr; @@ -282,17 +299,21 @@ AsmToken AsmLexer::LexDigit() { (int64_t)Result); } - // Either octal or hexidecimal. + // Binary: 0[01]*[Bb], but not 0b. + // Octal: 0[0-7]* + // Hexidecimal: [0][0-9a-fA-F]*[hH] long long Value; unsigned Radix = doLookAhead(CurPtr, 8); - bool isHex = Radix == 16; + bool isBinary = Radix == 2; + bool isOctal = Radix == 8; StringRef Result(TokStart, CurPtr - TokStart); if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, !isHex ? "invalid octal number" : + return ReturnError(TokStart, isOctal ? "invalid octal number" : + isBinary ? "invalid binary number" : "invalid hexdecimal number"); - // Consume the [hH]. - if (Radix == 16) + // Consume the [bB][hH]. 
+ if (Radix == 2 || Radix == 16) ++CurPtr; // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL diff --git a/test/MC/MachO/direction_labels.s b/test/MC/MachO/direction_labels.s index e224ed3a147..35f5d44f728 100644 --- a/test/MC/MachO/direction_labels.s +++ b/test/MC/MachO/direction_labels.s @@ -1,15 +1,15 @@ // RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s direction_labels: -10: nop - jmp 10b - nop +8: nop + jmp 8b + nop jne 0f 0: nop jne 0b - jmp 11f -11: nop - ret + jmp 9f +9: nop + ret // CHECK: ('cputype', 7) // CHECK: ('cpusubtype', 3) diff --git a/test/MC/X86/intel-syntax-binary.s b/test/MC/X86/intel-syntax-binary.s new file mode 100644 index 00000000000..1c4a4cc25b0 --- /dev/null +++ b/test/MC/X86/intel-syntax-binary.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s +// rdar://12470373 + +// Checks to make sure we parse the binary suffix properly. +// CHECK: movl $1, %eax + mov eax, 01b +// CHECK: movl $2, %eax + mov eax, 10b +// CHECK: movl $3, %eax + mov eax, 11b +// CHECK: movl $3, %eax + mov eax, 11B +// CHECK: movl $2711, %eax + mov eax, 101010010111B