From b2edeb4ad108e03f1311ef67f6bed72c49ada460 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Fri, 25 Mar 2022 18:10:13 -0500 Subject: [PATCH] Properly stringize tokens that start with a trigraph. This did not work correctly before, because such tokens were recorded as starting with the third character of the trigraph. Here is an example affected by this: #define mkstr(a) # a #include <stdio.h> int main(void) { puts(mkstr(??!)); puts(mkstr(??!??!)); puts(mkstr('??<')); puts(mkstr(+??!)); puts(mkstr(+??')); } --- Scanner.asm | 11 ++++++++--- Scanner.pas | 10 +++++++--- cc.notes | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Scanner.asm b/Scanner.asm index 5409001..436e9be 100644 --- a/Scanner.asm +++ b/Scanner.asm @@ -446,6 +446,7 @@ rkModifiers ds 2 * * Outputs: * ch - character read +* currentChPtr - pointer to ch in source file * **************************************************************** * @@ -493,12 +494,15 @@ pf1 dey pf2 sty lastWasReturn ! 1: lab1 anop +! currentChPtr := chPtr; ! if chPtr = eofPtr then begin {flag end of file if we're there} lda chPtr + sta currentChPtr + ldx chPtr+2 + stx currentChPtr+2 cmp eofPtr bne la1 - lda chPtr+2 - cmp eofPtr+2 + cpx eofPtr+2 beq la2 la1 brl lb5 la2 anop @@ -621,7 +625,8 @@ lb4 lda [p1],Y ! else begin lb5 anop ! 
ch := chr(chPtr^); {fetch the character} - move4 chPtr,p1 + sta p1 + stx p1+2 lda [p1] and #$00FF sta ch diff --git a/Scanner.pas b/Scanner.pas index 3e86b1f..88abf9b 100644 --- a/Scanner.pas +++ b/Scanner.pas @@ -144,6 +144,7 @@ procedure NextCh; extern; { } { Globals: } { ch - character read } +{ currentChPtr - pointer to ch in source file } procedure NextToken; @@ -240,6 +241,7 @@ type var charStrPrefix: charStrPrefixEnum; {prefix of character/string literal} + currentChPtr: ptr; {pointer to current character in source file} customDefaultName: stringPtr; {name of custom pre-included default file} dateStr: longStringPtr; {macro date string} doingCommandLine: boolean; {are we processing the cc= command line?} @@ -2269,6 +2271,7 @@ if gotName then begin {read the file name from the line} changedSourceFile := true; ReadFile; {read the file} chPtr := bofPtr; {set the start, end pointers} + currentChPtr := bofPtr; eofPtr := pointer(ord4(bofPtr)+ffDCBGS.fileLength); firstPtr := chPtr; {first char in line} ch := chr(RETURN); {set the initial character} @@ -4188,6 +4191,7 @@ expandMacros := true; {enable macro expansion} reportEOL := false; {report eolsy as a token?} lineNumber := 1; {start the line counter} chPtr := start; {set the start, end pointers} +currentChPtr := start; eofPtr := endPtr; firstPtr := start; {first char in line} numErr := 0; {no errors so far} @@ -4942,8 +4946,8 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin end; end; {while} tokenLine := lineNumber; {record the position of the token} -tokenColumn := ord(ord4(chPtr)-ord4(firstPtr)); -tokenStart := pointer(ord4(chPtr)-1); +tokenColumn := ord(ord4(currentChPtr)-ord4(firstPtr)+1); +tokenStart := currentChPtr; 6: token.class := reservedSymbol; {default to the most common class} case charKinds[ord(ch)] of @@ -5366,7 +5370,7 @@ case charKinds[ord(ch)] of otherwise: Error(57); end; {case} -tokenEnd := pointer(ord4(chPtr)-1); {record the end of the token} +tokenEnd := currentChPtr; 
{record the end of the token} 2: if skipping then {conditional compilation branch} if not (token.kind in [eofsy,eolsy]) then diff --git a/cc.notes b/cc.notes index abd6421..7de66df 100644 --- a/cc.notes +++ b/cc.notes @@ -1784,7 +1784,7 @@ int foo(int[42]); 182. #pragma path directives were not saved in .sym files. This could cause ORCA/C not to search the proper paths for include files that were not represented in the .sym file (e.g. because they were included after a function). -183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator, or on tokens that were split over two or more lines using line continuations. +183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator, tokens that were split over two or more lines using line continuations, or tokens represented using trigraphs. -- Bugs from C 2.1.0 that have been fixed -----------------------------------