From 182cf6675492f7f639b53198a0b676a63a103974 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Tue, 1 Mar 2022 19:01:11 -0600 Subject: [PATCH] Properly stringize tokens with line continuations or non-initial trigraphs. Previously, continuations or trigraphs would be included in the string as-is, which should not be the case because they are (conceptually) processed in earlier compilation phases. Initial trigraphs still do not get stringized properly, because the token starting position is not recorded correctly for them. This fixes code like the following: #define mkstr(a) # a #include <stdio.h> int main(void) { puts(mkstr(a\ bc)); puts(mkstr(qr\ )); puts(mkstr(\ xy)); puts(mkstr(12??/ 34)); puts(mkstr('??<')); } --- Scanner.pas | 70 ++++++++++++++++++++++++++++++++++++++++++++--------- cc.notes | 2 +- 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/Scanner.pas b/Scanner.pas index 9bea469..144bc70 100644 --- a/Scanner.pas +++ b/Scanner.pas @@ -1509,7 +1509,7 @@ tk1.sval := cp; end; {MergeStrings} -procedure BuildStringToken (cp: ptr; len: integer); +procedure BuildStringToken (cp: ptr; len: integer; rawSourceCode: boolean); { Create a string token from a string } { } @@ -1518,9 +1518,13 @@ procedure BuildStringToken (cp: ptr; len: integer); { Parameters: } { cp - pointer to the first character } { len - number of characters in the string } +{ rawSourceCode - process trigraphs & line continuations? } + +label 1; var i: integer; {loop variable} + ch: char; {work character} begin {BuildStringToken} token.kind := stringconst; @@ -1528,10 +1532,51 @@ token.class := stringConstant; token.ispstring := false; token.sval := pointer(GMalloc(len+3)); token.prefix := prefix_none; -for i := 1 to len do begin - token.sval^.str[i] := chr(cp^); - cp := pointer(ord4(cp)+1); - end; {for} +if rawSourceCode then begin + i := 1; +1: while i <= len do begin + ch := chr(cp^); + if ch = '?' then {handle trigraphs} + if i < len-1 then + if chr(ptr(ord4(cp)+1)^) = '?' 
then + if chr(ptr(ord4(cp)+2)^) in + ['=','(','/',')','''','<','!','>','-'] then begin + case chr(ptr(ord4(cp)+2)^) of + '(': ch := '['; + '<': ch := '{'; + '/': ch := '\'; + '''': ch := '^'; + '=': ch := '#'; + ')': ch := ']'; + '>': ch := '}'; + '!': ch := '|'; + '-': ch := '~'; + end; {case} + len := len-2; + cp := pointer(ord4(cp)+2); + end; {if} + if ch = '\' then {handle line continuations} + if i < len then + if charKinds[ptr(ord4(cp)+1)^] = ch_eol then begin + if i < len-1 then + if ptr(ord4(cp)+2)^ in [$06,$07] then begin + len := len-1; {skip debugger characters} + cp := pointer(ord4(cp)+1); + end; {if} + len := len-2; + cp := pointer(ord4(cp)+2); + goto 1; + end; + token.sval^.str[i] := ch; + cp := pointer(ord4(cp)+1); + i := i+1; + end; {while} + end {if} +else + for i := 1 to len do begin + token.sval^.str[i] := chr(cp^); + cp := pointer(ord4(cp)+1); + end; {for} token.sval^.str[len+1] := chr(0); token.sval^.length := len+1; PutBackToken(token, true); @@ -1800,26 +1845,27 @@ else begin if stringization then begin tcPtr := pptr^.tokens; if tcPtr = nil then - BuildStringToken(nil, 0); + BuildStringToken(nil, 0, false); while tcPtr <> nil do begin if tcPtr^.token.kind = stringconst then begin - BuildStringToken(@quoteStr[1], 1); + BuildStringToken(@quoteStr[1], 1, false); BuildStringToken(@tcPtr^.token.sval^.str, - tcPtr^.token.sval^.length-1); - BuildStringToken(@quoteStr[1], 1); + tcPtr^.token.sval^.length-1, false); + BuildStringToken(@quoteStr[1], 1, false); end {if} else begin if tcPtr <> pptr^.tokens then if charKinds[tcPtr^.tokenEnd^] = ch_white then - BuildStringToken(@spaceStr[1], 1); + BuildStringToken(@spaceStr[1], 1, false); BuildStringToken(tcPtr^.tokenStart, - ord(ord4(tcPtr^.tokenEnd)-ord4(tcPtr^.tokenStart))); + ord(ord4(tcPtr^.tokenEnd)-ord4(tcPtr^.tokenStart)), + true); {hack because stringconst may not have proper tokenEnd} if tcPtr^.next <> nil then if tcPtr^.next^.token.kind = stringconst then if 
charKinds[ptr(ord4(tcPtr^.tokenStart)-1)^] = ch_white then - BuildStringToken(@spaceStr[1], 1); + BuildStringToken(@spaceStr[1], 1, false); end; tcPtr := tcPtr^.next; end; {while} diff --git a/cc.notes b/cc.notes index 40ddf44..c697ae6 100644 --- a/cc.notes +++ b/cc.notes @@ -1784,7 +1784,7 @@ int foo(int[42]); 182. #pragma path directives were not saved in .sym files. This could cause ORCA/C not to search the proper paths for include files that were not represented in the .sym file (e.g. because they were included after a function). -183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator. +183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator, or on tokens that were split over two or more lines using line continuations. -- Bugs from C 2.1.0 that have been fixed -----------------------------------