mirror of
https://github.com/byteworksinc/ORCA-C.git
synced 2024-06-10 07:29:32 +00:00
Properly stringize tokens with line continuations or non-initial trigraphs.
Previously, continuations or trigraphs would be included in the string as-is, which should not be the case because they are (conceptually) processed in earlier compilation phases. Initial trigraphs still do not get stringized properly, because the token starting position is not recorded correctly for them.

This fixes code like the following:

#define mkstr(a) # a

#include <stdio.h>

int main(void) {
        puts(mkstr(a\
bc));
        puts(mkstr(qr\
));
        puts(mkstr(\
xy));
        puts(mkstr(12??/
34));
        puts(mkstr('??<'));
}
This commit is contained in:
parent
fec7b57ec2
commit
182cf66754
70
Scanner.pas
70
Scanner.pas
|
@@ -1509,7 +1509,7 @@ tk1.sval := cp;
|
||||||
end; {MergeStrings}
|
end; {MergeStrings}
|
||||||
|
|
||||||
|
|
||||||
procedure BuildStringToken (cp: ptr; len: integer);
|
procedure BuildStringToken (cp: ptr; len: integer; rawSourceCode: boolean);
|
||||||
|
|
||||||
{ Create a string token from a string }
|
{ Create a string token from a string }
|
||||||
{ }
|
{ }
|
||||||
|
@@ -1518,9 +1518,13 @@ procedure BuildStringToken (cp: ptr; len: integer);
|
||||||
{ Parameters: }
|
{ Parameters: }
|
||||||
{ cp - pointer to the first character }
|
{ cp - pointer to the first character }
|
||||||
{ len - number of characters in the string }
|
{ len - number of characters in the string }
|
||||||
|
{ rawSourceCode - process trigraphs & line continuations? }
|
||||||
|
|
||||||
|
label 1;
|
||||||
|
|
||||||
var
|
var
|
||||||
i: integer; {loop variable}
|
i: integer; {loop variable}
|
||||||
|
ch: char; {work character}
|
||||||
|
|
||||||
begin {BuildStringToken}
|
begin {BuildStringToken}
|
||||||
token.kind := stringconst;
|
token.kind := stringconst;
|
||||||
|
@@ -1528,10 +1532,51 @@ token.class := stringConstant;
|
||||||
token.ispstring := false;
|
token.ispstring := false;
|
||||||
token.sval := pointer(GMalloc(len+3));
|
token.sval := pointer(GMalloc(len+3));
|
||||||
token.prefix := prefix_none;
|
token.prefix := prefix_none;
|
||||||
for i := 1 to len do begin
|
if rawSourceCode then begin
|
||||||
token.sval^.str[i] := chr(cp^);
|
i := 1;
|
||||||
cp := pointer(ord4(cp)+1);
|
1: while i <= len do begin
|
||||||
end; {for}
|
ch := chr(cp^);
|
||||||
|
if ch = '?' then {handle trigraphs}
|
||||||
|
if i < len-1 then
|
||||||
|
if chr(ptr(ord4(cp)+1)^) = '?' then
|
||||||
|
if chr(ptr(ord4(cp)+2)^) in
|
||||||
|
['=','(','/',')','''','<','!','>','-'] then begin
|
||||||
|
case chr(ptr(ord4(cp)+2)^) of
|
||||||
|
'(': ch := '[';
|
||||||
|
'<': ch := '{';
|
||||||
|
'/': ch := '\';
|
||||||
|
'''': ch := '^';
|
||||||
|
'=': ch := '#';
|
||||||
|
')': ch := ']';
|
||||||
|
'>': ch := '}';
|
||||||
|
'!': ch := '|';
|
||||||
|
'-': ch := '~';
|
||||||
|
end; {case}
|
||||||
|
len := len-2;
|
||||||
|
cp := pointer(ord4(cp)+2);
|
||||||
|
end; {if}
|
||||||
|
if ch = '\' then {handle line continuations}
|
||||||
|
if i < len then
|
||||||
|
if charKinds[ptr(ord4(cp)+1)^] = ch_eol then begin
|
||||||
|
if i < len-1 then
|
||||||
|
if ptr(ord4(cp)+2)^ in [$06,$07] then begin
|
||||||
|
len := len-1; {skip debugger characters}
|
||||||
|
cp := pointer(ord4(cp)+1);
|
||||||
|
end; {if}
|
||||||
|
len := len-2;
|
||||||
|
cp := pointer(ord4(cp)+2);
|
||||||
|
goto 1;
|
||||||
|
end;
|
||||||
|
token.sval^.str[i] := ch;
|
||||||
|
cp := pointer(ord4(cp)+1);
|
||||||
|
i := i+1;
|
||||||
|
end; {while}
|
||||||
|
end {if}
|
||||||
|
else
|
||||||
|
for i := 1 to len do begin
|
||||||
|
token.sval^.str[i] := chr(cp^);
|
||||||
|
cp := pointer(ord4(cp)+1);
|
||||||
|
end; {for}
|
||||||
token.sval^.str[len+1] := chr(0);
|
token.sval^.str[len+1] := chr(0);
|
||||||
token.sval^.length := len+1;
|
token.sval^.length := len+1;
|
||||||
PutBackToken(token, true);
|
PutBackToken(token, true);
|
||||||
|
@@ -1800,26 +1845,27 @@ else begin
|
||||||
if stringization then begin
|
if stringization then begin
|
||||||
tcPtr := pptr^.tokens;
|
tcPtr := pptr^.tokens;
|
||||||
if tcPtr = nil then
|
if tcPtr = nil then
|
||||||
BuildStringToken(nil, 0);
|
BuildStringToken(nil, 0, false);
|
||||||
while tcPtr <> nil do begin
|
while tcPtr <> nil do begin
|
||||||
if tcPtr^.token.kind = stringconst then begin
|
if tcPtr^.token.kind = stringconst then begin
|
||||||
BuildStringToken(@quoteStr[1], 1);
|
BuildStringToken(@quoteStr[1], 1, false);
|
||||||
BuildStringToken(@tcPtr^.token.sval^.str,
|
BuildStringToken(@tcPtr^.token.sval^.str,
|
||||||
tcPtr^.token.sval^.length-1);
|
tcPtr^.token.sval^.length-1, false);
|
||||||
BuildStringToken(@quoteStr[1], 1);
|
BuildStringToken(@quoteStr[1], 1, false);
|
||||||
end {if}
|
end {if}
|
||||||
else begin
|
else begin
|
||||||
if tcPtr <> pptr^.tokens then
|
if tcPtr <> pptr^.tokens then
|
||||||
if charKinds[tcPtr^.tokenEnd^] = ch_white then
|
if charKinds[tcPtr^.tokenEnd^] = ch_white then
|
||||||
BuildStringToken(@spaceStr[1], 1);
|
BuildStringToken(@spaceStr[1], 1, false);
|
||||||
BuildStringToken(tcPtr^.tokenStart,
|
BuildStringToken(tcPtr^.tokenStart,
|
||||||
ord(ord4(tcPtr^.tokenEnd)-ord4(tcPtr^.tokenStart)));
|
ord(ord4(tcPtr^.tokenEnd)-ord4(tcPtr^.tokenStart)),
|
||||||
|
true);
|
||||||
|
|
||||||
{hack because stringconst may not have proper tokenEnd}
|
{hack because stringconst may not have proper tokenEnd}
|
||||||
if tcPtr^.next <> nil then
|
if tcPtr^.next <> nil then
|
||||||
if tcPtr^.next^.token.kind = stringconst then
|
if tcPtr^.next^.token.kind = stringconst then
|
||||||
if charKinds[ptr(ord4(tcPtr^.tokenStart)-1)^] = ch_white then
|
if charKinds[ptr(ord4(tcPtr^.tokenStart)-1)^] = ch_white then
|
||||||
BuildStringToken(@spaceStr[1], 1);
|
BuildStringToken(@spaceStr[1], 1, false);
|
||||||
end;
|
end;
|
||||||
tcPtr := tcPtr^.next;
|
tcPtr := tcPtr^.next;
|
||||||
end; {while}
|
end; {while}
|
||||||
|
|
2
cc.notes
2
cc.notes
|
@@ -1784,7 +1784,7 @@ int foo(int[42]);
|
||||||
|
|
||||||
182. #pragma path directives were not saved in .sym files. This could cause ORCA/C not to search the proper paths for include files that were not represented in the .sym file (e.g. because they were included after a function).
|
182. #pragma path directives were not saved in .sym files. This could cause ORCA/C not to search the proper paths for include files that were not represented in the .sym file (e.g. because they were included after a function).
|
||||||
|
|
||||||
183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator.
|
183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator, or on tokens that were split over two or more lines using line continuations.
|
||||||
|
|
||||||
-- Bugs from C 2.1.0 that have been fixed -----------------------------------
|
-- Bugs from C 2.1.0 that have been fixed -----------------------------------
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user