Properly stringize tokens that start with a trigraph.

This did not work correctly before, because such tokens were recorded as starting with the third character of the trigraph.

Here is an example affected by this:

#define mkstr(a) # a

#include <stdio.h>

int main(void) {
        puts(mkstr(??!));
        puts(mkstr(??!??!));
        puts(mkstr('??<'));
        puts(mkstr(+??!));
        puts(mkstr(+??'));
}
2024-12-30 14:31:04 +00:00 · 2022-03-25 18:10:13 -05:00 · 2022-03-25 18:10:13 -05:00 · b2edeb4ad1
commit b2edeb4ad1
parent f531f38463
3 changed files with 16 additions and 7 deletions
--- a/Scanner.asm
+++ b/Scanner.asm
@@ -446,6 +446,7 @@ rkModifiers ds 2
 *
 *  Outputs:
 *        ch - character read
+*        currentChPtr - pointer to ch in source file
 *
 ****************************************************************
 *
@@ -493,12 +494,15 @@ pf1      dey
 pf2      sty   lastWasReturn
 ! 1:
 lab1     anop
+! currentChPtr := chPtr;
 ! if chPtr = eofPtr then begin          {flag end of file if we're there}
         lda   chPtr
+         sta   currentChPtr
+         ldx   chPtr+2
+         stx   currentChPtr+2
         cmp   eofPtr
         bne   la1
-         lda   chPtr+2
-         cmp   eofPtr+2
+         cpx   eofPtr+2
         beq   la2
 la1      brl   lb5
 la2      anop
@@ -621,7 +625,8 @@ lb4      lda   [p1],Y
 ! else begin
 lb5      anop
 !    ch := chr(chPtr^);                 {fetch the character}
-         move4 chPtr,p1
+         sta   p1
+         stx   p1+2
         lda   [p1]
         and   #$00FF
         sta   ch
--- a/Scanner.pas
+++ b/Scanner.pas
@@ -144,6 +144,7 @@ procedure NextCh; extern;
 {                                                               }
 { Globals:                                                      }
 {       ch - character read                                     }
+{       currentChPtr - pointer to ch in source file             }


 procedure NextToken;
@@ -240,6 +241,7 @@ type

 var
   charStrPrefix: charStrPrefixEnum;    {prefix of character/string literal}
+   currentChPtr: ptr;                   {pointer to current character in source file}
   customDefaultName: stringPtr;        {name of custom pre-included default file}
   dateStr: longStringPtr;              {macro date string}
   doingCommandLine: boolean;           {are we processing the cc= command line?}
@@ -2269,6 +2271,7 @@ if gotName then begin			{read the file name from the line}
   changedSourceFile := true;
   ReadFile;				{read the file}
   chPtr := bofPtr;			{set the start, end pointers}
+   currentChPtr := bofPtr;
   eofPtr := pointer(ord4(bofPtr)+ffDCBGS.fileLength);
   firstPtr := chPtr;			{first char in line}
   ch := chr(RETURN);			{set the initial character}
@@ -4188,6 +4191,7 @@ expandMacros := true;                   {enable macro expansion}
 reportEOL := false;                     {report eolsy as a token?}
 lineNumber := 1;                        {start the line counter}
 chPtr := start;                         {set the start, end pointers}
+currentChPtr := start;
 eofPtr := endPtr;
 firstPtr := start;                      {first char in line}
 numErr := 0;                            {no errors so far}
@@ -4942,8 +4946,8 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
      end;
   end; {while}
 tokenLine := lineNumber;                {record the position of the token}
-tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
-tokenStart := pointer(ord4(chPtr)-1);
+tokenColumn := ord(ord4(currentChPtr)-ord4(firstPtr)+1);
+tokenStart := currentChPtr;
 6:
 token.class := reservedSymbol;          {default to the most common class}
 case charKinds[ord(ch)] of
@@ -5366,7 +5370,7 @@ case charKinds[ord(ch)] of

   otherwise: Error(57);
   end; {case}
-tokenEnd := pointer(ord4(chPtr)-1);     {record the end of the token}
+tokenEnd := currentChPtr;               {record the end of the token}
 2:
 if skipping then                        {conditional compilation branch}
   if not (token.kind in [eofsy,eolsy]) then
--- a/cc.notes
+++ b/cc.notes
@@ -1784,7 +1784,7 @@ int foo(int[42]);

 182. #pragma path directives were not saved in .sym files.  This could cause ORCA/C not to search the proper paths for include files that were not represented in the .sym file (e.g. because they were included after a function).

-183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator, or on tokens that were split over two or more lines using line continuations.
+183. The # preprocessor operator would not work correctly on tokens that had been produced by the ## preprocessor operator, tokens that were split over two or more lines using line continuations, or tokens represented using trigraphs.

 -- Bugs from C 2.1.0 that have been fixed -----------------------------------