Support UTF-8/16/32 string literals and character constants (C11).

These have u8, u, or U prefixes, respectively. The types char16_t and char32_t (defined in <uchar.h>) are used for UTF-16 and UTF-32 code points.
This commit is contained in:
Stephen Heumann 2021-10-11 20:54:37 -05:00
parent 222c34a385
commit 5871820e0c
11 changed files with 689 additions and 188 deletions

View File

@ -172,7 +172,7 @@ type
{ They are created only by casts. }
intconst,uintconst,longconst,ulongconst,longlongconst,
ulonglongconst,floatconst,doubleconst,extendedconst,compconst,
charconst,scharconst,ucharconst,stringconst,
charconst,scharconst,ucharconst,ushortconst,stringconst,
{reserved words}
_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy,
_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy,
@ -210,6 +210,9 @@ type
ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
ch_backslash,letter,digit);
{prefixes of a character/string literal}
charStrPrefixEnum = (prefix_none,prefix_L,prefix_u16,prefix_U32,prefix_u8);
tokenSet = set of tokenEnum;
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
longlongConstant,realConstant,stringConstant,macroParameter);
@ -227,7 +230,8 @@ type
longlongConstant: (qval: longlong);
realConstant : (rval: extended);
stringConstant: (sval: longstringPtr;
ispstring: boolean);
ispstring: boolean;
prefix: charStrPrefixEnum);
macroParameter: (pnum: integer);
end;

View File

@ -407,7 +407,8 @@
{ GenS(pc_lca, str) }
{ }
{ Loads the address of a string onto the stack. Str is a }
{ pointer to a string constant. }
{ pointer to a string constant. No null terminator is added; }
{ it should be explicitly included in str if desired. }
{ }
{ }
{ pc_lda - load a local address }

View File

@ -28,6 +28,14 @@ const
type
ucsCodePoint = 0..maxUCSCodePoint;
utf8Rec = record
length: integer;
bytes: packed array [1..4] of byte;
end;
utf16Rec = record
length: integer;
codeUnits: packed array [1..2] of integer;
end;
function ConvertMacRomanToUCS(ch: char): ucsCodePoint;
@ -36,6 +44,7 @@ function ConvertMacRomanToUCS(ch: char): ucsCodePoint;
{ }
{ Returns UCS code point value for the character. }
function ConvertUCSToMacRoman(ch: ucsCodePoint): integer;
{ convert a character from UCS (Unicode) to MacRoman charset }
@ -43,6 +52,23 @@ function ConvertUCSToMacRoman(ch: ucsCodePoint): integer;
{ Returns ordinal value of the character, or -1 if it can't be }
{ converted. }
procedure UTF16Encode(ch: ucsCodePoint; var utf16: utf16Rec);
{ Encode a UCS code point in UTF-16 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-16 representation of the code point }
procedure UTF8Encode(ch: ucsCodePoint; var utf8: utf8Rec);
{ Encode a UCS code point in UTF-8 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-8 representation of the code point }
function ValidUCNForIdentifier(ch: ucsCodePoint; initial: boolean): boolean;
{ Check if a code point is valid for a UCN in an identifier }
@ -50,6 +76,8 @@ function ValidUCNForIdentifier(ch: ucsCodePoint; initial: boolean): boolean;
{ ch - the code point }
{ initial - is this UCN the initial element of the identifier? }
{----------------------------------------------------------------}
implementation
function ConvertMacRomanToUCS{(ch: char): ucsCodePoint};
@ -100,6 +128,60 @@ else begin
end; {ConvertUCSToMacRoman}
procedure UTF16Encode{ch: ucsCodePoint; var utf16: utf16Rec};
{ Encode a UCS code point in UTF-16 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-16 representation of the code point }
begin {UTF16Encode}
if ch <= $00ffff then begin
utf16.length := 1;
utf16.codeUnits[1] := ord(ch);
end {if}
else begin
utf16.length := 2;
ch := ch - $010000;
utf16.codeUnits[1] := $D800 | ord(ch >> 10);
utf16.codeUnits[2] := $DC00 | ord(ch & $03ff);
end; {else}
end; {UTF16Encode}
procedure UTF8Encode{ch: ucsCodePoint; var utf8: utf8Rec};
{ Encode a UCS code point in UTF-8 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-8 representation of the code point }
begin {UTF8Encode}
if ch <= $00007f then begin
utf8.length := 1;
utf8.bytes[1] := ord(ch);
end {if}
else if ch <= $0007ff then begin
utf8.length := 2;
utf8.bytes[1] := $C0 | ord(ch >> 6);
utf8.bytes[2] := $80 | ord(ch & $3f)
end {else if}
else if ch <= $00ffff then begin
utf8.length := 3;
utf8.bytes[1] := $E0 | ord(ch >> 12);
utf8.bytes[2] := $80 | ord((ch >> 6) & $3f);
utf8.bytes[3] := $80 | ord(ch & $3f);
end {else if}
else begin
utf8.length := 4;
utf8.bytes[1] := $F0 | ord(ch >> 18);
utf8.bytes[2] := $80 | ord((ch >> 12) & $3f);
utf8.bytes[3] := $80 | ord((ch >> 6) & $3f);
utf8.bytes[4] := $80 | ord(ch & $3f);
end; {else}
end; {UTF8Encode}
function ValidUCNForIdentifier{(ch: ucsCodePoint; initial: boolean): boolean};
{ Check if a code point is valid for a UCN in an identifier }

View File

@ -1046,7 +1046,7 @@ var
begin {RealVal}
if token.kind in [intconst,charconst,scharconst,ucharconst] then
RealVal := token.ival
else if token.kind = uintconst then begin
else if token.kind in [uintconst,ushortconst] then begin
if token.ival < 0 then
RealVal := (token.ival & $7FFF) + 32768.0
else
@ -1076,7 +1076,7 @@ var
begin {IntVal}
if token.kind in [intconst,charconst,scharconst,ucharconst] then
IntVal := token.ival
else if token.kind = uintconst then begin
else if token.kind in [uintconst,ushortconst] then begin
IntVal := token.ival & $0000FFFF;
end {else if}
else {if token.kind in [longconst,ulongconst] then} begin
@ -1097,7 +1097,7 @@ var
else
result.hi := 0;
end {if}
else if token.kind = uintconst then begin
else if token.kind in [uintconst,ushortconst] then begin
result.lo := token.ival & $0000FFFF;
result.hi := 0;
end {else if}
@ -1125,7 +1125,7 @@ var
begin {PPKind}
if token.kind in [intconst,longconst] then
PPKind := longlongconst
else if token.kind in [uintconst,ulongconst] then
else if token.kind in [uintconst,ushortconst,ulongconst] then
PPKind := ulonglongconst
else
PPKind := token.kind;
@ -1161,13 +1161,13 @@ var
op^.right := Pop;
op^.middle := Pop;
op^.left := Pop;
if op^.right^.token.kind in [intconst,uintconst,
if op^.right^.token.kind in [intconst,uintconst,ushortconst,
longconst,ulongconst,longlongconst,ulonglongconst,
charconst,scharconst,ucharconst] then
if op^.left^.token.kind in [intconst,uintconst,
if op^.left^.token.kind in [intconst,uintconst,ushortconst,
longconst,ulongconst,longlongconst,ulonglongconst,
charconst,scharconst,ucharconst] then
if op^.middle^.token.kind in [intconst,uintconst,
if op^.middle^.token.kind in [intconst,uintconst,ushortconst,
longconst,ulongconst,longlongconst,ulonglongconst,
charconst,scharconst,ucharconst] then begin
GetLongLongVal(llop1, op^.left^.token);
@ -1211,9 +1211,9 @@ var
op^.left := Pop;
kindRight := op^.right^.token.kind;
kindLeft := op^.left^.token.kind;
if kindRight in [intconst,uintconst,longconst,ulongconst,
if kindRight in [intconst,uintconst,ushortconst,longconst,ulongconst,
charconst,scharconst,ucharconst] then begin
if kindLeft in [intconst,uintconst,longconst,ulongconst,
if kindLeft in [intconst,uintconst,ushortconst,longconst,ulongconst,
charconst,scharconst,ucharconst] then begin
if kind = preprocessorExpression then
goto 2;
@ -1223,7 +1223,8 @@ var
ekind := ulongconst
else if (kindRight = longconst) or (kindLeft = longconst) then
ekind := longconst
else if (kindRight = uintconst) or (kindLeft = uintconst) then
else if (kindRight = uintconst) or (kindLeft = uintconst)
or (kindRight = ushortconst) or (kindLeft = ushortconst) then
ekind := uintconst
else
ekind := intconst;
@ -1289,7 +1290,8 @@ var
ekind := kindLeft;
end;
gtgtop : begin {>>}
if kindLeft in [uintconst,ulongconst] then
if kindLeft in [uintconst,ushortconst,ulongconst]
then
op1 := lshr(op1,op2)
else
op1 := op1 >> op2;
@ -1350,10 +1352,10 @@ var
end; {if}
end; {if}
2:
if kindRight in [intconst,uintconst,longconst,ulongconst,
if kindRight in [intconst,uintconst,ushortconst,longconst,ulongconst,
longlongconst,ulonglongconst,charconst,scharconst,ucharconst]
then begin
if kindLeft in [intconst,uintconst,longconst,ulongconst,
if kindLeft in [intconst,uintconst,ushortconst,longconst,ulongconst,
longlongconst,ulonglongconst,charconst,scharconst,ucharconst]
then begin
@ -1503,10 +1505,10 @@ var
if op^.right^.token.kind in [intconst,uintconst,longconst,ulongconst,
longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst,
compconst,charconst,scharconst,ucharconst] then
compconst,charconst,scharconst,ucharconst,ushortconst] then
if op^.left^.token.kind in [intconst,uintconst,longconst,ulongconst,
longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst,
compconst,charconst,scharconst,ucharconst] then
compconst,charconst,scharconst,ucharconst,ushortconst] then
begin
if fenvAccess then
if kind in [normalExpression, autoInitializerExpression] then
@ -1601,7 +1603,7 @@ var
op^.token.kind := ulongConst;
op^.token.class := longConstant;
if op^.left^.token.kind = stringConst then
op^.token.lval := op^.left^.token.sval^.length+1
op^.token.lval := op^.left^.token.sval^.length
else begin
lCodeGeneration := codeGeneration;
codeGeneration := false;
@ -1728,7 +1730,7 @@ var
begin
if (kind <> preprocessorExpression) and (op^.left^.token.kind
in [intconst,uintconst,longconst,ulongconst,charconst,scharconst,
ucharconst]) then begin
ucharconst,ushortconst]) then begin
{evaluate a constant operation}
ekind := op^.left^.token.kind;
@ -1759,7 +1761,7 @@ var
end {if}
else if op^.left^.token.kind in [longlongconst,ulonglongconst,
intconst,uintconst,longconst,ulongconst,charconst,scharconst,
ucharconst] then begin
ucharconst,ushortconst] then begin
{evaluate a constant operation with long long operand}
ekind := op^.left^.token.kind;
@ -3561,7 +3563,7 @@ case tree^.token.kind of
end; {case}
end;
intConst,uintConst,charConst,scharConst,ucharConst: begin
intConst,uintConst,ushortConst,charConst,scharConst,ucharConst: begin
Gen1t(pc_ldc, tree^.token.ival, cgWord);
lastwasconst := true;
lastconst := tree^.token.ival;
@ -3569,13 +3571,15 @@ case tree^.token.kind of
expressionType := intPtr
else if tree^.token.kind = uintConst then
expressionType := uIntPtr
else if tree^.token.kind = ushortConst then
expressionType := uShortPtr
else if tree^.token.kind = charConst then
expressionType := charPtr
else if tree^.token.kind = scharConst then
expressionType := scharPtr
else {if tree^.token.kind = ucharConst then}
expressionType := ucharPtr;
end; {case intConst,uintConst,charConst,scharConst,ucharConst}
end; {case intConst,uintConst,ushortConst,charConst,scharConst,ucharConst}
longConst,ulongConst: begin
GenLdcLong(tree^.token.lval);
@ -3621,7 +3625,7 @@ case tree^.token.kind of
stringConst: begin
GenS(pc_lca, tree^.token.sval);
expressionType := stringTypePtr;
expressionType := StringType(tree^.token.prefix);
end; {case stringConst}
eqch: begin {=}
@ -4685,11 +4689,12 @@ else begin {record the expression for an initialize
while castValue^.token.kind = castoper do
castValue := castValue^.left;
if castValue^.token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then begin
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
then begin
expressionValue := castValue^.token.ival;
isConstant := true;
expressionType := tree^.castType;
if (castValue^.token.kind = uintconst)
if (castValue^.token.kind in [uintconst,ushortconst])
or (expressionType^.kind = pointerType) then
expressionValue := expressionValue & $0000FFFF;
goto 1;
@ -4714,10 +4719,13 @@ else begin {record the expression for an initialize
expressionType := ucharPtr;
isConstant := true;
end {else if}
else if tree^.token.kind = uintconst then begin
else if tree^.token.kind in [uintconst,ushortconst] then begin
expressionValue := tree^.token.ival;
expressionValue := expressionValue & $0000FFFF;
expressionType := uIntPtr;
if tree^.token.kind = uintconst then
expressionType := uIntPtr
else {if tree^.token.kind = ushortconst then}
expressionType := uShortPtr;
isConstant := true;
end {else if}
else if tree^.token.kind = longconst then begin
@ -4773,7 +4781,7 @@ else begin {record the expression for an initialize
end {else if}
else if tree^.token.kind = stringconst then begin
expressionValue := ord4(tree^.token.sval);
expressionType := stringTypePtr;
expressionType := StringType(tree^.token.prefix);
isConstant := true;
if kind in [arrayExpression,preprocessorExpression] then begin
expressionType := intPtr;
@ -4817,7 +4825,7 @@ procedure InitExpression;
begin {InitExpression}
startTerm := [ident,intconst,uintconst,longconst,ulongconst,longlongconst,
ulonglongconst,floatconst,doubleconst,extendedconst,compconst,
charconst,scharconst,ucharconst,stringconst];
charconst,scharconst,ucharconst,ushortconst,stringconst];
startExpression:= startTerm +
[lparench,asteriskch,andch,plusch,minusch,excch,tildech,sizeofsy,
plusplusop,minusminusop,typedef,_Alignofsy,_Genericsy];

View File

@ -5890,11 +5890,10 @@ procedure GenTree {op: icptr};
gLong.where := onStack;
GenNative(m_pea, immediate, stringSize, nil, stringReference+shift16);
GenNative(m_pea, immediate, stringSize, nil, stringReference);
if maxString-stringSize >= op^.q+1 then begin
if maxString-stringSize >= op^.q then begin
for i := 1 to op^.q do
stringSpace[i+stringSize] := op^.str^.str[i];
stringSpace[stringSize+op^.q+1] := chr(0);
stringSize := stringSize+op^.q+1;
stringSize := stringSize+op^.q;
end
else
Error(cge3);

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'SCANNER'}
const
symFileVersion = 16; {version number of .sym file format}
symFileVersion = 17; {version number of .sym file format}
var
inhibitHeader: boolean; {should .sym includes be blocked?}
@ -719,6 +719,7 @@ procedure EndInclude {chPtr: ptr};
stringConstant: begin
WriteLongString(token.sval);
WriteByte(ord(token.ispstring));
WriteByte(ord(token.prefix));
end;
macroParameter: WriteWord(token.pnum);
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
@ -1008,6 +1009,10 @@ procedure EndInclude {chPtr: ptr};
WriteByte(16)
else if tp = uShortPtr then
WriteByte(17)
else if tp = utf16StringTypePtr then
WriteByte(18)
else if tp = utf32StringTypePtr then
WriteByte(19)
else if tp^.saveDisp <> 0 then begin
WriteByte(1);
WriteLong(tp^.saveDisp);
@ -1348,6 +1353,7 @@ var
stringConstant: begin
token.sval := ReadLongString;
token.ispstring := ReadByte <> 0;
token.prefix := charStrPrefixEnum(ReadByte);
end;
macroParameter: token.pnum := ReadWord;
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
@ -1741,6 +1747,8 @@ var
15: tp := uCharPtr;
16: tp := shortPtr;
17: tp := uShortPtr;
18: tp := utf16StringTypePtr;
19: tp := utf32StringTypePtr;
otherwise: begin
PurgeSymbols;

View File

@ -1909,7 +1909,7 @@ var
if tree^.token.kind = plusch then begin
rtree := tree^.right;
if rtree^.token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst] then
size := rtree^.token.ival
else if rtree^.token.kind in [longconst,ulongconst] then
size := rtree^.token.lval
@ -2086,7 +2086,8 @@ var
end;
pointerType:
if etype = stringTypePtr then begin
if (etype = stringTypePtr) or (etype = utf16StringTypePtr)
or (etype = utf32StringTypePtr) then begin
iPtr^.isConstant := true;
iPtr^.iType := ccPointer;
iPtr^.pval := 0;
@ -2151,12 +2152,12 @@ var
operator := tree^.token.kind;
while operator in [plusch,minusch] do begin
with tree^.right^.token do
if kind in [intConst,uintconst,longConst,ulongconst,
longlongConst,ulonglongconst,charconst,scharconst,
ucharconst] then begin
if kind in [intConst,uintconst,ushortconst,longConst,
ulongconst,longlongConst,ulonglongconst,charconst,
scharconst,ucharconst] then begin
if kind in [intConst,charconst,scharconst,ucharconst] then
offSet2 := ival
else if kind = uintConst then
else if kind in [uintConst,ushortconst] then
offset2 := ival & $0000ffff
else if kind in [longConst,ulongconst] then begin
offset2 := lval;
@ -2322,6 +2323,8 @@ var
kind: typeKind; {base type of an initializer}
ktp: typePtr; {array type with definedTypes removed}
lPrintMacroExpansions: boolean; {local copy of printMacroExpansions}
stringElementType: typePtr; {element type of string literal}
stringLength: integer; {elements in a string literal}
procedure Fill (count: longint; tp: typePtr);
@ -2450,44 +2453,61 @@ var
kind := ktp^.kind;
{handle string constants}
if (token.kind = stringConst) and (kind = scalarType)
and (ktp^.baseType in [cgByte,cgUByte]) then begin
if tp^.elements = 0 then begin
tp^.elements := token.sval^.length + 1;
RecomputeSizes(variable^.itype);
end {if}
else if tp^.elements < token.sval^.length then begin
Error(44);
errorFound := true;
end; {else if}
with ktp^ do begin
iPtr := pointer(Malloc(sizeof(initializerRecord)));
iPtr^.next := variable^.iPtr;
variable^.iPtr := iPtr;
iPtr^.count := 1;
iPtr^.bitdisp := 0;
iPtr^.bitsize := 0;
iPtr^.isStructOrUnion := false;
if (variable^.storage in [external,global,private]) then begin
iPtr^.isConstant := true;
iPtr^.itype := cgString;
iPtr^.sval := token.sval;
count := tp^.elements - token.sval^.length;
if count <> 0 then
Fill(count, sCharPtr);
if token.kind = stringConst then begin
stringElementType := StringType(token.prefix)^.aType;
if (kind = scalarType) and
(((ktp^.baseType in [cgByte,cgUByte])
and (stringElementType = charPtr))
or CompTypes(ktp,stringElementType)) then begin
stringLength := token.sval^.length div ord(stringElementType^.size);
if tp^.elements = 0 then begin
tp^.elements := stringLength;
RecomputeSizes(variable^.itype);
end {if}
else begin
iPtr^.isConstant := false;
new(ep);
iPtr^.iTree := ep;
ep^.next := nil;
ep^.left := nil;
ep^.middle := nil;
ep^.right := nil;
ep^.token := token;
end; {else}
end; {with}
NextToken;
else if tp^.elements < stringLength-1 then begin
Error(44);
errorFound := true;
end; {else if}
with ktp^ do begin
iPtr := pointer(Malloc(sizeof(initializerRecord)));
iPtr^.next := variable^.iPtr;
variable^.iPtr := iPtr;
iPtr^.count := 1;
iPtr^.bitdisp := 0;
iPtr^.bitsize := 0;
iPtr^.isStructOrUnion := false;
if (variable^.storage in [external,global,private]) then begin
iPtr^.isConstant := true;
iPtr^.itype := cgString;
iPtr^.sval := token.sval;
count := tp^.elements - stringLength;
if count > 0 then
Fill(count, stringElementType)
else if count = -1 then begin
iPtr^.sval := pointer(GMalloc(token.sval^.length+2));
CopyLongString(iPtr^.sval, token.sval);
iPtr^.sval^.length :=
iPtr^.sval^.length - ord(stringElementType^.size);
end; {else if}
end {if}
else begin
iPtr^.isConstant := false;
new(ep);
iPtr^.iTree := ep;
ep^.next := nil;
ep^.left := nil;
ep^.middle := nil;
ep^.right := nil;
ep^.token := token;
end; {else}
end; {with}
NextToken;
end {if}
else begin
Error(47);
errorFound := true;
NextToken;
end; {else}
end {if}
{handle arrays of non-strings}
@ -3643,7 +3663,8 @@ if isFunction then begin
NextToken;
Match(lparench,13);
if token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then begin
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
then begin
toolNum := token.ival;
NextToken;
end {if}
@ -3655,7 +3676,8 @@ if isFunction then begin
NextToken;
end {if}
else if token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then begin
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
then begin
dispatcher := token.ival;
NextToken;
end {if}
@ -4352,7 +4374,8 @@ var
elements := itype^.elements;
if elements = 0 then goto 1; {don't init flexible array member}
if iPtr^.iTree^.token.kind = stringConst then begin
size := iPtr^.iTree^.token.sval^.length+1;
elements := elements * itype^.aType^.size;
size := iPtr^.iTree^.token.sval^.length;
if size >= elements then
size := ord(elements)
else

View File

@ -239,6 +239,7 @@ type
onOffEnum = (on,off,default); {on-off values in standard pragmas}
var
charStrPrefix: charStrPrefixEnum; {prefix of character/string literal}
dateStr: longStringPtr; {macro date string}
doingCommandLine: boolean; {are we processing the cc= command line?}
doingPPExpression: boolean; {are we processing a preprocessor expression?}
@ -256,6 +257,7 @@ var
macroFound: macroRecordPtr; {last macro found by IsDefined}
mergingStrings: boolean; {is NextToken trying to merge strings?}
needWriteLine: boolean; {is there a line that needs to be written?}
octHexEscape: boolean; {octal/hex escape in char/string?}
onOffValue: onOffEnum; {value of last on-off switch}
wroteLine: boolean; {has the current line already been written?}
numErr: 0..maxErr; {number of errors in this line}
@ -343,6 +345,14 @@ procedure StartInclude (name: gsosOutStringPtr); extern;
{ 2. From Header.pas }
function StringType(prefix: charStrPrefixEnum): typePtr; extern;
{ returns the type of a string literal with specified prefix }
{ }
{ parameters: }
{ prefix - the prefix }
procedure TermHeader; extern;
{ Stop processing the header file }
@ -722,6 +732,9 @@ if list or (numErr <> 0) then begin
162: msg := @'invalid escape sequence';
163: msg := @'pointer assignment discards qualifier(s)';
164: msg := @'compound literals within functions are not supported by ORCA/C';
165: msg := @'''\p'' may not be used in a prefixed string';
166: msg := @'string literals with these prefixes may not be merged';
167: msg := @'''L''-prefixed character or string constants are not supported by ORCA/C';
otherwise: Error(57);
end; {case}
writeln(msg^);
@ -768,6 +781,34 @@ var
ch: char; {work character}
i: integer; {loop counter}
str: string[23]; {temp string}
c16ptr: ^integer; {pointer to char16_t value}
c32ptr: ^longint; {pointer to char32_t value}
procedure PrintHexDigits(i: longint; count: integer);
{ Print a digit as a hex character }
{ }
{ Parameters: }
{ i: value to print in hexadecimal }
{ count: number of digits to print }
var
digit: integer; {hex digit value}
shift: integer; {amount to shift by}
begin {PrintHexDigits}
shift := 4 * (count-1);
while shift >= 0 do begin
digit := ord(i >> shift) & $000F;
if digit < 10 then
write(chr(digit | ord('0')))
else
write(chr(digit + ord('A') - 10));
shift := shift - 4;
end; {while}
end; {PrintHexDigits}
begin {PrintToken}
case token.kind of
@ -778,7 +819,8 @@ case token.kind of
scharconst,
ucharconst,
intconst,
uintconst: write(token.ival:1);
uintconst,
ushortconst: write(token.ival:1);
longConst,
ulongConst: write(token.lval:1);
@ -801,21 +843,43 @@ case token.kind of
extendedConst: write(token.rval:1);
stringConst: begin
write('"');
for i := 1 to token.sval^.length do begin
ch := token.sval^.str[i];
if ch in [' '..'~'] then begin
if ch in ['"','\','?'] then
if token.prefix = prefix_u16 then begin
write('u"');
i := 1;
while i < token.sval^.length-2 do begin
write('\x');
c16Ptr := pointer(@token.sval^.str[i]);
PrintHexDigits(c16Ptr^, 4);
i := i + 2;
end; {while}
end {if}
else if token.prefix = prefix_U32 then begin
write('U"');
i := 1;
while i < token.sval^.length-4 do begin
write('\x');
c32Ptr := pointer(@token.sval^.str[i]);
PrintHexDigits(c32Ptr^, 8);
i := i + 4;
end; {while}
end {else if}
else begin
write('"');
for i := 1 to token.sval^.length-1 do begin
ch := token.sval^.str[i];
if ch in [' '..'~'] then begin
if ch in ['"','\','?'] then
write('\');
write(ch);
end {if}
else begin
write('\');
write(ch);
end {if}
else begin
write('\');
write((ord(ch)>>6):1);
write(((ord(ch)>>3) & $0007):1);
write((ord(ch) & $0007):1);
end; {else}
end; {for}
write((ord(ch)>>6):1);
write(((ord(ch)>>3) & $0007):1);
write((ord(ch) & $0007):1);
end; {else}
end; {for}
end; {else}
write('"');
end;
@ -1047,6 +1111,9 @@ procedure LongToPString (pstr: stringPtr; lstr: longStringPtr);
{ Convert a long string into a p string }
{ }
{ The long string is assumed to include a terminating null byte,}
{ which is not copied to the p-string. }
{ }
{ Parameters: }
{ pstr - pointer to the p-string }
{ lstr - pointer to the long string }
@ -1056,7 +1123,7 @@ var
len: integer; {string length}
begin {LongToPString}
len := lstr^.length;
len := lstr^.length-1;
if len > 255 then
len := 255;
pstr^[0] := chr(len);
@ -1083,6 +1150,56 @@ var
len,len1: integer; {length of strings}
lt: tokenType; {local copy of token}
str1,str2: stringPtr; {identifier strings}
elementType: typePtr; {string element type}
procedure ConvertString (var str: tokenType; prefix: charStrPrefixEnum);
{ Convert unprefixed string literal str to a prefixed one }
var
sPtr: longStringPtr; {new string}
i,j,k: integer; {loop counters}
codePoint: ucsCodePoint; {Unicode code point}
c16ptr: ^integer; {pointer to char16_t value}
c32ptr: ^longint; {pointer to char32_t value}
utf8: utf8Rec; {UTF-8 encoding of character}
utf16: utf16Rec; {UTF-16 encoding of character}
begin {ConvertString}
sPtr := pointer(Malloc(str.sval^.length*4));
k := 0;
for i := 1 to str.sval^.length do begin
codePoint := ConvertMacRomanToUCS(str.sval^.str[i]);
if prefix = prefix_u8 then begin
UTF8Encode(codePoint, utf8);
for j := 1 to utf8.length do begin
sPtr^.str[k+1] := chr(utf8.bytes[j]);
k := k+1;
end; {for}
end {if}
else if prefix = prefix_u16 then begin
UTF16Encode(codePoint, utf16);
c16Ptr := pointer(@sPtr^.str[k+1]);
c16Ptr^ := utf16.codeUnits[1];
k := k+2;
if utf16.length = 2 then begin
c16ptr := pointer(@sPtr^.str[k+1]);
c16Ptr^ := utf16.codeUnits[2];
k := k+2;
end; {if}
end {else if}
else if prefix = prefix_U32 then begin
c32Ptr := pointer(@sPtr^.str[k+1]);
c32Ptr^ := codePoint;
k := k+4;
end; {else if}
end; {for}
sPtr^.length := k;
str.sval := sPtr;
str.prefix := prefix;
end; {ConvertString}
begin {Merge}
kind1 := tk1.kind;
@ -1100,6 +1217,18 @@ if class1 in [identifier,reservedWord] then begin
str2 := @reservedWords[kind2]
else if class2 in numericConstants then
str2 := tk2.numString
else if (class2 = stringConstant) and (tk2.prefix = prefix_none) then begin
if str1^ = 'u' then
ConvertString(tk2, prefix_u16)
else if str1^ = 'U' then
ConvertString(tk2, prefix_U32)
else if str1^ = 'u8' then
ConvertString(tk2, prefix_u8)
else
Error(63);
tk1 := tk2;
goto 1;
end {else if}
else begin
Error(63);
goto 1;
@ -1145,7 +1274,16 @@ else if class1 in numericConstants then begin
else if class1 = stringConstant then begin
if class2 = stringConstant then begin
len1 := tk1.sval^.length;
if tk1.prefix = tk2.prefix then
{OK - nothing to do}
else if tk1.prefix = prefix_none then
ConvertString(tk1, tk2.prefix)
else if tk2.prefix = prefix_none then
ConvertString(tk2, tk1.prefix)
else
Error(166);
elementType := StringType(tk1.prefix)^.aType;
len1 := tk1.sval^.length - ord(elementType^.size);
len := len1+tk2.sval^.length;
cp := pointer(Malloc(len+2));
for i := 1 to len1 do
@ -1154,7 +1292,7 @@ else if class1 = stringConstant then begin
cp^.str[i+len1] := tk2.sval^.str[i];
cp^.length := len;
if tk1.ispstring then
cp^.str[1] := chr(len-1);
cp^.str[1] := chr(len-2);
tk1.sval := cp;
goto 1;
end; {if}
@ -1334,12 +1472,14 @@ begin {BuildStringToken}
token.kind := stringconst;
token.class := stringConstant;
token.ispstring := false;
token.sval := pointer(GMalloc(len+2));
token.sval := pointer(GMalloc(len+3));
token.prefix := prefix_none;
for i := 1 to len do begin
token.sval^.str[i] := chr(cp^);
cp := pointer(ord4(cp)+1);
end; {for}
token.sval^.length := len;
token.sval^.str[len+1] := chr(0);
token.sval^.length := len+1;
PutBackToken(token, true);
end; {BuildStringToken}
@ -1504,10 +1644,12 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst;
token.class := stringConstant;
token.ispstring := false;
sp := pointer(Malloc(5+sourceFileGS.theString.size));
sp^.length := sourceFileGS.theString.size;
token.prefix := prefix_none;
sp := pointer(Malloc(3+sourceFileGS.theString.size));
sp^.length := sourceFileGS.theString.size+1;
for i := 1 to sourceFileGS.theString.size do
sp^.str[i] := sourceFileGS.theString.theString[i];
sp^.str[i+1] := chr(0);
token.sval := sp;
tokenStart := @sp^.str;
tokenEnd := pointer(ord4(tokenStart)+sp^.length);
@ -1517,6 +1659,7 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst;
token.class := stringConstant;
token.ispstring := false;
token.prefix := prefix_none;
token.sval := dateStr;
tokenStart := @dateStr^.str;
tokenEnd := pointer(ord4(tokenStart)+dateStr^.length);
@ -1527,6 +1670,7 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst;
token.class := stringConstant;
token.ispstring := false;
token.prefix := prefix_none;
token.sval := timeStr;
tokenStart := @timeStr^.str;
tokenEnd := pointer(ord4(tokenStart)+timeStr^.length);
@ -1537,8 +1681,8 @@ if macro^.readOnly then begin {handle special macros}
token.kind := intConst; {__ORCAC__}
token.numString := @oneStr; {__STDC_NO_...__}
token.class := intConstant; {__ORCAC_HAS_LONG_LONG__}
token.ival := 1;
oneStr := '1';
token.ival := 1; {__STDC_UTF_16__}
oneStr := '1'; {__STDC_UTF_32__}
tokenStart := @oneStr[1];
tokenEnd := pointer(ord4(tokenStart)+1);
end;
@ -1547,6 +1691,7 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst;
token.class := stringConstant;
token.ispstring := false;
token.prefix := prefix_none;
token.sval := versionStrL;
tokenStart := @versionStrL^.str;
tokenEnd := pointer(ord4(tokenStart)+versionStrL^.length);
@ -1606,7 +1751,7 @@ else begin
if tcPtr^.token.kind = stringconst then begin
BuildStringToken(@quoteStr[1], 1);
BuildStringToken(@tcPtr^.token.sval^.str,
tcPtr^.token.sval^.length);
tcPtr^.token.sval^.length-1);
BuildStringToken(@quoteStr[1], 1);
end {if}
else begin
@ -1851,6 +1996,7 @@ if ch = '<' then begin {process a library file...}
token.kind := stringconst; {convert a <> style name to a string}
token.class := stringConstant;
token.ispstring := false;
token.prefix := prefix_none;
i := 0;
while not (charKinds[ord(ch)] in [ch_eol,ch_gt]) do begin
i := i+1;
@ -1883,7 +2029,7 @@ else begin
{handle file names that are strings or macro expansions}
expandMacros := true; {allow macros to be used in the name}
NextToken; {skip the command name}
if token.kind = stringConst then begin
if (token.kind = stringConst) and (token.prefix = prefix_none) then begin
LongToPString(@workString, token.sval);
CheckDelimiters(workString);
if mustExist then begin
@ -2417,6 +2563,10 @@ var
stringConstant: begin
if tk1^.token.sval^.length <> tk2^.token.sval^.length
then goto 3;
if tk1^.token.ispstring <> tk2^.token.ispstring then
goto 3;
if tk1^.token.prefix <> tk2^.token.prefix then
goto 3;
for i := 1 to tk1^.token.sval^.length do
if tk1^.token.sval^.str[i] <>
tk2^.token.sval^.str[i] then
@ -2562,7 +2712,7 @@ var
while not (token.kind in [eolsy, eofsy]) do begin
msg^ := concat(msg^, ' ');
if token.kind = stringConst then begin
len := token.sval^.length;
len := token.sval^.length-1;
for i := 1 to len do
msg^ := concat(msg^, token.sval^.str[i]);
end {if}
@ -2593,13 +2743,13 @@ var
begin {DoFloat}
FlagPragmas(p_float);
NextToken;
if token.kind in [intconst,uintconst] then begin
if token.kind in [intconst,uintconst,ushortconst] then begin
floatCard := token.ival;
NextToken;
end {if}
else
Error(18);
if token.kind in [intconst,uintconst] then begin
if token.kind in [intconst,uintconst,ushortconst] then begin
floatSlot := $C080 | (token.ival << 4);
NextToken;
end {if}
@ -2669,7 +2819,7 @@ var
NextToken;
isNegative := true;
end; {else if}
if token.kind in [intconst,uintconst] then begin
if token.kind in [intconst,uintconst,ushortconst] then begin
value := token.ival;
NextToken;
end {if}
@ -2899,11 +3049,14 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin
end {if}
else
Error(18);
if token.kind = stringconst then begin
if (token.kind = stringconst)
and (token.prefix = prefix_none) then begin
LongToPString(
pointer(ord4(@sourceFileGS.theString)+1),
token.sval);
sourceFileGS.theString.size := token.sval^.length;
sourceFileGS.theString.size := token.sval^.length-1;
if sourceFileGS.theString.size > 255 then
sourceFileGS.theString.size := 255;
NextToken;
end; {if}
if token.kind <> eolsy then
@ -3866,7 +4019,9 @@ var
token.sval := pointer(Malloc(i+3)); {put the string in the string pool}
CopyLongString(token.sval, pointer(sPtr));
dispose(sPtr);
token.sval^.str[i+1] := chr(0); {add null in case the string is extended}
token.sval^.str[i+1] := chr(0); {add null terminator}
token.sval^.length := i+1;
token.prefix := prefix_none;
end; {GetString}
@ -3912,6 +4067,7 @@ doingPPExpression := false; {not doing a preprocessor expression}
unix_1 := false; {int is 16 bits}
lintIsError := true; {lint messages are considered errors}
fenvAccess := false; {not accessing fp environment}
charStrPrefix := prefix_none; {no char/str prefix seen}
mergingStrings := false; {not currently merging strings}
{error codes for lint messages}
@ -3986,6 +4142,24 @@ mp^.algorithm := 6;
bp := pointer(ord4(macros) + hash(mp^.name));
mp^.next := bp^;
bp^ := mp;
new(mp); {__STDC_UTF_16__}
mp^.name := @'__STDC_UTF_16__';
mp^.parameters := -1;
mp^.tokens := nil;
mp^.readOnly := true;
mp^.algorithm := 5;
bp := pointer(ord4(macros) + hash(mp^.name));
mp^.next := bp^;
bp^ := mp;
new(mp); {__STDC_UTF_32__}
mp^.name := @'__STDC_UTF_32__';
mp^.parameters := -1;
mp^.tokens := nil;
mp^.readOnly := true;
mp^.algorithm := 5;
bp := pointer(ord4(macros) + hash(mp^.name));
mp^.next := bp^;
bp^ := mp;
new(mp); {__ORCAC_HAS_LONG_LONG__}
mp^.name := @'__ORCAC_HAS_LONG_LONG__';
mp^.parameters := -1;
@ -4042,8 +4216,8 @@ mp^.next := bp^;
bp^ := mp;
SetDateTime; {set up the macro date/time strings}
{set up the version string}
versionStrL := pointer(GMalloc(3 + length(versionStr)));
versionStrL^.length := length(versionStr);
versionStrL := pointer(GCalloc(3 + length(versionStr)));
versionStrL^.length := length(versionStr)+1;
versionStrL^.str := versionStr;
{Scan the command line options}
@ -4197,7 +4371,7 @@ procedure NextToken;
{ Read the next token from the file. }
label 1,2,3,4,5;
label 1,2,3,4,5,6;
type
three = (s100,s1000,s4000); {these declarations are used for a}
@ -4228,11 +4402,15 @@ var
tToken: tokenType; {for merging tokens}
sPtr,tsPtr: gstringPtr; {for forming string constants}
lLastWasReturn: boolean; {local copy of lastWasReturn}
codePoint: ucsCodePoint; {Unicode code point from UCN}
codePoint: longint; {Unicode character value}
chFromUCN: integer; {character given by UCN (converted)}
c16ptr: ^integer; {pointer to char16_t value}
c32ptr: ^longint; {pointer to char32_t value}
utf8: utf8Rec; {UTF-8 encoding of character}
utf16: utf16Rec; {UTF-16 encoding of character}
function EscapeCh: integer;
function EscapeCh: longint;
{ Find and return the next character in a string or char }
{ constant. Handle escape sequences if they are found. }
@ -4241,19 +4419,20 @@ var
{ Globals: }
{ ch - first character in sequence; set to first char }
{ after sequence }
label 1;
{ charStrPrefix - prefix of the char constant or string }
{ octHexEscape - true if this was an octal/hex escape seq.}
var
cnt: 0..3; {for counting octal escape sequences}
dig: 0..15; {value of a hex digit}
skipChar: boolean; {get next char when done?}
val: 0..maxint; {hex/octal escape code value}
val: longint; {hex/octal escape code value}
codePoint: ucsCodePoint; {code point given by UCN}
chFromUCN: integer; {character given by UCN (converted)}
begin {EscapeCh}
1: skipChar := true;
octHexEscape := false;
skipChar := true;
if ch = '\' then begin
NextCh;
if ch in ['0'..'7','a','b','t','n','v','f','p','r','x','u','U',
@ -4268,9 +4447,13 @@ var
NextCh;
end; {while}
if (val & $FF00) <> 0 then
if not skipping then
Error(162);
EscapeCh := val & $FF;
if charStrPrefix in [prefix_none,prefix_u8] then begin
if not skipping then
Error(162);
val := 0;
end; {if}
EscapeCh := val;
octHexEscape := true;
skipChar := false;
end;
'a': EscapeCh := 7;
@ -4294,28 +4477,41 @@ var
ch := chr(ord(ch)&$5F);
dig := ord(ch)-ord('A')+10;
end; {else}
val := (val << 4) | dig;
if (val & $FF00) <> 0 then begin
if ((charStrPrefix = prefix_none) and ((val & $F0) <> 0)) or
((charStrPrefix = prefix_u8) and ((val & $F0) <> 0)) or
((charStrPrefix = prefix_u16) and ((val & $F000) <> 0)) or
((charStrPrefix = prefix_u32) and ((val & $F0000000) <> 0))
then begin
if not skipping then