Support UTF-8/16/32 string literals and character constants (C11).

These have u8, u, or U prefixes, respectively. The types char16_t and char32_t (defined in <uchar.h>) are used for UTF-16 and UTF-32 code points.
This commit is contained in:
Stephen Heumann 2021-10-11 20:54:37 -05:00
parent 222c34a385
commit 5871820e0c
11 changed files with 689 additions and 188 deletions

View File

@ -172,7 +172,7 @@ type
{ They are created only by casts. } { They are created only by casts. }
intconst,uintconst,longconst,ulongconst,longlongconst, intconst,uintconst,longconst,ulongconst,longlongconst,
ulonglongconst,floatconst,doubleconst,extendedconst,compconst, ulonglongconst,floatconst,doubleconst,extendedconst,compconst,
charconst,scharconst,ucharconst,stringconst, charconst,scharconst,ucharconst,ushortconst,stringconst,
{reserved words} {reserved words}
_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy, _Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy,
_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy, _Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy,
@ -210,6 +210,9 @@ type
ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon, ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
ch_backslash,letter,digit); ch_backslash,letter,digit);
{prefixes of a character/string literal}
charStrPrefixEnum = (prefix_none,prefix_L,prefix_u16,prefix_U32,prefix_u8);
tokenSet = set of tokenEnum; tokenSet = set of tokenEnum;
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant, tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
longlongConstant,realConstant,stringConstant,macroParameter); longlongConstant,realConstant,stringConstant,macroParameter);
@ -227,7 +230,8 @@ type
longlongConstant: (qval: longlong); longlongConstant: (qval: longlong);
realConstant : (rval: extended); realConstant : (rval: extended);
stringConstant: (sval: longstringPtr; stringConstant: (sval: longstringPtr;
ispstring: boolean); ispstring: boolean;
prefix: charStrPrefixEnum);
macroParameter: (pnum: integer); macroParameter: (pnum: integer);
end; end;

View File

@ -407,7 +407,8 @@
{ GenS(pc_lca, str) } { GenS(pc_lca, str) }
{ } { }
{ Loads the address of a string onto the stack. Str is a } { Loads the address of a string onto the stack. Str is a }
{ pointer to a string constant. } { pointer to a string constant. No null terminator is added; }
{ it should be explicitly included in str if desired. }
{ } { }
{ } { }
{ pc_lda - load a local address } { pc_lda - load a local address }

View File

@ -28,6 +28,14 @@ const
type type
ucsCodePoint = 0..maxUCSCodePoint; ucsCodePoint = 0..maxUCSCodePoint;
utf8Rec = record
length: integer;
bytes: packed array [1..4] of byte;
end;
utf16Rec = record
length: integer;
codeUnits: packed array [1..2] of integer;
end;
function ConvertMacRomanToUCS(ch: char): ucsCodePoint; function ConvertMacRomanToUCS(ch: char): ucsCodePoint;
@ -36,6 +44,7 @@ function ConvertMacRomanToUCS(ch: char): ucsCodePoint;
{ } { }
{ Returns UCS code point value for the character. } { Returns UCS code point value for the character. }
function ConvertUCSToMacRoman(ch: ucsCodePoint): integer; function ConvertUCSToMacRoman(ch: ucsCodePoint): integer;
{ convert a character from UCS (Unicode) to MacRoman charset } { convert a character from UCS (Unicode) to MacRoman charset }
@ -43,6 +52,23 @@ function ConvertUCSToMacRoman(ch: ucsCodePoint): integer;
{ Returns ordinal value of the character, or -1 if it can't be } { Returns ordinal value of the character, or -1 if it can't be }
{ converted. } { converted. }
procedure UTF16Encode(ch: ucsCodePoint; var utf16: utf16Rec);
{ Encode a UCS code point in UTF-16 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-16 representation of the code point }
procedure UTF8Encode(ch: ucsCodePoint; var utf8: utf8Rec);
{ Encode a UCS code point in UTF-8 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-8 representation of the code point }
function ValidUCNForIdentifier(ch: ucsCodePoint; initial: boolean): boolean; function ValidUCNForIdentifier(ch: ucsCodePoint; initial: boolean): boolean;
{ Check if a code point is valid for a UCN in an identifier } { Check if a code point is valid for a UCN in an identifier }
@ -50,6 +76,8 @@ function ValidUCNForIdentifier(ch: ucsCodePoint; initial: boolean): boolean;
{ ch - the code point } { ch - the code point }
{ initial - is this UCN the initial element of the identifier? } { initial - is this UCN the initial element of the identifier? }
{----------------------------------------------------------------}
implementation implementation
function ConvertMacRomanToUCS{(ch: char): ucsCodePoint}; function ConvertMacRomanToUCS{(ch: char): ucsCodePoint};
@ -100,6 +128,60 @@ else begin
end; {ConvertUCSToMacRoman} end; {ConvertUCSToMacRoman}
procedure UTF16Encode{ch: ucsCodePoint; var utf16: utf16Rec};
{ Encode a UCS code point in UTF-16 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-16 representation of the code point }
begin {UTF16Encode}
if ch <= $00ffff then begin
utf16.length := 1;
utf16.codeUnits[1] := ord(ch);
end {if}
else begin
utf16.length := 2;
ch := ch - $010000;
utf16.codeUnits[1] := $D800 | ord(ch >> 10);
utf16.codeUnits[2] := $DC00 | ord(ch & $03ff);
end; {else}
end; {UTF16Encode}
procedure UTF8Encode{ch: ucsCodePoint; var utf8: utf8Rec};
{ Encode a UCS code point in UTF-8 }
{ }
{ ch - the code point }
{ utf16 - set to the UTF-8 representation of the code point }
begin {UTF8Encode}
if ch <= $00007f then begin
utf8.length := 1;
utf8.bytes[1] := ord(ch);
end {if}
else if ch <= $0007ff then begin
utf8.length := 2;
utf8.bytes[1] := $C0 | ord(ch >> 6);
utf8.bytes[2] := $80 | ord(ch & $3f)
end {else if}
else if ch <= $00ffff then begin
utf8.length := 3;
utf8.bytes[1] := $E0 | ord(ch >> 12);
utf8.bytes[2] := $80 | ord((ch >> 6) & $3f);
utf8.bytes[3] := $80 | ord(ch & $3f);
end {else if}
else begin
utf8.length := 4;
utf8.bytes[1] := $F0 | ord(ch >> 18);
utf8.bytes[2] := $80 | ord((ch >> 12) & $3f);
utf8.bytes[3] := $80 | ord((ch >> 6) & $3f);
utf8.bytes[4] := $80 | ord(ch & $3f);
end; {else}
end; {UTF8Encode}
function ValidUCNForIdentifier{(ch: ucsCodePoint; initial: boolean): boolean}; function ValidUCNForIdentifier{(ch: ucsCodePoint; initial: boolean): boolean};
{ Check if a code point is valid for a UCN in an identifier } { Check if a code point is valid for a UCN in an identifier }

View File

@ -1046,7 +1046,7 @@ var
begin {RealVal} begin {RealVal}
if token.kind in [intconst,charconst,scharconst,ucharconst] then if token.kind in [intconst,charconst,scharconst,ucharconst] then
RealVal := token.ival RealVal := token.ival
else if token.kind = uintconst then begin else if token.kind in [uintconst,ushortconst] then begin
if token.ival < 0 then if token.ival < 0 then
RealVal := (token.ival & $7FFF) + 32768.0 RealVal := (token.ival & $7FFF) + 32768.0
else else
@ -1076,7 +1076,7 @@ var
begin {IntVal} begin {IntVal}
if token.kind in [intconst,charconst,scharconst,ucharconst] then if token.kind in [intconst,charconst,scharconst,ucharconst] then
IntVal := token.ival IntVal := token.ival
else if token.kind = uintconst then begin else if token.kind in [uintconst,ushortconst] then begin
IntVal := token.ival & $0000FFFF; IntVal := token.ival & $0000FFFF;
end {else if} end {else if}
else {if token.kind in [longconst,ulongconst] then} begin else {if token.kind in [longconst,ulongconst] then} begin
@ -1097,7 +1097,7 @@ var
else else
result.hi := 0; result.hi := 0;
end {if} end {if}
else if token.kind = uintconst then begin else if token.kind in [uintconst,ushortconst] then begin
result.lo := token.ival & $0000FFFF; result.lo := token.ival & $0000FFFF;
result.hi := 0; result.hi := 0;
end {else if} end {else if}
@ -1125,7 +1125,7 @@ var
begin {PPKind} begin {PPKind}
if token.kind in [intconst,longconst] then if token.kind in [intconst,longconst] then
PPKind := longlongconst PPKind := longlongconst
else if token.kind in [uintconst,ulongconst] then else if token.kind in [uintconst,ushortconst,ulongconst] then
PPKind := ulonglongconst PPKind := ulonglongconst
else else
PPKind := token.kind; PPKind := token.kind;
@ -1161,13 +1161,13 @@ var
op^.right := Pop; op^.right := Pop;
op^.middle := Pop; op^.middle := Pop;
op^.left := Pop; op^.left := Pop;
if op^.right^.token.kind in [intconst,uintconst, if op^.right^.token.kind in [intconst,uintconst,ushortconst,
longconst,ulongconst,longlongconst,ulonglongconst, longconst,ulongconst,longlongconst,ulonglongconst,
charconst,scharconst,ucharconst] then charconst,scharconst,ucharconst] then
if op^.left^.token.kind in [intconst,uintconst, if op^.left^.token.kind in [intconst,uintconst,ushortconst,
longconst,ulongconst,longlongconst,ulonglongconst, longconst,ulongconst,longlongconst,ulonglongconst,
charconst,scharconst,ucharconst] then charconst,scharconst,ucharconst] then
if op^.middle^.token.kind in [intconst,uintconst, if op^.middle^.token.kind in [intconst,uintconst,ushortconst,
longconst,ulongconst,longlongconst,ulonglongconst, longconst,ulongconst,longlongconst,ulonglongconst,
charconst,scharconst,ucharconst] then begin charconst,scharconst,ucharconst] then begin
GetLongLongVal(llop1, op^.left^.token); GetLongLongVal(llop1, op^.left^.token);
@ -1211,9 +1211,9 @@ var
op^.left := Pop; op^.left := Pop;
kindRight := op^.right^.token.kind; kindRight := op^.right^.token.kind;
kindLeft := op^.left^.token.kind; kindLeft := op^.left^.token.kind;
if kindRight in [intconst,uintconst,longconst,ulongconst, if kindRight in [intconst,uintconst,ushortconst,longconst,ulongconst,
charconst,scharconst,ucharconst] then begin charconst,scharconst,ucharconst] then begin
if kindLeft in [intconst,uintconst,longconst,ulongconst, if kindLeft in [intconst,uintconst,ushortconst,longconst,ulongconst,
charconst,scharconst,ucharconst] then begin charconst,scharconst,ucharconst] then begin
if kind = preprocessorExpression then if kind = preprocessorExpression then
goto 2; goto 2;
@ -1223,7 +1223,8 @@ var
ekind := ulongconst ekind := ulongconst
else if (kindRight = longconst) or (kindLeft = longconst) then else if (kindRight = longconst) or (kindLeft = longconst) then
ekind := longconst ekind := longconst
else if (kindRight = uintconst) or (kindLeft = uintconst) then else if (kindRight = uintconst) or (kindLeft = uintconst)
or (kindRight = ushortconst) or (kindLeft = ushortconst) then
ekind := uintconst ekind := uintconst
else else
ekind := intconst; ekind := intconst;
@ -1289,7 +1290,8 @@ var
ekind := kindLeft; ekind := kindLeft;
end; end;
gtgtop : begin {>>} gtgtop : begin {>>}
if kindLeft in [uintconst,ulongconst] then if kindLeft in [uintconst,ushortconst,ulongconst]
then
op1 := lshr(op1,op2) op1 := lshr(op1,op2)
else else
op1 := op1 >> op2; op1 := op1 >> op2;
@ -1350,10 +1352,10 @@ var
end; {if} end; {if}
end; {if} end; {if}
2: 2:
if kindRight in [intconst,uintconst,longconst,ulongconst, if kindRight in [intconst,uintconst,ushortconst,longconst,ulongconst,
longlongconst,ulonglongconst,charconst,scharconst,ucharconst] longlongconst,ulonglongconst,charconst,scharconst,ucharconst]
then begin then begin
if kindLeft in [intconst,uintconst,longconst,ulongconst, if kindLeft in [intconst,uintconst,ushortconst,longconst,ulongconst,
longlongconst,ulonglongconst,charconst,scharconst,ucharconst] longlongconst,ulonglongconst,charconst,scharconst,ucharconst]
then begin then begin
@ -1503,10 +1505,10 @@ var
if op^.right^.token.kind in [intconst,uintconst,longconst,ulongconst, if op^.right^.token.kind in [intconst,uintconst,longconst,ulongconst,
longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst, longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst,
compconst,charconst,scharconst,ucharconst] then compconst,charconst,scharconst,ucharconst,ushortconst] then
if op^.left^.token.kind in [intconst,uintconst,longconst,ulongconst, if op^.left^.token.kind in [intconst,uintconst,longconst,ulongconst,
longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst, longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst,
compconst,charconst,scharconst,ucharconst] then compconst,charconst,scharconst,ucharconst,ushortconst] then
begin begin
if fenvAccess then if fenvAccess then
if kind in [normalExpression, autoInitializerExpression] then if kind in [normalExpression, autoInitializerExpression] then
@ -1601,7 +1603,7 @@ var
op^.token.kind := ulongConst; op^.token.kind := ulongConst;
op^.token.class := longConstant; op^.token.class := longConstant;
if op^.left^.token.kind = stringConst then if op^.left^.token.kind = stringConst then
op^.token.lval := op^.left^.token.sval^.length+1 op^.token.lval := op^.left^.token.sval^.length
else begin else begin
lCodeGeneration := codeGeneration; lCodeGeneration := codeGeneration;
codeGeneration := false; codeGeneration := false;
@ -1728,7 +1730,7 @@ var
begin begin
if (kind <> preprocessorExpression) and (op^.left^.token.kind if (kind <> preprocessorExpression) and (op^.left^.token.kind
in [intconst,uintconst,longconst,ulongconst,charconst,scharconst, in [intconst,uintconst,longconst,ulongconst,charconst,scharconst,
ucharconst]) then begin ucharconst,ushortconst]) then begin
{evaluate a constant operation} {evaluate a constant operation}
ekind := op^.left^.token.kind; ekind := op^.left^.token.kind;
@ -1759,7 +1761,7 @@ var
end {if} end {if}
else if op^.left^.token.kind in [longlongconst,ulonglongconst, else if op^.left^.token.kind in [longlongconst,ulonglongconst,
intconst,uintconst,longconst,ulongconst,charconst,scharconst, intconst,uintconst,longconst,ulongconst,charconst,scharconst,
ucharconst] then begin ucharconst,ushortconst] then begin
{evaluate a constant operation with long long operand} {evaluate a constant operation with long long operand}
ekind := op^.left^.token.kind; ekind := op^.left^.token.kind;
@ -3561,7 +3563,7 @@ case tree^.token.kind of
end; {case} end; {case}
end; end;
intConst,uintConst,charConst,scharConst,ucharConst: begin intConst,uintConst,ushortConst,charConst,scharConst,ucharConst: begin
Gen1t(pc_ldc, tree^.token.ival, cgWord); Gen1t(pc_ldc, tree^.token.ival, cgWord);
lastwasconst := true; lastwasconst := true;
lastconst := tree^.token.ival; lastconst := tree^.token.ival;
@ -3569,13 +3571,15 @@ case tree^.token.kind of
expressionType := intPtr expressionType := intPtr
else if tree^.token.kind = uintConst then else if tree^.token.kind = uintConst then
expressionType := uIntPtr expressionType := uIntPtr
else if tree^.token.kind = ushortConst then
expressionType := uShortPtr
else if tree^.token.kind = charConst then else if tree^.token.kind = charConst then
expressionType := charPtr expressionType := charPtr
else if tree^.token.kind = scharConst then else if tree^.token.kind = scharConst then
expressionType := scharPtr expressionType := scharPtr
else {if tree^.token.kind = ucharConst then} else {if tree^.token.kind = ucharConst then}
expressionType := ucharPtr; expressionType := ucharPtr;
end; {case intConst,uintConst,charConst,scharConst,ucharConst} end; {case intConst,uintConst,ushortConst,charConst,scharConst,ucharConst}
longConst,ulongConst: begin longConst,ulongConst: begin
GenLdcLong(tree^.token.lval); GenLdcLong(tree^.token.lval);
@ -3621,7 +3625,7 @@ case tree^.token.kind of
stringConst: begin stringConst: begin
GenS(pc_lca, tree^.token.sval); GenS(pc_lca, tree^.token.sval);
expressionType := stringTypePtr; expressionType := StringType(tree^.token.prefix);
end; {case stringConst} end; {case stringConst}
eqch: begin {=} eqch: begin {=}
@ -4685,11 +4689,12 @@ else begin {record the expression for an initialize
while castValue^.token.kind = castoper do while castValue^.token.kind = castoper do
castValue := castValue^.left; castValue := castValue^.left;
if castValue^.token.kind in if castValue^.token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then begin [intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
then begin
expressionValue := castValue^.token.ival; expressionValue := castValue^.token.ival;
isConstant := true; isConstant := true;
expressionType := tree^.castType; expressionType := tree^.castType;
if (castValue^.token.kind = uintconst) if (castValue^.token.kind in [uintconst,ushortconst])
or (expressionType^.kind = pointerType) then or (expressionType^.kind = pointerType) then
expressionValue := expressionValue & $0000FFFF; expressionValue := expressionValue & $0000FFFF;
goto 1; goto 1;
@ -4714,10 +4719,13 @@ else begin {record the expression for an initialize
expressionType := ucharPtr; expressionType := ucharPtr;
isConstant := true; isConstant := true;
end {else if} end {else if}
else if tree^.token.kind = uintconst then begin else if tree^.token.kind in [uintconst,ushortconst] then begin
expressionValue := tree^.token.ival; expressionValue := tree^.token.ival;
expressionValue := expressionValue & $0000FFFF; expressionValue := expressionValue & $0000FFFF;
expressionType := uIntPtr; if tree^.token.kind = uintconst then
expressionType := uIntPtr
else {if tree^.token.kind = ushortconst then}
expressionType := uShortPtr;
isConstant := true; isConstant := true;
end {else if} end {else if}
else if tree^.token.kind = longconst then begin else if tree^.token.kind = longconst then begin
@ -4773,7 +4781,7 @@ else begin {record the expression for an initialize
end {else if} end {else if}
else if tree^.token.kind = stringconst then begin else if tree^.token.kind = stringconst then begin
expressionValue := ord4(tree^.token.sval); expressionValue := ord4(tree^.token.sval);
expressionType := stringTypePtr; expressionType := StringType(tree^.token.prefix);
isConstant := true; isConstant := true;
if kind in [arrayExpression,preprocessorExpression] then begin if kind in [arrayExpression,preprocessorExpression] then begin
expressionType := intPtr; expressionType := intPtr;
@ -4817,7 +4825,7 @@ procedure InitExpression;
begin {InitExpression} begin {InitExpression}
startTerm := [ident,intconst,uintconst,longconst,ulongconst,longlongconst, startTerm := [ident,intconst,uintconst,longconst,ulongconst,longlongconst,
ulonglongconst,floatconst,doubleconst,extendedconst,compconst, ulonglongconst,floatconst,doubleconst,extendedconst,compconst,
charconst,scharconst,ucharconst,stringconst]; charconst,scharconst,ucharconst,ushortconst,stringconst];
startExpression:= startTerm + startExpression:= startTerm +
[lparench,asteriskch,andch,plusch,minusch,excch,tildech,sizeofsy, [lparench,asteriskch,andch,plusch,minusch,excch,tildech,sizeofsy,
plusplusop,minusminusop,typedef,_Alignofsy,_Genericsy]; plusplusop,minusminusop,typedef,_Alignofsy,_Genericsy];

View File

@ -5890,11 +5890,10 @@ procedure GenTree {op: icptr};
gLong.where := onStack; gLong.where := onStack;
GenNative(m_pea, immediate, stringSize, nil, stringReference+shift16); GenNative(m_pea, immediate, stringSize, nil, stringReference+shift16);
GenNative(m_pea, immediate, stringSize, nil, stringReference); GenNative(m_pea, immediate, stringSize, nil, stringReference);
if maxString-stringSize >= op^.q+1 then begin if maxString-stringSize >= op^.q then begin
for i := 1 to op^.q do for i := 1 to op^.q do
stringSpace[i+stringSize] := op^.str^.str[i]; stringSpace[i+stringSize] := op^.str^.str[i];
stringSpace[stringSize+op^.q+1] := chr(0); stringSize := stringSize+op^.q;
stringSize := stringSize+op^.q+1;
end end
else else
Error(cge3); Error(cge3);

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'SCANNER'} {$segment 'SCANNER'}
const const
symFileVersion = 16; {version number of .sym file format} symFileVersion = 17; {version number of .sym file format}
var var
inhibitHeader: boolean; {should .sym includes be blocked?} inhibitHeader: boolean; {should .sym includes be blocked?}
@ -719,6 +719,7 @@ procedure EndInclude {chPtr: ptr};
stringConstant: begin stringConstant: begin
WriteLongString(token.sval); WriteLongString(token.sval);
WriteByte(ord(token.ispstring)); WriteByte(ord(token.ispstring));
WriteByte(ord(token.prefix));
end; end;
macroParameter: WriteWord(token.pnum); macroParameter: WriteWord(token.pnum);
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
@ -1008,6 +1009,10 @@ procedure EndInclude {chPtr: ptr};
WriteByte(16) WriteByte(16)
else if tp = uShortPtr then else if tp = uShortPtr then
WriteByte(17) WriteByte(17)
else if tp = utf16StringTypePtr then
WriteByte(18)
else if tp = utf32StringTypePtr then
WriteByte(19)
else if tp^.saveDisp <> 0 then begin else if tp^.saveDisp <> 0 then begin
WriteByte(1); WriteByte(1);
WriteLong(tp^.saveDisp); WriteLong(tp^.saveDisp);
@ -1348,6 +1353,7 @@ var
stringConstant: begin stringConstant: begin
token.sval := ReadLongString; token.sval := ReadLongString;
token.ispstring := ReadByte <> 0; token.ispstring := ReadByte <> 0;
token.prefix := charStrPrefixEnum(ReadByte);
end; end;
macroParameter: token.pnum := ReadWord; macroParameter: token.pnum := ReadWord;
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch, reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
@ -1741,6 +1747,8 @@ var
15: tp := uCharPtr; 15: tp := uCharPtr;
16: tp := shortPtr; 16: tp := shortPtr;
17: tp := uShortPtr; 17: tp := uShortPtr;
18: tp := utf16StringTypePtr;
19: tp := utf32StringTypePtr;
otherwise: begin otherwise: begin
PurgeSymbols; PurgeSymbols;

View File

@ -1909,7 +1909,7 @@ var
if tree^.token.kind = plusch then begin if tree^.token.kind = plusch then begin
rtree := tree^.right; rtree := tree^.right;
if rtree^.token.kind in if rtree^.token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then [intconst,uintconst,ushortconst,charconst,scharconst,ucharconst] then
size := rtree^.token.ival size := rtree^.token.ival
else if rtree^.token.kind in [longconst,ulongconst] then else if rtree^.token.kind in [longconst,ulongconst] then
size := rtree^.token.lval size := rtree^.token.lval
@ -2086,7 +2086,8 @@ var
end; end;
pointerType: pointerType:
if etype = stringTypePtr then begin if (etype = stringTypePtr) or (etype = utf16StringTypePtr)
or (etype = utf32StringTypePtr) then begin
iPtr^.isConstant := true; iPtr^.isConstant := true;
iPtr^.iType := ccPointer; iPtr^.iType := ccPointer;
iPtr^.pval := 0; iPtr^.pval := 0;
@ -2151,12 +2152,12 @@ var
operator := tree^.token.kind; operator := tree^.token.kind;
while operator in [plusch,minusch] do begin while operator in [plusch,minusch] do begin
with tree^.right^.token do with tree^.right^.token do
if kind in [intConst,uintconst,longConst,ulongconst, if kind in [intConst,uintconst,ushortconst,longConst,
longlongConst,ulonglongconst,charconst,scharconst, ulongconst,longlongConst,ulonglongconst,charconst,
ucharconst] then begin scharconst,ucharconst] then begin
if kind in [intConst,charconst,scharconst,ucharconst] then if kind in [intConst,charconst,scharconst,ucharconst] then
offSet2 := ival offSet2 := ival
else if kind = uintConst then else if kind in [uintConst,ushortconst] then
offset2 := ival & $0000ffff offset2 := ival & $0000ffff
else if kind in [longConst,ulongconst] then begin else if kind in [longConst,ulongconst] then begin
offset2 := lval; offset2 := lval;
@ -2322,6 +2323,8 @@ var
kind: typeKind; {base type of an initializer} kind: typeKind; {base type of an initializer}
ktp: typePtr; {array type with definedTypes removed} ktp: typePtr; {array type with definedTypes removed}
lPrintMacroExpansions: boolean; {local copy of printMacroExpansions} lPrintMacroExpansions: boolean; {local copy of printMacroExpansions}
stringElementType: typePtr; {element type of string literal}
stringLength: integer; {elements in a string literal}
procedure Fill (count: longint; tp: typePtr); procedure Fill (count: longint; tp: typePtr);
@ -2450,44 +2453,61 @@ var
kind := ktp^.kind; kind := ktp^.kind;
{handle string constants} {handle string constants}
if (token.kind = stringConst) and (kind = scalarType) if token.kind = stringConst then begin
and (ktp^.baseType in [cgByte,cgUByte]) then begin stringElementType := StringType(token.prefix)^.aType;
if tp^.elements = 0 then begin if (kind = scalarType) and
tp^.elements := token.sval^.length + 1; (((ktp^.baseType in [cgByte,cgUByte])
RecomputeSizes(variable^.itype); and (stringElementType = charPtr))
end {if} or CompTypes(ktp,stringElementType)) then begin
else if tp^.elements < token.sval^.length then begin stringLength := token.sval^.length div ord(stringElementType^.size);
Error(44); if tp^.elements = 0 then begin
errorFound := true; tp^.elements := stringLength;
end; {else if} RecomputeSizes(variable^.itype);
with ktp^ do begin
iPtr := pointer(Malloc(sizeof(initializerRecord)));
iPtr^.next := variable^.iPtr;
variable^.iPtr := iPtr;
iPtr^.count := 1;
iPtr^.bitdisp := 0;
iPtr^.bitsize := 0;
iPtr^.isStructOrUnion := false;
if (variable^.storage in [external,global,private]) then begin
iPtr^.isConstant := true;
iPtr^.itype := cgString;
iPtr^.sval := token.sval;
count := tp^.elements - token.sval^.length;
if count <> 0 then
Fill(count, sCharPtr);
end {if} end {if}
else begin else if tp^.elements < stringLength-1 then begin
iPtr^.isConstant := false; Error(44);
new(ep); errorFound := true;
iPtr^.iTree := ep; end; {else if}
ep^.next := nil; with ktp^ do begin
ep^.left := nil; iPtr := pointer(Malloc(sizeof(initializerRecord)));
ep^.middle := nil; iPtr^.next := variable^.iPtr;
ep^.right := nil; variable^.iPtr := iPtr;
ep^.token := token; iPtr^.count := 1;
end; {else} iPtr^.bitdisp := 0;
end; {with} iPtr^.bitsize := 0;
NextToken; iPtr^.isStructOrUnion := false;
if (variable^.storage in [external,global,private]) then begin
iPtr^.isConstant := true;
iPtr^.itype := cgString;
iPtr^.sval := token.sval;
count := tp^.elements - stringLength;
if count > 0 then
Fill(count, stringElementType)
else if count = -1 then begin
iPtr^.sval := pointer(GMalloc(token.sval^.length+2));
CopyLongString(iPtr^.sval, token.sval);
iPtr^.sval^.length :=
iPtr^.sval^.length - ord(stringElementType^.size);
end; {else if}
end {if}
else begin
iPtr^.isConstant := false;
new(ep);
iPtr^.iTree := ep;
ep^.next := nil;
ep^.left := nil;
ep^.middle := nil;
ep^.right := nil;
ep^.token := token;
end; {else}
end; {with}
NextToken;
end {if}
else begin
Error(47);
errorFound := true;
NextToken;
end; {else}
end {if} end {if}
{handle arrays of non-strings} {handle arrays of non-strings}
@ -3643,7 +3663,8 @@ if isFunction then begin
NextToken; NextToken;
Match(lparench,13); Match(lparench,13);
if token.kind in if token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then begin [intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
then begin
toolNum := token.ival; toolNum := token.ival;
NextToken; NextToken;
end {if} end {if}
@ -3655,7 +3676,8 @@ if isFunction then begin
NextToken; NextToken;
end {if} end {if}
else if token.kind in else if token.kind in
[intconst,uintconst,charconst,scharconst,ucharconst] then begin [intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
then begin
dispatcher := token.ival; dispatcher := token.ival;
NextToken; NextToken;
end {if} end {if}
@ -4352,7 +4374,8 @@ var
elements := itype^.elements; elements := itype^.elements;
if elements = 0 then goto 1; {don't init flexible array member} if elements = 0 then goto 1; {don't init flexible array member}
if iPtr^.iTree^.token.kind = stringConst then begin if iPtr^.iTree^.token.kind = stringConst then begin
size := iPtr^.iTree^.token.sval^.length+1; elements := elements * itype^.aType^.size;
size := iPtr^.iTree^.token.sval^.length;
if size >= elements then if size >= elements then
size := ord(elements) size := ord(elements)
else else

View File

@ -239,6 +239,7 @@ type
onOffEnum = (on,off,default); {on-off values in standard pragmas} onOffEnum = (on,off,default); {on-off values in standard pragmas}
var var
charStrPrefix: charStrPrefixEnum; {prefix of character/string literal}
dateStr: longStringPtr; {macro date string} dateStr: longStringPtr; {macro date string}
doingCommandLine: boolean; {are we processing the cc= command line?} doingCommandLine: boolean; {are we processing the cc= command line?}
doingPPExpression: boolean; {are we processing a preprocessor expression?} doingPPExpression: boolean; {are we processing a preprocessor expression?}
@ -256,6 +257,7 @@ var
macroFound: macroRecordPtr; {last macro found by IsDefined} macroFound: macroRecordPtr; {last macro found by IsDefined}
mergingStrings: boolean; {is NextToken trying to merge strings?} mergingStrings: boolean; {is NextToken trying to merge strings?}
needWriteLine: boolean; {is there a line that needs to be written?} needWriteLine: boolean; {is there a line that needs to be written?}
octHexEscape: boolean; {octal/hex escape in char/string?}
onOffValue: onOffEnum; {value of last on-off switch} onOffValue: onOffEnum; {value of last on-off switch}
wroteLine: boolean; {has the current line already been written?} wroteLine: boolean; {has the current line already been written?}
numErr: 0..maxErr; {number of errors in this line} numErr: 0..maxErr; {number of errors in this line}
@ -343,6 +345,14 @@ procedure StartInclude (name: gsosOutStringPtr); extern;
{ 2. From Header.pas } { 2. From Header.pas }
function StringType(prefix: charStrPrefixEnum): typePtr; extern;
{ returns the type of a string literal with specified prefix }
{ }
{ parameters: }
{ prefix - the prefix }
procedure TermHeader; extern; procedure TermHeader; extern;
{ Stop processing the header file } { Stop processing the header file }
@ -722,6 +732,9 @@ if list or (numErr <> 0) then begin
162: msg := @'invalid escape sequence'; 162: msg := @'invalid escape sequence';
163: msg := @'pointer assignment discards qualifier(s)'; 163: msg := @'pointer assignment discards qualifier(s)';
164: msg := @'compound literals within functions are not supported by ORCA/C'; 164: msg := @'compound literals within functions are not supported by ORCA/C';
165: msg := @'''\p'' may not be used in a prefixed string';
166: msg := @'string literals with these prefixes may not be merged';
167: msg := @'''L''-prefixed character or string constants are not supported by ORCA/C';
otherwise: Error(57); otherwise: Error(57);
end; {case} end; {case}
writeln(msg^); writeln(msg^);
@ -768,6 +781,34 @@ var
ch: char; {work character} ch: char; {work character}
i: integer; {loop counter} i: integer; {loop counter}
str: string[23]; {temp string} str: string[23]; {temp string}
c16ptr: ^integer; {pointer to char16_t value}
c32ptr: ^longint; {pointer to char32_t value}
procedure PrintHexDigits(i: longint; count: integer);
{ Print a digit as a hex character }
{ }
{ Parameters: }
{ i: value to print in hexadecimal }
{ count: number of digits to print }
var
digit: integer; {hex digit value}
shift: integer; {amount to shift by}
begin {PrintHexDigits}
shift := 4 * (count-1);
while shift >= 0 do begin
digit := ord(i >> shift) & $000F;
if digit < 10 then
write(chr(digit | ord('0')))
else
write(chr(digit + ord('A') - 10));
shift := shift - 4;
end; {while}
end; {PrintHexDigits}
begin {PrintToken} begin {PrintToken}
case token.kind of case token.kind of
@ -778,7 +819,8 @@ case token.kind of
scharconst, scharconst,
ucharconst, ucharconst,
intconst, intconst,
uintconst: write(token.ival:1); uintconst,
ushortconst: write(token.ival:1);
longConst, longConst,
ulongConst: write(token.lval:1); ulongConst: write(token.lval:1);
@ -801,21 +843,43 @@ case token.kind of
extendedConst: write(token.rval:1); extendedConst: write(token.rval:1);
stringConst: begin stringConst: begin
write('"'); if token.prefix = prefix_u16 then begin
for i := 1 to token.sval^.length do begin write('u"');
ch := token.sval^.str[i]; i := 1;
if ch in [' '..'~'] then begin while i < token.sval^.length-2 do begin
if ch in ['"','\','?'] then write('\x');
c16Ptr := pointer(@token.sval^.str[i]);
PrintHexDigits(c16Ptr^, 4);
i := i + 2;
end; {while}
end {if}
else if token.prefix = prefix_U32 then begin
write('U"');
i := 1;
while i < token.sval^.length-4 do begin
write('\x');
c32Ptr := pointer(@token.sval^.str[i]);
PrintHexDigits(c32Ptr^, 8);
i := i + 4;
end; {while}
end {else if}
else begin
write('"');
for i := 1 to token.sval^.length-1 do begin
ch := token.sval^.str[i];
if ch in [' '..'~'] then begin
if ch in ['"','\','?'] then
write('\');
write(ch);
end {if}
else begin
write('\'); write('\');
write(ch); write((ord(ch)>>6):1);
end {if} write(((ord(ch)>>3) & $0007):1);
else begin write((ord(ch) & $0007):1);
write('\'); end; {else}
write((ord(ch)>>6):1); end; {for}
write(((ord(ch)>>3) & $0007):1); end; {else}
write((ord(ch) & $0007):1);
end; {else}
end; {for}
write('"'); write('"');
end; end;
@ -1047,6 +1111,9 @@ procedure LongToPString (pstr: stringPtr; lstr: longStringPtr);
{ Convert a long string into a p string } { Convert a long string into a p string }
{ } { }
{ The long string is assumed to include a terminating null byte,}
{ which is not copied to the p-string. }
{ }
{ Parameters: } { Parameters: }
{ pstr - pointer to the p-string } { pstr - pointer to the p-string }
{ lstr - pointer to the long string } { lstr - pointer to the long string }
@ -1056,7 +1123,7 @@ var
len: integer; {string length} len: integer; {string length}
begin {LongToPString} begin {LongToPString}
len := lstr^.length; len := lstr^.length-1;
if len > 255 then if len > 255 then
len := 255; len := 255;
pstr^[0] := chr(len); pstr^[0] := chr(len);
@ -1083,6 +1150,56 @@ var
len,len1: integer; {length of strings} len,len1: integer; {length of strings}
lt: tokenType; {local copy of token} lt: tokenType; {local copy of token}
str1,str2: stringPtr; {identifier strings} str1,str2: stringPtr; {identifier strings}
elementType: typePtr; {string element type}
procedure ConvertString (var str: tokenType; prefix: charStrPrefixEnum);
{ Convert unprefixed string literal str to a prefixed one }
var
sPtr: longStringPtr; {new string}
i,j,k: integer; {loop counters}
codePoint: ucsCodePoint; {Unicode code point}
c16ptr: ^integer; {pointer to char16_t value}
c32ptr: ^longint; {pointer to char32_t value}
utf8: utf8Rec; {UTF-8 encoding of character}
utf16: utf16Rec; {UTF-16 encoding of character}
begin {ConvertString}
sPtr := pointer(Malloc(str.sval^.length*4));
k := 0;
for i := 1 to str.sval^.length do begin
codePoint := ConvertMacRomanToUCS(str.sval^.str[i]);
if prefix = prefix_u8 then begin
UTF8Encode(codePoint, utf8);
for j := 1 to utf8.length do begin
sPtr^.str[k+1] := chr(utf8.bytes[j]);
k := k+1;
end; {for}
end {if}
else if prefix = prefix_u16 then begin
UTF16Encode(codePoint, utf16);
c16Ptr := pointer(@sPtr^.str[k+1]);
c16Ptr^ := utf16.codeUnits[1];
k := k+2;
if utf16.length = 2 then begin
c16ptr := pointer(@sPtr^.str[k+1]);
c16Ptr^ := utf16.codeUnits[2];
k := k+2;
end; {if}
end {else if}
else if prefix = prefix_U32 then begin
c32Ptr := pointer(@sPtr^.str[k+1]);
c32Ptr^ := codePoint;
k := k+4;
end; {else if}
end; {for}
sPtr^.length := k;
str.sval := sPtr;
str.prefix := prefix;
end; {ConvertString}
begin {Merge} begin {Merge}
kind1 := tk1.kind; kind1 := tk1.kind;
@ -1100,6 +1217,18 @@ if class1 in [identifier,reservedWord] then begin
str2 := @reservedWords[kind2] str2 := @reservedWords[kind2]
else if class2 in numericConstants then else if class2 in numericConstants then
str2 := tk2.numString str2 := tk2.numString
else if (class2 = stringConstant) and (tk2.prefix = prefix_none) then begin
if str1^ = 'u' then
ConvertString(tk2, prefix_u16)
else if str1^ = 'U' then
ConvertString(tk2, prefix_U32)
else if str1^ = 'u8' then
ConvertString(tk2, prefix_u8)
else
Error(63);
tk1 := tk2;
goto 1;
end {else if}
else begin else begin
Error(63); Error(63);
goto 1; goto 1;
@ -1145,7 +1274,16 @@ else if class1 in numericConstants then begin
else if class1 = stringConstant then begin else if class1 = stringConstant then begin
if class2 = stringConstant then begin if class2 = stringConstant then begin
len1 := tk1.sval^.length; if tk1.prefix = tk2.prefix then
{OK - nothing to do}
else if tk1.prefix = prefix_none then
ConvertString(tk1, tk2.prefix)
else if tk2.prefix = prefix_none then
ConvertString(tk2, tk1.prefix)
else
Error(166);
elementType := StringType(tk1.prefix)^.aType;
len1 := tk1.sval^.length - ord(elementType^.size);
len := len1+tk2.sval^.length; len := len1+tk2.sval^.length;
cp := pointer(Malloc(len+2)); cp := pointer(Malloc(len+2));
for i := 1 to len1 do for i := 1 to len1 do
@ -1154,7 +1292,7 @@ else if class1 = stringConstant then begin
cp^.str[i+len1] := tk2.sval^.str[i]; cp^.str[i+len1] := tk2.sval^.str[i];
cp^.length := len; cp^.length := len;
if tk1.ispstring then if tk1.ispstring then
cp^.str[1] := chr(len-1); cp^.str[1] := chr(len-2);
tk1.sval := cp; tk1.sval := cp;
goto 1; goto 1;
end; {if} end; {if}
@ -1334,12 +1472,14 @@ begin {BuildStringToken}
token.kind := stringconst; token.kind := stringconst;
token.class := stringConstant; token.class := stringConstant;
token.ispstring := false; token.ispstring := false;
token.sval := pointer(GMalloc(len+2)); token.sval := pointer(GMalloc(len+3));
token.prefix := prefix_none;
for i := 1 to len do begin for i := 1 to len do begin
token.sval^.str[i] := chr(cp^); token.sval^.str[i] := chr(cp^);
cp := pointer(ord4(cp)+1); cp := pointer(ord4(cp)+1);
end; {for} end; {for}
token.sval^.length := len; token.sval^.str[len+1] := chr(0);
token.sval^.length := len+1;
PutBackToken(token, true); PutBackToken(token, true);
end; {BuildStringToken} end; {BuildStringToken}
@ -1504,10 +1644,12 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst; token.kind := stringConst;
token.class := stringConstant; token.class := stringConstant;
token.ispstring := false; token.ispstring := false;
sp := pointer(Malloc(5+sourceFileGS.theString.size)); token.prefix := prefix_none;
sp^.length := sourceFileGS.theString.size; sp := pointer(Malloc(3+sourceFileGS.theString.size));
sp^.length := sourceFileGS.theString.size+1;
for i := 1 to sourceFileGS.theString.size do for i := 1 to sourceFileGS.theString.size do
sp^.str[i] := sourceFileGS.theString.theString[i]; sp^.str[i] := sourceFileGS.theString.theString[i];
sp^.str[i+1] := chr(0);
token.sval := sp; token.sval := sp;
tokenStart := @sp^.str; tokenStart := @sp^.str;
tokenEnd := pointer(ord4(tokenStart)+sp^.length); tokenEnd := pointer(ord4(tokenStart)+sp^.length);
@ -1517,6 +1659,7 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst; token.kind := stringConst;
token.class := stringConstant; token.class := stringConstant;
token.ispstring := false; token.ispstring := false;
token.prefix := prefix_none;
token.sval := dateStr; token.sval := dateStr;
tokenStart := @dateStr^.str; tokenStart := @dateStr^.str;
tokenEnd := pointer(ord4(tokenStart)+dateStr^.length); tokenEnd := pointer(ord4(tokenStart)+dateStr^.length);
@ -1527,6 +1670,7 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst; token.kind := stringConst;
token.class := stringConstant; token.class := stringConstant;
token.ispstring := false; token.ispstring := false;
token.prefix := prefix_none;
token.sval := timeStr; token.sval := timeStr;
tokenStart := @timeStr^.str; tokenStart := @timeStr^.str;
tokenEnd := pointer(ord4(tokenStart)+timeStr^.length); tokenEnd := pointer(ord4(tokenStart)+timeStr^.length);
@ -1537,8 +1681,8 @@ if macro^.readOnly then begin {handle special macros}
token.kind := intConst; {__ORCAC__} token.kind := intConst; {__ORCAC__}
token.numString := @oneStr; {__STDC_NO_...__} token.numString := @oneStr; {__STDC_NO_...__}
token.class := intConstant; {__ORCAC_HAS_LONG_LONG__} token.class := intConstant; {__ORCAC_HAS_LONG_LONG__}
token.ival := 1; token.ival := 1; {__STDC_UTF_16__}
oneStr := '1'; oneStr := '1'; {__STDC_UTF_32__}
tokenStart := @oneStr[1]; tokenStart := @oneStr[1];
tokenEnd := pointer(ord4(tokenStart)+1); tokenEnd := pointer(ord4(tokenStart)+1);
end; end;
@ -1547,6 +1691,7 @@ if macro^.readOnly then begin {handle special macros}
token.kind := stringConst; token.kind := stringConst;
token.class := stringConstant; token.class := stringConstant;
token.ispstring := false; token.ispstring := false;
token.prefix := prefix_none;
token.sval := versionStrL; token.sval := versionStrL;
tokenStart := @versionStrL^.str; tokenStart := @versionStrL^.str;
tokenEnd := pointer(ord4(tokenStart)+versionStrL^.length); tokenEnd := pointer(ord4(tokenStart)+versionStrL^.length);
@ -1606,7 +1751,7 @@ else begin
if tcPtr^.token.kind = stringconst then begin if tcPtr^.token.kind = stringconst then begin
BuildStringToken(@quoteStr[1], 1); BuildStringToken(@quoteStr[1], 1);
BuildStringToken(@tcPtr^.token.sval^.str, BuildStringToken(@tcPtr^.token.sval^.str,
tcPtr^.token.sval^.length); tcPtr^.token.sval^.length-1);
BuildStringToken(@quoteStr[1], 1); BuildStringToken(@quoteStr[1], 1);
end {if} end {if}
else begin else begin
@ -1851,6 +1996,7 @@ if ch = '<' then begin {process a library file...}
token.kind := stringconst; {convert a <> style name to a string} token.kind := stringconst; {convert a <> style name to a string}
token.class := stringConstant; token.class := stringConstant;
token.ispstring := false; token.ispstring := false;
token.prefix := prefix_none;
i := 0; i := 0;
while not (charKinds[ord(ch)] in [ch_eol,ch_gt]) do begin while not (charKinds[ord(ch)] in [ch_eol,ch_gt]) do begin
i := i+1; i := i+1;
@ -1883,7 +2029,7 @@ else begin
{handle file names that are strings or macro expansions} {handle file names that are strings or macro expansions}
expandMacros := true; {allow macros to be used in the name} expandMacros := true; {allow macros to be used in the name}
NextToken; {skip the command name} NextToken; {skip the command name}
if token.kind = stringConst then begin if (token.kind = stringConst) and (token.prefix = prefix_none) then begin
LongToPString(@workString, token.sval); LongToPString(@workString, token.sval);
CheckDelimiters(workString); CheckDelimiters(workString);
if mustExist then begin if mustExist then begin
@ -2417,6 +2563,10 @@ var
stringConstant: begin stringConstant: begin
if tk1^.token.sval^.length <> tk2^.token.sval^.length if tk1^.token.sval^.length <> tk2^.token.sval^.length
then goto 3; then goto 3;
if tk1^.token.ispstring <> tk2^.token.ispstring then
goto 3;
if tk1^.token.prefix <> tk2^.token.prefix then
goto 3;
for i := 1 to tk1^.token.sval^.length do for i := 1 to tk1^.token.sval^.length do
if tk1^.token.sval^.str[i] <> if tk1^.token.sval^.str[i] <>
tk2^.token.sval^.str[i] then tk2^.token.sval^.str[i] then
@ -2562,7 +2712,7 @@ var
while not (token.kind in [eolsy, eofsy]) do begin while not (token.kind in [eolsy, eofsy]) do begin
msg^ := concat(msg^, ' '); msg^ := concat(msg^, ' ');
if token.kind = stringConst then begin if token.kind = stringConst then begin
len := token.sval^.length; len := token.sval^.length-1;
for i := 1 to len do for i := 1 to len do
msg^ := concat(msg^, token.sval^.str[i]); msg^ := concat(msg^, token.sval^.str[i]);
end {if} end {if}
@ -2593,13 +2743,13 @@ var
begin {DoFloat} begin {DoFloat}
FlagPragmas(p_float); FlagPragmas(p_float);
NextToken; NextToken;
if token.kind in [intconst,uintconst] then begin if token.kind in [intconst,uintconst,ushortconst] then begin
floatCard := token.ival; floatCard := token.ival;
NextToken; NextToken;
end {if} end {if}
else else
Error(18); Error(18);
if token.kind in [intconst,uintconst] then begin if token.kind in [intconst,uintconst,ushortconst] then begin
floatSlot := $C080 | (token.ival << 4); floatSlot := $C080 | (token.ival << 4);
NextToken; NextToken;
end {if} end {if}
@ -2669,7 +2819,7 @@ var
NextToken; NextToken;
isNegative := true; isNegative := true;
end; {else if} end; {else if}
if token.kind in [intconst,uintconst] then begin if token.kind in [intconst,uintconst,ushortconst] then begin
value := token.ival; value := token.ival;
NextToken; NextToken;
end {if} end {if}
@ -2899,11 +3049,14 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin
end {if} end {if}
else else
Error(18); Error(18);
if token.kind = stringconst then begin if (token.kind = stringconst)
and (token.prefix = prefix_none) then begin
LongToPString( LongToPString(
pointer(ord4(@sourceFileGS.theString)+1), pointer(ord4(@sourceFileGS.theString)+1),
token.sval); token.sval);
sourceFileGS.theString.size := token.sval^.length; sourceFileGS.theString.size := token.sval^.length-1;
if sourceFileGS.theString.size > 255 then
sourceFileGS.theString.size := 255;
NextToken; NextToken;
end; {if} end; {if}
if token.kind <> eolsy then if token.kind <> eolsy then
@ -3866,7 +4019,9 @@ var
token.sval := pointer(Malloc(i+3)); {put the string in the string pool} token.sval := pointer(Malloc(i+3)); {put the string in the string pool}
CopyLongString(token.sval, pointer(sPtr)); CopyLongString(token.sval, pointer(sPtr));
dispose(sPtr); dispose(sPtr);
token.sval^.str[i+1] := chr(0); {add null in case the string is extended} token.sval^.str[i+1] := chr(0); {add null terminator}
token.sval^.length := i+1;
token.prefix := prefix_none;
end; {GetString} end; {GetString}
@ -3912,6 +4067,7 @@ doingPPExpression := false; {not doing a preprocessor expression}
unix_1 := false; {int is 16 bits} unix_1 := false; {int is 16 bits}
lintIsError := true; {lint messages are considered errors} lintIsError := true; {lint messages are considered errors}
fenvAccess := false; {not accessing fp environment} fenvAccess := false; {not accessing fp environment}
charStrPrefix := prefix_none; {no char/str prefix seen}
mergingStrings := false; {not currently merging strings} mergingStrings := false; {not currently merging strings}
{error codes for lint messages} {error codes for lint messages}
@ -3986,6 +4142,24 @@ mp^.algorithm := 6;
bp := pointer(ord4(macros) + hash(mp^.name)); bp := pointer(ord4(macros) + hash(mp^.name));
mp^.next := bp^; mp^.next := bp^;
bp^ := mp; bp^ := mp;
new(mp); {__STDC_UTF_16__}
mp^.name := @'__STDC_UTF_16__';
mp^.parameters := -1;
mp^.tokens := nil;
mp^.readOnly := true;
mp^.algorithm := 5;
bp := pointer(ord4(macros) + hash(mp^.name));
mp^.next := bp^;
bp^ := mp;
new(mp); {__STDC_UTF_32__}
mp^.name := @'__STDC_UTF_32__';
mp^.parameters := -1;
mp^.tokens := nil;
mp^.readOnly := true;
mp^.algorithm := 5;
bp := pointer(ord4(macros) + hash(mp^.name));
mp^.next := bp^;
bp^ := mp;
new(mp); {__ORCAC_HAS_LONG_LONG__} new(mp); {__ORCAC_HAS_LONG_LONG__}
mp^.name := @'__ORCAC_HAS_LONG_LONG__'; mp^.name := @'__ORCAC_HAS_LONG_LONG__';
mp^.parameters := -1; mp^.parameters := -1;
@ -4042,8 +4216,8 @@ mp^.next := bp^;
bp^ := mp; bp^ := mp;
SetDateTime; {set up the macro date/time strings} SetDateTime; {set up the macro date/time strings}
{set up the version string} {set up the version string}
versionStrL := pointer(GMalloc(3 + length(versionStr))); versionStrL := pointer(GCalloc(3 + length(versionStr)));
versionStrL^.length := length(versionStr); versionStrL^.length := length(versionStr)+1;
versionStrL^.str := versionStr; versionStrL^.str := versionStr;
{Scan the command line options} {Scan the command line options}
@ -4197,7 +4371,7 @@ procedure NextToken;
{ Read the next token from the file. } { Read the next token from the file. }
label 1,2,3,4,5; label 1,2,3,4,5,6;
type type
three = (s100,s1000,s4000); {these declarations are used for a} three = (s100,s1000,s4000); {these declarations are used for a}
@ -4228,11 +4402,15 @@ var
tToken: tokenType; {for merging tokens} tToken: tokenType; {for merging tokens}
sPtr,tsPtr: gstringPtr; {for forming string constants} sPtr,tsPtr: gstringPtr; {for forming string constants}
lLastWasReturn: boolean; {local copy of lastWasReturn} lLastWasReturn: boolean; {local copy of lastWasReturn}
codePoint: ucsCodePoint; {Unicode code point from UCN} codePoint: longint; {Unicode character value}
chFromUCN: integer; {character given by UCN (converted)} chFromUCN: integer; {character given by UCN (converted)}
c16ptr: ^integer; {pointer to char16_t value}
c32ptr: ^longint; {pointer to char32_t value}
utf8: utf8Rec; {UTF-8 encoding of character}
utf16: utf16Rec; {UTF-16 encoding of character}
function EscapeCh: integer; function EscapeCh: longint;
{ Find and return the next character in a string or char } { Find and return the next character in a string or char }
{ constant. Handle escape sequences if they are found. } { constant. Handle escape sequences if they are found. }
@ -4241,19 +4419,20 @@ var
{ Globals: } { Globals: }
{ ch - first character in sequence; set to first char } { ch - first character in sequence; set to first char }
{ after sequence } { after sequence }
{ charStrPrefix - prefix of the char constant or string }
label 1; { octHexEscape - true if this was an octal/hex escape seq.}
var var
cnt: 0..3; {for counting octal escape sequences} cnt: 0..3; {for counting octal escape sequences}
dig: 0..15; {value of a hex digit} dig: 0..15; {value of a hex digit}
skipChar: boolean; {get next char when done?} skipChar: boolean; {get next char when done?}
val: 0..maxint; {hex/octal escape code value} val: longint; {hex/octal escape code value}
codePoint: ucsCodePoint; {code point given by UCN} codePoint: ucsCodePoint; {code point given by UCN}
chFromUCN: integer; {character given by UCN (converted)} chFromUCN: integer; {character given by UCN (converted)}
begin {EscapeCh} begin {EscapeCh}
1: skipChar := true; octHexEscape := false;
skipChar := true;
if ch = '\' then begin if ch = '\' then begin
NextCh; NextCh;
if ch in ['0'..'7','a','b','t','n','v','f','p','r','x','u','U', if ch in ['0'..'7','a','b','t','n','v','f','p','r','x','u','U',
@ -4268,9 +4447,13 @@ var
NextCh; NextCh;
end; {while} end; {while}
if (val & $FF00) <> 0 then if (val & $FF00) <> 0 then
if not skipping then if charStrPrefix in [prefix_none,prefix_u8] then begin
Error(162); if not skipping then
EscapeCh := val & $FF; Error(162);
val := 0;
end; {if}
EscapeCh := val;
octHexEscape := true;
skipChar := false; skipChar := false;
end; end;
'a': EscapeCh := 7; 'a': EscapeCh := 7;
@ -4294,28 +4477,41 @@ var
ch := chr(ord(ch)&$5F); ch := chr(ord(ch)&$5F);
dig := ord(ch)-ord('A')+10; dig := ord(ch)-ord('A')+10;
end; {else} end; {else}
val := (val << 4) | dig; if ((charStrPrefix = prefix_none) and ((val & $F0) <> 0)) or
if (val & $FF00) <> 0 then begin ((charStrPrefix = prefix_u8) and ((val & $F0) <> 0)) or
((charStrPrefix = prefix_u16) and ((val & $F000) <> 0)) or
((charStrPrefix = prefix_u32) and ((val & $F0000000) <> 0))
then begin
if not skipping then if not skipping then
Error(162); Error(162);
while ch in ['0'..'9','a'..'f','A'..'F'] do
NextCh;
val := 0; val := 0;
end; {if} end {if}
NextCh; else begin
val := (val << 4) | dig;
NextCh;
end; {else}
end; {while} end; {while}
skipChar := false; skipChar := false;
EscapeCh := val & $FF; EscapeCh := val;
octHexEscape := true;
end; end;
'u','U': begin 'u','U': begin
codePoint := UniversalCharacterName; codePoint := UniversalCharacterName;
chFromUCN := ConvertUCSToMacRoman(codePoint);
skipChar := false; skipChar := false;
if chFromUCN >= 0 then if charStrPrefix = prefix_none then begin
EscapeCh := chFromUCN chFromUCN := ConvertUCSToMacRoman(codePoint);
else begin if chFromUCN >= 0 then
EscapeCh := 0; EscapeCh := chFromUCN
if not skipping then else begin
Error(146); EscapeCh := 0;
end; {else} if not skipping then
Error(146);
end; {else}
end {if}
else
EscapeCh := codePoint;
end; end;
'''','"','?','\': EscapeCh := ord(ch); '''','"','?','\': EscapeCh := ord(ch);
otherwise: Error(57); otherwise: Error(57);
@ -4327,7 +4523,10 @@ var
end; {else} end; {else}
end {if} end {if}
else else
EscapeCh := ord(ch); if charStrPrefix = prefix_none then
EscapeCh := ord(ch)
else
EscapeCh := ConvertMacRomanToUCS(ord(ch));
if skipChar then if skipChar then
NextCh; NextCh;
end; {EscapeCh} end; {EscapeCh}
@ -4353,11 +4552,20 @@ var
{skip the leading quote} {skip the leading quote}
NextCh; NextCh;
if charStrPrefix = prefix_L then begin
charStrPrefix := prefix_u16;
if not skipping then
Error(167);
end; {if}
{read the characters in the constant} {read the characters in the constant}
while (not (charKinds[ord(ch)] in [ch_char,ch_eol,ch_eof])) do begin while (not (charKinds[ord(ch)] in [ch_char,ch_eol,ch_eof])) do begin
if cnt < maxint then if cnt < maxint then
cnt := cnt + 1; cnt := cnt + 1;
result := (result << 8) | EscapeCh; if charStrPrefix = prefix_none then
result := (result << 8) | EscapeCh
else
result := EscapeCh;
end; {while} end; {while}
doingStringOrCharacter := false; doingStringOrCharacter := false;
@ -4371,16 +4579,35 @@ var
Error(2); Error(2);
{create the token} {create the token}
if allowLongIntChar and (cnt >= 3) then begin if charStrPrefix = prefix_none then begin
token.kind := longconst; if allowLongIntChar and (cnt >= 3) then begin
token.kind := longconst;
token.class := longConstant;
token.lval := result;
end {if}
else begin
token.kind := intconst;
token.class := intConstant;
token.ival := long(result).lsw;
end; {else}
end {if}
else if charStrPrefix = prefix_u16 then begin
token.kind := ushortconst;
token.class := intConstant;
if octHexEscape then
token.ival := long(result).lsw
else begin
UTF16Encode(result, utf16);
token.ival := utf16.codeUnits[1];
end; {else}
end {else if}
else if charStrPrefix = prefix_U32 then begin
token.kind := ulongconst;
token.class := longConstant; token.class := longConstant;
token.lval := result; token.lval := result;
end {if} end; {else if}
else begin
token.kind := intconst; charStrPrefix := prefix_none; {no prefix for next char/str (so far)}
token.class := intConstant;
token.ival := long(result).lsw;
end {else}
end; {CharConstant} end; {CharConstant}
@ -4499,6 +4726,7 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
tokenLine := lineNumber; {record the position of the token} tokenLine := lineNumber; {record the position of the token}
tokenColumn := ord(ord4(chPtr)-ord4(firstPtr)); tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
tokenStart := pointer(ord4(chPtr)-1); tokenStart := pointer(ord4(chPtr)-1);
6:
token.class := reservedSymbol; {default to the most common class} token.class := reservedSymbol; {default to the most common class}
case charKinds[ord(ch)] of case charKinds[ord(ch)] of
@ -4740,49 +4968,123 @@ case charKinds[ord(ch)] of
doingStringOrCharacter := true; {change character scanning} doingStringOrCharacter := true; {change character scanning}
token.kind := stringconst; {set up the token} token.kind := stringconst; {set up the token}
token.class := stringConstant; token.class := stringConstant;
i := 0; {set up for the string scan} ispstring := false; {set up for the string scan}
ispstring := false;
setLength := false; setLength := false;
new(sPtr,s100);
NextCh; {skip the opening "} NextCh; {skip the opening "}
{read the characters} {read the characters}
while not (charKinds[ord(ch)] in [ch_string,ch_eol,ch_eof]) do begin if charStrPrefix = prefix_none then begin
i := i+1; i := 0;
if i = 101 then begin new(sPtr,s100);
sPtr^.len1 := 100; while not (charKinds[ord(ch)] in [ch_string,ch_eol,ch_eof]) do begin
new(tsPtr,s1000); i := i+1;
CopyLongString(pointer(tsPtr), pointer(sPtr)); if i = 101 then begin
dispose(sPtr); sPtr^.len1 := 100;
sPtr := tsPtr; new(tsPtr,s1000);
end {if} CopyLongString(pointer(tsPtr), pointer(sPtr));
else if i = 1001 then begin dispose(sPtr);
sPtr^.len2 := 1000; sPtr := tsPtr;
new(tsPtr,s4000); end {if}
CopyLongString(pointer(tsPtr), pointer(sPtr)); else if i = 1001 then begin
dispose(sPtr); sPtr^.len2 := 1000;
sPtr := tsPtr; new(tsPtr,s4000);
end {else if} CopyLongString(pointer(tsPtr), pointer(sPtr));
else if i = longstringlen then begin dispose(sPtr);
i := 1001; sPtr := tsPtr;
Error(90); end {else if}
end; {else if} else if i = longstringlen then begin
sPtr^.str1[i] := chr(EscapeCh); i := 1001;
if (i = 1) and ispstring then Error(90);
setLength := true; end; {else if}
end; {while} sPtr^.str1[i] := chr(ord(EscapeCh));
if (i = 1) and ispstring then
setLength := true;
end; {while}
end {if}
else begin
if charStrPrefix = prefix_L then begin
charStrPrefix := prefix_u16;
if not skipping then
Error(167);
end; {if}
i := 1;
new(sPtr,s4000);
while not (charKinds[ord(ch)] in [ch_string,ch_eol,ch_eof]) do begin
if i > longstringlen-8 then begin {leave space for char and null}
i := 1;
Error(90);
end; {if}
codePoint := EscapeCh;
if charStrPrefix = prefix_u8 then begin
if octHexEscape then begin
sPtr^.str1[i] := chr(ord(codePoint));
i := i+1;
end {if}
else begin
UTF8Encode(codePoint, utf8);
for j := 1 to utf8.length do begin
sPtr^.str1[i] := chr(utf8.bytes[j]);
i := i+1;
end; {for}
end; {else}
end {if}
else if charStrPrefix = prefix_u16 then begin
c16ptr := pointer(@sPtr^.str1[i]);
if octHexEscape then begin
c16ptr^ := ord(codePoint);
i := i+2;
end {if}
else begin
UTF16Encode(codePoint, utf16);
c16Ptr^ := utf16.codeUnits[1];
i := i+2;
if utf16.length = 2 then begin
c16ptr := pointer(@sPtr^.str1[i]);
c16Ptr^ := utf16.codeUnits[2];
i := i+2;
end; {if}
end {else}
end {else}
else if charStrPrefix = prefix_U32 then begin
c32ptr := pointer(@sPtr^.str1[i]);
c32ptr^ := codePoint;
i := i+4;
end {else}
end; {while}
i := i-1;
end; {else}
doingStringOrCharacter := false; {process the end of the string} doingStringOrCharacter := false; {process the end of the string}
if ch = '"' then if ch = '"' then
NextCh NextCh
else else
Error(3); Error(3);
if setLength then {check for a p-string} if setLength then {check for a p-string}
sPtr^.str1[1] := chr(i-1); if charStrPrefix <> prefix_none then begin
if not skipping then
Error(165);
setLength := false;
end {if}
else
sPtr^.str1[1] := chr(i-1);
token.ispstring := setLength; token.ispstring := setLength;
sPtr^.len1 := i; {set the string length} sPtr^.len1 := i; {set the string length}
token.sval := pointer(Malloc(i+3)); {put the string in the string pool} token.sval := pointer(Malloc(i+6)); {put the string in the string pool}
CopyLongString(token.sval, pointer(sPtr)); CopyLongString(token.sval, pointer(sPtr));
dispose(sPtr); dispose(sPtr);
token.sval^.str[i+1] := chr(0); {add null in case the string is extended} token.sval^.str[i+1] := chr(0); {add null terminator}
if charStrPrefix = prefix_u16 then begin
token.sval^.str[i+2] := chr(0);
token.sval^.length := i+2;
end {if}
else if charStrPrefix = prefix_U32 then begin
token.sval^.str[i+2] := chr(0);
token.sval^.str[i+3] := chr(0);
token.sval^.str[i+4] := chr(0);
token.sval^.length := i+4;
end {else if}
else
token.sval^.length := i+1;
token.prefix := charStrPrefix; {record prefix}
charStrPrefix := prefix_none; {no prefix for next char/str (so far)}
end; end;
letter,ch_backslash: begin {reserved words and identifiers} letter,ch_backslash: begin {reserved words and identifiers}
@ -4819,6 +5121,25 @@ case charKinds[ord(ch)] of
end; {if} end; {if}
end; {while} end; {while}
workString[0] := chr(i); workString[0] := chr(i);
if i = 1 then begin {detect prefixed char/string literal}
if charKinds[ord(ch)] in [ch_char,ch_string] then begin
if workString[1] in ['L','u','U'] then begin
if workString[1] = 'L' then
charStrPrefix := prefix_L
else if workString[1] = 'u' then
charStrPrefix := prefix_u16
else if workString[1] = 'U' then
charStrPrefix := prefix_U32;
goto 6;
end; {if}
end; {if}
end {if}
else if i = 2 then
if charKinds[ord(ch)] = ch_string then
if workString = 'u8' then begin
charStrPrefix := prefix_u8;
goto 6;
end; {if}
CheckIdentifier; CheckIdentifier;
end; end;

View File

@ -45,8 +45,11 @@
{ boolPtr - pointer to the base type for _Bool } { boolPtr - pointer to the base type for _Bool }
{ voidPtr - pointer to the base type for void } { voidPtr - pointer to the base type for void }
{ voidPtrPtr - typeless pointer, for some type casting } { voidPtrPtr - typeless pointer, for some type casting }
{ stringTypePtr - pointer to the base type for string } { stringTypePtr - pointer to the base type for string literals }
{ constants } { utf16StringTypePtr - pointer to the base type for UTF-16 }
{ string literals }
{ utf32StringTypePtr - pointer to the base type for UTF-32 }
{ string literals }
{ constCharPtr - pointer to the type const char } { constCharPtr - pointer to the type const char }
{ defaultStruct - default for structures with errors } { defaultStruct - default for structures with errors }
{ } { }
@ -82,8 +85,8 @@ var
{base types} {base types}
charPtr,sCharPtr,uCharPtr,shortPtr,uShortPtr,intPtr,uIntPtr,int32Ptr, charPtr,sCharPtr,uCharPtr,shortPtr,uShortPtr,intPtr,uIntPtr,int32Ptr,
uInt32Ptr,longPtr,uLongPtr,longLongPtr,uLongLongPtr,boolPtr, uInt32Ptr,longPtr,uLongPtr,longLongPtr,uLongLongPtr,boolPtr,
floatPtr,doublePtr,compPtr,extendedPtr,stringTypePtr,voidPtr, floatPtr,doublePtr,compPtr,extendedPtr,stringTypePtr,utf16StringTypePtr,
voidPtrPtr,constCharPtr,defaultStruct: typePtr; utf32StringTypePtr,voidPtr,voidPtrPtr,constCharPtr,defaultStruct: typePtr;
{---------------------------------------------------------------} {---------------------------------------------------------------}
@ -229,6 +232,14 @@ procedure ResolveForwardReference (iPtr: identPtr);
{ parameters: } { parameters: }
{ iPtr - ptr to the forward declared identifier } { iPtr - ptr to the forward declared identifier }
function StringType(prefix: charStrPrefixEnum): typePtr;
{ returns the type of a string literal with specified prefix }
{ }
{ parameters: }
{ prefix - the prefix }
{---------------------------------------------------------------} {---------------------------------------------------------------}
implementation implementation
@ -1559,6 +1570,24 @@ with stringTypePtr^ do begin
aType := charPtr; aType := charPtr;
elements := 1; elements := 1;
end; {with} end; {with}
new(utf16StringTypePtr); {UTF-16 string constant type}
with utf16StringTypePtr^ do begin
size := 0;
saveDisp := 0;
qualifiers := [];
kind := arrayType;
aType := uShortPtr;
elements := 1;
end; {with}
new(utf32StringTypePtr); {UTF-32 string constant type}
with utf32StringTypePtr^ do begin
size := 0;
saveDisp := 0;
qualifiers := [];
kind := arrayType;
aType := uLongPtr;
elements := 1;
end; {with}
new(voidPtr); {void} new(voidPtr); {void}
with voidPtr^ do begin with voidPtr^ do begin
size := 0; size := 0;
@ -1940,6 +1969,23 @@ if tPtr^.kind in [structType,unionType] then begin
end; {if} end; {if}
end; {ResolveForwardReference} end; {ResolveForwardReference}
function StringType{prefix: charStrPrefixEnum): typePtr};
{ returns the type of a string literal with specified prefix }
{ }
{ parameters: }
{ prefix - the prefix }
begin {StringType}
if prefix in [prefix_none,prefix_u8] then
StringType := stringTypePtr
else if prefix in [prefix_u16,prefix_L] then
StringType := utf16StringTypePtr
else
StringType := utf32StringTypePtr;
end; {StringType}
end. end.
{$append 'symbol.asm'} {$append 'symbol.asm'}

View File

@ -286,7 +286,7 @@ charSym start single character symbols
! constants ! constants
enum (intconst,uintconst,longconst,ulongconst,longlongconst) enum (intconst,uintconst,longconst,ulongconst,longlongconst)
enum (ulonglongconst,floatconst,doubleconst,extendedconst,compconst) enum (ulonglongconst,floatconst,doubleconst,extendedconst,compconst)
enum (charconst,scharconst,ucharconst,stringconst) enum (charconst,scharconst,ucharconst,ushortconst,stringconst)
! reserved words ! reserved words
enum (_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy) enum (_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy)
enum (_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy) enum (_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy)
@ -366,6 +366,7 @@ icp start in-coming priority for expression
dc i1'200' charconst dc i1'200' charconst
dc i1'200' scharconst dc i1'200' scharconst
dc i1'200' ucharconst dc i1'200' ucharconst
dc i1'200' ushortconst
dc i1'200' stringconst dc i1'200' stringconst
dc i1'200' _Alignassy dc i1'200' _Alignassy
dc i1'16' _Alignofsy dc i1'16' _Alignofsy
@ -539,6 +540,7 @@ isp start in stack priority for expression
dc i1'0' charconst dc i1'0' charconst
dc i1'0' scharconst dc i1'0' scharconst
dc i1'0' ucharconst dc i1'0' ucharconst
dc i1'0' ushortconst
dc i1'0' stringconst dc i1'0' stringconst
dc i1'0' _Alignassy dc i1'0' _Alignassy
dc i1'16' _Alignofsy dc i1'16' _Alignofsy
@ -912,7 +914,7 @@ wordHash start reserved word hash table
! constants ! constants
enum (intconst,uintconst,longconst,ulongconst,longlongconst) enum (intconst,uintconst,longconst,ulongconst,longlongconst)
enum (ulonglongconst,floatconst,doubleconst,extendedconst,compconst) enum (ulonglongconst,floatconst,doubleconst,extendedconst,compconst)
enum (charconst,scharconst,ucharconst,stringconst) enum (charconst,scharconst,ucharconst,ushortconst,stringconst)
! reserved words ! reserved words
enum (_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy) enum (_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy)
enum (_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy) enum (_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy)

View File

@ -116,6 +116,8 @@ p. 237
ORCA/C now supports character constants containing multiple characters. See "Multi-Character Character Constants," below. ORCA/C now supports character constants containing multiple characters. See "Multi-Character Character Constants," below.
Character and string constants may now have prefixes indicating they should use Unicode encodings. See "New Language Features," below.
p. 238 p. 238
The limit on the total length of string constants in a single function has been raised to 12500 characters. The limit on the total length of string constants in a single function has been raised to 12500 characters.
@ -128,7 +130,7 @@ p. 240
The discussion of escape sequences states that hexadecimal numeric escape sequences can contain from one to three digits. This was true until ORCA/C 2.1, when the compiler was changed to respect the ANSI C standard. The compiler will now scan a hexadecimal escape sequence until no more hexadecimal characters are found. (ORCA/C 2.1 would also scan an octal escape sequence until no more octal characters were found, but this has been changed back to limit octal escape sequences to at most three octal digits, as required by the C standards.) The discussion of escape sequences states that hexadecimal numeric escape sequences can contain from one to three digits. This was true until ORCA/C 2.1, when the compiler was changed to respect the ANSI C standard. The compiler will now scan a hexadecimal escape sequence until no more hexadecimal characters are found. (ORCA/C 2.1 would also scan an octal escape sequence until no more octal characters were found, but this has been changed back to limit octal escape sequences to at most three octal digits, as required by the C standards.)
The value of an octal or hexadecimal escape sequence must be within the range of representable character values (0-255). Also, \ may not be followed by a character other than one of the ones described as forming an escape sequence. ORCA/C now gives an error in these cases. Accordingly, the examples of "\410" and '\g' mentioned in the manual are now treated as errors. The value of an octal or hexadecimal escape sequence must be within the range of representable values in the relevant type (0-255 for char). Also, \ may not be followed by a character other than one of the ones described as forming an escape sequence. ORCA/C now gives an error in these cases. Accordingly, the examples of "\410" and '\g' mentioned in the manual are now treated as errors.
p. 241 p. 241
@ -373,7 +375,7 @@ and may appear wherever a declaration can (including inside and outside function
These behave the same as the existing tokens [, ], {, }, #, and ## (respectively), apart from their spelling. These behave the same as the existing tokens [, ], {, }, #, and ## (respectively), apart from their spelling.
14. (C99) Universal character names are now supported in string literals, character constants, and identifiers. These are sequences of the form \unnnn or \Unnnnnnnn, where the nnnn or nnnnnnnn is a hexadecimal representation of a Unicode code point. These may be used to represent characters in a way that is independent of the source and execution character sets. In a string literal or character constant, only characters that can be mapped to the execution character set may be represented. There are also certain other restrictions on what characters can be used; see the C standards for details. For ORCA/C the source and execution character sets are both considered to be Mac OS Roman, the character set used in the IIGS desktop environment. 14. (C99) Universal character names are now supported in string literals, character constants, and identifiers. These are sequences of the form \unnnn or \Unnnnnnnn, where the nnnn or nnnnnnnn is a hexadecimal representation of a Unicode code point. These may be used to represent characters in a way that is independent of the source and execution character sets. In an unprefixed string literal or character constant, only characters that can be mapped to the execution character set may be represented. There are also certain other restrictions on what characters can be used; see the C standards for details. In ORCA/C the source and execution character sets are both considered to be the character set used in the IIGS desktop environment, known as Mac OS Roman.
15. (C99) Function-like macros may take a variable number of arguments. To specify such a macro, include "..." as the last (or only) member of its parameter list. When the macro is used, one or more corresponding arguments may be provided. To access these arguments within the macro, use the special identifier __VA_ARGS__. This functions similarly to a normal macro parameter, but its expansion includes the tokens from all the corresponding arguments that were provided, including any commas separating arguments. 15. (C99) Function-like macros may take a variable number of arguments. To specify such a macro, include "..." as the last (or only) member of its parameter list. When the macro is used, one or more corresponding arguments may be provided. To access these arguments within the macro, use the special identifier __VA_ARGS__. This functions similarly to a normal macro parameter, but its expansion includes the tokens from all the corresponding arguments that were provided, including any commas separating arguments.
@ -385,6 +387,8 @@ __STDC_HOSTED__ normally expands to the integer constant 1, indicating that ORCA
__STDC_NO_ATOMICS__, __STDC_NO_COMPLEX__, __STDC_NO_THREADS__, and __STDC_NO_VLA__ all expand to the integer constant 1. These indicate that ORCA/C does not implement certain C language features that are optional under the C11 and later standards (atomics, complex numbers, threads, and variable length arrays). __STDC_NO_ATOMICS__, __STDC_NO_COMPLEX__, __STDC_NO_THREADS__, and __STDC_NO_VLA__ all expand to the integer constant 1. These indicate that ORCA/C does not implement certain C language features that are optional under the C11 and later standards (atomics, complex numbers, threads, and variable length arrays).
__STDC_UTF_16__ and __STDC_UTF_32__ expand to the integer constant 1. These indicate that the char16_t and char32_t types (discussed below) use UTF-16 and UTF-32 encodings.
18. (C99) The _Bool type is now supported. This is a boolean type that can hold the values 0 or 1. When a value of another type is converted to _Bool, the result is 0 if the value compares equal to 0, or 1 otherwise. 18. (C99) The _Bool type is now supported. This is a boolean type that can hold the values 0 or 1. When a value of another type is converted to _Bool, the result is 0 if the value compares equal to 0, or 1 otherwise.
19. (C99) The types "long long" and "unsigned long long" are now supported. In ORCA/C, these are 64-bit integer types, capable of representing a larger range of values than the existing smaller integer types. All operations that can be done on other integer types can now be done on these types as well. 19. (C99) The types "long long" and "unsigned long long" are now supported. In ORCA/C, these are 64-bit integer types, capable of representing a larger range of values than the existing smaller integer types. All operations that can be done on other integer types can now be done on these types as well.
@ -430,11 +434,13 @@ As an example, this expression evaluates to 2 because the type of 1+2 is int:
Generic selection expressions are primarily useful within macros, which can give different behavior based on the types of the arguments passed to them. Generic selection expressions are primarily useful within macros, which can give different behavior based on the types of the arguments passed to them.
23. (C11) Character constants and string literals may now have prefixes indicating they should use Unicode encodings. The prefixes u8, u, and U indicate UTF-8, UTF-16, and UTF-32 encodings, respectively. The u8 prefix may only be used on string literals. The U and u prefixes may be used on string literals or character constants. U- and u-prefixed character constants have the types char32_t and char16_t (as defined in <uchar.h>); U- and u-prefixed string literals are treated as arrays of those types. For example, the string literal U"abc" designates an array with four members of type char32_t: the three letters encoded in UTF-32, plus a null terminator.
Multi-Character Character Constants Multi-Character Character Constants
----------------------------------- -----------------------------------
Character constants containing multiple characters are now supported, as required by the C standards. The value of such constants is implementation-defined. In ORCA/C, the value is initially set to the ordinal value of the first character, as in a single-character constant. For each subsequent character encountered, the existing value is shifted left by eight bit positions, and the ordinal value of the new character is placed in the lower eight bits. (This is similar to the behavior of GCC and Clang.) Unprefixed character constants containing multiple characters are now supported, as required by the C standards. The value of such constants is implementation-defined. In ORCA/C, the value is initially set to the ordinal value of the first character, as in a single-character constant. For each subsequent character encountered, the existing value is shifted left by eight bit positions, and the ordinal value of the new character is placed in the lower eight bits. (This is similar to the behavior of GCC and Clang.)
A new bit is also introduced in #pragma ignore that affects the interpretation of such constants. Setting #pragma ignore bit 1 (a value of 2) causes character constants with three or more characters to be treated as having type long, rather than type int. This non-standard feature effectively allows a character constant to contain the values of up to four characters, rather than only two. A new bit is also introduced in #pragma ignore that affects the interpretation of such constants. Setting #pragma ignore bit 1 (a value of 2) causes character constants with three or more characters to be treated as having type long, rather than type int. This non-standard feature effectively allows a character constant to contain the values of up to four characters, rather than only two.
@ -579,6 +585,7 @@ ORCA/C now includes several new headers specified by recent C standards.
9. (C11) The <uchar.h> header defines the types char16_t and char32_t suitable for holding UTF-16 and UTF-32 code units, and provides functions for handling Unicode characters. 9. (C11) The <uchar.h> header defines the types char16_t and char32_t suitable for holding UTF-16 and UTF-32 code units, and provides functions for handling Unicode characters.
Library Updates Library Updates
--------------- ---------------