mirror of
https://github.com/byteworksinc/ORCA-C.git
synced 2025-01-13 16:32:45 +00:00
Support UTF-8/16/32 string literals and character constants (C11).
These have u8, u, or U prefixes, respectively. The types char16_t and char32_t (defined in <uchar.h>) are used for UTF-16 and UTF-32 code points.
This commit is contained in:
parent
222c34a385
commit
5871820e0c
@ -172,7 +172,7 @@ type
|
||||
{ They are created only by casts. }
|
||||
intconst,uintconst,longconst,ulongconst,longlongconst,
|
||||
ulonglongconst,floatconst,doubleconst,extendedconst,compconst,
|
||||
charconst,scharconst,ucharconst,stringconst,
|
||||
charconst,scharconst,ucharconst,ushortconst,stringconst,
|
||||
{reserved words}
|
||||
_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy,
|
||||
_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy,
|
||||
@ -210,6 +210,9 @@ type
|
||||
ch_asterisk,ch_slash,ch_percent,ch_carot,ch_pound,ch_colon,
|
||||
ch_backslash,letter,digit);
|
||||
|
||||
{prefixes of a character/string literal}
|
||||
charStrPrefixEnum = (prefix_none,prefix_L,prefix_u16,prefix_U32,prefix_u8);
|
||||
|
||||
tokenSet = set of tokenEnum;
|
||||
tokenClass = (reservedWord,reservedSymbol,identifier,intConstant,longConstant,
|
||||
longlongConstant,realConstant,stringConstant,macroParameter);
|
||||
@ -227,7 +230,8 @@ type
|
||||
longlongConstant: (qval: longlong);
|
||||
realConstant : (rval: extended);
|
||||
stringConstant: (sval: longstringPtr;
|
||||
ispstring: boolean);
|
||||
ispstring: boolean;
|
||||
prefix: charStrPrefixEnum);
|
||||
macroParameter: (pnum: integer);
|
||||
end;
|
||||
|
||||
|
@ -407,7 +407,8 @@
|
||||
{ GenS(pc_lca, str) }
|
||||
{ }
|
||||
{ Loads the address of a string onto the stack. Str is a }
|
||||
{ pointer to a string constant. }
|
||||
{ pointer to a string constant. No null terminator is added; }
|
||||
{ it should be explicitly included in str if desired. }
|
||||
{ }
|
||||
{ }
|
||||
{ pc_lda - load a local address }
|
||||
|
82
Charset.pas
82
Charset.pas
@ -28,6 +28,14 @@ const
|
||||
|
||||
type
|
||||
ucsCodePoint = 0..maxUCSCodePoint;
|
||||
utf8Rec = record
|
||||
length: integer;
|
||||
bytes: packed array [1..4] of byte;
|
||||
end;
|
||||
utf16Rec = record
|
||||
length: integer;
|
||||
codeUnits: packed array [1..2] of integer;
|
||||
end;
|
||||
|
||||
|
||||
function ConvertMacRomanToUCS(ch: char): ucsCodePoint;
|
||||
@ -36,6 +44,7 @@ function ConvertMacRomanToUCS(ch: char): ucsCodePoint;
|
||||
{ }
|
||||
{ Returns UCS code point value for the character. }
|
||||
|
||||
|
||||
function ConvertUCSToMacRoman(ch: ucsCodePoint): integer;
|
||||
|
||||
{ convert a character from UCS (Unicode) to MacRoman charset }
|
||||
@ -43,6 +52,23 @@ function ConvertUCSToMacRoman(ch: ucsCodePoint): integer;
|
||||
{ Returns ordinal value of the character, or -1 if it can't be }
|
||||
{ converted. }
|
||||
|
||||
|
||||
procedure UTF16Encode(ch: ucsCodePoint; var utf16: utf16Rec);
|
||||
|
||||
{ Encode a UCS code point in UTF-16 }
|
||||
{ }
|
||||
{ ch - the code point }
|
||||
{ utf16 - set to the UTF-16 representation of the code point }
|
||||
|
||||
|
||||
procedure UTF8Encode(ch: ucsCodePoint; var utf8: utf8Rec);
|
||||
|
||||
{ Encode a UCS code point in UTF-8 }
|
||||
{ }
|
||||
{ ch - the code point }
|
||||
{ utf16 - set to the UTF-8 representation of the code point }
|
||||
|
||||
|
||||
function ValidUCNForIdentifier(ch: ucsCodePoint; initial: boolean): boolean;
|
||||
|
||||
{ Check if a code point is valid for a UCN in an identifier }
|
||||
@ -50,6 +76,8 @@ function ValidUCNForIdentifier(ch: ucsCodePoint; initial: boolean): boolean;
|
||||
{ ch - the code point }
|
||||
{ initial - is this UCN the initial element of the identifier? }
|
||||
|
||||
{----------------------------------------------------------------}
|
||||
|
||||
implementation
|
||||
|
||||
function ConvertMacRomanToUCS{(ch: char): ucsCodePoint};
|
||||
@ -100,6 +128,60 @@ else begin
|
||||
end; {ConvertUCSToMacRoman}
|
||||
|
||||
|
||||
procedure UTF16Encode{ch: ucsCodePoint; var utf16: utf16Rec};
|
||||
|
||||
{ Encode a UCS code point in UTF-16 }
|
||||
{ }
|
||||
{ ch - the code point }
|
||||
{ utf16 - set to the UTF-16 representation of the code point }
|
||||
|
||||
begin {UTF16Encode}
|
||||
if ch <= $00ffff then begin
|
||||
utf16.length := 1;
|
||||
utf16.codeUnits[1] := ord(ch);
|
||||
end {if}
|
||||
else begin
|
||||
utf16.length := 2;
|
||||
ch := ch - $010000;
|
||||
utf16.codeUnits[1] := $D800 | ord(ch >> 10);
|
||||
utf16.codeUnits[2] := $DC00 | ord(ch & $03ff);
|
||||
end; {else}
|
||||
end; {UTF16Encode}
|
||||
|
||||
|
||||
procedure UTF8Encode{ch: ucsCodePoint; var utf8: utf8Rec};
|
||||
|
||||
{ Encode a UCS code point in UTF-8 }
|
||||
{ }
|
||||
{ ch - the code point }
|
||||
{ utf16 - set to the UTF-8 representation of the code point }
|
||||
|
||||
begin {UTF8Encode}
|
||||
if ch <= $00007f then begin
|
||||
utf8.length := 1;
|
||||
utf8.bytes[1] := ord(ch);
|
||||
end {if}
|
||||
else if ch <= $0007ff then begin
|
||||
utf8.length := 2;
|
||||
utf8.bytes[1] := $C0 | ord(ch >> 6);
|
||||
utf8.bytes[2] := $80 | ord(ch & $3f)
|
||||
end {else if}
|
||||
else if ch <= $00ffff then begin
|
||||
utf8.length := 3;
|
||||
utf8.bytes[1] := $E0 | ord(ch >> 12);
|
||||
utf8.bytes[2] := $80 | ord((ch >> 6) & $3f);
|
||||
utf8.bytes[3] := $80 | ord(ch & $3f);
|
||||
end {else if}
|
||||
else begin
|
||||
utf8.length := 4;
|
||||
utf8.bytes[1] := $F0 | ord(ch >> 18);
|
||||
utf8.bytes[2] := $80 | ord((ch >> 12) & $3f);
|
||||
utf8.bytes[3] := $80 | ord((ch >> 6) & $3f);
|
||||
utf8.bytes[4] := $80 | ord(ch & $3f);
|
||||
end; {else}
|
||||
end; {UTF8Encode}
|
||||
|
||||
|
||||
function ValidUCNForIdentifier{(ch: ucsCodePoint; initial: boolean): boolean};
|
||||
|
||||
{ Check if a code point is valid for a UCN in an identifier }
|
||||
|
@ -1046,7 +1046,7 @@ var
|
||||
begin {RealVal}
|
||||
if token.kind in [intconst,charconst,scharconst,ucharconst] then
|
||||
RealVal := token.ival
|
||||
else if token.kind = uintconst then begin
|
||||
else if token.kind in [uintconst,ushortconst] then begin
|
||||
if token.ival < 0 then
|
||||
RealVal := (token.ival & $7FFF) + 32768.0
|
||||
else
|
||||
@ -1076,7 +1076,7 @@ var
|
||||
begin {IntVal}
|
||||
if token.kind in [intconst,charconst,scharconst,ucharconst] then
|
||||
IntVal := token.ival
|
||||
else if token.kind = uintconst then begin
|
||||
else if token.kind in [uintconst,ushortconst] then begin
|
||||
IntVal := token.ival & $0000FFFF;
|
||||
end {else if}
|
||||
else {if token.kind in [longconst,ulongconst] then} begin
|
||||
@ -1097,7 +1097,7 @@ var
|
||||
else
|
||||
result.hi := 0;
|
||||
end {if}
|
||||
else if token.kind = uintconst then begin
|
||||
else if token.kind in [uintconst,ushortconst] then begin
|
||||
result.lo := token.ival & $0000FFFF;
|
||||
result.hi := 0;
|
||||
end {else if}
|
||||
@ -1125,7 +1125,7 @@ var
|
||||
begin {PPKind}
|
||||
if token.kind in [intconst,longconst] then
|
||||
PPKind := longlongconst
|
||||
else if token.kind in [uintconst,ulongconst] then
|
||||
else if token.kind in [uintconst,ushortconst,ulongconst] then
|
||||
PPKind := ulonglongconst
|
||||
else
|
||||
PPKind := token.kind;
|
||||
@ -1161,13 +1161,13 @@ var
|
||||
op^.right := Pop;
|
||||
op^.middle := Pop;
|
||||
op^.left := Pop;
|
||||
if op^.right^.token.kind in [intconst,uintconst,
|
||||
if op^.right^.token.kind in [intconst,uintconst,ushortconst,
|
||||
longconst,ulongconst,longlongconst,ulonglongconst,
|
||||
charconst,scharconst,ucharconst] then
|
||||
if op^.left^.token.kind in [intconst,uintconst,
|
||||
if op^.left^.token.kind in [intconst,uintconst,ushortconst,
|
||||
longconst,ulongconst,longlongconst,ulonglongconst,
|
||||
charconst,scharconst,ucharconst] then
|
||||
if op^.middle^.token.kind in [intconst,uintconst,
|
||||
if op^.middle^.token.kind in [intconst,uintconst,ushortconst,
|
||||
longconst,ulongconst,longlongconst,ulonglongconst,
|
||||
charconst,scharconst,ucharconst] then begin
|
||||
GetLongLongVal(llop1, op^.left^.token);
|
||||
@ -1211,9 +1211,9 @@ var
|
||||
op^.left := Pop;
|
||||
kindRight := op^.right^.token.kind;
|
||||
kindLeft := op^.left^.token.kind;
|
||||
if kindRight in [intconst,uintconst,longconst,ulongconst,
|
||||
if kindRight in [intconst,uintconst,ushortconst,longconst,ulongconst,
|
||||
charconst,scharconst,ucharconst] then begin
|
||||
if kindLeft in [intconst,uintconst,longconst,ulongconst,
|
||||
if kindLeft in [intconst,uintconst,ushortconst,longconst,ulongconst,
|
||||
charconst,scharconst,ucharconst] then begin
|
||||
if kind = preprocessorExpression then
|
||||
goto 2;
|
||||
@ -1223,7 +1223,8 @@ var
|
||||
ekind := ulongconst
|
||||
else if (kindRight = longconst) or (kindLeft = longconst) then
|
||||
ekind := longconst
|
||||
else if (kindRight = uintconst) or (kindLeft = uintconst) then
|
||||
else if (kindRight = uintconst) or (kindLeft = uintconst)
|
||||
or (kindRight = ushortconst) or (kindLeft = ushortconst) then
|
||||
ekind := uintconst
|
||||
else
|
||||
ekind := intconst;
|
||||
@ -1289,7 +1290,8 @@ var
|
||||
ekind := kindLeft;
|
||||
end;
|
||||
gtgtop : begin {>>}
|
||||
if kindLeft in [uintconst,ulongconst] then
|
||||
if kindLeft in [uintconst,ushortconst,ulongconst]
|
||||
then
|
||||
op1 := lshr(op1,op2)
|
||||
else
|
||||
op1 := op1 >> op2;
|
||||
@ -1350,10 +1352,10 @@ var
|
||||
end; {if}
|
||||
end; {if}
|
||||
2:
|
||||
if kindRight in [intconst,uintconst,longconst,ulongconst,
|
||||
if kindRight in [intconst,uintconst,ushortconst,longconst,ulongconst,
|
||||
longlongconst,ulonglongconst,charconst,scharconst,ucharconst]
|
||||
then begin
|
||||
if kindLeft in [intconst,uintconst,longconst,ulongconst,
|
||||
if kindLeft in [intconst,uintconst,ushortconst,longconst,ulongconst,
|
||||
longlongconst,ulonglongconst,charconst,scharconst,ucharconst]
|
||||
then begin
|
||||
|
||||
@ -1503,10 +1505,10 @@ var
|
||||
|
||||
if op^.right^.token.kind in [intconst,uintconst,longconst,ulongconst,
|
||||
longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst,
|
||||
compconst,charconst,scharconst,ucharconst] then
|
||||
compconst,charconst,scharconst,ucharconst,ushortconst] then
|
||||
if op^.left^.token.kind in [intconst,uintconst,longconst,ulongconst,
|
||||
longlongconst,ulonglongconst,floatconst,doubleconst,extendedconst,
|
||||
compconst,charconst,scharconst,ucharconst] then
|
||||
compconst,charconst,scharconst,ucharconst,ushortconst] then
|
||||
begin
|
||||
if fenvAccess then
|
||||
if kind in [normalExpression, autoInitializerExpression] then
|
||||
@ -1601,7 +1603,7 @@ var
|
||||
op^.token.kind := ulongConst;
|
||||
op^.token.class := longConstant;
|
||||
if op^.left^.token.kind = stringConst then
|
||||
op^.token.lval := op^.left^.token.sval^.length+1
|
||||
op^.token.lval := op^.left^.token.sval^.length
|
||||
else begin
|
||||
lCodeGeneration := codeGeneration;
|
||||
codeGeneration := false;
|
||||
@ -1728,7 +1730,7 @@ var
|
||||
begin
|
||||
if (kind <> preprocessorExpression) and (op^.left^.token.kind
|
||||
in [intconst,uintconst,longconst,ulongconst,charconst,scharconst,
|
||||
ucharconst]) then begin
|
||||
ucharconst,ushortconst]) then begin
|
||||
|
||||
{evaluate a constant operation}
|
||||
ekind := op^.left^.token.kind;
|
||||
@ -1759,7 +1761,7 @@ var
|
||||
end {if}
|
||||
else if op^.left^.token.kind in [longlongconst,ulonglongconst,
|
||||
intconst,uintconst,longconst,ulongconst,charconst,scharconst,
|
||||
ucharconst] then begin
|
||||
ucharconst,ushortconst] then begin
|
||||
|
||||
{evaluate a constant operation with long long operand}
|
||||
ekind := op^.left^.token.kind;
|
||||
@ -3561,7 +3563,7 @@ case tree^.token.kind of
|
||||
end; {case}
|
||||
end;
|
||||
|
||||
intConst,uintConst,charConst,scharConst,ucharConst: begin
|
||||
intConst,uintConst,ushortConst,charConst,scharConst,ucharConst: begin
|
||||
Gen1t(pc_ldc, tree^.token.ival, cgWord);
|
||||
lastwasconst := true;
|
||||
lastconst := tree^.token.ival;
|
||||
@ -3569,13 +3571,15 @@ case tree^.token.kind of
|
||||
expressionType := intPtr
|
||||
else if tree^.token.kind = uintConst then
|
||||
expressionType := uIntPtr
|
||||
else if tree^.token.kind = ushortConst then
|
||||
expressionType := uShortPtr
|
||||
else if tree^.token.kind = charConst then
|
||||
expressionType := charPtr
|
||||
else if tree^.token.kind = scharConst then
|
||||
expressionType := scharPtr
|
||||
else {if tree^.token.kind = ucharConst then}
|
||||
expressionType := ucharPtr;
|
||||
end; {case intConst,uintConst,charConst,scharConst,ucharConst}
|
||||
end; {case intConst,uintConst,ushortConst,charConst,scharConst,ucharConst}
|
||||
|
||||
longConst,ulongConst: begin
|
||||
GenLdcLong(tree^.token.lval);
|
||||
@ -3621,7 +3625,7 @@ case tree^.token.kind of
|
||||
|
||||
stringConst: begin
|
||||
GenS(pc_lca, tree^.token.sval);
|
||||
expressionType := stringTypePtr;
|
||||
expressionType := StringType(tree^.token.prefix);
|
||||
end; {case stringConst}
|
||||
|
||||
eqch: begin {=}
|
||||
@ -4685,11 +4689,12 @@ else begin {record the expression for an initialize
|
||||
while castValue^.token.kind = castoper do
|
||||
castValue := castValue^.left;
|
||||
if castValue^.token.kind in
|
||||
[intconst,uintconst,charconst,scharconst,ucharconst] then begin
|
||||
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
|
||||
then begin
|
||||
expressionValue := castValue^.token.ival;
|
||||
isConstant := true;
|
||||
expressionType := tree^.castType;
|
||||
if (castValue^.token.kind = uintconst)
|
||||
if (castValue^.token.kind in [uintconst,ushortconst])
|
||||
or (expressionType^.kind = pointerType) then
|
||||
expressionValue := expressionValue & $0000FFFF;
|
||||
goto 1;
|
||||
@ -4714,10 +4719,13 @@ else begin {record the expression for an initialize
|
||||
expressionType := ucharPtr;
|
||||
isConstant := true;
|
||||
end {else if}
|
||||
else if tree^.token.kind = uintconst then begin
|
||||
else if tree^.token.kind in [uintconst,ushortconst] then begin
|
||||
expressionValue := tree^.token.ival;
|
||||
expressionValue := expressionValue & $0000FFFF;
|
||||
expressionType := uIntPtr;
|
||||
if tree^.token.kind = uintconst then
|
||||
expressionType := uIntPtr
|
||||
else {if tree^.token.kind = ushortconst then}
|
||||
expressionType := uShortPtr;
|
||||
isConstant := true;
|
||||
end {else if}
|
||||
else if tree^.token.kind = longconst then begin
|
||||
@ -4773,7 +4781,7 @@ else begin {record the expression for an initialize
|
||||
end {else if}
|
||||
else if tree^.token.kind = stringconst then begin
|
||||
expressionValue := ord4(tree^.token.sval);
|
||||
expressionType := stringTypePtr;
|
||||
expressionType := StringType(tree^.token.prefix);
|
||||
isConstant := true;
|
||||
if kind in [arrayExpression,preprocessorExpression] then begin
|
||||
expressionType := intPtr;
|
||||
@ -4817,7 +4825,7 @@ procedure InitExpression;
|
||||
begin {InitExpression}
|
||||
startTerm := [ident,intconst,uintconst,longconst,ulongconst,longlongconst,
|
||||
ulonglongconst,floatconst,doubleconst,extendedconst,compconst,
|
||||
charconst,scharconst,ucharconst,stringconst];
|
||||
charconst,scharconst,ucharconst,ushortconst,stringconst];
|
||||
startExpression:= startTerm +
|
||||
[lparench,asteriskch,andch,plusch,minusch,excch,tildech,sizeofsy,
|
||||
plusplusop,minusminusop,typedef,_Alignofsy,_Genericsy];
|
||||
|
5
Gen.pas
5
Gen.pas
@ -5890,11 +5890,10 @@ procedure GenTree {op: icptr};
|
||||
gLong.where := onStack;
|
||||
GenNative(m_pea, immediate, stringSize, nil, stringReference+shift16);
|
||||
GenNative(m_pea, immediate, stringSize, nil, stringReference);
|
||||
if maxString-stringSize >= op^.q+1 then begin
|
||||
if maxString-stringSize >= op^.q then begin
|
||||
for i := 1 to op^.q do
|
||||
stringSpace[i+stringSize] := op^.str^.str[i];
|
||||
stringSpace[stringSize+op^.q+1] := chr(0);
|
||||
stringSize := stringSize+op^.q+1;
|
||||
stringSize := stringSize+op^.q;
|
||||
end
|
||||
else
|
||||
Error(cge3);
|
||||
|
10
Header.pas
10
Header.pas
@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
|
||||
{$segment 'SCANNER'}
|
||||
|
||||
const
|
||||
symFileVersion = 16; {version number of .sym file format}
|
||||
symFileVersion = 17; {version number of .sym file format}
|
||||
|
||||
var
|
||||
inhibitHeader: boolean; {should .sym includes be blocked?}
|
||||
@ -719,6 +719,7 @@ procedure EndInclude {chPtr: ptr};
|
||||
stringConstant: begin
|
||||
WriteLongString(token.sval);
|
||||
WriteByte(ord(token.ispstring));
|
||||
WriteByte(ord(token.prefix));
|
||||
end;
|
||||
macroParameter: WriteWord(token.pnum);
|
||||
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
|
||||
@ -1008,6 +1009,10 @@ procedure EndInclude {chPtr: ptr};
|
||||
WriteByte(16)
|
||||
else if tp = uShortPtr then
|
||||
WriteByte(17)
|
||||
else if tp = utf16StringTypePtr then
|
||||
WriteByte(18)
|
||||
else if tp = utf32StringTypePtr then
|
||||
WriteByte(19)
|
||||
else if tp^.saveDisp <> 0 then begin
|
||||
WriteByte(1);
|
||||
WriteLong(tp^.saveDisp);
|
||||
@ -1348,6 +1353,7 @@ var
|
||||
stringConstant: begin
|
||||
token.sval := ReadLongString;
|
||||
token.ispstring := ReadByte <> 0;
|
||||
token.prefix := charStrPrefixEnum(ReadByte);
|
||||
end;
|
||||
macroParameter: token.pnum := ReadWord;
|
||||
reservedSymbol: if token.kind in [lbracech,rbracech,lbrackch,
|
||||
@ -1741,6 +1747,8 @@ var
|
||||
15: tp := uCharPtr;
|
||||
16: tp := shortPtr;
|
||||
17: tp := uShortPtr;
|
||||
18: tp := utf16StringTypePtr;
|
||||
19: tp := utf32StringTypePtr;
|
||||
|
||||
otherwise: begin
|
||||
PurgeSymbols;
|
||||
|
115
Parser.pas
115
Parser.pas
@ -1909,7 +1909,7 @@ var
|
||||
if tree^.token.kind = plusch then begin
|
||||
rtree := tree^.right;
|
||||
if rtree^.token.kind in
|
||||
[intconst,uintconst,charconst,scharconst,ucharconst] then
|
||||
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst] then
|
||||
size := rtree^.token.ival
|
||||
else if rtree^.token.kind in [longconst,ulongconst] then
|
||||
size := rtree^.token.lval
|
||||
@ -2086,7 +2086,8 @@ var
|
||||
end;
|
||||
|
||||
pointerType:
|
||||
if etype = stringTypePtr then begin
|
||||
if (etype = stringTypePtr) or (etype = utf16StringTypePtr)
|
||||
or (etype = utf32StringTypePtr) then begin
|
||||
iPtr^.isConstant := true;
|
||||
iPtr^.iType := ccPointer;
|
||||
iPtr^.pval := 0;
|
||||
@ -2151,12 +2152,12 @@ var
|
||||
operator := tree^.token.kind;
|
||||
while operator in [plusch,minusch] do begin
|
||||
with tree^.right^.token do
|
||||
if kind in [intConst,uintconst,longConst,ulongconst,
|
||||
longlongConst,ulonglongconst,charconst,scharconst,
|
||||
ucharconst] then begin
|
||||
if kind in [intConst,uintconst,ushortconst,longConst,
|
||||
ulongconst,longlongConst,ulonglongconst,charconst,
|
||||
scharconst,ucharconst] then begin
|
||||
if kind in [intConst,charconst,scharconst,ucharconst] then
|
||||
offSet2 := ival
|
||||
else if kind = uintConst then
|
||||
else if kind in [uintConst,ushortconst] then
|
||||
offset2 := ival & $0000ffff
|
||||
else if kind in [longConst,ulongconst] then begin
|
||||
offset2 := lval;
|
||||
@ -2322,6 +2323,8 @@ var
|
||||
kind: typeKind; {base type of an initializer}
|
||||
ktp: typePtr; {array type with definedTypes removed}
|
||||
lPrintMacroExpansions: boolean; {local copy of printMacroExpansions}
|
||||
stringElementType: typePtr; {element type of string literal}
|
||||
stringLength: integer; {elements in a string literal}
|
||||
|
||||
|
||||
procedure Fill (count: longint; tp: typePtr);
|
||||
@ -2450,44 +2453,61 @@ var
|
||||
kind := ktp^.kind;
|
||||
|
||||
{handle string constants}
|
||||
if (token.kind = stringConst) and (kind = scalarType)
|
||||
and (ktp^.baseType in [cgByte,cgUByte]) then begin
|
||||
if tp^.elements = 0 then begin
|
||||
tp^.elements := token.sval^.length + 1;
|
||||
RecomputeSizes(variable^.itype);
|
||||
end {if}
|
||||
else if tp^.elements < token.sval^.length then begin
|
||||
Error(44);
|
||||
errorFound := true;
|
||||
end; {else if}
|
||||
with ktp^ do begin
|
||||
iPtr := pointer(Malloc(sizeof(initializerRecord)));
|
||||
iPtr^.next := variable^.iPtr;
|
||||
variable^.iPtr := iPtr;
|
||||
iPtr^.count := 1;
|
||||
iPtr^.bitdisp := 0;
|
||||
iPtr^.bitsize := 0;
|
||||
iPtr^.isStructOrUnion := false;
|
||||
if (variable^.storage in [external,global,private]) then begin
|
||||
iPtr^.isConstant := true;
|
||||
iPtr^.itype := cgString;
|
||||
iPtr^.sval := token.sval;
|
||||
count := tp^.elements - token.sval^.length;
|
||||
if count <> 0 then
|
||||
Fill(count, sCharPtr);
|
||||
if token.kind = stringConst then begin
|
||||
stringElementType := StringType(token.prefix)^.aType;
|
||||
if (kind = scalarType) and
|
||||
(((ktp^.baseType in [cgByte,cgUByte])
|
||||
and (stringElementType = charPtr))
|
||||
or CompTypes(ktp,stringElementType)) then begin
|
||||
stringLength := token.sval^.length div ord(stringElementType^.size);
|
||||
if tp^.elements = 0 then begin
|
||||
tp^.elements := stringLength;
|
||||
RecomputeSizes(variable^.itype);
|
||||
end {if}
|
||||
else begin
|
||||
iPtr^.isConstant := false;
|
||||
new(ep);
|
||||
iPtr^.iTree := ep;
|
||||
ep^.next := nil;
|
||||
ep^.left := nil;
|
||||
ep^.middle := nil;
|
||||
ep^.right := nil;
|
||||
ep^.token := token;
|
||||
end; {else}
|
||||
end; {with}
|
||||
NextToken;
|
||||
else if tp^.elements < stringLength-1 then begin
|
||||
Error(44);
|
||||
errorFound := true;
|
||||
end; {else if}
|
||||
with ktp^ do begin
|
||||
iPtr := pointer(Malloc(sizeof(initializerRecord)));
|
||||
iPtr^.next := variable^.iPtr;
|
||||
variable^.iPtr := iPtr;
|
||||
iPtr^.count := 1;
|
||||
iPtr^.bitdisp := 0;
|
||||
iPtr^.bitsize := 0;
|
||||
iPtr^.isStructOrUnion := false;
|
||||
if (variable^.storage in [external,global,private]) then begin
|
||||
iPtr^.isConstant := true;
|
||||
iPtr^.itype := cgString;
|
||||
iPtr^.sval := token.sval;
|
||||
count := tp^.elements - stringLength;
|
||||
if count > 0 then
|
||||
Fill(count, stringElementType)
|
||||
else if count = -1 then begin
|
||||
iPtr^.sval := pointer(GMalloc(token.sval^.length+2));
|
||||
CopyLongString(iPtr^.sval, token.sval);
|
||||
iPtr^.sval^.length :=
|
||||
iPtr^.sval^.length - ord(stringElementType^.size);
|
||||
end; {else if}
|
||||
end {if}
|
||||
else begin
|
||||
iPtr^.isConstant := false;
|
||||
new(ep);
|
||||
iPtr^.iTree := ep;
|
||||
ep^.next := nil;
|
||||
ep^.left := nil;
|
||||
ep^.middle := nil;
|
||||
ep^.right := nil;
|
||||
ep^.token := token;
|
||||
end; {else}
|
||||
end; {with}
|
||||
NextToken;
|
||||
end {if}
|
||||
else begin
|
||||
Error(47);
|
||||
errorFound := true;
|
||||
NextToken;
|
||||
end; {else}
|
||||
end {if}
|
||||
|
||||
{handle arrays of non-strings}
|
||||
@ -3643,7 +3663,8 @@ if isFunction then begin
|
||||
NextToken;
|
||||
Match(lparench,13);
|
||||
if token.kind in
|
||||
[intconst,uintconst,charconst,scharconst,ucharconst] then begin
|
||||
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
|
||||
then begin
|
||||
toolNum := token.ival;
|
||||
NextToken;
|
||||
end {if}
|
||||
@ -3655,7 +3676,8 @@ if isFunction then begin
|
||||
NextToken;
|
||||
end {if}
|
||||
else if token.kind in
|
||||
[intconst,uintconst,charconst,scharconst,ucharconst] then begin
|
||||
[intconst,uintconst,ushortconst,charconst,scharconst,ucharconst]
|
||||
then begin
|
||||
dispatcher := token.ival;
|
||||
NextToken;
|
||||
end {if}
|
||||
@ -4352,7 +4374,8 @@ var
|
||||
elements := itype^.elements;
|
||||
if elements = 0 then goto 1; {don't init flexible array member}
|
||||
if iPtr^.iTree^.token.kind = stringConst then begin
|
||||
size := iPtr^.iTree^.token.sval^.length+1;
|
||||
elements := elements * itype^.aType^.size;
|
||||
size := iPtr^.iTree^.token.sval^.length;
|
||||
if size >= elements then
|
||||
size := ord(elements)
|
||||
else
|
||||
|
519
Scanner.pas
519
Scanner.pas
@ -239,6 +239,7 @@ type
|
||||
onOffEnum = (on,off,default); {on-off values in standard pragmas}
|
||||
|
||||
var
|
||||
charStrPrefix: charStrPrefixEnum; {prefix of character/string literal}
|
||||
dateStr: longStringPtr; {macro date string}
|
||||
doingCommandLine: boolean; {are we processing the cc= command line?}
|
||||
doingPPExpression: boolean; {are we processing a preprocessor expression?}
|
||||
@ -256,6 +257,7 @@ var
|
||||
macroFound: macroRecordPtr; {last macro found by IsDefined}
|
||||
mergingStrings: boolean; {is NextToken trying to merge strings?}
|
||||
needWriteLine: boolean; {is there a line that needs to be written?}
|
||||
octHexEscape: boolean; {octal/hex escape in char/string?}
|
||||
onOffValue: onOffEnum; {value of last on-off switch}
|
||||
wroteLine: boolean; {has the current line already been written?}
|
||||
numErr: 0..maxErr; {number of errors in this line}
|
||||
@ -343,6 +345,14 @@ procedure StartInclude (name: gsosOutStringPtr); extern;
|
||||
{ 2. From Header.pas }
|
||||
|
||||
|
||||
function StringType(prefix: charStrPrefixEnum): typePtr; extern;
|
||||
|
||||
{ returns the type of a string literal with specified prefix }
|
||||
{ }
|
||||
{ parameters: }
|
||||
{ prefix - the prefix }
|
||||
|
||||
|
||||
procedure TermHeader; extern;
|
||||
|
||||
{ Stop processing the header file }
|
||||
@ -722,6 +732,9 @@ if list or (numErr <> 0) then begin
|
||||
162: msg := @'invalid escape sequence';
|
||||
163: msg := @'pointer assignment discards qualifier(s)';
|
||||
164: msg := @'compound literals within functions are not supported by ORCA/C';
|
||||
165: msg := @'''\p'' may not be used in a prefixed string';
|
||||
166: msg := @'string literals with these prefixes may not be merged';
|
||||
167: msg := @'''L''-prefixed character or string constants are not supported by ORCA/C';
|
||||
otherwise: Error(57);
|
||||
end; {case}
|
||||
writeln(msg^);
|
||||
@ -768,6 +781,34 @@ var
|
||||
ch: char; {work character}
|
||||
i: integer; {loop counter}
|
||||
str: string[23]; {temp string}
|
||||
c16ptr: ^integer; {pointer to char16_t value}
|
||||
c32ptr: ^longint; {pointer to char32_t value}
|
||||
|
||||
|
||||
procedure PrintHexDigits(i: longint; count: integer);
|
||||
|
||||
{ Print a digit as a hex character }
|
||||
{ }
|
||||
{ Parameters: }
|
||||
{ i: value to print in hexadecimal }
|
||||
{ count: number of digits to print }
|
||||
|
||||
var
|
||||
digit: integer; {hex digit value}
|
||||
shift: integer; {amount to shift by}
|
||||
|
||||
begin {PrintHexDigits}
|
||||
shift := 4 * (count-1);
|
||||
while shift >= 0 do begin
|
||||
digit := ord(i >> shift) & $000F;
|
||||
if digit < 10 then
|
||||
write(chr(digit | ord('0')))
|
||||
else
|
||||
write(chr(digit + ord('A') - 10));
|
||||
shift := shift - 4;
|
||||
end; {while}
|
||||
end; {PrintHexDigits}
|
||||
|
||||
|
||||
begin {PrintToken}
|
||||
case token.kind of
|
||||
@ -778,7 +819,8 @@ case token.kind of
|
||||
scharconst,
|
||||
ucharconst,
|
||||
intconst,
|
||||
uintconst: write(token.ival:1);
|
||||
uintconst,
|
||||
ushortconst: write(token.ival:1);
|
||||
|
||||
longConst,
|
||||
ulongConst: write(token.lval:1);
|
||||
@ -801,21 +843,43 @@ case token.kind of
|
||||
extendedConst: write(token.rval:1);
|
||||
|
||||
stringConst: begin
|
||||
write('"');
|
||||
for i := 1 to token.sval^.length do begin
|
||||
ch := token.sval^.str[i];
|
||||
if ch in [' '..'~'] then begin
|
||||
if ch in ['"','\','?'] then
|
||||
if token.prefix = prefix_u16 then begin
|
||||
write('u"');
|
||||
i := 1;
|
||||
while i < token.sval^.length-2 do begin
|
||||
write('\x');
|
||||
c16Ptr := pointer(@token.sval^.str[i]);
|
||||
PrintHexDigits(c16Ptr^, 4);
|
||||
i := i + 2;
|
||||
end; {while}
|
||||
end {if}
|
||||
else if token.prefix = prefix_U32 then begin
|
||||
write('U"');
|
||||
i := 1;
|
||||
while i < token.sval^.length-4 do begin
|
||||
write('\x');
|
||||
c32Ptr := pointer(@token.sval^.str[i]);
|
||||
PrintHexDigits(c32Ptr^, 8);
|
||||
i := i + 4;
|
||||
end; {while}
|
||||
end {else if}
|
||||
else begin
|
||||
write('"');
|
||||
for i := 1 to token.sval^.length-1 do begin
|
||||
ch := token.sval^.str[i];
|
||||
if ch in [' '..'~'] then begin
|
||||
if ch in ['"','\','?'] then
|
||||
write('\');
|
||||
write(ch);
|
||||
end {if}
|
||||
else begin
|
||||
write('\');
|
||||
write(ch);
|
||||
end {if}
|
||||
else begin
|
||||
write('\');
|
||||
write((ord(ch)>>6):1);
|
||||
write(((ord(ch)>>3) & $0007):1);
|
||||
write((ord(ch) & $0007):1);
|
||||
end; {else}
|
||||
end; {for}
|
||||
write((ord(ch)>>6):1);
|
||||
write(((ord(ch)>>3) & $0007):1);
|
||||
write((ord(ch) & $0007):1);
|
||||
end; {else}
|
||||
end; {for}
|
||||
end; {else}
|
||||
write('"');
|
||||
end;
|
||||
|
||||
@ -1047,6 +1111,9 @@ procedure LongToPString (pstr: stringPtr; lstr: longStringPtr);
|
||||
|
||||
{ Convert a long string into a p string }
|
||||
{ }
|
||||
{ The long string is assumed to include a terminating null byte,}
|
||||
{ which is not copied to the p-string. }
|
||||
{ }
|
||||
{ Parameters: }
|
||||
{ pstr - pointer to the p-string }
|
||||
{ lstr - pointer to the long string }
|
||||
@ -1056,7 +1123,7 @@ var
|
||||
len: integer; {string length}
|
||||
|
||||
begin {LongToPString}
|
||||
len := lstr^.length;
|
||||
len := lstr^.length-1;
|
||||
if len > 255 then
|
||||
len := 255;
|
||||
pstr^[0] := chr(len);
|
||||
@ -1083,6 +1150,56 @@ var
|
||||
len,len1: integer; {length of strings}
|
||||
lt: tokenType; {local copy of token}
|
||||
str1,str2: stringPtr; {identifier strings}
|
||||
elementType: typePtr; {string element type}
|
||||
|
||||
|
||||
procedure ConvertString (var str: tokenType; prefix: charStrPrefixEnum);
|
||||
|
||||
{ Convert unprefixed string literal str to a prefixed one }
|
||||
|
||||
var
|
||||
sPtr: longStringPtr; {new string}
|
||||
i,j,k: integer; {loop counters}
|
||||
codePoint: ucsCodePoint; {Unicode code point}
|
||||
c16ptr: ^integer; {pointer to char16_t value}
|
||||
c32ptr: ^longint; {pointer to char32_t value}
|
||||
utf8: utf8Rec; {UTF-8 encoding of character}
|
||||
utf16: utf16Rec; {UTF-16 encoding of character}
|
||||
|
||||
begin {ConvertString}
|
||||
sPtr := pointer(Malloc(str.sval^.length*4));
|
||||
k := 0;
|
||||
for i := 1 to str.sval^.length do begin
|
||||
codePoint := ConvertMacRomanToUCS(str.sval^.str[i]);
|
||||
if prefix = prefix_u8 then begin
|
||||
UTF8Encode(codePoint, utf8);
|
||||
for j := 1 to utf8.length do begin
|
||||
sPtr^.str[k+1] := chr(utf8.bytes[j]);
|
||||
k := k+1;
|
||||
end; {for}
|
||||
end {if}
|
||||
else if prefix = prefix_u16 then begin
|
||||
UTF16Encode(codePoint, utf16);
|
||||
c16Ptr := pointer(@sPtr^.str[k+1]);
|
||||
c16Ptr^ := utf16.codeUnits[1];
|
||||
k := k+2;
|
||||
if utf16.length = 2 then begin
|
||||
c16ptr := pointer(@sPtr^.str[k+1]);
|
||||
c16Ptr^ := utf16.codeUnits[2];
|
||||
k := k+2;
|
||||
end; {if}
|
||||
end {else if}
|
||||
else if prefix = prefix_U32 then begin
|
||||
c32Ptr := pointer(@sPtr^.str[k+1]);
|
||||
c32Ptr^ := codePoint;
|
||||
k := k+4;
|
||||
end; {else if}
|
||||
end; {for}
|
||||
sPtr^.length := k;
|
||||
str.sval := sPtr;
|
||||
str.prefix := prefix;
|
||||
end; {ConvertString}
|
||||
|
||||
|
||||
begin {Merge}
|
||||
kind1 := tk1.kind;
|
||||
@ -1100,6 +1217,18 @@ if class1 in [identifier,reservedWord] then begin
|
||||
str2 := @reservedWords[kind2]
|
||||
else if class2 in numericConstants then
|
||||
str2 := tk2.numString
|
||||
else if (class2 = stringConstant) and (tk2.prefix = prefix_none) then begin
|
||||
if str1^ = 'u' then
|
||||
ConvertString(tk2, prefix_u16)
|
||||
else if str1^ = 'U' then
|
||||
ConvertString(tk2, prefix_U32)
|
||||
else if str1^ = 'u8' then
|
||||
ConvertString(tk2, prefix_u8)
|
||||
else
|
||||
Error(63);
|
||||
tk1 := tk2;
|
||||
goto 1;
|
||||
end {else if}
|
||||
else begin
|
||||
Error(63);
|
||||
goto 1;
|
||||
@ -1145,7 +1274,16 @@ else if class1 in numericConstants then begin
|
||||
|
||||
else if class1 = stringConstant then begin
|
||||
if class2 = stringConstant then begin
|
||||
len1 := tk1.sval^.length;
|
||||
if tk1.prefix = tk2.prefix then
|
||||
{OK - nothing to do}
|
||||
else if tk1.prefix = prefix_none then
|
||||
ConvertString(tk1, tk2.prefix)
|
||||
else if tk2.prefix = prefix_none then
|
||||
ConvertString(tk2, tk1.prefix)
|
||||
else
|
||||
Error(166);
|
||||
elementType := StringType(tk1.prefix)^.aType;
|
||||
len1 := tk1.sval^.length - ord(elementType^.size);
|
||||
len := len1+tk2.sval^.length;
|
||||
cp := pointer(Malloc(len+2));
|
||||
for i := 1 to len1 do
|
||||
@ -1154,7 +1292,7 @@ else if class1 = stringConstant then begin
|
||||
cp^.str[i+len1] := tk2.sval^.str[i];
|
||||
cp^.length := len;
|
||||
if tk1.ispstring then
|
||||
cp^.str[1] := chr(len-1);
|
||||
cp^.str[1] := chr(len-2);
|
||||
tk1.sval := cp;
|
||||
goto 1;
|
||||
end; {if}
|
||||
@ -1334,12 +1472,14 @@ begin {BuildStringToken}
|
||||
token.kind := stringconst;
|
||||
token.class := stringConstant;
|
||||
token.ispstring := false;
|
||||
token.sval := pointer(GMalloc(len+2));
|
||||
token.sval := pointer(GMalloc(len+3));
|
||||
token.prefix := prefix_none;
|
||||
for i := 1 to len do begin
|
||||
token.sval^.str[i] := chr(cp^);
|
||||
cp := pointer(ord4(cp)+1);
|
||||
end; {for}
|
||||
token.sval^.length := len;
|
||||
token.sval^.str[len+1] := chr(0);
|
||||
token.sval^.length := len+1;
|
||||
PutBackToken(token, true);
|
||||
end; {BuildStringToken}
|
||||
|
||||
@ -1504,10 +1644,12 @@ if macro^.readOnly then begin {handle special macros}
|
||||
token.kind := stringConst;
|
||||
token.class := stringConstant;
|
||||
token.ispstring := false;
|
||||
sp := pointer(Malloc(5+sourceFileGS.theString.size));
|
||||
sp^.length := sourceFileGS.theString.size;
|
||||
token.prefix := prefix_none;
|
||||
sp := pointer(Malloc(3+sourceFileGS.theString.size));
|
||||
sp^.length := sourceFileGS.theString.size+1;
|
||||
for i := 1 to sourceFileGS.theString.size do
|
||||
sp^.str[i] := sourceFileGS.theString.theString[i];
|
||||
sp^.str[i+1] := chr(0);
|
||||
token.sval := sp;
|
||||
tokenStart := @sp^.str;
|
||||
tokenEnd := pointer(ord4(tokenStart)+sp^.length);
|
||||
@ -1517,6 +1659,7 @@ if macro^.readOnly then begin {handle special macros}
|
||||
token.kind := stringConst;
|
||||
token.class := stringConstant;
|
||||
token.ispstring := false;
|
||||
token.prefix := prefix_none;
|
||||
token.sval := dateStr;
|
||||
tokenStart := @dateStr^.str;
|
||||
tokenEnd := pointer(ord4(tokenStart)+dateStr^.length);
|
||||
@ -1527,6 +1670,7 @@ if macro^.readOnly then begin {handle special macros}
|
||||
token.kind := stringConst;
|
||||
token.class := stringConstant;
|
||||
token.ispstring := false;
|
||||
token.prefix := prefix_none;
|
||||
token.sval := timeStr;
|
||||
tokenStart := @timeStr^.str;
|
||||
tokenEnd := pointer(ord4(tokenStart)+timeStr^.length);
|
||||
@ -1537,8 +1681,8 @@ if macro^.readOnly then begin {handle special macros}
|
||||
token.kind := intConst; {__ORCAC__}
|
||||
token.numString := @oneStr; {__STDC_NO_...__}
|
||||
token.class := intConstant; {__ORCAC_HAS_LONG_LONG__}
|
||||
token.ival := 1;
|
||||
oneStr := '1';
|
||||
token.ival := 1; {__STDC_UTF_16__}
|
||||
oneStr := '1'; {__STDC_UTF_32__}
|
||||
tokenStart := @oneStr[1];
|
||||
tokenEnd := pointer(ord4(tokenStart)+1);
|
||||
end;
|
||||
@ -1547,6 +1691,7 @@ if macro^.readOnly then begin {handle special macros}
|
||||
token.kind := stringConst;
|
||||
token.class := stringConstant;
|
||||
token.ispstring := false;
|
||||
token.prefix := prefix_none;
|
||||
token.sval := versionStrL;
|
||||
tokenStart := @versionStrL^.str;
|
||||
tokenEnd := pointer(ord4(tokenStart)+versionStrL^.length);
|
||||
@ -1606,7 +1751,7 @@ else begin
|
||||
if tcPtr^.token.kind = stringconst then begin
|
||||
BuildStringToken(@quoteStr[1], 1);
|
||||
BuildStringToken(@tcPtr^.token.sval^.str,
|
||||
tcPtr^.token.sval^.length);
|
||||
tcPtr^.token.sval^.length-1);
|
||||
BuildStringToken(@quoteStr[1], 1);
|
||||
end {if}
|
||||
else begin
|
||||
@ -1851,6 +1996,7 @@ if ch = '<' then begin {process a library file...}
|
||||
token.kind := stringconst; {convert a <> style name to a string}
|
||||
token.class := stringConstant;
|
||||
token.ispstring := false;
|
||||
token.prefix := prefix_none;
|
||||
i := 0;
|
||||
while not (charKinds[ord(ch)] in [ch_eol,ch_gt]) do begin
|
||||
i := i+1;
|
||||
@ -1883,7 +2029,7 @@ else begin
|
||||
{handle file names that are strings or macro expansions}
|
||||
expandMacros := true; {allow macros to be used in the name}
|
||||
NextToken; {skip the command name}
|
||||
if token.kind = stringConst then begin
|
||||
if (token.kind = stringConst) and (token.prefix = prefix_none) then begin
|
||||
LongToPString(@workString, token.sval);
|
||||
CheckDelimiters(workString);
|
||||
if mustExist then begin
|
||||
@ -2417,6 +2563,10 @@ var
|
||||
stringConstant: begin
|
||||
if tk1^.token.sval^.length <> tk2^.token.sval^.length
|
||||
then goto 3;
|
||||
if tk1^.token.ispstring <> tk2^.token.ispstring then
|
||||
goto 3;
|
||||
if tk1^.token.prefix <> tk2^.token.prefix then
|
||||
goto 3;
|
||||
for i := 1 to tk1^.token.sval^.length do
|
||||
if tk1^.token.sval^.str[i] <>
|
||||
tk2^.token.sval^.str[i] then
|
||||
@ -2562,7 +2712,7 @@ var
|
||||
while not (token.kind in [eolsy, eofsy]) do begin
|
||||
msg^ := concat(msg^, ' ');
|
||||
if token.kind = stringConst then begin
|
||||
len := token.sval^.length;
|
||||
len := token.sval^.length-1;
|
||||
for i := 1 to len do
|
||||
msg^ := concat(msg^, token.sval^.str[i]);
|
||||
end {if}
|
||||
@ -2593,13 +2743,13 @@ var
|
||||
begin {DoFloat}
|
||||
FlagPragmas(p_float);
|
||||
NextToken;
|
||||
if token.kind in [intconst,uintconst] then begin
|
||||
if token.kind in [intconst,uintconst,ushortconst] then begin
|
||||
floatCard := token.ival;
|
||||
NextToken;
|
||||
end {if}
|
||||
else
|
||||
Error(18);
|
||||
if token.kind in [intconst,uintconst] then begin
|
||||
if token.kind in [intconst,uintconst,ushortconst] then begin
|
||||
floatSlot := $C080 | (token.ival << 4);
|
||||
NextToken;
|
||||
end {if}
|
||||
@ -2669,7 +2819,7 @@ var
|
||||
NextToken;
|
||||
isNegative := true;
|
||||
end; {else if}
|
||||
if token.kind in [intconst,uintconst] then begin
|
||||
if token.kind in [intconst,uintconst,ushortconst] then begin
|
||||
value := token.ival;
|
||||
NextToken;
|
||||
end {if}
|
||||
@ -2899,11 +3049,14 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin
|
||||
end {if}
|
||||
else
|
||||
Error(18);
|
||||
if token.kind = stringconst then begin
|
||||
if (token.kind = stringconst)
|
||||
and (token.prefix = prefix_none) then begin
|
||||
LongToPString(
|
||||
pointer(ord4(@sourceFileGS.theString)+1),
|
||||
token.sval);
|
||||
sourceFileGS.theString.size := token.sval^.length;
|
||||
sourceFileGS.theString.size := token.sval^.length-1;
|
||||
if sourceFileGS.theString.size > 255 then
|
||||
sourceFileGS.theString.size := 255;
|
||||
NextToken;
|
||||
end; {if}
|
||||
if token.kind <> eolsy then
|
||||
@ -3866,7 +4019,9 @@ var
|
||||
token.sval := pointer(Malloc(i+3)); {put the string in the string pool}
|
||||
CopyLongString(token.sval, pointer(sPtr));
|
||||
dispose(sPtr);
|
||||
token.sval^.str[i+1] := chr(0); {add null in case the string is extended}
|
||||
token.sval^.str[i+1] := chr(0); {add null terminator}
|
||||
token.sval^.length := i+1;
|
||||
token.prefix := prefix_none;
|
||||
end; {GetString}
|
||||
|
||||
|
||||
@ -3912,6 +4067,7 @@ doingPPExpression := false; {not doing a preprocessor expression}
|
||||
unix_1 := false; {int is 16 bits}
|
||||
lintIsError := true; {lint messages are considered errors}
|
||||
fenvAccess := false; {not accessing fp environment}
|
||||
charStrPrefix := prefix_none; {no char/str prefix seen}
|
||||
mergingStrings := false; {not currently merging strings}
|
||||
|
||||
{error codes for lint messages}
|
||||
@ -3986,6 +4142,24 @@ mp^.algorithm := 6;
|
||||
bp := pointer(ord4(macros) + hash(mp^.name));
|
||||
mp^.next := bp^;
|
||||
bp^ := mp;
|
||||
new(mp); {__STDC_UTF_16__}
|
||||
mp^.name := @'__STDC_UTF_16__';
|
||||
mp^.parameters := -1;
|
||||
mp^.tokens := nil;
|
||||
mp^.readOnly := true;
|
||||
mp^.algorithm := 5;
|
||||
bp := pointer(ord4(macros) + hash(mp^.name));
|
||||
mp^.next := bp^;
|
||||
bp^ := mp;
|
||||
new(mp); {__STDC_UTF_32__}
|
||||
mp^.name := @'__STDC_UTF_32__';
|
||||
mp^.parameters := -1;
|
||||
mp^.tokens := nil;
|
||||
mp^.readOnly := true;
|
||||
mp^.algorithm := 5;
|
||||
bp := pointer(ord4(macros) + hash(mp^.name));
|
||||
mp^.next := bp^;
|
||||
bp^ := mp;
|
||||
new(mp); {__ORCAC_HAS_LONG_LONG__}
|
||||
mp^.name := @'__ORCAC_HAS_LONG_LONG__';
|
||||
mp^.parameters := -1;
|
||||
@ -4042,8 +4216,8 @@ mp^.next := bp^;
|
||||
bp^ := mp;
|
||||
SetDateTime; {set up the macro date/time strings}
|
||||
{set up the version string}
|
||||
versionStrL := pointer(GMalloc(3 + length(versionStr)));
|
||||
versionStrL^.length := length(versionStr);
|
||||
versionStrL := pointer(GCalloc(3 + length(versionStr)));
|
||||
versionStrL^.length := length(versionStr)+1;
|
||||
versionStrL^.str := versionStr;
|
||||
|
||||
{Scan the command line options}
|
||||
@ -4197,7 +4371,7 @@ procedure NextToken;
|
||||
|
||||
{ Read the next token from the file. }
|
||||
|
||||
label 1,2,3,4,5;
|
||||
label 1,2,3,4,5,6;
|
||||
|
||||
type
|
||||
three = (s100,s1000,s4000); {these declarations are used for a}
|
||||
@ -4228,11 +4402,15 @@ var
|
||||
tToken: tokenType; {for merging tokens}
|
||||
sPtr,tsPtr: gstringPtr; {for forming string constants}
|
||||
lLastWasReturn: boolean; {local copy of lastWasReturn}
|
||||
codePoint: ucsCodePoint; {Unicode code point from UCN}
|
||||
codePoint: longint; {Unicode character value}
|
||||
chFromUCN: integer; {character given by UCN (converted)}
|
||||
c16ptr: ^integer; {pointer to char16_t value}
|
||||
c32ptr: ^longint; {pointer to char32_t value}
|
||||
utf8: utf8Rec; {UTF-8 encoding of character}
|
||||
utf16: utf16Rec; {UTF-16 encoding of character}
|
||||
|
||||
|
||||
function EscapeCh: integer;
|
||||
function EscapeCh: longint;
|
||||
|
||||
{ Find and return the next character in a string or char }
|
||||
{ constant. Handle escape sequences if they are found. }
|
||||
@ -4241,19 +4419,20 @@ var
|
||||
{ Globals: }
|
||||
{ ch - first character in sequence; set to first char }
|
||||
{ after sequence }
|
||||
|
||||
label 1;
|
||||
|
||||
{ charStrPrefix - prefix of the char constant or string }
|
||||
{ octHexEscape - true if this was an octal/hex escape seq.}
|
||||
|
||||
var
|
||||
cnt: 0..3; {for counting octal escape sequences}
|
||||
dig: 0..15; {value of a hex digit}
|
||||
skipChar: boolean; {get next char when done?}
|
||||
val: 0..maxint; {hex/octal escape code value}
|
||||
val: longint; {hex/octal escape code value}
|
||||
codePoint: ucsCodePoint; {code point given by UCN}
|
||||
chFromUCN: integer; {character given by UCN (converted)}
|
||||
|
||||
begin {EscapeCh}
|
||||
1: skipChar := true;
|
||||
octHexEscape := false;
|
||||
skipChar := true;
|
||||
if ch = '\' then begin
|
||||
NextCh;
|
||||
if ch in ['0'..'7','a','b','t','n','v','f','p','r','x','u','U',
|
||||
@ -4268,9 +4447,13 @@ var
|
||||
NextCh;
|
||||
end; {while}
|
||||
if (val & $FF00) <> 0 then
|
||||
if not skipping then
|
||||
Error(162);
|
||||
EscapeCh := val & $FF;
|
||||
if charStrPrefix in [prefix_none,prefix_u8] then begin
|
||||
if not skipping then
|
||||
Error(162);
|
||||
val := 0;
|
||||
end; {if}
|
||||
EscapeCh := val;
|
||||
octHexEscape := true;
|
||||
skipChar := false;
|
||||
end;
|
||||
'a': EscapeCh := 7;
|
||||
@ -4294,28 +4477,41 @@ var
|
||||
ch := chr(ord(ch)&$5F);
|
||||
dig := ord(ch)-ord('A')+10;
|
||||
end; {else}
|
||||
val := (val << 4) | dig;
|
||||
if (val & $FF00) <> 0 then begin
|
||||
if ((charStrPrefix = prefix_none) and ((val & $F0) <> 0)) or
|
||||
((charStrPrefix = prefix_u8) and ((val & $F0) <> 0)) or
|
||||
((charStrPrefix = prefix_u16) and ((val & $F000) <> 0)) or
|
||||
((charStrPrefix = prefix_u32) and ((val & $F0000000) <> 0))
|
||||
then begin
|
||||
if not skipping then
|
||||
Error(162);
|
||||
while ch in ['0'..'9','a'..'f','A'..'F'] do
|
||||
NextCh;
|
||||
val := 0;
|
||||
end; {if}
|
||||
NextCh;
|
||||
end {if}
|
||||
else begin
|
||||
val := (val << 4) | dig;
|
||||
NextCh;
|
||||
end; {else}
|
||||
end; {while}
|
||||
skipChar := false;
|
||||
EscapeCh := val & $FF;
|
||||
EscapeCh := val;
|
||||
octHexEscape := true;
|
||||
end;
|
||||
'u','U': begin
|
||||
codePoint := UniversalCharacterName;
|
||||
chFromUCN := ConvertUCSToMacRoman(codePoint);
|
||||
skipChar := false;
|
||||
if chFromUCN >= 0 then
|
||||
EscapeCh := chFromUCN
|
||||
else begin
|
||||
EscapeCh := 0;
|
||||
if not skipping then
|
||||
Error(146);
|
||||
end; {else}
|
||||
if charStrPrefix = prefix_none then begin
|
||||
chFromUCN := ConvertUCSToMacRoman(codePoint);
|
||||
if chFromUCN >= 0 then
|
||||
EscapeCh := chFromUCN
|
||||
else begin
|
||||
EscapeCh := 0;
|
||||
if not skipping then
|
||||
Error(146);
|
||||
end; {else}
|
||||
end {if}
|
||||
else
|
||||
EscapeCh := codePoint;
|
||||
end;
|
||||
'''','"','?','\': EscapeCh := ord(ch);
|
||||
otherwise: Error(57);
|
||||
@ -4327,7 +4523,10 @@ var
|
||||
end; {else}
|
||||
end {if}
|
||||
else
|
||||
EscapeCh := ord(ch);
|
||||
if charStrPrefix = prefix_none then
|
||||
EscapeCh := ord(ch)
|
||||
else
|
||||
EscapeCh := ConvertMacRomanToUCS(ord(ch));
|
||||
if skipChar then
|
||||
NextCh;
|
||||
end; {EscapeCh}
|
||||
@ -4353,11 +4552,20 @@ var
|
||||
{skip the leading quote}
|
||||
NextCh;
|
||||
|
||||
if charStrPrefix = prefix_L then begin
|
||||
charStrPrefix := prefix_u16;
|
||||
if not skipping then
|
||||
Error(167);
|
||||
end; {if}
|
||||
|
||||
{read the characters in the constant}
|
||||
while (not (charKinds[ord(ch)] in [ch_char,ch_eol,ch_eof])) do begin
|
||||
if cnt < maxint then
|
||||
cnt := cnt + 1;
|
||||
result := (result << 8) | EscapeCh;
|
||||
if charStrPrefix = prefix_none then
|
||||
result := (result << 8) | EscapeCh
|
||||
else
|
||||
result := EscapeCh;
|
||||
end; {while}
|
||||
doingStringOrCharacter := false;
|
||||
|
||||
@ -4371,16 +4579,35 @@ var
|
||||
Error(2);
|
||||
|
||||
{create the token}
|
||||
if allowLongIntChar and (cnt >= 3) then begin
|
||||
token.kind := longconst;
|
||||
if charStrPrefix = prefix_none then begin
|
||||
if allowLongIntChar and (cnt >= 3) then begin
|
||||
token.kind := longconst;
|
||||
token.class := longConstant;
|
||||
token.lval := result;
|
||||
end {if}
|
||||
else begin
|
||||
token.kind := intconst;
|
||||
token.class := intConstant;
|
||||
token.ival := long(result).lsw;
|
||||
end; {else}
|
||||
end {if}
|
||||
else if charStrPrefix = prefix_u16 then begin
|
||||
token.kind := ushortconst;
|
||||
token.class := intConstant;
|
||||
if octHexEscape then
|
||||
token.ival := long(result).lsw
|
||||
else begin
|
||||
UTF16Encode(result, utf16);
|
||||
token.ival := utf16.codeUnits[1];
|
||||
end; {else}
|
||||
end {else if}
|
||||
else if charStrPrefix = prefix_U32 then begin
|
||||
token.kind := ulongconst;
|
||||
token.class := longConstant;
|
||||
token.lval := result;
|
||||
end {if}
|
||||
else begin
|
||||
token.kind := intconst;
|
||||
token.class := intConstant;
|
||||
token.ival := long(result).lsw;
|
||||
end {else}
|
||||
end; {else if}
|
||||
|
||||
charStrPrefix := prefix_none; {no prefix for next char/str (so far)}
|
||||
end; {CharConstant}
|
||||
|
||||
|
||||
@ -4499,6 +4726,7 @@ while charKinds[ord(ch)] in [illegal,ch_white,ch_eol] do begin
|
||||
tokenLine := lineNumber; {record the position of the token}
|
||||
tokenColumn := ord(ord4(chPtr)-ord4(firstPtr));
|
||||
tokenStart := pointer(ord4(chPtr)-1);
|
||||
6:
|
||||
token.class := reservedSymbol; {default to the most common class}
|
||||
case charKinds[ord(ch)] of
|
||||
|
||||
@ -4740,49 +4968,123 @@ case charKinds[ord(ch)] of
|
||||
doingStringOrCharacter := true; {change character scanning}
|
||||
token.kind := stringconst; {set up the token}
|
||||
token.class := stringConstant;
|
||||
i := 0; {set up for the string scan}
|
||||
ispstring := false;
|
||||
ispstring := false; {set up for the string scan}
|
||||
setLength := false;
|
||||
new(sPtr,s100);
|
||||
NextCh; {skip the opening "}
|
||||
{read the characters}
|
||||
while not (charKinds[ord(ch)] in [ch_string,ch_eol,ch_eof]) do begin
|
||||
i := i+1;
|
||||
if i = 101 then begin
|
||||
sPtr^.len1 := 100;
|
||||
new(tsPtr,s1000);
|
||||
CopyLongString(pointer(tsPtr), pointer(sPtr));
|
||||
dispose(sPtr);
|
||||
sPtr := tsPtr;
|
||||
end {if}
|
||||
else if i = 1001 then begin
|
||||
sPtr^.len2 := 1000;
|
||||
new(tsPtr,s4000);
|
||||
CopyLongString(pointer(tsPtr), pointer(sPtr));
|
||||
dispose(sPtr);
|
||||
sPtr := tsPtr;
|
||||
end {else if}
|
||||
else if i = longstringlen then begin
|
||||
i := 1001;
|
||||
Error(90);
|
||||
end; {else if}
|
||||
sPtr^.str1[i] := chr(EscapeCh);
|
||||
if (i = 1) and ispstring then
|
||||
setLength := true;
|
||||
end; {while}
|
||||
if charStrPrefix = prefix_none then begin
|
||||
i := 0;
|
||||
new(sPtr,s100);
|
||||
while not (charKinds[ord(ch)] in [ch_string,ch_eol,ch_eof]) do begin
|
||||
i := i+1;
|
||||
if i = 101 then begin
|
||||
sPtr^.len1 := 100;
|
||||
new(tsPtr,s1000);
|
||||
CopyLongString(pointer(tsPtr), pointer(sPtr));
|
||||
dispose(sPtr);
|
||||
sPtr := tsPtr;
|
||||
end {if}
|
||||
else if i = 1001 then begin
|
||||
sPtr^.len2 := 1000;
|
||||
new(tsPtr,s4000);
|
||||
CopyLongString(pointer(tsPtr), pointer(sPtr));
|
||||
dispose(sPtr);
|
||||
sPtr := tsPtr;
|
||||
end {else if}
|
||||
else if i = longstringlen then begin
|
||||
i := 1001;
|
||||
Error(90);
|
||||
end; {else if}
|
||||
sPtr^.str1[i] := chr(ord(EscapeCh));
|
||||
if (i = 1) and ispstring then
|
||||
setLength := true;
|
||||
end; {while}
|
||||
end {if}
|
||||
else begin
|
||||
if charStrPrefix = prefix_L then begin
|
||||
charStrPrefix := prefix_u16;
|
||||
if not skipping then
|
||||
Error(167);
|
||||
end; {if}
|
||||
i := 1;
|
||||
new(sPtr,s4000);
|
||||
while not (charKinds[ord(ch)] in [ch_string,ch_eol,ch_eof]) do begin
|
||||
if i > longstringlen-8 then begin {leave space for char and null}
|
||||
i := 1;
|
||||
Error(90);
|
||||
end; {if}
|
||||
codePoint := EscapeCh;
|
||||
if charStrPrefix = prefix_u8 then begin
|
||||
if octHexEscape then begin
|
||||
sPtr^.str1[i] := chr(ord(codePoint));
|
||||
i := i+1;
|
||||
end {if}
|
||||
else begin
|
||||
UTF8Encode(codePoint, utf8);
|
||||
for j := 1 to utf8.length do begin
|
||||
sPtr^.str1[i] := chr(utf8.bytes[j]);
|
||||
i := i+1;
|
||||
end; {for}
|
||||
end; {else}
|
||||
end {if}
|
||||
else if charStrPrefix = prefix_u16 then begin
|
||||
c16ptr := pointer(@sPtr^.str1[i]);
|
||||
if octHexEscape then begin
|
||||
c16ptr^ := ord(codePoint);
|
||||
i := i+2;
|
||||
end {if}
|
||||
else begin
|
||||
UTF16Encode(codePoint, utf16);
|
||||
c16Ptr^ := utf16.codeUnits[1];
|
||||
i := i+2;
|
||||
if utf16.length = 2 then begin
|
||||
c16ptr := pointer(@sPtr^.str1[i]);
|
||||
c16Ptr^ := utf16.codeUnits[2];
|
||||
i := i+2;
|
||||
end; {if}
|
||||
end {else}
|
||||
end {else}
|
||||
else if charStrPrefix = prefix_U32 then begin
|
||||
c32ptr := pointer(@sPtr^.str1[i]);
|
||||
c32ptr^ := codePoint;
|
||||
i := i+4;
|
||||
end {else}
|
||||
end; {while}
|
||||
i := i-1;
|
||||
end; {else}
|
||||
doingStringOrCharacter := false; {process the end of the string}
|
||||
if ch = '"' then
|
||||
NextCh
|
||||
else
|
||||
Error(3);
|
||||
if setLength then {check for a p-string}
|
||||
sPtr^.str1[1] := chr(i-1);
|
||||
if charStrPrefix <> prefix_none then begin
|
||||
if not skipping then
|
||||
Error(165);
|
||||
setLength := false;
|
||||
end {if}
|
||||
else
|
||||
sPtr^.str1[1] := chr(i-1);
|
||||
token.ispstring := setLength;
|
||||
sPtr^.len1 := i; {set the string length}
|
||||
token.sval := pointer(Malloc(i+3)); {put the string in the string pool}
|
||||
token.sval := pointer(Malloc(i+6)); {put the string in the string pool}
|
||||
CopyLongString(token.sval, pointer(sPtr));
|
||||
dispose(sPtr);
|
||||
token.sval^.str[i+1] := chr(0); {add null in case the string is extended}
|
||||
token.sval^.str[i+1] := chr(0); {add null terminator}
|
||||
if charStrPrefix = prefix_u16 then begin
|
||||
token.sval^.str[i+2] := chr(0);
|
||||
token.sval^.length := i+2;
|
||||
end {if}
|
||||
else if charStrPrefix = prefix_U32 then begin
|
||||
token.sval^.str[i+2] := chr(0);
|
||||
token.sval^.str[i+3] := chr(0);
|
||||
token.sval^.str[i+4] := chr(0);
|
||||
token.sval^.length := i+4;
|
||||
end {else if}
|
||||
else
|
||||
token.sval^.length := i+1;
|
||||
token.prefix := charStrPrefix; {record prefix}
|
||||
charStrPrefix := prefix_none; {no prefix for next char/str (so far)}
|
||||
end;
|
||||
|
||||
letter,ch_backslash: begin {reserved words and identifiers}
|
||||
@ -4819,6 +5121,25 @@ case charKinds[ord(ch)] of
|
||||
end; {if}
|
||||
end; {while}
|
||||
workString[0] := chr(i);
|
||||
if i = 1 then begin {detect prefixed char/string literal}
|
||||
if charKinds[ord(ch)] in [ch_char,ch_string] then begin
|
||||
if workString[1] in ['L','u','U'] then begin
|
||||
if workString[1] = 'L' then
|
||||
charStrPrefix := prefix_L
|
||||
else if workString[1] = 'u' then
|
||||
charStrPrefix := prefix_u16
|
||||
else if workString[1] = 'U' then
|
||||
charStrPrefix := prefix_U32;
|
||||
goto 6;
|
||||
end; {if}
|
||||
end; {if}
|
||||
end {if}
|
||||
else if i = 2 then
|
||||
if charKinds[ord(ch)] = ch_string then
|
||||
if workString = 'u8' then begin
|
||||
charStrPrefix := prefix_u8;
|
||||
goto 6;
|
||||
end; {if}
|
||||
CheckIdentifier;
|
||||
end;
|
||||
|
||||
|
54
Symbol.pas
54
Symbol.pas
@ -45,8 +45,11 @@
|
||||
{ boolPtr - pointer to the base type for _Bool }
|
||||
{ voidPtr - pointer to the base type for void }
|
||||
{ voidPtrPtr - typeless pointer, for some type casting }
|
||||
{ stringTypePtr - pointer to the base type for string }
|
||||
{ constants }
|
||||
{ stringTypePtr - pointer to the base type for string literals }
|
||||
{ utf16StringTypePtr - pointer to the base type for UTF-16 }
|
||||
{ string literals }
|
||||
{ utf32StringTypePtr - pointer to the base type for UTF-32 }
|
||||
{ string literals }
|
||||
{ constCharPtr - pointer to the type const char }
|
||||
{ defaultStruct - default for structures with errors }
|
||||
{ }
|
||||
@ -82,8 +85,8 @@ var
|
||||
{base types}
|
||||
charPtr,sCharPtr,uCharPtr,shortPtr,uShortPtr,intPtr,uIntPtr,int32Ptr,
|
||||
uInt32Ptr,longPtr,uLongPtr,longLongPtr,uLongLongPtr,boolPtr,
|
||||
floatPtr,doublePtr,compPtr,extendedPtr,stringTypePtr,voidPtr,
|
||||
voidPtrPtr,constCharPtr,defaultStruct: typePtr;
|
||||
floatPtr,doublePtr,compPtr,extendedPtr,stringTypePtr,utf16StringTypePtr,
|
||||
utf32StringTypePtr,voidPtr,voidPtrPtr,constCharPtr,defaultStruct: typePtr;
|
||||
|
||||
{---------------------------------------------------------------}
|
||||
|
||||
@ -229,6 +232,14 @@ procedure ResolveForwardReference (iPtr: identPtr);
|
||||
{ parameters: }
|
||||
{ iPtr - ptr to the forward declared identifier }
|
||||
|
||||
|
||||
function StringType(prefix: charStrPrefixEnum): typePtr;
|
||||
|
||||
{ returns the type of a string literal with specified prefix }
|
||||
{ }
|
||||
{ parameters: }
|
||||
{ prefix - the prefix }
|
||||
|
||||
{---------------------------------------------------------------}
|
||||
|
||||
implementation
|
||||
@ -1559,6 +1570,24 @@ with stringTypePtr^ do begin
|
||||
aType := charPtr;
|
||||
elements := 1;
|
||||
end; {with}
|
||||
new(utf16StringTypePtr); {UTF-16 string constant type}
|
||||
with utf16StringTypePtr^ do begin
|
||||
size := 0;
|
||||
saveDisp := 0;
|
||||
qualifiers := [];
|
||||
kind := arrayType;
|
||||
aType := uShortPtr;
|
||||
elements := 1;
|
||||
end; {with}
|
||||
new(utf32StringTypePtr); {UTF-32 string constant type}
|
||||
with utf32StringTypePtr^ do begin
|
||||
size := 0;
|
||||
saveDisp := 0;
|
||||
qualifiers := [];
|
||||
kind := arrayType;
|
||||
aType := uLongPtr;
|
||||
elements := 1;
|
||||
end; {with}
|
||||
new(voidPtr); {void}
|
||||
with voidPtr^ do begin
|
||||
size := 0;
|
||||
@ -1940,6 +1969,23 @@ if tPtr^.kind in [structType,unionType] then begin
|
||||
end; {if}
|
||||
end; {ResolveForwardReference}
|
||||
|
||||
|
||||
function StringType{prefix: charStrPrefixEnum): typePtr};
|
||||
|
||||
{ returns the type of a string literal with specified prefix }
|
||||
{ }
|
||||
{ parameters: }
|
||||
{ prefix - the prefix }
|
||||
|
||||
begin {StringType}
|
||||
if prefix in [prefix_none,prefix_u8] then
|
||||
StringType := stringTypePtr
|
||||
else if prefix in [prefix_u16,prefix_L] then
|
||||
StringType := utf16StringTypePtr
|
||||
else
|
||||
StringType := utf32StringTypePtr;
|
||||
end; {StringType}
|
||||
|
||||
end.
|
||||
|
||||
{$append 'symbol.asm'}
|
||||
|
@ -286,7 +286,7 @@ charSym start single character symbols
|
||||
! constants
|
||||
enum (intconst,uintconst,longconst,ulongconst,longlongconst)
|
||||
enum (ulonglongconst,floatconst,doubleconst,extendedconst,compconst)
|
||||
enum (charconst,scharconst,ucharconst,stringconst)
|
||||
enum (charconst,scharconst,ucharconst,ushortconst,stringconst)
|
||||
! reserved words
|
||||
enum (_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy)
|
||||
enum (_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy)
|
||||
@ -366,6 +366,7 @@ icp start in-coming priority for expression
|
||||
dc i1'200' charconst
|
||||
dc i1'200' scharconst
|
||||
dc i1'200' ucharconst
|
||||
dc i1'200' ushortconst
|
||||
dc i1'200' stringconst
|
||||
dc i1'200' _Alignassy
|
||||
dc i1'16' _Alignofsy
|
||||
@ -539,6 +540,7 @@ isp start in stack priority for expression
|
||||
dc i1'0' charconst
|
||||
dc i1'0' scharconst
|
||||
dc i1'0' ucharconst
|
||||
dc i1'0' ushortconst
|
||||
dc i1'0' stringconst
|
||||
dc i1'0' _Alignassy
|
||||
dc i1'16' _Alignofsy
|
||||
@ -912,7 +914,7 @@ wordHash start reserved word hash table
|
||||
! constants
|
||||
enum (intconst,uintconst,longconst,ulongconst,longlongconst)
|
||||
enum (ulonglongconst,floatconst,doubleconst,extendedconst,compconst)
|
||||
enum (charconst,scharconst,ucharconst,stringconst)
|
||||
enum (charconst,scharconst,ucharconst,ushortconst,stringconst)
|
||||
! reserved words
|
||||
enum (_Alignassy,_Alignofsy,_Atomicsy,_Boolsy,_Complexsy)
|
||||
enum (_Genericsy,_Imaginarysy,_Noreturnsy,_Static_assertsy,_Thread_localsy)
|
||||
|
13
cc.notes
13
cc.notes
@ -116,6 +116,8 @@ p. 237
|
||||
|
||||
ORCA/C now supports character constants containing multiple characters. See "Multi-Character Character Constants," below.
|
||||
|
||||
Character and string constants may now have prefixes indicating they should use Unicode encodings. See "New Language Features," below.
|
||||
|
||||
p. 238
|
||||
|
||||
The limit on the total length of string constants in a single function has been raised to 12500 characters.
|
||||
@ -128,7 +130,7 @@ p. 240
|
||||
|
||||
The discussion of escape sequences states that hexadecimal numeric escape sequences can contain from one to three digits. This was true until ORCA/C 2.1, when the compiler was changed to respect the ANSI C standard. The compiler will now scan a hexadecimal escape sequence until no more hexadecimal characters are found. (ORCA/C 2.1 would also scan an octal escape sequence until no more octal characters were found, but this has been changed back to limit octal escape sequences to at most three octal digits, as required by the C standards.)
|
||||
|
||||
The value of an octal or hexadecimal escape sequence must be within the range of representable character values (0-255). Also, \ may not be followed by a character other than one of the ones described as forming an escape sequence. ORCA/C now gives an error in these cases. Accordingly, the examples of "\410" and '\g' mentioned in the manual are now treated as errors.
|
||||
The value of an octal or hexadecimal escape sequence must be within the range of representable values in the relevant type (0-255 for char). Also, \ may not be followed by a character other than one of the ones described as forming an escape sequence. ORCA/C now gives an error in these cases. Accordingly, the examples of "\410" and '\g' mentioned in the manual are now treated as errors.
|
||||
|
||||
p. 241
|
||||
|
||||
@ -373,7 +375,7 @@ and may appear wherever a declaration can (including inside and outside function
|
||||
|
||||
These behave the same as the existing tokens [, ], {, }, #, and ## (respectively), apart from their spelling.
|
||||
|
||||
14. (C99) Universal character names are now supported in string literals, character constants, and identifiers. These are sequences of the form \unnnn or \Unnnnnnnn, where the nnnn or nnnnnnnn is a hexadecimal representation of a Unicode code point. These may be used to represent characters in a way that is independent of the source and execution character sets. In a string literal or character constant, only characters that can be mapped to the execution character set may be represented. There are also certain other restrictions on what characters can be used; see the C standards for details. For ORCA/C the source and execution character sets are both considered to be Mac OS Roman, the character set used in the IIGS desktop environment.
|
||||
14. (C99) Universal character names are now supported in string literals, character constants, and identifiers. These are sequences of the form \unnnn or \Unnnnnnnn, where the nnnn or nnnnnnnn is a hexadecimal representation of a Unicode code point. These may be used to represent characters in a way that is independent of the source and execution character sets. In an unprefixed string literal or character constant, only characters that can be mapped to the execution character set may be represented. There are also certain other restrictions on what characters can be used; see the C standards for details. In ORCA/C the source and execution character sets are both considered to be the character set used in the IIGS desktop environment, known as Mac OS Roman.
|
||||
|
||||
15. (C99) Function-like macros may take a variable number of arguments. To specify such a macro, include "..." as the last (or only) member of its parameter list. When the macro is used, one or more corresponding arguments may be provided. To access these arguments within the macro, use the special identifier __VA_ARGS__. This functions similarly to a normal macro parameter, but its expansion includes the tokens from all the corresponding arguments that were provided, including any commas separating arguments.
|
||||
|
||||
@ -385,6 +387,8 @@ __STDC_HOSTED__ normally expands to the integer constant 1, indicating that ORCA
|
||||
|
||||
__STDC_NO_ATOMICS__, __STDC_NO_COMPLEX__, __STDC_NO_THREADS__, and __STDC_NO_VLA__ all expand to the integer constant 1. These indicate that ORCA/C does not implement certain C language features that are optional under the C11 and later standards (atomics, complex numbers, threads, and variable length arrays).
|
||||
|
||||
__STDC_UTF_16__ and __STDC_UTF_32__ expand to the integer constant 1. These indicate that the char16_t and char32_t types (discussed below) use UTF-16 and UTF-32 encodings.
|
||||
|
||||
18. (C99) The _Bool type is now supported. This is a boolean type that can hold the values 0 or 1. When a value of another type is converted to _Bool, the result is 0 if the value compares equal to 0, or 1 otherwise.
|
||||
|
||||
19. (C99) The types "long long" and "unsigned long long" are now supported. In ORCA/C, these are 64-bit integer types, capable of representing a larger range of values than the existing smaller integer types. All operations that can be done on other integer types can now be done on these types as well.
|
||||
@ -430,11 +434,13 @@ As an example, this expression evaluates to 2 because the type of 1+2 is int:
|
||||
|
||||
Generic selection expressions are primarily useful within macros, which can give different behavior based on the types of the arguments passed to them.
|
||||
|
||||
23. (C11) Character constants and string literals may now have prefixes indicating they should use Unicode encodings. The prefixes u8, u, and U indicate UTF-8, UTF-16, and UTF-32 encodings, respectively. The u8 prefix may only be used on string literals. The U and u prefixes may be used on string literals or character constants. U- and u-prefixed character constants have the types char32_t and char16_t (as defined in <uchar.h>); U- and u-prefixed string literals are treated as arrays of those types. For example, the string literal U"abc" designates an array with four members of type char32_t: the three letters encoded in UTF-32, plus a null terminator.
|
||||
|
||||
|
||||
Multi-Character Character Constants
|
||||
-----------------------------------
|
||||
|
||||
Character constants containing multiple characters are now supported, as required by the C standards. The value of such constants is implementation-defined. In ORCA/C, the value is initially set to the ordinal value of the first character, as in a single-character constant. For each subsequent character encountered, the existing value is shifted left by eight bit positions, and the ordinal value of the new character is placed in the lower eight bits. (This is similar to the behavior of GCC and Clang.)
|
||||
Unprefixed character constants containing multiple characters are now supported, as required by the C standards. The value of such constants is implementation-defined. In ORCA/C, the value is initially set to the ordinal value of the first character, as in a single-character constant. For each subsequent character encountered, the existing value is shifted left by eight bit positions, and the ordinal value of the new character is placed in the lower eight bits. (This is similar to the behavior of GCC and Clang.)
|
||||
|
||||
A new bit is also introduced in #pragma ignore that affects the interpretation of such constants. Setting #pragma ignore bit 1 (a value of 2) causes character constants with three or more characters to be treated as having type long, rather than type int. This non-standard feature effectively allows a character constant to contain the values of up to four characters, rather than only two.
|
||||
|
||||
@ -579,6 +585,7 @@ ORCA/C now includes several new headers specified by recent C standards.
|
||||
|
||||
9. (C11) The <uchar.h> header defines the types char16_t and char32_t suitable for holding UTF-16 and UTF-32 code units, and provides functions for handling Unicode characters.
|
||||
|
||||
|
||||
Library Updates
|
||||
---------------
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user