Add debugging option to detect illegal use of null pointers.

This adds debugging code to detect null pointer dereferences, as well as pointer arithmetic on null pointers (which is also undefined behavior, and can lead to later dereferences of the resulting pointers).

Note that ORCA/Pascal can already detect null pointer dereferences as part of its more general range-checking code. This implementation for ORCA/C will report the same error as ORCA/Pascal ("Subrange exceeded"). However, it does not include any of the other forms of range checking that ORCA/Pascal does, and (unlike in ORCA/Pascal) it is controlled by a separate flag from stack overflow checking.
This commit is contained in:
Stephen Heumann 2023-02-12 18:56:02 -06:00
parent a32ddedc0c
commit a6ef872513
10 changed files with 104 additions and 31 deletions

View File

@ -196,6 +196,15 @@
{ a SIZE bit value. Extra bits are dropped. }
{ }
{ }
{ pc_ckp - check for null pointer }
{ }
{ Gen0(pc_ckp) }
{ Gen0(pc_ckn) }
{ }
{ Make sure a pointer value is not null. The pc_ckp form }
{ checks the value at tos; pc_ckn checks the value at tos-1. }
{ }
{ }
{ pc_cop - copy to a local variable }
{ }
{ Gen2t(pc_cop, label, disp, type) }

View File

@ -134,6 +134,8 @@ opt[pc_sqr] := 'sqr';
opt[pc_wsr] := 'wsr';
opt[pc_rbo] := 'rbo';
opt[pc_rev] := 'rev';
opt[pc_ckp] := 'ckp';
opt[pc_ckn] := 'ckn';
end; {InitWriteCode}
@ -281,7 +283,7 @@ with code^ do
pc_bnt,pc_blx,pc_bnl,pc_ngi,pc_ngl,pc_ngr,pc_ixa,pc_mdl,
pc_udi,pc_udl,pc_bqr,pc_bqx,pc_baq,pc_bnq,pc_ngq,pc_adq,pc_sbq,
pc_mpq,pc_umq,pc_dvq,pc_udq,pc_mdq,pc_uqm,pc_slq,pc_sqr,pc_wsr,
pc_rbo,pc_sll,pc_shr,pc_usr,pc_slr,pc_vsr: ;
pc_rbo,pc_sll,pc_shr,pc_usr,pc_slr,pc_vsr,pc_ckp,pc_ckn: ;
dc_prm:

View File

@ -254,7 +254,8 @@ type
pc_gli,pc_gdl,pc_gld,pc_cpi,pc_tri,pc_lbu,pc_lbf,pc_sbf,pc_cbf,dc_cns,
dc_prm,pc_nat,pc_bno,pc_nop,pc_psh,pc_ili,pc_iil,pc_ild,pc_idl,
pc_bqr,pc_bqx,pc_baq,pc_bnq,pc_ngq,pc_adq,pc_sbq,pc_mpq,pc_umq,pc_dvq,
pc_udq,pc_mdq,pc_uqm,pc_slq,pc_sqr,pc_wsr,pc_rbo,pc_fix,pc_rev);
pc_udq,pc_mdq,pc_uqm,pc_slq,pc_sqr,pc_wsr,pc_rbo,pc_fix,pc_rev,pc_ckp,
pc_ckn);
{intermediate code}
{-----------------}
@ -333,6 +334,7 @@ var
{quality or characteristics of }
{code }
{------------------------------}
checkNullPointers: boolean; {check for null pointer dereferences?}
checkStack: boolean; {check stack for stack errors?}
cLineOptimize: boolean; {+o flag set?}
code: icptr; {current intermediate code record}
@ -843,6 +845,7 @@ profileFlag := false; {don't generate profiling code}
debugFlag := false; {don't generate debug code}
debugStrFlag := false; {don't generate gsbug debug strings}
traceBack := false; {don't generate traceback code}
checkNullPointers := false; {don't check null pointers}
volatile := false; {no volatile qualifiers found}
registers := cLineOptimize; {don't do register optimizations}

15
DAG.pas
View File

@ -502,7 +502,7 @@ var
[pc_mov,pc_cbf,pc_cop,pc_cpi,pc_cpo,pc_gil,pc_gli,pc_gdl,
pc_gld,pc_iil,pc_ili,pc_idl,pc_ild,pc_lil,pc_lli,pc_ldl,
pc_lld,pc_sbf,pc_sro,pc_sto,pc_str,pc_cui,pc_cup,pc_tl1,
pc_fix] then
pc_fix,pc_ckp] then
SideEffects := true
else if op^.opcode = pc_ldc then
SideEffects := false
@ -2817,7 +2817,7 @@ case op^.opcode of
pc_cnn, pc_cnv:
TypeOf := baseTypeEnum(op^.q & $000F);
pc_stk:
pc_stk, pc_ckp:
TypeOf := TypeOf(op^.left);
pc_bno:
@ -5479,7 +5479,7 @@ case code^.opcode of
pc_bnt, pc_bnl, pc_cnv, pc_dec, pc_inc, pc_ind, pc_lbf, pc_lbu,
pc_ngi, pc_ngl, pc_ngr, pc_not, pc_stk, pc_cop, pc_cpo, pc_tl1,
pc_sro, pc_str, pc_fjp, pc_tjp, pc_xjp, pc_cup, pc_pop, pc_iil,
pc_ili, pc_idl, pc_ild, pc_bnq, pc_ngq, pc_rbo, pc_rev:
pc_ili, pc_idl, pc_ild, pc_bnq, pc_ngq, pc_rbo, pc_rev, pc_ckp:
begin
code^.left := Pop;
Push(code);
@ -5514,6 +5514,15 @@ case code^.opcode of
Push(code);
end;
pc_ckn:
begin
code^.opcode := pc_ckp;
temp := Pop;
code^.left := Pop;
Push(code);
Push(temp);
end;
pc_cnn:
begin
code^.opcode := pc_cnv;

View File

@ -2811,6 +2811,8 @@ procedure ChangePointer (op: pcodes; size: longint; tp: baseTypeEnum);
begin {ChangePointer}
if size = 0 then
Error(122);
if checkNullPointers then
Gen0(pc_ckn);
case tp of
cgByte,cgUByte,cgWord,cgUWord: begin
if (size = long(size).lsw) and (op = pc_adl)
@ -2937,7 +2939,7 @@ var
end; {ExpressionKind}
procedure LoadAddress (tree: tokenPtr);
procedure LoadAddress (tree: tokenPtr; nullCheck: boolean);
{ load the address of an l-value }
{ }
@ -2994,7 +2996,7 @@ var
{evaluate a compound literal and load its address}
AutoInit(tree^.id, 0, true);
tree^.token.kind := ident;
LoadAddress(tree);
LoadAddress(tree, false);
tree^.token.kind := compoundliteral;
Gen0t(pc_bno, cgULong);
end {if}
@ -3002,6 +3004,8 @@ var
{load the address of the item pointed to by the pointer}
GenerateCode(tree^.left);
if nullCheck then
Gen0(pc_ckp);
isBitField := false;
if not (expressionType^.kind in [pointerType,arrayType,functionType]) then
Error(79);
@ -3009,7 +3013,7 @@ var
else if tree^.token.kind = dotch then begin
{load the address of a field of a record}
LoadAddress(tree^.left);
LoadAddress(tree^.left, nullCheck);
eType := expressionType;
if eType^.kind in [arrayType,pointerType] then begin
if eType^.kind = arrayType then
@ -3032,15 +3036,18 @@ var
else if tree^.token.kind = castoper then begin
{load the address of the operand of a cast}
LoadAddress(tree^.left);
LoadAddress(tree^.left, nullCheck);
expressionType := tree^.castType;
if expressionType^.kind <> arrayType then
expressionType := MakePointerTo(expressionType);
end {else if}
else if ExpressionKind(tree) in [arrayType,pointerType,structType,unionType]
then
GenerateCode(tree)
then begin
GenerateCode(tree);
if nullCheck then
Gen0(pc_ckp);
end {else if}
else begin
expressionType := intPtr; {set default type in case of error}
if doDispose then {prevent spurious errors}
@ -3123,6 +3130,8 @@ var
end; {case}
pointerType,arrayType: begin
if checkNullPointers then
Gen0(pc_ckp);
GenldcLong(expressionType^.pType^.size);
if inc then
Gen0(pc_adl)
@ -3201,10 +3210,12 @@ var
lSize := iType^.pType^.size;
if lSize = 0 then
Error(122);
if long(lSize).msw <> 0 then begin
if (long(lSize).msw <> 0) or checkNullPointers then begin
{handle inc/dec of >64K}
{handle inc/dec of >64K or with null pointer check}
LoadScalar(tree^.id);
if checkNullPointers then
Gen0(pc_ckp);
GenLdcLong(lSize);
if pc_l in [pc_lli,pc_lil] then
Gen0(pc_adl)
@ -3242,7 +3253,7 @@ var
else begin
{do an indirect ++ or --}
LoadAddress(tree); {get the address to save to}
LoadAddress(tree, checkNullPointers); {get the address to save to}
if expressionType^.kind = arrayType then
expressionType := expressionType^.aType
else if expressionType^.kind = pointerType then
@ -3508,7 +3519,9 @@ var
if (ftype^.toolNum = 0) and (ftype^.dispatcher = 0) then begin
if indirect then begin
fntype := expressionType;
GenerateCode(ftree);
GenerateCode(ftree);
if checkNullPointers then
Gen0(pc_ckp);
expressionType := fntype;
Gen1t(pc_cui, ord(hasVarargs and strictVararg),
UsualUnaryConversions);
@ -3683,15 +3696,15 @@ case tree^.token.kind of
arrayType: begin
LoadAddress(tree);
LoadAddress(tree, false);
expressionType := expressionType^.ptype;
end;
functionType:
LoadAddress(tree);
LoadAddress(tree, false);
structType, unionType: begin
LoadAddress(tree);
LoadAddress(tree, false);
if expressionType^.kind = pointerType then
expressionType := expressionType^.ptype;
CheckForIncompleteStructType;
@ -3817,7 +3830,7 @@ case tree^.token.kind of
end; {with}
end {if}
else begin
LoadAddress(tree^.left);
LoadAddress(tree^.left, checkNullPointers);
lType := expressionType;
lisBitField := isBitField;
lbitDisp := bitDisp;
@ -3874,7 +3887,7 @@ case tree^.token.kind of
end {if}
else begin
doingScalar := false;
LoadAddress(tree^.left);
LoadAddress(tree^.left, checkNullPointers);
lisBitField := isBitField;
lbitDisp := bitDisp;
lbitSize := bitSize;
@ -4388,6 +4401,10 @@ case tree^.token.kind of
{NOTE: assumes aType & pType overlap in typeRecord}
else if not CompTypes(lType^.aType, expressionType^.aType) then
Error(47);
if checkNullPointers then begin
Gen0(pc_ckn);
Gen0(pc_ckp);
end; {if}
Gen0(pc_sbl);
if size <> 1 then begin
GenLdcLong(size);
@ -4629,7 +4646,7 @@ case tree^.token.kind of
if not (tree^.left^.token.kind in
[ident,compoundliteral,stringconst,uasterisk]) then
L_Value(tree^.left);
LoadAddress(tree^.left);
LoadAddress(tree^.left, false);
if tree^.left^.token.kind = stringconst then begin
{build pointer-to-array type for address of string constant}
tType := pointer(Malloc(sizeof(typeRecord)));
@ -4653,6 +4670,9 @@ case tree^.token.kind of
lType := lType^.pType;
expressionType := lType;
isVolatile := tqVolatile in lType^.qualifiers;
if checkNullPointers then
if lType^.kind <> functionType then
Gen0(pc_ckp);
if lType^.kind = scalarType then
if lType^.baseType = cgVoid then
Gen2(pc_cnv, cgULong, cgVoid)
@ -4673,7 +4693,7 @@ case tree^.token.kind of
end; {case uasterisk}
dotch: begin {.}
LoadAddress(tree^.left);
LoadAddress(tree^.left, checkNullPointers);
lType := expressionType;
if lType^.kind in [arrayType,pointerType,structType,unionType] then begin
if lType^.kind = arrayType then

19
Gen.pas
View File

@ -999,6 +999,24 @@ else {if op^.opcode = pc_sbq then} begin
end; {GenAdqSbq}
procedure GenCkp (op: icptr);

{ Generate code for a pc_ckp (null pointer check) operation     }
{                                                               }
{ parameters:                                                   }
{    op - the pc_ckp node; op^.left is the pointer operand      }
{                                                               }
{ The operand is always generated.  The run-time check (library }
{ call 98, ~CHECKPTRC) is emitted after it unless the operand's }
{ opcode is pc_lda, pc_lad, pc_lca or pc_lao.                   }
{ NOTE(review): those opcodes presumably load addresses that    }
{ can never be null, which is why the check is skipped - confirm}

var
   skipCheck: boolean;                  {operand opcode needs no run-time check?}

begin {GenCkp}
{classify the operand before generating any code for it}
skipCheck := op^.left^.opcode in [pc_lda,pc_lad,pc_lca,pc_lao];
if not skipCheck then
   gLong.preference := onStack;         {only set on the checked path}
GenTree(op^.left);
if not skipCheck then
   GenCall(98);                         {~CHECKPTRC}
end; {GenCkp}
procedure GenCmp (op: icptr; rOpcode: pcodes; lb: integer);
{ generate code for pc_les, pc_leq, pc_grt or pc_geq }
@ -7494,6 +7512,7 @@ case op^.opcode of
pc_bnq,pc_ngq: GenUnaryQuad(op);
pc_bno: GenBno(op);
pc_bnt,pc_ngi,pc_not: GenBntNgiNot(op);
pc_ckp: GenCkp(op);
pc_cnv: GenCnv(op);
pc_cui: GenCui(op);
pc_cup: GenCup(op);

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'HEADER'}
const
symFileVersion = 37; {version number of .sym file format}
symFileVersion = 38; {version number of .sym file format}
var
inhibitHeader: boolean; {should .sym includes be blocked?}
@ -827,6 +827,7 @@ procedure EndInclude {chPtr: ptr};
| (ord(profileFlag) << 2)
| (ord(traceBack) << 3)
| (ord(checkStack) << 4)
| (ord(checkNullPointers) << 5)
| (ord(debugStrFlag) << 15));
p_lint: begin
@ -1496,6 +1497,7 @@ var
profileFlag := odd(val >> 2);
traceback := odd(val >> 3);
checkStack := odd(val >> 4);
checkNullPointers := odd(val >> 5);
debugStrFlag := odd(val >> 15);
end;

View File

@ -2274,6 +2274,7 @@ case callNum of
95: sp := @'~REALFIX';
96: sp := @'~DOUBLEFIX';
97: sp := @'~COMPFIX';
98: sp := @'~CHECKPTRC';
otherwise:
Error(cge1);
end; {case}

View File

@ -3491,6 +3491,7 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin
{ 4 - generate profiles }
{ 8 - generate traceback code }
{ 16 - check for stack errors }
{ 32 - check for null pointer dereferences }
{ 32768 - generate inline function names }
FlagPragmas(p_debug);
NumericDirective;
@ -3503,6 +3504,7 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin
profileFlag := odd(val >> 2);
traceBack := odd(val >> 3);
checkStack := odd(val >> 4);
checkNullPointers := odd(val >> 5);
debugStrFlag := odd(val >> 15);
profileFlag := profileFlag or debugFlag;
if token.kind <> eolsy then

View File

@ -16,8 +16,8 @@ Updated by Stephen Heumann and Kelvin Sherlock, 2017-2023
5. Certain errors that were previously ignored are now detected.
6. New option added to generate inline function names for use with
assembly-level debugging tools. See "Inline Function Names."
6. Two new #pragma debug bits are defined. See "Additions to
#pragma debug."
7. Some new headers specified by recent C standards are added.
See "New Headers."
@ -185,7 +185,7 @@ ORCA/C now supports several standard pragmas of the form "#pragma STDC ...", as
p. 256
The #pragma debug directive supports a new bit. If bit 15 is set, ORCA/C generates inline function names for use with assembly-level debugging tools. See "Inline Function Names," below.
The #pragma debug directive supports two new bits. If bit 5 is set, ORCA/C generates code to check for illegal use of null pointers. If bit 15 is set, ORCA/C generates inline function names for use with assembly-level debugging tools. See "Additions to #pragma debug," below.
p. 257
@ -700,14 +700,20 @@ Note that _Generic expressions always use the stricter type compatibility rules
(Mike Westerfield, Kelvin Sherlock, Stephen Heumann)
Inline Function Names
---------------------
Additions to #pragma debug
--------------------------
ORCA/C now supports recording the names of functions using the inline name format documented in Apple IIGS Technical Note #103. This allows assembly-level debugging tools such as GSBug and Nifty List to display the names of functions in an ORCA/C program while debugging it. To enable generation of inline function names, set #pragma debug bit 15 (a value of 0x8000).
Two new debugging features can now be enabled with #pragma debug:
Note that inline function names are unrelated to the other types of debug code that ORCA/C can generate. In particular, inline function names are not needed for source-level debugging using the desktop development environment or other compatible source-level debuggers, although it is possible to enable both types of debugging information at the same time.
* Checking for illegal use of null pointers:
(Kelvin Sherlock)
Setting #pragma debug bit 5 (a value of 32) turns on checking for illegal use of null pointers. If this bit is set, ORCA/C will detect when your program would dereference a null pointer or when it would do pointer arithmetic on a null pointer. A "Subrange exceeded" error will be reported in these cases.
* Inline function names:
Setting #pragma debug bit 15 (a value of 0x8000) causes ORCA/C to record the names of functions using the inline name format documented in Apple IIGS Technical Note #103. This allows assembly-level debugging tools such as GSBug and Nifty List to display the names of functions in an ORCA/C program while debugging it. Note that inline function names are unrelated to the other types of debug code that ORCA/C can generate. In particular, inline function names are not needed for source-level debugging using the desktop development environment or other compatible source-level debuggers, although it is possible to enable both types of debugging information at the same time.
(Stephen Heumann, Kelvin Sherlock)
Enhancements to #pragma lint