From a6ef872513769f7105cda9b4549a4eed86d65239 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Sun, 12 Feb 2023 18:56:02 -0600 Subject: [PATCH] Add debugging option to detect illegal use of null pointers. This adds debugging code to detect null pointer dereferences, as well as pointer arithmetic on null pointers (which is also undefined behavior, and can lead to later dereferences of the resulting pointers). Note that ORCA/Pascal can already detect null pointer dereferences as part of its more general range-checking code. This implementation for ORCA/C will report the same error as ORCA/Pascal ("Subrange exceeded"). However, it does not include any of the other forms of range checking that ORCA/Pascal does, and (unlike in ORCA/Pascal) it is controlled by a separate flag from stack overflow checking. --- CGI.Comments | 9 +++++++++ CGI.Debug | 4 +++- CGI.pas | 5 ++++- DAG.pas | 15 +++++++++++--- Expression.pas | 54 ++++++++++++++++++++++++++++++++++---------------- Gen.pas | 19 ++++++++++++++++++ Header.pas | 4 +++- Native.pas | 1 + Scanner.pas | 2 ++ cc.notes | 22 ++++++++++++-------- 10 files changed, 104 insertions(+), 31 deletions(-) diff --git a/CGI.Comments b/CGI.Comments index e04c8fa..296ca7e 100644 --- a/CGI.Comments +++ b/CGI.Comments @@ -196,6 +196,15 @@ { a SIZE bit value. Extra bits are dropped. } { } { } +{ pc_ckp - check for null pointer } +{ } +{ Gen0(pc_ckp) } +{ Gen0(pc_ckn) } +{ } +{ Make sure a pointer value is not null. The pc_ckp form } +{ checks the value at tos; pc_ckn checks the value at tos-1. 
} +{ } +{ } { pc_cop - copy to a local variable } { } { Gen2t(pc_cop, label, disp, type) } diff --git a/CGI.Debug b/CGI.Debug index 56e5ac5..5b93648 100644 --- a/CGI.Debug +++ b/CGI.Debug @@ -134,6 +134,8 @@ opt[pc_sqr] := 'sqr'; opt[pc_wsr] := 'wsr'; opt[pc_rbo] := 'rbo'; opt[pc_rev] := 'rev'; +opt[pc_ckp] := 'ckp'; +opt[pc_ckn] := 'ckn'; end; {InitWriteCode} @@ -281,7 +283,7 @@ with code^ do pc_bnt,pc_blx,pc_bnl,pc_ngi,pc_ngl,pc_ngr,pc_ixa,pc_mdl, pc_udi,pc_udl,pc_bqr,pc_bqx,pc_baq,pc_bnq,pc_ngq,pc_adq,pc_sbq, pc_mpq,pc_umq,pc_dvq,pc_udq,pc_mdq,pc_uqm,pc_slq,pc_sqr,pc_wsr, - pc_rbo,pc_sll,pc_shr,pc_usr,pc_slr,pc_vsr: ; + pc_rbo,pc_sll,pc_shr,pc_usr,pc_slr,pc_vsr,pc_ckp,pc_ckn: ; dc_prm: diff --git a/CGI.pas b/CGI.pas index 759d137..eadb764 100644 --- a/CGI.pas +++ b/CGI.pas @@ -254,7 +254,8 @@ type pc_gli,pc_gdl,pc_gld,pc_cpi,pc_tri,pc_lbu,pc_lbf,pc_sbf,pc_cbf,dc_cns, dc_prm,pc_nat,pc_bno,pc_nop,pc_psh,pc_ili,pc_iil,pc_ild,pc_idl, pc_bqr,pc_bqx,pc_baq,pc_bnq,pc_ngq,pc_adq,pc_sbq,pc_mpq,pc_umq,pc_dvq, - pc_udq,pc_mdq,pc_uqm,pc_slq,pc_sqr,pc_wsr,pc_rbo,pc_fix,pc_rev); + pc_udq,pc_mdq,pc_uqm,pc_slq,pc_sqr,pc_wsr,pc_rbo,pc_fix,pc_rev,pc_ckp, + pc_ckn); {intermediate code} {-----------------} @@ -333,6 +334,7 @@ var {quality or characteristics of } {code } {------------------------------} + checkNullPointers: boolean; {check for null pointer dereferences?} checkStack: boolean; {check stack for stack errors?} cLineOptimize: boolean; {+o flag set?} code: icptr; {current intermediate code record} @@ -843,6 +845,7 @@ profileFlag := false; {don't generate profiling code} debugFlag := false; {don't generate debug code} debugStrFlag := false; {don't generate gsbug debug strings} traceBack := false; {don't generate traceback code} +checkNullPointers := false; {don't check null pointers} volatile := false; {no volatile qualifiers found} registers := cLineOptimize; {don't do register optimizations} diff --git a/DAG.pas b/DAG.pas index 84503e8..c00c2d1 100644 --- a/DAG.pas +++ 
b/DAG.pas @@ -502,7 +502,7 @@ var [pc_mov,pc_cbf,pc_cop,pc_cpi,pc_cpo,pc_gil,pc_gli,pc_gdl, pc_gld,pc_iil,pc_ili,pc_idl,pc_ild,pc_lil,pc_lli,pc_ldl, pc_lld,pc_sbf,pc_sro,pc_sto,pc_str,pc_cui,pc_cup,pc_tl1, - pc_fix] then + pc_fix,pc_ckp] then SideEffects := true else if op^.opcode = pc_ldc then SideEffects := false @@ -2817,7 +2817,7 @@ case op^.opcode of pc_cnn, pc_cnv: TypeOf := baseTypeEnum(op^.q & $000F); - pc_stk: + pc_stk, pc_ckp: TypeOf := TypeOf(op^.left); pc_bno: @@ -5479,7 +5479,7 @@ case code^.opcode of pc_bnt, pc_bnl, pc_cnv, pc_dec, pc_inc, pc_ind, pc_lbf, pc_lbu, pc_ngi, pc_ngl, pc_ngr, pc_not, pc_stk, pc_cop, pc_cpo, pc_tl1, pc_sro, pc_str, pc_fjp, pc_tjp, pc_xjp, pc_cup, pc_pop, pc_iil, - pc_ili, pc_idl, pc_ild, pc_bnq, pc_ngq, pc_rbo, pc_rev: + pc_ili, pc_idl, pc_ild, pc_bnq, pc_ngq, pc_rbo, pc_rev, pc_ckp: begin code^.left := Pop; Push(code); @@ -5514,6 +5514,15 @@ case code^.opcode of Push(code); end; + pc_ckn: + begin + code^.opcode := pc_ckp; + temp := Pop; + code^.left := Pop; + Push(code); + Push(temp); + end; + pc_cnn: begin code^.opcode := pc_cnv; diff --git a/Expression.pas b/Expression.pas index 0bc28ac..35bb113 100644 --- a/Expression.pas +++ b/Expression.pas @@ -2811,6 +2811,8 @@ procedure ChangePointer (op: pcodes; size: longint; tp: baseTypeEnum); begin {ChangePointer} if size = 0 then Error(122); +if checkNullPointers then + Gen0(pc_ckn); case tp of cgByte,cgUByte,cgWord,cgUWord: begin if (size = long(size).lsw) and (op = pc_adl) @@ -2937,7 +2939,7 @@ var end; {ExpressionKind} - procedure LoadAddress (tree: tokenPtr); + procedure LoadAddress (tree: tokenPtr; nullCheck: boolean); { load the address of an l-value } { } @@ -2994,7 +2996,7 @@ var {evaluate a compound literal and load its address} AutoInit(tree^.id, 0, true); tree^.token.kind := ident; - LoadAddress(tree); + LoadAddress(tree, false); tree^.token.kind := compoundliteral; Gen0t(pc_bno, cgULong); end {if} @@ -3002,6 +3004,8 @@ var {load the address of the item pointed to by 
the pointer} GenerateCode(tree^.left); + if nullCheck then + Gen0(pc_ckp); isBitField := false; if not (expressionType^.kind in [pointerType,arrayType,functionType]) then Error(79); @@ -3009,7 +3013,7 @@ var else if tree^.token.kind = dotch then begin {load the address of a field of a record} - LoadAddress(tree^.left); + LoadAddress(tree^.left, nullCheck); eType := expressionType; if eType^.kind in [arrayType,pointerType] then begin if eType^.kind = arrayType then @@ -3032,15 +3036,18 @@ var else if tree^.token.kind = castoper then begin {load the address of a field of a record} - LoadAddress(tree^.left); + LoadAddress(tree^.left, nullCheck); expressionType := tree^.castType; if expressionType^.kind <> arrayType then expressionType := MakePointerTo(expressionType); end {else if} else if ExpressionKind(tree) in [arrayType,pointerType,structType,unionType] - then - GenerateCode(tree) + then begin + GenerateCode(tree); + if nullCheck then + Gen0(pc_ckp); + end {else if} else begin expressionType := intPtr; {set default type in case of error} if doDispose then {prevent spurious errors} @@ -3123,6 +3130,8 @@ var end; {case} pointerType,arrayType: begin + if checkNullPointers then + Gen0(pc_ckp); GenldcLong(expressionType^.pType^.size); if inc then Gen0(pc_adl) @@ -3201,10 +3210,12 @@ var lSize := iType^.pType^.size; if lSize = 0 then Error(122); - if long(lSize).msw <> 0 then begin + if (long(lSize).msw <> 0) or checkNullPointers then begin - {handle inc/dec of >64K} + {handle inc/dec of >64K or with null pointer check} LoadScalar(tree^.id); + if checkNullPointers then + Gen0(pc_ckp); GenLdcLong(lSize); if pc_l in [pc_lli,pc_lil] then Gen0(pc_adl) @@ -3242,7 +3253,7 @@ var else begin {do an indirect ++ or --} - LoadAddress(tree); {get the address to save to} + LoadAddress(tree, checkNullPointers); {get the address to save to} if expressionType^.kind = arrayType then expressionType := expressionType^.aType else if expressionType^.kind = pointerType then @@ -3508,7 
+3519,9 @@ var if (ftype^.toolNum = 0) and (ftype^.dispatcher = 0) then begin if indirect then begin fntype := expressionType; - GenerateCode(ftree); + GenerateCode(ftree); + if checkNullPointers then + Gen0(pc_ckp); expressionType := fntype; Gen1t(pc_cui, ord(hasVarargs and strictVararg), UsualUnaryConversions); @@ -3683,15 +3696,15 @@ case tree^.token.kind of arrayType: begin - LoadAddress(tree); + LoadAddress(tree, false); expressionType := expressionType^.ptype; end; functionType: - LoadAddress(tree); + LoadAddress(tree, false); structType, unionType: begin - LoadAddress(tree); + LoadAddress(tree, false); if expressionType^.kind = pointerType then expressionType := expressionType^.ptype; CheckForIncompleteStructType; @@ -3817,7 +3830,7 @@ case tree^.token.kind of end; {with} end {if} else begin - LoadAddress(tree^.left); + LoadAddress(tree^.left, checkNullPointers); lType := expressionType; lisBitField := isBitField; lbitDisp := bitDisp; @@ -3874,7 +3887,7 @@ case tree^.token.kind of end {if} else begin doingScalar := false; - LoadAddress(tree^.left); + LoadAddress(tree^.left, checkNullPointers); lisBitField := isBitField; lbitDisp := bitDisp; lbitSize := bitSize; @@ -4388,6 +4401,10 @@ case tree^.token.kind of {NOTE: assumes aType & pType overlap in typeRecord} else if not CompTypes(lType^.aType, expressionType^.aType) then Error(47); + if checkNullPointers then begin + Gen0(pc_ckn); + Gen0(pc_ckp); + end; {if} Gen0(pc_sbl); if size <> 1 then begin GenLdcLong(size); @@ -4629,7 +4646,7 @@ case tree^.token.kind of if not (tree^.left^.token.kind in [ident,compoundliteral,stringconst,uasterisk]) then L_Value(tree^.left); - LoadAddress(tree^.left); + LoadAddress(tree^.left, false); if tree^.left^.token.kind = stringconst then begin {build pointer-to-array type for address of string constant} tType := pointer(Malloc(sizeof(typeRecord))); @@ -4653,6 +4670,9 @@ case tree^.token.kind of lType := lType^.pType; expressionType := lType; isVolatile := tqVolatile in 
lType^.qualifiers; + if checkNullPointers then + if lType^.kind <> functionType then + Gen0(pc_ckp); if lType^.kind = scalarType then if lType^.baseType = cgVoid then Gen2(pc_cnv, cgULong, cgVoid) @@ -4673,7 +4693,7 @@ case tree^.token.kind of end; {case uasterisk} dotch: begin {.} - LoadAddress(tree^.left); + LoadAddress(tree^.left, checkNullPointers); lType := expressionType; if lType^.kind in [arrayType,pointerType,structType,unionType] then begin if lType^.kind = arrayType then diff --git a/Gen.pas b/Gen.pas index 9b28d7e..50c9e52 100644 --- a/Gen.pas +++ b/Gen.pas @@ -999,6 +999,24 @@ else {if op^.opcode = pc_sbq then} begin end; {GenAdqSbq} +procedure GenCkp (op: icptr); + +{ generate code for pc_ckp (check for null pointer) } +{ The operand op^.left is always generated; the run-time check call is emitted for every operand except pc_lda, pc_lad, pc_lca and pc_lao. } +{ parameters: } +{ op - pc_ckp operation; op^.left is the pointer value to check } + +begin {GenCkp} +if op^.left^.opcode in [pc_lda,pc_lad,pc_lca,pc_lao] then {no check emitted for these opcodes; presumably address loads that can never yield null - TODO confirm} + GenTree(op^.left) +else begin + gLong.preference := onStack; {request the value on the stack; presumably where ~CHECKPTRC reads it - TODO confirm} + GenTree(op^.left); + GenCall(98); {call 98 is ~CHECKPTRC in Native.pas} + end; {else} +end; {GenCkp} + + procedure GenCmp (op: icptr; rOpcode: pcodes; lb: integer); { generate code for pc_les, pc_leq, pc_grt or pc_geq } @@ -7494,6 +7512,7 @@ case op^.opcode of pc_bnq,pc_ngq: GenUnaryQuad(op); pc_bno: GenBno(op); pc_bnt,pc_ngi,pc_not: GenBntNgiNot(op); + pc_ckp: GenCkp(op); pc_cnv: GenCnv(op); pc_cui: GenCui(op); pc_cup: GenCup(op); diff --git a/Header.pas b/Header.pas index 3e7e978..ecc81fb 100644 --- a/Header.pas +++ b/Header.pas @@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI; {$segment 'HEADER'} const - symFileVersion = 37; {version number of .sym file format} + symFileVersion = 38; {version number of .sym file format} var inhibitHeader: boolean; {should .sym includes be blocked?} @@ -827,6 +827,7 @@ procedure EndInclude {chPtr: ptr}; | (ord(profileFlag) << 2) | (ord(traceBack) << 3) | (ord(checkStack) << 4) + | (ord(checkNullPointers) << 5) | (ord(debugStrFlag) << 15)); p_lint: begin @@ -1496,6 +1497,7 @@ var profileFlag := odd(val >> 2); traceback := odd(val >> 3); checkStack := odd(val >> 4); +
checkNullPointers := odd(val >> 5); debugStrFlag := odd(val >> 15); end; diff --git a/Native.pas b/Native.pas index 08c2f88..5413905 100644 --- a/Native.pas +++ b/Native.pas @@ -2274,6 +2274,7 @@ case callNum of 95: sp := @'~REALFIX'; 96: sp := @'~DOUBLEFIX'; 97: sp := @'~COMPFIX'; + 98: sp := @'~CHECKPTRC'; otherwise: Error(cge1); end; {case} diff --git a/Scanner.pas b/Scanner.pas index b08cb09..8c4d62d 100644 --- a/Scanner.pas +++ b/Scanner.pas @@ -3491,6 +3491,7 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin { 4 - generate profiles } { 8 - generate traceback code } { 16 - check for stack errors } + { 32 - check for null pointer dereferences } { 32768 - generate inline function names } FlagPragmas(p_debug); NumericDirective; @@ -3503,6 +3504,7 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin profileFlag := odd(val >> 2); traceBack := odd(val >> 3); checkStack := odd(val >> 4); + checkNullPointers := odd(val >> 5); debugStrFlag := odd(val >> 15); profileFlag := profileFlag or debugFlag; if token.kind <> eolsy then diff --git a/cc.notes b/cc.notes index 64b4ad5..413ec86 100644 --- a/cc.notes +++ b/cc.notes @@ -16,8 +16,8 @@ Updated by Stephen Heumann and Kelvin Sherlock, 2017-2023 5. Certain errors that were previously ignored are now detected. - 6. New option added to generate inline function names for use with - assembly-level debugging tools. See "Inline Function Names." + 6. Two new #pragma debug bits are defined. See "Additions to + #pragma debug." 7. Some new headers specified by recent C standards are added. See "New Headers." @@ -185,7 +185,7 @@ ORCA/C now supports several standard pragmas of the form "#pragma STDC ...", as p. 256 -The #pragma debug directive supports a new bit. If bit 15 is set, ORCA/C generates inline function names for use with assembly-level debugging tools. See "Inline Function Names," below. +The #pragma debug directive supports two new bits. 
If bit 5 is set, ORCA/C generates code to check for illegal use of null pointers. If bit 15 is set, ORCA/C generates inline function names for use with assembly-level debugging tools. See "Additions to #pragma debug," below. p. 257 @@ -700,14 +700,20 @@ Note that _Generic expressions always use the stricter type compatibility rules (Mike Westerfield, Kelvin Sherlock, Stephen Heumann) -Inline Function Names ---------------------- +Additions to #pragma debug +-------------------------- -ORCA/C now supports recording the names of functions using the inline name format documented in Apple IIGS Technical Note #103. This allows assembly-level debugging tools such as GSBug and Nifty List to display the names of functions in an ORCA/C program while debugging it. To enable generation of inline function names, set #pragma debug bit 15 (a value of 0x8000). +Two new debugging features can now be enabled with #pragma debug: -Note that inline function names are unrelated to the other types of debug code that ORCA/C can generate. In particular, inline function names are not needed for source-level debugging using the desktop development environment or other compatible source-level debuggers, although it is possible to enable both types of debugging information at the same time. +* Checking for illegal use of null pointers: -(Kelvin Sherlock) +Setting #pragma debug bit 5 (a value of 32) turns on checking for illegal use of null pointers. If this bit is set, ORCA/C will detect when your program would dereference a null pointer or when it would do pointer arithmetic on a null pointer. A "Subrange exceeded" error will be reported in these cases. + +* Inline function names: + +Setting #pragma debug bit 15 (a value of 0x8000) causes ORCA/C to record the names of functions using the inline name format documented in Apple IIGS Technical Note #103. 
This allows assembly-level debugging tools such as GSBug and Nifty List to display the names of functions in an ORCA/C program while debugging it. Note that inline function names are unrelated to the other types of debug code that ORCA/C can generate. In particular, inline function names are not needed for source-level debugging using the desktop development environment or other compatible source-level debuggers, although it is possible to enable both types of debugging information at the same time. + +(Stephen Heumann, Kelvin Sherlock) Enhancements to #pragma lint