From bb1bd176f44d9a0b4f2fc49328a94000493aa0e5 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Wed, 7 Dec 2022 21:35:15 -0600 Subject: [PATCH] Add a command-line option to select the C standard to use. This provides a more straightforward way to place the compiler in a "strict conformance" mode. This could essentially be achieved by setting several pragma options, but having a single setting is simpler. "Compatibility modes" for older standards can also be selected, although these actually continue to enable most C17 features (since they are unlikely to cause compatibility problems for older code). --- CCommon.pas | 5 +++ CGI.pas | 3 +- Scanner.pas | 118 +++++++++++++++++++++++++++++++++++++++++----------- Table.asm | 8 ++++ Table.pas | 1 + cc.notes | 36 ++++++++++++++-- 6 files changed, 143 insertions(+), 28 deletions(-) diff --git a/CCommon.pas b/CCommon.pas index c7fef1b..694e93d 100644 --- a/CCommon.pas +++ b/CCommon.pas @@ -142,6 +142,9 @@ type end; gsosOutStringPtr = ^gsosOutString; + { C language standards } + cStandardEnum = (c89,c95,c99,c11,c17,c23); + { The base types include two main categories. The values starting } { with cg are defined in the code generator, and may be passed to the } { code generator for resolution. 
The cc types are used internally in } @@ -486,6 +489,7 @@ var bofPtr: ptr; {pointer to the start of sourceFile} chPtr: ptr; {pointer to the next character in the file} changedSourceFile: boolean; {source file changed in function?} + cStd: cStandardEnum; {selected C standard} debugSourceFileGS: gsosOutString; {debug source file name} {debugType is also in SCANNER.ASM} debugType: (stop,break,autogo); {line number debug types} @@ -510,6 +514,7 @@ var partialFileGS: gsosOutString; {partial compile list} pragmaKeepFile: gsosOutStringPtr; {filename specified in #pragma keep} sourceFileGS: gsosOutString; {presumed source file name} + strictMode: boolean; {strictly follow standard, without extensions?} tempList: tempPtr; {list of temp work variables} longlong0: longlong; {the value 0 as a longlong} longlong1: longlong; {the value 1 as a longlong} diff --git a/CGI.pas b/CGI.pas index 0e0803b..70ebee7 100644 --- a/CGI.pas +++ b/CGI.pas @@ -823,7 +823,8 @@ currentSegment := ' '; {start with the blank segment} defaultSegment := ' '; smallMemoryModel := true; {small memory model} dataBank := false; {don't save/restore data bank} -strictVararg := not cLineOptimize; {save/restore caller's stack around vararg} +strictVararg := {save/restore caller's stack around vararg} + (not cLineOptimize) or strictMode; saveStack := not cLineOptimize; {save/restore caller's stack reg} checkStack := false; {don't check stack for stack errors} stackSize := 0; {default to the launcher's stack size} diff --git a/Scanner.pas b/Scanner.pas index 7a08ade..b86249c 100644 --- a/Scanner.pas +++ b/Scanner.pas @@ -2038,9 +2038,9 @@ if macro^.readOnly then begin {handle special macros} 9: begin {__STDC_VERSION__} token.kind := longconst; token.class := longconstant; - token.lval := 201710; + token.lval := stdcVersion[cStd]; token.numString := @stdcVersionStr; - stdcVersionStr := '201710L'; + stdcVersionStr := concat(cnvis(token.lval),'L'); tokenStart := @stdcVersionStr[1]; tokenEnd := 
pointer(ord4(tokenStart)+length(stdcVersionStr)); end; @@ -3692,11 +3692,12 @@ if ch in ['a','d','e','i','l','p','u','w'] then begin goto 2; end; {if} 'w': - if token.name^ = 'warning' then begin - if tskipping then goto 2; - DoError(false); - goto 2; - end; {if} + if token.name^ = 'warning' then + if (cStd >= c23) or not strictMode then begin + if tskipping then goto 2; + DoError(false); + goto 2; + end; {if} otherwise: Error(57); end; {case} end; @@ -3797,7 +3798,7 @@ procedure DoNumber {scanWork: boolean}; { after sequence } { workString - string to take numbers from } -label 1; +label 1,2; var c2: char; {next character to process} @@ -3916,17 +3917,23 @@ else begin if c2 in ['x','X','b','B'] then {detect hex numbers} if stringIndex = 1 then if numString[1] = '0' then begin - stringIndex := 2; c2 := chr(ord(c2) & $5f); + if c2 = 'X' then + isHex := true + else {if c2 = 'B' then} + if (cStd >= c23) or not strictMode then + isBin := true + else + goto 2; + stringIndex := 2; numString[2] := c2; - if c2 = 'X' then isHex := true; - if c2 = 'B' then isBin := true; NextChar; GetDigits; if not isHex or not (c2 in ['.','p','P']) then goto 1; end; {if} end; +2: if c2 = '.' 
then begin {handle a decimal} stringIndex := stringIndex+1; numString[stringIndex] := '.'; @@ -4241,6 +4248,7 @@ var lch: char; {next command line character} cp: ptr; {character pointer} i: 0..hashSize; {loop variable} + stdName: stringPtr; {selected C standard} tPtr: tokenListRecordPtr; {for building macros from command line} mp: macroRecordPtr; {for building the predefined macros} @@ -4477,6 +4485,8 @@ pragmaKeepFile := nil; {no #pragma keep file so far} doingFakeFile := false; {not doing a fake file} doingDigitSequence := false; {not expecting a digit sequence} preprocessing := false; {not preprocessing} +cStd := c17; {default to C17} +strictMode := false; {...with extensions} {error codes for lint messages} {if changed, also change maxLint} @@ -4637,16 +4647,6 @@ mp^.algorithm := 7; bp := pointer(ord4(macros) + hash(mp^.name)); mp^.next := bp^; bp^ := mp; -new(mp); {__STDC_VERSION__} -mp^.name := @'__STDC_VERSION__'; -mp^.parameters := -1; -mp^.tokens := nil; -mp^.readOnly := true; -mp^.saved := true; -mp^.algorithm := 9; -bp := pointer(ord4(macros) + hash(mp^.name)); -mp^.next := bp^; -bp^ := mp; new(mp); {_Pragma pseudo-macro} mp^.name := @'_Pragma'; mp^.parameters := 1; @@ -4762,7 +4762,7 @@ repeat end {if} else FlagErrorAndSkip; - end {if} + end {else if} else if lch in ['p','P'] then begin NextCh; {get the filename} if lch = '"' then begin @@ -4775,8 +4775,42 @@ repeat end {if} else FlagErrorAndSkip; - end {if} - else {not -d, -i, -p: flag the error} + end {else if} + else if lch in ['s','S'] then begin + NextCh; + stdName := GetWord; + if (stdName^ = 'c89compat') or (stdName^ = 'c90compat') then begin + cStd := c89; + strictMode := false; + end {if} + else if (stdName^ = 'c94compat') or (stdName^ = 'c95compat') then begin + cStd := c95; + strictMode := false; + end {else if} + else if (stdName^ = 'c99compat') then begin + cStd := c99; + strictMode := false; + end {else if} + else if (stdName^ = 'c11compat') then begin + cStd := c11; + strictMode := false; 
+ end {else if} + else if (stdName^ = 'c11') then begin + cStd := c11; + strictMode := true; + end {else if} + else if (stdName^ = 'c17compat') or (stdName^ = 'c18compat') then begin + cStd := c17; + strictMode := false; + end {else if} + else if (stdName^ = 'c17') or (stdName^ = 'c18') then begin + cStd := c17; + strictMode := true; + end {else if} + else + FlagErrorAndSkip; + end {else if} + else {not -d, -i, -p, -s: flag the error} FlagErrorAndSkip; end {if} else if lch <> chr(0) then begin @@ -4787,6 +4821,42 @@ until lch = chr(0); {if more characters, loop} if numErr <> 0 then WriteLine; doingCommandLine := false; + +{Standard-dependent configuration} +if cStd >= c95 then begin + new(mp); {add __STDC_VERSION__ macro} + mp^.name := @'__STDC_VERSION__'; + mp^.parameters := -1; + mp^.tokens := nil; + mp^.readOnly := true; + mp^.saved := true; + mp^.algorithm := 9; + bp := pointer(ord4(macros) + hash(mp^.name)); + mp^.next := bp^; + bp^ := mp; + end; {if} +if cStd < c99 then begin + allowSlashSlashComments := false; + allowMixedDeclarations := false; + c99Scope := false; + end; {if} +if strictMode then begin + extendedKeywords := false; + extendedParameters := false; + looseTypeChecks := false; + if cStd >= c99 then + lint := lint | lintC99Syntax; + new(mp); {add __KeepNamespacePure__ macro} + mp^.name := @'__KeepNamespacePure__'; + mp^.parameters := -1; + mp^.tokens := nil; + mp^.readOnly := false; + mp^.saved := true; + mp^.algorithm := 0; + bp := pointer(ord4(macros) + hash(mp^.name)); + mp^.next := bp^; + bp^ := mp; + end; {if} end; {InitScanner} diff --git a/Table.asm b/Table.asm index 7825b84..b3cbc45 100644 --- a/Table.asm +++ b/Table.asm @@ -940,6 +940,14 @@ wordHash start reserved word hash table dc i'shortsy,typedefsy,unionsy,voidsy,whilesy,succwhilesy' end +stdcVersion start __STDC_VERSION__ values + + dc i4'199409' c95 + dc i4'199901' c99 + dc i4'201112' c11 + dc i4'201710' c17 + end + macRomanToUCS start dc i2'$00C4, $00C5, $00C7, $00C9, $00D1, $00D6, 
$00DC, $00E1' dc i2'$00E0, $00E2, $00E4, $00E3, $00E5, $00E7, $00E9, $00E8' diff --git a/Table.pas b/Table.pas index 5c1b256..ea8225e 100644 --- a/Table.pas +++ b/Table.pas @@ -22,6 +22,7 @@ var charSym: array[minChar..maxChar] of tokenEnum; {symbols for single char symbols} reservedWords: array[_Alignassy..whilesy] of string[14]; {reserved word strings} wordHash: array[0..25] of tokenEnum; {for hashing reserved words} + stdcVersion: array[c95..c17] of longint; {__STDC_VERSION__ values} {from ASM.PAS} {------------} diff --git a/cc.notes b/cc.notes index ba981eb..1cef8e7 100644 --- a/cc.notes +++ b/cc.notes @@ -53,7 +53,10 @@ Updated by Stephen Heumann and Kelvin Sherlock, 2017-2022 17. New pragma for controlling ORCA/C extensions. See "#pragma extensions." - 18. The code generated for certain operations has been improved. + 18. New option to select which C standard to use. See "C Standard + Selection." + + 19. The code generated for certain operations has been improved. 2.1.1 B3 1. Bugs squashed. See bug notes, below. @@ -122,7 +125,7 @@ p. 128 The ASML, ASMLG, ASSEMBLE, CMPL, CMPLG, COMPILE, and RUN commands now accept a new flag, +F. The +F flag causes the compiler to include the file name in any error messages that it prints. It is currently only effective for ORCA/C. -There is also a new option, -p, which can be used within the cc= portion of the command line to specify a custom pre-include file. See "Custom Pre-Include File," below. +There are also two new options which can be used within the cc= portion of the command line. The -p option specifies a custom pre-include file. The -s option specifies the C standard to use. See "Custom Pre-Include File" and "C Standard Selection," below. p. 233 @@ -497,7 +500,7 @@ __STDC_NO_ATOMICS__, __STDC_NO_COMPLEX__, __STDC_NO_THREADS__, and __STDC_NO_VLA __STDC_UTF_16__ and __STDC_UTF_32__ expand to the integer constant 1. These indicate that the char16_t and char32_t types (discussed below) use UTF-16 and UTF-32 encodings. 
-__STDC_VERSION__ expands to the constant 201710L, indicating that ORCA/C supports the C17 language standard. ORCA/C now supports all the major language features required by C17, although there are still some missing library functions and a few other small deviations from the standard. +__STDC_VERSION__ expands to a constant indicating the C language standard in use, unless the C89 or C90 standards (which do not define this macro) are selected. By default, it expands to 201710L, corresponding to the C17 standard. 18. (C99) The _Bool type is now supported. This is a boolean type that can hold the values 0 or 1. When a value of another type is converted to _Bool, the result is 0 if the value compares equal to 0, or 1 otherwise. @@ -769,6 +772,33 @@ If bit 0 (a value of 1) is set, then asm, comp, extended, pascal, and segment ar If bit 1 (a value of 2) is set, then function parameters declared with the types float, double, or comp are treated as actually having the type long double (aka extended) rather than their declared type. This results in faster code and also allows the parameters to have the greater precision and range of the extended type. However, this change of types may cause some standard-compliant code not to work properly, particularly if it takes the address of such a parameter and tries to access it through the resulting pointer. If bit 1 is clear, these parameters are treated as having their declared type, restricting their precision and range and causing pointers to them to behave in the standard way. Bit 1 is set by default, matching ORCA/C's historical behavior. +C Standard Selection +-------------------- + +Several editions of the C language standards have been published by ANSI and ISO. ORCA/C now provides a command-line option that allows you to select which C standard ORCA/C should follow. This is specified by using a new option, -s, within the cc= portion of the command line. 
This is immediately followed by the name of a language mode, as given in the below chart: + + C Standard Compatibility Mode Strict Conformance Mode + ---------- ------------------ ----------------------- + ANSI X3.159-1989 c89compat (not available) + ISO/IEC 9899:1990 c90compat (not available) + ISO/IEC 9899:1990/Amd 1:1995 c94compat or c95compat (not available) + ISO/IEC 9899:1999 c99compat (not available) + ISO/IEC 9899:2011 c11compat c11 + ISO/IEC 9899:2018 c17compat or c18compat c17 or c18 + +The strict conformance modes cause ORCA/C to follow the specified standard as closely as possible. The compatibility modes are generally compatible with code that follows the corresponding standard, but they also include ORCA/C extensions and features from earlier and later standards (excluding those likely to cause compatibility problems). + +The default language mode is currently c17compat. ORCA/C now supports all the major language features required by C17, although there are still some missing library functions and a few other small deviations from the standard. + +If a non-default language mode is selected, it will affect the initial settings of various pragma flags, as described below. However, these can still be changed by subsequent #pragma directives. + +If a language standard prior to C99 is selected, then #pragma ignore bits 3 and 4 are cleared. This disables support for // comments, requires declarations to come before statements, and uses C89-compatible scope rules. + +If a strict conformance mode is selected, then all #pragma extensions bits are cleared, #pragma ignore bit 5 is cleared, #pragma lint bit 6 is set, and #pragma optimize bit 6 is cleared even if +O is specified on the command line. This disables ORCA/C extensions, enables strict type checks, enables lint checks for syntax disallowed in C99 and later, and ensures standard-compliant behavior for calls to functions with variable arguments. 
In addition, the macro __KeepNamespacePure__ is defined; this causes headers to omit non-standard function and macro definitions. + +The macro __STDC_VERSION__ is defined to the value specified by the selected language standard, if any. (In C89 or C90 modes, it is not defined.) + + File Names in Error Messages ----------------------------