Use C89 semantics for integer conversions

Previously, the following rules were used for binary operators:
* If one of the values is a long, the result is long.
* If one of the values is unsigned, the result is also unsigned.
* Otherwise the result is an int.

C89 specifies the "usual arithmetic conversions" as:
* The integral promotions are performed on both operands.
* Then the following rules are applied:
  * If either operand has type unsigned long int, the other operand is
    converted to unsigned long int.
  * Otherwise, if one operand has type long int and the other has type
    unsigned int, if a long int can represent all values of an unsigned int,
    the operand of type unsigned int is converted to long int; if a long int
    cannot represent all the values of an unsigned int, both operands are
    converted to unsigned long int.
  * Otherwise, if either operand has type long int, the other operand is
    converted to long int.
  * Otherwise, if either operand has type unsigned int, the other operand is
    converted to unsigned int.
  * Otherwise, both operands have type int.
https://port70.net/~nsz/c/c89/c89-draft.html#3.2.1.5

As one example, these rules give a different result for an operator
with one long operand and one unsigned int operand.  Previously,
the result type was unsigned long.  With C89 semantics, it is just long,
since long can represent all unsigned ints.

Integral promotions convert types shorter than int to int (or unsigned int).
Both char and unsigned char are promoted to int since int can represent
all unsigned chars.
https://port70.net/~nsz/c/c89/c89-draft.html#3.2.1.1

Rename promoteint to ArithmeticConvert, since this is more accurate.

Fixes #170
This commit is contained in:
Jesse Rosenstock 2020-08-14 14:54:10 +02:00 committed by Oliver Schmidt
parent 8197e3c7cd
commit c4698dfd07
5 changed files with 170 additions and 47 deletions

View File

@ -1327,26 +1327,52 @@ void g_reglong (unsigned Flags)
static unsigned g_intpromotion (unsigned flags)
/* Apply the C89 integral promotions (section 3.2.1.1,
** https://port70.net/~nsz/c/c89/c89-draft.html#3.2.1.1) to the type encoded
** in flags and return the adjusted flags value. Only the CF_TYPEMASK bits and
** CF_UNSIGNED are ever modified; every other bit is returned unchanged.
*/
{
    const unsigned type = flags & CF_TYPEMASK;

    if (type == CF_CHAR) {
        /* All char values, signed or unsigned, fit into an int, so the
        ** promoted type is always a signed int.
        */
        return (flags & ~(CF_TYPEMASK | CF_UNSIGNED)) | CF_INT;
    }

    if (type == CF_SHORT) {
        /* An int cannot hold every unsigned short value, so the signedness
        ** is kept: short becomes int, unsigned short becomes unsigned int.
        */
        return (flags & ~CF_TYPEMASK) | CF_INT;
    }

    /* Anything else is not smaller than int and is therefore not promoted. */
    return flags;
}
unsigned g_typeadjust (unsigned lhs, unsigned rhs)
/* Adjust the integer operands before doing a binary operation. lhs is a flags
** value, that corresponds to the value on TOS, rhs corresponds to the value
** in (e)ax. The return value is the flags value for the resulting type.
*/
{
unsigned ltype, rtype;
unsigned result;
/* Get the type spec from the flags */
ltype = lhs & CF_TYPEMASK;
rtype = rhs & CF_TYPEMASK;
unsigned ltype = lhs & CF_TYPEMASK;
unsigned rtype = rhs & CF_TYPEMASK;
/* Check if a conversion is needed */
if (ltype == CF_LONG && rtype != CF_LONG && (rhs & CF_CONST) == 0) {
/* We must promote the primary register to long */
g_reglong (rhs);
/* Get the new rhs type */
rhs = (rhs & ~CF_TYPEMASK) | CF_LONG;
rtype = CF_LONG;
} else if (ltype != CF_LONG && (lhs & CF_CONST) == 0 && rtype == CF_LONG) {
/* We must promote the lhs to long */
if (lhs & CF_PRIMARY) {
@ -1354,25 +1380,64 @@ unsigned g_typeadjust (unsigned lhs, unsigned rhs)
} else {
g_toslong (lhs);
}
/* Get the new lhs type */
lhs = (lhs & ~CF_TYPEMASK) | CF_LONG;
ltype = CF_LONG;
}
/* Determine the result type for the operation:
** - The result is const if both operands are const.
** - The result is unsigned if one of the operands is unsigned.
** - The result is long if one of the operands is long.
** - Otherwise the result is int sized.
/* Result is const if both operands are const. */
unsigned const_flag = (lhs & CF_CONST) & (rhs & CF_CONST);
/* https://port70.net/~nsz/c/c89/c89-draft.html#3.2.1.5
** Many binary operators that expect operands of arithmetic type cause conversions and yield
** result types in a similar way. The purpose is to yield a common type, which is also the type
** of the result. This pattern is called the usual arithmetic conversions.
*/
result = (lhs & CF_CONST) & (rhs & CF_CONST);
result |= (lhs & CF_UNSIGNED) | (rhs & CF_UNSIGNED);
if (rtype == CF_LONG || ltype == CF_LONG) {
result |= CF_LONG;
} else {
result |= CF_INT;
/* Note that this logic is largely duplicated by ArithmeticConvert. */
/* Apply integral promotions for types char/short. */
lhs = g_intpromotion (lhs);
rhs = g_intpromotion (rhs);
ltype = lhs & CF_TYPEMASK;
rtype = rhs & CF_TYPEMASK;
/* If either operand has type unsigned long int, the other operand is converted to
** unsigned long int.
*/
if ((ltype == CF_LONG && (lhs & CF_UNSIGNED)) ||
(rtype == CF_LONG && (rhs & CF_UNSIGNED))) {
return const_flag | CF_UNSIGNED | CF_LONG;
}
return result;
/* Otherwise, if one operand has type long int and the other has type unsigned int,
** if a long int can represent all values of an unsigned int, the operand of type unsigned int
** is converted to long int ; if a long int cannot represent all the values of an unsigned int,
** both operands are converted to unsigned long int.
*/
/* NOTE(review): the second clause below tests (rhs & CF_UNSIGNED) although the
** unsigned int operand in that clause is lhs; (lhs & CF_UNSIGNED) looks
** intended. The returned flags are not affected, because the long check that
** follows returns the same value, const_flag | CF_LONG.
*/
if ((ltype == CF_LONG && rtype == CF_INT && (rhs & CF_UNSIGNED)) ||
(rtype == CF_LONG && ltype == CF_INT && (rhs & CF_UNSIGNED))) {
/* long can represent all unsigneds, so we are in the first sub-case. */
return const_flag | CF_LONG;
}
/* Otherwise, if either operand has type long int, the other operand is converted to long int.
*/
if (ltype == CF_LONG || rtype == CF_LONG) {
return const_flag | CF_LONG;
}
/* Otherwise, if either operand has type unsigned int, the other operand is converted to
** unsigned int.
*/
if ((ltype == CF_INT && (lhs & CF_UNSIGNED)) ||
(rtype == CF_INT && (rhs & CF_UNSIGNED))) {
return const_flag | CF_UNSIGNED | CF_INT;
}
/* Otherwise, both operands have type int. */
CHECK (ltype == CF_INT);
CHECK (!(lhs & CF_UNSIGNED));
CHECK (rtype == CF_INT);
CHECK (!(rhs & CF_UNSIGNED));
return const_flag | CF_INT;
}

View File

@ -1214,12 +1214,26 @@ Type* IntPromotion (Type* T)
/* We must have an int to apply int promotions */
PRECONDITION (IsClassInt (T));
/* An integer can represent all values from either signed or unsigned char,
** so convert chars to int and leave all other types alone.
/* https://port70.net/~nsz/c/c89/c89-draft.html#3.2.1.1
** A char, a short int, or an int bit-field, or their signed or unsigned varieties, or an
** object that has enumeration type, may be used in an expression wherever an int or
** unsigned int may be used. If an int can represent all values of the original type, the value
** is converted to an int; otherwise it is converted to an unsigned int.
** These are called the integral promotions.
*/
if (IsTypeChar (T)) {
/* An integer can represent all values from either signed or unsigned char, so convert
** chars to int.
*/
return type_int;
} else if (IsTypeShort (T)) {
/* An integer cannot represent all values from unsigned short, so convert unsigned short
** to unsigned int.
*/
return IsSignUnsigned (T) ? type_uint : type_int;
} else {
/* Otherwise, the type is not smaller than int, so leave it alone. */
return T;
}
}

View File

@ -373,6 +373,16 @@ INLINE int IsTypeChar (const Type* T)
# define IsTypeChar(T) (GetRawType (GetUnderlyingType (T)) == T_TYPE_CHAR)
#endif
#if defined(HAVE_INLINE)
INLINE int IsTypeShort (const Type* T)
/* Check whether T is a short type, signed or unsigned. The raw type of the
** underlying type of T is compared against T_TYPE_SHORT; the result is
** non-zero on a match and zero otherwise.
*/
{
    return GetRawType (GetUnderlyingType (T)) == T_TYPE_SHORT;
}
#else
/* Macro fallback with the same semantics for builds without HAVE_INLINE */
# define IsTypeShort(T) (GetRawType (GetUnderlyingType (T)) == T_TYPE_SHORT)
#endif
#if defined(HAVE_INLINE)
INLINE int IsTypeInt (const Type* T)
/* Return true if this is an int type (signed or unsigned) */

View File

@ -140,27 +140,61 @@ void MarkedExprWithCheck (void (*Func) (ExprDesc*), ExprDesc* Expr)
static Type* promoteint (Type* lhst, Type* rhst)
/* In an expression with two ints, return the type of the result */
static Type* ArithmeticConvert (Type* lhst, Type* rhst)
/* Perform the usual arithmetic conversions for binary operators and return the
** common result type. Both operand types are first passed through
** IntPromotion; the result is one of type_int, type_uint, type_long or
** type_ulong.
*/
{
/* Rules for integer types:
** - If one of the values is a long, the result is long.
** - If one of the values is unsigned, the result is also unsigned.
** - Otherwise the result is an int.
/* https://port70.net/~nsz/c/c89/c89-draft.html#3.2.1.5
** Many binary operators that expect operands of arithmetic type cause conversions and yield
** result types in a similar way. The purpose is to yield a common type, which is also the type
** of the result. This pattern is called the usual arithmetic conversions.
*/
/* There are additional rules for floating point types that we don't bother with, since
** floating point types are not (yet) supported.
** The integral promotions are performed on both operands.
*/
lhst = IntPromotion (lhst);
rhst = IntPromotion (rhst);
/* If either operand has type unsigned long int, the other operand is converted to
** unsigned long int.
*/
if ((IsTypeLong (lhst) && IsSignUnsigned (lhst)) ||
(IsTypeLong (rhst) && IsSignUnsigned (rhst))) {
return type_ulong;
}
/* Otherwise, if one operand has type long int and the other has type unsigned int,
** if a long int can represent all values of an unsigned int, the operand of type unsigned int
** is converted to long int ; if a long int cannot represent all the values of an unsigned int,
** both operands are converted to unsigned long int.
*/
/* Both this check and the plain long check after it return type_long. */
if ((IsTypeLong (lhst) && IsTypeInt (rhst) && IsSignUnsigned (rhst)) ||
(IsTypeLong (rhst) && IsTypeInt (lhst) && IsSignUnsigned (lhst))) {
/* long can represent all unsigneds, so we are in the first sub-case. */
return type_long;
}
/* Otherwise, if either operand has type long int, the other operand is converted to long int.
*/
if (IsTypeLong (lhst) || IsTypeLong (rhst)) {
if (IsSignUnsigned (lhst) || IsSignUnsigned (rhst)) {
return type_ulong;
} else {
return type_long;
}
} else {
if (IsSignUnsigned (lhst) || IsSignUnsigned (rhst)) {
return type_uint;
} else {
return type_int;
}
return type_long;
}
/* Otherwise, if either operand has type unsigned int, the other operand is converted to
** unsigned int.
*/
if ((IsTypeInt (lhst) && IsSignUnsigned (lhst)) ||
(IsTypeInt (rhst) && IsSignUnsigned (rhst))) {
return type_uint;
}
/* Otherwise, both operands have type int. */
CHECK (IsTypeInt (lhst));
CHECK (IsSignSigned (lhst));
CHECK (IsTypeInt (rhst));
CHECK (IsSignSigned (rhst));
return type_int;
}
@ -198,7 +232,7 @@ static unsigned typeadjust (ExprDesc* lhs, ExprDesc* rhs, int NoPush)
flags = g_typeadjust (ltype, rtype);
/* Set the type of the result */
lhs->Type = promoteint (lhst, rhst);
lhs->Type = ArithmeticConvert (lhst, rhst);
/* Return the code generator flags */
return flags;
@ -2066,7 +2100,7 @@ static void hie_internal (const GenDesc* Ops, /* List of generators */
RemoveCode (&Mark1);
/* Get the type of the result */
Expr->Type = promoteint (Expr->Type, Expr2.Type);
Expr->Type = ArithmeticConvert (Expr->Type, Expr2.Type);
/* Handle the op differently for signed and unsigned types */
if (IsSignSigned (Expr->Type)) {
@ -2163,7 +2197,7 @@ static void hie_internal (const GenDesc* Ops, /* List of generators */
/* Determine the type of the operation result. */
type |= g_typeadjust (ltype, rtype);
Expr->Type = promoteint (Expr->Type, Expr2.Type);
Expr->Type = ArithmeticConvert (Expr->Type, Expr2.Type);
/* Generate code */
Gen->Func (type, Expr->IVal);
@ -2196,7 +2230,7 @@ static void hie_internal (const GenDesc* Ops, /* List of generators */
/* Determine the type of the operation result. */
type |= g_typeadjust (ltype, rtype);
Expr->Type = promoteint (Expr->Type, Expr2.Type);
Expr->Type = ArithmeticConvert (Expr->Type, Expr2.Type);
/* Generate code */
Gen->Func (type, Expr2.IVal);
@ -3340,7 +3374,7 @@ static void hieQuest (ExprDesc* Expr)
/* Get common type */
ResultType = promoteint (Expr2.Type, Expr3.Type);
ResultType = ArithmeticConvert (Expr2.Type, Expr3.Type);
/* Convert the third expression to this type if needed */
TypeConversion (&Expr3, ResultType);