Commit my changes to arith.c which fixed a couple of bugs and decreased

code size.  Please read the TODO comments regarding accessing shell
variables from the arith() funciton.
This commit is contained in:
Manuel Novoa III 2001-09-11 01:11:31 +00:00
parent f6ecaccf92
commit 6a9d1f652b

View File

@ -24,19 +24,88 @@
* as a replacement for yacc-based parsers. However, it may well be faster * as a replacement for yacc-based parsers. However, it may well be faster
* than a comparable parser writen in yacc. The supported operators are * than a comparable parser writen in yacc. The supported operators are
* listed in #defines below. Parens, order of operations, and error handling * listed in #defines below. Parens, order of operations, and error handling
* are supported. This code is threadsafe. */ * are supported. This code is threadsafe. The exact expression format should
* be that which POSIX specifies for shells. */
/* To use the routine, call it with an expression string. It returns an /* The code uses a simple two-stack algorithm. See
* integer result. You will also need to define an "error" function * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
* that takes printf arguments and _does not return_, or modify the code * for a detailed explaination of the infix-to-postfix algorithm on which
* to use another error mechanism. */ * this is based (this code differs in that it applies operators immediately
* to the stack instead of adding them to a queue to end up with an
* expression). */
/* To use the routine, call it with an expression string and error return
* pointer */
/*
* Aug 24, 2001 Manuel Novoa III
*
* Reduced the generated code size by about 30% (i386) and fixed several bugs.
*
* 1) In arith_apply():
* a) Cached values of *numptr and &(numptr[-1]).
* b) Removed redundant test for zero denominator.
*
* 2) In arith():
* a) Eliminated redundant code for processing operator tokens by moving
* to a table-based implementation. Also folded handling of parens
* into the table.
* b) Combined all 3 loops which called arith_apply to reduce generated
* code size at the cost of speed.
*
* 3) The following expressions were treated as valid by the original code:
* 1() , 0! , 1 ( *3 ) .
* These bugs have been fixed by internally enclosing the expression in
* parens and then checking that all binary ops and right parens are
* preceded by a valid expression (NUM_TOKEN).
*
* Note: It may be desireable to replace Aaron's test for whitespace with
* ctype's isspace() if it is used by another busybox applet or if additional
* whitespace chars should be considered. Look below the "#include"s for a
* precompiler test.
*/
/*
* Aug 26, 2001 Manuel Novoa III
*
* Return 0 for null expressions. Pointed out by vodz.
*
* Merge in Aaron's comments previously posted to the busybox list,
* modified slightly to take account of my changes to the code.
*
* TODO: May want to allow access to variables in the arith code.
* This would:
* 1) allow us to evaluate $A as 0 if A isn't set (although this
* would require changes to ash.c too).
* 2) allow us to write expressions as $(( A + 2 )).
* This could be done using a callback function passed to the
* arith() function of by requiring such a function with fixed
* name as an extern.
*/
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <ctype.h>
#include "libbb.h" #include "libbb.h"
/*
* Use "#if 1" below for Aaron's original test for whitespace.
* Use "#if 0" for ctype's isspace().
* */
#if 1
#undef isspace
#define isspace(arithval) \
(arithval == ' ' || arithval == '\n' || arithval == '\t')
#endif
typedef char operator; typedef char operator;
/* An operator's token id is a bit of a bitfield. The lower 5 bits are the
* precedence, and high 3 are an ID unique accross operators of that
* precedence. The ID portion is so that multiple operators can have the
* same precedence, ensuring that the leftmost one is evaluated first.
* Consider * and /. */
#define tok_decl(prec,id) (((id)<<5)|(prec)) #define tok_decl(prec,id) (((id)<<5)|(prec))
#define PREC(op) ((op)&0x1F) #define PREC(op) ((op)&0x1F)
@ -70,194 +139,235 @@ typedef char operator;
#define TOK_DIV tok_decl(10,1) #define TOK_DIV tok_decl(10,1)
#define TOK_REM tok_decl(10,2) #define TOK_REM tok_decl(10,2)
/* For now all unary operators have the same precedence, and that's used to
* identify them as unary operators */
#define UNARYPREC 14 #define UNARYPREC 14
#define TOK_BNOT tok_decl(UNARYPREC,0) #define TOK_BNOT tok_decl(UNARYPREC,0)
#define TOK_NOT tok_decl(UNARYPREC,1) #define TOK_NOT tok_decl(UNARYPREC,1)
#define TOK_UMINUS tok_decl(UNARYPREC,2) #define TOK_UMINUS tok_decl(UNARYPREC,2)
#define TOK_UPLUS tok_decl(UNARYPREC,3)
#define TOK_NUM tok_decl(15,0) #define TOK_NUM tok_decl(15,0)
#define TOK_RPAREN tok_decl(15,1)
#define TOK_ERROR tok_decl(15,2) /* just a place-holder really */
#define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr) #define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr)
#define NUMPTR (*numstackptr) #define NUMPTR (*numstackptr)
/* "applying" a token means performing it on the top elements on the integer
* stack. For a unary operator it will only change the top element, but a
* binary operator will pop two arguments and push a result */
static short arith_apply(operator op, long *numstack, long **numstackptr) static short arith_apply(operator op, long *numstack, long **numstackptr)
{ {
if (NUMPTR == numstack) goto err; long numptr_val;
if (op == TOK_UMINUS) long *NUMPTR_M1;
NUMPTR[-1] *= -1;
else if (op == TOK_NOT)
NUMPTR[-1] = !(NUMPTR[-1]);
else if (op == TOK_BNOT)
NUMPTR[-1] = ~(NUMPTR[-1]);
if (NUMPTR == numstack) goto err; /* There is no operator that can work
without arguments */
NUMPTR_M1 = NUMPTR - 1;
if (op == TOK_UMINUS)
*NUMPTR_M1 *= -1;
else if (op == TOK_NOT)
*NUMPTR_M1 = !(*NUMPTR_M1);
else if (op == TOK_BNOT)
*NUMPTR_M1 = ~(*NUMPTR_M1);
else if (op != TOK_UPLUS) {
/* Binary operators */ /* Binary operators */
else { if (NUMPTR_M1 == numstack) goto err; /* ... and binary operators need two
if (NUMPTR-1 == numstack) goto err; arguments */
--NUMPTR; numptr_val = *--NUMPTR; /* ... and they pop one */
NUMPTR_M1 = NUMPTR - 1;
if (op == TOK_BOR) if (op == TOK_BOR)
NUMPTR[-1] |= *NUMPTR; *NUMPTR_M1 |= numptr_val;
else if (op == TOK_OR) else if (op == TOK_OR)
NUMPTR[-1] = *NUMPTR || NUMPTR[-1]; *NUMPTR_M1 = numptr_val || *NUMPTR_M1;
else if (op == TOK_BAND) else if (op == TOK_BAND)
NUMPTR[-1] &= *NUMPTR; *NUMPTR_M1 &= numptr_val;
else if (op == TOK_AND) else if (op == TOK_AND)
NUMPTR[-1] = NUMPTR[-1] && *NUMPTR; *NUMPTR_M1 = *NUMPTR_M1 && numptr_val;
else if (op == TOK_EQ) else if (op == TOK_EQ)
NUMPTR[-1] = (NUMPTR[-1] == *NUMPTR); *NUMPTR_M1 = (*NUMPTR_M1 == numptr_val);
else if (op == TOK_NE) else if (op == TOK_NE)
NUMPTR[-1] = (NUMPTR[-1] != *NUMPTR); *NUMPTR_M1 = (*NUMPTR_M1 != numptr_val);
else if (op == TOK_GE) else if (op == TOK_GE)
NUMPTR[-1] = (NUMPTR[-1] >= *NUMPTR); *NUMPTR_M1 = (*NUMPTR_M1 >= numptr_val);
else if (op == TOK_RSHIFT) else if (op == TOK_RSHIFT)
NUMPTR[-1] >>= *NUMPTR; *NUMPTR_M1 >>= numptr_val;
else if (op == TOK_LSHIFT) else if (op == TOK_LSHIFT)
NUMPTR[-1] <<= *NUMPTR; *NUMPTR_M1 <<= numptr_val;
else if (op == TOK_GT) else if (op == TOK_GT)
NUMPTR[-1] = (NUMPTR[-1] > *NUMPTR); *NUMPTR_M1 = (*NUMPTR_M1 > numptr_val);
else if (op == TOK_LT) else if (op == TOK_LT)
NUMPTR[-1] = (NUMPTR[-1] < *NUMPTR); *NUMPTR_M1 = (*NUMPTR_M1 < numptr_val);
else if (op == TOK_LE) else if (op == TOK_LE)
NUMPTR[-1] = (NUMPTR[-1] <= *NUMPTR); *NUMPTR_M1 = (*NUMPTR_M1 <= numptr_val);
else if (op == TOK_MUL) else if (op == TOK_MUL)
NUMPTR[-1] *= *NUMPTR; *NUMPTR_M1 *= numptr_val;
else if (op == TOK_DIV) {
if(*NUMPTR==0)
return -2;
NUMPTR[-1] /= *NUMPTR;
}
else if (op == TOK_REM) {
if(*NUMPTR==0)
return -2;
NUMPTR[-1] %= *NUMPTR;
}
else if (op == TOK_ADD) else if (op == TOK_ADD)
NUMPTR[-1] += *NUMPTR; *NUMPTR_M1 += numptr_val;
else if (op == TOK_SUB) else if (op == TOK_SUB)
NUMPTR[-1] -= *NUMPTR; *NUMPTR_M1 -= numptr_val;
else if(numptr_val==0) /* zero divisor check */
return -2;
else if (op == TOK_DIV)
*NUMPTR_M1 /= numptr_val;
else if (op == TOK_REM)
*NUMPTR_M1 %= numptr_val;
/* WARNING!!! WARNING!!! WARNING!!! */
/* Any new operators should be added BEFORE the zero divisor check! */
} }
return 0; return 0;
err: return(-1); err: return(-1);
} }
extern long arith (const char *startbuf, int *errcode) static const char endexpression[] = ")";
{
register char arithval;
const char *expr = startbuf;
operator lasttok = TOK_MUL, op; /* + and - (in that order) must be last */
size_t datasizes = strlen(startbuf); static const char op_char[] = "!<>=|&*/%~()+-";
static const char op_token[] = {
/* paired with equal */
TOK_NE, TOK_LE, TOK_GE,
/* paired with self -- note: ! is special-cased below*/
TOK_ERROR, TOK_LSHIFT, TOK_RSHIFT, TOK_EQ, TOK_OR, TOK_AND,
/* singles */
TOK_NOT, TOK_LT, TOK_GT, TOK_ERROR, TOK_BOR, TOK_BAND,
TOK_MUL, TOK_DIV, TOK_REM, TOK_BNOT, TOK_LPAREN, TOK_RPAREN,
TOK_ADD, TOK_SUB, TOK_UPLUS, TOK_UMINUS
};
#define NUM_PAIR_EQUAL 3
#define NUM_PAIR_SAME 6
extern long arith (const char *expr, int *errcode)
{
register char arithval; /* Current character under analysis */
operator lasttok, op;
unsigned char prec; unsigned char prec;
long *numstack, *numstackptr; const char *p = endexpression;
operator *stack = alloca(datasizes * sizeof(operator)), *stackptr = stack;
*errcode = 0; size_t datasizes = strlen(expr);
numstack = alloca((datasizes/2+1)*sizeof(long)), numstackptr = numstack;
while ((arithval = *expr)) { /* Stack of integers */
if (arithval == ' ' || arithval == '\n' || arithval == '\t') /* The proof that there can be no more than strlen(startbuf)/2+1 integers
goto prologue; * in any given correct or incorrect expression is left as an excersize to
* the reader. */
long *numstack = alloca((datasizes/2)*sizeof(long)),
*numstackptr = numstack;
/* Stack of operator tokens */
operator *stack = alloca((datasizes+1) * sizeof(operator)),
*stackptr = stack;
*stackptr++ = lasttok = TOK_LPAREN; /* start off with a left paren */
loop:
if ((arithval = *expr) == 0) {
if (p == endexpression) { /* Null expression. */
return (*errcode = 0);
}
/* This is only reached after all tokens have been extracted from the
* input stream. If there are still tokens on the operator stack, they
* are to be applied in order. At the end, there should be a final
* result on the integer stack */
if (expr != endexpression + 1) { /* If we haven't done so already, */
expr = endexpression; /* append a closing right paren */
goto loop; /* and let the loop process it. */
}
/* At this point, we're done with the expression. */
if (numstackptr != numstack+1) {/* ... but if there isn't, it's bad */
err:
return (*errcode = -1);
/* NOTREACHED */
}
return *numstack;
} else {
/* Continue processing the expression. */
if (isspace(arithval)) {
goto prologue; /* Skip whitespace */
}
if ((unsigned)arithval-'0' <= 9) /* isdigit */ { if ((unsigned)arithval-'0' <= 9) /* isdigit */ {
*numstackptr++ = strtol(expr, (char **) &expr, 10); *numstackptr++ = strtol(expr, (char **) &expr, 10);
lasttok = TOK_NUM; lasttok = TOK_NUM;
continue; goto loop;
} if (arithval == '(') {
*stackptr++ = TOK_LPAREN;
lasttok = TOK_LPAREN;
goto prologue;
} if (arithval == ')') {
lasttok = TOK_NUM;
while (stackptr != stack) {
op = *--stackptr;
if (op == TOK_LPAREN)
goto prologue;
*errcode = ARITH_APPLY(op);
if(*errcode) return *errcode;
} }
goto err; /* Mismatched parens */ #if 1
} if (arithval == '|') { if ((p = strchr(op_char, arithval)) == NULL) {
if (*++expr == '|') goto err;
op = TOK_OR; }
else { #else
for ( p=op_char ; *p != arithval ; p++ ) {
if (!*p) {
goto err;
}
}
#endif
p = op_token + (int)(p - op_char);
++expr;
if ((p >= op_token + NUM_PAIR_EQUAL) || (*expr != '=')) {
p += NUM_PAIR_EQUAL;
if ((p >= op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL)
|| (*expr != arithval) || (arithval == '!')) {
--expr; --expr;
op = TOK_BOR; if (arithval == '=') { /* single = */
goto err;
} }
} else if (arithval == '&') { p += NUM_PAIR_SAME;
if (*++expr == '&') /* Plus and minus are binary (not unary) _only_ if the last
op = TOK_AND; * token was as number, or a right paren (which pretends to be
else { * a number, since it evaluates to one). Think about it.
--expr; * It makes sense. */
op = TOK_BAND; if ((lasttok != TOK_NUM)
&& (p >= op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL
+ sizeof(op_char) - 2)) {
p += 2; /* Unary plus or minus */
} }
} else if (arithval == '=') {
if (*++expr != '=') goto err; /* Unknown token */
op = TOK_EQ;
} else if (arithval == '!') {
if (*++expr == '=')
op = TOK_NE;
else {
--expr;
op = TOK_NOT;
} }
} else if (arithval == '>') {
switch (*++expr) {
case '=':
op = TOK_GE;
break;
case '>':
op = TOK_RSHIFT;
break;
default:
--expr;
op = TOK_GT;
} }
} else if (arithval == '<') { op = *p;
switch (*++expr) {
case '=':
op = TOK_LE;
break;
case '<':
op = TOK_LSHIFT;
break;
default:
--expr;
op = TOK_LT;
}
} else if (arithval == '*')
op = TOK_MUL;
else if (arithval == '/')
op = TOK_DIV;
else if (arithval == '%')
op = TOK_REM;
else if (arithval == '+') {
if (lasttok != TOK_NUM) goto prologue; /* Unary plus */
op = TOK_ADD;
} else if (arithval == '-')
op = (lasttok == TOK_NUM) ? TOK_SUB : TOK_UMINUS;
else if (arithval == '~')
op = TOK_BNOT;
else goto err; /* Unknown token */
/* We don't want a unary operator to cause recursive descent on the
* stack, because there can be many in a row and it could cause an
* operator to be evaluated before its argument is pushed onto the
* integer stack. */
/* But for binary operators, "apply" everything on the operator
* stack until we find an operator with a lesser priority than the
* one we have just extracted. */
/* Left paren is given the lowest priority so it will never be
* "applied" in this way */
prec = PREC(op); prec = PREC(op);
if (prec != UNARYPREC) if ((prec > 0) && (prec != UNARYPREC)) { /* not left paren or unary */
while (stackptr != stack && PREC(stackptr[-1]) >= prec) { if (lasttok != TOK_NUM) { /* binary op must be preceded by a num */
*errcode = ARITH_APPLY(*--stackptr); goto err;
if(*errcode) return *errcode;
} }
*stackptr++ = op;
lasttok = op;
prologue: ++expr;
} /* yay */
while (stackptr != stack) { while (stackptr != stack) {
if (op == TOK_RPAREN) {
/* The algorithm employed here is simple: while we don't
* hit an open paren nor the bottom of the stack, pop
* tokens and apply them */
if (stackptr[-1] == TOK_LPAREN) {
--stackptr;
lasttok = TOK_NUM; /* Any operator directly after a */
/* close paren should consider itself binary */
goto prologue;
}
} else if (PREC(stackptr[-1]) < prec) {
break;
}
*errcode = ARITH_APPLY(*--stackptr); *errcode = ARITH_APPLY(*--stackptr);
if(*errcode) return *errcode; if(*errcode) return *errcode;
} }
if (numstackptr != numstack+1) { if (op == TOK_RPAREN) {
err: goto err;
*errcode = -1; }
return -1;
/* NOTREACHED */
} }
return *numstack; /* Push this operator to the stack and remember it. */
*stackptr++ = lasttok = op;
prologue:
++expr;
goto loop;
}
} }