mirror of
https://github.com/GnoConsortium/gno.git
synced 2024-12-21 07:30:05 +00:00
These are Dave Tribby's changes that were necessary to make
sed (which uses these routines) work correctly. See the file GNO.notes for more details.
This commit is contained in:
parent
a3f1e8a401
commit
8590ffb646
71
lib/libc/regex/GNO.notes
Normal file
71
lib/libc/regex/GNO.notes
Normal file
@ -0,0 +1,71 @@
|
||||
Notes on the porting of regex to GNO
|
||||
|
||||
Dave Tribby * November 7, 1997
|
||||
|
||||
|
||||
Devin Reade did the initial conversion of the BSD sources to compile
|
||||
under GNO 2.0.6 headers with ORCA/C on the Apple IIGS.
|
||||
|
||||
I completed the porting to the extent that the program sed (which
|
||||
uses regex) works for many different test cases.
|
||||
|
||||
The most time-consuming aspect of the port was finding all the places
|
||||
where long integers should be used instead of int or unsigned int.
|
||||
|
||||
|
||||
DEBUGGING
|
||||
|
||||
The file engine.c is used in an interesting way by regexec.c:
|
||||
it includes engine.c *twice*, after muchos fiddling with the
|
||||
macros that code uses. This lets the same code operate on two
|
||||
different representations for state sets.
|
||||
|
||||
When it was necessary to do source-level debugging, Splat! got
|
||||
very confused about line numbers until I added the following at
|
||||
the beginning of the file:
|
||||
#ifdef __ORCAC__
|
||||
#line 2 "engine.c"
|
||||
#endif
|
||||
|
||||
The regex code had a compilation macro, REDEBUG, that would
|
||||
turn on diagnostic messages when defined. I added additional
|
||||
output statements that are turned on by REDEBUG.
|
||||
|
||||
|
||||
PERFORMANCE ENHANCEMENT
|
||||
|
||||
The program sed took a long time to execute a complicated sed
|
||||
program (provided with the BSD source code) that evaluates
|
||||
expressions. I attempted to speed up the regex routines by
|
||||
recoding two routines in assembly language (using the asm {}
|
||||
construct available in ORCA/C). The two routines are
|
||||
isinsets() and samesets() in regcomp.c. The C code was left
|
||||
in place and can be turned on by compiling with the macro
|
||||
__NOASM__ set.
|
||||
|
||||
I also noticed an instance in regcomp.c where many fields of
|
||||
a structure were individually set to 0 at initialization. I
|
||||
recoded to set all the fields to 0 by a call to memset() and
|
||||
then set only the non-zero fields individually.
|
||||
|
||||
The following script was used to time the results:
|
||||
|
||||
cd /src/gno/usr.bin/sed
|
||||
echo '(4+4)*3' | time ./sed -f tests/math.sed
|
||||
|
||||
The unmodified code took 51 seconds to run on my 8MHz Apple IIGS.
|
||||
It took 49 seconds after I recoded isinsets() in asm, and 48 seconds
|
||||
after I recoded samesets(). Initializing fields via memset() brought
|
||||
it down to 47 seconds.
|
||||
|
||||
I saw no other obvious candidates for recoding. The modest results
|
||||
from these changes (even though they were made in routines that
|
||||
ranked high in the profile) did not warrant further efforts.
|
||||
|
||||
|
||||
BUILDING
|
||||
|
||||
Because I did not rebuild all of libc, I created a library called
|
||||
regex that only includes the regex routines. The commands I used
|
||||
to build regex are included in the file make.cmds. I will leave it
|
||||
to Devin to incorporate regex into the full libc build structure.
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# $Id: Makefile,v 1.1 1997/10/08 07:07:49 gdr Exp $
|
||||
# $Id: Makefile,v 1.2 1997/11/17 04:13:07 gdr Exp $
|
||||
#
|
||||
# Devin Reade, October 1997.
|
||||
#
|
||||
@ -11,7 +11,7 @@ OBJS = regcomp.o regerror.o regexec.o regfree.o
|
||||
|
||||
# I've not bothered to determine what POSIX_MISTAKE means, but 4.4BSD
|
||||
# uses it.
|
||||
CFLAGS += -DPOSIX_MISTAKE
|
||||
CFLAGS += -v -r -DPOSIX_MISTAKE
|
||||
|
||||
build .PHONY: ../libc
|
||||
obj: $(OBJS)
|
||||
|
@ -1,3 +1,6 @@
|
||||
#ifdef __ORCAC__ /* For debugger since engine.c is included by regexec.c */
|
||||
#line 2 "engine.c"
|
||||
#endif
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
@ -36,7 +39,7 @@
|
||||
*
|
||||
* @(#)engine.c 8.5 (Berkeley) 3/20/94
|
||||
*
|
||||
* $Id: engine.c,v 1.2 1997/10/08 07:07:50 gdr Exp $
|
||||
* $Id: engine.c,v 1.3 1997/11/17 04:13:07 gdr Exp $
|
||||
*/
|
||||
|
||||
#ifdef __ORCAC__
|
||||
@ -898,7 +901,11 @@ step(register struct re_guts *g,
|
||||
register sopno pc;
|
||||
register onestate here; /* note, macros know this name */
|
||||
register sopno look;
|
||||
#ifndef __ORCAC__
|
||||
register int i;
|
||||
#else
|
||||
unsigned long i;
|
||||
#endif
|
||||
|
||||
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
|
||||
s = g->strip[pc];
|
||||
|
28
lib/libc/regex/make.cmds
Normal file
28
lib/libc/regex/make.cmds
Normal file
@ -0,0 +1,28 @@
|
||||
# Commands to create regex.lib, since normal Makefile
|
||||
# assumes that it will be part of libc.
|
||||
|
||||
# Name of library
|
||||
set libname=/lib/regex.lib
|
||||
|
||||
# Optimization compile option
|
||||
set optimize=-O72
|
||||
|
||||
# Any debug needed?
|
||||
set debug=""
|
||||
|
||||
# Macro definitions
|
||||
# set macros="-DPOSIX_MISTAKE -DREDEBUG -D__NOASM__"
|
||||
set macros="-DPOSIX_MISTAKE"
|
||||
|
||||
# Places to look for libraries
|
||||
set inc="-I/usr/include -I/lang/orca/libraries/orcacdefs"
|
||||
|
||||
# Compile the source files
|
||||
17/occ $optimize $debug -o regcomp.o -c -a0 -i -r -w $inc -v -r $macros regcomp.c
|
||||
17/occ $optimize $debug -o regerror.o -c -a0 -i -r -w $inc -v -r $macros regerror.c
|
||||
17/occ $optimize $debug -o regexec.o -c -a0 -i -r -w $inc -v -r $macros regexec.c
|
||||
17/occ $optimize $debug -o regfree.o -c -a0 -i -r -w $inc -v -r $macros regfree.c
|
||||
|
||||
# Create library
|
||||
rm $libname
|
||||
17/makelib -p $libname +regcomp.o +regerror.o +regexec.o +regfree.o
|
@ -36,7 +36,7 @@
|
||||
*
|
||||
* @(#)regcomp.c 8.5 (Berkeley) 3/20/94
|
||||
*
|
||||
* $Id: regcomp.c,v 1.2 1997/10/08 07:07:50 gdr Exp $
|
||||
* $Id: regcomp.c,v 1.3 1997/11/17 04:13:07 gdr Exp $
|
||||
*/
|
||||
|
||||
#ifdef __ORCAC__
|
||||
@ -56,6 +56,10 @@ static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#ifdef __ORCAC__
|
||||
/* For some reason this macro definition causes a problem after regex2.h */
|
||||
#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
|
||||
#endif
|
||||
#include "regex2.h"
|
||||
|
||||
#include "cclass.h"
|
||||
@ -157,7 +161,9 @@ static char nuls[10]; /* place to point scanner in event of error */
|
||||
#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
|
||||
#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
|
||||
#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
|
||||
#ifndef __ORCAC__
|
||||
#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
|
||||
#endif
|
||||
#define HERE() (p->slen)
|
||||
#define THERE() (p->slen - 1)
|
||||
#define THERETHERE() (p->slen - 2)
|
||||
@ -197,6 +203,10 @@ regcomp(regex_t *preg,
|
||||
# define GOODFLAGS(f) ((f)&~REG_DUMP)
|
||||
#endif
|
||||
|
||||
#ifdef REDEBUG
|
||||
fprintf(stdout, "regcomp: pattern \"%s\"\n", pattern);
|
||||
#endif
|
||||
|
||||
cflags = GOODFLAGS(cflags);
|
||||
if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
|
||||
return(REG_INVARG);
|
||||
@ -227,6 +237,7 @@ regcomp(regex_t *preg,
|
||||
p->end = p->next + len;
|
||||
p->error = 0;
|
||||
p->ncsalloc = 0;
|
||||
#ifndef __ORCAC__
|
||||
for (i = 0; i < NPAREN; i++) {
|
||||
p->pbegin[i] = 0;
|
||||
p->pend[i] = 0;
|
||||
@ -246,6 +257,20 @@ regcomp(regex_t *preg,
|
||||
g->categories = &g->catspace[-(CHAR_MIN)];
|
||||
(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
|
||||
g->backrefs = 0;
|
||||
#else
|
||||
/* Performance tune-up for Apple IIGS: set all 0-valued fields in */
|
||||
/* record via memset() and then set only non-0 values. */
|
||||
(void) memset((char *)g, 0,
|
||||
sizeof(struct re_guts)+(NC-1)*sizeof(cat_t));
|
||||
p->g = g;
|
||||
p->next = (char *)pattern; /* convenience; we do not modify it */
|
||||
p->end = p->next + len;
|
||||
(void) memset((char *)p->pbegin, 0, sizeof(sopno)*NPAREN*2);
|
||||
g->csetsize = NC;
|
||||
g->cflags = cflags;
|
||||
g->ncategories = 1; /* category 0 is "everything else" */
|
||||
g->categories = &g->catspace[-(CHAR_MIN)];
|
||||
#endif
|
||||
|
||||
/* do it */
|
||||
EMIT(OEND, 0);
|
||||
@ -1342,6 +1367,7 @@ isinsets(register struct re_guts *g,
|
||||
{
|
||||
register uch *col;
|
||||
register int i;
|
||||
#if defined(__NOASM__) || !defined(__ORCAC__)
|
||||
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
|
||||
register unsigned uc = (unsigned char)c;
|
||||
|
||||
@ -1349,6 +1375,56 @@ isinsets(register struct re_guts *g,
|
||||
if (col[uc] != 0)
|
||||
return(1);
|
||||
return(0);
|
||||
#else
|
||||
/* Hand-optimized code for Apple IIGS */
|
||||
int ncols;
|
||||
int setsize;
|
||||
int rtnval;
|
||||
|
||||
col = g->setbits;
|
||||
setsize = g->csetsize;
|
||||
ncols = g->ncsets+(CHAR_BIT-1);
|
||||
#if CHAR_BIT == 8
|
||||
asm{
|
||||
lda ncols
|
||||
lsr A
|
||||
lsr A
|
||||
lsr A
|
||||
sta ncols
|
||||
}
|
||||
#else
|
||||
ncols = ncols / CHAR_BIT;
|
||||
#endif
|
||||
asm{
|
||||
stz rtnval ; Assume return = 0.
|
||||
lda ncols ; Count i down from ncols
|
||||
beq done
|
||||
sta i ; to 0 to see when done.
|
||||
lda c ; Y-reg = offset from col
|
||||
and #0x00FF
|
||||
tay
|
||||
|
||||
; Loop through cols
|
||||
nextcol:
|
||||
lda [col],y ; Get the byte value.
|
||||
and #0x00FF
|
||||
bne nonzero ; Done if != 0.
|
||||
dec i ; Decrement loop counter.
|
||||
beq done ; If 0, return 0.
|
||||
lda col ; Increment base pointer
|
||||
clc
|
||||
adc setsize
|
||||
sta col
|
||||
bcc nextcol
|
||||
inc col+2
|
||||
bra nextcol ; and stay in loop.
|
||||
|
||||
nonzero: ; Non-zero value found:
|
||||
inc rtnval ; Set return value to 1
|
||||
}
|
||||
done:
|
||||
return(rtnval);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1362,6 +1438,7 @@ samesets(register struct re_guts *g,
|
||||
{
|
||||
register uch *col;
|
||||
register int i;
|
||||
#if defined(__NOASM__) || !defined(__ORCAC__)
|
||||
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
|
||||
register unsigned uc1 = (unsigned char)c1;
|
||||
register unsigned uc2 = (unsigned char)c2;
|
||||
@ -1370,6 +1447,57 @@ samesets(register struct re_guts *g,
|
||||
if (col[uc1] != col[uc2])
|
||||
return(0);
|
||||
return(1);
|
||||
#else
|
||||
/* Hand-optimized code for Apple IIGS */
|
||||
int ncols;
|
||||
int setsize;
|
||||
int rtnval;
|
||||
int c1val;
|
||||
|
||||
col = g->setbits;
|
||||
setsize = g->csetsize;
|
||||
#if CHAR_BIT == 8
|
||||
ncols = (g->ncsets+(CHAR_BIT-1)) >> 3;
|
||||
#else
|
||||
ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
|
||||
#endif
|
||||
asm{
|
||||
stz rtnval ; Assume return = 0
|
||||
lda ncols ; Count i down from ncols
|
||||
beq notfound
|
||||
sta i ; to 0 to check when done.
|
||||
|
||||
; Loop through cols
|
||||
nextcol:
|
||||
lda c1
|
||||
and #0x00FF
|
||||
tay
|
||||
lda [col],y ; Get the 1st value
|
||||
and #0x00FF
|
||||
sta c1val
|
||||
lda c2
|
||||
and #0x00FF
|
||||
tay
|
||||
lda [col],y ; Get the 2nd value
|
||||
and #0x00FF
|
||||
cmp c1val
|
||||
bne done ; Done if != (return 0)
|
||||
dec i ; Decrement loop counter
|
||||
beq notfound ; If done, return 1
|
||||
lda col ; Increment base pointer
|
||||
clc
|
||||
adc setsize
|
||||
sta col
|
||||
bcc nextcol
|
||||
inc col+2
|
||||
bra nextcol ; and stay in loop.
|
||||
|
||||
notfound:
|
||||
inc rtnval ; rtnval = 1
|
||||
}
|
||||
done:
|
||||
return(rtnval);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1435,6 +1563,10 @@ doemit(register struct parse *p,
|
||||
sop op,
|
||||
size_t opnd)
|
||||
{
|
||||
#ifdef REDEBUG
|
||||
/* Debug code added by DMT for GNO implementation */
|
||||
char *opname;
|
||||
#endif
|
||||
/* avoid making error situations worse */
|
||||
if (p->error != 0)
|
||||
return;
|
||||
@ -1449,6 +1581,81 @@ doemit(register struct parse *p,
|
||||
|
||||
/* finally, it's all reduced to the easy case */
|
||||
p->strip[p->slen++] = SOP(op, opnd);
|
||||
|
||||
#ifdef REDEBUG
|
||||
/* Debug code added by DMT for GNO implementation */
|
||||
switch (op) {
|
||||
case OEND:
|
||||
opname = "OEND";
|
||||
break;
|
||||
case OCHAR:
|
||||
opname = "OCHAR";
|
||||
break;
|
||||
case OBOL:
|
||||
opname = "OBOL";
|
||||
break;
|
||||
case OEOL:
|
||||
opname = "OEOL";
|
||||
break;
|
||||
case OANY:
|
||||
opname = "OANY";
|
||||
break;
|
||||
case OANYOF:
|
||||
opname = "OANYOF";
|
||||
break;
|
||||
case OBACK_:
|
||||
opname = "OBACK_";
|
||||
break;
|
||||
case O_BACK:
|
||||
opname = "O_BACK";
|
||||
break;
|
||||
case OPLUS_:
|
||||
opname = "OPLUS_";
|
||||
break;
|
||||
case O_PLUS:
|
||||
opname = "O_PLUS";
|
||||
break;
|
||||
case OQUEST_:
|
||||
opname = "OQUEST_";
|
||||
break;
|
||||
case O_QUEST:
|
||||
opname = "O_QUEST";
|
||||
break;
|
||||
case OLPAREN:
|
||||
opname = "OLPAREN";
|
||||
break;
|
||||
case ORPAREN:
|
||||
opname = "ORPAREN";
|
||||
break;
|
||||
case OCH_:
|
||||
opname = "OCH_";
|
||||
break;
|
||||
case OOR1:
|
||||
opname = "OOR1";
|
||||
break;
|
||||
case OOR2:
|
||||
opname = "OOR2";
|
||||
break;
|
||||
case O_CH:
|
||||
opname = "O_CH";
|
||||
break;
|
||||
case OBOW:
|
||||
opname = "OBOW";
|
||||
break;
|
||||
case OEOW:
|
||||
opname = "OEOW";
|
||||
break;
|
||||
default: /* uh oh */
|
||||
opname = "unknown";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stdout, "emit %2ld: %s %lu", p->slen-1,opname,opnd);
|
||||
if (op == OCHAR && opnd>=32 && opnd<=126) {
|
||||
fprintf(stdout, " \"%c\"", (char)opnd);
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -36,7 +36,7 @@
|
||||
*
|
||||
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
|
||||
*
|
||||
* $Id: regexec.c,v 1.2 1997/10/08 07:07:50 gdr Exp $
|
||||
* $Id: regexec.c,v 1.3 1997/11/17 04:13:08 gdr Exp $
|
||||
*/
|
||||
|
||||
#ifdef __ORCAC__
|
||||
@ -71,24 +71,41 @@ static int nope = 0; /* for use in asserts; shuts lint up */
|
||||
#define states long
|
||||
#define states1 states /* for later use in regexec() decision */
|
||||
#define CLEAR(v) ((v) = 0)
|
||||
#ifndef __ORCAC__
|
||||
#define SET0(v, n) ((v) &= ~(1 << (n)))
|
||||
#define SET1(v, n) ((v) |= 1 << (n))
|
||||
#define ISSET(v, n) ((v) & (1 << (n)))
|
||||
#else /* ORCA/C doesn't use 32 bits by default */
|
||||
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
|
||||
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
|
||||
#define ISSET(v, n) ((v) & ((unsigned long)1 << (n)))
|
||||
#endif
|
||||
#define ASSIGN(d, s) ((d) = (s))
|
||||
#define EQ(a, b) ((a) == (b))
|
||||
#define STATEVARS int dummy /* dummy version */
|
||||
#define STATESETUP(m, n) /* nothing */
|
||||
#define STATETEARDOWN(m) /* nothing */
|
||||
#define SETUP(v) ((v) = 0)
|
||||
#ifndef __ORCAC__
|
||||
#define onestate int
|
||||
#define INIT(o, n) ((o) = (unsigned)1 << (n))
|
||||
#else /* ORCA/C doesn't use 32 bits by default */
|
||||
#define onestate long
|
||||
#define INIT(o, n) ((o) = (unsigned long)1 << (n))
|
||||
#endif
|
||||
#define INC(o) ((o) <<= 1)
|
||||
#define ISSTATEIN(v, o) ((v) & (o))
|
||||
/* some abbreviations; note that some of these know variable names! */
|
||||
/* do "if I'm here, I can also be there" etc without branches */
|
||||
#ifndef __ORCAC__
|
||||
#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n))
|
||||
#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n))
|
||||
#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n)))
|
||||
#else /* ORCA/C doesn't use 32 bits by default */
|
||||
#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
|
||||
#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
|
||||
#define ISSETBACK(v, n) ((v) & ((unsigned long)(here) >> (n)))
|
||||
#endif
|
||||
/* function names */
|
||||
#define SNAMES /* engine.c looks after details */
|
||||
|
||||
@ -129,7 +146,11 @@ static int nope = 0; /* for use in asserts; shuts lint up */
|
||||
(m)->vn = 0; }
|
||||
#define STATETEARDOWN(m) { free((m)->space); }
|
||||
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
|
||||
#ifndef __ORCAC__
|
||||
#define onestate int
|
||||
#else /* ORCA/C doesn't use 32 bits by default */
|
||||
#define onestate long
|
||||
#endif
|
||||
#define INIT(o, n) ((o) = (n))
|
||||
#define INC(o) ((o)++)
|
||||
#define ISSTATEIN(v, o) ((v)[o])
|
||||
@ -172,6 +193,10 @@ regexec(const regex_t *preg,
|
||||
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
|
||||
#endif
|
||||
|
||||
#ifdef REDEBUG
|
||||
fprintf(stdout, "regexec: string \"%s\"\n", string);
|
||||
#endif
|
||||
|
||||
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
|
||||
return(REG_BADPAT);
|
||||
assert(!(g->iflags&BAD));
|
||||
@ -179,6 +204,9 @@ regexec(const regex_t *preg,
|
||||
return(REG_BADPAT);
|
||||
eflags = GOODFLAGS(eflags);
|
||||
|
||||
/* Special...for debugging turn on register tracing */
|
||||
eflags |= REG_TRACE;
|
||||
|
||||
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
|
||||
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
else
|
||||
|
Loading…
Reference in New Issue
Block a user