From e0bcce09baff576b1b16b3ffe780b6d91c7710c2 Mon Sep 17 00:00:00 2001 From: Matt Kraai Date: Wed, 27 Sep 2000 02:29:39 +0000 Subject: [PATCH] Rewrote uniq to be less than a third of the size, and fixed some other minor problems. --- busybox.h | 1 + coreutils/uniq.c | 164 ++++++---------------------------------------- include/busybox.h | 1 + uniq.c | 164 ++++++---------------------------------------- utility.c | 22 +++++-- 5 files changed, 56 insertions(+), 296 deletions(-) diff --git a/busybox.h b/busybox.h index 69f455435..faad206ee 100644 --- a/busybox.h +++ b/busybox.h @@ -395,6 +395,7 @@ extern int print_file_by_name(char *filename); extern char process_escape_sequence(char **ptr); extern char *get_last_path_component(char *path); extern void xregcomp(regex_t *preg, const char *regex, int cflags); +extern FILE *xfopen(const char *path, const char *mode); #ifndef DMALLOC extern void *xmalloc (size_t size); diff --git a/coreutils/uniq.c b/coreutils/uniq.c index ef38587bd..cfe6cca5e 100644 --- a/coreutils/uniq.c +++ b/coreutils/uniq.c @@ -5,6 +5,7 @@ * * Copyright (C) 1999,2000 by Lineo, inc. 
* Written by John Beppu + * Rewritten by Matt Kraai * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,156 +28,29 @@ #include #include -/* max chars in line */ -#define UNIQ_MAX 4096 - -typedef void (Print) (FILE *, const char *); - -typedef int (Decide) (const char *, const char *); - -/* container for two lines to be compared */ -typedef struct { - char *a; - char *b; - int recurrence; - FILE *in; - FILE *out; - void *func; -} Subject; - -/* set up all the variables of a uniq operation */ -static Subject *subject_init(Subject * self, FILE * in, FILE * out, - void *func) -{ - self->a = NULL; - self->b = NULL; - self->in = in; - self->out = out; - self->func = func; - self->recurrence = 0; - return self; -} - -/* point a and b to the appropriate lines; - * count the recurrences (if any) of a string; - */ -static Subject *subject_next(Subject * self) -{ - /* tmp line holders */ - static char line[2][UNIQ_MAX]; - static int alternator = 0; - - if (fgets(line[alternator], UNIQ_MAX, self->in)) { - self->a = self->b; - self->b = line[alternator]; - alternator ^= 1; - return self; - } - - return NULL; -} - -static Subject *subject_last(Subject * self) -{ - self->a = self->b; - self->b = NULL; - return self; -} - -static Subject *subject_study(Subject * self) -{ - if (self->a == NULL) { - return self; - } - if (self->b == NULL) { - fprintf(self->out, "%s", self->a); - return self; - } - if (strcmp(self->a, self->b) == 0) { - self->recurrence++; - } else { - fprintf(self->out, "%s", self->a); - self->recurrence = 0; - } - return self; -} - -static int -set_file_pointers(int schema, FILE ** in, FILE ** out, char **argv) -{ - switch (schema) { - case 0: - *in = stdin; - *out = stdout; - break; - case 1: - *in = fopen(argv[0], "r"); - *out = stdout; - break; - case 2: - *in = fopen(argv[0], "r"); - *out = fopen(argv[1], "w"); - break; - } - if (*in == NULL) { - errorMsg("%s: 
%s\n", argv[0], strerror(errno)); - return errno; - } - if (*out == NULL) { - errorMsg("%s: %s\n", argv[1], strerror(errno)); - return errno; - } - return 0; -} - - -/* one variable is the decision algo */ -/* another variable is the printing algo */ - -/* I don't think I have to have more than a 1 line memory - this is the one constant */ - -/* it seems like GNU/uniq only takes one or two files as an option */ - -/* ________________________________________________________________________ */ int uniq_main(int argc, char **argv) { - int i; - char opt; - FILE *in, *out; - Subject s; + FILE *in = stdin, *out = stdout; + char *lastline = NULL, *input; /* parse argv[] */ - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { - opt = argv[i][1]; - switch (opt) { - case '-': - case 'h': - usage(uniq_usage); - default: - usage(uniq_usage); - } - } else { - break; + if ((argc > 1 && **(argv + 1) == '-') || argc > 3) + usage(uniq_usage); + + if (argv[1] != NULL) { + in = xfopen(argv[1], "r"); + if (argv[2] != NULL) + out = xfopen(argv[2], "w"); + } + + while ((input = get_line_from_file(in)) != NULL) { + if (lastline == NULL || strcmp(input, lastline) != 0) { + fputs(input, out); + free(lastline); + lastline = input; } } + free(lastline); - /* 0 src: stdin; dst: stdout */ - /* 1 src: file; dst: stdout */ - /* 2 src: file; dst: file */ - if (set_file_pointers((argc - 1), &in, &out, &argv[i])) { - exit(1); - } - - subject_init(&s, in, out, NULL); - while (subject_next(&s)) { - subject_study(&s); - } - subject_last(&s); - subject_study(&s); - - return(0); + return EXIT_SUCCESS; } - -/* $Id: uniq.c,v 1.14 2000/09/25 21:45:58 andersen Exp $ */ diff --git a/include/busybox.h b/include/busybox.h index 69f455435..faad206ee 100644 --- a/include/busybox.h +++ b/include/busybox.h @@ -395,6 +395,7 @@ extern int print_file_by_name(char *filename); extern char process_escape_sequence(char **ptr); extern char *get_last_path_component(char *path); extern void xregcomp(regex_t *preg, 
const char *regex, int cflags); +extern FILE *xfopen(const char *path, const char *mode); #ifndef DMALLOC extern void *xmalloc (size_t size); diff --git a/uniq.c b/uniq.c index ef38587bd..cfe6cca5e 100644 --- a/uniq.c +++ b/uniq.c @@ -5,6 +5,7 @@ * * Copyright (C) 1999,2000 by Lineo, inc. * Written by John Beppu + * Rewritten by Matt Kraai * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,156 +28,29 @@ #include #include -/* max chars in line */ -#define UNIQ_MAX 4096 - -typedef void (Print) (FILE *, const char *); - -typedef int (Decide) (const char *, const char *); - -/* container for two lines to be compared */ -typedef struct { - char *a; - char *b; - int recurrence; - FILE *in; - FILE *out; - void *func; -} Subject; - -/* set up all the variables of a uniq operation */ -static Subject *subject_init(Subject * self, FILE * in, FILE * out, - void *func) -{ - self->a = NULL; - self->b = NULL; - self->in = in; - self->out = out; - self->func = func; - self->recurrence = 0; - return self; -} - -/* point a and b to the appropriate lines; - * count the recurrences (if any) of a string; - */ -static Subject *subject_next(Subject * self) -{ - /* tmp line holders */ - static char line[2][UNIQ_MAX]; - static int alternator = 0; - - if (fgets(line[alternator], UNIQ_MAX, self->in)) { - self->a = self->b; - self->b = line[alternator]; - alternator ^= 1; - return self; - } - - return NULL; -} - -static Subject *subject_last(Subject * self) -{ - self->a = self->b; - self->b = NULL; - return self; -} - -static Subject *subject_study(Subject * self) -{ - if (self->a == NULL) { - return self; - } - if (self->b == NULL) { - fprintf(self->out, "%s", self->a); - return self; - } - if (strcmp(self->a, self->b) == 0) { - self->recurrence++; - } else { - fprintf(self->out, "%s", self->a); - self->recurrence = 0; - } - return self; -} - -static int -set_file_pointers(int schema, FILE ** 
in, FILE ** out, char **argv) -{ - switch (schema) { - case 0: - *in = stdin; - *out = stdout; - break; - case 1: - *in = fopen(argv[0], "r"); - *out = stdout; - break; - case 2: - *in = fopen(argv[0], "r"); - *out = fopen(argv[1], "w"); - break; - } - if (*in == NULL) { - errorMsg("%s: %s\n", argv[0], strerror(errno)); - return errno; - } - if (*out == NULL) { - errorMsg("%s: %s\n", argv[1], strerror(errno)); - return errno; - } - return 0; -} - - -/* one variable is the decision algo */ -/* another variable is the printing algo */ - -/* I don't think I have to have more than a 1 line memory - this is the one constant */ - -/* it seems like GNU/uniq only takes one or two files as an option */ - -/* ________________________________________________________________________ */ int uniq_main(int argc, char **argv) { - int i; - char opt; - FILE *in, *out; - Subject s; + FILE *in = stdin, *out = stdout; + char *lastline = NULL, *input; /* parse argv[] */ - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { - opt = argv[i][1]; - switch (opt) { - case '-': - case 'h': - usage(uniq_usage); - default: - usage(uniq_usage); - } - } else { - break; + if ((argc > 1 && **(argv + 1) == '-') || argc > 3) + usage(uniq_usage); + + if (argv[1] != NULL) { + in = xfopen(argv[1], "r"); + if (argv[2] != NULL) + out = xfopen(argv[2], "w"); + } + + while ((input = get_line_from_file(in)) != NULL) { + if (lastline == NULL || strcmp(input, lastline) != 0) { + fputs(input, out); + free(lastline); + lastline = input; } } + free(lastline); - /* 0 src: stdin; dst: stdout */ - /* 1 src: file; dst: stdout */ - /* 2 src: file; dst: file */ - if (set_file_pointers((argc - 1), &in, &out, &argv[i])) { - exit(1); - } - - subject_init(&s, in, out, NULL); - while (subject_next(&s)) { - subject_study(&s); - } - subject_last(&s); - subject_study(&s); - - return(0); + return EXIT_SUCCESS; } - -/* $Id: uniq.c,v 1.14 2000/09/25 21:45:58 andersen Exp $ */ diff --git a/utility.c b/utility.c index 
f7dda7917..d376a04fb 100644 --- a/utility.c +++ b/utility.c @@ -81,7 +81,7 @@ extern void usage(const char *usage) { fprintf(stderr, "%s\n\n", full_version); fprintf(stderr, "Usage: %s\n", usage); - exit FALSE; + exit(EXIT_FAILURE); } extern void errorMsg(const char *s, ...) @@ -106,7 +106,7 @@ extern void fatalError(const char *s, ...) vfprintf(stderr, s, p); va_end(p); fflush(stderr); - exit( FALSE); + exit(EXIT_FAILURE); } #if defined BB_INIT @@ -401,17 +401,17 @@ copyFile(const char *srcName, const char *destName, /* This is fine, since symlinks never get here */ if (chown(destName, srcStatBuf.st_uid, srcStatBuf.st_gid) < 0) { perror(destName); - exit FALSE; + exit(EXIT_FAILURE); } if (chmod(destName, srcStatBuf.st_mode) < 0) { perror(destName); - exit FALSE; + exit(EXIT_FAILURE); } times.actime = srcStatBuf.st_atime; times.modtime = srcStatBuf.st_mtime; if (utime(destName, &times) < 0) { perror(destName); - exit FALSE; + exit(EXIT_FAILURE); } } @@ -1713,11 +1713,21 @@ void xregcomp(regex_t *preg, const char *regex, int cflags) int errmsgsz = regerror(ret, preg, NULL, 0); char *errmsg = xmalloc(errmsgsz); regerror(ret, preg, errmsg, errmsgsz); - fatalError("bb_regcomp: %s\n", errmsg); + fatalError("xregcomp: %s\n", errmsg); } } #endif +#if defined BB_UNIQ +FILE *xfopen(const char *path, const char *mode) +{ + FILE *fp; + if ((fp = fopen(path, mode)) == NULL) + fatalError("%s: %s\n", path, strerror(errno)); + return fp; +} +#endif + /* END CODE */ /* Local Variables: