Rewrote uniq to be less than a third of the size, and fixed some other minor problems.
This commit is contained in:
Matt Kraai 2000-09-27 02:29:39 +00:00
parent 8ce85ce4e3
commit e0bcce09ba
5 changed files with 56 additions and 296 deletions

View File

@ -395,6 +395,7 @@ extern int print_file_by_name(char *filename);
extern char process_escape_sequence(char **ptr); extern char process_escape_sequence(char **ptr);
extern char *get_last_path_component(char *path); extern char *get_last_path_component(char *path);
extern void xregcomp(regex_t *preg, const char *regex, int cflags); extern void xregcomp(regex_t *preg, const char *regex, int cflags);
extern FILE *xfopen(const char *path, const char *mode);
#ifndef DMALLOC #ifndef DMALLOC
extern void *xmalloc (size_t size); extern void *xmalloc (size_t size);

View File

@ -5,6 +5,7 @@
* *
* Copyright (C) 1999,2000 by Lineo, inc. * Copyright (C) 1999,2000 by Lineo, inc.
* Written by John Beppu <beppu@lineo.com> * Written by John Beppu <beppu@lineo.com>
* Rewritten by Matt Kraai <kraai@alumni.carnegiemellon.edu>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -27,156 +28,29 @@
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
/* max chars in line */
/* Size in bytes of each line buffer in subject_next(); fgets() is given
 * this size, so one read yields at most UNIQ_MAX - 1 characters. */
#define UNIQ_MAX 4096
/* Function types for a printing routine and a comparison routine.
 * Neither is referenced by the code visible in this section. */
typedef void (Print) (FILE *, const char *);
typedef int (Decide) (const char *, const char *);
/* container for two lines to be compared */
typedef struct {
/* the previously read line, or NULL before two lines have been seen */
char *a;
/* the most recently read line; set to NULL by subject_last() */
char *b;
/* incremented by subject_study() while a and b compare equal,
 * reset to 0 when they differ */
int recurrence;
/* stream that subject_next() reads lines from */
FILE *in;
/* stream that subject_study() writes lines to */
FILE *out;
/* stored by subject_init(); not used by the code visible here */
void *func;
} Subject;
/*
 * Prepare a Subject for a new uniq run.
 *
 * Records the input stream, output stream and opaque func pointer given
 * by the caller, and clears the per-run state (no lines held, repeat
 * counter at zero).  Returns self.
 */
static Subject *subject_init(Subject * self, FILE * in, FILE * out,
							 void *func)
{
	/* values supplied by the caller */
	self->in = in;
	self->out = out;
	self->func = func;

	/* nothing has been read yet */
	self->a = self->b = NULL;
	self->recurrence = 0;

	return self;
}
/*
 * Read the next line from self->in and shift the line window:
 * the old b becomes a, and the freshly read line becomes b.
 *
 * Returns self when a line was read.  Returns NULL when fgets() reports
 * end of input or an error; in that case self->a and self->b are left
 * as they were.
 */
static Subject *subject_next(Subject * self)
{
	/* two buffers used in turn, so the previous line survives the read */
	static char buffers[2][UNIQ_MAX];
	static int slot = 0;
	char *fresh = buffers[slot];

	if (fgets(fresh, UNIQ_MAX, self->in) == NULL) {
		return NULL;
	}

	self->a = self->b;
	self->b = fresh;
	/* the next call fills the other buffer */
	slot = 1 - slot;
	return self;
}
/*
 * Shift the window one final time after input is exhausted: the last
 * line read moves into a and b is cleared.  Returns self.
 */
static Subject *subject_last(Subject * self)
{
	char *final_line = self->b;

	self->b = NULL;
	self->a = final_line;
	return self;
}
/*
 * Compare the two held lines and emit a when it is not repeated by b.
 *
 * - a == NULL: nothing to do.
 * - b == NULL: a is written to self->out; the repeat counter is untouched.
 * - a equals b: the repeat counter is incremented and nothing is written.
 * - a differs from b: a is written and the repeat counter is reset to 0.
 *
 * Returns self in every case.
 */
static Subject *subject_study(Subject * self)
{
	int repeated;

	/* no earlier line to judge yet */
	if (self->a == NULL) {
		return self;
	}

	repeated = self->b != NULL && strcmp(self->a, self->b) == 0;
	if (repeated) {
		self->recurrence++;
		return self;
	}

	fprintf(self->out, "%s", self->a);
	/* the counter is only reset when there was a following line to compare */
	if (self->b != NULL) {
		self->recurrence = 0;
	}
	return self;
}
/*
 * Open the input and output streams according to the number of file
 * operands.
 *
 *   schema 0: read stdin,   write stdout
 *   schema 1: read argv[0], write stdout
 *   schema 2: read argv[0], write argv[1]
 *
 * in, out: receive the selected streams.  On failure any stream this
 *          function opened has been closed and the corresponding pointer
 *          is NULL.
 * argv:    points at the first file operand (only read for schema 1 and 2).
 *
 * Returns 0 on success.  On failure the problem is reported through
 * errorMsg() and a non-zero errno-style code is returned.
 */
static int
set_file_pointers(int schema, FILE ** in, FILE ** out, char **argv)
{
	int err;

	/* Start from a defined state so the caller never sees indeterminate
	 * pointers, whatever schema is passed. */
	*in = NULL;
	*out = NULL;

	if (schema < 0 || schema > 2) {
		errorMsg("invalid number of file arguments\n");
		return EINVAL;
	}

	if (schema == 0) {
		*in = stdin;
	} else {
		*in = fopen(argv[0], "r");
		if (*in == NULL) {
			/* capture errno before errorMsg() has a chance to change it */
			err = errno;
			errorMsg("%s: %s\n", argv[0], strerror(err));
			return err != 0 ? err : EIO;
		}
	}

	if (schema < 2) {
		*out = stdout;
		return 0;
	}

	/* Open the output only after the input is known to be readable, so a
	 * bad input name does not create or truncate the output file. */
	*out = fopen(argv[1], "w");
	if (*out == NULL) {
		err = errno;
		errorMsg("%s: %s\n", argv[1], strerror(err));
		fclose(*in);
		*in = NULL;
		return err != 0 ? err : EIO;
	}

	return 0;
}
/* one variable is the decision algo */
/* another variable is the printing algo */
/* I don't think I have to have more than a 1 line memory
this is the one constant */
/* it seems like GNU/uniq only takes one or two files as an option */
/* ________________________________________________________________________ */
int uniq_main(int argc, char **argv) int uniq_main(int argc, char **argv)
{ {
int i; FILE *in = stdin, *out = stdout;
char opt; char *lastline = NULL, *input;
FILE *in, *out;
Subject s;
/* parse argv[] */ /* parse argv[] */
for (i = 1; i < argc; i++) { if ((argc > 1 && **(argv + 1) == '-') || argc > 3)
if (argv[i][0] == '-') { usage(uniq_usage);
opt = argv[i][1];
switch (opt) { if (argv[1] != NULL) {
case '-': in = xfopen(argv[1], "r");
case 'h': if (argv[2] != NULL)
usage(uniq_usage); out = xfopen(argv[2], "w");
default: }
usage(uniq_usage);
} while ((input = get_line_from_file(in)) != NULL) {
} else { if (lastline == NULL || strcmp(input, lastline) != 0) {
break; fputs(input, out);
free(lastline);
lastline = input;
} }
} }
free(lastline);
/* 0 src: stdin; dst: stdout */ return EXIT_SUCCESS;
/* 1 src: file; dst: stdout */
/* 2 src: file; dst: file */
if (set_file_pointers((argc - 1), &in, &out, &argv[i])) {
exit(1);
}
subject_init(&s, in, out, NULL);
while (subject_next(&s)) {
subject_study(&s);
}
subject_last(&s);
subject_study(&s);
return(0);
} }
/* $Id: uniq.c,v 1.14 2000/09/25 21:45:58 andersen Exp $ */

View File

@ -395,6 +395,7 @@ extern int print_file_by_name(char *filename);
extern char process_escape_sequence(char **ptr); extern char process_escape_sequence(char **ptr);
extern char *get_last_path_component(char *path); extern char *get_last_path_component(char *path);
extern void xregcomp(regex_t *preg, const char *regex, int cflags); extern void xregcomp(regex_t *preg, const char *regex, int cflags);
extern FILE *xfopen(const char *path, const char *mode);
#ifndef DMALLOC #ifndef DMALLOC
extern void *xmalloc (size_t size); extern void *xmalloc (size_t size);

164
uniq.c
View File

@ -5,6 +5,7 @@
* *
* Copyright (C) 1999,2000 by Lineo, inc. * Copyright (C) 1999,2000 by Lineo, inc.
* Written by John Beppu <beppu@lineo.com> * Written by John Beppu <beppu@lineo.com>
* Rewritten by Matt Kraai <kraai@alumni.carnegiemellon.edu>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -27,156 +28,29 @@
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
/* max chars in line */
/* Size in bytes of each line buffer in subject_next(); fgets() is given
 * this size, so one read yields at most UNIQ_MAX - 1 characters. */
#define UNIQ_MAX 4096
/* Function types for a printing routine and a comparison routine.
 * Neither is referenced by the code visible in this section. */
typedef void (Print) (FILE *, const char *);
typedef int (Decide) (const char *, const char *);
/* container for two lines to be compared */
typedef struct {
/* the previously read line, or NULL before two lines have been seen */
char *a;
/* the most recently read line; set to NULL by subject_last() */
char *b;
/* incremented by subject_study() while a and b compare equal,
 * reset to 0 when they differ */
int recurrence;
/* stream that subject_next() reads lines from */
FILE *in;
/* stream that subject_study() writes lines to */
FILE *out;
/* stored by subject_init(); not used by the code visible here */
void *func;
} Subject;
/*
 * Prepare a Subject for a new uniq run.
 *
 * Records the input stream, output stream and opaque func pointer given
 * by the caller, and clears the per-run state (no lines held, repeat
 * counter at zero).  Returns self.
 */
static Subject *subject_init(Subject * self, FILE * in, FILE * out,
							 void *func)
{
	/* values supplied by the caller */
	self->in = in;
	self->out = out;
	self->func = func;

	/* nothing has been read yet */
	self->a = self->b = NULL;
	self->recurrence = 0;

	return self;
}
/*
 * Read the next line from self->in and shift the line window:
 * the old b becomes a, and the freshly read line becomes b.
 *
 * Returns self when a line was read.  Returns NULL when fgets() reports
 * end of input or an error; in that case self->a and self->b are left
 * as they were.
 */
static Subject *subject_next(Subject * self)
{
	/* two buffers used in turn, so the previous line survives the read */
	static char buffers[2][UNIQ_MAX];
	static int slot = 0;
	char *fresh = buffers[slot];

	if (fgets(fresh, UNIQ_MAX, self->in) == NULL) {
		return NULL;
	}

	self->a = self->b;
	self->b = fresh;
	/* the next call fills the other buffer */
	slot = 1 - slot;
	return self;
}
/*
 * Shift the window one final time after input is exhausted: the last
 * line read moves into a and b is cleared.  Returns self.
 */
static Subject *subject_last(Subject * self)
{
	char *final_line = self->b;

	self->b = NULL;
	self->a = final_line;
	return self;
}
/*
 * Compare the two held lines and emit a when it is not repeated by b.
 *
 * - a == NULL: nothing to do.
 * - b == NULL: a is written to self->out; the repeat counter is untouched.
 * - a equals b: the repeat counter is incremented and nothing is written.
 * - a differs from b: a is written and the repeat counter is reset to 0.
 *
 * Returns self in every case.
 */
static Subject *subject_study(Subject * self)
{
	int repeated;

	/* no earlier line to judge yet */
	if (self->a == NULL) {
		return self;
	}

	repeated = self->b != NULL && strcmp(self->a, self->b) == 0;
	if (repeated) {
		self->recurrence++;
		return self;
	}

	fprintf(self->out, "%s", self->a);
	/* the counter is only reset when there was a following line to compare */
	if (self->b != NULL) {
		self->recurrence = 0;
	}
	return self;
}
/*
 * Open the input and output streams according to the number of file
 * operands.
 *
 *   schema 0: read stdin,   write stdout
 *   schema 1: read argv[0], write stdout
 *   schema 2: read argv[0], write argv[1]
 *
 * in, out: receive the selected streams.  On failure any stream this
 *          function opened has been closed and the corresponding pointer
 *          is NULL.
 * argv:    points at the first file operand (only read for schema 1 and 2).
 *
 * Returns 0 on success.  On failure the problem is reported through
 * errorMsg() and a non-zero errno-style code is returned.
 */
static int
set_file_pointers(int schema, FILE ** in, FILE ** out, char **argv)
{
	int err;

	/* Start from a defined state so the caller never sees indeterminate
	 * pointers, whatever schema is passed. */
	*in = NULL;
	*out = NULL;

	if (schema < 0 || schema > 2) {
		errorMsg("invalid number of file arguments\n");
		return EINVAL;
	}

	if (schema == 0) {
		*in = stdin;
	} else {
		*in = fopen(argv[0], "r");
		if (*in == NULL) {
			/* capture errno before errorMsg() has a chance to change it */
			err = errno;
			errorMsg("%s: %s\n", argv[0], strerror(err));
			return err != 0 ? err : EIO;
		}
	}

	if (schema < 2) {
		*out = stdout;
		return 0;
	}

	/* Open the output only after the input is known to be readable, so a
	 * bad input name does not create or truncate the output file. */
	*out = fopen(argv[1], "w");
	if (*out == NULL) {
		err = errno;
		errorMsg("%s: %s\n", argv[1], strerror(err));
		fclose(*in);
		*in = NULL;
		return err != 0 ? err : EIO;
	}

	return 0;
}
/* one variable is the decision algo */
/* another variable is the printing algo */
/* I don't think I have to have more than a 1 line memory
this is the one constant */
/* it seems like GNU/uniq only takes one or two files as an option */
/* ________________________________________________________________________ */
int uniq_main(int argc, char **argv) int uniq_main(int argc, char **argv)
{ {
int i; FILE *in = stdin, *out = stdout;
char opt; char *lastline = NULL, *input;
FILE *in, *out;
Subject s;
/* parse argv[] */ /* parse argv[] */
for (i = 1; i < argc; i++) { if ((argc > 1 && **(argv + 1) == '-') || argc > 3)
if (argv[i][0] == '-') { usage(uniq_usage);
opt = argv[i][1];
switch (opt) { if (argv[1] != NULL) {
case '-': in = xfopen(argv[1], "r");
case 'h': if (argv[2] != NULL)
usage(uniq_usage); out = xfopen(argv[2], "w");
default: }
usage(uniq_usage);
} while ((input = get_line_from_file(in)) != NULL) {
} else { if (lastline == NULL || strcmp(input, lastline) != 0) {
break; fputs(input, out);
free(lastline);
lastline = input;
} }
} }
free(lastline);
/* 0 src: stdin; dst: stdout */ return EXIT_SUCCESS;
/* 1 src: file; dst: stdout */
/* 2 src: file; dst: file */
if (set_file_pointers((argc - 1), &in, &out, &argv[i])) {
exit(1);
}
subject_init(&s, in, out, NULL);
while (subject_next(&s)) {
subject_study(&s);
}
subject_last(&s);
subject_study(&s);
return(0);
} }
/* $Id: uniq.c,v 1.14 2000/09/25 21:45:58 andersen Exp $ */

View File

@ -81,7 +81,7 @@ extern void usage(const char *usage)
{ {
fprintf(stderr, "%s\n\n", full_version); fprintf(stderr, "%s\n\n", full_version);
fprintf(stderr, "Usage: %s\n", usage); fprintf(stderr, "Usage: %s\n", usage);
exit FALSE; exit(EXIT_FAILURE);
} }
extern void errorMsg(const char *s, ...) extern void errorMsg(const char *s, ...)
@ -106,7 +106,7 @@ extern void fatalError(const char *s, ...)
vfprintf(stderr, s, p); vfprintf(stderr, s, p);
va_end(p); va_end(p);
fflush(stderr); fflush(stderr);
exit( FALSE); exit(EXIT_FAILURE);
} }
#if defined BB_INIT #if defined BB_INIT
@ -401,17 +401,17 @@ copyFile(const char *srcName, const char *destName,
/* This is fine, since symlinks never get here */ /* This is fine, since symlinks never get here */
if (chown(destName, srcStatBuf.st_uid, srcStatBuf.st_gid) < 0) { if (chown(destName, srcStatBuf.st_uid, srcStatBuf.st_gid) < 0) {
perror(destName); perror(destName);
exit FALSE; exit(EXIT_FAILURE);
} }
if (chmod(destName, srcStatBuf.st_mode) < 0) { if (chmod(destName, srcStatBuf.st_mode) < 0) {
perror(destName); perror(destName);
exit FALSE; exit(EXIT_FAILURE);
} }
times.actime = srcStatBuf.st_atime; times.actime = srcStatBuf.st_atime;
times.modtime = srcStatBuf.st_mtime; times.modtime = srcStatBuf.st_mtime;
if (utime(destName, &times) < 0) { if (utime(destName, &times) < 0) {
perror(destName); perror(destName);
exit FALSE; exit(EXIT_FAILURE);
} }
} }
@ -1713,11 +1713,21 @@ void xregcomp(regex_t *preg, const char *regex, int cflags)
int errmsgsz = regerror(ret, preg, NULL, 0); int errmsgsz = regerror(ret, preg, NULL, 0);
char *errmsg = xmalloc(errmsgsz); char *errmsg = xmalloc(errmsgsz);
regerror(ret, preg, errmsg, errmsgsz); regerror(ret, preg, errmsg, errmsgsz);
fatalError("bb_regcomp: %s\n", errmsg); fatalError("xregcomp: %s\n", errmsg);
} }
} }
#endif #endif
#if defined BB_UNIQ
/*
 * fopen() wrapper that does not return on failure.
 *
 * Opens path with the given mode.  If fopen() fails, the path and the
 * strerror() text for errno are passed to fatalError(); otherwise the
 * open stream is returned.
 */
FILE *xfopen(const char *path, const char *mode)
{
	FILE *stream = fopen(path, mode);

	if (stream == NULL)
		fatalError("%s: %s\n", path, strerror(errno));
	return stream;
}
#endif
/* END CODE */ /* END CODE */
/* /*
Local Variables: Local Variables: