version 1.0 of msort and dsort for GNO

This commit is contained in:
gdr 1996-01-28 00:52:48 +00:00
parent a1e9154b96
commit 5722e4fa7f
14 changed files with 1561 additions and 0 deletions

18
usr.bin/sort/README Normal file
View File

@ -0,0 +1,18 @@
This archive contains the utilities msort(1) and dsort(1). Both sort
text files lexicographically.
Msort is a fast in-place memory sort.
Dsort is a disk based sort that can handle "arbitrarily large" files
(in reality, limited to ULONG_MAX -- 4 294 967 295 -- bytes).
The big difference between these sorts and the previously available
sort(1) is that these won't crash your system ... if either runs into
problems, it exits gracefully and (if you are using the verbose flag)
tells you what the problem is.
Enjoy.
Devin Reade
14 June 1994

57
usr.bin/sort/common.h Normal file
View File

@ -0,0 +1,57 @@
#include <sys/types.h>
#include <stdio.h>
/*
 * EXTERN decides where the globals at the end of this header live: a
 * source file that defines DEFFUNC before including common.h gets their
 * definitions, every other file sees them as extern declarations.
 */
#ifdef DEFFUNC
# define EXTERN
#else
# define EXTERN extern
#endif
/* NOTE(review): ALN2I and TINY are not referenced by any code visible here */
#define ALN2I 1.442695022 /* 1 / ln(2) */
#define TINY 1.0e-5 /* "zero" for heapsort */
#define BUFFERSIZE 4096 /* a generic buffer for I/O */
#define DEFAULT_LINECOUNT 1000 /* number of lines to memory sort */
#define DEFAULT_LINELENGTH 512 /* max length of line recognised */
#define DELIM 0x03 /* ETX; a line holding only this ends a run in a temp file */
/*
 * NEWLINE is the character linecount() counts as a line terminator.
 * BROKEN_REALLOC marks a C library whose realloc() should not be handed
 * a NULL pointer; it is defined for every compiler except ORCA/C.
 */
#ifdef __ORCAC__
# define NEWLINE '\r'
#else
# define NEWLINE '\n'
# define BROKEN_REALLOC
#endif
/*
 * Hand-written prototypes used only when compiling with GCC.
 * NOTE(review): presumably for a libc whose headers lack them; they take
 * char * where ISO C has const char * and may clash with a current
 * <stdio.h> -- confirm before building with a modern toolchain.
 */
#ifdef __GNUC__
int printf(char *format, ...);
int fprintf(FILE *stream, char *format, ...);
void perror(char *s);
int close(int fd);
int fclose(FILE *stream);
int rename(char *, char *);
void rewind(FILE *);
#endif
/*
 * STATUS(string) prints <string> and a newline to stderr in DEBUG builds
 * and expands to an empty block otherwise.  DEBUG builds also declare
 * the stack-check hooks that the two main() routines call.
 */
#ifdef DEBUG
# define STATUS(string) fprintf(stderr,"%s\n",string)
extern void begin_stack_check(void);
extern int end_stack_check(void);
#else
# define STATUS(string) {;}
#endif
/* Routines shared by msort and dsort; each is documented at its definition. */
unsigned long int linecount (char *filename, size_t *maxlinelen);
char **loadarray (unsigned long n, char *filename, size_t maxlinelen);
void sortarray(char *array[], unsigned long n);
int disksort (char *filename, size_t linecount, size_t linelength);
int initdisksort(void);
int mergeone(FILE *fpA, FILE *fpB, FILE *fpC, char strA[], char strB[],
size_t linelength);
/* v_flag: non-zero makes the library routines report errors via perror() */
EXTERN short v_flag;
/* out_fp: the stream the sorted output is written to */
EXTERN FILE *out_fp;
/*
 * Disk-sort state, visible only to files that define DSORT:
 *   fp1..fp4       -- streams open on the four scratch files
 *   file1..file4   -- names of the scratch files, as returned by tempnam()
 *   tpath1..tpath4 -- directories handed to tempnam(); NULL when not given
 */
#ifdef DSORT
EXTERN FILE *fp1, *fp2, *fp3, *fp4;
EXTERN char *file1, *file2, *file3, *file4;
EXTERN char *tpath1, *tpath2, *tpath3, *tpath4;
#endif

262
usr.bin/sort/disksort.c Normal file
View File

@ -0,0 +1,262 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
#define DSORT
#include "common.h"
#include <stdlib.h>
#include <unistd.h>
/*
 * KILLTEMP -- close the four scratch streams, remove the scratch files
 * and free their names.  All four streams must be open.
 *
 * KILLARRAY(a,i) -- free the first <i> strings of the array <a>, then
 * the array itself.
 *
 * Both are wrapped in do { } while (0) so that each expands to exactly
 * one statement and stays correct under an unbraced if/else.
 */
#define KILLTEMP do { \
    fclose(fp1); fclose(fp2); fclose(fp3); fclose(fp4); \
    unlink(file1); unlink(file2); unlink(file3); unlink(file4); \
    free(file1); free(file2); free(file3); free(file4); \
  } while (0)
#define KILLARRAY(a,i) do { \
    size_t killarray_j_; \
    for (killarray_j_ = 0; killarray_j_ < (i); killarray_j_++) \
      free((a)[killarray_j_]); \
    free(a); \
  } while (0)
/*
 * Diagnostics handed to perror() by disksort(); they are only printed
 * when v_flag is set.
 */
static char *errmsg1 =
"disksort: write failed on fp1 during construction phase";
static char *errmsg2 =
"disksort: write failed on fp2 during construction phase";
static char *errmsg3 = "disksort: couldn't allocate scratch buffers";
static char *errmsg4 = "disksort: couldn't reopen temp files";
static char *errmsg5 = "disksort: read on temp file failed";
/*
* int disksort (char *infile, size_t linecount, size_t linelength);
*
* Pre: <infile> is the name of the input file. <linecount> is the
* maximum number of text lines we should try to sort in _memory_
* at any one time. If it is zero, then DEFAULT_LINECOUNT is used.
* <linecount> may be much smaller than the actual linecount of
* <infile>. <linelength> - 1 is the maximum length of line from
* <infile> that disksort will recognise. If <linelength> is zero,
* then DEFAULT_LINELENGTH is used. Global out_fp is an open stream.
*
* Post: The file refered to by <infile> is sorted lexicographically
* by lines. If a line is longer than <linelength>, then any extra
* characters in that line will be truncated. On success, disksort
* returns zero. On failure, disksort returns -1. The sorted output
* is printed to out_fp, and <infile> is unchanged.
*
* Note: This routine is based on a polyphase merge sort using four
* temporary files.
*
* Uses Globals:
* fp1, fp2, fp3, fp4, file1, file2, file3, file4, out_fp
*/
int disksort (char *infile, size_t linecount, size_t linelength) {
FILE *in_fp; /* input file pointer */
char lastfile; /* to where did we last write? */
size_t runcount; /* how many runs make up infile? */
FILE *fpA, *fpB, *fpC, *fpD;
char *tempout; /* the name of the final temp file */
char **array;
size_t i;
char *strA, *strB;
/*
*
* PHASE ZERO: Initialization
*
*/
if (initdisksort() != 0) return -1; /* already printed error msgs */
/*
* Open the input file for reading
*/
if ((in_fp=fopen(infile,"r"))==NULL) {
if (v_flag) perror("disksort: couldn't open input file");
KILLTEMP;
return -1;
}
/*
* Get the size of the array of strings we will sort, and create it
*/
if (linecount == 0) linecount = DEFAULT_LINECOUNT;
if (linelength == 0) linelength = DEFAULT_LINELENGTH;
if ((array = malloc (linecount * sizeof(char *))) == NULL) {
if (v_flag) perror("disksort: couldn't allocate array");
fclose(in_fp);
KILLTEMP;
return -1;
}
for (i = 0; i<linecount; i++) {
if ((array[i] = malloc (linelength * sizeof(char))) == NULL) {
if (v_flag) perror("disksort: couldn't allocate array elements");
KILLARRAY(array,i);
fclose(in_fp);
KILLTEMP;
return -1;
}
}
/*
* PHASE I:
*
* Read runs from input file, sort each run, dump to first two
* temp files, alternating between file1 & file2.
*/
lastfile = 'B';
runcount = 0;
while(!feof(in_fp)) {
/* read in a block that can be sorted in core memory */
for (i=0; i<linecount; i++) {
if (fgets(array[i],linelength,in_fp)==NULL) {
if (feof(in_fp)) {
array[i][0] = '\0'; /* end of file */
--i; /* reduce it by one so that sortarray() works */
break;
} else {
if (v_flag) perror(errmsg5); /* file error */
KILLARRAY(array,linecount);
fclose(in_fp);
KILLTEMP;
return -1;
}
}
}
/* sort it */
sortarray(array,i);
/* print it out to one of the temp files and add the end-of-line DELIM */
if (lastfile == 'B') {
for (i=0; (i<linecount) && (array[i][0]!='\0'); i++)
if ((fprintf(fp1,"%s",array[i])==EOF) && v_flag) perror(errmsg1);
lastfile = 'A';
if ((fprintf(fp1,"%c\n",DELIM)==EOF) && v_flag) perror(errmsg1);
} else { /* lastfile == 'A' */
for (i=0; (i<linecount) && (array[i][0]!='\0'); i++)
if ((fprintf(fp2,"%s",array[i])==EOF) && v_flag) perror(errmsg2);
lastfile = 'B';
if ((fprintf(fp2,"%c\n",DELIM)==EOF) && v_flag) perror(errmsg2);
}
}
/* clean up Phase I */
fclose(in_fp);
rewind(fp1);
rewind(fp2);
/*
* merge the files -- at this point, files fp1 and fp2 contain
* multiple runs of <linecount> records. Keep merging and
*/
/* initialize this backwards because of the initial flip */
fpA = fp2;
/* get some scratch strings for the merge */
if (((strA=malloc(linelength))==NULL) ||
((strB=malloc(linelength))==NULL)) {
if (v_flag) perror(errmsg3);
return -1;
}
do {
runcount = 0;
/* flip the files so we can sort back the other way */
if (fpA == fp1) {
fpA = fp3;
fpB = fp4;
fp1 = freopen(file1,"w+",fp1);
fp2 = freopen(file2,"w+",fp2);
if ((fp1==NULL) || (fp2==NULL)) {
if (v_flag) perror(errmsg4);
return -1;
}
fpC = fp1;
fpD = fp2;
} else {
fpA = fp1;
fpB = fp2;
fp3 = freopen(file3,"w+",fp3);
fp4 = freopen(file4,"w+",fp4);
if ((fp3==NULL) || (fp4==NULL)) {
if (v_flag) perror(errmsg4);
return -1;
}
fpC = fp3;
fpD = fp4;
}
rewind(fpA);
rewind(fpB);
/*
* Sort pairs of runs until EOF is reached
*/
for (;;) {
int mergeresult;
/*
* merge one run from each of fpA and fpB into fpC, then repeat
* it but placing the result into fpD.
*/
mergeresult = mergeone(fpA,fpB,fpC,strA,strB,linelength);
if (mergeresult == 0) {
runcount++;
mergeresult = mergeone(fpA,fpB,fpD,strA,strB,linelength);
if (mergeresult == 0) runcount++;
}
if (mergeresult == -1) {
/* both files at EOF */
break;
} else if (mergeresult == -2) {
/* files in error; message already printed */
KILLARRAY(array,linecount);
KILLTEMP;
return -1;
}
/* else normal merge; continue */
}
} while (runcount>1);
/*
* At this point, fpC contains the sorted file. (We hope ...)
*/
if (fpC==fp1) tempout = file1;
else if (fpC==fp2) tempout = file2;
else if (fpC==fp3) tempout = file3;
else /* fpC==fp4 */ tempout = file4;
/*
* clean up and exit
*/
/* copy lines from fpC to infile except for the trailing DELIM */
rewind(fpC);
for (;;) {
if (fgets(strA,linelength,fpC)==NULL) {
if(v_flag) perror(errmsg5);
return -1;
}
if ((strA[0]==DELIM) && (strA[1]=='\n')) break;
if (fprintf(out_fp,"%s",strA)==EOF) {
if (v_flag) perror("disksort: write on output file failed");
return -1;
}
}
free(strA);
free(strB);
KILLARRAY(array,linecount);
KILLTEMP;
return 0;
}

130
usr.bin/sort/dsort.1 Normal file
View File

@ -0,0 +1,130 @@
.TH DSORT 1 "Commands and Applications" "14 June 1994" "Version 1.0"
.SH NAME
dsort, msort \- sort text files lexicographically
.SH SYNOPSIS
.B msort
[
.I -hvV?
] [
.I "-o outfile"
] [
.I "-n lines"
]
.I file1
[
.I "file2 ..."
]
.LP
.B dsort
[
.I -hvV?
] [
.I "-l length"
] [
.I "-n lines"
] [
.I "-o outfile"
] [\fI-t path1\fR[,\fIpath2\fR[,\fIpath3\fR[,\fIpath4\fR]]]] \fIinfile\fR
.SH DESCRIPTION
.BR dsort " and " msort
are robust text file sorting utilities. While they do not support a lot
of features, they are designed to sort large (and small) files very quickly.
.LP
.B msort
is an in-place memory sort. Since it uses the heapsort algorithm, it is
O[n lg n] both on average and for worst-case. Provided it has enough memory,
.BR msort
will sort files with lines of arbitrary length. Unless overridden by the
.I -n
flag,
.BR msort
will sort files of up to 1000 lines. Larger files can be sorted provided
there is sufficient core memory. If multiple input files are given, the
output is the concatenated result of sorting the input files separately.
Thus, the following would be equivalent:
.LP
.nf
% msort file1 file2 file3 >outfile
and
% msort file1 >file1out
% msort file2 >file2out
% msort file3 >file3out
% cat file1out file2out file3out >outfile
.fi
.LP
.B dsort
is a disk sort intended for files too large to be sorted in memory. It
uses a four-file polyphase merge algorithm. Since it is an I/O-bound
program,
.BR dsort "'s
speed is very dependent on the speed of the device used for temporary files.
By default,
.BR dsort
will sort files with lines up to 512 characters long. Lines with more
characters will be truncated unless the
.I -l
flag is used. Also by default, 1000 lines at a time will be sorted in
memory during the collection (first) phase of the merge sort algorithm.
This can be changed using the
.I -n
flag.
.BR dsort
will accept only one input file.
.LP
Both
.BR dsort " and " msort
leave the input file(s) intact.
.SH OPTIONS
.nf
\fI-h\fR \fI-?\fR -- print version and usage info, then exit
\fI-l\fR \fIlength\fR -- use a line length of \fIlength\fR
\fI-n\fR \fIlines\fR -- sort \fIlines\fR lines in memory, (for \fBdsort\fR); don't
try to sort files over \fIlines\fR long (for \fBmsort\fR).
\fI-o\fR \fIoutfile\fR -- send sorted output to \fIoutfile\fR rather than to stdout
\fI-t\fR \fIpathlist\fR -- use \fIpathlist\fR as the locations of temp files. If any
of these are not specified, dsort will attempt to use
the directory specified by the environment variable
$(TMPDIR), then the system default temp path.
\fI-v\fR -- verbose operation
\fI-V\fR -- print version information
.fi
.SH HINTS
If you have more than one fast drive, the speed of
.B dsort
can in general be improved by using four different drives for the
path list when using
.I -t .
The best speed observed, however, has occurred when $(TMPDIR) or /tmp
reside on a RAM disk or ROM disk.
It is not suggested that floppies be used for temporary files.
.SH RESOURCE USAGE
Both
.BR dsort " and " msort
use 1k of stack space.
.LP
.BR msort
is an in-place sort, so in general the amount of core memory used is
the same as the size of the file to be sorted. When sorting multiple
files,
.BR msort "'s
memory usage will match the size of the largest input file, not the
total of all files. It will use a minimum of approximately 4k of core
memory.
.LP
.BR dsort
by default uses approximately 512k of core memory. This can be modified
by changing the
.I -l
and
.I -n
parameters. Core memory usage is approximately the product of these two
parameters.
.LP
When using
.BR dsort ,
the amount of free space on the temporary path(s) must be at least twice
the size of the file to be sorted.
.SH AUTHOR
Devin Reade \- glyn@cs.ualberta.ca
.SH SEE ALSO
.BR sort (1),
.BR uniq (1).

207
usr.bin/sort/dsort.c Normal file
View File

@ -0,0 +1,207 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
/*
* dsort -- sort a text file on disk lexicographically
*
* Synopsis:
* dsort [-hvV?] [-l length] [-n lines] [-o outfile]
* [-t path1[,path2[,path3[,path4]]]] infile
*
* Options:
* -h -? -- print version and usage info, then exit
* -l <length> -- use a line length of <length>
* -n <m> -- sort <m> lines in memory.
* -o <outfile> -- sorted output to <outfile> rather than
* to stdout
* -t <pathlist> -- use <pathlist> (up to four paths) as the locations
* of temp files. <pathlist> is of the form:
* path1[,path2[,path3[,path4]]]. If any of these
* are not specified, dsort will attempt to use
* the system default temp path.
* -v -- verbose operation
* -V -- print version information
*/
#define DEFFUNC
#define DSORT
#include "common.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "/usr/include/getopt.h" /* GNU version */
extern int optind;
extern char *optarg;
extern int errno;
/* Version banner: printed for -V and after the usage text. */
static char *versionstring =
  "Version 1.0 by Devin Reade\n";

/* Usage text: printed for -h, -?, a bad option or a bad operand count. */
static char *usagestring =
  "dsort -- Sort a text file on disk lexicographically\n"
  "\n"
  "Synopsis:\n"
  "\tdsort\t[-hvV?] [-l length] [-n lines] [-o outfile]\n"
  "\t\t[-t path1[,path2[,path3[,path4]]]] infile\n"
  "\n"
  "Options:\n"
  "\t-h -?\t\t-- Print version and usage info, then exit.\n"
  "\t-l <length>\t-- Set the maximum line length to <length>.\n"
  "\t-n <m>\t\t-- Set the number of lines to sort in memory to <m>.\n"
  "\t-o <outfile>\t-- Dump sorted output to <outfile> rather\n"
  "\t\t\t than to stdout.\n"
  "\t-t <pathlist>\t-- Set the paths to use for the location of\n"
  "\t\t\t scratch files. Paths are delimited by \',\' characters.\n"
  "\t-v\t\t-- Verbose operation.\n"
  "\t-V\t\t-- Print version information.\n";
/*
 * static size_t dsort_parse_size (const char *arg, size_t fallback);
 *
 * Convert the numeric option argument <arg> to a size.  Returns
 * <fallback> when <arg> does not start with a number, is negative, or
 * is out of range for a long; otherwise returns the number.
 */
static size_t dsort_parse_size (const char *arg, size_t fallback) {
  char *end;
  long value;

  errno = 0;
  value = strtol(arg,&end,10);
  if ((end == arg) || (errno == ERANGE) || (value < 0)) return fallback;
  return (size_t) value;
}

/*
 * int main (int argc, char **argv);
 *
 * Parse the command line described at the top of this file, set up
 * out_fp and tpath1 ... tpath4, and hand the single input file to
 * disksort().  Returns the result of disksort() (0 on success, -1 on
 * failure), or -1 for a usage error or if the output file cannot be
 * opened or closed.
 */
int main (int argc, char **argv) {
  char *outfile = NULL;       /* the name of the output file, if nec */
  size_t maxlinelen = DEFAULT_LINELENGTH;   /* line buffer size */
  size_t maxlinecount = DEFAULT_LINECOUNT;  /* lines sorted in memory */
  char *tbuffer = NULL;       /* private copy of the -t path list */
  char **slot[4];             /* the four temp path globals, in order */
  char *tp;
  short more;                 /* does another path follow this one? */
  int n;
  int c;
  short errflag=0;
  short o_flag=0;
  short V_flag=0;
  /* v_flag defined in common.h */

#ifdef DEBUG
  begin_stack_check();
#endif

  /*
   * parse the command line
   */
  while ((c = getopt(argc,argv,"hl:n:o:t:vV?")) != EOF)
    switch (c) {
    case 'l':   /* use this as the maximum line length */
      maxlinelen = dsort_parse_size(optarg,DEFAULT_LINELENGTH);
      break;
    case 'n':   /* sort this number of lines in memory */
      maxlinecount = dsort_parse_size(optarg,DEFAULT_LINECOUNT);
      break;
    case 'o':   /* redirect sorted output to file */
      o_flag++;
      outfile = optarg;
      break;
    case 't':   /* define locations of temp files */
      free(tbuffer);            /* a repeated -t replaces the earlier one */
      if ((tbuffer = malloc(strlen(optarg)+1)) == NULL) {
        perror("couldn't allocate temporary buffer; using default");
        break;
      }
      strcpy(tbuffer,optarg);
      break;
    case 'v':   /* verbose */
      v_flag++;
      break;
    case 'V':   /* print version information */
      V_flag++;
      break;
    case '?':   /* fallthrough */
    case 'h':   /* fallthrough */
    default:    /* Display usage, version, and exit */
      V_flag++;
      errflag++;
      break;
    }

  /*
   * React to command line parameters
   */
  if (errflag) {
    fprintf(stderr,"\n%s\n%s\n",usagestring,versionstring);
    free(tbuffer);
    return -1;
  }
  if (V_flag) fprintf(stderr,"\n%s\n",versionstring);

  /* check the operand count before the output file gets truncated */
  if (argc - optind != 1) {
    fprintf(stderr,"\n%s\n%s\n",usagestring,versionstring);
    free(tbuffer);
    return -1;
  }

  if (v_flag) fprintf(stderr,
    "Sorting %lu lines in memory.\nMaximum recognised line length = %lu\n",
    (unsigned long) maxlinecount,(unsigned long) maxlinelen);

  if (o_flag) {
    if ((out_fp = fopen(outfile,"w")) == NULL) {
      if (v_flag) perror("open on output file failed");
      free(tbuffer);
      return -1;
    }
  } else out_fp = stdout;

  /*
   * Split the path list at the commas.  Paths that are missing or empty
   * stay NULL so that the default location is used for them; anything
   * after a fourth path is ignored.
   */
  tpath1 = NULL;
  tpath2 = NULL;
  tpath3 = NULL;
  tpath4 = NULL;
  if (tbuffer != NULL) {
    slot[0] = &tpath1;
    slot[1] = &tpath2;
    slot[2] = &tpath3;
    slot[3] = &tpath4;
    tp = tbuffer;
    for (n = 0; n < 4; n++) {
      char *start = tp;

      while (*tp && (*tp != ',')) tp++;
      more = (*tp == ',');
      if (more) *tp++ = '\0';   /* terminate this path */
      if (*start != '\0') {
        *slot[n] = start;
        if (v_flag)
          fprintf(stderr,"Will try to use temp directory %s\n",start);
      }
      if (!more) break;
    }
  } else if (v_flag) fprintf(stderr,"Using default temp path\n");

  /* do the sort */
  c = disksort(argv[optind],maxlinecount,maxlinelen);

  /* closing flushes the output; a failure here means lost data */
  if (o_flag && (fclose(out_fp) == EOF)) {
    if (v_flag) perror("close on output file failed");
    c = -1;
  }
  free(tbuffer);

#ifdef DEBUG
  fprintf(stderr,"%s stack usage: %d bytes\n",argv[0],end_stack_check());
#endif
  return c;
}

105
usr.bin/sort/initdisksort.c Normal file
View File

@ -0,0 +1,105 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
#define DSORT
#include "common.h"
#include <stdlib.h>
#include <unistd.h>
#define OPENMODE "w+" /* the mode for create/read/write */
static char *errstring1="initdisksort: couldn't get temp name";
static char *errstring2="initdisksort: couldn't open temp file";
/*
 * int initdisksort(void);
 *
 * Pre:  tpath1 through tpath4 are each either NULL or the name of a
 *       directory to be handed to tempnam().
 *
 * Post: Returns 0 on success, -1 on failure.
 *       On success:
 *         file1 through file4 are initialized as temp file names.
 *         fp1 through fp4 are open file pointers for file1 ... file4.
 *       On failure:
 *         a message is printed if v_flag is set, every stream that was
 *         opened is closed and its file removed, all names are freed,
 *         and file1 ... file4 and fp1 ... fp4 are all NULL.
 *
 * Uses Globals:
 *       fp1, fp2, fp3, fp4
 *       file1, file2, file3, file4,
 *       tpath1, tpath2, tpath3, tpath4,
 *       v_flag
 */
int initdisksort(void) {
  file1 = file2 = file3 = file4 = NULL;
  fp1 = fp2 = fp3 = fp4 = NULL;

  /*
   * Get the names for the temp files; || stops at the first failure
   */
  if (((file1 = tempnam(tpath1,"dsort")) == NULL) ||
      ((file2 = tempnam(tpath2,"dsort")) == NULL) ||
      ((file3 = tempnam(tpath3,"dsort")) == NULL) ||
      ((file4 = tempnam(tpath4,"dsort")) == NULL)) {
    if (v_flag) perror(errstring1);
    goto fail;
  }

  /*
   * Open the temp files
   */
  if (((fp1 = fopen(file1,OPENMODE)) == NULL) ||
      ((fp2 = fopen(file2,OPENMODE)) == NULL) ||
      ((fp3 = fopen(file3,OPENMODE)) == NULL) ||
      ((fp4 = fopen(file4,OPENMODE)) == NULL)) {
    if (v_flag) perror(errstring2);
    goto fail;
  }
  return 0;

fail:
  /* only a file that was actually opened exists and needs removing */
  if (fp1 != NULL) { fclose(fp1); unlink(file1); }
  if (fp2 != NULL) { fclose(fp2); unlink(file2); }
  if (fp3 != NULL) { fclose(fp3); unlink(file3); }
  if (fp4 != NULL) { fclose(fp4); unlink(file4); }
  fp1 = fp2 = fp3 = fp4 = NULL;
  free(file1);
  free(file2);
  free(file3);
  free(file4);
  file1 = file2 = file3 = file4 = NULL;
  return -1;
}

89
usr.bin/sort/linecount.c Normal file
View File

@ -0,0 +1,89 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
#include "common.h"
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * unsigned long int linecount (char *filename, size_t *maxlinelen);
 *
 * Pre:  <filename> names the file to be examined.  The file must be
 *       closed.
 *
 * Post: The file is read through once, BUFFERSIZE bytes at a time, and
 *       is closed again on return.  The return value is the number of
 *       NEWLINE characters seen.  *maxlinelen is the length of the
 *       longest NEWLINE-terminated line, counting the NEWLINE but not a
 *       null terminator; characters after the last NEWLINE are not
 *       taken into account.  If the file cannot be opened or read, or
 *       the buffer cannot be allocated, a message is printed when
 *       v_flag is set and zero is returned, which is also the result
 *       for a file without any NEWLINE.
 *
 * Uses Globals:
 *       v_flag
 */
unsigned long int linecount (char *filename, size_t *maxlinelen) {
  unsigned long newlines = 0;   /* NEWLINE characters seen so far */
  size_t current = 0;           /* length of the line being scanned */
  char *chunk;                  /* the input buffer */
  int got;                      /* result of the last read() */
  int pos;
  int fd;                       /* file descriptor for <filename> */

  *maxlinelen = 0;

  /* open <filename> for unbuffered I/O */
  fd = open(filename,O_RDONLY);
  if (fd == -1) {
    if (v_flag) perror("linecount: couldn't open input file");
    return 0lu;
  }

  /* get an input buffer */
  chunk = malloc(BUFFERSIZE);
  if (chunk == NULL) {
    if (v_flag) perror ("linecount: couldn't allocate buffer");
    close(fd);
    return 0lu;
  }

  /* scan the file one buffer at a time until read() reports EOF */
  for (;;) {
    got = read (fd, chunk, BUFFERSIZE);
    if (got == 0) break;
    if (got == -1) {
      if (v_flag) perror ("linecount");
      close(fd);
      free(chunk);
      return 0lu;
    }
    for (pos = 0; pos < got; pos++) {
      current++;
      if (chunk[pos] != NEWLINE) continue;
      /* end of a line: count it and remember the longest so far */
      newlines++;
      if (current > *maxlinelen) *maxlinelen = current;
      current = 0;
    }
  }

  /* clean up and return */
  close(fd);
  free(chunk);
  return newlines;
}

121
usr.bin/sort/loadarray.c Normal file
View File

@ -0,0 +1,121 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
#include "common.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
/*
 * char **loadarray (unsigned long n, char *filename, size_t maxlinelen);
 *
 * Pre:  <filename> is the name of a file containing at least <n> lines
 *       of text.  The file <filename> must be closed.  <maxlinelen> is
 *       the length of the longest of those lines, newline included.
 *
 * Post: Returns a pointer to a malloc'd array of <n> pointers to
 *       malloc'd strings, where the strings are the first <n> lines of
 *       <filename> in file order.  For <n> == 0 the result is still a
 *       valid pointer that must be freed.  The caller owns the array
 *       and the strings.  On return <filename> is closed.  If
 *       loadarray() fails for any reason, it prints a message when
 *       v_flag is set, frees whatever it had allocated for the result,
 *       and returns NULL.
 *
 * Note: The line buffer is kept between calls and only ever grows.  Its
 *       first allocation always goes through malloc(), so the routine
 *       does not depend on realloc() accepting a NULL pointer and has
 *       no need to consult BROKEN_REALLOC.
 *
 * Uses Globals:
 *       v_flag
 */
char **loadarray (unsigned long n, char *filename, size_t maxlinelen) {
  static char *inbuf = NULL;        /* line buffer, reused across calls */
  static size_t previous_size = 0;  /* longest line inbuf can hold */
  char **result;
  unsigned long i, j;
  FILE *in_fp;
  char *p;

  /* (re)allocate the input buffer if necessary */
  if ((inbuf == NULL) || (maxlinelen > previous_size)) {
    if (inbuf == NULL) {
      if ((p = malloc(maxlinelen+1)) == NULL) {
        if (v_flag) perror("loadarray: couldn't allocate input buffer");
        return NULL;
      }
    } else {
      /* on failure inbuf is untouched and still usable by later calls */
      if ((p = realloc(inbuf,maxlinelen+1)) == NULL) {
        if (v_flag) perror("loadarray: couldn't reallocate input buffer");
        return NULL;
      }
    }
    previous_size = maxlinelen;
    inbuf = p;
  }

  /*
   * allocate the array; ask for at least one element because malloc(0)
   * may return NULL, which would look like a failure for an empty file
   */
  if ((n > ((size_t) -1) / sizeof(char *)) ||
      ((result = malloc ((n ? n : 1) * sizeof(char *))) == NULL)) {
    if (v_flag) perror("loadarray: couldn't allocate base array");
    return NULL;
  }

  /* set up the input stream */
  in_fp = fopen(filename,"r");
  if (in_fp == NULL) {          /* open failed */
    if (v_flag) perror("loadarray: couldn't open input file");
    free(result);
    return NULL;
  }

  /* allocate and copy elements */
  for (i=0; i<n; i++) {
    /* read into the buffer */
    if (fgets(inbuf,(int) (maxlinelen+1),in_fp)==NULL) {
      /* read failed; clean up and exit */
      if (v_flag) {
        if (ferror(in_fp)) perror("loadarray: read error on input file");
        /* EOF is not an error as far as errno is concerned */
        else fprintf(stderr,"loadarray: premature EOF on input file\n");
      }
      for (j=0; j<i; j++) free(result[j]);
      free(result);
      fclose(in_fp);
      return NULL;
    }
    /* copy the buffer to the array */
    result[i] = malloc(strlen(inbuf)+1);
    if (result[i]==NULL) {
      /* malloc failed; clean up and exit */
      if (v_flag) perror("loadarray: couldn't duplicate buffer");
      for (j=0; j<i; j++) free(result[j]);
      free(result);
      fclose(in_fp);
      return NULL;
    }
    strcpy(result[i],inbuf);
  }
  fclose(in_fp);
  return result;
}

55
usr.bin/sort/makefile.mk Normal file
View File

@ -0,0 +1,55 @@
# Where "install" puts the programs and the manual pages
BINDIR = /usr/local/bin
MANDIR = /usr/man

# Nothing should need to be changed below this point

# DEFINES = -DDEBUG -D__GNO__
DEFINES = -D__GNO__
CFLAGS = $(DEFINES) -O -v -w -r
CFLAGS2 = $(DEFINES) -O31 -v -w -r
MAINFLAGS = $(DEFINES) -O -v -w -S1024
LDFLAGS = -v
# LDLIBS = -l/usr/lib/gnulib -l/usr/lib/stack
LDLIBS = -l/usr/lib/gnulib

MOBJS = msort.o linecount.o loadarray.o
DOBJS = dsort.o disksort.o initdisksort.o mergeone.o tempnam.o
COMMONOBJS = sortarray.o

# "all" is the first target so that a plain make builds the programs;
# "install" depends on it so that it never copies stale or missing files.
all: msort dsort

install: all
	/bin/cp msort dsort $(BINDIR)
	/bin/cp msort.1 dsort.1 $(MANDIR)/man1

msort : $(MOBJS) $(COMMONOBJS)
	$(CC) $(LDFLAGS) $(LDLIBS) -o $@ $<

dsort : $(DOBJS) $(COMMONOBJS)
	$(CC) $(LDFLAGS) $(LDLIBS) -o $@ $<

# The two main programs are compiled with MAINFLAGS
msort.o: msort.c common.h
	$(CC) -c $(MAINFLAGS) -o $@ msort.c

dsort.o: dsort.c common.h
	$(CC) -c $(MAINFLAGS) -o $@ dsort.c

# Orca/C screws up with loop invariant optimization on disksort.c
disksort.o: disksort.c common.h
	$(CC) -c $(CFLAGS2) -o $@ disksort.c

#
# Housekeeping
#
clean:
	$(RM) $(DOBJS) $(MOBJS) $(COMMONOBJS) msort.root dsort.root

clobber: clean
	$(RM) dsort msort

#
# Additional dependencies
#
linecount.o loadarray.o initdisksort.o mergeone.o sortarray.o:: common.h

169
usr.bin/sort/mergeone.c Normal file
View File

@ -0,0 +1,169 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
#define DSORT
#include "common.h"
#include <string.h>
/*
* int mergeone(FILE *fpA, FILE *fpB, FILE *fpC, char strA[], char strB[],
* size_t linelength);
*
* Pre: fpA, fpB, and fpC are open file pointers. The first should contain
* "runs" of data delimited by a line consisting of just the DELIM
* character, although either or both may be at EOF. strA and strB
* are scratch character buffers, each of size linelength.
*
* Post: The first run on each of fpA and fpB are merge-sorted and added to
* fpC. If either fpA or fpB are at EOF then the run from the other
* file pointer is simply concatenated onto fpC. Mergeone will return
* zero if the merge was successful, -1 if both fpA and fpB are at
* EOF, and -2 if there was an error. On return, the contents of
* strA and strB are undefined.
*
* Uses Globals:
* v_flag -- if set and an error occurs, a message will be printed
* to stderr
* fp1,fp2,fp3,fp4 -- file pointers to the four scratch files
* file1,file2,file3,file4 -- names of the four scratch files
*/
int mergeone(FILE *fpA, FILE *fpB, FILE *fpC, char strA[], char strB[],
size_t linelength) {
short run_end_A = 0;
short run_end_B = 0;
/*
* Load strA and strB with the first lines from fpA and fpB. After
* this, either file may be at EOF (but not error).
*/
if ((fgets(strA,linelength,fpA)==NULL) && ferror(fpA)) {
if (v_flag) perror("mergeone: Read error on fpA");
return -2;
}
if ((fgets(strB,linelength,fpB)==NULL) && ferror(fpB)) {
if (v_flag) perror("mergeone: Read error on fpB");
return -2;
}
/*
* merge fpA and fpB until we either get an EOF or a DELIM line
*/
while (!feof(fpA) && !feof(fpB)) {
/* test to see if our run is finished */
if ((strA[0]==DELIM) && (strA[1]=='\n')) {
run_end_A = 1;
break;
}
if ((strB[0]==DELIM) && (strB[1]=='\n')) {
run_end_B = 1;
break;
}
if (strcmp(strA,strB) < 0) {
/* print out the string to fpC */
if (fprintf(fpC,"%s",strA) == EOF) {
if (v_flag) perror("mergeone: Write error on fpC");
return -2;
}
/* get another string from fpA */
if ((fgets(strA,linelength,fpA)==NULL) && ferror(fpA)) {
if (v_flag) perror("mergeone: Read error on fpA");
return -2;
}
} else {
/* print out the string to fpC */
if (fprintf(fpC,"%s",strB) == EOF) {
if (v_flag) perror("mergeone: Write error on fpC");
return -2;
if (v_flag) {
/* say something */
}
return -2;
}
/* get another string from fpB */
if ((fgets(strB,linelength,fpB)==NULL) && ferror(fpB)) {
if (v_flag) perror("mergeone: Read error on fpB");
return -2;
}
}
}
/*
* We've come to the end of at least one of the runs, concatenate
* the remainder on the output file
*/
/* finish off fpA if necessary */
while (!run_end_A && !feof(fpA)) {
/* test to see if our run is finished */
if ((strA[0]==DELIM) && (strA[1]=='\n')) {
run_end_A = 1;
break;
}
/* print out the string to fpC */
if (fprintf(fpC,"%s",strA) == EOF) {
if (v_flag) perror("mergeone: Write error on fpC");
return -2;
}
/* get another string from fpA */
if ((fgets(strA,linelength,fpA)==NULL) && ferror(fpA)) {
if (v_flag) perror("mergeone: Read error on fpA");
return -2;
}
}
/* finish off fpB if necessary */
while (!run_end_B && !feof(fpB)) {
/* test to see if our run is finished */
if ((strB[0]==DELIM) && (strB[1]=='\n')) {
run_end_B = 1;
break;
}
/* print out the string to fpC */
if (fprintf(fpC,"%s",strB) == EOF) {
if (v_flag) perror("mergeone: Write error on fpC");
return -2;
}
/* get another string from fpB */
if ((fgets(strB,linelength,fpB)==NULL) && ferror(fpB)) {
if (v_flag) perror("mergeone: Read error on fpB");
return -2;
}
}
/*
* At this point, both fpA and fpB are either at a run-end or at EOF,
* with no errors. If at EOF, then don't append a DELIM character.
*/
if (feof(fpA) && feof(fpB)) return -1;
if (fprintf(fpC,"%c\n",DELIM) == EOF) {
if (v_flag) perror("mergeone: Write error on fpC");
return -2;
}
return 0;
}

1
usr.bin/sort/msort.1 Normal file
View File

@ -0,0 +1 @@
.so /usr/man/man1/dsort.1

155
usr.bin/sort/msort.c Normal file
View File

@ -0,0 +1,155 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
/*
* msort -- sort a text file in memory lexicographically
*
* Synopsis:
* msort [-hvV?] [-o outfile] [-n lines] file1 [file2 ...]
*
* Options:
* -h -? -- print version and usage info, then exit
* -n <lines> -- don't try to sort files over <lines> lines long
* -o <outfile> -- sorted output to <outfile> rather than
* to stdout
* -v -- verbose operation
* -V -- print version information
*/
#define DEFFUNC
#define MSORT
#include "common.h"
#include <limits.h>
#include <stdlib.h>
#include <errno.h>
#include "/usr/include/getopt.h" /* GNU version */
extern int optind;
extern char *optarg;
extern int errno;
/* Version banner: printed for -V and after the usage text. */
static char *versionstring =
  "Version 1.0 by Devin Reade\n";

/*
 * Usage text: printed for -h, -? or a bad option.  The synopsis lists
 * every option described below it, -n included.
 */
static char *usagestring =
  "msort -- Sort a text file in memory lexicographically\n"
  "\n"
  "Synopsis:\n"
  "\tmsort [-hvV?] [-o outfile] [-n lines] file1 [file2 ...]\n"
  "\n"
  "Options:\n"
  "\t-h -?\t\t-- Print version and usage info, then exit.\n"
  "\t-n <m>\t\t-- Set the maximum number of lines per file to <m>.\n"
  "\t-o <outfile>\t-- Dump sorted output to <outfile> rather\n"
  "\t\t\t than to stdout.\n"
  "\t-v\t\t-- Verbose operation.\n"
  "\t-V\t\t-- Print version information.\n";
/*
 * static size_t msort_parse_size (const char *arg, size_t fallback);
 *
 * Convert the numeric option argument <arg> to a size.  Returns
 * <fallback> when <arg> does not start with a number, is negative, or
 * is out of range for a long; otherwise returns the number.
 */
static size_t msort_parse_size (const char *arg, size_t fallback) {
  char *end;
  long value;

  errno = 0;
  value = strtol(arg,&end,10);
  if ((end == arg) || (errno == ERANGE) || (value < 0)) return fallback;
  return (size_t) value;
}

/*
 * int main (int argc, char **argv);
 *
 * Parse the command line described at the top of this file, then sort
 * each input file in memory and write it to out_fp, one file after the
 * other.  A file that has too many lines or cannot be loaded is
 * skipped.  Returns 0 if every file was sorted and written, and -1 for
 * a usage error, an output file that cannot be opened or written, or if
 * any input file was skipped.
 */
int main (int argc, char **argv) {
  size_t lc, i;
  char *outfile = NULL;       /* the name of the output file, if nec */
  char **array;               /* an array of strings; for sorting */
  size_t maxlinelen;          /* length of longest line in current file */
  size_t maxlinecount = DEFAULT_LINECOUNT;  /* max lines we want to allow */
  short failed=0;             /* any errors found? */
  short write_failed=0;       /* has an output error been reported? */
  int c;
  short errflag=0;
  short o_flag=0;
  short V_flag=0;
  /* v_flag defined in common.h */

#ifdef DEBUG
  begin_stack_check();
#endif

  /*
   * parse the command line
   */
  while ((c = getopt(argc,argv,"hn:o:vV?")) != EOF)
    switch (c) {
    case 'n':   /* don't try to sort if file is over n lines long */
      maxlinecount = msort_parse_size(optarg,DEFAULT_LINECOUNT);
      break;
    case 'o':   /* redirect sorted output to file */
      o_flag++;
      outfile = optarg;
      break;
    case 'v':   /* verbose */
      v_flag++;
      break;
    case 'V':   /* print version information */
      V_flag++;
      break;
    case '?':   /* fallthrough */
    case 'h':   /* fallthrough */
    default:    /* Display usage, version, and exit */
      V_flag++;
      errflag++;
      break;
    }

  /*
   * React to command line parameters
   */
  if (errflag) {
    fprintf(stderr,"\n%s\n%s\n",usagestring,versionstring);
    return -1;
  }
  if (V_flag) fprintf(stderr,"\n%s\n",versionstring);
  if (v_flag) fprintf(stderr,"Maximum lines per file = %lu\n",
                      (unsigned long) maxlinecount);
  if (o_flag) {
    if ((out_fp = fopen(outfile,"w")) == NULL) {
      if (v_flag) perror("open on output file failed");
      return -1;
    }
  } else out_fp = stdout;

  /* loop through files */
  for (; optind<argc; optind++) {

    /* get the line count */
    lc = linecount(argv[optind], &maxlinelen);
    if (lc>maxlinecount) {
      if (v_flag)
        fprintf(stderr,"%s too long for an in-memory sort -- file skipped\n",
                argv[optind]);
      failed = 1;
      continue;
    }

    /* load the array */
    array = loadarray (lc, argv[optind], maxlinelen);
    if (array == NULL) {
      if (v_flag) fprintf(stderr,"Ignoring file %s\n",argv[optind]);
      failed = 1;
      continue;
    }

    /* sort it */
    sortarray (array,lc);

    /* print the sorted file out and clean up the array */
    for (i=0; i<lc; i++) {
      if (fprintf(out_fp,"%s",array[i]) < 0) {
        failed = 1;
        /* say so once, not once per line */
        if (!write_failed && v_flag) perror("write on output file failed");
        write_failed = 1;
      }
      free(array[i]);
    }
    free(array);
  }

  /* closing flushes the output; a failure here means lost data */
  if (o_flag && (fclose(out_fp) == EOF)) {
    if (v_flag) perror("close on output file failed");
    failed = 1;
  }

#ifdef DEBUG
  fprintf(stderr,"%s stack usage: %d bytes\n",argv[0],end_stack_check());
#endif
  if (failed) return -1;
  else return 0;
}

62
usr.bin/sort/sortarray.c Normal file
View File

@ -0,0 +1,62 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
#include "common.h"
#include <string.h>
/*
 * sift_down -- place <item> into the max-heap held in array[0..count-1].
 *
 * Starting from the vacant slot <root>, the larger child is promoted into
 * the hole until <item> is no smaller than both children of the hole (or
 * the hole is a leaf); <item> is then stored there.
 */
static void sift_down(char *array[], unsigned long root, unsigned long count,
                      char *item) {
    unsigned long hole = root;
    unsigned long child = 2 * hole + 1;

    while (child < count) {
        /* pick the larger of the two children */
        if (child + 1 < count && strcmp(array[child], array[child + 1]) < 0)
            ++child;
        if (strcmp(item, array[child]) >= 0) break;   /* item belongs here */
        array[hole] = array[child];                   /* demote item */
        hole = child;
        child = 2 * hole + 1;
    }
    array[hole] = item;
}

/*
 * void sortarray(char **array, unsigned long n);
 *
 * Pre:  <array> is a pointer to an array of pointers to NUL-terminated
 *       strings, and <n> is the number of elements in <array>.  <n> may
 *       be zero, in which case <array> is not touched.
 *
 * Post: The strings in <array> are sorted into ascending strcmp() order
 *       using the heapsort algorithm.  This is an in-place non-recursive
 *       sort with behavior O[n*lg(n)] both on average and worst-case.
 *       Only the pointers are rearranged; the strings are not copied.
 */
void sortarray(char *array[], unsigned long n) {
    unsigned long i;

    if (n < 2) return;      /* nothing to sort for zero or one element */

    /* heap creation phase: sift down every non-leaf, last one first */
    for (i = n / 2; i-- > 0; )
        sift_down(array, i, n, array[i]);

    /*
     * heap selection phase: retire the top of the heap into the last
     * slot of the shrinking heap, then re-seat the displaced element
     */
    for (i = n - 1; i > 0; i--) {
        char *item = array[i];

        array[i] = array[0];
        sift_down(array, 0, i, item);
    }
}

130
usr.bin/sort/tempnam.c Normal file
View File

@ -0,0 +1,130 @@
#ifdef __CCFRONT__
#include <14:pragma.h>
#endif
/*
* #include <stdio.h>
*
* char *tempnam (const char *dir, const char *prefix);
*
* Generate a pathname for a temporary file.
*
* tempnam will select a directory for the temporary file by using the
* following criteria:
*
* If dir is not the NULL pointer, tempnam uses the pathname pointed to by
* dir as the directory,
*
* otherwise, tempnam uses the value of the TMPDIR environment variable if
* the variable is defined,
*
* otherwise the directory defined by P_tmpdir in the stdio.h header file
* if that directory is writable by the caller,
*
* otherwise, tempnam will use "/tmp" as a last resort.
*/
#ifdef __ORCAC__
#define __GNO__ 1
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#define max(A,B) (((A)<(B))?(B):(A))
#if !defined(__GNO__)
extern char *mktemp();
extern int access();
#endif
/* three-letter counter appended to each generated name; tempnam advances it */
static char seed[4]="AAA";
/*
 * pbrk is the path delimiter.  Under GNO it is a variable that cpdir sets
 * to ':' or '/'; elsewhere it is the constant '/'.  The macro must not end
 * in a semicolon because it is used inside expressions.
 */
#if (defined __GNO__)
static char pbrk;
#else
# define pbrk '/'
#endif
/* BSD stdio.h doesn't define P_tmpdir, so let's do it here */
#ifndef P_tmpdir
static char *P_tmpdir = "/tmp";
#endif
/*
 * cpdir -- copy the directory name <str> into <buf>, dropping one trailing
 * path delimiter if there is one.
 *
 * buf -- destination; the caller must make it large enough to hold <str>
 * str -- directory name to copy; if NULL, <buf> is left untouched
 *
 * Under GNO the delimiter (pbrk) is first chosen from <str>: ':' if <str>
 * contains one, else '/' if <str> contains one, else whichever of the two
 * the PATH environment variable appears to use ('/' if PATH is unset).
 *
 * Returns <buf>.
 */
static char *
cpdir(char *buf, char *str)
{
    size_t len;
    char delim;
#if defined(__GNO__)
    char *path;
#endif

    if (str == NULL) return(buf);

#if defined(__GNO__)
    /* get the path delimiter */
    if (strchr(str,':')) pbrk = ':';
    else if (strchr(str,'/')) pbrk = '/';
    else {
        path = getenv("PATH");
        if (path == NULL) pbrk = '/';
        else pbrk = (strchr(path,':')) ? ':' : '/';
    }
#endif

    delim = pbrk;
    (void) strcpy(buf, str);

    /* an empty string has no last character to inspect */
    len = strlen(buf);
    if (len > 0 && buf[len - 1] == delim) buf[len - 1] = '\0';
    return(buf);
}
char *
tempnam (char *dir, char *prefix)
/* dir -- use this directory please (if non-NULL) */
/* prefix -- use this (if non-NULL) as filename prefix */
{
register char *p, *q, *tmpdir;
int tl=0, dl=0, pl;
/* create a buffer <p> that's as large as necessary */
pl = strlen(P_tmpdir);
if( (tmpdir = getenv("TMPDIR")) != NULL ) tl = strlen(tmpdir);
if( dir != NULL ) dl = strlen(dir);
if( (p = malloc((unsigned int)(max(max(dl,tl),pl)+16))) == NULL )
return(NULL);
*p = '\0';
#if defined (__GNO__)
if( (dl == 0) || (access( cpdir(p, dir), W_OK) != 0) )
if( (tl == 0) || (access( cpdir(p, tmpdir), W_OK) != 0) )
if( access( cpdir(p, P_tmpdir), W_OK) != 0 )
if( access( cpdir(p, "/tmp"), W_OK) != 0 )
return(NULL);
#else /* not __GNO__ */
if( (dl == 0) || (access( cpdir(p, dir), 3) != 0) )
if( (tl == 0) || (access( cpdir(p, tmpdir), 3) != 0) )
if( access( cpdir(p, P_tmpdir), 3) != 0 )
if( access( cpdir(p, "/tmp"), 3) != 0 )
return(NULL);
#endif /* not __GNO__ */
(void) strcat(p, "/");
if(prefix)
{
*(p+strlen(p)+5) = '\0';
(void)strncat(p, prefix, 5);
}
(void)strcat(p, seed);
(void)strcat(p, "XXXXXX");
q = seed;
while(*q == 'Z') *q++ = 'A';
++*q;
if(*mktemp(p) == '\0') return(NULL);
return(p);
}