mirror of
https://github.com/GnoConsortium/gno.git
synced 2024-06-28 03:29:39 +00:00
version 1.0 of msort and dsort for GNO
This commit is contained in:
parent
a1e9154b96
commit
5722e4fa7f
18
usr.bin/sort/README
Normal file
18
usr.bin/sort/README
Normal file
|
@ -0,0 +1,18 @@
|
|||
This archive contains the utilities msort(1) and dsort(1). Both sort
|
||||
text files lexicographically.
|
||||
|
||||
Msort is a fast in-place memory sort.
|
||||
|
||||
Dsort is a disk based sort that can handle "arbitrarily large" files
|
||||
(in reality, limited to ULONG_MAX -- 4 294 967 295 -- bytes).
|
||||
|
||||
The big difference between these sorts and the previously available
|
||||
sort(1) is that these won't crash your system ... if either runs into
|
||||
problems, they exit gracefully and (if you are using the verbose flag)
|
||||
tell you what the problem is.
|
||||
|
||||
Enjoy.
|
||||
|
||||
Devin Reade
|
||||
14 June 1994
|
||||
|
57
usr.bin/sort/common.h
Normal file
57
usr.bin/sort/common.h
Normal file
|
@ -0,0 +1,57 @@
|
|||
#ifndef SORT_COMMON_H
#define SORT_COMMON_H

/*
 * common.h -- constants, prototypes and globals shared by the msort
 * and dsort sources.
 *
 * Two macros may be defined before this file is included:
 *   DEFFUNC  makes EXTERN expand to nothing, so the globals at the end
 *            of this file are defined rather than merely declared.
 *   DSORT    additionally exposes the temp-file globals.
 */

#include <sys/types.h>
#include <stdio.h>

#ifdef DEFFUNC
#  define EXTERN
#else
#  define EXTERN extern
#endif

#define ALN2I               1.442695022 /* 1 / ln(2) */
#define TINY                1.0e-5      /* "zero" for heapsort */
#define BUFFERSIZE          4096        /* a generic buffer for I/O */
#define DEFAULT_LINECOUNT   1000        /* number of lines to memory sort */
#define DEFAULT_LINELENGTH  512         /* max length of line recognised */
#define DELIM               0x03        /* ETX */

#ifdef __ORCAC__
#  define NEWLINE '\r'
#else
#  define NEWLINE '\n'
#  define BROKEN_REALLOC    /* selects the malloc-first path in loadarray() */
#endif

/*
 * Library prototypes supplied by hand when compiling with GCC
 * (presumably for installations whose system headers lacked them --
 * confirm before removing).  The string parameters are const-qualified
 * so that these declarations agree with the ones in a standard
 * <stdio.h>; the former "char *" versions were conflicting
 * redeclarations there.
 */
#ifdef __GNUC__
int  printf(const char *format, ...);
int  fprintf(FILE *stream, const char *format, ...);
void perror(const char *s);
int  close(int fd);
int  fclose(FILE *stream);
int  rename(const char *, const char *);
void rewind(FILE *);
#endif

#ifdef DEBUG
#  define STATUS(string) fprintf(stderr,"%s\n",string)
extern void begin_stack_check(void);
extern int  end_stack_check(void);
#else
#  define STATUS(string) {;}
#endif

/* routines shared between the source files */
unsigned long int linecount (char *filename, size_t *maxlinelen);
char **loadarray (unsigned long n, char *filename, size_t maxlinelen);
void sortarray(char *array[], unsigned long n);
int disksort (char *filename, size_t linecount, size_t linelength);
int initdisksort(void);
int mergeone(FILE *fpA, FILE *fpB, FILE *fpC, char strA[], char strB[],
             size_t linelength);

EXTERN short v_flag;        /* non-zero: report errors with perror() */
EXTERN FILE *out_fp;        /* stream that receives the sorted output */

#ifdef DSORT
EXTERN FILE *fp1, *fp2, *fp3, *fp4;             /* temp file streams */
EXTERN char *file1, *file2, *file3, *file4;     /* temp file names */
EXTERN char *tpath1, *tpath2, *tpath3, *tpath4; /* temp dirs, or NULL */
#endif

#endif /* SORT_COMMON_H */
|
262
usr.bin/sort/disksort.c
Normal file
262
usr.bin/sort/disksort.c
Normal file
|
@ -0,0 +1,262 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
|
||||
#define DSORT
|
||||
#include "common.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
 * KILLTEMP -- close all four temp streams, delete the four temp files
 * and free() their names.  Wrapped in do/while(0) so that it behaves as
 * a single statement, e.g. as the unbraced body of an if.
 */
#define KILLTEMP do { \
    fclose(fp1); fclose(fp2); fclose(fp3); fclose(fp4); \
    unlink(file1); unlink(file2); unlink(file3); unlink(file4); \
    free(file1); free(file2); free(file3); free(file4); \
  } while (0)

/*
 * KILLARRAY(a,i) -- free() the first <i> elements of the pointer array
 * <a>, then <a> itself.  Each argument is evaluated more than once.
 */
#define KILLARRAY(a,i) do { \
    size_t killarray_k_; \
    for (killarray_k_=0; killarray_k_<(i); killarray_k_++) \
      free((a)[killarray_k_]); \
    free(a); \
  } while (0)

/* messages handed to perror() by disksort() */
static char *errmsg1 =
  "disksort: write failed on fp1 during construction phase";
static char *errmsg2 =
  "disksort: write failed on fp2 during construction phase";
static char *errmsg3 = "disksort: couldn't allocate scratch buffers";
static char *errmsg4 = "disksort: couldn't reopen temp files";
static char *errmsg5 = "disksort: read on temp file failed";
|
||||
|
||||
/*
|
||||
* int disksort (char *infile, size_t linecount, size_t linelength);
|
||||
*
|
||||
* Pre: <infile> is the name of the input file. <linecount> is the
|
||||
* maximum number of text lines we should try to sort in _memory_
|
||||
* at any one time. If it is zero, then DEFAULT_LINECOUNT is used.
|
||||
* <linecount> may be much smaller than the actual linecount of
|
||||
* <infile>. <linelength> - 1 is the maximum length of line from
|
||||
* <infile> that disksort will recognise. If <linelength> is zero,
|
||||
* then DEFAULT_LINELENGTH is used. Global out_fp is an open stream.
|
||||
*
|
||||
* Post: The file refered to by <infile> is sorted lexicographically
|
||||
* by lines. If a line is longer than <linelength>, then any extra
|
||||
* characters in that line will be truncated. On success, disksort
|
||||
* returns zero. On failure, disksort returns -1. The sorted output
|
||||
* is printed to out_fp, and <infile> is unchanged.
|
||||
*
|
||||
* Note: This routine is based on a polyphase merge sort using four
|
||||
* temporary files.
|
||||
*
|
||||
* Uses Globals:
|
||||
* fp1, fp2, fp3, fp4, file1, file2, file3, file4, out_fp
|
||||
*/
|
||||
|
||||
int disksort (char *infile, size_t linecount, size_t linelength) {
|
||||
FILE *in_fp; /* input file pointer */
|
||||
char lastfile; /* to where did we last write? */
|
||||
size_t runcount; /* how many runs make up infile? */
|
||||
FILE *fpA, *fpB, *fpC, *fpD;
|
||||
char *tempout; /* the name of the final temp file */
|
||||
char **array;
|
||||
size_t i;
|
||||
char *strA, *strB;
|
||||
|
||||
/*
|
||||
*
|
||||
* PHASE ZERO: Initialization
|
||||
*
|
||||
*/
|
||||
|
||||
if (initdisksort() != 0) return -1; /* already printed error msgs */
|
||||
|
||||
/*
|
||||
* Open the input file for reading
|
||||
*/
|
||||
|
||||
if ((in_fp=fopen(infile,"r"))==NULL) {
|
||||
if (v_flag) perror("disksort: couldn't open input file");
|
||||
KILLTEMP;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the size of the array of strings we will sort, and create it
|
||||
*/
|
||||
|
||||
if (linecount == 0) linecount = DEFAULT_LINECOUNT;
|
||||
if (linelength == 0) linelength = DEFAULT_LINELENGTH;
|
||||
if ((array = malloc (linecount * sizeof(char *))) == NULL) {
|
||||
if (v_flag) perror("disksort: couldn't allocate array");
|
||||
fclose(in_fp);
|
||||
KILLTEMP;
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i<linecount; i++) {
|
||||
if ((array[i] = malloc (linelength * sizeof(char))) == NULL) {
|
||||
if (v_flag) perror("disksort: couldn't allocate array elements");
|
||||
KILLARRAY(array,i);
|
||||
fclose(in_fp);
|
||||
KILLTEMP;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* PHASE I:
|
||||
*
|
||||
* Read runs from input file, sort each run, dump to first two
|
||||
* temp files, alternating between file1 & file2.
|
||||
*/
|
||||
|
||||
|
||||
lastfile = 'B';
|
||||
runcount = 0;
|
||||
|
||||
while(!feof(in_fp)) {
|
||||
|
||||
/* read in a block that can be sorted in core memory */
|
||||
for (i=0; i<linecount; i++) {
|
||||
if (fgets(array[i],linelength,in_fp)==NULL) {
|
||||
if (feof(in_fp)) {
|
||||
array[i][0] = '\0'; /* end of file */
|
||||
--i; /* reduce it by one so that sortarray() works */
|
||||
break;
|
||||
} else {
|
||||
if (v_flag) perror(errmsg5); /* file error */
|
||||
KILLARRAY(array,linecount);
|
||||
fclose(in_fp);
|
||||
KILLTEMP;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* sort it */
|
||||
sortarray(array,i);
|
||||
|
||||
/* print it out to one of the temp files and add the end-of-line DELIM */
|
||||
if (lastfile == 'B') {
|
||||
for (i=0; (i<linecount) && (array[i][0]!='\0'); i++)
|
||||
if ((fprintf(fp1,"%s",array[i])==EOF) && v_flag) perror(errmsg1);
|
||||
lastfile = 'A';
|
||||
if ((fprintf(fp1,"%c\n",DELIM)==EOF) && v_flag) perror(errmsg1);
|
||||
} else { /* lastfile == 'A' */
|
||||
for (i=0; (i<linecount) && (array[i][0]!='\0'); i++)
|
||||
if ((fprintf(fp2,"%s",array[i])==EOF) && v_flag) perror(errmsg2);
|
||||
lastfile = 'B';
|
||||
if ((fprintf(fp2,"%c\n",DELIM)==EOF) && v_flag) perror(errmsg2);
|
||||
}
|
||||
}
|
||||
|
||||
/* clean up Phase I */
|
||||
fclose(in_fp);
|
||||
rewind(fp1);
|
||||
rewind(fp2);
|
||||
|
||||
/*
|
||||
* merge the files -- at this point, files fp1 and fp2 contain
|
||||
* multiple runs of <linecount> records. Keep merging and
|
||||
*/
|
||||
|
||||
/* initialize this backwards because of the initial flip */
|
||||
fpA = fp2;
|
||||
|
||||
/* get some scratch strings for the merge */
|
||||
if (((strA=malloc(linelength))==NULL) ||
|
||||
((strB=malloc(linelength))==NULL)) {
|
||||
if (v_flag) perror(errmsg3);
|
||||
return -1;
|
||||
}
|
||||
|
||||
do {
|
||||
runcount = 0;
|
||||
|
||||
/* flip the files so we can sort back the other way */
|
||||
if (fpA == fp1) {
|
||||
fpA = fp3;
|
||||
fpB = fp4;
|
||||
fp1 = freopen(file1,"w+",fp1);
|
||||
fp2 = freopen(file2,"w+",fp2);
|
||||
if ((fp1==NULL) || (fp2==NULL)) {
|
||||
if (v_flag) perror(errmsg4);
|
||||
return -1;
|
||||
}
|
||||
fpC = fp1;
|
||||
fpD = fp2;
|
||||
} else {
|
||||
fpA = fp1;
|
||||
fpB = fp2;
|
||||
fp3 = freopen(file3,"w+",fp3);
|
||||
fp4 = freopen(file4,"w+",fp4);
|
||||
if ((fp3==NULL) || (fp4==NULL)) {
|
||||
if (v_flag) perror(errmsg4);
|
||||
return -1;
|
||||
}
|
||||
fpC = fp3;
|
||||
fpD = fp4;
|
||||
}
|
||||
rewind(fpA);
|
||||
rewind(fpB);
|
||||
|
||||
/*
|
||||
* Sort pairs of runs until EOF is reached
|
||||
*/
|
||||
for (;;) {
|
||||
int mergeresult;
|
||||
|
||||
/*
|
||||
* merge one run from each of fpA and fpB into fpC, then repeat
|
||||
* it but placing the result into fpD.
|
||||
*/
|
||||
|
||||
mergeresult = mergeone(fpA,fpB,fpC,strA,strB,linelength);
|
||||
if (mergeresult == 0) {
|
||||
runcount++;
|
||||
mergeresult = mergeone(fpA,fpB,fpD,strA,strB,linelength);
|
||||
if (mergeresult == 0) runcount++;
|
||||
}
|
||||
if (mergeresult == -1) {
|
||||
/* both files at EOF */
|
||||
break;
|
||||
} else if (mergeresult == -2) {
|
||||
/* files in error; message already printed */
|
||||
KILLARRAY(array,linecount);
|
||||
KILLTEMP;
|
||||
return -1;
|
||||
}
|
||||
/* else normal merge; continue */
|
||||
}
|
||||
} while (runcount>1);
|
||||
|
||||
/*
|
||||
* At this point, fpC contains the sorted file. (We hope ...)
|
||||
*/
|
||||
if (fpC==fp1) tempout = file1;
|
||||
else if (fpC==fp2) tempout = file2;
|
||||
else if (fpC==fp3) tempout = file3;
|
||||
else /* fpC==fp4 */ tempout = file4;
|
||||
|
||||
/*
|
||||
* clean up and exit
|
||||
*/
|
||||
|
||||
/* copy lines from fpC to infile except for the trailing DELIM */
|
||||
rewind(fpC);
|
||||
for (;;) {
|
||||
if (fgets(strA,linelength,fpC)==NULL) {
|
||||
if(v_flag) perror(errmsg5);
|
||||
return -1;
|
||||
}
|
||||
if ((strA[0]==DELIM) && (strA[1]=='\n')) break;
|
||||
if (fprintf(out_fp,"%s",strA)==EOF) {
|
||||
if (v_flag) perror("disksort: write on output file failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
free(strA);
|
||||
free(strB);
|
||||
KILLARRAY(array,linecount);
|
||||
KILLTEMP;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
130
usr.bin/sort/dsort.1
Normal file
130
usr.bin/sort/dsort.1
Normal file
|
@ -0,0 +1,130 @@
|
|||
.TH DSORT 1 "Commands and Applications" "14 June 1994" "Version 1.0"
|
||||
.SH NAME
|
||||
dsort, msort \- sort text files lexicographically
|
||||
.SH SYNOPSIS
|
||||
.B msort
|
||||
[
|
||||
.I -hvV?
|
||||
] [
|
||||
.I "-o outfile"
|
||||
] [
|
||||
.I "-n lines"
|
||||
]
|
||||
.I file1
|
||||
[
|
||||
.I "file2 ..."
|
||||
]
|
||||
.LP
|
||||
.B dsort
|
||||
[
|
||||
.I -hvV?
|
||||
] [
|
||||
.I "-l length"
|
||||
] [
|
||||
.I "-n lines"
|
||||
] [
|
||||
.I "-o outfile"
|
||||
] [\fI-t path1\fR[,\fIpath2\fR[,\fIpath3\fR[,\fIpath4\fR]]]] \fIinfile\fR
|
||||
.SH DESCRIPTION
|
||||
.BR dsort " and " msort
|
||||
are robust text file sorting utilities. While they do not support a lot
|
||||
of features, they are designed to sort large (and small) files very quickly.
|
||||
.LP
|
||||
.B msort
|
||||
is an in-place memory sort. Since it uses the heapsort algorithm, it is
|
||||
O[n lg n] both on average and for worst-case. Provided it has enough memory,
|
||||
.BR msort
|
||||
will sort files with lines of arbitrary length. Unless overridden by the
|
||||
.I -n
|
||||
flag,
|
||||
.BR msort
|
||||
will sort files of up to 1000 lines. Larger files can be sorted provided
|
||||
there is sufficient core memory. If multiple input files are given, the
|
||||
output is the concatenated result of sorting the input files separately.
|
||||
Thus, the following would be equivalent:
|
||||
.LP
|
||||
.nf
|
||||
% msort file1 file2 file3 >outfile
|
||||
and
|
||||
% msort file1 >file1out
% msort file2 >file2out
% msort file3 >file3out
% cat file1out file2out file3out >outfile
|
||||
.fi
|
||||
.LP
|
||||
.B dsort
|
||||
is a disk sort intended for files too large to be sorted in memory. It
|
||||
uses a four-file polyphase merge algorithm. Since it is an I/O-bound
|
||||
program,
|
||||
.BR dsort "'s
|
||||
speed is very dependent on the speed of the device used for temporary files.
|
||||
By default,
|
||||
.BR dsort
|
||||
will sort files with lines up to 512 characters long. Lines with more
|
||||
characters will be truncated unless the
|
||||
.I -l
|
||||
flag is used. Also by default, 1000 lines at a time will be sorted in
|
||||
memory during the collection (first) phase of the merge sort algorithm.
|
||||
This can be changed using the
|
||||
.I -n
|
||||
flag.
|
||||
.BR dsort
|
||||
will accept only one input file.
|
||||
.LP
|
||||
Both
|
||||
.BR dsort " and " msort
|
||||
leave the input file(s) intact.
|
||||
.SH OPTIONS
|
||||
.nf
|
||||
\fI-h\fR \fI-?\fR -- print version and usage info, then exit
|
||||
\fI-l\fR \fIlength\fR -- use a line length of \fIlength\fR
|
||||
\fI-n\fR \fIlines\fR -- sort \fIlines\fR lines in memory, (for \fBdsort\fR); don't
|
||||
try to sort files over \fIlines\fR long (for \fBmsort\fR).
|
||||
\fI-o\fR \fIoutfile\fR -- send sorted output to \fIoutfile\fR rather than to stdout
|
||||
\fI-t\fR \fIpathlist\fR -- use \fIpathlist\fR as the locations of temp files. If any
|
||||
of these are not specified, dsort will attempt to use
|
||||
the directory specified by the environment variable
|
||||
$(TMPDIR), then the system default temp path.
|
||||
\fI-v\fR -- verbose operation
|
||||
\fI-V\fR -- print version information
|
||||
.fi
|
||||
.SH HINTS
|
||||
If you have more than one fast drive, the speed of
|
||||
.B dsort
|
||||
can in general be improved by using four different drives for the
|
||||
path list when using
|
||||
.I -t .
|
||||
The best speed observed, however, has occurred when $(TMPDIR) or /tmp
|
||||
reside on a RAM disk or ROM disk.
|
||||
It is not suggested that floppies be used for temporary files.
|
||||
.SH RESOURCE USAGE
|
||||
Both
|
||||
.BR dsort " and " msort
|
||||
use 1k of stack space.
|
||||
.LP
|
||||
.BR msort
|
||||
is an in-place sort, so in general the amount of core memory used is
|
||||
the same as the size of the file to be sorted. When sorting multiple
|
||||
files,
|
||||
.BR msort "'s
|
||||
memory usage will match the size of the largest input file, not the
|
||||
total of all files. It will use a minimum of approximately 4k of core
|
||||
memory.
|
||||
.LP
|
||||
.BR dsort
|
||||
by default uses approximately 512k of core memory. This can be modified
|
||||
by changing the
|
||||
.I -l
|
||||
and
|
||||
.I -n
|
||||
parameters. Core memory usage is approximately the product of these two
|
||||
parameters.
|
||||
.LP
|
||||
When using
|
||||
.BR dsort ,
|
||||
the amount of free space on the temporary path(s) must be at least twice
|
||||
the size of the file to be sorted.
|
||||
.SH AUTHOR
|
||||
Devin Reade \- glyn@cs.ualberta.ca
|
||||
.SH SEE ALSO
|
||||
.BR sort (1),
|
||||
.BR uniq (1).
|
207
usr.bin/sort/dsort.c
Normal file
207
usr.bin/sort/dsort.c
Normal file
|
@ -0,0 +1,207 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
/*
|
||||
* dsort -- sort a text file on disk lexicographically
|
||||
*
|
||||
* Synopsis:
|
||||
* dsort [-hvV?] [-l length] [-n lines] [-o outfile]
|
||||
* [-t path1[,path2[,path3[,path4]]]] infile
|
||||
*
|
||||
* Options:
|
||||
* -h -? -- print version and usage info, then exit
|
||||
* -l <length> -- use a line length of <length>
|
||||
* -n <m> -- sort <m> lines in memory.
|
||||
* -o <outfile> -- sorted output to <outfile> rather than
|
||||
* to stdout
|
||||
* -t <pathlist> -- use <pathlist> (up to four paths) as the locations
|
||||
* of temp files. <pathlist> is of the form:
|
||||
* path1[,path2[,path3[,path4]]]. If any of these
|
||||
* are not specified, dsort will attempt to use
|
||||
* the system default temp path.
|
||||
* -v -- verbose operation
|
||||
* -V -- print version information
|
||||
*/
|
||||
|
||||
|
||||
#define DEFFUNC
|
||||
#define DSORT
|
||||
#include "common.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "/usr/include/getopt.h" /* GNU version */
|
||||
|
||||
extern int optind;
|
||||
extern char *optarg;
|
||||
extern int errno;
|
||||
|
||||
static char *versionstring="\
|
||||
Version 1.0 by Devin Reade\n";
|
||||
|
||||
static char *usagestring="\
|
||||
dsort -- Sort a text file on disk lexicographically\n\
|
||||
\n\
|
||||
Synopsis:\n\
|
||||
\tdsort\t[-hvV?] [-l length] [-n lines] [-o outfile]\n\
|
||||
\t\t[-t path1[,path2[,path3[,path4]]]] infile\n\
|
||||
\n\
|
||||
Options:\n\
|
||||
\t-h -?\t\t-- Print version and usage info, then exit.\n\
|
||||
\t-l <length>\t-- Set the maximum line length to <length>.\n\
|
||||
\t-n <m>\t\t-- Set the number of lines to sort in memory to <m>.\n\
|
||||
\t-o <outfile>\t-- Dump sorted output to <outfile> rather\n\
|
||||
\t\t\t than to stdout.\n\
|
||||
\t-t <pathlist>\t-- Set the paths to use for the location of\n\
|
||||
\t\t\t scratch files. Paths are delimited by \',\' characters.\n\
|
||||
\t-v\t\t-- Verbose operation.\n\
|
||||
\t-V\t\t-- Print version information.\n";
|
||||
|
||||
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
|
||||
size_t lc, i;
|
||||
char *outfile; /* the name of the output file, if nec */
|
||||
char **array; /* an array of strings; for sorting */
|
||||
size_t maxlinelen; /* length of longest line in current file */
|
||||
size_t maxlinecount; /* max number of lines we want to allow */
|
||||
char *tbuffer; /* buffer containing the temp paths */
|
||||
short failed=0; /* any errors found? */
|
||||
int c;
|
||||
short errflag=0;
|
||||
short l_flag=0;
|
||||
short n_flag=0;
|
||||
short o_flag=0;
|
||||
short t_flag=0;
|
||||
short V_flag=0;
|
||||
/* v_flag defined in common.h */
|
||||
|
||||
#ifdef DEBUG
|
||||
begin_stack_check();
|
||||
#endif
|
||||
|
||||
/*
|
||||
* parse the command line
|
||||
*/
|
||||
|
||||
while ((c= getopt(argc,argv,"hl:n:o:t:vV?")) != EOF)
|
||||
switch (c) {
|
||||
case 'l': /* use this as the maximum line length */
|
||||
l_flag++;
|
||||
errno = 0;
|
||||
maxlinelen = (size_t) atol(optarg);
|
||||
if (errno = ERANGE) maxlinelen = DEFAULT_LINELENGTH;
|
||||
break;
|
||||
case 'n': /* sort this number of lines in memory */
|
||||
n_flag++;
|
||||
errno = 0;
|
||||
maxlinecount = (size_t) atol(optarg);
|
||||
if (errno == ERANGE) maxlinecount = DEFAULT_LINECOUNT;
|
||||
break;
|
||||
case 'o': /* redirect sorted output to file */
|
||||
o_flag++;
|
||||
outfile = optarg;
|
||||
break;
|
||||
case 't': /* define locations of temp files */
|
||||
t_flag++;
|
||||
if ((tbuffer=malloc(strlen(optarg)+1))==NULL) {
|
||||
perror("couldn't allocate temporary buffer; using default");
|
||||
break;
|
||||
}
|
||||
strcpy(tbuffer,optarg);
|
||||
break;
|
||||
case 'v': /* verbose */
|
||||
v_flag++;
|
||||
break;
|
||||
case 'V': /* print version information */
|
||||
V_flag++;
|
||||
break;
|
||||
case '?': /* fallthrough */
|
||||
case 'h': /* fallthrough */
|
||||
default: /* Display usage, version, and exit */
|
||||
V_flag++;
|
||||
errflag++;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* React to command line parameters
|
||||
*/
|
||||
|
||||
if (errflag) {
|
||||
fprintf (stderr,"\n%s\n%s\n",usagestring,versionstring);
|
||||
return -1;
|
||||
}
|
||||
if (V_flag) fprintf(stderr,"\n%s\n",versionstring);
|
||||
|
||||
if (!l_flag) maxlinelen = DEFAULT_LINELENGTH;
|
||||
if (!n_flag) maxlinecount = DEFAULT_LINECOUNT;
|
||||
if (v_flag) fprintf(stderr,
|
||||
"Sorting %lu lines in memory.\nMaximum recognised line length = %lu\n",
|
||||
maxlinecount,maxlinelen);
|
||||
if (o_flag) {
|
||||
if ((out_fp = fopen(outfile,"w")) == NULL) {
|
||||
if (v_flag) perror("open on output file failed");
|
||||
return -1;
|
||||
}
|
||||
} else out_fp = stdout;
|
||||
|
||||
tpath1 = NULL;
|
||||
tpath2 = NULL;
|
||||
tpath3 = NULL;
|
||||
tpath4 = NULL;
|
||||
if ((t_flag) && (tbuffer!=NULL)) {
|
||||
char *tp = tbuffer;
|
||||
|
||||
/* set tpath1 */
|
||||
tpath1 = tp;
|
||||
while (*tp && (*tp!=',')) tp++;
|
||||
if (*tp) {
|
||||
*tp++ = '\0'; /* terminate tpath1 */
|
||||
if (v_flag) fprintf(stderr,"Will try to use temp directory %s\n",tpath1);
|
||||
|
||||
/* set tpath2 */
|
||||
tpath2 = tp;
|
||||
while (*tp && (*tp!=',')) tp++;
|
||||
if (*tp) {
|
||||
*tp++ = '\0'; /* terminate tpath2 */
|
||||
if (v_flag)
|
||||
fprintf(stderr,"Will try to use temp directory %s\n",tpath2);
|
||||
|
||||
/* set tpath3 */
|
||||
tpath3 = tp;
|
||||
while (*tp && (*tp!=',')) tp++;
|
||||
if (*tp) {
|
||||
*tp++ = '\0'; /* terminate tpath3 */
|
||||
if (v_flag)
|
||||
fprintf(stderr,"Will try to use temp directory %s\n",tpath3);
|
||||
|
||||
/* set tpath4 */
|
||||
tpath4 = tp;
|
||||
while (*tp && (*tp!=',')) tp++;
|
||||
*tp = '\0'; /* terminate tpath4 */
|
||||
if (v_flag)
|
||||
fprintf(stderr,"Will try to use temp directory %s\n",tpath4);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (v_flag) fprintf(stderr,"Using default temp path\n");
|
||||
|
||||
/* do the sort */
|
||||
if (argc - optind == 1) {
|
||||
c = disksort(argv[optind],maxlinecount,maxlinelen);
|
||||
} else {
|
||||
fprintf(stderr,"\n%s\n%s\n",usagestring,versionstring);
|
||||
c = -1;
|
||||
}
|
||||
|
||||
if (t_flag && (tbuffer)) free(tbuffer);
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr,"%s stack usage: %d bytes\n",argv[0],end_stack_check());
|
||||
#endif
|
||||
|
||||
return c;
|
||||
}
|
105
usr.bin/sort/initdisksort.c
Normal file
105
usr.bin/sort/initdisksort.c
Normal file
|
@ -0,0 +1,105 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
|
||||
#define DSORT
|
||||
#include "common.h"
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define OPENMODE "w+" /* the mode for create/read/write */
|
||||
|
||||
static char *errstring1="initdisksort: couldn't get temp name";
|
||||
static char *errstring2="initdisksort: couldn't open temp file";
|
||||
|
||||
/*
|
||||
* int initdisksort(void);
|
||||
*
|
||||
* Pre: None.
|
||||
*
|
||||
* Post: Returns 0 on success, -1 on failure.
|
||||
* On success:
|
||||
* file1 through file4 are initialized as temp file names.
|
||||
* fp1 through fp4 are open file pointers for file1 ... file4.
|
||||
*
|
||||
* Uses Globals:
|
||||
* fp1, fp2, fp3, fp4
|
||||
* file1, file2, file3, file4,
|
||||
* v_flag
|
||||
*/
|
||||
|
||||
int initdisksort(void) {
|
||||
|
||||
/*
|
||||
* Get the names for the temp files -- this is ponderous but necessary
|
||||
*/
|
||||
|
||||
if ((file1 = tempnam(tpath1,"dsort")) == NULL) {
|
||||
if (v_flag) perror(errstring1);
|
||||
return -1;
|
||||
}
|
||||
if ((file2 = tempnam(tpath2,"dsort")) == NULL) {
|
||||
if (v_flag) perror(errstring1);
|
||||
free(file1);
|
||||
return -1;
|
||||
}
|
||||
if ((file3 = tempnam(tpath3,"dsort")) == NULL) {
|
||||
if (v_flag) perror(errstring1);
|
||||
free(file1);
|
||||
free(file2);
|
||||
return -1;
|
||||
}
|
||||
if ((file4 = tempnam(tpath4,"dsort")) == NULL) {
|
||||
if (v_flag) perror(errstring1);
|
||||
free(file1);
|
||||
free(file2);
|
||||
free(file3);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Open the temp files -- again ponderous but necessary
|
||||
*/
|
||||
|
||||
|
||||
if ((fp1 = fopen(file1,OPENMODE))==NULL) {
|
||||
if (v_flag) perror(errstring2);
|
||||
free(file1);
|
||||
free(file2);
|
||||
free(file3);
|
||||
free(file4);
|
||||
return -1;
|
||||
}
|
||||
if ((fp2 = fopen(file2,OPENMODE))==NULL) {
|
||||
if (v_flag) perror(errstring2);
|
||||
unlink(file1);
|
||||
free(file1);
|
||||
free(file2);
|
||||
free(file3);
|
||||
free(file4);
|
||||
return -1;
|
||||
}
|
||||
if ((fp3 = fopen(file3,OPENMODE))==NULL) {
|
||||
if (v_flag) perror(errstring2);
|
||||
unlink(file1);
|
||||
unlink(file2);
|
||||
free(file1);
|
||||
free(file2);
|
||||
free(file3);
|
||||
free(file4);
|
||||
return -1;
|
||||
}
|
||||
if ((fp4 = fopen(file4,OPENMODE))==NULL) {
|
||||
if (v_flag) perror(errstring2);
|
||||
unlink(file1);
|
||||
unlink(file2);
|
||||
unlink(file3);
|
||||
free(file1);
|
||||
free(file2);
|
||||
free(file3);
|
||||
free(file4);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
89
usr.bin/sort/linecount.c
Normal file
89
usr.bin/sort/linecount.c
Normal file
|
@ -0,0 +1,89 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
|
||||
* unsigned long int linecount (char *filename, size_t *maxlinelen);
|
||||
*
|
||||
* Pre: <filename> is the name of the file for which we need to know
|
||||
* the number of lines. The file must be closed.
|
||||
*
|
||||
* Post: Returns the number of newline characters in the file. On
|
||||
* return, the file is again closed and *maxlinelen is the length
|
||||
* of the longest line in <filename> (length is calculated to
|
||||
* include the newline character but not the null terminator.
|
||||
* Returns zero on failure or if there are no newlines.
|
||||
*
|
||||
* Uses Globals:
|
||||
* v_flag
|
||||
*/
|
||||
|
||||
unsigned long int linecount (char *filename, size_t *maxlinelen) {
|
||||
|
||||
char *buff; /* the input buffer */
|
||||
unsigned long result; /* the number of newlines */
|
||||
int count; /* the number of chars last read */
|
||||
int fd; /* file descriptor for <filename> */
|
||||
short done;
|
||||
int i;
|
||||
size_t linelen; /* length of current line */
|
||||
|
||||
/* init some variables */
|
||||
done = 0;
|
||||
result = 0;
|
||||
*maxlinelen = 0;
|
||||
linelen = 0;
|
||||
|
||||
/* open <filename> for unbuffered I/O */
|
||||
if ((fd = open(filename,O_RDONLY)) == -1) {
|
||||
if (v_flag) perror("linecount: couldn't open input file");
|
||||
return 0lu;
|
||||
}
|
||||
|
||||
/* get an input buffer */
|
||||
if ((buff = malloc(BUFFERSIZE)) == NULL) {
|
||||
if (v_flag) perror ("linecount: couldn't allocate buffer");
|
||||
close(fd);
|
||||
return 0lu;
|
||||
}
|
||||
|
||||
/* repeatedly fill the buffer and increment the newline count */
|
||||
while (!done) {
|
||||
count = read (fd, buff, BUFFERSIZE);
|
||||
switch (count) {
|
||||
case -1: /* file error */
|
||||
if (v_flag) perror ("linecount");
|
||||
close(fd);
|
||||
free(buff);
|
||||
return 0lu;
|
||||
/* NOTREACHED */
|
||||
break;
|
||||
|
||||
case 0: /* EOF */
|
||||
done = 1;
|
||||
break;
|
||||
|
||||
default: /* got some info in the buffer */
|
||||
for (i=0; i<count; i++) {
|
||||
linelen++;
|
||||
if (buff[i] == NEWLINE) {
|
||||
result++;
|
||||
if (linelen > *maxlinelen) *maxlinelen = linelen;
|
||||
linelen = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* clean up and return */
|
||||
close(fd);
|
||||
free(buff);
|
||||
return result;
|
||||
}
|
121
usr.bin/sort/loadarray.c
Normal file
121
usr.bin/sort/loadarray.c
Normal file
|
@ -0,0 +1,121 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* char **loadarray (unsigned long n, char *filename, size_t maxlinelen);
|
||||
*
|
||||
* Pre: <filename> is the name of a file containing <n> lines of text,
|
||||
* the number of lines. The file <filename> must be closed.
|
||||
* <maxlinelen> is the length of the longest line in <filename>
|
||||
*
|
||||
* Post: Returns a pointer to an array of pointers to malloc'd strings,
|
||||
* where the strings are successive lines from the file <filename>.
|
||||
* On return <filename> will be closed. If loadarray() fails for
|
||||
* any reason, it will return NULL.
|
||||
*
|
||||
* Warning: The use of realloc() with a NULL pointer for the initial
|
||||
* allocation may not be portable. If this is not valid for your
|
||||
* current libraries, then #define BROKEN_REALLOC.
|
||||
*
|
||||
* Uses Globals:
|
||||
* v_flag
|
||||
*/
|
||||
|
||||
char **loadarray (unsigned long n, char *filename, size_t maxlinelen) {
|
||||
|
||||
char **result;
|
||||
unsigned long i,j;
|
||||
FILE *in_fp;
|
||||
static char *inbuf=NULL;
|
||||
static size_t previous_size = 0;
|
||||
char *p;
|
||||
|
||||
|
||||
#ifndef BROKEN_REALLOC /* realloc() is ANSI-compliant with NULL first arg */
|
||||
|
||||
/* reallocate the input buffer if necessary */
|
||||
if (maxlinelen > previous_size) {
|
||||
if ((p = realloc(inbuf,maxlinelen+1)) == NULL) {
|
||||
if (v_flag) perror("loadarray: couldn't (re)allocate input buffer");
|
||||
return NULL;
|
||||
}
|
||||
previous_size = maxlinelen;
|
||||
inbuf = p;
|
||||
}
|
||||
|
||||
#else /* BROKEN_REALLOC */
|
||||
|
||||
/* reallocate the input buffer if necessary */
|
||||
if (maxlinelen > previous_size) {
|
||||
if (previous_size == 0) {
|
||||
if ((p = malloc(maxlinelen+1)) == NULL) {
|
||||
if (v_flag) perror("loadarray: couldn't allocate input buffer");
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
if ((p = realloc(inbuf,maxlinelen+1)) == NULL) {
|
||||
if (v_flag) perror("loadarray: couldn't reallocate input buffer");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
previous_size = maxlinelen;
|
||||
inbuf = p;
|
||||
}
|
||||
|
||||
|
||||
#endif /* BROKEN_REALLOC */
|
||||
|
||||
/* allocate the array */
|
||||
if ((result = malloc (n * sizeof(char *)))==NULL) {
|
||||
if (v_flag) perror("loadarray: couldn't allocate base array");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set up the input stream */
|
||||
in_fp = fopen(filename,"r");
|
||||
if (in_fp == NULL) { /* open failed */
|
||||
free(result);
|
||||
if (v_flag) perror("loadarray: couldn't open input file");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* allocate and copy elements */
|
||||
for (i=0; i<n; i++) {
|
||||
|
||||
/* read into the buffer */
|
||||
if(fgets(inbuf,maxlinelen+1,in_fp)==NULL) {
|
||||
/* read failed; clean up and exit */
|
||||
if (v_flag) {
|
||||
if (ferror(in_fp)) perror("loadarray: read error on input file");
|
||||
else perror ("loadarray: premature EOF on input file");
|
||||
}
|
||||
for (j=0; j<i; j++) free(result[j]);
|
||||
free(result);
|
||||
fclose(in_fp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* copy the buffer to the array */
|
||||
result[i] = malloc(strlen(inbuf)+1);
|
||||
if (result[i]==NULL) {
|
||||
/* malloc failed; clean up and exit */
|
||||
if (v_flag) perror("loadarray: couldn't duplicate buffer");
|
||||
for (j=0; j<i; j++) free(result[j]);
|
||||
free(result);
|
||||
fclose(in_fp);
|
||||
return NULL;
|
||||
}
|
||||
strcpy(result[i],inbuf);
|
||||
}
|
||||
|
||||
fclose(in_fp);
|
||||
return result;
|
||||
}
|
||||
|
55
usr.bin/sort/makefile.mk
Normal file
55
usr.bin/sort/makefile.mk
Normal file
|
@ -0,0 +1,55 @@
|
|||
#
# dmake makefile for msort(1) and dsort(1).
#
# "all" is deliberately the first target so that a bare make builds the
# programs; "install" depends on it so nothing stale or missing is copied.
#

BINDIR = /usr/local/bin
MANDIR = /usr/man

# Nothing should need to be changed below this point

# DEFINES = -DDEBUG -D__GNO__
DEFINES = -D__GNO__
CFLAGS = $(DEFINES) -O -v -w -r
CFLAGS2 = $(DEFINES) -O31 -v -w -r
MAINFLAGS = $(DEFINES) -O -v -w -S1024
LDFLAGS = -v
# LDLIBS = -l/usr/lib/gnulib -l/usr/lib/stack
LDLIBS = -l/usr/lib/gnulib

MOBJS = msort.o linecount.o loadarray.o
DOBJS = dsort.o disksort.o initdisksort.o mergeone.o tempnam.o
COMMONOBJS = sortarray.o

all: msort dsort

install: all
	/bin/cp msort dsort $(BINDIR)
	/bin/cp msort.1 dsort.1 $(MANDIR)/man1

msort : $(MOBJS) $(COMMONOBJS)
	$(CC) $(LDFLAGS) $(LDLIBS) -o $@ $<

dsort : $(DOBJS) $(COMMONOBJS)
	$(CC) $(LDFLAGS) $(LDLIBS) -o $@ $<

# The two main programs are compiled with MAINFLAGS instead of CFLAGS
msort.o: msort.c common.h
	$(CC) -c $(MAINFLAGS) -o $@ msort.c

dsort.o: dsort.c common.h
	$(CC) -c $(MAINFLAGS) -o $@ dsort.c

# Orca/C screws up with loop invariant optimization on disksort.c
disksort.o: disksort.c common.h
	$(CC) -c $(CFLAGS2) -o $@ disksort.c

#
# Housekeeping
#

clean:
	$(RM) $(DOBJS) $(MOBJS) $(COMMONOBJS) msort.root dsort.root

clobber: clean
	$(RM) dsort msort

#
# Additional dependencies
#

linecount.o loadarray.o initdisksort.o mergeone.o sortarray.o:: common.h
|
169
usr.bin/sort/mergeone.c
Normal file
169
usr.bin/sort/mergeone.c
Normal file
|
@ -0,0 +1,169 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
|
||||
#define DSORT
|
||||
#include "common.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* int mergeone(FILE *fpA, FILE *fpB, FILE *fpC, char strA[], char strB[],
|
||||
* size_t linelength);
|
||||
*
|
||||
* Pre: fpA, fpB, and fpC are open file pointers. The first should contain
|
||||
* "runs" of data delimited by a line consisting of just the DELIM
|
||||
* character, although either or both may be at EOF. strA and strB
|
||||
* are scratch character buffers, each of size linelength.
|
||||
*
|
||||
* Post: The first run on each of fpA and fpB are merge-sorted and added to
|
||||
* fpC. If either fpA or fpB are at EOF then the run from the other
|
||||
* file pointer is simply concatenated onto fpC. Mergeone will return
|
||||
* zero if the merge was successful, -1 if both fpA and fpB are at
|
||||
* EOF, and -2 if there was an error. On return, the contents of
|
||||
* strA and strB are undefined.
|
||||
*
|
||||
* Uses Globals:
|
||||
* v_flag -- if set and an error occurs, a message will be printed
|
||||
* to stderr
|
||||
* fp1,fp2,fp3,fp4 -- file pointers to the four scratch files
|
||||
* file1,file2,file3,file4 -- names of the four scratch files
|
||||
*/
|
||||
|
||||
|
||||
int mergeone(FILE *fpA, FILE *fpB, FILE *fpC, char strA[], char strB[],
|
||||
size_t linelength) {
|
||||
|
||||
short run_end_A = 0;
|
||||
short run_end_B = 0;
|
||||
|
||||
/*
|
||||
* Load strA and strB with the first lines from fpA and fpB. After
|
||||
* this, either file may be at EOF (but not error).
|
||||
*/
|
||||
|
||||
if ((fgets(strA,linelength,fpA)==NULL) && ferror(fpA)) {
|
||||
if (v_flag) perror("mergeone: Read error on fpA");
|
||||
return -2;
|
||||
}
|
||||
if ((fgets(strB,linelength,fpB)==NULL) && ferror(fpB)) {
|
||||
if (v_flag) perror("mergeone: Read error on fpB");
|
||||
return -2;
|
||||
}
|
||||
|
||||
/*
|
||||
* merge fpA and fpB until we either get an EOF or a DELIM line
|
||||
*/
|
||||
|
||||
while (!feof(fpA) && !feof(fpB)) {
|
||||
|
||||
/* test to see if our run is finished */
|
||||
if ((strA[0]==DELIM) && (strA[1]=='\n')) {
|
||||
run_end_A = 1;
|
||||
break;
|
||||
}
|
||||
if ((strB[0]==DELIM) && (strB[1]=='\n')) {
|
||||
run_end_B = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (strcmp(strA,strB) < 0) {
|
||||
|
||||
/* print out the string to fpC */
|
||||
if (fprintf(fpC,"%s",strA) == EOF) {
|
||||
if (v_flag) perror("mergeone: Write error on fpC");
|
||||
return -2;
|
||||
}
|
||||
|
||||
/* get another string from fpA */
|
||||
if ((fgets(strA,linelength,fpA)==NULL) && ferror(fpA)) {
|
||||
if (v_flag) perror("mergeone: Read error on fpA");
|
||||
return -2;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
/* print out the string to fpC */
|
||||
if (fprintf(fpC,"%s",strB) == EOF) {
|
||||
if (v_flag) perror("mergeone: Write error on fpC");
|
||||
return -2;
|
||||
if (v_flag) {
|
||||
/* say something */
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
/* get another string from fpB */
|
||||
if ((fgets(strB,linelength,fpB)==NULL) && ferror(fpB)) {
|
||||
if (v_flag) perror("mergeone: Read error on fpB");
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We've come to the end of at least one of the runs, concatenate
|
||||
* the remainder on the output file
|
||||
*/
|
||||
|
||||
/* finish off fpA if necessary */
|
||||
while (!run_end_A && !feof(fpA)) {
|
||||
|
||||
/* test to see if our run is finished */
|
||||
if ((strA[0]==DELIM) && (strA[1]=='\n')) {
|
||||
run_end_A = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* print out the string to fpC */
|
||||
if (fprintf(fpC,"%s",strA) == EOF) {
|
||||
if (v_flag) perror("mergeone: Write error on fpC");
|
||||
return -2;
|
||||
}
|
||||
|
||||
/* get another string from fpA */
|
||||
if ((fgets(strA,linelength,fpA)==NULL) && ferror(fpA)) {
|
||||
if (v_flag) perror("mergeone: Read error on fpA");
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
|
||||
/* finish off fpB if necessary */
|
||||
while (!run_end_B && !feof(fpB)) {
|
||||
|
||||
/* test to see if our run is finished */
|
||||
if ((strB[0]==DELIM) && (strB[1]=='\n')) {
|
||||
run_end_B = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* print out the string to fpC */
|
||||
if (fprintf(fpC,"%s",strB) == EOF) {
|
||||
if (v_flag) perror("mergeone: Write error on fpC");
|
||||
return -2;
|
||||
}
|
||||
|
||||
/* get another string from fpB */
|
||||
if ((fgets(strB,linelength,fpB)==NULL) && ferror(fpB)) {
|
||||
if (v_flag) perror("mergeone: Read error on fpB");
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point, both fpA and fpB are either at a run-end or at EOF,
|
||||
* with no errors. If at EOF, then don't append a DELIM character.
|
||||
*/
|
||||
|
||||
if (feof(fpA) && feof(fpB)) return -1;
|
||||
if (fprintf(fpC,"%c\n",DELIM) == EOF) {
|
||||
if (v_flag) perror("mergeone: Write error on fpC");
|
||||
return -2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
1
usr.bin/sort/msort.1
Normal file
1
usr.bin/sort/msort.1
Normal file
|
@ -0,0 +1 @@
|
|||
.so /usr/man/man1/dsort.1
|
155
usr.bin/sort/msort.c
Normal file
155
usr.bin/sort/msort.c
Normal file
|
@ -0,0 +1,155 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
/*
|
||||
* msort -- sort a text file in memory lexicographically
|
||||
*
|
||||
* Synopsis:
|
||||
* msort [-hvV?] [-o outfile] [-n lines] file1 [file2 ...]
|
||||
*
|
||||
* Options:
|
||||
* -h -? -- print version and usage info, then exit
|
||||
* -n <lines> -- don't try to sort files over <lines> lines long
|
||||
* -o <outfile> -- sorted output to <outfile> rather than
|
||||
* to stdout
|
||||
* -v -- verbose operation
|
||||
* -V -- print version information
|
||||
*/
|
||||
|
||||
#define DEFFUNC
|
||||
#define MSORT
|
||||
#include "common.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include "/usr/include/getopt.h" /* GNU version */
|
||||
|
||||
extern int optind;
|
||||
extern char *optarg;
|
||||
extern int errno;
|
||||
|
||||
static char *versionstring="\
|
||||
Version 1.0 by Devin Reade\n";
|
||||
|
||||
/*
 * Usage text, printed together with the version string for -h, -? or an
 * unrecognised option.  The synopsis line must list every option accepted
 * by the getopt() string in main(), including "-n lines".
 */
static char *usagestring="\
msort -- Sort a text file in memory lexicographically\n\
\n\
Synopsis:\n\
\tmsort [-hvV?] [-o outfile] [-n lines] file1 [file2 ...]\n\
\n\
Options:\n\
\t-h -?\t\t-- Print version and usage info, then exit.\n\
\t-n <m>\t\t-- Set the maximum number of lines per file to <m>.\n\
\t-o <outfile>\t-- Dump sorted output to <outfile> rather\n\
\t\t\t than to stdout.\n\
\t-v\t\t-- Verbose operation.\n\
\t-V\t\t-- Print version information.\n";
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
|
||||
size_t lc, i;
|
||||
char *outfile; /* the name of the output file, if nec */
|
||||
char **array; /* an array of strings; for sorting */
|
||||
size_t maxlinelen; /* length of longest line in current file */
|
||||
size_t maxlinecount; /* max number of lines we want to allow */
|
||||
short failed=0; /* any errors found? */
|
||||
int c;
|
||||
short errflag=0;
|
||||
short n_flag=0;
|
||||
short o_flag=0;
|
||||
short V_flag=0;
|
||||
/* v_flag defined in common.h */
|
||||
|
||||
#ifdef DEBUG
|
||||
begin_stack_check();
|
||||
#endif
|
||||
|
||||
/*
|
||||
* parse the command line
|
||||
*/
|
||||
|
||||
while ((c= getopt(argc,argv,"hn:o:vV?")) != EOF)
|
||||
switch (c) {
|
||||
case 'n': /* don't try to sort if file is over n lines long */
|
||||
n_flag++;
|
||||
errno = 0;
|
||||
maxlinecount = (size_t) atol(optarg);
|
||||
if (errno == ERANGE) maxlinecount = DEFAULT_LINECOUNT;
|
||||
break;
|
||||
case 'o': /* redirect sorted output to file */
|
||||
o_flag++;
|
||||
outfile = optarg;
|
||||
break;
|
||||
case 'v': /* verbose */
|
||||
v_flag++;
|
||||
break;
|
||||
case 'V': /* print version information */
|
||||
V_flag++;
|
||||
break;
|
||||
case '?': /* fallthrough */
|
||||
case 'h': /* fallthrough */
|
||||
default: /* Display usage, version, and exit */
|
||||
V_flag++;
|
||||
errflag++;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* React to command line parameters
|
||||
*/
|
||||
|
||||
if (errflag) {
|
||||
fprintf(stderr,"\n%s\n%s\n",usagestring,versionstring);
|
||||
return -1;
|
||||
}
|
||||
if (V_flag) fprintf(stderr,"\n%s\n",versionstring);
|
||||
if (!n_flag) maxlinecount = DEFAULT_LINECOUNT;
|
||||
if (v_flag) fprintf(stderr,"Maximum lines per file = %lu\n",maxlinecount);
|
||||
|
||||
if (o_flag) {
|
||||
if ((out_fp = fopen(outfile,"w")) == NULL) {
|
||||
if (v_flag) perror("open on output file failed");
|
||||
return -1;
|
||||
}
|
||||
} else out_fp = stdout;
|
||||
|
||||
/* loop through files */
|
||||
for (; optind<argc; optind++) {
|
||||
|
||||
/* get the line count */
|
||||
lc = linecount(argv[optind], &maxlinelen);
|
||||
if (lc>maxlinecount) {
|
||||
if (v_flag)
|
||||
fprintf(stderr,"%s too long for an in-memory sort -- file skipped\n",
|
||||
argv[optind]);
|
||||
failed = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* load the array */
|
||||
array = loadarray (lc, argv[optind], maxlinelen);
|
||||
if (array == NULL) {
|
||||
if (v_flag) fprintf(stderr,"Ignoring file %s\n",argv[optind]);
|
||||
failed = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* sort it */
|
||||
sortarray (array,lc);
|
||||
|
||||
/* print the sorted file out and clean up the array */
|
||||
for (i=0; i<lc; i++) {
|
||||
fprintf(out_fp,"%s",array[i]);
|
||||
free(array[i]);
|
||||
}
|
||||
free(array);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr,"%s stack usage: %d bytes\n",argv[0],end_stack_check());
|
||||
#endif
|
||||
|
||||
if (failed) return -1;
|
||||
else return 0;
|
||||
}
|
62
usr.bin/sort/sortarray.c
Normal file
62
usr.bin/sort/sortarray.c
Normal file
|
@ -0,0 +1,62 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Sift <item> down from position <root> of the max-heap stored in
 * array[0..count-1] until neither child compares greater than it, then
 * store it there.  The slot at <root> is treated as empty on entry.
 */
static void sift_down(char *array[], unsigned long root,
                      unsigned long count, char *item) {

    unsigned long child;

    while ((child = (root << 1) + 1) < count) {
        /* pick the larger of the two children */
        if ((child + 1 < count) &&
            (strcmp(array[child], array[child+1]) < 0)) ++child;
        if (strcmp(item, array[child]) >= 0) break;   /* item's level */
        array[root] = array[child];                   /* demote item */
        root = child;
    }
    array[root] = item;
}

/*
 * void sortarray(char **array, unsigned long n);
 *
 * Pre:  <array> is a pointer to an array of pointers to NUL-terminated
 *       strings, and <n> is the number of elements in <array>
 *
 * Post: The strings in <array> are sorted lexicographically (strcmp
 *       order) in ascending order, using the heapsort algorithm.  This
 *       is an in-place non-recursive sort with behavior O[n*lg(n)] both
 *       on average and worst-case.  Only the pointers are rearranged.
 *       Arrays of zero or one element are returned untouched.
 */

void sortarray(char *array[], unsigned long n) {

    unsigned long i;
    char *last;

    if (n < 2) return;          /* nothing to sort */

    /* heap creation phase: heapify from the last parent up to the root */
    for (i = n >> 1; i-- > 0; )
        sift_down(array, i, n, array[i]);

    /*
     * heap selection phase: retire the top of the heap into the slot at
     * the end of the array, then re-insert the element that was there
     * into the (now one smaller) heap.
     */
    for (i = n - 1; i > 0; i--) {
        last = array[i];
        array[i] = array[0];
        sift_down(array, 0, i, last);
    }
}
|
130
usr.bin/sort/tempnam.c
Normal file
130
usr.bin/sort/tempnam.c
Normal file
|
@ -0,0 +1,130 @@
|
|||
#ifdef __CCFRONT__
|
||||
#include <14:pragma.h>
|
||||
#endif
|
||||
/*
|
||||
* #include <stdio.h>
|
||||
*
|
||||
* char *tempnam (const char *dir, const char *prefix);
|
||||
*
|
||||
* Generate a pathname for a temporary file.
|
||||
*
|
||||
* tempnam will select a directory for the temporary file by using the
|
||||
* following criteria:
|
||||
*
|
||||
* If dir is not the NULL pointer, tempnam uses the pathname pointed to by
|
||||
* dir as the directory,
|
||||
*
|
||||
* otherwise, tempnam uses the value of the TMPDIR environment variable if
|
||||
* the variable is defined,
|
||||
*
|
||||
* otherwise the directory defined by P_tmpdir in the stdio.h header file
|
||||
* if that directory is writable by the caller,
|
||||
*
|
||||
* otherwise, tempnam will use "/tmp" as a last resort.
|
||||
*/
|
||||
|
||||
#ifdef __ORCAC__
|
||||
#define __GNO__ 1
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define max(A,B) (((A)<(B))?(B):(A))
|
||||
|
||||
#if !defined(__GNO__)
|
||||
extern char *mktemp();
|
||||
extern int access();
|
||||
#endif
|
||||
|
||||
static char seed[4]="AAA";
|
||||
|
||||
/*
 * pbrk is the pathname delimiter.  Under GNO it is chosen at run time by
 * cpdir() (':' or '/'); everywhere else it is always '/'.  The macro must
 * not end in a semicolon since it is used inside expressions.
 */
#if (defined __GNO__)
static char pbrk;
#else
# define pbrk '/'
#endif

/* BSD stdio.h doesn't define P_tmpdir, so let's do it here */
#ifndef P_tmpdir
static char *P_tmpdir = "/tmp";
#endif


/*
 * Copy the directory name <str> into <buf>, dropping one trailing path
 * delimiter if there is one.  <buf> must be large enough to hold <str>.
 * If <str> is NULL, <buf> is left untouched.  Under GNO this also sets
 * pbrk to the delimiter used by <str> (falling back to the one used in
 * the PATH environment variable, or '/').
 *
 * Returns <buf>.
 */
static char *
cpdir(char *buf, const char *str)
{
    size_t len;

    if (str != NULL) {

#if defined(__GNO__)
        char *path;

        /* get the path delimiter */
        if (strchr(str,':')) pbrk = ':';
        else if (strchr(str,'/')) pbrk = '/';
        else {
            if ((path=getenv("PATH"))==NULL) pbrk = '/';
            else pbrk = (strchr(path,':')) ? ':' : '/';
        }
#endif

        (void) strcpy(buf, str);
        len = strlen(buf);
        /* guard against an empty string: there is no last character */
        if ((len > 0) && (buf[len-1] == pbrk)) buf[len-1] = '\0';
    }

    return(buf);
}
|
||||
|
||||
|
||||
char *
|
||||
tempnam (char *dir, char *prefix)
|
||||
/* dir -- use this directory please (if non-NULL) */
|
||||
/* prefix -- use this (if non-NULL) as filename prefix */
|
||||
{
|
||||
register char *p, *q, *tmpdir;
|
||||
int tl=0, dl=0, pl;
|
||||
|
||||
/* create a buffer <p> that's as large as necessary */
|
||||
pl = strlen(P_tmpdir);
|
||||
if( (tmpdir = getenv("TMPDIR")) != NULL ) tl = strlen(tmpdir);
|
||||
if( dir != NULL ) dl = strlen(dir);
|
||||
if( (p = malloc((unsigned int)(max(max(dl,tl),pl)+16))) == NULL )
|
||||
return(NULL);
|
||||
*p = '\0';
|
||||
|
||||
#if defined (__GNO__)
|
||||
if( (dl == 0) || (access( cpdir(p, dir), W_OK) != 0) )
|
||||
if( (tl == 0) || (access( cpdir(p, tmpdir), W_OK) != 0) )
|
||||
if( access( cpdir(p, P_tmpdir), W_OK) != 0 )
|
||||
if( access( cpdir(p, "/tmp"), W_OK) != 0 )
|
||||
return(NULL);
|
||||
|
||||
#else /* not __GNO__ */
|
||||
if( (dl == 0) || (access( cpdir(p, dir), 3) != 0) )
|
||||
if( (tl == 0) || (access( cpdir(p, tmpdir), 3) != 0) )
|
||||
if( access( cpdir(p, P_tmpdir), 3) != 0 )
|
||||
if( access( cpdir(p, "/tmp"), 3) != 0 )
|
||||
return(NULL);
|
||||
#endif /* not __GNO__ */
|
||||
|
||||
(void) strcat(p, "/");
|
||||
if(prefix)
|
||||
{
|
||||
*(p+strlen(p)+5) = '\0';
|
||||
(void)strncat(p, prefix, 5);
|
||||
}
|
||||
|
||||
(void)strcat(p, seed);
|
||||
(void)strcat(p, "XXXXXX");
|
||||
|
||||
q = seed;
|
||||
while(*q == 'Z') *q++ = 'A';
|
||||
++*q;
|
||||
|
||||
if(*mktemp(p) == '\0') return(NULL);
|
||||
return(p);
|
||||
}
|
Loading…
Reference in New Issue
Block a user