gno/usr.bin/man/fillbuffer.c
gdr 73ce022151 Version 3.0 of the manpack archive. This moved on top of the
original makewhatis repository.  Earlier versions of man, apropos,
and whatis were by other authors (these are written from scratch).
Catman is new.
1996-01-28 00:41:22 +00:00

471 lines
15 KiB
C

/*
* Copyright 1995 by Devin Reade <gdr@myrias.com>. For distribution
* information see the README file that is part of the manpack archive,
* or contact the author, above.
*/
/*
 * NOTE(review): `segment` is not standard C; presumably this is the
 * ORCA/C directive that names the load segment for the code in this
 * file -- confirm against the compiler manual.
 */
segment "makewhatis";
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "makewhatis.h"
/*
 * Spellings of the section headings that fillbuffer() searches for.
 * The "1" form is the plain word.  The "2" form writes every letter
 * twice and the "3" form writes every letter four times, with a
 * backspace between the repetitions (presumably overstruck bold text
 * as found in preformatted pages).
 */
#define NAME1 "NAME"
#define NAME2 "N\bNA\bAM\bME\bE"
#define NAME3 "N\bN\bN\bNA\bA\bA\bAM\bM\bM\bME\bE\bE\bE"
#define SYNOPSIS1 "SYNOPSIS"
#define SYNOPSIS2 "S\bSY\bYN\bNO\bOP\bPS\bSI\bIS\bS"
#define SYNOPSIS3 "S\bS\bS\bSY\bY\bY\bYN\bN\bN\bNO\bO\bO\bOP\bP\bP\bPS\bS\bS\bSI\bI\bI\bIS\bS\bS\bS"
#define DESCRIPTION1 "DESCRIPTION"
#define DESCRIPTION2 "D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN"
#define DESCRIPTION3 "D\bD\bD\bDE\bE\bE\bES\bS\bS\bSC\bC\bC\bCR\bR\bR\bRI\bI\bI\bIP\bP\bP\bPT\bT\bT\bTI\bI\bI\bIO\bO\bO\bON\bN\bN\bN"
/*
 * Result buffers filled in by fillbuffer().  <buffer> and <titlebuf>
 * have external linkage; <buffer2> is private to this file.
 */
char buffer[BUFFERSIZE]; /* contains the command description */
char titlebuf[BUFFERSIZE]; /* contains the command name */
static char buffer2[BUFFERSIZE]; /* used for chars read from man page */
/*
 * When built with TEST_FILLBUFFER defined, this file supplies its own
 * verbosity flag and maps the two output streams onto stdout/stderr;
 * otherwise the streams are defined in another translation unit.
 */
#ifdef TEST_FILLBUFFER
short v_flag=2;
# define output_fp stdout
# define error_fp stderr
#else
extern FILE *output_fp; /* output file descriptor -- may be stdout */
extern FILE *error_fp; /* error file descriptor -- may be stderr */
#endif
/* void fillbuffer (char *filename);
 *
 * Pre:  <filename> is the name of the man page or temporary file
 *       containing text.  It may have nroff formatting information
 *       or screen control codes, but must not be either a compressed
 *       file nor an AWGS word processor file.
 *
 * Post: <buffer> will contain all the text, minus formatting and control
 *       codes, that follows the first '-' after the word "NAME" and that
 *       precedes ".SH", "SYNOPSIS", or "DESCRIPTION", whichever comes
 *       first.  <titlebuf> will contain all printable text starting with
 *       the first printable non-whitespace character following "NAME"
 *       and ending with the last printable character before the first
 *       '-' following "NAME".
 *
 *       Both <buffer> and <titlebuf> are set to empty strings if the
 *       file cannot be opened, cannot be read, starts with ".so", has
 *       no "NAME" line, has no '-' after "NAME", or if either result
 *       would overflow its buffer.  If end of file is reached while the
 *       description is being copied, <buffer> is terminated with what
 *       has been collected so far and <titlebuf> is left intact.
 *
 *       When v_flag is non-zero a diagnostic is written to error_fp
 *       for each of these conditions except the ".so" case.
 *
 * Notes: The start of a line is recognised by a preceding '\r'.  A
 *       request line ("\r." followed by the request name) has the
 *       request name removed; a comment line ("\r.\"") is removed up
 *       to the next '\r'; a three character font escape ("\fX") is
 *       removed.
 *
 * Warning: This routine was written to be fast at the expense of code
 *       size.  It also has a lot of "special case"ing since it could
 *       be fed nroff source, aroff'd output, or text files that may
 *       include formatting control codes.  If you're looking for some
 *       nice neat code, you're not going to find it here.
 */
void fillbuffer (char *filename) {
   FILE *fp;            /* FILE pointer for filename */
   int count;           /* how many chars were read into buffer2 */
   char *p1;            /* points to current char in buffer2 */
   char *p2;            /* points to last char (of interest) in buffer2 */
   char *p3;            /* points to current char in buffer or titlebuf */
   char *p6;            /* scratch */
   short found;         /* some flags */
   short in_comment;
   short in_format_BR;
   short in_format_f;
   short foo;
   /*
    * Set p4 and p5 to the ends of buffer and titlebuf, respectively.
    * These are used for error checking, so that we don't overflow the
    * buffers.
    */
   char *p4 = buffer + BUFFERSIZE;
   char *p5 = titlebuf + BUFFERSIZE;

   /*
    * open the file
    */
   if ((fp = fopen(filename,"rb")) == NULL) {
      buffer[0] = '\0';
      titlebuf[0] = '\0';     /* don't leave a stale name behind */
      if (v_flag) fprintf (error_fp,"Open failed for file \"%s\"\n",filename);
      return;
   }

   /*
    * see if it includes another man page
    */
   if ((fgets(buffer2,4,fp) == NULL) || (strncmp(buffer2,".so",3)==0)) {
      buffer[0] = '\0';
      titlebuf[0] = '\0';
      fclose(fp);
      return;
   }
   fseek(fp,0L,SEEK_SET);

   /*
    * Make fp point to the first newline following NAME so that the
    * next block read will pick it up as the first character.  This is
    * needed for the next section of code following this one.
    */
   for(;;) {
      /*
       * read in buffer2 in a line-oriented fashion at first so that we
       * can more easily ignore .\" and .TH lines
       */
      if (fgets(buffer2,BUFFERSIZE,fp)==NULL) {
         /*
          * eof or error, and we haven't found "NAME" yet ... return
          * an empty string
          */
         buffer[0] = '\0';
         titlebuf[0] = '\0';
         fclose(fp);
         if (v_flag) fprintf (error_fp,
            "EOF or error on %s, NAME not found.\n",filename);
         return;
      }

      /* ignore comment lines and any .TH line(s) */
      if ((strncmp(buffer2,".\\\"",3)==0) || (strncmp(buffer2,".TH",3)==0))
         continue;

      /* check the various versions of "NAME" */
      if (strstr(buffer2,NAME1) != NULL) break;
      if (strstr(buffer2,NAME2) != NULL) break;
      if (strstr(buffer2,NAME3) != NULL) break;
   }

   /* we need the previous newline for the next algorithm to work */
   fseek(fp,-1L,SEEK_CUR);

   /*
    * Make p1 point to spot in buffer2 where there occurs the first
    * character following '-' which in turn follows "NAME".
    * Note that if "NAME" or '-' are within a comment line
    * (nroff source), it will still be picked up.
    *
    * Also copy selected chars to titlebuf until the first '-' is found.
    */
   p3 = titlebuf;
   found = 0;           /* set this when we find '-' */
   in_format_BR = 0;    /* in the middle of a .BR format */
   in_format_f = 0;     /* in the middle of a \fI format */
   in_comment = 0;      /* in the middle of a .\" comment */
   foo = 0;             /* haven't found the printable character after NAME */
   for(;;) {

      /* read another block into buffer2. */
      count = (int) fread(buffer2,sizeof(char),BUFFERSIZE-1,fp);
      if (count == 0) {
         /* eof or error; empty buffer and titlebuf then return */
         buffer[0] = '\0';
         titlebuf[0] = '\0';
         fclose(fp);
         if (v_flag) fprintf (error_fp,
            "EOF or error on %s, command name not found.\n",filename);
         return;
      }
      buffer2[count] = '\0';
      p1 = buffer2;

      /*
       * mark the "end" of buffer2 with p2.  p1 walks buffer2, so the
       * sentinel has to be an address inside buffer2 as well.
       */
      if ((p2 = strchr(p1,'-')) != NULL) {
         found = 1;
      } else {
         p2 = buffer2 + count;
      }

      /*
       * If this is not our first iteration, dump any formatting information
       * or comments left over from the last iteration.  A flag stays set
       * when the whole block was consumed without reaching the end of the
       * construct, so that the skip resumes in the following block.
       */
      if (in_comment) {
         while ((p1<p2) && (*p1 != '\r')) p1++;
         if (p1<p2) in_comment = 0;
      }
      if (in_format_BR) {
         while ((p1<p2) && !isspace((unsigned char) *p1)) p1++;
         if (p1<p2) in_format_BR = 0;
      }
      if (in_format_f) {
         p1 = p1 + 3 - in_format_f;
         in_format_f = 0;
      }

      /*
       * At this time, p1 points to the start of the key words, p2
       * to either the end of the key words or the end of buffer2,
       * and p3 into the spot in titlebuf where the next character should
       * be put.
       *
       * Copy *p1 to *p3 while p1<p2 (not at end of titlebuf or key words),
       * skipping comments, formatting info, and control chars
       */
      for (; p1<p2; p1++) {

         /* skip .\" comments */
         if (strncmp(p1,"\r.\\\"",4) == 0) {
            /*
             * p1 is sitting on the '\r' that starts the comment line;
             * step over it first or the scan for the closing '\r'
             * would stop immediately.
             */
            p1++;
            while ((p1<p2) && (*p1!='\r')) p1++;
            if (p1==p2) in_comment = 1;
            else --p1;  /* loop increment lands on the closing '\r' */
            continue;
         }

         /* skip .BR-type formatting */
         if ((p1<p2) && (*p1=='\r') && (*(p1+1)=='.')) {
            p1++;
            while ((p1<p2) && !isspace((unsigned char) *p1)) p1++;
            if (p1==p2) in_format_BR = 1;
            else --p1;
            continue;
         }

         /* skip \fI-type formatting */
         if ((p1<p2) && (*p1=='\\') && (*(p1+1)=='f')) {
            if ((p1 + 3) < p2) {
               /*
                * The escape is three characters long; the loop
                * increment supplies the third step.
                */
               p1 += 2;
            } else {
               in_format_f = (short) (p2 - p1);
               p1 = p2;
            }
            continue;
         }

         /*
          * skip whitespace if we haven't got the beginning of the
          * description yet.
          */
#ifdef ISGRAPH_FIX
         if (isgraph((unsigned char) *p1) && (*p1!=' ')) foo=1;
         if (!foo) {
            while ((p1<p2) &&
                   !(isgraph((unsigned char) *p1) && (*p1!=' '))) p1++;
            /* back up far enough that a request line is seen as such */
            if ((*p1=='.') && (*(p1-1)=='\r')) p1 -=2;
            else --p1;
            continue;
         }
#else
         if (isgraph((unsigned char) *p1)) foo=1;
         if (!foo) {
            while ((p1<p2) && !isgraph((unsigned char) *p1)) p1++;
            /* back up far enough that a request line is seen as such */
            if ((*p1=='.') && (*(p1-1)=='\r')) p1 -=2;
            else --p1;
            continue;
         }
#endif

         /*
          * at this point, *p1 is either a control char or something that
          * we want in titlebuf, assuming that p1<p2.
          */
         if ((p1<p2) && !iscntrl((unsigned char) *p1)) {

            /*
             * The conditional below means:
             * Copy it so that: 1. There is only one space between words; and
             *                  2. The buffer doesn't begin with a space.
             */
            if ( !((p3>titlebuf) && (*p1 == ' ') && (*(p3-1) == ' ')) &&
                 !((p3==titlebuf) && (*p1 == ' '))
               ) {

               /* don't let a space precede a comma */
               if ((*p1==',') && (p3>titlebuf) && (*(p3-1)==' ')) {
                  *(p3-1) = ',';
                  continue;
               } else *p3++ = *p1;
               if (p3>=p5) {    /* titlebuf overflow? */
                  if (v_flag)
                     fprintf(error_fp,"command name buffer overflow on %s\n",
                             filename);
                  buffer[0] = '\0';
                  titlebuf[0] = '\0';
                  fclose(fp);
                  return;
               }
            }
         }
      }

      if (found) {      /* we've got all of the key words */
         /*
          * p3 points one past the last character stored.  Drop the
          * backslash of a trailing "\-" and any trailing whitespace,
          * never stepping in front of titlebuf.
          */
         if ((p3>titlebuf) && (*(p3-1)=='\\')) p3--;
         while ((p3>titlebuf) && isspace((unsigned char) *(p3-1))) p3--;
         *p3 = '\0';
         break;
      }
   }

   /*
    * Step over the '-' and any following non-printing characters, but
    * stay within the data that was actually read into buffer2.
    */
   p1 = p2 + 1;
#ifdef ISGRAPH_FIX
   while ((p1 < buffer2 + count) &&
          !(isgraph((unsigned char) *p1) && (*p1 != ' '))) p1++;
#else
   while ((p1 < buffer2 + count) && !isgraph((unsigned char) *p1)) p1++;
#endif

   /*
    * now copy selected chars to buffer until the next subheading is found
    */
   p3 = buffer;
   found = 0;           /* set this when we find one of the above strings */
   in_format_BR = 0;    /* in the middle of a .BR format */
   in_format_f = 0;     /* in the middle of a \fI format */
   in_comment = 0;      /* in the middle of a .\" comment */
   for(;;) {

      /* mark the "end" of buffer2 with p2 */
      if ( ((p2 = strstr(p1,".SH")) != NULL) ||
           ((p2 = strstr(p1,SYNOPSIS1)) != NULL) ||
           ((p2 = strstr(p1,SYNOPSIS2)) != NULL) ||
           ((p2 = strstr(p1,SYNOPSIS3)) != NULL) ||
           ((p2 = strstr(p1,DESCRIPTION1)) != NULL) ||
           ((p2 = strstr(p1,DESCRIPTION2)) != NULL) ||
           ((p2 = strstr(p1,DESCRIPTION3)) != NULL)
         ) {
         *p2 = '\0';
         /*
          * this conditional is to cover the weird case of having the word
          * "SYNOPSIS" appearing in the description (or elsewhere), as
          * it does for the GNO Intro(1) man page.  Blech.  Only in
          * aroff source or a preformatted page would this matter.
          */
         if (((p6 = strstr(p1,SYNOPSIS1)) != NULL) ||
             ((p6 = strstr(p1,DESCRIPTION1)) != NULL)) {
            p2 = p6;
         }
         found = 1;
      } else {
         /* the sentinel must lie inside buffer2, which is what p1 walks */
         p2 = buffer2 + count;
      }

      /*
       * If this is not our first iteration, dump any formatting information
       * or comments left over from the last iteration.  A flag stays set
       * when the whole block was consumed without reaching the end of the
       * construct, so that the skip resumes in the following block.
       */
      if (in_comment) {
         while ((p1<p2) && (*p1 != '\r')) p1++;
         if (p1<p2) in_comment = 0;
      }
      if (in_format_BR) {
         while ((p1<p2) && !isspace((unsigned char) *p1)) p1++;
         if (p1<p2) in_format_BR = 0;
      }
      if (in_format_f) {
         p1 = p1 + 3 - in_format_f;
         in_format_f = 0;
      }

      /*
       * At this time, p1 points to the start of the description, p2
       * to either the end of the description or the end of buffer2,
       * and p3 into the spot in buffer where the next character should
       * be put.
       *
       * Copy *p1 to *p3 while p1<p2 (not at end of buffer or description),
       * skipping comments, formatting info, and control chars.  Each skip
       * ends with "continue" so that whatever follows the skipped text is
       * itself examined for comments, requests and font escapes.
       */
      for (; p1<p2; p1++) {

         /* skip .\" comments */
         if (strncmp(p1,"\r.\\\"",4) == 0) {
            /*
             * p1 is sitting on the '\r' that starts the comment line;
             * step over it first or the scan for the closing '\r'
             * would stop immediately.
             */
            p1++;
            while ((p1<p2) && (*p1!='\r')) p1++;
            if (p1==p2) in_comment = 1;
            else --p1;  /* loop increment lands on the closing '\r' */
            continue;
         }

         /* skip .BR-type formatting */
         if ((p1<p2) && (*p1=='\r') && (*(p1+1)=='.')) {
            p1++;
            while ((p1<p2) && !isspace((unsigned char) *p1)) p1++;
            if (p1==p2) in_format_BR = 1;
            else --p1;
            continue;
         }

         /* skip \fI-type formatting */
         if ((p1<p2) && (*p1=='\\') && (*(p1+1)=='f')) {
            if ((p1 + 3) < p2) {
               /*
                * The escape is three characters long; the loop
                * increment supplies the third step.
                */
               p1 += 2;
            } else {
               in_format_f = (short) (p2 - p1);
               p1 = p2;
            }
            continue;
         }

         /*
          * at this point, *p1 is either a control char or something that
          * we want in buffer, assuming that p1<p2.
          */
         if ((p1<p2) && !iscntrl((unsigned char) *p1)) {

            /*
             * The conditional below means:
             * Copy it so that: 1. There is only one space between words; and
             *                  2. The buffer doesn't begin with a space.
             */
            if ( !((p3>buffer) && (*p1 == ' ') && (*(p3-1) == ' ')) &&
                 !((p3==buffer) && (*p1 == ' '))
               ) {
               *p3++ = *p1;
               if (p3>=p4) {    /* buffer overflow? */
                  if (v_flag)
                     fprintf(error_fp,"command description buffer overflow on %s\n",
                             filename);
                  buffer[0] = '\0';
                  titlebuf[0] = '\0';
                  fclose(fp);
                  return;
               }
            }
         }
      }

      if (found) {      /* we've got the entire description */
         *p3 = '\0';
         break;
      }

      /*
       * We're part way through the description; read another block
       * into buffer2.
       */
      count = (int) fread(buffer2,sizeof(char),BUFFERSIZE-1,fp);
      if (count == 0) {
         /* eof or error; terminate buffer and return */
         *p3 = '\0';
         fclose(fp);
         if (v_flag) fprintf (error_fp,
            "EOF or error on %s, description not found.\n",filename);
         return;
      }
      buffer2[count] = '\0';
      p1 = buffer2;
   }

   /*
    * close the file
    */
   fclose(fp);
   return;
}