From fff9954b7025a8f6d090002792f82a5ab30fbbd4 Mon Sep 17 00:00:00 2001 From: Kelvin Sherlock Date: Sat, 10 Aug 2013 20:25:15 -0400 Subject: [PATCH] mimetype/extension updates --- ftype.c | 62 ++++++++++++++++++--------- ftype.txt | 39 ++++++++++++++--- gno.orca.h | 9 ++++ gopher.c | 13 ++++-- http.c | 87 +++++++++++++++++++++++++++++++++---- makefile.mk | 7 +-- mime.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++ mime.txt | 97 ++++++++++++++++++++++++++++++++++++++++++ prototypes.h | 20 ++++++++- scheme.c | 45 ++++++++++++-------- scheme.txt | 6 +++ setftype.c | 75 ++++++++++++-------------------- txtable.rb | 2 +- 13 files changed, 473 insertions(+), 107 deletions(-) create mode 100644 gno.orca.h create mode 100644 mime.c create mode 100644 mime.txt diff --git a/ftype.c b/ftype.c index af93afb..3ee336f 100644 --- a/ftype.c +++ b/ftype.c @@ -3,30 +3,45 @@ #include -int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype) +int parse_extension(const char *cp, Word *ftype, LongWord *atype) { + Word size; Word *wp = (Word *)cp; Word h; + int i; + int pd; *ftype = 0; *atype = 0; - if (!cp || !size) return 0; + + if (!cp || !*cp) return 0; + + pd = -1; + for (i = 0; ; ++i) + { + char c; + + c = cp[i]; + if (c == 0) break; + if (c == '.') pd = i; + } + + // pd == position of final . + // i == strlen + + if (pd == -1) return 0; + if (pd + 1 >= i) return 0; + pd++; // skip past it... + + cp += pd; + size = i - pd; h = ((*cp | 0x20) ^ size) & 0x0f; switch (h) { case 0x00: - // shk - if (size == 3 - && (wp[0] | 0x2020) == 0x6873 // 'sh' - && (cp[2] | 0x20) == 0x6b // 'k' - ) { - *ftype = 0xe0; - *atype = 0x8002; - return 1; - } // text if (size == 4 && (wp[0] | 0x2020) == 0x6574 // 'te' @@ -36,6 +51,15 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype) *atype = 0x0000; return 1; } + // shk + if (size == 3 + && (wp[0] | 0x2020) == 0x6873 // 'sh' + && (cp[2] | 0x20) == 0x6b // 'k' + ) { + *ftype = 0xe0; + *atype = 0x8002; + return 1; + } break; case 0x01: @@ -51,14 +75,6 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype) break; case 0x02: - // c - if (size == 1 - && (cp[0] | 0x20) == 0x63 // 'c' - ) { - *ftype = 0xb0; - *atype = 0x0008; - return 1; - } // asm if (size == 3 && (wp[0] | 0x2020) == 0x7361 // 'as' @@ -68,6 +84,14 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype) *atype = 0x0003; return 1; } + // c + if (size == 1 + && (cp[0] | 0x20) == 0x63 // 'c' + ) { + *ftype = 0xb0; + *atype = 0x0008; + return 1; + } break; case 0x03: diff --git a/ftype.txt b/ftype.txt index eec4ba1..3dfc276 100644 --- a/ftype.txt +++ b/ftype.txt @@ -4,15 +4,44 @@ #include -int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype) +// cp should be a filename w/ .ext +int parse_extension_c(const char *cp, Word *ftype, LongWord *atype) { + int i; + int pd; + + if (!cp || !*cp) return 0; + + pd = -1; + for (i = 0; ; ++i) + { + char c; + + c = cp[i]; + if (c == 0) break; + if (c == '.') pd = i; + } + + // pd == position of final . + // i == strlen + + if (pd == -1) return 0; + if (pd + 1 >= i) return 0; + pd++; // skip past it... + + return parse_extension(cp + pd, i - pd, ftype, atype); +} + +// cp is just the extension +int parse_extension(const char *cp, Word size, Word *ftype, LongWord *atype) +{ + Word size; Word *wp = (Word *)cp; Word h; - - *ftype = 0; - *atype = 0; - + + if (!cp || !size) return 0; + h = ((*cp | 0x20) ^ size) & 0x0f; diff --git a/gno.orca.h b/gno.orca.h new file mode 100644 index 0000000..6855cb1 --- /dev/null +++ b/gno.orca.h @@ -0,0 +1,9 @@ +#ifndef __orca__ +#define __orca__ + +#include + +#define fsetbinary(f) (f->_flag &= ~_IOTEXT) + + +#endif \ No newline at end of file diff --git a/gopher.c b/gopher.c index 9be62d9..d9395b9 100644 --- a/gopher.c +++ b/gopher.c @@ -21,8 +21,8 @@ #include "s16debug.h" - -extern int setfiletype(const char *filename); +static FileInfoRecGS FileInfo; +static Word FileAttr; static int gopher_binary(Word ipid, FILE *file) { @@ -282,6 +282,9 @@ int do_gopher(const char *url, URLComponents *components) file = stdout; + + FileAttr = 0; + memset(&FileInfo, 0, sizeof(FileInfo)); if (!components->portNumber) components->portNumber = 70; @@ -351,7 +354,11 @@ int do_gopher(const char *url, URLComponents *components) return -1; } - setfiletype(filename); + if (parse_extension(filename, &FileInfo.fileType, &FileInfo.auxType)) + { + FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE; + setfileattr(filename, &FileInfo, FileAttr); + } } diff --git a/http.c b/http.c index f9ae284..dcd3df3 100644 --- a/http.c +++ b/http.c @@ -38,6 +38,10 @@ #include "http.utils.h" #include "s16debug.h" + +static FileInfoRecGS FileInfo; +static Word FileAttr; + static int do_http_0_9( const char *url, URLComponents *components, @@ -312,9 +316,7 @@ int read_response(Word ipid, FILE *file, Handle dict) LongWord contentSize; - int haveTime = 0; - timeGSRec time; - + int haveTime = 0; contentSize = 0; transferEncoding = -1; @@ -326,6 +328,11 @@ int read_response(Word ipid, FILE *file, Handle dict) transferEncoding = 0; } + /* + * check the transfer encoding header + * should be chunked or identity (default) + * + */ value = DictionaryGet(dict, "Transfer-Encoding", 17, &valueSize); if (value) { @@ -359,6 +366,45 @@ int read_response(Word ipid, FILE *file, Handle dict) } } + /* + * convert a content-type header mime string into + * a file type / aux type. + * + */ + value = DictionaryGet(dict, "Content-Type", 12, &valueSize) + if (value && valueSize) + { + int i; + int slash = -1; + // strip ';' + for (i = 0; i < valueSize; ++i) + { + char c = value[i]; + if (c == ';') break; + if (c == '/') slash = i; + } + + // todo -- flag for this or not. + valueSize = i; + if (parse_mime(value, valueSize, + &FileInfo.fileType, + &FileInfo.auxType)) + { + FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE; + } + else if (slash != -1 && parse_mime(value, slash, + &FileInfo.fileType, + &FileInfo.auxType)) + { + FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE; + } + + } + + /* + * convert the Last Modified header into a file mod date + * + */ value = DictionaryGet(dict, "Last-Modified", 13, &valueSize); if (value && valueSize <= 255) { @@ -367,17 +413,31 @@ int read_response(Word ipid, FILE *file, Handle dict) pstring = (char *)malloc(valueSize + 1); if (pstring) { + struct { + LongWord lo; + LongWord hi; + } comp; + *pstring = valueSize; memcpy(pstring + 1, value, valueSize); // parse the last-modified timestamp. // 0x0e00 is rfc 822 format. // (which is now obsoleted by rfc 2822 but close enough) - tiParseDateString(&time, pstring, 0x0e00); - if (!_toolErr) + + + // should use _tiDateString2Sec to get seconds + // then use ConvSeconds to get the date + // this should handle timezones. + + tiDateString2Sec(&comp, pstring, 0x0e00); + if (!_toolErr && hi == 0) { - haveTime = 1; + ConvSeconds(secs2TimeRec, comp.lo, &FileInfo.modDateTime); + FileAttr |= ATTR_MODTIME; + haveTime = 1; } + free(pstring); } } @@ -542,6 +602,9 @@ int do_http(const char *url, URLComponents *components) FILE *file; file = stdout; + + FileAttr = 0; + memset(&FileInfo, 0, sizeof(FileInfo)); if (!components->portNumber) components->portNumber = 80; @@ -575,6 +638,8 @@ int do_http(const char *url, URLComponents *components) if (path) { // path starts with /. + + // todo -- also need to strip any ? parameters. filename = strrchr(path + 1, '/'); if (filename) // *filename == '/' @@ -618,9 +683,11 @@ int do_http(const char *url, URLComponents *components) return -1; } - // should set from mime type? - setfiletype(filename); - + // hmm, flag for this vs content type? + if (parse_extension_c(filename, &FileInfo.fileType, &FileInfo.auxType)) + { + FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE; + } } @@ -646,6 +713,8 @@ int do_http(const char *url, URLComponents *components) fflush(file); if (file != stdout) fclose(file); + if (filename) setfileattr(filename, &FileInfo, FileAttr); + CloseLoop(&connection); free(host); free(path); diff --git a/makefile.mk b/makefile.mk index 9aada29..428909a 100644 --- a/makefile.mk +++ b/makefile.mk @@ -1,7 +1,7 @@ CFLAGS += $(DEFINES) -v -w -OBJS = main.o gopher.o url.o connection.o readline2.o scheme.o ftype.o setftype.o \ - s16debug.o common.o http.o http.utils.o dictionary.o flags.o \ - time.o +OBJS = main.o gopher.o url.o connection.o readline2.o scheme.o ftype.o \ + mime.o setftype.o s16debug.o common.o http.o http.utils.o \ + dictionary.o flags.o time.o gopher: $(OBJS) $(CC) $(LDFLAGS) $(OBJS) $(LDLIBS) -o $@ @@ -34,6 +34,7 @@ dictionary.o: dictionary.c dictionary.h setftype.o: setftype.c scheme.o: scheme.c url.h ftype.o: ftype.c +mime.o: mime.c time.o: time.c diff --git a/mime.c b/mime.c new file mode 100644 index 0000000..5386ec6 --- /dev/null +++ b/mime.c @@ -0,0 +1,118 @@ +#pragma optimize 79 +#pragma noroot + +#include + +int parse_mime(const char *cp, Word *ftype, LongWord *atype) +{ + Word size; + Word *wp = (Word *)cp; + Word h; + int i; + int semi; + int slash; + + *ftype = 0; + *atype = 0; + + /* + * two pass + * 1. type/subtype + * 2. type + */ + + + + if (!cp || !*cp) return 0; + + // find any optional ';' + semi = slash = -1; + for (i = 0; ; ++i) + { + char c = cp[i]; + if (c == 0) break; + if (c == '/') slash = i; + if (c == ';') + { + semi = i; + break; + } + } + size = i; + + for (i = 0; i < 2; ++i) + { + h = ((*cp | 0x20) ^ size) & 0x0f; + + switch (h) + { + case 0x00: + // text + if (size == 4 + && (wp[0] | 0x2020) == 0x6574 // 'te' + && (wp[1] | 0x2020) == 0x7478 // 'xt' + ) { + *ftype = 0x04; + *atype = 0x0000; + return 1; + } + break; + + case 0x09: + // application/octet-stream + if (size == 24 + && (wp[0] | 0x2020) == 0x7061 // 'ap' + && (wp[1] | 0x2020) == 0x6c70 // 'pl' + && (wp[2] | 0x2020) == 0x6369 // 'ic' + && (wp[3] | 0x2020) == 0x7461 // 'at' + && (wp[4] | 0x2020) == 0x6f69 // 'io' + && (wp[5] | 0x2020) == 0x2f6e // 'n/' + && (wp[6] | 0x2020) == 0x636f // 'oc' + && (wp[7] | 0x2020) == 0x6574 // 'te' + && (wp[8] | 0x2020) == 0x2d74 // 't-' + && (wp[9] | 0x2020) == 0x7473 // 'st' + && (wp[10] | 0x2020) == 0x6572 // 're' + && (wp[11] | 0x2020) == 0x6d61 // 'am' + ) { + *ftype = 0x02; + *atype = 0x0000; + return 1; + } + // text/x-pascal + if (size == 13 + && (wp[0] | 0x2020) == 0x6574 // 'te' + && (wp[1] | 0x2020) == 0x7478 // 'xt' + && (wp[2] | 0x2020) == 0x782f // '/x' + && (wp[3] | 0x2020) == 0x702d // '-p' + && (wp[4] | 0x2020) == 0x7361 // 'as' + && (wp[5] | 0x2020) == 0x6163 // 'ca' + && (cp[12] | 0x20) == 0x6c // 'l' + ) { + *ftype = 0xb0; + *atype = 0x0005; + return 1; + } + break; + + case 0x0c: + // text/x-c + if (size == 8 + && (wp[0] | 0x2020) == 0x6574 // 'te' + && (wp[1] | 0x2020) == 0x7478 // 'xt' + && (wp[2] | 0x2020) == 0x782f // '/x' + && (wp[3] | 0x2020) == 0x632d // '-c' + ) { + *ftype = 0xb0; + *atype = 0x0008; + return 1; + } + break; + + } + + size = slash; + if (size == -1) break; + } + + return 0; +} diff --git a/mime.txt b/mime.txt new file mode 100644 index 0000000..91b2452 --- /dev/null +++ b/mime.txt @@ -0,0 +1,97 @@ +%% +#pragma optimize 79 +#pragma noroot + +#include + +int parse_mime_c(const char *cp, Word *ftype, LongWord *atype) +{ + int i; + int slash; + + if (!cp || !*cp) + return 0; + + /* + * two pass + * 1. type/subtype + * 2. type + */ + + semi = slash = -1; + for (i = 0; ; ++i) + { + char c = cp[i]; + if (c == 0 || c == ';') break; + + if (c == '/') + { + slash = i; + } + } + + // try type/subtype + if (parse_mime(cp, i, ftype, atype)) + return 1; + + + // try type + if (slash != -1) + return parse_mime(cp, slash, ftype, atype)); + + return 0; +} + +int parse_mime(const char *cp, Word size, Word *ftype, LongWord *atype) +{ + Word size; + Word *wp = (Word *)cp; + Word h; + + if (!cp || !size) return 0; + +retry: + + h = ((*cp | 0x20) ^ size) & 0x0f; + + switch (h) + { +%% + } + +/* + // try again as type + while (--size) + { + if (cp[size] == '/') goto retry; + } +*/ + + return 0; +} +%% + +'text' -> + *ftype = 0x04; + *atype = 0x0000; + return 1; +. + +'text/x-c' -> + *ftype = 0xb0; + *atype = 0x0008; + return 1; +. + +'text/x-pascal' -> + *ftype = 0xb0; + *atype = 0x0005; + return 1; +. + +'application/octet-stream' -> + *ftype = 0x02; + *atype = 0x0000; + return 1; +. + diff --git a/prototypes.h b/prototypes.h index 7d1d21d..09e9ada 100644 --- a/prototypes.h +++ b/prototypes.h @@ -23,7 +23,25 @@ typedef struct ReadBlock int read_binary(unsigned ipid, FILE *file, ReadBlock *); int read_binary_size(unsigned ipid, FILE *file, ReadBlock *); -int setfiletype(const char *filename); +int parse_extension_c(const char *cp, Word *ftype, LongWord *atype); +int parse_extension(const char *cp, Word size, Word *ftype, LongWord *atype); + +int parse_mime_c(const char *cp, Word *ftype, LongWord *atype); +int parse_mime(const char *cp, Word size, Word *ftype, LongWord *atype) + + +#ifdef __GSOS__ +enum { + ATTR_ACCESS = 1, + ATTR_FILETYPE = 2, + ATTR_AUXTYPE = 4, + ATTR_CREATETIME = 8, + ATTR_MODTIME = 16 +}; + +int setfileattr(const char *filename, FileInfoRecGS *info, unsigned flags) + +#endif #ifdef __CONNECTION_H__ diff --git a/scheme.c b/scheme.c index ef06876..0fe53ed 100644 --- a/scheme.c +++ b/scheme.c @@ -24,6 +24,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c) { // --- begin auto-generated -- case 0x00: + // dict + if (size == 4 + && (wp[0] | 0x2020) == 0x6964 // 'di' + && (wp[1] | 0x2020) == 0x7463 // 'ct' + ) { + c->schemeType = SCHEME_DICT; + c->portNumber = 2628; + return; + } // ssh if (size == 3 && (wp[0] | 0x2020) == 0x7373 // 'ss' @@ -49,6 +58,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c) break; case 0x02: + // file + if (size == 4 + && (wp[0] | 0x2020) == 0x6966 // 'fi' + && (wp[1] | 0x2020) == 0x656c // 'le' + ) { + c->schemeType = SCHEME_FILE; + c->portNumber = 0; + return; + } // afp if (size == 3 && (wp[0] | 0x2020) == 0x6661 // 'af' @@ -68,15 +86,6 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c) c->portNumber = 23; return; } - // file - if (size == 4 - && (wp[0] | 0x2020) == 0x6966 // 'fi' - && (wp[1] | 0x2020) == 0x656c // 'le' - ) { - c->schemeType = SCHEME_FILE; - c->portNumber = 0; - return; - } break; case 0x05: @@ -128,6 +137,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c) break; case 0x0d: + // nfs + if (size == 3 + && (wp[0] | 0x2020) == 0x666e // 'nf' + && (cp[2] | 0x20) == 0x73 // 's' + ) { + c->schemeType = SCHEME_NFS; + c->portNumber = 2049; + return; + } // https if (size == 5 && (wp[0] | 0x2020) == 0x7468 // 'ht' @@ -138,15 +156,6 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c) c->portNumber = 443; return; } - // nfs - if (size == 3 - && (wp[0] | 0x2020) == 0x666e // 'nf' - && (cp[2] | 0x20) == 0x73 // 's' - ) { - c->schemeType = SCHEME_NFS; - c->portNumber = 2049; - return; - } break; // --- end auto-generated -- diff --git a/scheme.txt b/scheme.txt index 243b949..de098e4 100644 --- a/scheme.txt +++ b/scheme.txt @@ -98,3 +98,9 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c) c->portNumber = 2049; return; . + +'dict' -> + c->schemeType = SCHEME_DICT; + c->portNumber = 2628; + return; +. diff --git a/setftype.c b/setftype.c index 86606a1..30d4656 100644 --- a/setftype.c +++ b/setftype.c @@ -5,54 +5,33 @@ #include #include -extern int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype); - -int setfiletype(const char *filename) +int setfileattr(const char *filename, FileInfoRecGS *info, unsigned flags) { - int pd; - int i; - - Word ftype; - Word atype; - int rv; - - FileInfoRecGS info; - - // find the extension in the filename. - - pd = -1; - for (i = 0; ; ++i) - { - char c; - - c = filename[i]; - if (c == 0) break; - if (c == '.') pd = i; - } - - // pd == position of final . - // i == strlen - - if (pd == -1) return 0; - if (pd + 1 >= i) return 0; - pd++; // skip past it... - - if (!parse_ftype(filename + pd, i - pd, &ftype, &atype)) - return 0; - - info.pCount = 4; - info.pathname = (GSString255Ptr)__C2GSMALLOC(filename); - info.access = 0xe3; - info.auxType = atype; - info.fileType = ftype; - - //GetFileInfoGS(&info); - //if (_toolErr) return 0; - - SetFileInfoGS(&info); - rv = _toolErr; - if (_toolErr) + Word rv; + FileInfoRecGS tmp; + + if (!info) return 0; + if (!flags) return 1; + + tmp.pCount = 7; + tmp.pathname = (GSString255Ptr)__C2GSMALLOC(filename); + if (!tmp.pathname) return 0; + + GetFileInfoGS(&tmp); + rv = _toolErr; + if (!_toolErr) + { + if (flags & ATTR_ACCESS) tmp.access = info->access; + if (flags & ATTR_FILETYPE) tmp.fileType = info->fileType; + if (flags & ATTR_AUXTYPE) tmp.auxType = info->auxType; + if (flags & ATTR_CREATETIME) tmp.createDateTime = info->createDateTime; + if (flags & ATTR_MODTIME) tmp.modDateTime = info->modDateTime; + + SetFileInfoGS(&tmp); + rv = _toolErr; + } + + free (tmp.pathname); - free(info.pathname); return rv ? 0 : 1; -} +} \ No newline at end of file diff --git a/txtable.rb b/txtable.rb index e3ec9ac..d0cd8e8 100644 --- a/txtable.rb +++ b/txtable.rb @@ -115,7 +115,7 @@ ARGV.each {|filename| if !rule next if line == '' - if line =~ /^'([a-zA-Z0-9.+_-]+)'\s*->$/ + if line =~ /^'([a-zA-Z0-9.+_\/-]+)'\s*->$/ rule = $1; raise "duplicate rule: #{rule}" if rules[rule] next