mimetype/extension updates

This commit is contained in:
Kelvin Sherlock 2013-08-10 20:25:15 -04:00
parent ed6c2fc7f7
commit fff9954b70
13 changed files with 473 additions and 107 deletions

62
ftype.c
View File

@ -3,30 +3,45 @@
#include <Types.h>
int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
int parse_extension(const char *cp, Word *ftype, LongWord *atype)
{
Word size;
Word *wp = (Word *)cp;
Word h;
int i;
int pd;
*ftype = 0;
*atype = 0;
if (!cp || !size) return 0;
if (!cp || !*cp) return 0;
pd = -1;
for (i = 0; ; ++i)
{
char c;
c = cp[i];
if (c == 0) break;
if (c == '.') pd = i;
}
// pd == position of final .
// i == strlen
if (pd == -1) return 0;
if (pd + 1 >= i) return 0;
pd++; // skip past it...
cp += pd;
size = i - pd;
h = ((*cp | 0x20) ^ size) & 0x0f;
switch (h)
{
case 0x00:
// shk
if (size == 3
&& (wp[0] | 0x2020) == 0x6873 // 'sh'
&& (cp[2] | 0x20) == 0x6b // 'k'
) {
*ftype = 0xe0;
*atype = 0x8002;
return 1;
}
// text
if (size == 4
&& (wp[0] | 0x2020) == 0x6574 // 'te'
@ -36,6 +51,15 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
*atype = 0x0000;
return 1;
}
// shk
if (size == 3
&& (wp[0] | 0x2020) == 0x6873 // 'sh'
&& (cp[2] | 0x20) == 0x6b // 'k'
) {
*ftype = 0xe0;
*atype = 0x8002;
return 1;
}
break;
case 0x01:
@ -51,14 +75,6 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
break;
case 0x02:
// c
if (size == 1
&& (cp[0] | 0x20) == 0x63 // 'c'
) {
*ftype = 0xb0;
*atype = 0x0008;
return 1;
}
// asm
if (size == 3
&& (wp[0] | 0x2020) == 0x7361 // 'as'
@ -68,6 +84,14 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
*atype = 0x0003;
return 1;
}
// c
if (size == 1
&& (cp[0] | 0x20) == 0x63 // 'c'
) {
*ftype = 0xb0;
*atype = 0x0008;
return 1;
}
break;
case 0x03:

View File

@ -4,15 +4,44 @@
#include <Types.h>
int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
/*
 * parse_extension_c
 *
 * cp should be a NUL-terminated filename w/ .ext
 *
 * Finds the final '.' in cp and passes everything after it (pointer and
 * length) to parse_extension(), returning that call's result.
 *
 * Returns 0 without calling parse_extension() (and without touching
 * *ftype / *atype) when cp is NULL or empty, when it contains no '.',
 * or when the final '.' is the last character.
 */
int parse_extension_c(const char *cp, Word *ftype, LongWord *atype)
{
int len;
int dot;
if (!cp || !*cp) return 0;
dot = -1;
len = 0;
while (cp[len] != 0)
{
// remember the most recent '.' seen so far.
if (cp[len] == '.') dot = len;
++len;
}
// dot == position of final . (or -1)
// len == strlen
if (dot == -1 || dot + 1 >= len) return 0;
++dot; // first character of the extension
return parse_extension(cp + dot, len - dot, ftype, atype);
}
// cp is just the extension
int parse_extension(const char *cp, Word size, Word *ftype, LongWord *atype)
{
Word size;
Word *wp = (Word *)cp;
Word h;
*ftype = 0;
*atype = 0;
if (!cp || !size) return 0;
h = ((*cp | 0x20) ^ size) & 0x0f;

9
gno.orca.h Normal file
View File

@ -0,0 +1,9 @@
#ifndef __orca__
#define __orca__
#include <stdio.h>
/*
 * fsetbinary(f)
 *
 * Clears the _IOTEXT bit in the _flag member of the stream f points to.
 * (Presumably this switches the stream out of text mode -- confirm
 * against the stdio implementation in use.)
 *
 * The argument and the whole expansion are parenthesized so that
 * expressions such as fsetbinary(files + 1) expand correctly.
 * f is evaluated exactly once.
 */
#define fsetbinary(f) ((f)->_flag &= ~_IOTEXT)
#endif

View File

@ -21,8 +21,8 @@
#include "s16debug.h"
extern int setfiletype(const char *filename);
static FileInfoRecGS FileInfo;
static Word FileAttr;
static int gopher_binary(Word ipid, FILE *file)
{
@ -282,6 +282,9 @@ int do_gopher(const char *url, URLComponents *components)
file = stdout;
FileAttr = 0;
memset(&FileInfo, 0, sizeof(FileInfo));
if (!components->portNumber) components->portNumber = 70;
@ -351,7 +354,11 @@ int do_gopher(const char *url, URLComponents *components)
return -1;
}
setfiletype(filename);
if (parse_extension(filename, &FileInfo.fileType, &FileInfo.auxType))
{
FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
setfileattr(filename, &FileInfo, FileAttr);
}
}

87
http.c
View File

@ -38,6 +38,10 @@
#include "http.utils.h"
#include "s16debug.h"
static FileInfoRecGS FileInfo;
static Word FileAttr;
static int do_http_0_9(
const char *url,
URLComponents *components,
@ -312,9 +316,7 @@ int read_response(Word ipid, FILE *file, Handle dict)
LongWord contentSize;
int haveTime = 0;
timeGSRec time;
int haveTime = 0;
contentSize = 0;
transferEncoding = -1;
@ -326,6 +328,11 @@ int read_response(Word ipid, FILE *file, Handle dict)
transferEncoding = 0;
}
/*
* check the transfer encoding header
* should be chunked or identity (default)
*
*/
value = DictionaryGet(dict, "Transfer-Encoding", 17, &valueSize);
if (value)
{
@ -359,6 +366,45 @@ int read_response(Word ipid, FILE *file, Handle dict)
}
}
/*
* convert a content-type header mime string into
* a file type / aux type.
*
*/
value = DictionaryGet(dict, "Content-Type", 12, &valueSize)
if (value && valueSize)
{
int i;
int slash = -1;
// strip ';'
for (i = 0; i < valueSize; ++i)
{
char c = value[i];
if (c == ';') break;
if (c == '/') slash = i;
}
// todo -- flag for this or not.
valueSize = i;
if (parse_mime(value, valueSize,
&FileInfo.fileType,
&FileInfo.auxType))
{
FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
}
else if (slash != -1 && parse_mime(value, slash,
&FileInfo.fileType,
&FileInfo.auxType))
{
FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
}
}
/*
* convert the Last Modified header into a file mod date
*
*/
value = DictionaryGet(dict, "Last-Modified", 13, &valueSize);
if (value && valueSize <= 255)
{
@ -367,17 +413,31 @@ int read_response(Word ipid, FILE *file, Handle dict)
pstring = (char *)malloc(valueSize + 1);
if (pstring)
{
struct {
LongWord lo;
LongWord hi;
} comp;
*pstring = valueSize;
memcpy(pstring + 1, value, valueSize);
// parse the last-modified timestamp.
// 0x0e00 is rfc 822 format.
// (which is now obsoleted by rfc 2822 but close enough)
tiParseDateString(&time, pstring, 0x0e00);
if (!_toolErr)
// should use _tiDateString2Sec to get seconds
// then use ConvSeconds to get the date
// this should handle timezones.
tiDateString2Sec(&comp, pstring, 0x0e00);
if (!_toolErr && hi == 0)
{
haveTime = 1;
ConvSeconds(secs2TimeRec, comp.lo, &FileInfo.modDateTime);
FileAttr |= ATTR_MODTIME;
haveTime = 1;
}
free(pstring);
}
}
@ -542,6 +602,9 @@ int do_http(const char *url, URLComponents *components)
FILE *file;
file = stdout;
FileAttr = 0;
memset(&FileInfo, 0, sizeof(FileInfo));
if (!components->portNumber) components->portNumber = 80;
@ -575,6 +638,8 @@ int do_http(const char *url, URLComponents *components)
if (path)
{
// path starts with /.
// todo -- also need to strip any ? parameters.
filename = strrchr(path + 1, '/');
if (filename) // *filename == '/'
@ -618,9 +683,11 @@ int do_http(const char *url, URLComponents *components)
return -1;
}
// should set from mime type?
setfiletype(filename);
// hmm, flag for this vs content type?
if (parse_extension_c(filename, &FileInfo.fileType, &FileInfo.auxType))
{
FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
}
}
@ -646,6 +713,8 @@ int do_http(const char *url, URLComponents *components)
fflush(file);
if (file != stdout) fclose(file);
if (filename) setfileattr(filename, &FileInfo, FileAttr);
CloseLoop(&connection);
free(host);
free(path);

View File

@ -1,7 +1,7 @@
CFLAGS += $(DEFINES) -v -w
OBJS = main.o gopher.o url.o connection.o readline2.o scheme.o ftype.o setftype.o \
s16debug.o common.o http.o http.utils.o dictionary.o flags.o \
time.o
OBJS = main.o gopher.o url.o connection.o readline2.o scheme.o ftype.o \
mime.o setftype.o s16debug.o common.o http.o http.utils.o \
dictionary.o flags.o time.o
gopher: $(OBJS)
$(CC) $(LDFLAGS) $(OBJS) $(LDLIBS) -o $@
@ -34,6 +34,7 @@ dictionary.o: dictionary.c dictionary.h
setftype.o: setftype.c
scheme.o: scheme.c url.h
ftype.o: ftype.c
mime.o: mime.c
time.o: time.c

118
mime.c Normal file
View File

@ -0,0 +1,118 @@
#pragma optimize 79
#pragma noroot
#include <Types.h>
/*
 * parse_mime
 *
 * Map a MIME type string to a file type / aux type pair.
 *
 * cp    - NUL-terminated MIME string; anything from the first ';' on
 *         (parameters) is ignored.
 * ftype - receives the file type (set to 0 when nothing matches).
 * atype - receives the aux type (set to 0 when nothing matches).
 *
 * Returns 1 when a rule matched, 0 otherwise.
 *
 * Matching is case-insensitive (every character is OR-ed with 0x20) and
 * compares two characters at a time through a Word pointer, low byte
 * first (e.g. 0x6574 is 'te'); this assumes Word is 16 bits wide.
 *
 * NOTE(review): the prototype in the shared header and the mime.txt
 * template both take an explicit "Word size" argument; this definition
 * does not.  Confirm which signature callers are built against.
 */
int parse_mime(const char *cp, Word *ftype, LongWord *atype)
{
Word size;
Word *wp = (Word *)cp;
Word h;
int i;
int slash;
*ftype = 0;
*atype = 0;
/*
 * two pass
 * 1. type/subtype
 * 2. type
 */
if (!cp || !*cp) return 0;
// measure up to the optional ';' and remember the last '/'.
slash = -1;
for (i = 0; ; ++i)
{
char c = cp[i];
if (c == 0 || c == ';') break;
if (c == '/') slash = i;
}
size = i;
for (i = 0; i < 2; ++i)
{
// bucket on first character (lower-cased) xor length.
h = ((*cp | 0x20) ^ size) & 0x0f;
switch (h)
{
case 0x00:
// text
if (size == 4
&& (wp[0] | 0x2020) == 0x6574 // 'te'
&& (wp[1] | 0x2020) == 0x7478 // 'xt'
) {
*ftype = 0x04;
*atype = 0x0000;
return 1;
}
break;
case 0x09:
// application/octet-stream
if (size == 24
&& (wp[0] | 0x2020) == 0x7061 // 'ap'
&& (wp[1] | 0x2020) == 0x6c70 // 'pl'
&& (wp[2] | 0x2020) == 0x6369 // 'ic'
&& (wp[3] | 0x2020) == 0x7461 // 'at'
&& (wp[4] | 0x2020) == 0x6f69 // 'io'
&& (wp[5] | 0x2020) == 0x2f6e // 'n/'
&& (wp[6] | 0x2020) == 0x636f // 'oc'
&& (wp[7] | 0x2020) == 0x6574 // 'te'
&& (wp[8] | 0x2020) == 0x2d74 // 't-'
&& (wp[9] | 0x2020) == 0x7473 // 'st'
&& (wp[10] | 0x2020) == 0x6572 // 're'
&& (wp[11] | 0x2020) == 0x6d61 // 'am'
) {
*ftype = 0x02;
*atype = 0x0000;
return 1;
}
// text/x-pascal
if (size == 13
&& (wp[0] | 0x2020) == 0x6574 // 'te'
&& (wp[1] | 0x2020) == 0x7478 // 'xt'
&& (wp[2] | 0x2020) == 0x782f // '/x'
&& (wp[3] | 0x2020) == 0x702d // '-p'
&& (wp[4] | 0x2020) == 0x7361 // 'as'
&& (wp[5] | 0x2020) == 0x6163 // 'ca'
&& (cp[12] | 0x20) == 0x6c // 'l'
) {
*ftype = 0xb0;
*atype = 0x0005;
return 1;
}
break;
case 0x0c:
// text/x-c
if (size == 8
&& (wp[0] | 0x2020) == 0x6574 // 'te'
&& (wp[1] | 0x2020) == 0x7478 // 'xt'
&& (wp[2] | 0x2020) == 0x782f // '/x'
&& (wp[3] | 0x2020) == 0x632d // '-c'
) {
*ftype = 0xb0;
*atype = 0x0008;
return 1;
}
break;
}
// no '/' means there is no bare type to retry with.
// test the signed index: size is unsigned, so comparing it with -1
// only works where int is no wider than Word.
if (slash < 0) break;
size = slash;
}
return 0;
}

97
mime.txt Normal file
View File

@ -0,0 +1,97 @@
%%
#pragma optimize 79
#pragma noroot
#include <Types.h>
// declared here because parse_mime is defined after its first use.
int parse_mime(const char *cp, Word size, Word *ftype, LongWord *atype);

/*
 * parse_mime_c
 *
 * cp is a NUL-terminated MIME string, optionally followed by
 * ';' and parameters (which are ignored).
 *
 * Tries parse_mime() on the full "type/subtype" first; if that fails
 * and the string contains a '/', tries again with only the part before
 * the last '/'.
 *
 * Returns the result of the parse_mime() call that decided the outcome,
 * or 0 when cp is NULL or empty.
 */
int parse_mime_c(const char *cp, Word *ftype, LongWord *atype)
{
int i;
int slash;
if (!cp || !*cp)
return 0;
/*
 * two pass
 * 1. type/subtype
 * 2. type
 */
slash = -1;
for (i = 0; ; ++i)
{
char c = cp[i];
if (c == 0 || c == ';') break;
if (c == '/')
{
slash = i;
}
}
// i == length up to (not including) the NUL or ';'
// try type/subtype
if (parse_mime(cp, i, ftype, atype))
return 1;
// try type
if (slash != -1)
return parse_mime(cp, slash, ftype, atype);
return 0;
}
/*
 * parse_mime
 *
 * cp    - MIME string (need not be NUL-terminated).
 * size  - number of characters of cp to match.
 * ftype, atype - written by the rule that matches.
 *
 * Returns 0 when cp is NULL, size is 0, or no rule returns first.
 *
 * The switch body is supplied at the %% marker below (presumably
 * expanded from the rules at the end of this file by the generator
 * script -- confirm).  wp is provided for that inserted code.
 */
int parse_mime(const char *cp, Word size, Word *ftype, LongWord *atype)
{
// note: size is the parameter; a local "Word size" here would
// redeclare it.
Word *wp = (Word *)cp;
Word h;
if (!cp || !size) return 0;
retry:
// bucket on first character (lower-cased) xor length.
h = ((*cp | 0x20) ^ size) & 0x0f;
switch (h)
{
%%
}
/*
// try again as type
while (--size)
{
if (cp[size] == '/') goto retry;
}
*/
return 0;
}
%%
'text' ->
*ftype = 0x04;
*atype = 0x0000;
return 1;
.
'text/x-c' ->
*ftype = 0xb0;
*atype = 0x0008;
return 1;
.
'text/x-pascal' ->
*ftype = 0xb0;
*atype = 0x0005;
return 1;
.
'application/octet-stream' ->
*ftype = 0x02;
*atype = 0x0000;
return 1;
.

View File

@ -23,7 +23,25 @@ typedef struct ReadBlock
int read_binary(unsigned ipid, FILE *file, ReadBlock *);
int read_binary_size(unsigned ipid, FILE *file, ReadBlock *);
int setfiletype(const char *filename);
/*
 * file type lookup.
 * the _c variants take a NUL-terminated C string;
 * the others take a pointer plus an explicit length.
 * all return non-zero when *ftype / *atype were filled in from a match.
 */
int parse_extension_c(const char *cp, Word *ftype, LongWord *atype);
int parse_extension(const char *cp, Word size, Word *ftype, LongWord *atype);
int parse_mime_c(const char *cp, Word *ftype, LongWord *atype);
int parse_mime(const char *cp, Word size, Word *ftype, LongWord *atype);
#ifdef __GSOS__
/*
 * bit flags for setfileattr(): each bit selects one field of the
 * FileInfoRecGS to copy onto the file; flags may be OR-ed together.
 */
enum {
ATTR_ACCESS = 1,
ATTR_FILETYPE = 2,
ATTR_AUXTYPE = 4,
ATTR_CREATETIME = 8,
ATTR_MODTIME = 16
};
/*
 * apply the fields of info selected by flags (ATTR_*) to filename.
 * returns 1 on success (or when flags is 0), 0 on failure.
 */
int setfileattr(const char *filename, FileInfoRecGS *info, unsigned flags);
#endif
#ifdef __CONNECTION_H__

View File

@ -24,6 +24,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
{
// --- begin auto-generated --
case 0x00:
// dict
if (size == 4
&& (wp[0] | 0x2020) == 0x6964 // 'di'
&& (wp[1] | 0x2020) == 0x7463 // 'ct'
) {
c->schemeType = SCHEME_DICT;
c->portNumber = 2628;
return;
}
// ssh
if (size == 3
&& (wp[0] | 0x2020) == 0x7373 // 'ss'
@ -49,6 +58,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
break;
case 0x02:
// file
if (size == 4
&& (wp[0] | 0x2020) == 0x6966 // 'fi'
&& (wp[1] | 0x2020) == 0x656c // 'le'
) {
c->schemeType = SCHEME_FILE;
c->portNumber = 0;
return;
}
// afp
if (size == 3
&& (wp[0] | 0x2020) == 0x6661 // 'af'
@ -68,15 +86,6 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
c->portNumber = 23;
return;
}
// file
if (size == 4
&& (wp[0] | 0x2020) == 0x6966 // 'fi'
&& (wp[1] | 0x2020) == 0x656c // 'le'
) {
c->schemeType = SCHEME_FILE;
c->portNumber = 0;
return;
}
break;
case 0x05:
@ -128,6 +137,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
break;
case 0x0d:
// nfs
if (size == 3
&& (wp[0] | 0x2020) == 0x666e // 'nf'
&& (cp[2] | 0x20) == 0x73 // 's'
) {
c->schemeType = SCHEME_NFS;
c->portNumber = 2049;
return;
}
// https
if (size == 5
&& (wp[0] | 0x2020) == 0x7468 // 'ht'
@ -138,15 +156,6 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
c->portNumber = 443;
return;
}
// nfs
if (size == 3
&& (wp[0] | 0x2020) == 0x666e // 'nf'
&& (cp[2] | 0x20) == 0x73 // 's'
) {
c->schemeType = SCHEME_NFS;
c->portNumber = 2049;
return;
}
break;
// --- end auto-generated --

View File

@ -98,3 +98,9 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
c->portNumber = 2049;
return;
.
'dict' ->
c->schemeType = SCHEME_DICT;
c->portNumber = 2628;
return;
.

View File

@ -5,54 +5,33 @@
#include <stdlib.h>
#include <gno/gno.h>
extern int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype);
int setfiletype(const char *filename)
int setfileattr(const char *filename, FileInfoRecGS *info, unsigned flags)
{
int pd;
int i;
Word ftype;
Word atype;
int rv;
FileInfoRecGS info;
// find the extension in the filename.
pd = -1;
for (i = 0; ; ++i)
{
char c;
c = filename[i];
if (c == 0) break;
if (c == '.') pd = i;
}
// pd == position of final .
// i == strlen
if (pd == -1) return 0;
if (pd + 1 >= i) return 0;
pd++; // skip past it...
if (!parse_ftype(filename + pd, i - pd, &ftype, &atype))
return 0;
info.pCount = 4;
info.pathname = (GSString255Ptr)__C2GSMALLOC(filename);
info.access = 0xe3;
info.auxType = atype;
info.fileType = ftype;
//GetFileInfoGS(&info);
//if (_toolErr) return 0;
SetFileInfoGS(&info);
rv = _toolErr;
if (_toolErr)
Word rv;
FileInfoRecGS tmp;
if (!info) return 0;
if (!flags) return 1;
tmp.pCount = 7;
tmp.pathname = (GSString255Ptr)__C2GSMALLOC(filename);
if (!tmp.pathname) return 0;
GetFileInfoGS(&tmp);
rv = _toolErr;
if (!_toolErr)
{
if (flags & ATTR_ACCESS) tmp.access = info->access;
if (flags & ATTR_FILETYPE) tmp.fileType = info->fileType;
if (flags & ATTR_AUXTYPE) tmp.auxType = info->auxType;
if (flags & ATTR_CREATETIME) tmp.createDateTime = info->createDateTime;
if (flags & ATTR_MODTIME) tmp.modDateTime = info->modDateTime;
SetFileInfoGS(&tmp);
rv = _toolErr;
}
free (tmp.pathname);
free(info.pathname);
return rv ? 0 : 1;
}
}

View File

@ -115,7 +115,7 @@ ARGV.each {|filename|
if !rule
next if line == ''
if line =~ /^'([a-zA-Z0-9.+_-]+)'\s*->$/
if line =~ /^'([a-zA-Z0-9.+_\/-]+)'\s*->$/
rule = $1;
raise "duplicate rule: #{rule}" if rules[rule]
next