mimetype/extension updates

2025-08-05 03:24:48 +00:00 · 2013-08-10 20:25:15 -04:00
parent ed6c2fc7f7
commit fff9954b70
13 changed files with 473 additions and 107 deletions
--- a/ftype.c
+++ b/ftype.c
@@ -3,30 +3,45 @@

 #include <Types.h>

-int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
+int parse_extension(const char *cp, Word *ftype, LongWord *atype)
 {
+  Word size;
  Word *wp = (Word *)cp;
  Word h;
+  int i;
+  int pd;

  *ftype = 0;
  *atype = 0;

-  if (!cp || !size) return 0;
+
+  if (!cp || !*cp) return 0;
+
+  pd = -1;
+  for (i = 0; ; ++i)
+  {
+    char c;
+
+    c = cp[i];
+    if (c == 0) break;
+    if (c == '.') pd = i;
+  }
+
+  // pd == position of final .
+  // i == strlen
+
+  if (pd == -1) return 0;
+  if (pd + 1 >= i) return 0;
+  pd++; // skip past it...
+
+  cp += pd;
+  size = i - pd;

  h = ((*cp | 0x20) ^ size) & 0x0f;

  switch (h)
  {
    case 0x00:
-      // shk
-      if (size == 3
-        && (wp[0] | 0x2020) == 0x6873 // 'sh'
-        && (cp[2] | 0x20) == 0x6b     // 'k'
-      ) {
-        *ftype = 0xe0;
-        *atype = 0x8002;
-        return 1;
-      }
      // text
      if (size == 4
        && (wp[0] | 0x2020) == 0x6574 // 'te'
@@ -36,6 +51,15 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
        *atype = 0x0000;
        return 1;
      }
+      // shk
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x6873 // 'sh'
+        && (cp[2] | 0x20) == 0x6b     // 'k'
+      ) {
+        *ftype = 0xe0;
+        *atype = 0x8002;
+        return 1;
+      }
      break;

    case 0x01:
@@ -51,14 +75,6 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
      break;

    case 0x02:
-      // c
-      if (size == 1
-        && (cp[0] | 0x20) == 0x63     // 'c'
-      ) {
-        *ftype = 0xb0;
-        *atype = 0x0008;
-        return 1;
-      }
      // asm
      if (size == 3
        && (wp[0] | 0x2020) == 0x7361 // 'as'
@@ -68,6 +84,14 @@ int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
        *atype = 0x0003;
        return 1;
      }
+      // c
+      if (size == 1
+        && (cp[0] | 0x20) == 0x63     // 'c'
+      ) {
+        *ftype = 0xb0;
+        *atype = 0x0008;
+        return 1;
+      }
      break;

    case 0x03:
--- a/ftype.txt
+++ b/ftype.txt
@@ -4,15 +4,44 @@

 #include <Types.h>

-int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
+// cp should be a filename w/ .ext
+// Finds the extension (the text after the last '.') and passes it, with
+// its length, to parse_extension().  Returns 0 when cp is NULL or empty,
+// has no '.', or ends with '.'; otherwise parse_extension()'s result.
+int parse_extension_c(const char *cp, Word *ftype, LongWord *atype)
 {
+  int len;      // index of the terminating NUL once the scan ends
+  int dot = -1; // index of the last '.', or -1 when none was seen
+
+  if (!cp || !*cp) return 0;
+
+  // one pass: measure the string and remember the last '.'
+  for (len = 0; cp[len] != 0; ++len)
+  {
+    if (cp[len] == '.') dot = len;
+  }
+
+  // no '.' at all -> nothing to classify
+  if (dot == -1) return 0;
+
+  // step over the '.'; an empty extension ("name.") is rejected
+  ++dot;
+  if (dot >= len) return 0;
+  // dot now indexes the first character of the extension
+
+  return parse_extension(cp + dot, len - dot, ftype, atype);
+}
+
+// cp is just the extension
+int parse_extension(const char *cp, Word size, Word *ftype, LongWord *atype)
+{
+  Word size;
  Word *wp = (Word *)cp;
  Word h;
-  
-  *ftype = 0;
-  *atype = 0;
-  
+
+
  if (!cp || !size) return 0;
+
  
  h = ((*cp | 0x20) ^ size) & 0x0f;
  
--- a/gno.orca.h
+++ b/gno.orca.h
@@ -0,0 +1,9 @@
+#ifndef __orca__
+#define __orca__
+
+#include <stdio.h>
+
+// Clear _IOTEXT in stream f's _flag word; f is parenthesized so any
+// pointer expression (e.g. fsetbinary(*fp)) expands correctly.
+#define fsetbinary(f) ((f)->_flag &= ~_IOTEXT)
+#endif
--- a/gopher.c
+++ b/gopher.c
@@ -21,8 +21,8 @@

 #include "s16debug.h"

-
-extern int setfiletype(const char *filename);
+static FileInfoRecGS FileInfo;
+static Word FileAttr;

 static int gopher_binary(Word ipid, FILE *file)
 {
@@ -282,6 +282,9 @@ int do_gopher(const char *url, URLComponents *components)
  
  file = stdout;
    
+
+  FileAttr = 0;
+  memset(&FileInfo, 0, sizeof(FileInfo));
    
  if (!components->portNumber) components->portNumber = 70;
  
@@ -351,7 +354,11 @@ int do_gopher(const char *url, URLComponents *components)
        return -1; 
      }
      
-      setfiletype(filename);
+      if (parse_extension(filename, &FileInfo.fileType, &FileInfo.auxType))
+      {
+        FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
+        setfileattr(filename, &FileInfo, FileAttr);
+      }
  }


--- a/http.c
+++ b/http.c
@@ -38,6 +38,10 @@
 #include "http.utils.h"
 #include "s16debug.h"

+
+static FileInfoRecGS FileInfo;
+static Word FileAttr;
+
 static int do_http_0_9(
  const char *url, 
  URLComponents *components, 
@@ -312,9 +316,7 @@ int read_response(Word ipid, FILE *file, Handle dict)
    
    LongWord contentSize;

-    int haveTime = 0;
-    timeGSRec time;
-    
+    int haveTime = 0;    
    
    contentSize = 0;
    transferEncoding = -1;
@@ -326,6 +328,11 @@ int read_response(Word ipid, FILE *file, Handle dict)
        transferEncoding = 0;
    }
    
+    /*
+     * check the transfer encoding header 
+     * should be chunked or identity (default)
+     *
+     */
    value = DictionaryGet(dict, "Transfer-Encoding", 17, &valueSize);
    if (value)
    {
@@ -359,6 +366,45 @@ int read_response(Word ipid, FILE *file, Handle dict)
        }    
    }

+    /*
+     * convert a content-type header mime string into
+     * a file type / aux type.
+     *
+     */
+    value = DictionaryGet(dict, "Content-Type", 12, &valueSize)
+    if (value && valueSize)
+    {
+      int i;
+      int slash = -1;
+      // strip ';'
+      for (i = 0; i < valueSize; ++i)
+      {
+        char c = value[i];
+        if (c == ';') break;
+        if (c == '/') slash = i;
+      }
+
+      // todo -- flag for this or not.
+      valueSize = i;
+      if (parse_mime(value, valueSize, 
+        &FileInfo.fileType, 
+        &FileInfo.auxType))
+      {
+        FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
+      }
+      else if (slash != -1 && parse_mime(value, slash, 
+        &FileInfo.fileType, 
+        &FileInfo.auxType))
+      {
+        FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
+      }
+
+    }
+
+    /*
+     * convert the Last Modified header into a file mod date
+     *
+     */
    value = DictionaryGet(dict, "Last-Modified", 13, &valueSize);
    if (value && valueSize <= 255)
    {
@@ -367,17 +413,31 @@ int read_response(Word ipid, FILE *file, Handle dict)
        pstring = (char *)malloc(valueSize + 1);
        if (pstring)
        {
+            struct {
+              LongWord lo;
+              LongWord hi;
+            } comp;
+
            *pstring = valueSize;
            memcpy(pstring + 1, value, valueSize);

            // parse the last-modified timestamp.
            // 0x0e00 is rfc 822 format.
            // (which is now obsoleted by rfc 2822 but close enough)
-            tiParseDateString(&time, pstring, 0x0e00);
-            if (!_toolErr)
+
+
+            // should use _tiDateString2Sec to get seconds
+            // then use ConvSeconds to get the date
+            // this should handle timezones. 
+
+            tiDateString2Sec(&comp, pstring, 0x0e00);
+            if (!_toolErr && hi == 0)
            {
-                haveTime = 1;
+              ConvSeconds(secs2TimeRec, comp.lo, &FileInfo.modDateTime);
+              FileAttr |= ATTR_MODTIME;
+              haveTime = 1;
            }
+
            free(pstring);
        }
    }
@@ -542,6 +602,9 @@ int do_http(const char *url, URLComponents *components)
  FILE *file;
  
  file = stdout;
+
+  FileAttr = 0;
+  memset(&FileInfo, 0, sizeof(FileInfo));
  
  if (!components->portNumber) components->portNumber = 80;

@@ -575,6 +638,8 @@ int do_http(const char *url, URLComponents *components)
    if (path)
    {    
        // path starts with /.
+
+        // todo -- also need to strip any ? parameters.
        
        filename = strrchr(path + 1, '/');
        if (filename) // *filename == '/'
@@ -618,9 +683,11 @@ int do_http(const char *url, URLComponents *components)
        return -1; 
      }

-      // should set from mime type?      
-      setfiletype(filename);
-      
+      // hmm, flag for this vs content type?
+      if (parse_extension_c(filename, &FileInfo.fileType, &FileInfo.auxType))
+      {
+        FileAttr |= ATTR_FILETYPE | ATTR_AUXTYPE;
+      }
  }
  
  
@@ -646,6 +713,8 @@ int do_http(const char *url, URLComponents *components)
  fflush(file);
  if (file != stdout) fclose(file);
  
+  if (filename) setfileattr(filename, &FileInfo, FileAttr);
+
  CloseLoop(&connection);
  free(host);
  free(path);
--- a/makefile.mk
+++ b/makefile.mk
@@ -1,7 +1,7 @@
 CFLAGS += $(DEFINES) -v -w 
-OBJS = main.o gopher.o url.o connection.o readline2.o scheme.o ftype.o setftype.o \
-       s16debug.o common.o http.o http.utils.o dictionary.o flags.o \
-       time.o
+OBJS = main.o gopher.o url.o connection.o readline2.o scheme.o ftype.o \
+       mime.o setftype.o s16debug.o common.o http.o http.utils.o \
+       dictionary.o flags.o time.o

 gopher: $(OBJS)
 	$(CC) $(LDFLAGS) $(OBJS) $(LDLIBS) -o $@
@@ -34,6 +34,7 @@ dictionary.o: dictionary.c dictionary.h
 setftype.o: setftype.c
 scheme.o: scheme.c url.h
 ftype.o: ftype.c
+mime.o: mime.c 

 time.o: time.c

--- a/mime.c
+++ b/mime.c
@@ -0,0 +1,118 @@
+#pragma optimize 79
+#pragma noroot
+
+#include <Types.h>
+
+/*
+ * Map a NUL-terminated mime string to a file type / aux type.
+ * Anything from the first ';' onward is ignored.  The whole
+ * "type/subtype" is tried first, then the bare "type" (the text
+ * before the last '/').
+ * Returns 1 with *ftype / *atype set on a match, otherwise 0 with
+ * both left at zero.
+ * NOTE(review): prototypes.h and the http.c callers use a 4-argument
+ * parse_mime(cp, size, ftype, atype) -- confirm which form is intended.
+ */
+int parse_mime(const char *cp, Word *ftype, LongWord *atype)
+{
+  Word size;
+  Word *wp = (Word *)cp;
+  Word h;
+  int i;
+  int slash;
+
+  // outputs default to zero for the no-match case
+  *ftype = 0;
+  *atype = 0;
+
+  if (!cp || !*cp) return 0;
+
+  // measure up to the NUL or the first ';', remembering the last '/'
+  slash = -1;
+  for (i = 0; ; ++i)
+  {
+    char c = cp[i];
+    if (c == 0 || c == ';') break;
+    if (c == '/') slash = i;
+  }
+  size = i;
+
+  for (i = 0; i < 2; ++i)
+  {
+    // bucket: low 4 bits of ((first byte | 0x20) ^ size)
+    h = ((*cp | 0x20) ^ size) & 0x0f;
+
+    switch (h)
+    {
+    case 0x00:
+      // text
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x6574 // 'te'
+        && (wp[1] | 0x2020) == 0x7478 // 'xt'
+      ) {
+        *ftype = 0x04;
+        *atype = 0x0000;
+        return 1;
+      }
+      break;
+
+    case 0x09:
+      // application/octet-stream
+      if (size == 24
+        && (wp[0] | 0x2020) == 0x7061 // 'ap'
+        && (wp[1] | 0x2020) == 0x6c70 // 'pl'
+        && (wp[2] | 0x2020) == 0x6369 // 'ic'
+        && (wp[3] | 0x2020) == 0x7461 // 'at'
+        && (wp[4] | 0x2020) == 0x6f69 // 'io'
+        && (wp[5] | 0x2020) == 0x2f6e // 'n/'
+        && (wp[6] | 0x2020) == 0x636f // 'oc'
+        && (wp[7] | 0x2020) == 0x6574 // 'te'
+        && (wp[8] | 0x2020) == 0x2d74 // 't-'
+        && (wp[9] | 0x2020) == 0x7473 // 'st'
+        && (wp[10] | 0x2020) == 0x6572 // 're'
+        && (wp[11] | 0x2020) == 0x6d61 // 'am'
+      ) {
+        *ftype = 0x02;
+        *atype = 0x0000;
+        return 1;
+      }
+      // text/x-pascal
+      if (size == 13
+        && (wp[0] | 0x2020) == 0x6574 // 'te'
+        && (wp[1] | 0x2020) == 0x7478 // 'xt'
+        && (wp[2] | 0x2020) == 0x782f // '/x'
+        && (wp[3] | 0x2020) == 0x702d // '-p'
+        && (wp[4] | 0x2020) == 0x7361 // 'as'
+        && (wp[5] | 0x2020) == 0x6163 // 'ca'
+        && (cp[12] | 0x20) == 0x6c     // 'l'
+      ) {
+        *ftype = 0xb0;
+        *atype = 0x0005;
+        return 1;
+      }
+      break;
+
+    case 0x0c:
+      // text/x-c
+      if (size == 8
+        && (wp[0] | 0x2020) == 0x6574 // 'te'
+        && (wp[1] | 0x2020) == 0x7478 // 'xt'
+        && (wp[2] | 0x2020) == 0x782f // '/x'
+        && (wp[3] | 0x2020) == 0x632d // '-c'
+      ) {
+        *ftype = 0xb0;
+        *atype = 0x0008;
+        return 1;
+      }
+      break;
+
+    }
+
+    // second pass: retry with only the type (text before the '/').
+    // test the signed index itself; size is an unsigned Word.
+    if (slash == -1) break;
+    size = slash;
+  }
+
+  return 0;
+}
--- a/mime.txt
+++ b/mime.txt
@@ -0,0 +1,97 @@
+%%
+#pragma optimize 79
+#pragma noroot
+
+#include <Types.h>
+
+/*
+ * Map a NUL-terminated mime string (optionally followed by ";params")
+ * to a file type / aux type via parse_mime().
+ * Returns 1 when parse_mime() matched, 0 otherwise (including NULL/empty cp).
+ */
+int parse_mime_c(const char *cp, Word *ftype, LongWord *atype)
+{
+  int i;
+  int slash;
+
+  if (!cp || !*cp)
+    return 0;
+
+  /*
+   * two pass
+   * 1. type/subtype
+   * 2. type
+   */
+
+  // scan up to the NUL or the first ';', remembering the last '/'
+  slash = -1;
+  for (i = 0; ; ++i)
+  {
+    char c = cp[i];
+    if (c == 0 || c == ';') break;
+    if (c == '/') slash = i;
+  }
+
+  // try type/subtype (i == length of the part before any ';')
+  if (parse_mime(cp, i, ftype, atype))
+    return 1;
+
+  // try type alone (the text before the '/')
+  if (slash != -1)
+    return parse_mime(cp, slash, ftype, atype);
+  return 0;
+}
+
+// Match the first size bytes of cp against the rule cases; 1 on a match, else 0.
+int parse_mime(const char *cp, Word size, Word *ftype, LongWord *atype)
+{
+  Word *wp = (Word *)cp; // word view of cp (the cases in mime.c compare through it)
+  Word h;
+
+  if (!cp || !size) return 0;
+
+retry:
+
+  // bucket: low 4 bits of ((first byte | 0x20) ^ size)
+  h = ((*cp | 0x20) ^ size) & 0x0f;
+  switch (h)
+  {
+%%
+  }
+
+/*
+  // try again as type
+  while (--size)
+  {
+    if (cp[size] == '/') goto retry;
+  }
+*/
+
+  return 0;
+}
+%%
+
+'text' ->
+  *ftype = 0x04;
+  *atype = 0x0000;
+  return 1;
+.
+
+'text/x-c' ->
+  *ftype = 0xb0;
+  *atype = 0x0008;
+  return 1;
+.
+
+'text/x-pascal' ->
+  *ftype = 0xb0;
+  *atype = 0x0005;
+  return 1;
+.
+
+'application/octet-stream' ->
+  *ftype = 0x02;
+  *atype = 0x0000;
+  return 1;
+.
+
--- a/prototypes.h
+++ b/prototypes.h
@@ -23,7 +23,25 @@ typedef struct ReadBlock
 int read_binary(unsigned ipid, FILE *file, ReadBlock *);
 int read_binary_size(unsigned ipid, FILE *file, ReadBlock *);

-int setfiletype(const char *filename);
+// filename / extension -> file type + aux type; each returns 1 on a match
+int parse_extension_c(const char *cp, Word *ftype, LongWord *atype);
+int parse_extension(const char *cp, Word size, Word *ftype, LongWord *atype);
+// mime type -> file type + aux type
+int parse_mime_c(const char *cp, Word *ftype, LongWord *atype);
+int parse_mime(const char *cp, Word size, Word *ftype, LongWord *atype);
+
+#ifdef __GSOS__
+// bit flags selecting which FileInfoRecGS fields setfileattr() copies
+enum {
+    ATTR_ACCESS = 1,
+    ATTR_FILETYPE = 2,
+    ATTR_AUXTYPE = 4,
+    ATTR_CREATETIME = 8,
+    ATTR_MODTIME = 16
+};
+
+int setfileattr(const char *filename, FileInfoRecGS *info, unsigned flags);
+#endif


 #ifdef __CONNECTION_H__
--- a/scheme.c
+++ b/scheme.c
@@ -24,6 +24,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
  {
  // --- begin auto-generated --
    case 0x00:
+      // dict
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x6964 // 'di'
+        && (wp[1] | 0x2020) == 0x7463 // 'ct'
+      ) {
+        c->schemeType = SCHEME_DICT;
+        c->portNumber = 2628;
+        return;
+      }
      // ssh
      if (size == 3
        && (wp[0] | 0x2020) == 0x7373 // 'ss'
@@ -49,6 +58,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
      break;

    case 0x02:
+      // file
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x6966 // 'fi'
+        && (wp[1] | 0x2020) == 0x656c // 'le'
+      ) {
+        c->schemeType = SCHEME_FILE;
+        c->portNumber = 0;
+        return;
+      }
      // afp
      if (size == 3
        && (wp[0] | 0x2020) == 0x6661 // 'af'
@@ -68,15 +86,6 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
        c->portNumber = 23;
        return;
      }
-      // file
-      if (size == 4
-        && (wp[0] | 0x2020) == 0x6966 // 'fi'
-        && (wp[1] | 0x2020) == 0x656c // 'le'
-      ) {
-        c->schemeType = SCHEME_FILE;
-        c->portNumber = 0;
-        return;
-      }
      break;

    case 0x05:
@@ -128,6 +137,15 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
      break;

    case 0x0d:
+      // nfs
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x666e // 'nf'
+        && (cp[2] | 0x20) == 0x73     // 's'
+      ) {
+        c->schemeType = SCHEME_NFS;
+        c->portNumber = 2049;
+        return;
+      }
      // https
      if (size == 5
        && (wp[0] | 0x2020) == 0x7468 // 'ht'
@@ -138,15 +156,6 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
        c->portNumber = 443;
        return;
      }
-      // nfs
-      if (size == 3
-        && (wp[0] | 0x2020) == 0x666e // 'nf'
-        && (cp[2] | 0x20) == 0x73     // 's'
-      ) {
-        c->schemeType = SCHEME_NFS;
-        c->portNumber = 2049;
-        return;
-      }
      break;

  // --- end auto-generated --
--- a/scheme.txt
+++ b/scheme.txt
@@ -98,3 +98,9 @@ void parse_scheme(const char *cp, unsigned size, URLComponents *c)
  c->portNumber = 2049;
  return;
 .
+
+'dict' ->
+  c->schemeType = SCHEME_DICT;
+  c->portNumber = 2628;
+  return;
+.
--- a/setftype.c
+++ b/setftype.c
@@ -5,54 +5,33 @@
 #include <stdlib.h>
 #include <gno/gno.h>

-extern int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype);
-
-int setfiletype(const char *filename)
+int setfileattr(const char *filename, FileInfoRecGS *info, unsigned flags) // copy the flags-selected fields of *info onto the file; 1 on success, 0 on failure
 {
-    int pd;
-    int i;
-    
-    Word ftype;
-    Word atype;
-    int rv;
-    
-    FileInfoRecGS info;
-    
-    // find the extension in the filename.
-    
-    pd = -1;
-    for (i = 0; ; ++i)
-    {
-        char c;
-        
-        c = filename[i];
-        if (c == 0) break;
-        if (c == '.') pd = i;
-    } 
-    
-    // pd == position of final .
-    // i == strlen
-    
-    if (pd == -1) return 0;
-    if (pd + 1 >= i) return 0;
-    pd++; // skip past it...
-    
-    if (!parse_ftype(filename + pd, i - pd, &ftype, &atype)) 
-        return 0;
-        
-    info.pCount = 4;
-    info.pathname = (GSString255Ptr)__C2GSMALLOC(filename);
-    info.access = 0xe3;
-    info.auxType = atype;
-    info.fileType = ftype;
-    
-    //GetFileInfoGS(&info);
-    //if (_toolErr) return 0;
-    
-    SetFileInfoGS(&info);
-    rv = _toolErr;
-    if (_toolErr)
+    Word rv;
+    FileInfoRecGS tmp;
+
+    if (!info) return 0;
+    if (!flags) return 1; // no attribute selected, so there is nothing to change
+
+    tmp.pCount = 7; // NOTE(review): presumably covers the fields through modDateTime -- confirm against FileInfoRecGS
+    tmp.pathname = (GSString255Ptr)__C2GSMALLOC(filename); // converted copy of filename; released with free() below
+    if (!tmp.pathname) return 0;
+
+    GetFileInfoGS(&tmp); // load the current info first; only flagged fields are overwritten before the set call
+    rv = _toolErr;
+    if (!_toolErr)
+    {
+        if (flags & ATTR_ACCESS) tmp.access = info->access;
+        if (flags & ATTR_FILETYPE) tmp.fileType = info->fileType;
+        if (flags & ATTR_AUXTYPE) tmp.auxType = info->auxType;
+        if (flags & ATTR_CREATETIME) tmp.createDateTime = info->createDateTime;
+        if (flags & ATTR_MODTIME) tmp.modDateTime = info->modDateTime;
+
+        SetFileInfoGS(&tmp);
+        rv = _toolErr; // a non-zero rv makes the function return 0
+    }
+
+    free (tmp.pathname);
 
-    free(info.pathname);
     return rv ? 0 : 1;
-}
+}
--- a/txtable.rb
+++ b/txtable.rb
@@ -115,7 +115,7 @@ ARGV.each {|filename|
        if !rule
            next if line == ''
            
-            if line =~ /^'([a-zA-Z0-9.+_-]+)'\s*->$/
+            if line =~ /^'([a-zA-Z0-9.+_\/-]+)'\s*->$/
                rule = $1;
                raise "duplicate rule: #{rule}" if rules[rule] 
                next