generated text parser

This commit is contained in:
Kelvin Sherlock 2012-04-15 21:45:07 -04:00
parent 5071afb9be
commit f23c883604
5 changed files with 572 additions and 0 deletions

103
ftype.c Normal file
View File

@ -0,0 +1,103 @@
#include <Types.h>
// parse_ftype -- look up a short keyword and return its file type / aux type.
//
// cp    : keyword text; only the first `size` bytes are examined (no NUL
//         terminator is required).  The recognised keywords are shk, text,
//         bxy, c, asm, pas, txt and h -- these look like file-name
//         extensions without the dot; confirm with the callers.
// size  : number of bytes at cp.
// ftype : out, receives the type value for the keyword (0 if not recognised).
// atype : out, receives the aux value for the keyword (0 if not recognised).
//
// Returns 1 when the keyword is recognised, 0 otherwise (including a NULL cp
// or a zero size).
//
// Matching is case-insensitive for ASCII letters: every byte is OR-ed with
// 0x20 before being compared with a lower-case constant.  Characters are
// compared two at a time through `wp` using constants such as 0x6873 for
// 'sh' (first character in the low byte), so this relies on Word being a
// 16-bit little-endian quantity.
//
// The case arms follow the layout txtable.rb generates from ftype.txt.  The
// generator places each keyword in bucket ((first byte | 0x20) ^ length) & 0x0f,
// so the switch expression must apply the same 0x0f mask; without it the
// value for e.g. 'shk' is 0x70 and none of the case labels can ever match.
int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
{
    Word *wp = (Word *)cp;

    *ftype = 0;
    *atype = 0;

    // Nothing to classify; also avoids reading *cp for an empty keyword.
    if (!cp || !size) return 0;

    switch (((*cp | 0x20) ^ size) & 0x0f)
    {
    case 0x00:
        // shk
        if (size == 3
            && (wp[0] | 0x2020) == 0x6873 // 'sh'
            && (cp[2] | 0x20) == 0x6b // 'k'
        ) {
            *ftype = 0xe0;
            *atype = 0x8002;
            return 1;
        }
        // text
        if (size == 4
            && (wp[0] | 0x2020) == 0x6574 // 'te'
            && (wp[1] | 0x2020) == 0x7478 // 'xt'
        ) {
            *ftype = 0x04;
            *atype = 0x0000;
            return 1;
        }
        break;

    case 0x01:
        // bxy
        if (size == 3
            && (wp[0] | 0x2020) == 0x7862 // 'bx'
            && (cp[2] | 0x20) == 0x79 // 'y'
        ) {
            *ftype = 0xe0;
            *atype = 0x8000;
            return 1;
        }
        break;

    case 0x02:
        // c
        if (size == 1
            && (cp[0] | 0x20) == 0x63 // 'c'
        ) {
            *ftype = 0xb0;
            *atype = 0x0008;
            return 1;
        }
        // asm
        if (size == 3
            && (wp[0] | 0x2020) == 0x7361 // 'as'
            && (cp[2] | 0x20) == 0x6d // 'm'
        ) {
            *ftype = 0xb0;
            *atype = 0x0003;
            return 1;
        }
        break;

    case 0x03:
        // pas
        if (size == 3
            && (wp[0] | 0x2020) == 0x6170 // 'pa'
            && (cp[2] | 0x20) == 0x73 // 's'
        ) {
            *ftype = 0xb0;
            *atype = 0x0005;
            return 1;
        }
        break;

    case 0x07:
        // txt
        if (size == 3
            && (wp[0] | 0x2020) == 0x7874 // 'tx'
            && (cp[2] | 0x20) == 0x74 // 't'
        ) {
            *ftype = 0x04;
            *atype = 0x0000;
            return 1;
        }
        break;

    case 0x09:
        // h
        if (size == 1
            && (cp[0] | 0x20) == 0x68 // 'h'
        ) {
            *ftype = 0xb0;
            *atype = 0x0008;
            return 1;
        }
        break;
    }
    return 0;
}

68
ftype.txt Normal file
View File

@ -0,0 +1,68 @@
%%
#pragma optimize 79
#include <Types.h>
// parse_ftype -- look up a short keyword (see the rule table at the end of
// this template) and return its file type / aux type.
//
// Returns 1 and fills in *ftype / *atype when the first `size` bytes at cp
// match a rule (ASCII case-insensitively); otherwise returns 0 with both
// outputs set to 0.
//
// txtable.rb copies this skeleton verbatim and emits the case arms at the
// '%%' line inside the switch.  It places each keyword in bucket
// ((first byte | 0x20) ^ length) & 0x0f, so the switch expression below must
// apply the same 0x0f mask -- without it no generated case label can match.
int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
{
    Word *wp = (Word *)cp;

    *ftype = 0;
    *atype = 0;

    // Nothing to classify; also avoids reading *cp for an empty keyword.
    if (!cp || !size) return 0;

    switch (((*cp | 0x20) ^ size) & 0x0f)
    {
%%
    }
    return 0;
}
%%
'h' ->
*ftype = 0xb0;
*atype = 0x0008;
return 1;
.
'c' ->
*ftype = 0xb0;
*atype = 0x0008;
return 1;
.
'asm' ->
*ftype = 0xb0;
*atype = 0x0003;
return 1;
.
'pas' ->
*ftype = 0xb0;
*atype = 0x0005;
return 1;
.
'txt' ->
*ftype = 0x04;
*atype = 0x0000;
return 1;
.
'text' ->
*ftype = 0x04;
*atype = 0x0000;
return 1;
.
'shk' ->
*ftype = 0xe0;
*atype = 0x8002;
return 1;
.
'bxy' ->
*ftype = 0xe0;
*atype = 0x8000;
return 1;
.

156
scheme.c Normal file
View File

@ -0,0 +1,156 @@
#pragma optimize 79
#include <Types.h>
#include "url.h"
// parse_scheme -- classify a URL scheme name and record the result in *c.
//
// cp   : scheme text; only the first `size` bytes are examined, so no NUL
//        terminator is required.
// size : number of bytes at cp.
// c    : receives schemeType and portNumber; if it is NULL nothing happens.
//
// Values stored in *c:
//   cp NULL or size 0  -> SCHEME_NONE,    portNumber 0
//   recognised name    -> its SCHEME_xxx constant and the port listed in
//                         the matching branch below
//   anything else      -> SCHEME_UNKNOWN, portNumber 0
//
// Matching is case-insensitive for ASCII letters: every byte is OR-ed with
// 0x20 before being compared with a lower-case constant.  Characters are
// compared two at a time through `wp` using constants such as 0x6f67 for
// 'go' (first character in the low byte), so this relies on `unsigned`
// being a 16-bit little-endian quantity.
// NOTE(review): assumes reading an `unsigned` through a cast char pointer is
// acceptable on the target (alignment / aliasing) -- confirm.
// NOTE(review): 'sftp' is paired with port 115; SSH-based sftp normally
// runs on port 22 -- confirm which protocol is intended.
void parse_scheme(const char *cp, unsigned size, URLComponents *c)
{
unsigned *wp;
unsigned h;
// No destination to fill in.
if (!c) return;
// Missing or empty scheme text: report "no scheme" rather than "unknown".
if (!cp || !size)
{
c->portNumber = 0;
c->schemeType = SCHEME_NONE;
return;
}
// Word-sized view of the same bytes, used for two-characters-at-a-time compares.
wp = (unsigned *)cp;
// Bucket number: low four bits of (lower-cased first character XOR length).
// Every branch below re-checks the length and all characters, so keywords
// that share a bucket are still told apart; the length test comes first, so
// no branch reads beyond `size` bytes.
h = ((*cp | 0x20) ^ size) & 0x0f;
switch(h)
{
// --- begin auto-generated --
case 0x00:
// ssh
if (size == 3
&& (wp[0] | 0x2020) == 0x7373 // 'ss'
&& (cp[2] | 0x20) == 0x68 // 'h'
) {
c->schemeType = SCHEME_SSH;
c->portNumber = 22;
return;
}
break;
case 0x01:
// gopher
if (size == 6
&& (wp[0] | 0x2020) == 0x6f67 // 'go'
&& (wp[1] | 0x2020) == 0x6870 // 'ph'
&& (wp[2] | 0x2020) == 0x7265 // 'er'
) {
c->schemeType = SCHEME_GOPHER;
c->portNumber = 70;
return;
}
break;
case 0x02:
// afp
if (size == 3
&& (wp[0] | 0x2020) == 0x6661 // 'af'
&& (cp[2] | 0x20) == 0x70 // 'p'
) {
c->schemeType = SCHEME_AFP;
c->portNumber = 548;
return;
}
// telnet
if (size == 6
&& (wp[0] | 0x2020) == 0x6574 // 'te'
&& (wp[1] | 0x2020) == 0x6e6c // 'ln'
&& (wp[2] | 0x2020) == 0x7465 // 'et'
) {
c->schemeType = SCHEME_TELNET;
c->portNumber = 23;
return;
}
// file
if (size == 4
&& (wp[0] | 0x2020) == 0x6966 // 'fi'
&& (wp[1] | 0x2020) == 0x656c // 'le'
) {
c->schemeType = SCHEME_FILE;
c->portNumber = 0;
return;
}
break;
case 0x05:
// ftp
if (size == 3
&& (wp[0] | 0x2020) == 0x7466 // 'ft'
&& (cp[2] | 0x20) == 0x70 // 'p'
) {
c->schemeType = SCHEME_FTP;
c->portNumber = 21;
return;
}
break;
case 0x07:
// sftp
if (size == 4
&& (wp[0] | 0x2020) == 0x6673 // 'sf'
&& (wp[1] | 0x2020) == 0x7074 // 'tp'
) {
c->schemeType = SCHEME_SFTP;
c->portNumber = 115;
return;
}
break;
case 0x0a:
// nntp
if (size == 4
&& (wp[0] | 0x2020) == 0x6e6e // 'nn'
&& (wp[1] | 0x2020) == 0x7074 // 'tp'
) {
c->schemeType = SCHEME_NNTP;
c->portNumber = 119;
return;
}
break;
case 0x0c:
// http
if (size == 4
&& (wp[0] | 0x2020) == 0x7468 // 'ht'
&& (wp[1] | 0x2020) == 0x7074 // 'tp'
) {
c->schemeType = SCHEME_HTTP;
c->portNumber = 80;
return;
}
break;
case 0x0d:
// https
if (size == 5
&& (wp[0] | 0x2020) == 0x7468 // 'ht'
&& (wp[1] | 0x2020) == 0x7074 // 'tp'
&& (cp[4] | 0x20) == 0x73 // 's'
) {
c->schemeType = SCHEME_HTTPS;
c->portNumber = 443;
return;
}
// nfs
if (size == 3
&& (wp[0] | 0x2020) == 0x666e // 'nf'
&& (cp[2] | 0x20) == 0x73 // 's'
) {
c->schemeType = SCHEME_NFS;
c->portNumber = 2049;
return;
}
break;
// --- end auto-generated --
}
// No branch matched: the text is a scheme this table does not know.
c->portNumber = 0;
c->schemeType = SCHEME_UNKNOWN;
}

99
scheme.txt Normal file
View File

@ -0,0 +1,99 @@
%%
#pragma optimize 79
#include <Types.h>
#include "url.h"
// parse_scheme -- classify a URL scheme name and record the result in *c.
//
// Does nothing when c is NULL.  Stores SCHEME_NONE / port 0 when cp is NULL
// or size is 0, the matching rule's values when the first `size` bytes at cp
// match a rule (ASCII case-insensitively), and SCHEME_UNKNOWN / port 0
// otherwise.
//
// txtable.rb copies this skeleton verbatim and emits the case arms at the
// '%%' line between the two "auto-generated" markers.
void parse_scheme(const char *cp, unsigned size, URLComponents *c)
{
unsigned *wp;
unsigned h;
// No destination to fill in.
if (!c) return;
// Missing or empty scheme text: report "no scheme" rather than "unknown".
if (!cp || !size)
{
c->portNumber = 0;
c->schemeType = SCHEME_NONE;
return;
}
// Word-sized view of the same bytes, used by the generated two-character compares.
wp = (unsigned *)cp;
// Must stay identical to the bucket function in txtable.rb:
// ((first byte | 0x20) ^ length) & 0x0f.
h = ((*cp | 0x20) ^ size) & 0x0f;
switch(h)
{
// --- begin auto-generated --
%%
// --- end auto-generated --
}
// No generated branch matched.
c->portNumber = 0;
c->schemeType = SCHEME_UNKNOWN;
}
%%
'file' ->
c->schemeType = SCHEME_FILE;
c->portNumber = 0;
return;
.
'ftp' ->
c->schemeType = SCHEME_FTP;
c->portNumber = 21;
return;
.
'ssh' ->
c->schemeType = SCHEME_SSH;
c->portNumber = 22;
return;
.
'telnet' ->
c->schemeType = SCHEME_TELNET;
c->portNumber = 23;
return;
.
'gopher' ->
c->schemeType = SCHEME_GOPHER;
c->portNumber = 70;
return;
.
'http' ->
c->schemeType = SCHEME_HTTP;
c->portNumber = 80;
return;
.
'sftp' ->
c->schemeType = SCHEME_SFTP;
c->portNumber = 115;
return;
.
'nntp' ->
c->schemeType = SCHEME_NNTP;
c->portNumber = 119;
return;
.
'https' ->
c->schemeType = SCHEME_HTTPS;
c->portNumber = 443;
return;
.
'afp' ->
c->schemeType = SCHEME_AFP;
c->portNumber = 548;
return;
.
'nfs' ->
c->schemeType = SCHEME_NFS;
c->portNumber = 2049;
return;
.

146
txtable.rb Normal file
View File

@ -0,0 +1,146 @@
#!/usr/bin/env ruby -w
# Print the C `case` arms for a rule table on standard output.
#
# rules - Hash mapping each keyword (String, non-empty) to an Array of C
#         source lines; those lines become the body executed when the
#         keyword matches.
#
# Keywords are grouped into buckets numbered
# ((first byte | 0x20) ^ keyword length) & 0x0f, and one `case 0xNN:` is
# printed per non-empty bucket in ascending order.  The switch expression in
# the surrounding template has to compute the same value.
#
# Inside a bucket each keyword becomes an `if` that checks the length and
# then the characters: two at a time via `wp[n]` (the first character of a
# pair goes in the low byte of the 16-bit constant), plus a final `cp[n]`
# comparison when the length is odd.  All constants are OR-ed with 0x20 so
# the generated comparisons ignore ASCII letter case.
def dump_rules(rules)
  buckets = []
  rules.each do |key, _body|
    # `bytes.first` works whether String#bytes returns an Enumerator
    # (1.8.7 / 1.9) or an Array (2.0 and later).  The previous `bytes.next`
    # only worked on the Enumerator and raised NoMethodError on an Array.
    code = key.bytes.first
    code |= 0x20
    code ^= key.length
    code &= 0x0f
    (buckets[code] ||= []).push(key)
  end

  # NOTE(review): despite its name this holds a single space as the file
  # appears here; confirm against the upstream copy if indentation matters.
  indent6 = " "

  buckets.each_with_index do |keys, code|
    next unless keys

    printf(" case 0x%02x:\n", code)
    keys.each do |key|
      word_index = 0
      printf(" // %s\n", key)
      printf(" if (size == %d\n", key.length)
      key.scan(/..?/) do |chunk|
        folded = chunk.unpack("C*").map { |b| b | 0x20 }
        if folded.length == 2
          # Pair of characters: compare as one 16-bit little-endian word.
          value = folded[0] + (folded[1] << 8)
          printf(" && (wp[%d] | 0x2020) == 0x%04x // '%s'\n",
                 word_index, value, chunk)
          word_index += 1
        else
          # Odd trailing character: its byte offset is twice the number of
          # words already consumed.
          printf(" && (cp[%d] | 0x20) == 0x%02x // '%s'\n",
                 word_index * 2, folded[0], chunk)
        end
      end
      puts(" ) {")
      rules[key].each { |stmt| puts(indent6 + stmt) }
      puts(" }")
    end
    printf(" break;\n\n")
  end
end
# Driver: turn every template file named on the command line into C source
# on standard output.
#
# A template has three sections, each introduced by a line that contains only
# '%%' (trailing whitespace is ignored):
#   1. header  - copied verbatim before the generated case arms
#   2. trailer - copied verbatim after the generated case arms
#   3. rules   - entries of the form
#                    'keyword' ->
#                    ...lines of C...
#                    .
#                blank lines between entries are skipped.
# Only blank lines may appear before the first '%%'.
#
# Raises RuntimeError on a fourth '%%', text before the first section, a
# malformed or duplicate rule header, or a file that ends before the rule
# section or in the middle of a rule.
ARGV.each do |filename|
  state = 0        # number of '%%' separators seen so far
  header = []
  trailer = []
  body = []        # lines collected for the rule currently being read
  rule = nil       # keyword of the rule being read, nil between rules
  rules = {}
  seen = {}        # lower-cased keywords already declared

  # File.foreach rather than IO.foreach: the latter treats a path that starts
  # with '|' as a command to run.
  File.foreach(filename) do |line|
    line.sub!(/\s*$/, '') # trim trailing space, including the newline

    if line == '%%'
      state += 1
      raise "Too many sections" if state > 3
      next
    end

    case state
    when 0
      raise "invalid section" unless line == ''
      next
    when 1
      header.push(line)
      next
    when 2
      trailer.push(line)
      next
    end

    # state 3: the rule table
    if rule.nil?
      next if line == ''
      raise "oops #{line}" unless line =~ /^'([a-zA-Z0-9.+_-]+)'\s*->$/

      rule = Regexp.last_match(1)
      # The generated comparisons ignore letter case, so 'TXT' and 'txt'
      # would produce two identical tests; treat them as the same rule.
      folded = rule.downcase
      raise "duplicate rule: #{rule}" if seen[folded]
      seen[folded] = true
      next
    end

    if line == '.'
      rules[rule] = body
      body = []
      rule = nil
    else
      body.push(line)
    end
  end

  raise "unexpected EOF" if state != 3 || rule

  header.each { |text| puts text }
  dump_rules(rules)
  trailer.each { |text| puts text }
end