From f23c88360453a25dbc0838b38b3674e569777cbb Mon Sep 17 00:00:00 2001
From: Kelvin Sherlock
Date: Sun, 15 Apr 2012 21:45:07 -0400
Subject: [PATCH] generated text parser

---
 ftype.c    | 120 +++++++++++++++++++++++++++++++++++++++++
 ftype.txt  |  83 ++++++++++++++++++++++++++++
 scheme.c   | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 scheme.txt |  99 ++++++++++++++++++++++++++++++++++
 txtable.rb | 146 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 604 insertions(+)
 create mode 100644 ftype.c
 create mode 100644 ftype.txt
 create mode 100644 scheme.c
 create mode 100644 scheme.txt
 create mode 100644 txtable.rb

diff --git a/ftype.c b/ftype.c
new file mode 100644
index 0000000..b09e80c
--- /dev/null
+++ b/ftype.c
@@ -0,0 +1,120 @@
+#pragma optimize 79
+
+#include <Types.h> // NOTE(review): the header name was missing here; Types.h is assumed because the code needs Word -- confirm
+
+// Looks up a name such as "txt" or "shk" (the entries look like file-name
+// extensions) and reports the two codes stored for it.
+//
+// cp    - the name to look up; letters are compared ignoring ASCII case
+// size  - number of characters at cp
+// ftype - set to the entry's first code, or to 0 when nothing matches
+// atype - set to the entry's second code, or to 0 when nothing matches
+//         (presumably a file type and its auxiliary type -- confirm)
+//
+// Returns 1 when the name is in the table, 0 otherwise.
+int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
+{
+  Word *wp = (Word *)cp;
+
+  *ftype = 0;
+  *atype = 0;
+
+  // Nothing to inspect; this also keeps *cp below away from a NULL pointer.
+  if (!cp || !size) return 0;
+
+  // txtable.rb numbers the case labels ((first byte | 0x20) ^ length) & 0x0f,
+  // so the same mask has to be applied here or no label can ever match.
+  switch (((*cp | 0x20) ^ size) & 0x0f)
+  {
+    case 0x00:
+      // shk
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x6873 // 'sh'
+        && (cp[2] | 0x20) == 0x6b     // 'k'
+      ) {
+        *ftype = 0xe0;
+        *atype = 0x8002;
+        return 1;
+      }
+      // text
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x6574 // 'te'
+        && (wp[1] | 0x2020) == 0x7478 // 'xt'
+      ) {
+        *ftype = 0x04;
+        *atype = 0x0000;
+        return 1;
+      }
+      break;
+
+    case 0x01:
+      // bxy
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x7862 // 'bx'
+        && (cp[2] | 0x20) == 0x79     // 'y'
+      ) {
+        *ftype = 0xe0;
+        *atype = 0x8000;
+        return 1;
+      }
+      break;
+
+    case 0x02:
+      // c
+      if (size == 1
+        && (cp[0] | 0x20) == 0x63     // 'c'
+      ) {
+        *ftype = 0xb0;
+        *atype = 0x0008;
+        return 1;
+      }
+      // asm
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x7361 // 'as'
+        && (cp[2] | 0x20) == 0x6d     // 'm'
+      ) {
+        *ftype = 0xb0;
+        *atype = 0x0003;
+        return 1;
+      }
+      break;
+
+    case 0x03:
+      // pas
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x6170 // 'pa'
+        && (cp[2] | 0x20) == 0x73     // 's'
+      ) {
+        *ftype = 0xb0;
+        *atype = 0x0005;
+        return 1;
+      }
+      break;
+
+    case 0x07:
+      // txt
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x7874 // 'tx'
+        && (cp[2] | 0x20) == 0x74     // 't'
+      ) {
+        *ftype = 0x04;
+        *atype = 0x0000;
+        return 1;
+      }
+      break;
+
+    case 0x09:
+      // h
+      if (size == 1
+        && (cp[0] | 0x20) == 0x68     // 'h'
+      ) {
+        *ftype = 0xb0;
+        *atype = 0x0008;
+        return 1;
+      }
+      break;
+
+  }
+
+  return 0;
+}
diff --git a/ftype.txt b/ftype.txt
new file mode 100644
index 0000000..edf4153
--- /dev/null
+++ b/ftype.txt
@@ -0,0 +1,83 @@
+%%
+#pragma optimize 79
+
+#include <Types.h> // NOTE(review): the header name was missing here; Types.h is assumed because the code needs Word -- confirm
+
+// Looks up a name such as "txt" or "shk" (the entries look like file-name
+// extensions) and reports the two codes stored for it.
+//
+// cp    - the name to look up; letters are compared ignoring ASCII case
+// size  - number of characters at cp
+// ftype - set to the entry's first code, or to 0 when nothing matches
+// atype - set to the entry's second code, or to 0 when nothing matches
+//         (presumably a file type and its auxiliary type -- confirm)
+//
+// Returns 1 when the name is in the table, 0 otherwise.
+int parse_ftype(const char *cp, Word size, Word *ftype, Word *atype)
+{
+  Word *wp = (Word *)cp;
+
+  *ftype = 0;
+  *atype = 0;
+
+  // Nothing to inspect; this also keeps *cp below away from a NULL pointer.
+  if (!cp || !size) return 0;
+
+  // txtable.rb numbers the case labels ((first byte | 0x20) ^ length) & 0x0f,
+  // so the same mask has to be applied here or no label can ever match.
+  switch (((*cp | 0x20) ^ size) & 0x0f)
+  {
+%%
+  }
+
+  return 0;
+}
+%%
+
+'h' ->
+  *ftype = 0xb0;
+  *atype = 0x0008;
+  return 1;
+.
+
+'c' ->
+  *ftype = 0xb0;
+  *atype = 0x0008;
+  return 1;
+.
+
+'asm' ->
+  *ftype = 0xb0;
+  *atype = 0x0003;
+  return 1;
+.
+
+'pas' ->
+  *ftype = 0xb0;
+  *atype = 0x0005;
+  return 1;
+.
+
+'txt' ->
+  *ftype = 0x04;
+  *atype = 0x0000;
+  return 1;
+.
+
+'text' ->
+  *ftype = 0x04;
+  *atype = 0x0000;
+  return 1;
+.
+
+'shk' ->
+  *ftype = 0xe0;
+  *atype = 0x8002;
+  return 1;
+.
+
+'bxy' ->
+  *ftype = 0xe0;
+  *atype = 0x8000;
+  return 1;
+.
diff --git a/scheme.c b/scheme.c
new file mode 100644
index 0000000..90411ad
--- /dev/null
+++ b/scheme.c
@@ -0,0 +1,156 @@
+#pragma optimize 79
+
+#include // NOTE(review): the header name is missing from this line -- restore it
+#include "url.h"
+// Sets c->schemeType and c->portNumber for the scheme name at cp (size characters; ASCII case is ignored).
+void parse_scheme(const char *cp, unsigned size, URLComponents *c)
+{
+  unsigned *wp; // cp viewed as words: the generated compares test two characters at once, low byte first
+  unsigned h;   // bucket number that selects one of the cases below
+
+  if (!c) return; // nowhere to store a result
+  if (!cp || !size) // no scheme text at all: report SCHEME_NONE
+  {
+    c->portNumber = 0;
+    c->schemeType = SCHEME_NONE;
+    return;
+  }
+
+  wp = (unsigned *)cp;
+  h = ((*cp | 0x20) ^ size) & 0x0f; // must stay identical to the bucket formula in txtable.rb
+
+  switch(h)
+  {
+    // --- begin auto-generated --
+    case 0x00:
+      // ssh
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x7373 // 'ss'
+        && (cp[2] | 0x20) == 0x68     // 'h'
+      ) {
+        c->schemeType = SCHEME_SSH;
+        c->portNumber = 22;
+        return;
+      }
+      break;
+
+    case 0x01:
+      // gopher
+      if (size == 6
+        && (wp[0] | 0x2020) == 0x6f67 // 'go'
+        && (wp[1] | 0x2020) == 0x6870 // 'ph'
+        && (wp[2] | 0x2020) == 0x7265 // 'er'
+      ) {
+        c->schemeType = SCHEME_GOPHER;
+        c->portNumber = 70;
+        return;
+      }
+      break;
+
+    case 0x02:
+      // afp
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x6661 // 'af'
+        && (cp[2] | 0x20) == 0x70     // 'p'
+      ) {
+        c->schemeType = SCHEME_AFP;
+        c->portNumber = 548;
+        return;
+      }
+      // telnet
+      if (size == 6
+        && (wp[0] | 0x2020) == 0x6574 // 'te'
+        && (wp[1] | 0x2020) == 0x6e6c // 'ln'
+        && (wp[2] | 0x2020) == 0x7465 // 'et'
+      ) {
+        c->schemeType = SCHEME_TELNET;
+        c->portNumber = 23;
+        return;
+      }
+      // file
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x6966 // 'fi'
+        && (wp[1] | 0x2020) == 0x656c // 'le'
+      ) {
+        c->schemeType = SCHEME_FILE;
+        c->portNumber = 0;
+        return;
+      }
+      break;
+
+    case 0x05:
+      // ftp
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x7466 // 'ft'
+        && (cp[2] | 0x20) == 0x70     // 'p'
+      ) {
+        c->schemeType = SCHEME_FTP;
+        c->portNumber = 21;
+        return;
+      }
+      break;
+
+    case 0x07:
+      // sftp
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x6673 // 'sf'
+        && (wp[1] | 0x2020) == 0x7074 // 'tp'
+      ) {
+        c->schemeType = SCHEME_SFTP;
+        c->portNumber = 115;
+        return;
+      }
+      break;
+
+    case 0x0a:
+      // nntp
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x6e6e // 'nn'
+        && (wp[1] | 0x2020) == 0x7074 // 'tp'
+      ) {
+        c->schemeType = SCHEME_NNTP;
+        c->portNumber = 119;
+        return;
+      }
+      break;
+
+    case 0x0c:
+      // http
+      if (size == 4
+        && (wp[0] | 0x2020) == 0x7468 // 'ht'
+        && (wp[1] | 0x2020) == 0x7074 // 'tp'
+      ) {
+        c->schemeType = SCHEME_HTTP;
+        c->portNumber = 80;
+        return;
+      }
+      break;
+
+    case 0x0d:
+      // https
+      if (size == 5
+        && (wp[0] | 0x2020) == 0x7468 // 'ht'
+        && (wp[1] | 0x2020) == 0x7074 // 'tp'
+        && (cp[4] | 0x20) == 0x73     // 's'
+      ) {
+        c->schemeType = SCHEME_HTTPS;
+        c->portNumber = 443;
+        return;
+      }
+      // nfs
+      if (size == 3
+        && (wp[0] | 0x2020) == 0x666e // 'nf'
+        && (cp[2] | 0x20) == 0x73     // 's'
+      ) {
+        c->schemeType = SCHEME_NFS;
+        c->portNumber = 2049;
+        return;
+      }
+      break;
+
+    // --- end auto-generated --
+  }
+
+  c->portNumber = 0; // reached only when no generated case returned
+  c->schemeType = SCHEME_UNKNOWN;
+}
diff --git a/scheme.txt b/scheme.txt
new file mode 100644
index 0000000..5d0fe1c
--- /dev/null
+++ b/scheme.txt
@@ -0,0 +1,99 @@
+%%
+#pragma optimize 79
+
+#include // NOTE(review): the header name is missing from this line -- restore it
+#include "url.h"
+// Sets c->schemeType and c->portNumber for the scheme name at cp (size characters; ASCII case is ignored).
+void parse_scheme(const char *cp, unsigned size, URLComponents *c)
+{
+  unsigned *wp; // cp viewed as words: the generated compares test two characters at once, low byte first
+  unsigned h;   // bucket number that selects one of the cases below
+
+  if (!c) return; // nowhere to store a result
+  if (!cp || !size) // no scheme text at all: report SCHEME_NONE
+  {
+    c->portNumber = 0;
+    c->schemeType = SCHEME_NONE;
+    return;
+  }
+
+  wp = (unsigned *)cp;
+  h = ((*cp | 0x20) ^ size) & 0x0f; // must stay identical to the bucket formula in txtable.rb
+
+  switch(h)
+  {
+    // --- begin auto-generated --
+%%
+    // --- end auto-generated --
+  }
+
+  c->portNumber = 0; // reached only when no generated case returned
+  c->schemeType = SCHEME_UNKNOWN;
+}
+%%
+
+'file' ->
+  c->schemeType = SCHEME_FILE;
+  c->portNumber = 0;
+  return;
+.
+
+'ftp' ->
+  c->schemeType = SCHEME_FTP;
+  c->portNumber = 21;
+  return;
+.
+
+'ssh' ->
+  c->schemeType = SCHEME_SSH;
+  c->portNumber = 22;
+  return;
+.
+
+'telnet' ->
+  c->schemeType = SCHEME_TELNET;
+  c->portNumber = 23;
+  return;
+.
+
+'gopher' ->
+  c->schemeType = SCHEME_GOPHER;
+  c->portNumber = 70;
+  return;
+.
+
+'http' ->
+  c->schemeType = SCHEME_HTTP;
+  c->portNumber = 80;
+  return;
+.
+
+'sftp' ->
+  c->schemeType = SCHEME_SFTP;
+  c->portNumber = 115;
+  return;
+.
+
+'nntp' ->
+  c->schemeType = SCHEME_NNTP;
+  c->portNumber = 119;
+  return;
+.
+
+'https' ->
+  c->schemeType = SCHEME_HTTPS;
+  c->portNumber = 443;
+  return;
+.
+
+'afp' ->
+  c->schemeType = SCHEME_AFP;
+  c->portNumber = 548;
+  return;
+.
+
+'nfs' ->
+  c->schemeType = SCHEME_NFS;
+  c->portNumber = 2049;
+  return;
+.
diff --git a/txtable.rb b/txtable.rb
new file mode 100644
index 0000000..e3ec9ac
--- /dev/null
+++ b/txtable.rb
@@ -0,0 +1,146 @@
+#!/usr/bin/env ruby -w
+#
+# txtable.rb -- generate the `case` blocks of a C keyword-matching switch.
+#
+# Usage: ruby txtable.rb rules.txt > rules.c
+#
+# A rule file has three sections, each introduced by a line holding only
+# "%%"; only blank lines may come before the first one:
+#
+#   1. header  - lines copied verbatim before the generated cases
+#   2. trailer - lines copied verbatim after the generated cases
+#   3. rules   - any number of entries: a quoted keyword followed by "->",
+#                the lines to emit when it matches, then a lone "."
+#
+# Example entry:
+#
+#   'http' ->
+#     c->schemeType = SCHEME_HTTP;
+#     c->portNumber = 80;
+#     return;
+#   .
+#
+# Keywords are grouped under `case` labels numbered
+# ((first byte | 0x20) ^ length) & 0x0f, so the header has to switch on
+# that same expression.  The generated tests refer to `cp`, `wp` and `size`,
+# which the header must declare.
+#
+# Keyword matching ignores ASCII letter case, but duplicate detection here
+# is case-sensitive, so list each keyword once, preferably in lower case.
+
+# Returns the number of the `case` label that +key+ is emitted under.
+def bucket_for(key)
+  # unpack behaves the same on every Ruby; String#[] and String#bytes
+  # changed what they return between 1.8, 1.9 and 2.0.
+  first = key.unpack('C').first
+  ((first | 0x20) ^ key.length) & 0x0f
+end
+
+# Prints the "&& ..." clauses that compare +key+ with the input.  Every
+# byte is OR-ed with 0x20 on both sides, which folds ASCII letter case.
+def dump_compare(key)
+  key.scan(/..?/).each_with_index do |chunk, ix|
+    bytes = chunk.unpack('C*').map { |b| b | 0x20 }
+    if bytes.length == 2
+      # A pair of characters is tested as one 16-bit value, first
+      # character in the low byte.
+      printf("        && (wp[%d] | 0x2020) == 0x%04x // '%s'\n",
+             ix, bytes[0] + (bytes[1] << 8), chunk)
+    else
+      # Only the last chunk of an odd-length key is a single character.
+      printf("        && (cp[%d] | 0x20) == 0x%02x     // '%s'\n",
+             ix * 2, bytes[0], chunk)
+    end
+  end
+end
+
+# Prints one `case` block per occupied bucket.  Keywords that share a
+# bucket are tested in the order the Hash yields them (insertion order on
+# Ruby 1.9 and later).
+#
+# rules - Hash mapping each keyword to the Array of lines to emit inside
+#         the `if` that recognises it.
+def dump_rules(rules)
+  buckets = []
+  rules.each_key { |key| (buckets[bucket_for(key)] ||= []).push(key) }
+
+  indent6 = ' ' * 6
+
+  buckets.each_with_index do |keys, ix|
+    next unless keys
+
+    printf("    case 0x%02x:\n", ix)
+    keys.each do |key|
+      printf("      // %s\n", key)
+      printf("      if (size == %d\n", key.length)
+      dump_compare(key)
+      puts('      ) {')
+      rules[key].each { |x| puts(indent6 + x) }
+      puts('      }')
+    end
+    printf("      break;\n\n")
+  end
+end
+
+# Reads the rule file +filename+.
+#
+# Returns [header, trailer, rules]: two Arrays of lines and the Hash that
+# dump_rules expects.  Trailing whitespace is removed from every line.
+# Raises RuntimeError when the file does not follow the layout above.
+def parse_rule_file(filename)
+  state = 0 # number of "%%" separators seen so far
+  header = []
+  trailer = []
+  rules = {}
+  rule = nil # keyword of the entry being read, nil between entries
+  body = []
+
+  File.foreach(filename) do |line|
+    line = line.rstrip
+
+    if line == '%%'
+      state += 1
+      raise "Too many sections" if state > 3
+      next
+    end
+
+    case state
+    when 0
+      raise "invalid section" unless line.empty?
+    when 1
+      header.push(line)
+    when 2
+      trailer.push(line)
+    else
+      if rule.nil?
+        next if line.empty?
+        raise "oops #{line}" unless line =~ /^'([a-zA-Z0-9.+_-]+)'\s*->$/
+
+        rule = $1
+        raise "duplicate rule: #{rule}" if rules.key?(rule)
+      elsif line == '.'
+        rules[rule] = body
+        rule = nil
+        body = []
+      else
+        body.push(line)
+      end
+    end
+  end
+
+  raise "unexpected EOF" if state != 3 || rule
+
+  [header, trailer, rules]
+end
+
+# Script entry point: convert every file named on the command line and
+# write the result to standard output.
+if __FILE__ == $0
+  ARGV.each do |filename|
+    header, trailer, rules = parse_rule_file(filename)
+
+    header.each { |x| puts x }
+    dump_rules(rules)
+    trailer.each { |x| puts x }
+  end
+end