From 4b8d7406b0cf519dc154ba0ed0ff079da785f325 Mon Sep 17 00:00:00 2001 From: Steven Hugg Date: Fri, 7 Dec 2018 17:24:27 -0500 Subject: [PATCH] isProbablyBinary() used for uploading files --- doc/notes.txt | 3 +- presets/coleco/common.c | 3 -- src/ui.ts | 7 +++-- src/util.ts | 69 ++++++++++++++++++++++++++++------------- test/cli/testutil.js | 9 ++++++ 5 files changed, 62 insertions(+), 29 deletions(-) diff --git a/doc/notes.txt b/doc/notes.txt index 4349d922..98c4702f 100644 --- a/doc/notes.txt +++ b/doc/notes.txt @@ -84,6 +84,7 @@ TODO: - resize memory browser when split resize (any div resize) - preroll the emulator so optimizer does its thing before loading rom - wasm dynamic linking of emulators (https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md) +- upload text/binary detection WEB WORKER FORMAT @@ -165,4 +166,4 @@ PIXEL EDITOR everything is an editor back/forward propogation -encoded <-> raw + spec + palette(s) <-> bitmap <-> tile/sprite preview(s) +encoded <-> raw + spec + palette(s) <-> full tilemap <-> selected tile/sprite/metasprite diff --git a/presets/coleco/common.c b/presets/coleco/common.c index 2d34871f..d1af692f 100644 --- a/presets/coleco/common.c +++ b/presets/coleco/common.c @@ -26,9 +26,6 @@ void flip_sprite_patterns(word dest, const byte* patterns, word len) { } } -char cursor_x; -char cursor_y; - void clrscr() { cvu_vmemset(IMAGE, 0, COLS*ROWS); } diff --git a/src/ui.ts b/src/ui.ts index 3165124e..b83c1e72 100644 --- a/src/ui.ts +++ b/src/ui.ts @@ -11,7 +11,8 @@ import { Platform, Preset, DebugSymbols, DebugEvalCondition } from "./baseplatfo import { PLATFORMS } from "./emu"; import * as Views from "./views"; import { createNewPersistentStore } from "./store"; -import { getFilenameForPath, getFilenamePrefix, highlightDifferences, invertMap, byteArrayToString, compressLZG, byteArrayToUTF8 } from "./util"; +import { getFilenameForPath, getFilenamePrefix, highlightDifferences, invertMap, byteArrayToString, compressLZG, + byteArrayToUTF8, isProbablyBinary } from "./util"; import { StateRecorderImpl } from "./recorder"; // external libs (TODO) @@ -283,8 +284,8 @@ function handleFileUpload(files: File[]) { reader.onload = function(e) { var arrbuf = (e.target).result as ArrayBuffer; var data : FileData = new Uint8Array(arrbuf); - // convert to UTF8, unless it's a binary file (TODO) - if (path.endsWith("bin")) { + // convert to UTF8, unless it's a binary file + if (isProbablyBinary(data)) { // path.endsWith("bin")) { gotoMainFile = false; } else { data = byteArrayToUTF8(data); diff --git a/src/util.ts b/src/util.ts index aec86c9b..614629f1 100644 --- a/src/util.ts +++ b/src/util.ts @@ -270,42 +270,35 @@ export function stringToByteArray(s:string) : Uint8Array { return a; } -export function byteArrayToString(outdata : number[] | Uint8Array) : string { +export function byteArrayToString(data : number[] | Uint8Array) : string { var str = ""; - if (outdata != null) { + if (data != null) { var charLUT = new Array(); for (var i = 0; i < 256; ++i) charLUT[i] = String.fromCharCode(i); - var outlen = outdata.length; - for (var i = 0; i < outlen; i++) - str += charLUT[outdata[i]]; + var len = data.length; + for (var i = 0; i < len; i++) + str += charLUT[data[i]]; } return str; } -export function byteArrayToUTF8(outdata : number[] | Uint8Array) : string { +export function byteArrayToUTF8(data : number[] | Uint8Array) : string { var str = ""; var charLUT = new Array(); for (var i = 0; i < 128; ++i) charLUT[i] = String.fromCharCode(i); var c; - var outlen = outdata.length; - for (var i = 0; i < outlen;) - { - c = outdata[i++]; - if (c < 128) - { + var len = data.length; + for (var i = 0; i < len;) { + c = data[i++]; + if (c < 128) { str += charLUT[c]; - } - else - { - if ((c > 191) && (c < 224)) - { - c = ((c & 31) << 6) | (outdata[i++] & 63); - } - else - { - c = ((c & 15) << 12) | ((outdata[i] & 63) << 6) | (outdata[i+1] & 63); + } else { + if ((c >= 192) && (c < 224)) { + c = ((c & 31) << 6) | (data[i++] & 63); + } else { + c = ((c & 15) << 12) | ((data[i] & 63) << 6) | (data[i+1] & 63); i += 2; if (c == 0xfeff) continue; // ignore BOM } @@ -322,6 +315,38 @@ export function removeBOM(s:string) { return s; } +export function isProbablyBinary(data : number[] | Uint8Array) : boolean { + var score = 0; + // decode as UTF-8 + for (var i = 0; i < data.length;) { + let c = data[i++]; + if ((c & 0x80) == 0) { + // more likely binary if we see a NUL or obscure control character + if (c < 9 || (c >= 14 && c < 26) || c == 0x7f) { + score++; + break; + } + } else { + // look for invalid unicode sequences + var nextra = 0; + if ((c & 0xe0) == 0xc0) nextra = 1; + else if ((c & 0xf0) == 0xe0) nextra = 2; + else if ((c & 0xf8) == 0xf0) nextra = 3; + else { + score++; + break; + } + while (nextra--) { + if ((data[i++] & 0xc0) != 0x80) { + score++; + break; + } + } + } + } + return score > 0; +} + // need to load liblzg.js first export function compressLZG(em_module, inBuffer:number[], levelArg?:boolean) : Uint8Array { var level = levelArg || 9; diff --git a/test/cli/testutil.js b/test/cli/testutil.js index 355ddab2..5bb8742d 100644 --- a/test/cli/testutil.js +++ b/test/cli/testutil.js @@ -110,3 +110,12 @@ describe('LZG', function() { assert.equal(40976, rom.length); }); }); + +describe('string functions', function() { + it('Should detect binary', function() { + assert.ok(!util.isProbablyBinary([32,32,10,13,9,32,32,10,13])); + assert.ok(util.isProbablyBinary([32,32,0x80])); + assert.ok(!util.isProbablyBinary([32,32,0xc1,0x81,32,32,10,13])); + assert.ok(util.isProbablyBinary(NES_CONIO_ROM_LZG)); + }); +});