<!-- mirror of https://github.com/classilla/tenfourfox.git; synced 2024-10-26 13:27:27 +00:00; 388 lines, 7.5 KiB, HTML -->
<!doctype html>
<title>document.characterSet and inputEncoding normalization tests</title>
<link rel=author title="Aryeh Gregor" href=ayg@aryeh.name>
<meta name=timeout content=long>
<div id=log></div>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<style>iframe { display: none }</style>
<script>
"use strict";

// Maps each encoding name (lower case) to the list of labels that are expected
// to resolve to it.  The label lists are taken straight from
// https://encoding.spec.whatwg.org/, with two additions noted inline for
// labels that resolve differently when declared through <meta>.
var encodingMap = {
  "utf-8": [
    "unicode-1-1-utf-8",
    "utf-8",
    "utf8",
    // As we use <meta>, utf-16 will map to utf-8 per
    // https://html.spec.whatwg.org/multipage/#documentEncoding
    "utf-16",
    "utf-16le",
    "utf-16be",
  ],
  "ibm866": [
    "866",
    "cp866",
    "csibm866",
    "ibm866",
  ],
  "iso-8859-2": [
    "csisolatin2",
    "iso-8859-2",
    "iso-ir-101",
    "iso8859-2",
    "iso88592",
    "iso_8859-2",
    "iso_8859-2:1987",
    "l2",
    "latin2",
  ],
  "iso-8859-3": [
    "csisolatin3",
    "iso-8859-3",
    "iso-ir-109",
    "iso8859-3",
    "iso88593",
    "iso_8859-3",
    "iso_8859-3:1988",
    "l3",
    "latin3",
  ],
  "iso-8859-4": [
    "csisolatin4",
    "iso-8859-4",
    "iso-ir-110",
    "iso8859-4",
    "iso88594",
    "iso_8859-4",
    "iso_8859-4:1988",
    "l4",
    "latin4",
  ],
  "iso-8859-5": [
    "csisolatincyrillic",
    "cyrillic",
    "iso-8859-5",
    "iso-ir-144",
    "iso8859-5",
    "iso88595",
    "iso_8859-5",
    "iso_8859-5:1988",
  ],
  "iso-8859-6": [
    "arabic",
    "asmo-708",
    "csiso88596e",
    "csiso88596i",
    "csisolatinarabic",
    "ecma-114",
    "iso-8859-6",
    "iso-8859-6-e",
    "iso-8859-6-i",
    "iso-ir-127",
    "iso8859-6",
    "iso88596",
    "iso_8859-6",
    "iso_8859-6:1987",
  ],
  "iso-8859-7": [
    "csisolatingreek",
    "ecma-118",
    "elot_928",
    "greek",
    "greek8",
    "iso-8859-7",
    "iso-ir-126",
    "iso8859-7",
    "iso88597",
    "iso_8859-7",
    "iso_8859-7:1987",
    "sun_eu_greek",
  ],
  "iso-8859-8": [
    "csiso88598e",
    "csisolatinhebrew",
    "hebrew",
    "iso-8859-8",
    "iso-8859-8-e",
    "iso-ir-138",
    "iso8859-8",
    "iso88598",
    "iso_8859-8",
    "iso_8859-8:1988",
    "visual",
  ],
  "iso-8859-8-i": [
    "csiso88598i",
    "iso-8859-8-i",
    "logical",
  ],
  "iso-8859-10": [
    "csisolatin6",
    "iso-8859-10",
    "iso-ir-157",
    "iso8859-10",
    "iso885910",
    "l6",
    "latin6",
  ],
  "iso-8859-13": [
    "iso-8859-13",
    "iso8859-13",
    "iso885913",
  ],
  "iso-8859-14": [
    "iso-8859-14",
    "iso8859-14",
    "iso885914",
  ],
  "iso-8859-15": [
    "csisolatin9",
    "iso-8859-15",
    "iso8859-15",
    "iso885915",
    "iso_8859-15",
    "l9",
  ],
  "iso-8859-16": [
    "iso-8859-16",
  ],
  "koi8-r": [
    "cskoi8r",
    "koi",
    "koi8",
    "koi8-r",
    "koi8_r",
  ],
  "koi8-u": [
    "koi8-u",
  ],
  "macintosh": [
    "csmacintosh",
    "mac",
    "macintosh",
    "x-mac-roman",
  ],
  "windows-874": [
    "dos-874",
    "iso-8859-11",
    "iso8859-11",
    "iso885911",
    "tis-620",
    "windows-874",
  ],
  "windows-1250": [
    "cp1250",
    "windows-1250",
    "x-cp1250",
  ],
  "windows-1251": [
    "cp1251",
    "windows-1251",
    "x-cp1251",
  ],
  "windows-1252": [
    "ansi_x3.4-1968",
    "ascii",
    "cp1252",
    "cp819",
    "csisolatin1",
    "ibm819",
    "iso-8859-1",
    "iso-ir-100",
    "iso8859-1",
    "iso88591",
    "iso_8859-1",
    "iso_8859-1:1987",
    "l1",
    "latin1",
    "us-ascii",
    "windows-1252",
    "x-cp1252",
    // As we use <meta>, x-user-defined will map to windows-1252 per
    // https://html.spec.whatwg.org/multipage/#documentEncoding
    "x-user-defined"
  ],
  "windows-1253": [
    "cp1253",
    "windows-1253",
    "x-cp1253",
  ],
  "windows-1254": [
    "cp1254",
    "csisolatin5",
    "iso-8859-9",
    "iso-ir-148",
    "iso8859-9",
    "iso88599",
    "iso_8859-9",
    "iso_8859-9:1989",
    "l5",
    "latin5",
    "windows-1254",
    "x-cp1254",
  ],
  "windows-1255": [
    "cp1255",
    "windows-1255",
    "x-cp1255",
  ],
  "windows-1256": [
    "cp1256",
    "windows-1256",
    "x-cp1256",
  ],
  "windows-1257": [
    "cp1257",
    "windows-1257",
    "x-cp1257",
  ],
  "windows-1258": [
    "cp1258",
    "windows-1258",
    "x-cp1258",
  ],
  "x-mac-cyrillic": [
    "x-mac-cyrillic",
    "x-mac-ukrainian",
  ],
  "gbk": [
    "chinese",
    "csgb2312",
    "csiso58gb231280",
    "gb2312",
    "gb_2312",
    "gb_2312-80",
    "gbk",
    "iso-ir-58",
    "x-gbk",
  ],
  "gb18030": [
    "gb18030",
  ],
  "hz-gb-2312": [
    "hz-gb-2312",
  ],
  "big5": [
    "big5",
    "big5-hkscs",
    "cn-big5",
    "csbig5",
    "x-x-big5",
  ],
  "euc-jp": [
    "cseucpkdfmtjapanese",
    "euc-jp",
    "x-euc-jp",
  ],
  "iso-2022-jp": [
    "csiso2022jp",
    "iso-2022-jp",
  ],
  "shift_jis": [
    "csshiftjis",
    "ms_kanji",
    "shift-jis",
    "shift_jis",
    "sjis",
    "windows-31j",
    "x-sjis",
  ],
  "euc-kr": [
    "cseuckr",
    "csksc56011987",
    "euc-kr",
    "iso-ir-149",
    "korean",
    "ks_c_5601-1987",
    "ks_c_5601-1989",
    "ksc5601",
    "ksc_5601",
    "windows-949",
  ],
  "replacement": [
    "csiso2022kr",
    "iso-2022-cn",
    "iso-2022-cn-ext",
    "iso-2022-kr",
  ],
};

// Rebuild every label list in encodingMap.  Upper-case, alternating-case and
// whitespace-padded variants of each label are computed here, but the code that
// would add them to the list is commented out below, so each list currently
// ends up holding only the original lower-case labels, in the original order.
Object.keys(encodingMap).forEach(function(name) {
  var lower = encodingMap[name];

  // Fully upper-cased variant of each label.
  var upper = lower.map(function(s) { return s.toUpperCase() });

  // Alternating-case variant: characters at even indices are upper-cased,
  // characters at odd indices are left untouched.
  var mixed = lower.map(function(s) {
    var ret = "";
    for (var i = 0; i < s.length; i += 2) {
      ret += s[i].toUpperCase();
      if (i + 1 < s.length) {
        ret += s[i + 1];
      }
    }
    return ret;
  });

  // Variant wrapped in space, tab, LF, FF and CR on both sides.
  var spacey = lower.map(function(s) {
    return " \t\n\f\r" + s + " \t\n\f\r";
  });

  encodingMap[name] = [];
  for (var i = 0; i < lower.length; i++) {
    encodingMap[name].push(lower[i]);
    // The variant labels are disabled; re-enabling this block adds them to the
    // list of labels that get tested.
    /*
    if (lower[i] != upper[i]) {
      encodingMap[name].push(upper[i]);
    }
    if (lower[i] != mixed[i] && upper[i] != mixed[i]) {
      encodingMap[name].push(mixed[i]);
    }
    encodingMap[name].push(spacey[i]);
    */
  }
});

/**
 * Returns the spelling of an encoding name that the assertions in this file
 * compare document.characterSet / inputEncoding / charset against.
 *
 * "big5" and "shift_jis" have dedicated mixed-case spellings; every other
 * input is returned upper-cased.  The comparison is case-sensitive, so only
 * the exact lower-case strings hit the special cases.
 *
 * NOTE(review): whether plain upper-casing is the right expectation for every
 * remaining name (e.g. "windows-1252") is not established here -- confirm
 * against the DOM specification before relying on it.
 *
 * @param {string} encoding_label - encoding name as used for the encodingMap keys.
 * @returns {string} the expected casing of that name.
 */
function expected_case(encoding_label) {
  switch (encoding_label) {
    case 'big5':
      return 'Big5';
    case 'shift_jis':
      return 'Shift_JIS';
    default:
      return encoding_label.toUpperCase();
  }
}

// Register three async tests for every (name, label) pair in encodingMap.
// Each pair loads "encoding.py?label=<label>" in its own iframe and, once the
// iframe has loaded, checks that the document's characterSet, inputEncoding
// and charset all equal expected_case(name).
Object.keys(encodingMap).forEach(function(name) {
  // The expected value depends only on the name, so compute it once per name.
  var expected = expected_case(name);

  encodingMap[name].forEach(function(label) {
    var iframe = document.createElement("iframe");
    var t = async_test("Name " + format_value(name) +
                       " has label " + format_value(label) + " (characterSet)");
    var t2 = async_test("Name " + format_value(name) +
                        " has label " + format_value(label) + " (inputEncoding)");
    var t3 = async_test("Name " + format_value(name) +
                        " has label " + format_value(label) + " (charset)");
    // Disabled alternative that declares the label from a data: URL instead of
    // going through encoding.py.
    /*
    iframe.src = "data:text/html,<!doctype html>" +
                 '<meta charset="' + label + '">';
    */
    iframe.src = "encoding.py?label=" + label;
    iframe.onload = function() {
      t.step(function() {
        assert_equals(iframe.contentDocument.characterSet, expected);
      });
      t2.step(function() {
        assert_equals(iframe.contentDocument.inputEncoding, expected);
      });
      t3.step(function() {
        assert_equals(iframe.contentDocument.charset, expected);
      });
      // The iframe is removed only after all three properties have been read.
      document.body.removeChild(iframe);
      t.done();
      t2.done();
      t3.done();
    };
    document.body.appendChild(iframe);
  });
});
</script>
<!-- vim: set expandtab tabstop=2 shiftwidth=2: -->