mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-10-05 19:56:58 +00:00
153 lines
5.2 KiB
Python
153 lines
5.2 KiB
Python
import sys
|
|
import os
|
|
import hashlib
|
|
import urllib
|
|
import itertools
|
|
import re
|
|
import json
|
|
import glob
|
|
import shutil
|
|
|
|
try:
|
|
import genshi
|
|
from genshi.template import MarkupTemplate
|
|
|
|
from html5lib.tests import support
|
|
except ImportError:
|
|
print """This script requires the Genshi templating library and html5lib source
|
|
|
|
It is recommended that these are installed in a virtualenv:
|
|
|
|
virtualenv venv
|
|
source venv/bin/activate
|
|
pip install genshi
|
|
cd venv
|
|
git clone git@github.com:html5lib/html5lib-python.git html5lib
|
|
cd html5lib
|
|
git submodule init
|
|
git submodule update
|
|
pip install -e ./
|
|
|
|
Then run this script again, with the virtual environment still active.
|
|
When you are done, type "deactivate" to deactivate the virtual environment.
|
|
"""
|
|
|
|
TESTS_PATH = "html/syntax/parsing/"
|
|
|
|
def get_paths():
|
|
script_path = os.path.split(os.path.abspath(__file__))[0]
|
|
repo_base = get_repo_base(script_path)
|
|
tests_path = os.path.join(repo_base, TESTS_PATH)
|
|
return script_path, tests_path
|
|
|
|
def get_repo_base(path):
|
|
while path:
|
|
if os.path.exists(os.path.join(path, ".git")):
|
|
return path
|
|
else:
|
|
path = os.path.split(path)[0]
|
|
|
|
def get_expected(data):
|
|
data = "#document\n" + data
|
|
return data
|
|
|
|
def get_hash(data, container=None):
|
|
if container == None:
|
|
container = ""
|
|
return hashlib.sha1("#container%s#data%s"%(container.encode("utf8"),
|
|
data.encode("utf8"))).hexdigest()
|
|
|
|
def make_tests(script_dir, out_dir, input_file_name, test_data):
|
|
tests = []
|
|
innerHTML_tests = []
|
|
ids_seen = {}
|
|
print input_file_name
|
|
for test in test_data:
|
|
if "script-off" in test:
|
|
continue
|
|
is_innerHTML = "document-fragment" in test
|
|
data = test["data"]
|
|
container = test["document-fragment"] if is_innerHTML else None
|
|
assert test["document"], test
|
|
expected = get_expected(test["document"])
|
|
test_list = innerHTML_tests if is_innerHTML else tests
|
|
test_id = get_hash(data, container)
|
|
if test_id in ids_seen:
|
|
print "WARNING: id %s seen multiple times in file %s this time for test (%s, %s) before for test %s, skipping"%(test_id, input_file_name, container, data, ids_seen[test_id])
|
|
continue
|
|
ids_seen[test_id] = (container, data)
|
|
test_list.append({'string_uri_encoded_input':"\"%s\""%urllib.quote(data.encode("utf8")),
|
|
'input':data,
|
|
'expected':expected,
|
|
'string_escaped_expected':json.dumps(urllib.quote(expected.encode("utf8"))),
|
|
'id':test_id,
|
|
'container':container
|
|
})
|
|
path_normal = None
|
|
if tests:
|
|
path_normal = write_test_file(script_dir, out_dir,
|
|
tests, "html5lib_%s"%input_file_name,
|
|
"html5lib_test.xml")
|
|
path_innerHTML = None
|
|
if innerHTML_tests:
|
|
path_innerHTML = write_test_file(script_dir, out_dir,
|
|
innerHTML_tests, "html5lib_innerHTML_%s"%input_file_name,
|
|
"html5lib_test_fragment.xml")
|
|
|
|
return path_normal, path_innerHTML
|
|
|
|
def write_test_file(script_dir, out_dir, tests, file_name, template_file_name):
|
|
file_name = os.path.join(out_dir, file_name + ".html")
|
|
short_name = os.path.split(file_name)[1]
|
|
|
|
with open(os.path.join(script_dir, template_file_name)) as f:
|
|
template = MarkupTemplate(f)
|
|
|
|
stream = template.generate(file_name=short_name, tests=tests)
|
|
|
|
with open(file_name, "w") as f:
|
|
f.write(stream.render('html', doctype='html5',
|
|
encoding="utf8"))
|
|
return file_name
|
|
|
|
def escape_js_string(in_data):
|
|
return in_data.encode("utf8").encode("string-escape")
|
|
|
|
def serialize_filenames(test_filenames):
|
|
return "[" + ",\n".join("\"%s\""%item for item in test_filenames) + "]"
|
|
|
|
def main():
|
|
|
|
script_dir, out_dir = get_paths()
|
|
|
|
test_files = []
|
|
inner_html_files = []
|
|
|
|
if len(sys.argv) > 2:
|
|
test_iterator = itertools.izip(
|
|
itertools.repeat(False),
|
|
sorted(os.path.abspath(item) for item in
|
|
glob.glob(os.path.join(sys.argv[2], "*.dat"))))
|
|
else:
|
|
test_iterator = itertools.chain(
|
|
itertools.izip(itertools.repeat(False),
|
|
sorted(support.get_data_files("tree-construction"))),
|
|
itertools.izip(itertools.repeat(True),
|
|
sorted(support.get_data_files(
|
|
os.path.join("tree-construction", "scripted")))))
|
|
|
|
for (scripted, test_file) in test_iterator:
|
|
input_file_name = os.path.splitext(os.path.split(test_file)[1])[0]
|
|
if scripted:
|
|
input_file_name = "scripted_" + input_file_name
|
|
test_data = support.TestData(test_file)
|
|
test_filename, inner_html_file_name = make_tests(script_dir, out_dir,
|
|
input_file_name, test_data)
|
|
if test_filename is not None:
|
|
test_files.append(test_filename)
|
|
if inner_html_file_name is not None:
|
|
inner_html_files.append(inner_html_file_name)
|
|
|
|
if __name__ == "__main__":
|
|
main()
|