mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-12-27 05:30:25 +00:00
357 lines
8.8 KiB
Perl
357 lines
8.8 KiB
Perl
#!/usr/bin/perl
|
|
#
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
######################################################################
#
# Global scratch variables.  They are initialised here but are not
# referenced anywhere else in the visible part of this script; they
# are kept so that nothing depending on them breaks.
#
######################################################################
%utot = ();
$ui = 0;
$li = 0;

######################################################################
#
# Open the unicode database file (input)
#
######################################################################
# Three-argument open keeps the mode separate from the path, and $!
# tells the user *why* the open failed (missing file, permissions, ...).
open(UNICODATA, "<", "../../unicharutil/tools/UnicodeData-Latest.txt")
    || die "cannot find UnicodeData-Latest.txt: $!";

######################################################################
#
# Open the JIS X 4051 Class file (input)
#
######################################################################
open(CLASS, "<", "jisx4051class.txt")
    || die "cannot find jisx4051class.txt: $!";

######################################################################
#
# Open the JIS X 4051 Class simplified mapping (input)
#
######################################################################
open(SIMP, "<", "jisx4051simp.txt")
    || die "cannot find jisx4051simp.txt: $!";

######################################################################
#
# Open the HTML analysis report (output)
#
######################################################################
open(OUT, ">", "anzx4051.html")
    || die "cannot open output anzx4051.html file: $!";

######################################################################
#
# Open the generated C header (output)
#
######################################################################
open(HEADER, ">", "../src/jisx4051class.h")
    || die "cannot open output ../src/jisx4051class.h file: $!";

######################################################################
#
# Write the licence comment and the opening markup of the HTML report
#
######################################################################
# The text contains nothing to interpolate, so a non-interpolating
# here-document is used.
$hthmlheader = <<'END_OF_HTML';
<!-- This Source Code Form is subject to the terms of the Mozilla Public
- License, v. 2.0. If a copy of the MPL was not distributed with this
- file, You can obtain one at http://mozilla.org/MPL/2.0/. -->

<HTML>
<HEAD>
<TITLE>
Analysis of JIS X 4051 to Unicode General Category Mapping
</TITLE>
</HEAD>
<BODY>
<H1>
Analysis of JIS X 4051 to Unicode General Category Mapping
</H1>
END_OF_HTML
print OUT $hthmlheader;

######################################################################
#
# Write the licence and the "generated file" notice at the top of the
# C header
#
######################################################################
# The text contains nothing to interpolate, so a non-interpolating
# here-document is used.
$npl = <<'END_OF_NPL';
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/lwbrk/tools/anzx4051.pl
*/
END_OF_NPL
print HEADER $npl;

######################################################################
#
# Lookup tables filled in while the input files are read
#
######################################################################
%occ = ();         # numeric code point -> "<simplified class> | <JIS class>"
%gcat = ();        # code point as written in UnicodeData -> general category
%dcat = ();        # same key -> first letter of the general category
%simp = ();        # JIS X 4051 class -> simplified class
%gcount = ();      # general category -> number of UnicodeData entries
%dcount = ();      # first letter of category -> number of entries
%sccount = ();     # simplified class -> number of mapping lines
%rangecount = ();  # " <first two hex digits>:<simplified class>" -> count

######################################################################
#
# Process the unicode database line by line
#
######################################################################
while (<UNICODATA>) {
    # chomp only strips a trailing newline; chop would also eat the last
    # data character of a final line that has no newline.
    chomp;

    # Fields are ';'-separated: field 0 is the code point, field 2 the
    # general category.
    my @field    = split(/;/, $_);
    my $code     = $field[0];
    my $category = $field[2];
    my $major    = substr($category, 0, 1);

    $gcat{$code} = $category;
    $dcat{$code} = $major;
    $gcount{$category}++;
    $dcount{$major}++;
}
# The handle is UNICODATA; the original closed a non-existent "UNIDATA".
close(UNICODATA);

######################################################################
#
# Read the simplified mapping: each line is "<class>;<simplified class>"
#
######################################################################
while (<SIMP>) {
    # chomp (not chop) so that a final line without a newline does not
    # lose the last character of its simplified class.
    chomp;

    my ($class, $simple) = (split(/;/, $_))[0, 1];

    $simp{$class} = $simple;
    $sccount{$simple}++;
}
close(SIMP);

# GetClass($u)
#
# Returns the general category for the numeric code point $u.
#
# The value recorded in %gcat (keyed by the hex form of $u) wins.  Code
# points without an entry fall back to fixed ranges:
#   U+3400..U+9FA5  -> "Han"
#   U+AC00..U+D7A3  -> "Lo"
#   U+D800..U+DFFF  -> "Cs"
#   U+E000..U+F8FF  -> "Co"
# For anything else a warning is printed to STDOUT and the empty string
# is returned.  (Previously the printf status, 1, leaked out as the
# "category".)
sub GetClass{
    my ($u) = @_;
    my $hex = DecToHex($u);

    # Lexical on purpose: the original assigned the package global $g.
    my $g = $gcat{$hex};
    if (defined($g) && $g ne "") {
        return $g;
    }

    if ((0x3400 <= $u) && ($u <= 0x9fa5)) {
        return "Han";
    }
    if ((0xac00 <= $u) && ($u <= 0xd7a3)) {
        return "Lo";
    }
    # The three adjacent surrogate sub-ranges all map to "Cs".
    if ((0xd800 <= $u) && ($u <= 0xdfff)) {
        return "Cs";
    }
    if ((0xe000 <= $u) && ($u <= 0xf8ff)) {
        return "Co";
    }

    printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
    return "";
}

# GetDClass($u)
#
# Returns the one-letter major category for the numeric code point $u.
#
# The value recorded in %dcat (keyed by the hex form of $u) wins.  Code
# points without an entry fall back to fixed ranges:
#   U+3400..U+9FA5  -> "Han"
#   U+AC00..U+D7A3  -> "L"
#   U+D800..U+F8FF  -> "C"
# For anything else a warning is printed to STDOUT and the empty string
# is returned.  (Previously the printf status, 1, leaked out as the
# "category".)
sub GetDClass{
    my ($u) = @_;
    my $hex = DecToHex($u);

    # Lexical on purpose: the original assigned the package global $g.
    my $g = $dcat{$hex};
    if (defined($g) && $g ne "") {
        return $g;
    }

    if ((0x3400 <= $u) && ($u <= 0x9fa5)) {
        return "Han";
    }
    if ((0xac00 <= $u) && ($u <= 0xd7a3)) {
        return "L";
    }
    # Four adjacent sub-ranges (D800-DB7F, DB80-DBFF, DC00-DFFF, E000-F8FF)
    # all map to "C", so they are tested as a single range.
    if ((0xd800 <= $u) && ($u <= 0xf8ff)) {
        return "C";
    }

    printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
    return "";
}

# DecToHex($d)
#
# Formats the number $d as upper-case hexadecimal, zero-padded to at
# least four digits (255 -> "00FF"); wider values are not truncated.
sub DecToHex{
    my $value = shift;
    return sprintf('%04X', $value);
}

%gtotal = ();   # "<simplified class><general category>" -> code point count
%dtotal = ();   # "<simplified class><major category>"   -> code point count

######################################################################
#
# Read the JIS X 4051 class file.  Each line is
# "<first hex>;<last hex or empty>;<class>"; an empty second field
# means a single code point.
#
######################################################################
while (<CLASS>) {
    # chomp (not chop) so that a final line without a newline does not
    # lose the last character of its class field.
    chomp;

    my @field = split(/;/, $_);
    my $class = $field[2];

    # Classes whose name starts with "a" are not analysed.
    next if substr($class, 0, 1) eq "a";

    my $sc   = $simp{$class};
    my $low  = hex($field[0]);
    my $high = ($field[1] eq "") ? $low : hex($field[1]);

    for (my $k = $low; $k <= $high; $k++) {
        # The first definition of a code point wins; later ones are
        # ignored silently.  Uncomment to report them:
        # printf "WARNING !! Conflicting definition!!! U+%s -> [%s] [%s | %s]\n",
        #     DecToHex($k), $occ{$k} , $class , $sc;
        next if exists($occ{$k});

        $occ{$k} = $sc . " | " . $class;

        my $gclass = GetClass($k);
        my $dclass = GetDClass($k);
        $gtotal{$sc . $gclass}++;
        $dtotal{$sc . $dclass}++;

        # Bucket by the first two hex digits of the code point.
        my $u  = DecToHex($k);
        my $rk = " " . substr($u,0,2) . ":" . $sc;
        $rangecount{$rk}++;
    }
}

#print %gtotal;
#print %dtotal;

# printreport()
#
# Writes two HTML tables to the OUT filehandle:
#   1. one row per simplified class (keys of %sccount), with a column per
#      major category (%dcount), a total of those, and a column per
#      general category (%gcount); cells come from %dtotal / %gtotal;
#   2. one row per two-hex-digit prefix 00..4E that has at least one
#      counted code point, with a column per simplified class; cells come
#      from %rangecount.
# Missing counts are written as empty cells.
sub printreport
{
    # The hashes are not modified below, so sort each key set once.
    my @majors     = sort(keys %dcount);
    my @categories = sort(keys %gcount);
    my @classes    = sort(keys %sccount);

    # ---- table 1: simplified class x category ----
    print OUT "<TABLE BORDER=3>\n";
    print OUT "<TR BGCOLOR=blue><TH><TH>\n";

    print OUT "<TD BGCOLOR=red>$_</TD>\n" foreach @majors;

    print OUT "<TD BGCOLOR=white>Total</TD>\n";
    print OUT "<TD BGCOLOR=yellow>$_</TD>\n" foreach @categories;
    print OUT "</TR>\n";

    foreach my $class (@classes) {
        print OUT "<TR><TH>$class<TH>\n";

        my $total = 0;
        foreach my $major (@majors) {
            my $count = $dtotal{$class . $major};
            $total += $count;
            print OUT "<TD>$count</TD>\n";
        }

        print OUT "<TD BGCOLOR=white>$total</TD>\n";

        foreach my $category (@categories) {
            my $count = $gtotal{$class . $category};
            print OUT "<TD>$count</TD>\n";
        }

        print OUT "</TR>\n";
    }
    print OUT "</TABLE>\n";

    # ---- table 2: hex prefix x simplified class ----
    print OUT "<TABLE BORDER=3>\n";
    print OUT "<TR BGCOLOR=blue><TH><TH>\n";

    print OUT "<TD BGCOLOR=red>$_</TD>\n" foreach @classes;

    print OUT "</TR>\n";

    foreach my $page (0x00 .. 0x4e) {
        my $prefix = sprintf("%02X", $page);
        my $row    = "<TR><TH>" . $prefix . "<TH>\n";
        my $seen   = 0;

        foreach my $class (@classes) {
            my $count = $rangecount{" " . $prefix . ":" . $class};
            $row  .= sprintf("<TD>%s</TD>\n", $count);
            $seen += $count;
        }

        $row .= "</TR>\n";

        # Rows with no counted code point are left out.
        print OUT $row if $seen != 0;
    }
    print OUT "</TABLE>\n";
}
printreport();

# printarray($r, $def)
#
# Writes to HEADER a C array "gLBClass<$r>" of 32 uint32_t literals that
# covers the 256 code points starting at hex($r) * 256.  Each literal
# packs eight code points as eight characters after "0x", highest code
# point first.  A code point's character is the second character of its
# %occ entry, or $def when it has no entry.  Also echoes "[$r || $def]"
# to STDOUT.
sub printarray
{
    my ($r, $def) = @_;
    printf "[%s || %s]\n", $r, $def;

    my $base = hex($r) * 256;
    printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;

    for (my $offset = 0; $offset < 256; $offset += 8) {
        my $first = $base + $offset;
        my $last  = $first + 7;

        # Assemble the literal from the highest code point down to the lowest.
        my $word = "0x";
        foreach my $cp (reverse($first .. $last)) {
            $word .= exists($occ{$cp}) ? substr($occ{$cp}, 1, 1) : $def;
        }

        printf HEADER "%s", $word;
        printf HEADER ", // U+%04X - U+%04X\n", $first, $last;
    }

    print HEADER "};\n\n";
}

######################################################################
#
# Emit one lookup table per 256-code-point page.  Each entry is
# [page (high byte, hex), character used for unclassified code points];
# the order below is the order of the arrays in the generated header.
#
######################################################################
foreach my $table (
    [ "00", "7" ],
    [ "20", "7" ],
    [ "21", "7" ],
    [ "30", "5" ],
    [ "0E", "8" ],
    [ "17", "7" ],
) {
    printarray(@$table);
}

#print %rangecount;

######################################################################
#
# Finish the HTML report and close files
#
######################################################################
# The report header opens <HTML> and <BODY>; terminate them here so the
# document is complete.
print OUT "</BODY>\n</HTML>\n";

# Buffered write errors (disk full, ...) only surface at close time, so
# the two output handles are checked.
close(HEADER) || die "cannot finish writing ../src/jisx4051class.h: $!";
close(CLASS);
close(OUT) || die "cannot finish writing anzx4051.html: $!";