mirror of
https://github.com/byteworksinc/ORCA-C.git
synced 2025-01-14 22:31:21 +00:00
219 lines
8.9 KiB
C
219 lines
8.9 KiB
C
|
/*
|
||
|
* This program is designed to parse the Unicode DerivedCoreProperties.txt
|
||
|
* file and produce tables indicating if a code point has the XID_Start or
|
||
|
* XID_Continue properties. This is needed to define the legal universal
|
||
|
* character names in identifiers under C23.
|
||
|
*
|
||
|
* The DerivedCoreProperties.txt file for the current Unicode version is at:
|
||
|
* https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
|
||
|
*/
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#define MAX_PLANE 16
|
||
|
|
||
|
/* Maximum number of ranges supported -- increase if necessary. */
|
||
|
#define MAX_RANGES 2000
|
||
|
|
||
|
#define MAX_LINE 1000
|
||
|
|
||
|
#define OUTPUT_FILE "CharTables.asm"
|
||
|
|
||
|
typedef struct CharRange {
|
||
|
unsigned long start, end;
|
||
|
} CharRange;
|
||
|
|
||
|
static CharRange XID_Start_Ranges[MAX_RANGES];
|
||
|
static CharRange XID_Continue_Ranges[MAX_RANGES];
|
||
|
|
||
|
static char line[MAX_LINE];
|
||
|
|
||
|
static int cmp(const void *a_, const void *b_) {
|
||
|
const CharRange *a = a_;
|
||
|
const CharRange *b = b_;
|
||
|
|
||
|
if (a->start < b->start) {
|
||
|
return -1;
|
||
|
} else if (a->start > b->start) {
|
||
|
return 1;
|
||
|
} else {
|
||
|
if (a->end < b->end) {
|
||
|
return -1;
|
||
|
} else if (a->end > b->end) {
|
||
|
return 1;
|
||
|
} else {
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int main(int argc, char *argv[]) {
|
||
|
FILE *infile, *outfile;
|
||
|
unsigned xid_start_idx = 0;
|
||
|
unsigned xid_continue_idx = 0;
|
||
|
CharRange range;
|
||
|
char property[101];
|
||
|
int i;
|
||
|
int last_plane;
|
||
|
|
||
|
if (argc != 2) {
|
||
|
fprintf(stderr, "Usage: %s DerivedCoreProperties.txt\n",
|
||
|
argc > 0 ? argv[0] : "GetIDChars");
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
|
||
|
infile = fopen(argv[1], "r");
|
||
|
if (!infile) {
|
||
|
fprintf(stderr, "Error opening %s\n", argv[1]);
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
|
||
|
outfile = fopen(OUTPUT_FILE, "w");
|
||
|
if (!outfile) {
|
||
|
fclose(infile);
|
||
|
fprintf(stderr, "Error opening %s\n", OUTPUT_FILE);
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
|
||
|
fprintf(outfile, "*****************************************************************\n");
|
||
|
fprintf(outfile, "*\n");
|
||
|
fprintf(outfile, "* %s\n", OUTPUT_FILE);
|
||
|
fprintf(outfile, "*\n");
|
||
|
fprintf(outfile, "* These tables record the Unicode code points that have the\n");
|
||
|
fprintf(outfile, "* XID_Start or XID_Continue properties as defined in the\n");
|
||
|
fprintf(outfile, "* Unicode Character Database. These define the legal\n");
|
||
|
fprintf(outfile, "* universal character names in identifiers under C23.\n");
|
||
|
fprintf(outfile, "*\n");
|
||
|
fprintf(outfile, "* THIS FILE IS AUTO-GENERATED FROM UNICODE DATA BY GenCharTbl.\n");
|
||
|
fprintf(outfile, "* DO NOT EDIT IT MANUALLY.\n");
|
||
|
fprintf(outfile, "*\n");
|
||
|
fprintf(outfile, "* Generated from:\n");
|
||
|
fgets(line, MAX_LINE, infile);
|
||
|
if (line[0]) line[strlen(line)-1] = 0;
|
||
|
fprintf(outfile, "* %-61s\n", line);
|
||
|
fgets(line, MAX_LINE, infile);
|
||
|
if (line[0]) line[strlen(line)-1] = 0;
|
||
|
fprintf(outfile, "* %-61s\n", line);
|
||
|
fprintf(outfile, "*\n");
|
||
|
fprintf(outfile, "*****************************************************************\n");
|
||
|
fprintf(outfile, "\n");
|
||
|
|
||
|
|
||
|
fseek(infile, 0, SEEK_SET);
|
||
|
|
||
|
for (; !feof(infile); fgets(line, MAX_LINE, infile)) {
|
||
|
int count = fscanf(infile, "%lx..%lx", &range.start, &range.end);
|
||
|
if (count == 1) {
|
||
|
range.end = range.start;
|
||
|
} else if (count != 2) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
count = fscanf(infile, " ; %100s", property);
|
||
|
if (count != 1) {
|
||
|
fclose(infile);
|
||
|
fclose(outfile);
|
||
|
remove(OUTPUT_FILE);
|
||
|
fprintf(stderr, "Unexpected file format\n");
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
|
||
|
if (strcmp(property, "XID_Start") == 0) {
|
||
|
XID_Start_Ranges[xid_start_idx++] = range;
|
||
|
//printf("XID_Start range: %04lx..%04lx\n", range.start, range.end);
|
||
|
//printf("%lu\n", range.end-range.start);
|
||
|
if (xid_start_idx == MAX_RANGES) {
|
||
|
fclose(infile);
|
||
|
fclose(outfile);
|
||
|
remove(OUTPUT_FILE);
|
||
|
fprintf(stderr, "Too many XID_Start ranges\n");
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
} else if (strcmp(property, "XID_Continue") == 0) {
|
||
|
if (bsearch(&range, XID_Start_Ranges, xid_start_idx,
|
||
|
sizeof(CharRange), cmp)) {
|
||
|
//printf("Skipping XID_Continue range: %04lx..%04lx\n", range.start, range.end);
|
||
|
continue;
|
||
|
}
|
||
|
XID_Continue_Ranges[xid_continue_idx++] = range;
|
||
|
//printf("XID_Continue range: %04lx..%04lx\n", range.start, range.end);
|
||
|
if (xid_continue_idx == MAX_RANGES) {
|
||
|
fclose(infile);
|
||
|
fclose(outfile);
|
||
|
remove(OUTPUT_FILE);
|
||
|
fprintf(stderr, "Too many XID_Continue ranges\n");
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fprintf(outfile, "* Declarations (to be copied into Table.pas):\n");
|
||
|
fprintf(outfile, "*\n");
|
||
|
fprintf(outfile, "* XID_Start_Table: array[0..%u] of charRange;\n", xid_start_idx-1);
|
||
|
fprintf(outfile, "* XID_Continue_Table: array[0..%u] of charRange;\n", xid_continue_idx-1);
|
||
|
fprintf(outfile, "* XID_Start_PlaneStart: array[0..17] of integer;\n");
|
||
|
fprintf(outfile, "* XID_Continue_PlaneStart: array[0..17] of integer;\n");
|
||
|
fprintf(outfile, "\n");
|
||
|
|
||
|
fprintf(outfile, "XID_Start_Table start\n");
|
||
|
last_plane = -1;
|
||
|
for (i = 0; i < xid_start_idx; i++) {
|
||
|
while (XID_Start_Ranges[i].start >> 16 != last_plane)
|
||
|
fprintf(outfile, "plane%d anop\n", ++last_plane);
|
||
|
if (XID_Start_Ranges[i].end >> 16 != last_plane) {
|
||
|
fprintf(stderr, "Range spans multiple planes\n");
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
fprintf(outfile, " dc i2'$%04lx,$%04lx'\n",
|
||
|
XID_Start_Ranges[i].start & 0xFFFF,
|
||
|
XID_Start_Ranges[i].end & 0xFFFF);
|
||
|
}
|
||
|
while (last_plane < MAX_PLANE + 1)
|
||
|
fprintf(outfile, "plane%d anop\n", ++last_plane);
|
||
|
fprintf(outfile, "\n");
|
||
|
fprintf(outfile, "XID_Start_PlaneStart entry\n");
|
||
|
for (i = 0; i <= MAX_PLANE + 1; i++) {
|
||
|
fprintf(outfile, " dc i2'(plane%d-plane0)/4'\n", i);
|
||
|
}
|
||
|
fprintf(outfile, " end\n");
|
||
|
|
||
|
fprintf(outfile, "\n");
|
||
|
fprintf(outfile, "\n");
|
||
|
|
||
|
fprintf(outfile, "* This table only contains XID_Continue ranges that are not in XID_Start_Table.\n");
|
||
|
fprintf(outfile, "* A code point has the XID_Continue property if it is in either table.\n");
|
||
|
fprintf(outfile, "XID_Continue_Table start\n");
|
||
|
last_plane = -1;
|
||
|
for (i = 0; i < xid_continue_idx; i++) {
|
||
|
while (XID_Continue_Ranges[i].start >> 16 != last_plane)
|
||
|
fprintf(outfile, "plane%d anop\n", ++last_plane);
|
||
|
if (XID_Continue_Ranges[i].end >> 16 != last_plane) {
|
||
|
fprintf(stderr, "Range spans multiple planes\n");
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
fprintf(outfile, " dc i2'$%04lx,$%04lx'\n",
|
||
|
XID_Continue_Ranges[i].start & 0xFFFF,
|
||
|
XID_Continue_Ranges[i].end & 0xFFFF);
|
||
|
}
|
||
|
while (last_plane < MAX_PLANE + 1)
|
||
|
fprintf(outfile, "plane%d anop\n", ++last_plane);
|
||
|
fprintf(outfile, "\n");
|
||
|
fprintf(outfile, "XID_Continue_PlaneStart entry\n");
|
||
|
for (i = 0; i <= MAX_PLANE + 1; i++) {
|
||
|
fprintf(outfile, " dc i2'(plane%d-plane0)/4'\n", i);
|
||
|
}
|
||
|
fprintf(outfile, " end\n");
|
||
|
|
||
|
if (ferror(infile) || ferror(outfile)) {
|
||
|
fclose(infile);
|
||
|
fclose(outfile);
|
||
|
remove(OUTPUT_FILE);
|
||
|
fprintf(stderr, "I/O error\n");
|
||
|
return EXIT_FAILURE;
|
||
|
}
|
||
|
|
||
|
fclose(infile);
|
||
|
fclose(outfile);
|
||
|
}
|