#!/usr/bin/python\r
-# -*- coding: <encoding name> -*-\r
+# -*- coding: ascii -*-\r
# Unicode Data Converter\r
#\r
# This file is part of the Public Domain C Library (PDCLib).\r
BIT_LOWER = 64\r
BIT_UPPER = 128\r
BIT_DIGIT = 256\r
+BIT_XDIGT = 512 # hexadecimal-digit flag; set on 0-9, A-F and a-f by the special table\r
\r
# Category to bitfield mapping\r
categories = {\r
'Sc': BIT_GRAPH, # Currency symbol\r
'Sk': BIT_GRAPH, # Non-letterlike modifier symbol\r
'So': BIT_GRAPH, # Other symbol\r
- 'Zs': BIT_SPACE | BIT_GRAPH | BIT_BLANK, # Non-zero-width space character\r
- 'Zl': BIT_SPACE | BIT_GRAPH, # Line separator\r
- 'Zp': BIT_SPACE | BIT_GRAPH, # Paragraph separator\r
+ 'Zs': BIT_SPACE, # Non-zero-width space character\r
+ 'Zl': BIT_SPACE, # Line separator\r
+ 'Zp': BIT_SPACE, # Paragraph separator\r
'Cc': BIT_CNTRL, # C0/C1 control codes\r
}\r
\r
+# Per-code-point flag overrides: a code point listed here gets exactly these bits instead of its category's bits\r
+special = {\r
+ # Blank characters\r
+ 0x0020: BIT_SPACE | BIT_BLANK, # space\r
+ 0x0009: BIT_SPACE | BIT_BLANK, # tab -- NOTE(review): presumably category Cc, so this override drops BIT_CNTRL; confirm intended\r
+\r
+ # Digits 0-9\r
+ 0x0030: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0031: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0032: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0033: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0034: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0035: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0036: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0037: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0038: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0039: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+\r
+ # A-F (hex uppercase)\r
+ 0x0041: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0042: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0043: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0044: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0045: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0046: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+\r
+\r
+ # a-f (hex lowercase)\r
+ 0x0061: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0062: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0063: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0064: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0065: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0066: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+}\r
+\r
in_file = open('UnicodeData.txt', 'r')\r
out_file = open('_PDCLIB_unicodedata.c', 'w')\r
try:\r
* in Exhibit 1 of the Unicode Terms of Use, found at\r
* http://www.unicode.org/copyright.html#Exhibit1\r
*/\r
+ #ifndef REGTEST\r
#include <_PDCLIB_locale.h>\r
\r
- _PDCLIB_wctype_t _PDCLIB_wctype[] = {\r
+ _PDCLIB_wcinfo_t _PDCLIB_wcinfo[] = {\r
// { value,\tflags,\tlower,\tupper\t}, // name\r
""")\r
for line in in_file:\r
num = int(num_hex, 16)\r
upper_case = int(upper_case_hex, 16) if len(upper_case_hex) else num\r
lower_case = int(lower_case_hex, 16) if len(lower_case_hex) else num\r
- bits = categories.get(category, 0)\r
+ bits = special.get(num, categories.get(category, 0))\r
\r
if upper_case == 0 and lower_case == 0 and bits == 0:\r
continue\r
out_file.write(" { 0x%X,\t0x%X,\t0x%X,\t0x%X }, // %s\n" % (\r
num, bits, lower_case, upper_case, name))\r
out_file.write('};\n\n')\r
- out_file.write('size_t _PDCLIB_wctype_size = sizeof(_PDCLIB_wctype) / sizeof(_PDCLIB_wctype[0]);\n\n')\r
+ out_file.write("""\r
+size_t _PDCLIB_wcinfo_size = sizeof(_PDCLIB_wcinfo) / sizeof(_PDCLIB_wcinfo[0]);\r
+#endif\r
+\r
+#ifdef TEST\r
+#include <_PDCLIB_test.h>\r
+int main( void )\r
+{\r
+ return TEST_RESULTS;\r
+}\r
+#endif\r
+\r
+""")\r
except:\r
in_file.close()\r
out_file.close()\r