BIT_LOWER = 64\r
BIT_UPPER = 128\r
BIT_DIGIT = 256\r
+BIT_XDIGT = 512\r
\r
# Category to bitfield mapping\r
categories = {\r
'Cc': BIT_CNTRL, # C0/C1 control codes\r
}\r
\r
+# Per-code-point overrides: an entry here is used instead of (not OR-ed with) the categories value, so it must list every bit for that character\r
+special = {\r
+ # Digits 0-9 (U+0030..U+0039): hex digit, decimal digit, graphic\r
+ 0x0030: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0031: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0032: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0033: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0034: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0035: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0036: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0037: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0038: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+ 0x0039: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
+\r
+ # A-F (U+0041..U+0046): uppercase letters that are also hex digits\r
+ 0x0041: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0042: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0043: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0044: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0045: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+ 0x0046: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER,\r
+\r
+\r
+ # a-f (U+0061..U+0066): lowercase letters that are also hex digits\r
+ 0x0061: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0062: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0063: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0064: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0065: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+ 0x0066: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER,\r
+}\r
+\r
in_file = open('UnicodeData.txt', 'r')\r
out_file = open('_PDCLIB_unicodedata.c', 'w')\r
try:\r
num = int(num_hex, 16)\r
upper_case = int(upper_case_hex, 16) if len(upper_case_hex) else num\r
lower_case = int(lower_case_hex, 16) if len(lower_case_hex) else num\r
- bits = categories.get(category, 0)\r
+ bits = special.get(num, categories.get(category, 0))\r
\r
if upper_case == 0 and lower_case == 0 and bits == 0:\r
continue\r
{ 0x2D, 0x18, 0x2D, 0x2D }, // HYPHEN-MINUS\r
{ 0x2E, 0x18, 0x2E, 0x2E }, // FULL STOP\r
{ 0x2F, 0x18, 0x2F, 0x2F }, // SOLIDUS\r
- { 0x30, 0x108, 0x30, 0x30 }, // DIGIT ZERO\r
- { 0x31, 0x108, 0x31, 0x31 }, // DIGIT ONE\r
- { 0x32, 0x108, 0x32, 0x32 }, // DIGIT TWO\r
- { 0x33, 0x108, 0x33, 0x33 }, // DIGIT THREE\r
- { 0x34, 0x108, 0x34, 0x34 }, // DIGIT FOUR\r
- { 0x35, 0x108, 0x35, 0x35 }, // DIGIT FIVE\r
- { 0x36, 0x108, 0x36, 0x36 }, // DIGIT SIX\r
- { 0x37, 0x108, 0x37, 0x37 }, // DIGIT SEVEN\r
- { 0x38, 0x108, 0x38, 0x38 }, // DIGIT EIGHT\r
- { 0x39, 0x108, 0x39, 0x39 }, // DIGIT NINE\r
+ { 0x30, 0x308, 0x30, 0x30 }, // DIGIT ZERO\r
+ { 0x31, 0x308, 0x31, 0x31 }, // DIGIT ONE\r
+ { 0x32, 0x308, 0x32, 0x32 }, // DIGIT TWO\r
+ { 0x33, 0x308, 0x33, 0x33 }, // DIGIT THREE\r
+ { 0x34, 0x308, 0x34, 0x34 }, // DIGIT FOUR\r
+ { 0x35, 0x308, 0x35, 0x35 }, // DIGIT FIVE\r
+ { 0x36, 0x308, 0x36, 0x36 }, // DIGIT SIX\r
+ { 0x37, 0x308, 0x37, 0x37 }, // DIGIT SEVEN\r
+ { 0x38, 0x308, 0x38, 0x38 }, // DIGIT EIGHT\r
+ { 0x39, 0x308, 0x39, 0x39 }, // DIGIT NINE\r
{ 0x3A, 0x18, 0x3A, 0x3A }, // COLON\r
{ 0x3B, 0x18, 0x3B, 0x3B }, // SEMICOLON\r
{ 0x3C, 0x8, 0x3C, 0x3C }, // LESS-THAN SIGN\r
{ 0x3E, 0x8, 0x3E, 0x3E }, // GREATER-THAN SIGN\r
{ 0x3F, 0x18, 0x3F, 0x3F }, // QUESTION MARK\r
{ 0x40, 0x18, 0x40, 0x40 }, // COMMERCIAL AT\r
- { 0x41, 0x89, 0x61, 0x41 }, // LATIN CAPITAL LETTER A\r
- { 0x42, 0x89, 0x62, 0x42 }, // LATIN CAPITAL LETTER B\r
- { 0x43, 0x89, 0x63, 0x43 }, // LATIN CAPITAL LETTER C\r
- { 0x44, 0x89, 0x64, 0x44 }, // LATIN CAPITAL LETTER D\r
- { 0x45, 0x89, 0x65, 0x45 }, // LATIN CAPITAL LETTER E\r
- { 0x46, 0x89, 0x66, 0x46 }, // LATIN CAPITAL LETTER F\r
+ { 0x41, 0x289, 0x61, 0x41 }, // LATIN CAPITAL LETTER A\r
+ { 0x42, 0x289, 0x62, 0x42 }, // LATIN CAPITAL LETTER B\r
+ { 0x43, 0x289, 0x63, 0x43 }, // LATIN CAPITAL LETTER C\r
+ { 0x44, 0x289, 0x64, 0x44 }, // LATIN CAPITAL LETTER D\r
+ { 0x45, 0x289, 0x65, 0x45 }, // LATIN CAPITAL LETTER E\r
+ { 0x46, 0x289, 0x66, 0x46 }, // LATIN CAPITAL LETTER F\r
{ 0x47, 0x89, 0x67, 0x47 }, // LATIN CAPITAL LETTER G\r
{ 0x48, 0x89, 0x68, 0x48 }, // LATIN CAPITAL LETTER H\r
{ 0x49, 0x89, 0x69, 0x49 }, // LATIN CAPITAL LETTER I\r
{ 0x5E, 0x8, 0x5E, 0x5E }, // CIRCUMFLEX ACCENT\r
{ 0x5F, 0x18, 0x5F, 0x5F }, // LOW LINE\r
{ 0x60, 0x8, 0x60, 0x60 }, // GRAVE ACCENT\r
- { 0x61, 0x49, 0x61, 0x41 }, // LATIN SMALL LETTER A\r
- { 0x62, 0x49, 0x62, 0x42 }, // LATIN SMALL LETTER B\r
- { 0x63, 0x49, 0x63, 0x43 }, // LATIN SMALL LETTER C\r
- { 0x64, 0x49, 0x64, 0x44 }, // LATIN SMALL LETTER D\r
- { 0x65, 0x49, 0x65, 0x45 }, // LATIN SMALL LETTER E\r
- { 0x66, 0x49, 0x66, 0x46 }, // LATIN SMALL LETTER F\r
+ { 0x61, 0x249, 0x61, 0x41 }, // LATIN SMALL LETTER A\r
+ { 0x62, 0x249, 0x62, 0x42 }, // LATIN SMALL LETTER B\r
+ { 0x63, 0x249, 0x63, 0x43 }, // LATIN SMALL LETTER C\r
+ { 0x64, 0x249, 0x64, 0x44 }, // LATIN SMALL LETTER D\r
+ { 0x65, 0x249, 0x65, 0x45 }, // LATIN SMALL LETTER E\r
+ { 0x66, 0x249, 0x66, 0x46 }, // LATIN SMALL LETTER F\r
{ 0x67, 0x49, 0x67, 0x47 }, // LATIN SMALL LETTER G\r
{ 0x68, 0x49, 0x68, 0x48 }, // LATIN SMALL LETTER H\r
{ 0x69, 0x49, 0x69, 0x49 }, // LATIN SMALL LETTER I\r