]> pd.if.org Git - pdclib.old/blobdiff - functions/locale/UnicodeData.py
PDCLIB-3 correct classification of space characters
[pdclib.old] / functions / locale / UnicodeData.py
index e31ec2e29f02fb2b5022a23a7cb0918273b20e40..6fe74cb8d90ee263a1681274bf2faa5fc2255548 100644 (file)
@@ -49,14 +49,18 @@ categories = {
     'Sc': BIT_GRAPH,                            # Currency symbol\r
     'Sk': BIT_GRAPH,                            # Non-letterlike modifier symbol\r
     'So': BIT_GRAPH,                            # Other symbol\r
-    'Zs': BIT_SPACE | BIT_GRAPH | BIT_BLANK,    # Non-zero-width space character\r
-    'Zl': BIT_SPACE | BIT_GRAPH,                # Line separator\r
-    'Zp': BIT_SPACE | BIT_GRAPH,                # Paragraph separator\r
+    'Zs': BIT_SPACE,                            # Non-zero-width space character\r
+    'Zl': BIT_SPACE,                            # Line separator\r
+    'Zp': BIT_SPACE,                            # Paragraph separator\r
     'Cc': BIT_CNTRL,                            # C0/C1 control codes\r
 }\r
 \r
 # Characters with special properties\r
 special = {\r
+    # Blank characters\r
+    0x0020: BIT_SPACE | BIT_BLANK, # space\r
+    0x0009: BIT_SPACE | BIT_BLANK, # tab\r
+\r
     # Digits\r
     0x0030: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r
     0x0031: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH,\r