From 56b5594ab57dd21cf80821fefe95350aa87b7903 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 16:40:35 +0000 Subject: [PATCH 01/16] PDCLIB-2 #ifdef guard surrounding WEOF --- includes/wchar.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/includes/wchar.h b/includes/wchar.h index 7335896..83d880b 100644 --- a/includes/wchar.h +++ b/includes/wchar.h @@ -45,7 +45,9 @@ struct tm; #define WCHAR_MAX _PDCLIB_WCHAR_MAX #endif +#ifndef _WEOF #define WEOF ((wint_t) -1) +#endif /* Wide character string handling */ wchar_t *wcscpy(wchar_t *_PDCLIB_restrict s1, const wchar_t *_PDCLIB_restrict s2); -- 2.40.0 From 36aeca966a42f071466086ddaa4f50e9b9b51c3f Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:16:26 +0000 Subject: [PATCH 02/16] PDCLIB-3 Add XDIGIT to list of bits in Unicode character data --- functions/locale/UnicodeData.py | 35 +++++++++++++++++++- functions/locale/_PDCLIB_unicodedata.c | 44 +++++++++++++------------- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/functions/locale/UnicodeData.py b/functions/locale/UnicodeData.py index 42a8f9f..e31ec2e 100644 --- a/functions/locale/UnicodeData.py +++ b/functions/locale/UnicodeData.py @@ -26,6 +26,7 @@ BIT_SPACE = 32 BIT_LOWER = 64 BIT_UPPER = 128 BIT_DIGIT = 256 +BIT_XDIGT = 512 # Category to bitfield mapping categories = { @@ -54,6 +55,38 @@ categories = { 'Cc': BIT_CNTRL, # C0/C1 control codes } +# Characters with special properties +special = { + # Digits + 0x0030: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0031: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0032: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0033: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0034: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0035: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0036: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0037: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0038: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + 0x0039: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, + + # A-F (hex uppercase) + 0x0041: BIT_XDIGT | BIT_ALPHA 
| BIT_GRAPH | BIT_UPPER, + 0x0042: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER, + 0x0043: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER, + 0x0044: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER, + 0x0045: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER, + 0x0046: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_UPPER, + + + # a-f (hex lowercase) + 0x0061: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER, + 0x0062: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER, + 0x0063: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER, + 0x0064: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER, + 0x0065: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER, + 0x0066: BIT_XDIGT | BIT_ALPHA | BIT_GRAPH | BIT_LOWER, +} + in_file = open('UnicodeData.txt', 'r') out_file = open('_PDCLIB_unicodedata.c', 'w') try: @@ -83,7 +116,7 @@ try: num = int(num_hex, 16) upper_case = int(upper_case_hex, 16) if len(upper_case_hex) else num lower_case = int(lower_case_hex, 16) if len(lower_case_hex) else num - bits = categories.get(category, 0) + bits = special.get(num, categories.get(category, 0)) if upper_case == 0 and lower_case == 0 and bits == 0: continue diff --git a/functions/locale/_PDCLIB_unicodedata.c b/functions/locale/_PDCLIB_unicodedata.c index e4e9c83..928226c 100644 --- a/functions/locale/_PDCLIB_unicodedata.c +++ b/functions/locale/_PDCLIB_unicodedata.c @@ -63,16 +63,16 @@ { 0x2D, 0x18, 0x2D, 0x2D }, // HYPHEN-MINUS { 0x2E, 0x18, 0x2E, 0x2E }, // FULL STOP { 0x2F, 0x18, 0x2F, 0x2F }, // SOLIDUS - { 0x30, 0x108, 0x30, 0x30 }, // DIGIT ZERO - { 0x31, 0x108, 0x31, 0x31 }, // DIGIT ONE - { 0x32, 0x108, 0x32, 0x32 }, // DIGIT TWO - { 0x33, 0x108, 0x33, 0x33 }, // DIGIT THREE - { 0x34, 0x108, 0x34, 0x34 }, // DIGIT FOUR - { 0x35, 0x108, 0x35, 0x35 }, // DIGIT FIVE - { 0x36, 0x108, 0x36, 0x36 }, // DIGIT SIX - { 0x37, 0x108, 0x37, 0x37 }, // DIGIT SEVEN - { 0x38, 0x108, 0x38, 0x38 }, // DIGIT EIGHT - { 0x39, 0x108, 0x39, 0x39 }, // DIGIT NINE + { 0x30, 0x308, 0x30, 0x30 }, // DIGIT ZERO + { 0x31, 0x308, 0x31, 0x31 }, 
// DIGIT ONE + { 0x32, 0x308, 0x32, 0x32 }, // DIGIT TWO + { 0x33, 0x308, 0x33, 0x33 }, // DIGIT THREE + { 0x34, 0x308, 0x34, 0x34 }, // DIGIT FOUR + { 0x35, 0x308, 0x35, 0x35 }, // DIGIT FIVE + { 0x36, 0x308, 0x36, 0x36 }, // DIGIT SIX + { 0x37, 0x308, 0x37, 0x37 }, // DIGIT SEVEN + { 0x38, 0x308, 0x38, 0x38 }, // DIGIT EIGHT + { 0x39, 0x308, 0x39, 0x39 }, // DIGIT NINE { 0x3A, 0x18, 0x3A, 0x3A }, // COLON { 0x3B, 0x18, 0x3B, 0x3B }, // SEMICOLON { 0x3C, 0x8, 0x3C, 0x3C }, // LESS-THAN SIGN @@ -80,12 +80,12 @@ { 0x3E, 0x8, 0x3E, 0x3E }, // GREATER-THAN SIGN { 0x3F, 0x18, 0x3F, 0x3F }, // QUESTION MARK { 0x40, 0x18, 0x40, 0x40 }, // COMMERCIAL AT - { 0x41, 0x89, 0x61, 0x41 }, // LATIN CAPITAL LETTER A - { 0x42, 0x89, 0x62, 0x42 }, // LATIN CAPITAL LETTER B - { 0x43, 0x89, 0x63, 0x43 }, // LATIN CAPITAL LETTER C - { 0x44, 0x89, 0x64, 0x44 }, // LATIN CAPITAL LETTER D - { 0x45, 0x89, 0x65, 0x45 }, // LATIN CAPITAL LETTER E - { 0x46, 0x89, 0x66, 0x46 }, // LATIN CAPITAL LETTER F + { 0x41, 0x289, 0x61, 0x41 }, // LATIN CAPITAL LETTER A + { 0x42, 0x289, 0x62, 0x42 }, // LATIN CAPITAL LETTER B + { 0x43, 0x289, 0x63, 0x43 }, // LATIN CAPITAL LETTER C + { 0x44, 0x289, 0x64, 0x44 }, // LATIN CAPITAL LETTER D + { 0x45, 0x289, 0x65, 0x45 }, // LATIN CAPITAL LETTER E + { 0x46, 0x289, 0x66, 0x46 }, // LATIN CAPITAL LETTER F { 0x47, 0x89, 0x67, 0x47 }, // LATIN CAPITAL LETTER G { 0x48, 0x89, 0x68, 0x48 }, // LATIN CAPITAL LETTER H { 0x49, 0x89, 0x69, 0x49 }, // LATIN CAPITAL LETTER I @@ -112,12 +112,12 @@ { 0x5E, 0x8, 0x5E, 0x5E }, // CIRCUMFLEX ACCENT { 0x5F, 0x18, 0x5F, 0x5F }, // LOW LINE { 0x60, 0x8, 0x60, 0x60 }, // GRAVE ACCENT - { 0x61, 0x49, 0x61, 0x41 }, // LATIN SMALL LETTER A - { 0x62, 0x49, 0x62, 0x42 }, // LATIN SMALL LETTER B - { 0x63, 0x49, 0x63, 0x43 }, // LATIN SMALL LETTER C - { 0x64, 0x49, 0x64, 0x44 }, // LATIN SMALL LETTER D - { 0x65, 0x49, 0x65, 0x45 }, // LATIN SMALL LETTER E - { 0x66, 0x49, 0x66, 0x46 }, // LATIN SMALL LETTER F + { 0x61, 0x249, 0x61, 0x41 
}, // LATIN SMALL LETTER A + { 0x62, 0x249, 0x62, 0x42 }, // LATIN SMALL LETTER B + { 0x63, 0x249, 0x63, 0x43 }, // LATIN SMALL LETTER C + { 0x64, 0x249, 0x64, 0x44 }, // LATIN SMALL LETTER D + { 0x65, 0x249, 0x65, 0x45 }, // LATIN SMALL LETTER E + { 0x66, 0x249, 0x66, 0x46 }, // LATIN SMALL LETTER F { 0x67, 0x49, 0x67, 0x47 }, // LATIN SMALL LETTER G { 0x68, 0x49, 0x68, 0x48 }, // LATIN SMALL LETTER H { 0x69, 0x49, 0x69, 0x49 }, // LATIN SMALL LETTER I -- 2.40.0 From f7cb88df66442ee71cf6d3177da99e67de8ac198 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:16:56 +0000 Subject: [PATCH 03/16] PDCLIB-3 Add --- includes/wctype.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 includes/wctype.h diff --git a/includes/wctype.h b/includes/wctype.h new file mode 100644 index 0000000..4d6ed5e --- /dev/null +++ b/includes/wctype.h @@ -0,0 +1,47 @@ +/* 7.30 Wide Character Classification and Mapping Utilities + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. 
+*/ + +#ifndef _PDCLIB_WCTYPE_H +#define _PDCLIB_WCTYPE_H _PDCLIB_WCTYPE_H +#include <_PDCLIB_int.h> +_PDCLIB_BEGIN_EXTERN_C + +#ifndef _PDCLIB_WINT_T_DEFINED +#define _PDCLIB_WINT_T_DEFINED _PDCLIB_WINT_T_DEFINED +typedef _PDCLIB_wint_t wint_t; +#endif + +typedef int wctrans_t; +typedef int wctype_t; + +/* 7.30.2.1 Character classification functions */ +int iswalnum( wint_t _Wc ); +int iswalpha( wint_t _Wc ); +int iswblank( wint_t _Wc ); +int iswcntrl( wint_t _Wc ); +int iswdigit( wint_t _Wc ); +int iswgraph( wint_t _Wc ); +int iswlower( wint_t _Wc ); +int iswprint( wint_t _Wc ); +int iswpunct( wint_t _Wc ); +int iswspace( wint_t _Wc ); +int iswupper( wint_t _Wc ); +int iswxdigit( wint_t _Wc ); + +/* 7.30.2.2 Extensible character classification functions */ +int iswctype( wint_t _Wc, wctype_t _Desc ); +wctype_t wctype( const char * _Property ); + +/* 7.30.3 Wide character case mapping utilities */ +wint_t towlower( wint_t _Wc ); +wint_t towupper( wint_t _Wc ); + +/* 7.30.3.2 Extensible wide character case mapping functions */ +wint_t towctrans( wint_t _Wc, wctrans_t _Desc ); +wctrans_t wctrans( const char * _Property ); + +_PDCLIB_END_EXTERN_C +#endif // _PDCLIB_WCTYPE_H -- 2.40.0 From 0c13662a438dc9306daace1412c9d330cfd0cf46 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:17:35 +0000 Subject: [PATCH 04/16] PDCLIB-3 Add wctype(3) --- functions/wctype/wctype.c | 110 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 functions/wctype/wctype.c diff --git a/functions/wctype/wctype.c b/functions/wctype/wctype.c new file mode 100644 index 0000000..faca09e --- /dev/null +++ b/functions/wctype/wctype.c @@ -0,0 +1,110 @@ +/* wctype( const char * ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. 
+*/ + +#include +#ifndef REGTEST +#include +#include <_PDCLIB_locale.h> + +wctype_t wctype( const char * property ) +{ + if(property) switch(property[0]) + { + case 'a': + if(strcmp(property, "alpha") == 0) { + return _PDCLIB_CTYPE_ALPHA; + } else if(strcmp(property, "alnum") == 0) { + return _PDCLIB_CTYPE_ALPHA | _PDCLIB_CTYPE_DIGIT; + } else return 0; + + case 'b': + if(strcmp(property, "blank") == 0) { + return _PDCLIB_CTYPE_BLANK; + } else return 0; + + case 'c': + if(strcmp(property, "cntrl") == 0) { + return _PDCLIB_CTYPE_CNTRL; + } else return 0; + + case 'd': + if(strcmp(property, "digit") == 0) { + return _PDCLIB_CTYPE_DIGIT; + } else return 0; + + case 'g': + if(strcmp(property, "graph") == 0) { + return _PDCLIB_CTYPE_GRAPH; + } else return 0; + + case 'l': + if(strcmp(property, "lower") == 0) { + return _PDCLIB_CTYPE_LOWER; + } else return 0; + + case 'p': + if(strcmp(property, "print") == 0) { + return _PDCLIB_CTYPE_GRAPH | _PDCLIB_CTYPE_SPACE; + } else if(strcmp(property, "punct") == 0) { + return _PDCLIB_CTYPE_PUNCT; + } else return 0; + + case 's': + if(strcmp(property, "space") == 0) { + return _PDCLIB_CTYPE_SPACE; + } else return 0; + + case 'u': + if(strcmp(property, "upper") == 0) { + return _PDCLIB_CTYPE_UPPER; + } else return 0; + + case 'x': + if(strcmp(property, "xdigit") == 0) { + return _PDCLIB_CTYPE_XDIGT; + } else return 0; + } + return 0; +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(wctype("") == 0); + TESTCASE_NOREG(wctype(NULL) == 0); // mingw libc crashes on this + + TESTCASE(wctype("alpha") != 0); + TESTCASE(wctype("alnum") != 0); + TESTCASE(wctype("blank") != 0); + TESTCASE(wctype("cntrl") != 0); + TESTCASE(wctype("digit") != 0); + TESTCASE(wctype("graph") != 0); + TESTCASE(wctype("lower") != 0); + TESTCASE(wctype("print") != 0); + TESTCASE(wctype("punct") != 0); + TESTCASE(wctype("space") != 0); + TESTCASE(wctype("upper") != 0); + TESTCASE(wctype("xdigit") != 0); + + 
TESTCASE_NOREG(wctype("alpha") == _PDCLIB_CTYPE_ALPHA); + TESTCASE_NOREG(wctype("alnum") == _PDCLIB_CTYPE_ALPHA | _PDCLIB_CTYPE_DIGIT); + TESTCASE_NOREG(wctype("blank") == _PDCLIB_CTYPE_BLANK); + TESTCASE_NOREG(wctype("cntrl") == _PDCLIB_CTYPE_CNTRL); + TESTCASE_NOREG(wctype("digit") == _PDCLIB_CTYPE_DIGIT); + TESTCASE_NOREG(wctype("graph") == _PDCLIB_CTYPE_GRAPH); + TESTCASE_NOREG(wctype("lower") == _PDCLIB_CTYPE_LOWER); + TESTCASE_NOREG(wctype("print") == _PDCLIB_CTYPE_GRAPH | _PDCLIB_CTYPE_SPACE); + TESTCASE_NOREG(wctype("punct") == _PDCLIB_CTYPE_PUNCT); + TESTCASE_NOREG(wctype("space") == _PDCLIB_CTYPE_SPACE); + TESTCASE_NOREG(wctype("upper") == _PDCLIB_CTYPE_UPPER); + TESTCASE_NOREG(wctype("xdigit") == _PDCLIB_CTYPE_XDIGT); + return TEST_RESULTS; +} +#endif -- 2.40.0 From d0bcf6b5c2af6352326428a02bb6f2e391c6b765 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:45:39 +0000 Subject: [PATCH 05/16] PDCLIB-3 wctype: add parentheses around bitwise logic in test cases due to C's quirky precedence --- functions/wctype/wctype.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/functions/wctype/wctype.c b/functions/wctype/wctype.c index faca09e..e196ee4 100644 --- a/functions/wctype/wctype.c +++ b/functions/wctype/wctype.c @@ -94,13 +94,13 @@ int main( void ) TESTCASE(wctype("xdigit") != 0); TESTCASE_NOREG(wctype("alpha") == _PDCLIB_CTYPE_ALPHA); - TESTCASE_NOREG(wctype("alnum") == _PDCLIB_CTYPE_ALPHA | _PDCLIB_CTYPE_DIGIT); + TESTCASE_NOREG(wctype("alnum") == (_PDCLIB_CTYPE_ALPHA | _PDCLIB_CTYPE_DIGIT)); TESTCASE_NOREG(wctype("blank") == _PDCLIB_CTYPE_BLANK); TESTCASE_NOREG(wctype("cntrl") == _PDCLIB_CTYPE_CNTRL); TESTCASE_NOREG(wctype("digit") == _PDCLIB_CTYPE_DIGIT); TESTCASE_NOREG(wctype("graph") == _PDCLIB_CTYPE_GRAPH); TESTCASE_NOREG(wctype("lower") == _PDCLIB_CTYPE_LOWER); - TESTCASE_NOREG(wctype("print") == _PDCLIB_CTYPE_GRAPH | _PDCLIB_CTYPE_SPACE); + TESTCASE_NOREG(wctype("print") == (_PDCLIB_CTYPE_GRAPH | 
_PDCLIB_CTYPE_SPACE)); TESTCASE_NOREG(wctype("punct") == _PDCLIB_CTYPE_PUNCT); TESTCASE_NOREG(wctype("space") == _PDCLIB_CTYPE_SPACE); TESTCASE_NOREG(wctype("upper") == _PDCLIB_CTYPE_UPPER); -- 2.40.0 From e83bbf1f89de742ebf07a11984be8d38fd407527 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:54:03 +0000 Subject: [PATCH 06/16] PDCLIB-3 correct classification of space characters --- functions/locale/UnicodeData.py | 10 ++++-- functions/locale/_PDCLIB_unicodedata.c | 42 +++++++++++++------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/functions/locale/UnicodeData.py b/functions/locale/UnicodeData.py index e31ec2e..6fe74cb 100644 --- a/functions/locale/UnicodeData.py +++ b/functions/locale/UnicodeData.py @@ -49,14 +49,18 @@ categories = { 'Sc': BIT_GRAPH, # Currency symbol 'Sk': BIT_GRAPH, # Non-letterlike modifier symbol 'So': BIT_GRAPH, # Other symbol - 'Zs': BIT_SPACE | BIT_GRAPH | BIT_BLANK, # Non-zero-width space character - 'Zl': BIT_SPACE | BIT_GRAPH, # Line separator - 'Zp': BIT_SPACE | BIT_GRAPH, # Paragraph separator + 'Zs': BIT_SPACE, # Non-zero-width space character + 'Zl': BIT_SPACE, # Line separator + 'Zp': BIT_SPACE, # Paragraph separator 'Cc': BIT_CNTRL, # C0/C1 control codes } # Characters with special properties special = { + # Blank characters + 0x0020: BIT_SPACE | BIT_BLANK, # space + 0x0009: BIT_SPACE | BIT_BLANK, # tab + # Digits 0x0030: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, 0x0031: BIT_XDIGT | BIT_DIGIT | BIT_GRAPH, diff --git a/functions/locale/_PDCLIB_unicodedata.c b/functions/locale/_PDCLIB_unicodedata.c index 928226c..dbfc02a 100644 --- a/functions/locale/_PDCLIB_unicodedata.c +++ b/functions/locale/_PDCLIB_unicodedata.c @@ -24,7 +24,7 @@ { 0x6, 0x4, 0x6, 0x6 }, // { 0x7, 0x4, 0x7, 0x7 }, // { 0x8, 0x4, 0x8, 0x8 }, // - { 0x9, 0x4, 0x9, 0x9 }, // + { 0x9, 0x22, 0x9, 0x9 }, // { 0xA, 0x4, 0xA, 0xA }, // { 0xB, 0x4, 0xB, 0xB }, // { 0xC, 0x4, 0xC, 0xC }, // @@ -47,7 +47,7 @@ { 0x1D, 0x4, 0x1D, 0x1D 
}, // { 0x1E, 0x4, 0x1E, 0x1E }, // { 0x1F, 0x4, 0x1F, 0x1F }, // - { 0x20, 0x2A, 0x20, 0x20 }, // SPACE + { 0x20, 0x22, 0x20, 0x20 }, // SPACE { 0x21, 0x18, 0x21, 0x21 }, // EXCLAMATION MARK { 0x22, 0x18, 0x22, 0x22 }, // QUOTATION MARK { 0x23, 0x18, 0x23, 0x23 }, // NUMBER SIGN @@ -175,7 +175,7 @@ { 0x9D, 0x4, 0x9D, 0x9D }, // { 0x9E, 0x4, 0x9E, 0x9E }, // { 0x9F, 0x4, 0x9F, 0x9F }, // - { 0xA0, 0x2A, 0xA0, 0xA0 }, // NO-BREAK SPACE + { 0xA0, 0x20, 0xA0, 0xA0 }, // NO-BREAK SPACE { 0xA1, 0x18, 0xA1, 0xA1 }, // INVERTED EXCLAMATION MARK { 0xA2, 0x8, 0xA2, 0xA2 }, // CENT SIGN { 0xA3, 0x8, 0xA3, 0xA3 }, // POUND SIGN @@ -4994,7 +4994,7 @@ { 0x167D, 0x9, 0x167D, 0x167D }, // CANADIAN SYLLABICS WOODS-CREE THWAA { 0x167E, 0x9, 0x167E, 0x167E }, // CANADIAN SYLLABICS WOODS-CREE FINAL TH { 0x167F, 0x9, 0x167F, 0x167F }, // CANADIAN SYLLABICS BLACKFOOT W - { 0x1680, 0x2A, 0x1680, 0x1680 }, // OGHAM SPACE MARK + { 0x1680, 0x20, 0x1680, 0x1680 }, // OGHAM SPACE MARK { 0x1681, 0x9, 0x1681, 0x1681 }, // OGHAM LETTER BEITH { 0x1682, 0x9, 0x1682, 0x1682 }, // OGHAM LETTER LUIS { 0x1683, 0x9, 0x1683, 0x1683 }, // OGHAM LETTER FEARN @@ -5313,7 +5313,7 @@ { 0x180B, 0x0, 0x180B, 0x180B }, // MONGOLIAN FREE VARIATION SELECTOR ONE { 0x180C, 0x0, 0x180C, 0x180C }, // MONGOLIAN FREE VARIATION SELECTOR TWO { 0x180D, 0x0, 0x180D, 0x180D }, // MONGOLIAN FREE VARIATION SELECTOR THREE - { 0x180E, 0x2A, 0x180E, 0x180E }, // MONGOLIAN VOWEL SEPARATOR + { 0x180E, 0x20, 0x180E, 0x180E }, // MONGOLIAN VOWEL SEPARATOR { 0x1810, 0x108, 0x1810, 0x1810 }, // MONGOLIAN DIGIT ZERO { 0x1811, 0x108, 0x1811, 0x1811 }, // MONGOLIAN DIGIT ONE { 0x1812, 0x108, 0x1812, 0x1812 }, // MONGOLIAN DIGIT TWO @@ -7032,17 +7032,17 @@ { 0x1FFC, 0x89, 0x1FF3, 0x1FFC }, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI { 0x1FFD, 0x8, 0x1FFD, 0x1FFD }, // GREEK OXIA { 0x1FFE, 0x8, 0x1FFE, 0x1FFE }, // GREEK DASIA - { 0x2000, 0x2A, 0x2000, 0x2000 }, // EN QUAD - { 0x2001, 0x2A, 0x2001, 0x2001 }, // EM QUAD - { 0x2002, 
0x2A, 0x2002, 0x2002 }, // EN SPACE - { 0x2003, 0x2A, 0x2003, 0x2003 }, // EM SPACE - { 0x2004, 0x2A, 0x2004, 0x2004 }, // THREE-PER-EM SPACE - { 0x2005, 0x2A, 0x2005, 0x2005 }, // FOUR-PER-EM SPACE - { 0x2006, 0x2A, 0x2006, 0x2006 }, // SIX-PER-EM SPACE - { 0x2007, 0x2A, 0x2007, 0x2007 }, // FIGURE SPACE - { 0x2008, 0x2A, 0x2008, 0x2008 }, // PUNCTUATION SPACE - { 0x2009, 0x2A, 0x2009, 0x2009 }, // THIN SPACE - { 0x200A, 0x2A, 0x200A, 0x200A }, // HAIR SPACE + { 0x2000, 0x20, 0x2000, 0x2000 }, // EN QUAD + { 0x2001, 0x20, 0x2001, 0x2001 }, // EM QUAD + { 0x2002, 0x20, 0x2002, 0x2002 }, // EN SPACE + { 0x2003, 0x20, 0x2003, 0x2003 }, // EM SPACE + { 0x2004, 0x20, 0x2004, 0x2004 }, // THREE-PER-EM SPACE + { 0x2005, 0x20, 0x2005, 0x2005 }, // FOUR-PER-EM SPACE + { 0x2006, 0x20, 0x2006, 0x2006 }, // SIX-PER-EM SPACE + { 0x2007, 0x20, 0x2007, 0x2007 }, // FIGURE SPACE + { 0x2008, 0x20, 0x2008, 0x2008 }, // PUNCTUATION SPACE + { 0x2009, 0x20, 0x2009, 0x2009 }, // THIN SPACE + { 0x200A, 0x20, 0x200A, 0x200A }, // HAIR SPACE { 0x200B, 0x0, 0x200B, 0x200B }, // ZERO WIDTH SPACE { 0x200C, 0x0, 0x200C, 0x200C }, // ZERO WIDTH NON-JOINER { 0x200D, 0x0, 0x200D, 0x200D }, // ZERO WIDTH JOINER @@ -7072,14 +7072,14 @@ { 0x2025, 0x18, 0x2025, 0x2025 }, // TWO DOT LEADER { 0x2026, 0x18, 0x2026, 0x2026 }, // HORIZONTAL ELLIPSIS { 0x2027, 0x18, 0x2027, 0x2027 }, // HYPHENATION POINT - { 0x2028, 0x28, 0x2028, 0x2028 }, // LINE SEPARATOR - { 0x2029, 0x28, 0x2029, 0x2029 }, // PARAGRAPH SEPARATOR + { 0x2028, 0x20, 0x2028, 0x2028 }, // LINE SEPARATOR + { 0x2029, 0x20, 0x2029, 0x2029 }, // PARAGRAPH SEPARATOR { 0x202A, 0x0, 0x202A, 0x202A }, // LEFT-TO-RIGHT EMBEDDING { 0x202B, 0x0, 0x202B, 0x202B }, // RIGHT-TO-LEFT EMBEDDING { 0x202C, 0x0, 0x202C, 0x202C }, // POP DIRECTIONAL FORMATTING { 0x202D, 0x0, 0x202D, 0x202D }, // LEFT-TO-RIGHT OVERRIDE { 0x202E, 0x0, 0x202E, 0x202E }, // RIGHT-TO-LEFT OVERRIDE - { 0x202F, 0x2A, 0x202F, 0x202F }, // NARROW NO-BREAK SPACE + { 0x202F, 0x20, 
0x202F, 0x202F }, // NARROW NO-BREAK SPACE { 0x2030, 0x18, 0x2030, 0x2030 }, // PER MILLE SIGN { 0x2031, 0x18, 0x2031, 0x2031 }, // PER TEN THOUSAND SIGN { 0x2032, 0x18, 0x2032, 0x2032 }, // PRIME @@ -7127,7 +7127,7 @@ { 0x205C, 0x18, 0x205C, 0x205C }, // DOTTED CROSS { 0x205D, 0x18, 0x205D, 0x205D }, // TRICOLON { 0x205E, 0x18, 0x205E, 0x205E }, // VERTICAL FOUR DOTS - { 0x205F, 0x2A, 0x205F, 0x205F }, // MEDIUM MATHEMATICAL SPACE + { 0x205F, 0x20, 0x205F, 0x205F }, // MEDIUM MATHEMATICAL SPACE { 0x2060, 0x0, 0x2060, 0x2060 }, // WORD JOINER { 0x2061, 0x0, 0x2061, 0x2061 }, // FUNCTION APPLICATION { 0x2062, 0x0, 0x2062, 0x2062 }, // INVISIBLE TIMES @@ -10683,7 +10683,7 @@ { 0x2FF9, 0x8, 0x2FF9, 0x2FF9 }, // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT { 0x2FFA, 0x8, 0x2FFA, 0x2FFA }, // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT { 0x2FFB, 0x8, 0x2FFB, 0x2FFB }, // IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID - { 0x3000, 0x2A, 0x3000, 0x3000 }, // IDEOGRAPHIC SPACE + { 0x3000, 0x20, 0x3000, 0x3000 }, // IDEOGRAPHIC SPACE { 0x3001, 0x18, 0x3001, 0x3001 }, // IDEOGRAPHIC COMMA { 0x3002, 0x18, 0x3002, 0x3002 }, // IDEOGRAPHIC FULL STOP { 0x3003, 0x18, 0x3003, 0x3003 }, // DITTO MARK -- 2.40.0 From 83cf6c1cd8f5729b66a1da2469d94e75f028a4fd Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:55:20 +0000 Subject: [PATCH 07/16] Win32: Change wint_t to be signed int (32-bit) in order to hold UCS-2 + -1. 
Additionally, this will enable use of a "packed UTF-16" representation with some functions (as an extension) --- platform/win32/internals/_PDCLIB_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/win32/internals/_PDCLIB_config.h b/platform/win32/internals/_PDCLIB_config.h index c6da3ef..9e69353 100644 --- a/platform/win32/internals/_PDCLIB_config.h +++ b/platform/win32/internals/_PDCLIB_config.h @@ -156,7 +156,7 @@ struct _PDCLIB_lldiv_t XX: Windows requires wchar_t be an unsigned short, but this is not compliant. */ -#define _PDCLIB_wint signed short +#define _PDCLIB_wint signed int #define _PDCLIB_wchar unsigned short #define _PDCLIB_WCHAR USHRT -- 2.40.0 From 380b0fdfb5ad7cd23ff3d943f5cdf93a055da541 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:56:36 +0000 Subject: [PATCH 08/16] PDCLIB-3: Add _PDCLIB_unpackwint to enable UTF-16 platforms to pass a "packed UTF-16" representation to certain wide character classification functions --- internals/_PDCLIB_locale.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/internals/_PDCLIB_locale.h b/internals/_PDCLIB_locale.h index 09fa3c0..8d28f2d 100644 --- a/internals/_PDCLIB_locale.h +++ b/internals/_PDCLIB_locale.h @@ -75,6 +75,30 @@ typedef struct _PDCLIB_wcinfo _PDCLIB_uint32_t upper; } _PDCLIB_wcinfo_t; +static inline _PDCLIB_wint_t _PDCLIB_unpackwint( _PDCLIB_wint_t wc ) +{ + if( sizeof(_PDCLIB_wchar_t) == 2 && sizeof(_PDCLIB_wint_t) == 4 ) { + /* On UTF-16 platforms, as an extension accept a "packed surrogate" + * encoding. 
We accept the surrogate pairs either way + */ + + _PDCLIB_wint_t c = (wc & 0xF800F800); + if(c == (_PDCLIB_wint_t) 0xD800DC00) { + // MSW: Lead, LSW: Trail + _PDCLIB_wint_t lead = wc >> 16 & 0x3FF; + _PDCLIB_wint_t trail = wc & 0x3FF; + wc = lead << 10 | trail; + } else if(c == (_PDCLIB_wint_t) 0xDC00D800) { + // MSW: Trail, LSW: Lead + _PDCLIB_wint_t trail = wc >> 16 & 0x3FF; + _PDCLIB_wint_t lead = wc & 0x3FF; + wc = lead << 10 | trail; + } + + } + return wc; +} + struct _PDCLIB_locale { _PDCLIB_charcodec_t _Codec; struct lconv _Conv; -- 2.40.0 From 65021cb444e137937b38b258e45f889750175eb8 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 18:57:09 +0000 Subject: [PATCH 09/16] PDCLIB-3: Add lookup functions to _PDCLIB_locale.h which can be used to lookup the classification information pertaining to a wide character --- internals/_PDCLIB_locale.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/internals/_PDCLIB_locale.h b/internals/_PDCLIB_locale.h index 8d28f2d..3e0f424 100644 --- a/internals/_PDCLIB_locale.h +++ b/internals/_PDCLIB_locale.h @@ -75,6 +75,25 @@ typedef struct _PDCLIB_wcinfo _PDCLIB_uint32_t upper; } _PDCLIB_wcinfo_t; +extern _PDCLIB_wcinfo_t _PDCLIB_wcinfo[]; +extern size_t _PDCLIB_wcinfo_size; + +static inline int _PDCLIB_wcinfo_cmp( const void * _key, const void * _obj ) +{ + _PDCLIB_uint32_t * key = (_PDCLIB_uint32_t *) _key; + _PDCLIB_wcinfo_t * obj = (_PDCLIB_wcinfo_t *) _obj; + return *key - obj->num; +} + +static inline _PDCLIB_wcinfo_t * _PDCLIB_wcgetinfo( _PDCLIB_uint32_t num ) +{ + _PDCLIB_wcinfo_t *info = (_PDCLIB_wcinfo_t*) + bsearch( &num, _PDCLIB_wcinfo, _PDCLIB_wcinfo_size, + sizeof( _PDCLIB_wcinfo[0] ), _PDCLIB_wcinfo_cmp ); + + return info; +} + static inline _PDCLIB_wint_t _PDCLIB_unpackwint( _PDCLIB_wint_t wc ) { if( sizeof(_PDCLIB_wchar_t) == 2 && sizeof(_PDCLIB_wint_t) == 4 ) { -- 2.40.0 From e6e196c9d74a3a1d88b244fb0e5a759c47d3da84 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 
16 Mar 2013 18:57:32 +0000 Subject: [PATCH 10/16] PDCLIB-3 Add iswctype(3) --- functions/wctype/iswctype.c | 130 ++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 functions/wctype/iswctype.c diff --git a/functions/wctype/iswctype.c b/functions/wctype/iswctype.c new file mode 100644 index 0000000..5c5f7ed --- /dev/null +++ b/functions/wctype/iswctype.c @@ -0,0 +1,130 @@ +/* iswctype( wint_t, wctype_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswctype( wint_t wc, wctype_t desc ) +{ + wc = _PDCLIB_unpackwint( wc ); + + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( wc ); + + if(!info) return 0; + + return info->flags & desc; +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE( iswctype(L'a', wctype("alpha"))); + TESTCASE( iswctype(L'z', wctype("alpha"))); + TESTCASE( iswctype(L'E', wctype("alpha"))); + TESTCASE(!iswctype(L'3', wctype("alpha"))); + TESTCASE(!iswctype(L';', wctype("alpha"))); + + TESTCASE( iswctype(L'a', wctype("alnum"))); + TESTCASE( iswctype(L'3', wctype("alnum"))); + TESTCASE(!iswctype(L';', wctype("alnum"))); + + TESTCASE( iswctype(L' ', wctype("blank"))); + TESTCASE( iswctype(L'\t', wctype("blank"))); + TESTCASE(!iswctype(L'\n', wctype("blank"))); + TESTCASE(!iswctype(L';', wctype("blank"))); + + TESTCASE( iswctype(L'\0', wctype("cntrl"))); + TESTCASE( iswctype(L'\n', wctype("cntrl"))); + TESTCASE( iswctype(L'\v', wctype("cntrl"))); + TESTCASE(!iswctype(L'\t', wctype("cntrl"))); + TESTCASE(!iswctype(L'a', wctype("cntrl"))); + + TESTCASE( iswctype(L'0', wctype("digit"))); + TESTCASE( iswctype(L'1', wctype("digit"))); + TESTCASE( iswctype(L'2', wctype("digit"))); + TESTCASE( iswctype(L'3', wctype("digit"))); + TESTCASE( iswctype(L'4', wctype("digit"))); + TESTCASE( iswctype(L'5', wctype("digit"))); + TESTCASE( 
iswctype(L'6', wctype("digit"))); + TESTCASE( iswctype(L'7', wctype("digit"))); + TESTCASE( iswctype(L'8', wctype("digit"))); + TESTCASE( iswctype(L'9', wctype("digit"))); + TESTCASE(!iswctype(L'X', wctype("digit"))); + TESTCASE(!iswctype(L'?', wctype("digit"))); + + TESTCASE( iswctype(L'a', wctype("graph"))); + TESTCASE( iswctype(L'z', wctype("graph"))); + TESTCASE( iswctype(L'E', wctype("graph"))); + TESTCASE( iswctype(L'E', wctype("graph"))); + TESTCASE(!iswctype(L' ', wctype("graph"))); + TESTCASE(!iswctype(L'\t', wctype("graph"))); + TESTCASE(!iswctype(L'\n', wctype("graph"))); + + TESTCASE( iswctype(L'a', wctype("lower"))); + TESTCASE( iswctype(L'e', wctype("lower"))); + TESTCASE( iswctype(L'z', wctype("lower"))); + TESTCASE(!iswctype(L'A', wctype("lower"))); + TESTCASE(!iswctype(L'E', wctype("lower"))); + TESTCASE(!iswctype(L'Z', wctype("lower"))); + + TESTCASE(!iswctype(L'a', wctype("upper"))); + TESTCASE(!iswctype(L'e', wctype("upper"))); + TESTCASE(!iswctype(L'z', wctype("upper"))); + TESTCASE( iswctype(L'A', wctype("upper"))); + TESTCASE( iswctype(L'E', wctype("upper"))); + TESTCASE( iswctype(L'Z', wctype("upper"))); + + TESTCASE( iswctype(L'Z', wctype("print"))); + TESTCASE( iswctype(L'a', wctype("print"))); + TESTCASE( iswctype(L';', wctype("print"))); + TESTCASE( iswctype(L'\t', wctype("print"))); + TESTCASE(!iswctype(L'\0', wctype("print"))); + + TESTCASE( iswctype(L';', wctype("punct"))); + TESTCASE( iswctype(L'.', wctype("punct"))); + TESTCASE( iswctype(L'?', wctype("punct"))); + TESTCASE(!iswctype(L' ', wctype("punct"))); + TESTCASE(!iswctype(L'Z', wctype("punct"))); + + TESTCASE( iswctype(L' ', wctype("space"))); + TESTCASE( iswctype(L'\t', wctype("space"))); + + TESTCASE( iswctype(L'0', wctype("xdigit"))); + TESTCASE( iswctype(L'1', wctype("xdigit"))); + TESTCASE( iswctype(L'2', wctype("xdigit"))); + TESTCASE( iswctype(L'3', wctype("xdigit"))); + TESTCASE( iswctype(L'4', wctype("xdigit"))); + TESTCASE( iswctype(L'5', wctype("xdigit"))); + 
TESTCASE( iswctype(L'6', wctype("xdigit"))); + TESTCASE( iswctype(L'7', wctype("xdigit"))); + TESTCASE( iswctype(L'8', wctype("xdigit"))); + TESTCASE( iswctype(L'9', wctype("xdigit"))); + TESTCASE( iswctype(L'a', wctype("xdigit"))); + TESTCASE( iswctype(L'b', wctype("xdigit"))); + TESTCASE( iswctype(L'c', wctype("xdigit"))); + TESTCASE( iswctype(L'd', wctype("xdigit"))); + TESTCASE( iswctype(L'e', wctype("xdigit"))); + TESTCASE( iswctype(L'f', wctype("xdigit"))); + TESTCASE( iswctype(L'A', wctype("xdigit"))); + TESTCASE( iswctype(L'B', wctype("xdigit"))); + TESTCASE( iswctype(L'C', wctype("xdigit"))); + TESTCASE( iswctype(L'D', wctype("xdigit"))); + TESTCASE( iswctype(L'E', wctype("xdigit"))); + TESTCASE( iswctype(L'F', wctype("xdigit"))); + TESTCASE(!iswctype(L'g', wctype("xdigit"))); + TESTCASE(!iswctype(L'G', wctype("xdigit"))); + TESTCASE(!iswctype(L'x', wctype("xdigit"))); + TESTCASE(!iswctype(L'X', wctype("xdigit"))); + TESTCASE(!iswctype(L' ', wctype("xdigit"))); + + return TEST_RESULTS; +} +#endif -- 2.40.0 From 06630c64aa2ac5b8ec55c3c0509e69da4d81a22b Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 19:46:26 +0000 Subject: [PATCH 11/16] PDCLIB-3 implement isw* functions --- functions/wctype/iswalnum.c | 30 +++++++++++++++++++++ functions/wctype/iswalpha.c | 30 +++++++++++++++++++++ functions/wctype/iswblank.c | 29 ++++++++++++++++++++ functions/wctype/iswcntrl.c | 30 +++++++++++++++++++++ functions/wctype/iswdigit.c | 52 ++++++++++++++++++++++++++++++++++++ functions/wctype/iswgraph.c | 31 +++++++++++++++++++++ functions/wctype/iswlower.c | 31 +++++++++++++++++++++ functions/wctype/iswprint.c | 26 ++++++++++++++++++ functions/wctype/iswpunct.c | 31 +++++++++++++++++++++ functions/wctype/iswspace.c | 28 +++++++++++++++++++ functions/wctype/iswupper.c | 31 +++++++++++++++++++++ functions/wctype/iswxdigit.c | 52 ++++++++++++++++++++++++++++++++++++ 12 files changed, 401 insertions(+) create mode 100644 functions/wctype/iswalnum.c create 
mode 100644 functions/wctype/iswalpha.c create mode 100644 functions/wctype/iswblank.c create mode 100644 functions/wctype/iswcntrl.c create mode 100644 functions/wctype/iswdigit.c create mode 100644 functions/wctype/iswgraph.c create mode 100644 functions/wctype/iswlower.c create mode 100644 functions/wctype/iswprint.c create mode 100644 functions/wctype/iswpunct.c create mode 100644 functions/wctype/iswspace.c create mode 100644 functions/wctype/iswupper.c create mode 100644 functions/wctype/iswxdigit.c diff --git a/functions/wctype/iswalnum.c b/functions/wctype/iswalnum.c new file mode 100644 index 0000000..9d5a945 --- /dev/null +++ b/functions/wctype/iswalnum.c @@ -0,0 +1,30 @@ +/* iswalnum( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswalnum( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_ALPHA | _PDCLIB_CTYPE_DIGIT ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswalnum(L'a')); + TESTCASE(iswalnum(L'z')); + TESTCASE(iswalnum(L'E')); + TESTCASE(iswalnum(L'3')); + TESTCASE(!iswalnum(L';')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswalpha.c b/functions/wctype/iswalpha.c new file mode 100644 index 0000000..93409c3 --- /dev/null +++ b/functions/wctype/iswalpha.c @@ -0,0 +1,30 @@ +/* iswalpha( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. 
+*/ + +#include +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswalpha( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_ALPHA ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswalpha(L'a')); + TESTCASE(iswalpha(L'z')); + TESTCASE(iswalpha(L'E')); + TESTCASE(!iswalpha(L'3')); + TESTCASE(!iswalpha(L';')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswblank.c b/functions/wctype/iswblank.c new file mode 100644 index 0000000..99fb35c --- /dev/null +++ b/functions/wctype/iswblank.c @@ -0,0 +1,29 @@ +/* iswblank( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswblank( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_BLANK ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswblank(L' ')); + TESTCASE(iswblank(L'\t')); + TESTCASE(!iswblank(L'\n')); + TESTCASE(!iswblank(L'a')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswcntrl.c b/functions/wctype/iswcntrl.c new file mode 100644 index 0000000..493a503 --- /dev/null +++ b/functions/wctype/iswcntrl.c @@ -0,0 +1,30 @@ +/* iswcntrl( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. 
+*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswcntrl( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_CNTRL ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswcntrl(L'\0')); + TESTCASE(iswcntrl(L'\n')); + TESTCASE(iswcntrl(L'\v')); + TESTCASE(!iswcntrl(L'\t')); + TESTCASE(!iswcntrl(L'a')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswdigit.c b/functions/wctype/iswdigit.c new file mode 100644 index 0000000..3b5c8a7 --- /dev/null +++ b/functions/wctype/iswdigit.c @@ -0,0 +1,52 @@ +/* iswdigit( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswdigit( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_DIGIT ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswdigit(L'0')); + TESTCASE(iswdigit(L'1')); + TESTCASE(iswdigit(L'2')); + TESTCASE(iswdigit(L'3')); + TESTCASE(iswdigit(L'4')); + TESTCASE(iswdigit(L'5')); + TESTCASE(iswdigit(L'6')); + TESTCASE(iswdigit(L'7')); + TESTCASE(iswdigit(L'8')); + TESTCASE(iswdigit(L'9')); + TESTCASE(!iswdigit(L'a')); + TESTCASE(!iswdigit(L'b')); + TESTCASE(!iswdigit(L'c')); + TESTCASE(!iswdigit(L'd')); + TESTCASE(!iswdigit(L'e')); + TESTCASE(!iswdigit(L'f')); + TESTCASE(!iswdigit(L'A')); + TESTCASE(!iswdigit(L'B')); + TESTCASE(!iswdigit(L'C')); + TESTCASE(!iswdigit(L'D')); + TESTCASE(!iswdigit(L'E')); + TESTCASE(!iswdigit(L'F')); + TESTCASE(!iswdigit(L'g')); + TESTCASE(!iswdigit(L'G')); + TESTCASE(!iswdigit(L'x')); + TESTCASE(!iswdigit(L'X')); + TESTCASE(!iswdigit(L' ')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswgraph.c b/functions/wctype/iswgraph.c new file mode 100644 index 0000000..29d6e96 --- /dev/null +++ b/functions/wctype/iswgraph.c @@ -0,0 +1,31 @@ +/* iswgraph( wint_t ) + + This file is part of the Public 
Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswgraph( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_GRAPH ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswgraph(L'a')); + TESTCASE(iswgraph(L'z')); + TESTCASE(iswgraph(L'E')); + TESTCASE(!iswgraph(L' ')); + TESTCASE(!iswgraph(L'\t')); + TESTCASE(!iswgraph(L'\n')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswlower.c b/functions/wctype/iswlower.c new file mode 100644 index 0000000..1675ed2 --- /dev/null +++ b/functions/wctype/iswlower.c @@ -0,0 +1,31 @@ +/* iswlower( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswlower( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_LOWER ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswlower(L'a')); + TESTCASE(iswlower(L'e')); + TESTCASE(iswlower(L'z')); + TESTCASE(!iswlower(L'A')); + TESTCASE(!iswlower(L'E')); + TESTCASE(!iswlower(L'Z')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswprint.c b/functions/wctype/iswprint.c new file mode 100644 index 0000000..280c316 --- /dev/null +++ b/functions/wctype/iswprint.c @@ -0,0 +1,26 @@ +/* iswprint( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. 
+*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswprint( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_GRAPH | _PDCLIB_CTYPE_SPACE ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswpunct.c b/functions/wctype/iswpunct.c new file mode 100644 index 0000000..ad9ccf6 --- /dev/null +++ b/functions/wctype/iswpunct.c @@ -0,0 +1,31 @@ +/* iswpunct( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswpunct( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_PUNCT ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswpunct(L';')); + TESTCASE(iswpunct(L'?')); + TESTCASE(iswpunct(L'.')); + TESTCASE(!iswpunct(L' ')); + TESTCASE(!iswpunct(L'Z')); + + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswspace.c b/functions/wctype/iswspace.c new file mode 100644 index 0000000..bf5ffdd --- /dev/null +++ b/functions/wctype/iswspace.c @@ -0,0 +1,28 @@ +/* iswspace( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswspace( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_SPACE ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswspace(L' ')); + TESTCASE(iswspace(L'\t')); + TESTCASE(!iswspace(L'a')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswupper.c b/functions/wctype/iswupper.c new file mode 100644 index 0000000..c011772 --- /dev/null +++ b/functions/wctype/iswupper.c @@ -0,0 +1,31 @@ +/* iswupper( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). 
+ Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswupper( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_UPPER ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(!iswupper(L'a')); + TESTCASE(!iswupper(L'e')); + TESTCASE(!iswupper(L'z')); + TESTCASE(iswupper(L'A')); + TESTCASE(iswupper(L'E')); + TESTCASE(iswupper(L'Z')); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/iswxdigit.c b/functions/wctype/iswxdigit.c new file mode 100644 index 0000000..1d7a1bb --- /dev/null +++ b/functions/wctype/iswxdigit.c @@ -0,0 +1,52 @@ +/* iswxdigit( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +int iswxdigit( wint_t wc ) +{ + return iswctype( wc, _PDCLIB_CTYPE_XDIGT ); +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(iswxdigit(L'0')); + TESTCASE(iswxdigit(L'1')); + TESTCASE(iswxdigit(L'2')); + TESTCASE(iswxdigit(L'3')); + TESTCASE(iswxdigit(L'4')); + TESTCASE(iswxdigit(L'5')); + TESTCASE(iswxdigit(L'6')); + TESTCASE(iswxdigit(L'7')); + TESTCASE(iswxdigit(L'8')); + TESTCASE(iswxdigit(L'9')); + TESTCASE(iswxdigit(L'a')); + TESTCASE(iswxdigit(L'b')); + TESTCASE(iswxdigit(L'c')); + TESTCASE(iswxdigit(L'd')); + TESTCASE(iswxdigit(L'e')); + TESTCASE(iswxdigit(L'f')); + TESTCASE(iswxdigit(L'A')); + TESTCASE(iswxdigit(L'B')); + TESTCASE(iswxdigit(L'C')); + TESTCASE(iswxdigit(L'D')); + TESTCASE(iswxdigit(L'E')); + TESTCASE(iswxdigit(L'F')); + TESTCASE(!iswxdigit(L'g')); + TESTCASE(!iswxdigit(L'G')); + TESTCASE(!iswxdigit(L'x')); + TESTCASE(!iswxdigit(L'X')); + TESTCASE(!iswxdigit(L' ')); + return TEST_RESULTS; +} +#endif -- 2.40.0 From 495dc4306cf5b9437faa86b8c7a6eaeaa51ee83a Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 
20:27:17 +0000 Subject: [PATCH 12/16] PDCLIB-3 Change _PDCLIB_wcinfo_t to use wint_t --- internals/_PDCLIB_locale.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internals/_PDCLIB_locale.h b/internals/_PDCLIB_locale.h index 3e0f424..dbedc80 100644 --- a/internals/_PDCLIB_locale.h +++ b/internals/_PDCLIB_locale.h @@ -69,10 +69,10 @@ typedef struct _PDCLIB_ctype typedef struct _PDCLIB_wcinfo { - _PDCLIB_uint32_t num; + _PDCLIB_wint_t num; _PDCLIB_uint16_t flags; - _PDCLIB_uint32_t lower; - _PDCLIB_uint32_t upper; + _PDCLIB_wint_t lower; + _PDCLIB_wint_t upper; } _PDCLIB_wcinfo_t; extern _PDCLIB_wcinfo_t _PDCLIB_wcinfo[]; -- 2.40.0 From 75bf0c22663cec60d8164c8cf77e855a8b6ad459 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 20:27:34 +0000 Subject: [PATCH 13/16] PDCLIB-3 towupper(3) towlower(3) --- functions/wctype/towlower.c | 36 ++++++++++++++++++++++++++++++++++++ functions/wctype/towupper.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 functions/wctype/towlower.c create mode 100644 functions/wctype/towupper.c diff --git a/functions/wctype/towlower.c b/functions/wctype/towlower.c new file mode 100644 index 0000000..7f9809c --- /dev/null +++ b/functions/wctype/towlower.c @@ -0,0 +1,36 @@ +/* towlower( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. 
+*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +wint_t towlower( wint_t wc ) +{ + wint_t uwc = _PDCLIB_unpackwint( wc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( uwc ); + if( info && info->lower != uwc ) + { + wc = info->lower; + } + return wc; +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(towlower(0) == 0); + TESTCASE(towlower(L'a') == L'a'); + TESTCASE(towlower(L'B') == L'b'); + TESTCASE(towlower(L'0') == L'0'); + + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/towupper.c b/functions/wctype/towupper.c new file mode 100644 index 0000000..faac657 --- /dev/null +++ b/functions/wctype/towupper.c @@ -0,0 +1,36 @@ +/* towupper( wint_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include <wctype.h> +#ifndef REGTEST +#include <_PDCLIB_locale.h> + +wint_t towupper( wint_t wc ) +{ + wint_t uwc = _PDCLIB_unpackwint( wc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( uwc ); + if( info && info->upper != uwc ) + { + wc = info->upper; + } + return wc; +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(towupper(0) == 0); + TESTCASE(towupper(L'a') == L'A'); + TESTCASE(towupper(L'B') == L'B'); + TESTCASE(towupper(L'0') == L'0'); + + return TEST_RESULTS; +} +#endif -- 2.40.0 From 6e6c4e6b52f2516e4bb6b9f37c1e2e18cb7448b5 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 16 Mar 2013 20:33:00 +0000 Subject: [PATCH 14/16] PDCLIB-3 #resolve Implement towctrans/wctrans. 
Completes wctype.h --- functions/wctype/towctrans.c | 37 +++++++++++++++++++++++++++++++++++ functions/wctype/wctrans.c | 38 ++++++++++++++++++++++++++++++++++++ internals/_PDCLIB_locale.h | 3 +++ 3 files changed, 78 insertions(+) create mode 100644 functions/wctype/towctrans.c create mode 100644 functions/wctype/wctrans.c diff --git a/functions/wctype/towctrans.c b/functions/wctype/towctrans.c new file mode 100644 index 0000000..aea9b1a --- /dev/null +++ b/functions/wctype/towctrans.c @@ -0,0 +1,37 @@ +/* towctrans( wint_t, wctrans_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#include +#ifndef REGTEST +#include +#include <_PDCLIB_locale.h> + +wint_t towctrans( wint_t wc, wctrans_t trans ) +{ + switch( trans ) { + case 0: return wc; + case _PDCLIB_WCTRANS_TOLOWER: return towlower( wc ); + case _PDCLIB_WCTRANS_TOUPPER: return towupper( wc ); + default: abort(); + } +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(towctrans(L'a', wctrans("toupper")) == L'A'); + TESTCASE(towctrans(L'B', wctrans("toupper")) == L'B'); + TESTCASE(towctrans(L'a', wctrans("tolower")) == L'a'); + TESTCASE(towctrans(L'B', wctrans("tolower")) == L'b'); + TESTCASE(towctrans(L'B', wctrans("invalid")) == L'B'); + TESTCASE(towctrans(L'B', 0) == L'B'); + return TEST_RESULTS; +} +#endif diff --git a/functions/wctype/wctrans.c b/functions/wctype/wctrans.c new file mode 100644 index 0000000..31f36be --- /dev/null +++ b/functions/wctype/wctrans.c @@ -0,0 +1,38 @@ +/* wctrans( const char * ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. 
+*/ + +#include <wctype.h> +#ifndef REGTEST +#include <string.h> +#include <_PDCLIB_locale.h> + +wctrans_t wctrans( const char * property ) +{ + if(!property) { + return 0; + } else if(strcmp(property, "tolower") == 0) { + return _PDCLIB_WCTRANS_TOLOWER; + } else if(strcmp(property, "toupper") == 0) { + return _PDCLIB_WCTRANS_TOUPPER; + } else { + return 0; + } +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main( void ) +{ + TESTCASE(wctrans("") == 0); + TESTCASE(wctrans("invalid") == 0); + TESTCASE(wctrans("toupper") != 0); + TESTCASE(wctrans("tolower") != 0); + return TEST_RESULTS; +} +#endif diff --git a/internals/_PDCLIB_locale.h b/internals/_PDCLIB_locale.h index dbedc80..3b2dece 100644 --- a/internals/_PDCLIB_locale.h +++ b/internals/_PDCLIB_locale.h @@ -59,6 +59,9 @@ #define _PDCLIB_CTYPE_DIGIT 256 #define _PDCLIB_CTYPE_XDIGT 512 +#define _PDCLIB_WCTRANS_TOLOWER 1 +#define _PDCLIB_WCTRANS_TOUPPER 2 + typedef struct _PDCLIB_ctype { _PDCLIB_uint16_t flags; -- 2.40.0 From fa53a74861e1ac9513ae57b7bd7889b85ac0fbe9 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 21 Mar 2013 00:06:53 +0000 Subject: [PATCH 15/16] PDCLIB-1 (PDCLIB-3) Make wide character classification/collation information locale dependent (to support locale-dependent information in future). * locale_t has had _WCType + _WCTypeSize fields added to it * iswctype, towctrans, towlower, towupper -> add _PDCLIB_*_l internal variations * Added module "c_locale" which will provide default C locale implementation. So far just registers Unicode tables with provided locale * win32 -> use new "c_locale" module. crt0 modifications. * posix -> use new "c_locale" module. 
stdinit modifications --- functions/wctype/iswctype.c | 9 ++- functions/wctype/towctrans.c | 11 +++- functions/wctype/towlower.c | 9 ++- functions/wctype/towupper.c | 9 ++- internals/_PDCLIB_locale.h | 59 +++++++++++++------ opt/c_locale/README.txt | 2 + opt/c_locale/_PDCLIB_clocale.h | 15 +++++ opt/c_locale/_PDCLIB_initclocale.c | 29 +++++++++ platform/posix/Config.jam | 2 +- .../posix/functions/_PDCLIB/_PDCLIB_stdinit.c | 19 +++--- platform/win32/Config.jam | 2 +- platform/win32/crt0.c | 3 + 12 files changed, 131 insertions(+), 38 deletions(-) create mode 100644 opt/c_locale/README.txt create mode 100644 opt/c_locale/_PDCLIB_clocale.h create mode 100644 opt/c_locale/_PDCLIB_initclocale.c diff --git a/functions/wctype/iswctype.c b/functions/wctype/iswctype.c index 5c5f7ed..d3de118 100644 --- a/functions/wctype/iswctype.c +++ b/functions/wctype/iswctype.c @@ -8,17 +8,22 @@ #ifndef REGTEST #include <_PDCLIB_locale.h> -int iswctype( wint_t wc, wctype_t desc ) +int _PDCLIB_iswctype_l( wint_t wc, wctype_t desc, locale_t l ) { wc = _PDCLIB_unpackwint( wc ); - _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( wc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( l, wc ); if(!info) return 0; return info->flags & desc; } +int iswctype( wint_t wc, wctype_t desc ) +{ + return _PDCLIB_iswctype_l( wc, desc, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/functions/wctype/towctrans.c b/functions/wctype/towctrans.c index aea9b1a..f5f4a1d 100644 --- a/functions/wctype/towctrans.c +++ b/functions/wctype/towctrans.c @@ -9,16 +9,21 @@ #include #include <_PDCLIB_locale.h> -wint_t towctrans( wint_t wc, wctrans_t trans ) +wint_t _PDCLIB_towctrans_l( wint_t wc, wctrans_t trans, locale_t l ) { switch( trans ) { case 0: return wc; - case _PDCLIB_WCTRANS_TOLOWER: return towlower( wc ); - case _PDCLIB_WCTRANS_TOUPPER: return towupper( wc ); + case _PDCLIB_WCTRANS_TOLOWER: return _PDCLIB_towlower_l( wc, l ); + case _PDCLIB_WCTRANS_TOUPPER: return _PDCLIB_towupper_l( wc, l ); default: 
abort(); } } +wint_t towctrans( wint_t wc, wctrans_t trans ) +{ + return _PDCLIB_towctrans_l( wc, trans, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/functions/wctype/towlower.c b/functions/wctype/towlower.c index 7f9809c..d8d56ec 100644 --- a/functions/wctype/towlower.c +++ b/functions/wctype/towlower.c @@ -8,10 +8,10 @@ #ifndef REGTEST #include <_PDCLIB_locale.h> -wint_t towlower( wint_t wc ) +wint_t _PDCLIB_towlower_l( wint_t wc, locale_t l ) { wint_t uwc = _PDCLIB_unpackwint( wc ); - _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( uwc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( l, uwc ); if( info && info->lower != uwc ) { wc = info->lower; @@ -19,6 +19,11 @@ wint_t towlower( wint_t wc ) return wc; } +wint_t towlower( wint_t wc ) +{ + return _PDCLIB_towlower_l( wc, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/functions/wctype/towupper.c b/functions/wctype/towupper.c index faac657..7b96a61 100644 --- a/functions/wctype/towupper.c +++ b/functions/wctype/towupper.c @@ -8,10 +8,10 @@ #ifndef REGTEST #include <_PDCLIB_locale.h> -wint_t towupper( wint_t wc ) +wint_t _PDCLIB_towupper_l( wint_t wc, locale_t l ) { wint_t uwc = _PDCLIB_unpackwint( wc ); - _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( uwc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( l, uwc ); if( info && info->upper != uwc ) { wc = info->upper; @@ -19,6 +19,11 @@ wint_t towupper( wint_t wc ) return wc; } +wint_t towupper( wint_t wc ) +{ + return _PDCLIB_towupper_l( wc, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/internals/_PDCLIB_locale.h b/internals/_PDCLIB_locale.h index 3b2dece..fd47c9a 100644 --- a/internals/_PDCLIB_locale.h +++ b/internals/_PDCLIB_locale.h @@ -2,6 +2,7 @@ #define __PDCLIB_LOCALE_H __PDCLIB_LOCALE_H #include <_PDCLIB_int.h> #include +#include #include #include @@ -78,6 +79,19 @@ typedef struct _PDCLIB_wcinfo _PDCLIB_wint_t upper; } _PDCLIB_wcinfo_t; +struct _PDCLIB_locale { + _PDCLIB_charcodec_t _Codec; + struct lconv _Conv; + + 
/* ctype / wctype */ + _PDCLIB_wcinfo_t *_WCType; + _PDCLIB_size_t _WCTypeSize; + _PDCLIB_ctype_t *_CType; + + /* perror/strerror */ + char *_ErrnoStr[_PDCLIB_ERRNO_MAX]; +}; + extern _PDCLIB_wcinfo_t _PDCLIB_wcinfo[]; extern size_t _PDCLIB_wcinfo_size; @@ -88,32 +102,32 @@ static inline int _PDCLIB_wcinfo_cmp( const void * _key, const void * _obj ) return *key - obj->num; } -static inline _PDCLIB_wcinfo_t * _PDCLIB_wcgetinfo( _PDCLIB_uint32_t num ) +static inline _PDCLIB_wcinfo_t * _PDCLIB_wcgetinfo( locale_t l, _PDCLIB_uint32_t num ) { _PDCLIB_wcinfo_t *info = (_PDCLIB_wcinfo_t*) - bsearch( &num, _PDCLIB_wcinfo, _PDCLIB_wcinfo_size, - sizeof( _PDCLIB_wcinfo[0] ), _PDCLIB_wcinfo_cmp ); + bsearch( &num, l->_WCType, l->_WCTypeSize, + sizeof( l->_WCType[0] ), _PDCLIB_wcinfo_cmp ); return info; } -static inline _PDCLIB_wint_t _PDCLIB_unpackwint( _PDCLIB_wint_t wc ) +static inline wint_t _PDCLIB_unpackwint( wint_t wc ) { if( sizeof(_PDCLIB_wchar_t) == 2 && sizeof(_PDCLIB_wint_t) == 4 ) { /* On UTF-16 platforms, as an extension accept a "packed surrogate" * encoding. 
We accept the surrogate pairs either way */ - _PDCLIB_wint_t c = (wc & 0xF800F800); + wint_t c = (wc & 0xF800F800); if(c == (_PDCLIB_wint_t) 0xD800DC00) { // MSW: Lead, LSW: Trail - _PDCLIB_wint_t lead = wc >> 16 & 0x3FF; - _PDCLIB_wint_t trail = wc & 0x3FF; + wint_t lead = wc >> 16 & 0x3FF; + wint_t trail = wc & 0x3FF; wc = lead << 10 | trail; } else if(c == (_PDCLIB_wint_t) 0xDC00D800) { // MSW: Trail, LSW: Lead - _PDCLIB_wint_t trail = wc >> 16 & 0x3FF; - _PDCLIB_wint_t lead = wc & 0x3FF; + wint_t trail = wc >> 16 & 0x3FF; + wint_t lead = wc & 0x3FF; wc = lead << 10 | trail; } @@ -121,15 +135,22 @@ static inline _PDCLIB_wint_t _PDCLIB_unpackwint( _PDCLIB_wint_t wc ) return wc; } -struct _PDCLIB_locale { - _PDCLIB_charcodec_t _Codec; - struct lconv _Conv; - - /* ctype */ - _PDCLIB_ctype_t *_CType; - - /* perror/strerror */ - char *_ErrnoStr[_PDCLIB_ERRNO_MAX]; -}; +/* Internal xlocale-style WCType API */ +int _PDCLIB_iswalnum_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswalpha_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswblank_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswcntrl_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswdigit_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswgraph_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswlower_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswprint_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswpunct_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswspace_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswupper_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswxdigit_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswctype_l( wint_t _Wc, wctype_t _Desc, locale_t l ); +wint_t _PDCLIB_towlower_l( wint_t _Wc, locale_t l ); +wint_t _PDCLIB_towupper_l( wint_t _Wc, locale_t l ); +wint_t _PDCLIB_towctrans_l( wint_t _Wc, wctrans_t _Desc, locale_t l ); #endif diff --git a/opt/c_locale/README.txt b/opt/c_locale/README.txt new file mode 100644 index 0000000..1fced17 --- /dev/null +++ b/opt/c_locale/README.txt @@ -0,0 +1,2 @@ +Basic C Locale Support + - i.e. 
support for the basic (PDCLib-packaged) C locale only \ No newline at end of file diff --git a/opt/c_locale/_PDCLIB_clocale.h b/opt/c_locale/_PDCLIB_clocale.h new file mode 100644 index 0000000..ee51d28 --- /dev/null +++ b/opt/c_locale/_PDCLIB_clocale.h @@ -0,0 +1,15 @@ +/* "C" Locale Support + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#ifndef _PDCLIB_CLOCALE_H +#define _PDCLIB_CLOCALE_H _PDCLIB_CLOCALE_H +#include +_PDCLIB_BEGIN_EXTERN_C + +void _PDCLIB_initclocale( locale_t l ); + +_PDCLIB_END_EXTERN_C +#endif // _PDCLIB_CLOCALE_H diff --git a/opt/c_locale/_PDCLIB_initclocale.c b/opt/c_locale/_PDCLIB_initclocale.c new file mode 100644 index 0000000..b1a0dbe --- /dev/null +++ b/opt/c_locale/_PDCLIB_initclocale.c @@ -0,0 +1,29 @@ +/* _PDCLIB_initclocale( locale_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#ifndef REGTEST +#include "_PDCLIB_clocale.h" +#include "_PDCLIB_locale.h" + +void _PDCLIB_initclocale( locale_t l ) +{ + // TODO: There will be more added here... 
+ + l->_WCType = _PDCLIB_wcinfo; + l->_WCTypeSize = _PDCLIB_wcinfo_size; +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main() +{ + return TEST_RESULTS; +} + +#endif \ No newline at end of file diff --git a/platform/posix/Config.jam b/platform/posix/Config.jam index 4fb8cf5..52d0e62 100644 --- a/platform/posix/Config.jam +++ b/platform/posix/Config.jam @@ -11,4 +11,4 @@ if $(OS) = "MACOSX" { PDCLIB_TEST_LINKLIBS += -lgcc ; } -PDCLIB_OPTIONS = pthreads notime dlmalloc basecodecs ; \ No newline at end of file +PDCLIB_OPTIONS = pthreads notime dlmalloc basecodecs c_locale ; \ No newline at end of file diff --git a/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c b/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c index 7d24679..0e562c6 100644 --- a/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c +++ b/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c @@ -18,6 +18,7 @@ #ifndef REGTEST #include <_PDCLIB_io.h> #include <_PDCLIB_locale.h> +#include <_PDCLIB_clocale.h> #include /* In a POSIX system, stdin / stdout / stderr are equivalent to the (int) file @@ -79,14 +80,6 @@ FILE * stdout = &_PDCLIB_sout; FILE * stderr = &_PDCLIB_serr; tss_t _PDCLIB_locale_tss; -/* Todo: Better solution than this! */ -__attribute__((constructor)) void init_stdio(void) -{ - tss_create(&_PDCLIB_locale_tss, (tss_dtor_t) freelocale); - mtx_init(&stdin->lock, mtx_recursive); - mtx_init(&stdout->lock, mtx_recursive); - mtx_init(&stderr->lock, mtx_recursive); -} /* FIXME: This approach is a possible attack vector. */ FILE * _PDCLIB_filelist = &_PDCLIB_sin; @@ -394,6 +387,16 @@ struct _PDCLIB_locale _PDCLIB_global_locale = { }, }; +/* Todo: Better solution than this! 
*/ +__attribute__((constructor)) void init_stdio(void) +{ + _PDCLIB_initclocale( &_PDCLIB_global_locale ); + tss_create(&_PDCLIB_locale_tss, (tss_dtor_t) freelocale); + mtx_init(&stdin->lock, mtx_recursive); + mtx_init(&stdout->lock, mtx_recursive); + mtx_init(&stderr->lock, mtx_recursive); +} + #endif #ifdef TEST diff --git a/platform/win32/Config.jam b/platform/win32/Config.jam index 77be305..baac82e 100644 --- a/platform/win32/Config.jam +++ b/platform/win32/Config.jam @@ -23,6 +23,6 @@ if $(PDCLIB_TOOLCHAIN) = "gcc" { EXIT ; } -PDCLIB_OPTIONS = notime dlmalloc mincoll tss_errno basecodecs ; +PDCLIB_OPTIONS = notime dlmalloc mincoll tss_errno basecodecs c_locale ; CRT0 = [ FDirName platform win32 crt0$(SUFOBJ) ] ; \ No newline at end of file diff --git a/platform/win32/crt0.c b/platform/win32/crt0.c index bead332..533ee98 100644 --- a/platform/win32/crt0.c +++ b/platform/win32/crt0.c @@ -7,6 +7,7 @@ #include #include <_PDCLIB_io.h> #include <_PDCLIB_locale.h> +#include <_PDCLIB_clocale.h> static char ** argvToAnsi( wchar_t ** wargv, int argc ) { @@ -116,6 +117,8 @@ void __cdecl mainCRTStartup( void ) wargv = CommandLineToArgvW(cl, &argc); argv = argvToAnsi(wargv, argc); + _PDCLIB_initclocale( &_PDCLIB_global_locale ); + if(tss_create(&_PDCLIB_locale_tss, (tss_dtor_t) freelocale) != thrd_success) { fputs( "Error during C runtime initialization: " -- 2.40.0 From a86fc06b877aa1418ae52f36720957e4cb9a4300 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sat, 20 Apr 2013 23:43:33 +0100 Subject: [PATCH 16/16] UnicodeData.py: actually fill in encoding declaration --- functions/locale/UnicodeData.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions/locale/UnicodeData.py b/functions/locale/UnicodeData.py index 6fe74cb..7de35bd 100644 --- a/functions/locale/UnicodeData.py +++ b/functions/locale/UnicodeData.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -*- coding: -*- +# -*- coding: ascii -*- # Unicode Data Converter # # This file is part of the Public 
Domain C Library (PDCLib). -- 2.40.0