From fa53a74861e1ac9513ae57b7bd7889b85ac0fbe9 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 21 Mar 2013 00:06:53 +0000 Subject: [PATCH] PDCLIB-1 (PDCLIB-3) Make wide character classification/collation information locale dependent (to support locale-dependent information in future). * locale_t has had _WCType + _WCTypeSize fields added to it * iswctype, towctrans, towlower, towupper -> add _PDCLIB_*_l internal variations * Added module "c_locale" which will provide default C locale implementation. So far just registers Unicode tables with provided locale * win32 -> use new "c_locale" module. crt0 modifications. * posix -> use new "c_locale" module. stdinit modifications --- functions/wctype/iswctype.c | 9 ++- functions/wctype/towctrans.c | 11 +++- functions/wctype/towlower.c | 9 ++- functions/wctype/towupper.c | 9 ++- internals/_PDCLIB_locale.h | 59 +++++++++++++------ opt/c_locale/README.txt | 2 + opt/c_locale/_PDCLIB_clocale.h | 15 +++++ opt/c_locale/_PDCLIB_initclocale.c | 29 +++++++++ platform/posix/Config.jam | 2 +- .../posix/functions/_PDCLIB/_PDCLIB_stdinit.c | 19 +++--- platform/win32/Config.jam | 2 +- platform/win32/crt0.c | 3 + 12 files changed, 131 insertions(+), 38 deletions(-) create mode 100644 opt/c_locale/README.txt create mode 100644 opt/c_locale/_PDCLIB_clocale.h create mode 100644 opt/c_locale/_PDCLIB_initclocale.c diff --git a/functions/wctype/iswctype.c b/functions/wctype/iswctype.c index 5c5f7ed..d3de118 100644 --- a/functions/wctype/iswctype.c +++ b/functions/wctype/iswctype.c @@ -8,17 +8,22 @@ #ifndef REGTEST #include <_PDCLIB_locale.h> -int iswctype( wint_t wc, wctype_t desc ) +int _PDCLIB_iswctype_l( wint_t wc, wctype_t desc, locale_t l ) { wc = _PDCLIB_unpackwint( wc ); - _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( wc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( l, wc ); if(!info) return 0; return info->flags & desc; } +int iswctype( wint_t wc, wctype_t desc ) +{ + return _PDCLIB_iswctype_l( wc, desc, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/functions/wctype/towctrans.c b/functions/wctype/towctrans.c index aea9b1a..f5f4a1d 100644 --- a/functions/wctype/towctrans.c +++ b/functions/wctype/towctrans.c @@ -9,16 +9,21 @@ #include #include <_PDCLIB_locale.h> -wint_t towctrans( wint_t wc, wctrans_t trans ) +wint_t _PDCLIB_towctrans_l( wint_t wc, wctrans_t trans, locale_t l ) { switch( trans ) { case 0: return wc; - case _PDCLIB_WCTRANS_TOLOWER: return towlower( wc ); - case _PDCLIB_WCTRANS_TOUPPER: return towupper( wc ); + case _PDCLIB_WCTRANS_TOLOWER: return _PDCLIB_towlower_l( wc, l ); + case _PDCLIB_WCTRANS_TOUPPER: return _PDCLIB_towupper_l( wc, l ); default: abort(); } } +wint_t towctrans( wint_t wc, wctrans_t trans ) +{ + return _PDCLIB_towctrans_l( wc, trans, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/functions/wctype/towlower.c b/functions/wctype/towlower.c index 7f9809c..d8d56ec 100644 --- a/functions/wctype/towlower.c +++ b/functions/wctype/towlower.c @@ -8,10 +8,10 @@ #ifndef REGTEST #include <_PDCLIB_locale.h> -wint_t towlower( wint_t wc ) +wint_t _PDCLIB_towlower_l( wint_t wc, locale_t l ) { wint_t uwc = _PDCLIB_unpackwint( wc ); - _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( uwc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( l, uwc ); if( info && info->lower != uwc ) { wc = info->lower; @@ -19,6 +19,11 @@ wint_t towlower( wint_t wc ) return wc; } +wint_t towlower( wint_t wc ) +{ + return _PDCLIB_towlower_l( wc, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/functions/wctype/towupper.c b/functions/wctype/towupper.c index faac657..7b96a61 100644 --- a/functions/wctype/towupper.c +++ b/functions/wctype/towupper.c @@ -8,10 +8,10 @@ #ifndef REGTEST #include <_PDCLIB_locale.h> -wint_t towupper( wint_t wc ) +wint_t _PDCLIB_towupper_l( wint_t wc, locale_t l ) { wint_t uwc = _PDCLIB_unpackwint( wc ); - _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( uwc ); + _PDCLIB_wcinfo_t *info = _PDCLIB_wcgetinfo( l, uwc ); if( info && info->upper != uwc ) { wc = info->upper; @@ -19,6 +19,11 @@ wint_t towupper( wint_t wc ) return wc; } +wint_t towupper( wint_t wc ) +{ + return _PDCLIB_towupper_l( wc, _PDCLIB_threadlocale() ); +} + #endif #ifdef TEST diff --git a/internals/_PDCLIB_locale.h b/internals/_PDCLIB_locale.h index 3b2dece..fd47c9a 100644 --- a/internals/_PDCLIB_locale.h +++ b/internals/_PDCLIB_locale.h @@ -2,6 +2,7 @@ #define __PDCLIB_LOCALE_H __PDCLIB_LOCALE_H #include <_PDCLIB_int.h> #include +#include #include #include @@ -78,6 +79,19 @@ typedef struct _PDCLIB_wcinfo _PDCLIB_wint_t upper; } _PDCLIB_wcinfo_t; +struct _PDCLIB_locale { + _PDCLIB_charcodec_t _Codec; + struct lconv _Conv; + + /* ctype / wctype */ + _PDCLIB_wcinfo_t *_WCType; + _PDCLIB_size_t _WCTypeSize; + _PDCLIB_ctype_t *_CType; + + /* perror/strerror */ + char *_ErrnoStr[_PDCLIB_ERRNO_MAX]; +}; + extern _PDCLIB_wcinfo_t _PDCLIB_wcinfo[]; extern size_t _PDCLIB_wcinfo_size; @@ -88,32 +102,32 @@ static inline int _PDCLIB_wcinfo_cmp( const void * _key, const void * _obj ) return *key - obj->num; } -static inline _PDCLIB_wcinfo_t * _PDCLIB_wcgetinfo( _PDCLIB_uint32_t num ) +static inline _PDCLIB_wcinfo_t * _PDCLIB_wcgetinfo( locale_t l, _PDCLIB_uint32_t num ) { _PDCLIB_wcinfo_t *info = (_PDCLIB_wcinfo_t*) - bsearch( &num, _PDCLIB_wcinfo, _PDCLIB_wcinfo_size, - sizeof( _PDCLIB_wcinfo[0] ), _PDCLIB_wcinfo_cmp ); + bsearch( &num, l->_WCType, l->_WCTypeSize, + sizeof( l->_WCType[0] ), _PDCLIB_wcinfo_cmp ); return info; } -static inline _PDCLIB_wint_t _PDCLIB_unpackwint( _PDCLIB_wint_t wc ) +static inline wint_t _PDCLIB_unpackwint( wint_t wc ) { if( sizeof(_PDCLIB_wchar_t) == 2 && sizeof(_PDCLIB_wint_t) == 4 ) { /* On UTF-16 platforms, as an extension accept a "packed surrogate" * encoding. We accept the surrogate pairs either way */ - _PDCLIB_wint_t c = (wc & 0xF800F800); + wint_t c = (wc & 0xF800F800); if(c == (_PDCLIB_wint_t) 0xD800DC00) { // MSW: Lead, LSW: Trail - _PDCLIB_wint_t lead = wc >> 16 & 0x3FF; - _PDCLIB_wint_t trail = wc & 0x3FF; + wint_t lead = wc >> 16 & 0x3FF; + wint_t trail = wc & 0x3FF; wc = lead << 10 | trail; } else if(c == (_PDCLIB_wint_t) 0xDC00D800) { // MSW: Trail, LSW: Lead - _PDCLIB_wint_t trail = wc >> 16 & 0x3FF; - _PDCLIB_wint_t lead = wc & 0x3FF; + wint_t trail = wc >> 16 & 0x3FF; + wint_t lead = wc & 0x3FF; wc = lead << 10 | trail; } @@ -121,15 +135,22 @@ static inline _PDCLIB_wint_t _PDCLIB_unpackwint( _PDCLIB_wint_t wc ) return wc; } -struct _PDCLIB_locale { - _PDCLIB_charcodec_t _Codec; - struct lconv _Conv; - - /* ctype */ - _PDCLIB_ctype_t *_CType; - - /* perror/strerror */ - char *_ErrnoStr[_PDCLIB_ERRNO_MAX]; -}; +/* Internal xlocale-style WCType API */ +int _PDCLIB_iswalnum_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswalpha_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswblank_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswcntrl_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswdigit_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswgraph_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswlower_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswprint_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswpunct_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswspace_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswupper_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswxdigit_l( wint_t _Wc, locale_t l ); +int _PDCLIB_iswctype_l( wint_t _Wc, wctype_t _Desc, locale_t l ); +wint_t _PDCLIB_towlower_l( wint_t _Wc, locale_t l ); +wint_t _PDCLIB_towupper_l( wint_t _Wc, locale_t l ); +wint_t _PDCLIB_towctrans_l( wint_t _Wc, wctrans_t _Desc, locale_t l ); #endif diff --git a/opt/c_locale/README.txt b/opt/c_locale/README.txt new file mode 100644 index 0000000..1fced17 --- /dev/null +++ b/opt/c_locale/README.txt @@ -0,0 +1,2 @@ +Basic C Locale Support + - i.e. support for the basic (PDCLib-packaged) C locale only \ No newline at end of file diff --git a/opt/c_locale/_PDCLIB_clocale.h b/opt/c_locale/_PDCLIB_clocale.h new file mode 100644 index 0000000..ee51d28 --- /dev/null +++ b/opt/c_locale/_PDCLIB_clocale.h @@ -0,0 +1,15 @@ +/* "C" Locale Support + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#ifndef _PDCLIB_CLOCALE_H +#define _PDCLIB_CLOCALE_H _PDCLIB_CLOCALE_H +#include +_PDCLIB_BEGIN_EXTERN_C + +void _PDCLIB_initclocale( locale_t l ); + +_PDCLIB_END_EXTERN_C +#endif // _PDCLIB_CLOCALE_H diff --git a/opt/c_locale/_PDCLIB_initclocale.c b/opt/c_locale/_PDCLIB_initclocale.c new file mode 100644 index 0000000..b1a0dbe --- /dev/null +++ b/opt/c_locale/_PDCLIB_initclocale.c @@ -0,0 +1,29 @@ +/* _PDCLIB_initclocale( locale_t ) + + This file is part of the Public Domain C Library (PDCLib). + Permission is granted to use, modify, and / or redistribute at will. +*/ + +#ifndef REGTEST +#include "_PDCLIB_clocale.h" +#include "_PDCLIB_locale.h" + +void _PDCLIB_initclocale( locale_t l ) +{ + // TODO: There will be more added here... + + l->_WCType = _PDCLIB_wcinfo; + l->_WCTypeSize = _PDCLIB_wcinfo_size; +} + +#endif + +#ifdef TEST +#include <_PDCLIB_test.h> + +int main() +{ + return TEST_RESULTS; +} + +#endif \ No newline at end of file diff --git a/platform/posix/Config.jam b/platform/posix/Config.jam index 4fb8cf5..52d0e62 100644 --- a/platform/posix/Config.jam +++ b/platform/posix/Config.jam @@ -11,4 +11,4 @@ if $(OS) = "MACOSX" { PDCLIB_TEST_LINKLIBS += -lgcc ; } -PDCLIB_OPTIONS = pthreads notime dlmalloc basecodecs ; \ No newline at end of file +PDCLIB_OPTIONS = pthreads notime dlmalloc basecodecs c_locale ; \ No newline at end of file diff --git a/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c b/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c index 7d24679..0e562c6 100644 --- a/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c +++ b/platform/posix/functions/_PDCLIB/_PDCLIB_stdinit.c @@ -18,6 +18,7 @@ #ifndef REGTEST #include <_PDCLIB_io.h> #include <_PDCLIB_locale.h> +#include <_PDCLIB_clocale.h> #include /* In a POSIX system, stdin / stdout / stderr are equivalent to the (int) file @@ -79,14 +80,6 @@ FILE * stdout = &_PDCLIB_sout; FILE * stderr = &_PDCLIB_serr; tss_t _PDCLIB_locale_tss; -/* Todo: Better solution than this! */ -__attribute__((constructor)) void init_stdio(void) -{ - tss_create(&_PDCLIB_locale_tss, (tss_dtor_t) freelocale); - mtx_init(&stdin->lock, mtx_recursive); - mtx_init(&stdout->lock, mtx_recursive); - mtx_init(&stderr->lock, mtx_recursive); -} /* FIXME: This approach is a possible attack vector. */ FILE * _PDCLIB_filelist = &_PDCLIB_sin; @@ -394,6 +387,16 @@ struct _PDCLIB_locale _PDCLIB_global_locale = { }, }; +/* Todo: Better solution than this! */ +__attribute__((constructor)) void init_stdio(void) +{ + _PDCLIB_initclocale( &_PDCLIB_global_locale ); + tss_create(&_PDCLIB_locale_tss, (tss_dtor_t) freelocale); + mtx_init(&stdin->lock, mtx_recursive); + mtx_init(&stdout->lock, mtx_recursive); + mtx_init(&stderr->lock, mtx_recursive); +} + #endif #ifdef TEST diff --git a/platform/win32/Config.jam b/platform/win32/Config.jam index 77be305..baac82e 100644 --- a/platform/win32/Config.jam +++ b/platform/win32/Config.jam @@ -23,6 +23,6 @@ if $(PDCLIB_TOOLCHAIN) = "gcc" { EXIT ; } -PDCLIB_OPTIONS = notime dlmalloc mincoll tss_errno basecodecs ; +PDCLIB_OPTIONS = notime dlmalloc mincoll tss_errno basecodecs c_locale ; CRT0 = [ FDirName platform win32 crt0$(SUFOBJ) ] ; \ No newline at end of file diff --git a/platform/win32/crt0.c b/platform/win32/crt0.c index bead332..533ee98 100644 --- a/platform/win32/crt0.c +++ b/platform/win32/crt0.c @@ -7,6 +7,7 @@ #include #include <_PDCLIB_io.h> #include <_PDCLIB_locale.h> +#include <_PDCLIB_clocale.h> static char ** argvToAnsi( wchar_t ** wargv, int argc ) { @@ -116,6 +117,8 @@ void __cdecl mainCRTStartup( void ) wargv = CommandLineToArgvW(cl, &argc); argv = argvToAnsi(wargv, argc); + _PDCLIB_initclocale( &_PDCLIB_global_locale ); + if(tss_create(&_PDCLIB_locale_tss, (tss_dtor_t) freelocale) != thrd_success) { fputs( "Error during C runtime initialization: " -- 2.40.0