From 1790a6c95a86c15a92fe3f5ec4af76c76a26f38a Mon Sep 17 00:00:00 2001
From: Owen Shepherd
Date: Fri, 11 Jan 2013 18:19:11 +0000
Subject: [PATCH] PDCLIB-2 PDCLIB-9 Add single character unicode conversion functions (C11)

---
 functions/uchar/c16rtomb.c | 119 +++++++++++++++++++++++++++++++++++++
 functions/uchar/mbrtoc16.c | 113 +++++++++++++++++++++++++++++++++++
 2 files changed, 232 insertions(+)
 create mode 100644 functions/uchar/c16rtomb.c
 create mode 100644 functions/uchar/mbrtoc16.c

diff --git a/functions/uchar/c16rtomb.c b/functions/uchar/c16rtomb.c
new file mode 100644
index 0000000..e8f1de7
--- /dev/null
+++ b/functions/uchar/c16rtomb.c
@@ -0,0 +1,119 @@
+/* c16rtomb(
+    char *restrict s,
+    char16_t c16,
+    mbstate_t *restrict ps);
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#ifndef REGTEST
+#include <uchar.h>
+#include <errno.h>
+#include <stdint.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <_PDCLIB_encoding.h>
+#include <_PDCLIB_locale.h>
+
+/* Convert the UTF-16 code unit c16 to a multibyte sequence at s, using the
+ * codec of locale l. A leading surrogate is remembered in ps->_Surrogate
+ * until the matching trailing surrogate arrives.
+ *
+ * Returns 0 when a leading surrogate was stored (nothing output yet),
+ * (size_t) -1 with errno set to EILSEQ on an unpaired surrogate or a failed
+ * codec conversion, and otherwise the result of c32rtomb_l() or the number
+ * of output bytes reported by the codec.
+ */
+size_t c16rtomb_l(
+    char *restrict s,
+    char16_t c16,
+    mbstate_t *restrict ps,
+    locale_t restrict l
+)
+{
+    const char16_t *restrict psrc = &c16;
+
+    if(!l->_Codec->__c16stombs) {
+        // Codec doesn't support direct conversion - translate via UCS-4
+        if(ps->_Surrogate == 0) {
+            // No pending surrogate
+            if((c16 & 0xF800) == 0xD800) {
+                // Surrogate range
+                if((c16 & 0x0400) == 0) {
+                    // 0xD800 -> 0xDBFF leading surrogate
+                    ps->_Surrogate = c16;
+
+                    // Need more data
+                    // Return 0 - we haven't output anything yet
+
+                    /* STD: ISO/IEC 9899:2011 is very implicit about this being
+                     *      the correct return value. N1040, from which the
+                     *      function was adopted, is explicit about 0 being a
+                     *      valid return.
+                     */
+                    return (size_t) 0;
+                } else {
+                    // 0xDC00 -> 0xDFFF trailing surrogate
+                    errno = EILSEQ;
+                    return (size_t) -1;
+                }
+            } else {
+                // BMP range - UTF16 == UCS-4, pass through to c32rtomb_l
+                return c32rtomb_l(s, c16, ps, l);
+            }
+        } else {
+            // We have a stored surrogate
+            if((c16 & 0xFC00) == 0xDC00) {
+                // Trailing surrogate. The pair encodes (code point - 0x10000)
+                // in 20 bits, so the offset must be added back.
+                char32_t c32 = 0x10000
+                    + (((char32_t) (ps->_Surrogate & 0x3FF) << 10) | (c16 & 0x3FF));
+
+                // The pair is complete - don't let it leak into the next call
+                ps->_Surrogate = 0;
+                return c32rtomb_l(s, c32, ps, l);
+            } else {
+                // Not a trailing surrogate - encoding error
+                errno = EILSEQ;
+                return (size_t) -1;
+            }
+
+        }
+    } else {
+        // Codec supports direct conversion
+        size_t srcsz = 1;
+        size_t dstsz = MB_CUR_MAX;
+        size_t dstrem = dstsz;
+
+        if(l->_Codec->__c16stombs(&s, &dstrem, &psrc, &srcsz, ps)) {
+            // Successful conversion
+            return dstsz - dstrem;
+        } else {
+            errno = EILSEQ;
+            return (size_t) -1;
+        }
+    }
+}
+
+/* Convert using the locale returned by _PDCLIB_threadlocale(); see c16rtomb_l(). */
+size_t c16rtomb(
+    char *restrict s,
+    char16_t c16,
+    mbstate_t *restrict ps
+)
+{
+    return c16rtomb_l(s, c16, ps, _PDCLIB_threadlocale());
+}
+
+#endif
+
+#ifdef TEST
+#include <_PDCLIB_test.h>
+
+int main( void )
+{
+    TESTCASE( NO_TESTDRIVER );
+    return TEST_RESULTS;
+}
+#endif
diff --git a/functions/uchar/mbrtoc16.c b/functions/uchar/mbrtoc16.c
new file mode 100644
index 0000000..60b9f41
--- /dev/null
+++ b/functions/uchar/mbrtoc16.c
@@ -0,0 +1,113 @@
+/* size_t mbrtoc16(
+    char16_t *restrict pc16,
+    const char *restrict s,
+    size_t n,
+    mbstate_t *restrict ps);
+
+   This file is part of the Public Domain C Library (PDCLib).
+   Permission is granted to use, modify, and / or redistribute at will.
+*/
+
+#ifndef REGTEST
+#include <uchar.h>
+#include <errno.h>
+#include <stdint.h>
+#include <assert.h>
+#include <_PDCLIB_encoding.h>
+#include <_PDCLIB_locale.h>
+
+/* Convert the next multibyte character of s (at most n bytes) to UTF-16,
+ * storing one code unit in *pc16, using the codec of locale l. For a
+ * supplementary character the leading surrogate is stored first and the
+ * trailing surrogate is kept in ps->_Surrogate for the next call.
+ *
+ * Returns (size_t) -3 when the stored unit came from pending state,
+ * (size_t) -2 when the whole input was consumed without producing a
+ * character, (size_t) -1 with errno set to EILSEQ when the codec rejects
+ * the input, 0 for the null character, and otherwise the number of bytes
+ * consumed (or the result of mbrtoc32_l() on the fallback path).
+ */
+size_t mbrtoc16_l(
+    char16_t *restrict pc16,
+    const char *restrict s,
+    size_t n,
+    mbstate_t *restrict ps,
+    locale_t restrict l
+)
+{
+    size_t dstlen = 1;
+    size_t nr = n;
+
+    if(!l->_Codec->__mbstoc16s) {
+        // No UTF-16 support in codec. Must synthesize on top of UCS-4 support.
+
+        if(ps->_Surrogate) {
+            // If a pending surrogate is stored in the state
+            *pc16 = ps->_Surrogate;
+            ps->_Surrogate = 0;
+            return (size_t) -3;
+        }
+
+        char32_t c32;
+        size_t res = mbrtoc32_l(&c32, s, n, ps, l);
+        if(res != (size_t) -1 && res != (size_t) -2) {
+            // A character was produced (c32 is only guaranteed to be set when
+            // a character was completed). Check for surrogates
+            if(c32 <= 0xFFFF) {
+                // BMP char
+                *pc16 = c32;
+            } else {
+                // Supplementary char - surrogates carry (c32 - 0x10000)
+                c32 -= 0x10000;
+                *pc16 = 0xD800 | (c32 >> 10);
+                ps->_Surrogate = 0xDC00 | (c32 & 0x3FF);
+            }
+        }
+        return res;
+    } else if(l->_Codec->__mbstoc16s(&pc16, &dstlen, &s, &nr, ps)) {
+        // Successful conversion
+        if(dstlen == 0) {
+            // A character was output
+            if(nr == n) {
+                // The output character resulted entirely from stored state
+                return (size_t) -3;
+            } else if(pc16[-1] == 0) {
+                // Was null character
+                return 0;
+            } else {
+                // Count of processed characters
+                return n - nr;
+            }
+        } else {
+            assert(nr == 0 && "Must have processed whole input");
+            return (size_t) -2;
+        }
+    } else {
+        // Failed conversion
+        errno = EILSEQ;
+        return (size_t) -1;
+    }
+}
+
+/* Convert using the locale returned by _PDCLIB_threadlocale(); see mbrtoc16_l(). */
+size_t mbrtoc16(
+    char16_t *restrict pc16,
+    const char *restrict s,
+    size_t n,
+    mbstate_t *restrict ps
+)
+{
+    return mbrtoc16_l(pc16, s, n, ps, _PDCLIB_threadlocale());
+}
+
+#endif
+
+#ifdef TEST
+#include <_PDCLIB_test.h>
+
+int main( void )
+{
+    TESTCASE( NO_TESTDRIVER );
+    return TEST_RESULTS;
+}
+#endif
-- 
2.40.0