]> pd.if.org Git - pdclib.old/commitdiff
PDCLIB-2 PDCLIB-9 Add single character unicode conversion functions (C11)
authorOwen Shepherd <owen.shepherd@e43.eu>
Fri, 11 Jan 2013 18:19:11 +0000 (18:19 +0000)
committerOwen Shepherd <owen.shepherd@e43.eu>
Fri, 11 Jan 2013 18:19:11 +0000 (18:19 +0000)
functions/uchar/c16rtomb.c [new file with mode: 0644]
functions/uchar/mbrtoc16.c [new file with mode: 0644]

diff --git a/functions/uchar/c16rtomb.c b/functions/uchar/c16rtomb.c
new file mode 100644 (file)
index 0000000..e8f1de7
--- /dev/null
@@ -0,0 +1,104 @@
+/* size_t c16rtomb(\r
+    char        *restrict   s, \r
+    char16_t                c16,\r
+    mbstate_t   *restrict   ps);\r
+\r
+   This file is part of the Public Domain C Library (PDCLib).\r
+   Permission is granted to use, modify, and / or redistribute at will.\r
+*/\r
+\r
+#ifndef REGTEST\r
+#include <uchar.h>\r
+#include <errno.h>\r
+#include <stdint.h>\r
+#include <assert.h>\r
+#include <stdlib.h>\r
+#include <_PDCLIB_encoding.h>\r
+#include <_PDCLIB_locale.h>\r
+\r
+/* Convert a single UTF-16 code unit to the multibyte encoding of locale l.\r
+ *\r
+ *   s   - destination buffer for the multibyte sequence\r
+ *   c16 - UTF-16 code unit to convert\r
+ *   ps  - conversion state; ps->_Surrogate holds a pending leading\r
+ *         surrogate, or 0 when none is pending\r
+ *   l   - locale whose _Codec is used for the conversion\r
+ *\r
+ * If the codec provides __c16stombs it is called directly and the number of\r
+ * output bytes it consumed (out of MB_CUR_MAX) is returned. Otherwise the\r
+ * code unit is turned into UCS-4 and handed to c32rtomb_l, whose result is\r
+ * returned.\r
+ *\r
+ * Returns 0 when a leading surrogate was only stored in the state, and\r
+ * (size_t) -1 with errno set to EILSEQ on an encoding error.\r
+ */\r
+size_t c16rtomb_l(\r
+    char        *restrict   s, \r
+    char16_t                c16,\r
+    mbstate_t   *restrict   ps,\r
+    locale_t     restrict   l\r
+)\r
+{\r
+    if(l->_Codec->__c16stombs) {\r
+        // Codec supports direct conversion\r
+        const char16_t *restrict psrc = &c16;\r
+        size_t srcsz  = 1;\r
+        size_t dstsz  = MB_CUR_MAX;\r
+        size_t dstrem = dstsz;\r
+\r
+        if(l->_Codec->__c16stombs(&s, &dstrem, &psrc, &srcsz, ps)) {\r
+            // Successful conversion\r
+            return dstsz - dstrem;\r
+        }\r
+\r
+        errno = EILSEQ;\r
+        return (size_t) -1;\r
+    }\r
+\r
+    // Codec doesn't support direct conversion - translate via UCS-4\r
+    if(ps->_Surrogate == 0) {\r
+        // No pending surrogate\r
+        if((c16 & 0xF800) != 0xD800) {\r
+            // BMP range - UTF16 == UCS-4, pass through to c32rtomb_l\r
+            return c32rtomb_l(s, c16, ps, l);\r
+        }\r
+\r
+        if((c16 & 0x0400) != 0) {\r
+            // 0xDC00 -> 0xDFFF trailing surrogate without a leading one\r
+            errno = EILSEQ;\r
+            return (size_t) -1;\r
+        }\r
+\r
+        // 0xD800 -> 0xDBFF leading surrogate: remember it and wait for the\r
+        // trailing half. Return 0 - we haven't output anything yet\r
+\r
+        /* STD: ISO/IEC 9899:2011 is very implicit about this being\r
+         *      the correct return value. N1040, from which the \r
+         *      function was adopted, is explicit about 0 being a \r
+         *      valid return.\r
+         */\r
+        ps->_Surrogate = c16;\r
+        return (size_t) 0;\r
+    }\r
+\r
+    // We have a stored leading surrogate\r
+    if((c16 & 0xFC00) != 0xDC00) {\r
+        // Not a trailing surrogate - encoding error\r
+        errno = EILSEQ;\r
+        return (size_t) -1;\r
+    }\r
+\r
+    // Trailing surrogate: each half carries 10 payload bits, and the\r
+    // combined 20-bit value is an offset from U+10000.\r
+    char32_t hi  = (char32_t) (ps->_Surrogate & 0x3FF);\r
+    char32_t lo  = (char32_t) (c16 & 0x3FF);\r
+    char32_t c32 = 0x10000 + ((hi << 10) | lo);\r
+\r
+    // The pair is complete; drop the pending surrogate so that the next\r
+    // call starts from a clean state.\r
+    ps->_Surrogate = 0;\r
+\r
+    return c32rtomb_l(s, c32, ps, l);\r
+}\r
+\r
+/* Convert a single UTF-16 code unit to a multibyte sequence using the\r
+ * locale returned by _PDCLIB_threadlocale().\r
+ *\r
+ *   s   - destination buffer for the multibyte sequence\r
+ *   c16 - UTF-16 code unit to convert\r
+ *   ps  - conversion state, or NULL to use this function's own internal\r
+ *         state object\r
+ *\r
+ * Returns the result of c16rtomb_l.\r
+ */\r
+size_t c16rtomb(\r
+    char        *restrict   s, \r
+    char16_t                c16,\r
+    mbstate_t   *restrict   ps\r
+)\r
+{\r
+    // A static object is zero-initialised, which is the initial conversion\r
+    // state (no pending surrogate).\r
+    static mbstate_t internal_state;\r
+\r
+    if(ps == NULL) {\r
+        ps = &internal_state;\r
+    }\r
+\r
+    return c16rtomb_l(s, c16, ps, _PDCLIB_threadlocale());\r
+}\r
+\r
+#endif\r
+\r
+#ifdef TEST\r
+#include <_PDCLIB_test.h>\r
+\r
+/* Test entry point, compiled only when TEST is defined.\r
+ * No test driver exists for c16rtomb yet: the only test case evaluates\r
+ * NO_TESTDRIVER (presumably a placeholder from _PDCLIB_test.h - confirm)\r
+ * and the function returns TEST_RESULTS.\r
+ */\r
+int main( void )\r
+{\r
+    TESTCASE( NO_TESTDRIVER );\r
+    return TEST_RESULTS;\r
+}\r
+#endif\r
diff --git a/functions/uchar/mbrtoc16.c b/functions/uchar/mbrtoc16.c
new file mode 100644 (file)
index 0000000..60b9f41
--- /dev/null
@@ -0,0 +1,99 @@
+/* size_t mbrtoc16(\r
+    char16_t    *restrict   pc16,\r
+    const char  *restrict   s, \r
+    size_t                  n,\r
+    mbstate_t   *restrict   ps);\r
+\r
+   This file is part of the Public Domain C Library (PDCLib).\r
+   Permission is granted to use, modify, and / or redistribute at will.\r
+*/\r
+\r
+#ifndef REGTEST\r
+#include <uchar.h>\r
+#include <errno.h>\r
+#include <stdint.h>\r
+#include <assert.h>\r
+#include <_PDCLIB_encoding.h>\r
+#include <_PDCLIB_locale.h>\r
+\r
+/* Convert the next multibyte character in s to UTF-16, one code unit per\r
+ * call, using the codec of locale l.\r
+ *\r
+ *   pc16 - receives the produced UTF-16 code unit\r
+ *   s    - multibyte input\r
+ *   n    - maximum number of bytes of s to examine\r
+ *   ps   - conversion state; ps->_Surrogate holds a trailing surrogate that\r
+ *          is still to be delivered, or 0 when none is pending\r
+ *   l    - locale whose _Codec is used for the conversion\r
+ *\r
+ * Returns:\r
+ *   (size_t) -3  a code unit was produced from stored state only\r
+ *   (size_t) -2  input was consumed but no character is complete yet\r
+ *   (size_t) -1  encoding error (direct path sets errno to EILSEQ)\r
+ *   0            the null character was produced (direct path), or whatever\r
+ *                mbrtoc32_l returned (synthesised path)\r
+ *   otherwise    the number of input bytes consumed\r
+ */\r
+size_t mbrtoc16_l(\r
+    char16_t    *restrict   pc16,\r
+    const char  *restrict   s, \r
+    size_t                  n,\r
+    mbstate_t   *restrict   ps,\r
+    locale_t     restrict   l\r
+)\r
+{\r
+    size_t dstlen = 1;\r
+    size_t nr = n;\r
+\r
+    if(!l->_Codec->__mbstoc16s) {\r
+        // No UTF-16 support in codec. Must synthesize on top of UCS-4 support.\r
+\r
+        if(ps->_Surrogate) {\r
+            // A trailing surrogate from the previous call is still pending:\r
+            // deliver it without consuming any input.\r
+            if(pc16) {\r
+                *pc16 = ps->_Surrogate;\r
+            }\r
+            ps->_Surrogate = 0;\r
+            return (size_t) -3;\r
+        }\r
+\r
+        char32_t c32 = 0;\r
+        size_t res = mbrtoc32_l(&c32, s, n, ps, l);\r
+        if(res == (size_t) -1 || res == (size_t) -2) {\r
+            // Error or incomplete sequence: c32 holds no character, so\r
+            // there is nothing to store.\r
+            return res;\r
+        }\r
+\r
+        // Conversion was successful. Check for surrogates\r
+        if(c32 <= 0xFFFF) {\r
+            // BMP char\r
+            if(pc16) {\r
+                *pc16 = c32;\r
+            }\r
+        } else {\r
+            // Supplementary char: surrogates encode the 20-bit offset\r
+            // from U+10000, 10 bits in each half.\r
+            char32_t offset = c32 - 0x10000;\r
+            if(pc16) {\r
+                *pc16 = 0xD800 | (offset >> 10);\r
+            }\r
+            ps->_Surrogate = 0xDC00 | (offset & 0x3FF);\r
+        }\r
+        return res;\r
+    }\r
+\r
+    if(!l->_Codec->__mbstoc16s(&pc16, &dstlen, &s, &nr, ps)) {\r
+        // Failed conversion\r
+        errno = EILSEQ;\r
+        return (size_t) -1;\r
+    }\r
+\r
+    // Successful conversion\r
+    if(dstlen != 0) {\r
+        // No character was output although the codec reported success\r
+        assert(nr == 0 && "Must have processed whole input");\r
+        return (size_t) -2;\r
+    }\r
+\r
+    // A character was output\r
+    if(nr == n) {\r
+        // The output character resulted entirely from stored state\r
+        return (size_t) -3;\r
+    }\r
+\r
+    if(pc16[-1] == 0) {\r
+        // Was null character (the codec advanced pc16 past the slot it\r
+        // wrote, hence the -1 index)\r
+        return 0;\r
+    }\r
+\r
+    // Count of processed characters\r
+    return n - nr;\r
+}\r
+\r
+/* Convert the next multibyte character in s to UTF-16 using the locale\r
+ * returned by _PDCLIB_threadlocale().\r
+ *\r
+ *   pc16 - receives the produced UTF-16 code unit\r
+ *   s    - multibyte input\r
+ *   n    - maximum number of bytes of s to examine\r
+ *   ps   - conversion state, or NULL to use this function's own internal\r
+ *          state object\r
+ *\r
+ * Returns the result of mbrtoc16_l.\r
+ */\r
+size_t mbrtoc16(\r
+    char16_t    *restrict   pc16,\r
+    const char  *restrict   s, \r
+    size_t                  n,\r
+    mbstate_t   *restrict   ps\r
+)\r
+{\r
+    // A static object is zero-initialised, which is the initial conversion\r
+    // state (no pending surrogate).\r
+    static mbstate_t internal_state;\r
+\r
+    if(ps == NULL) {\r
+        ps = &internal_state;\r
+    }\r
+\r
+    return mbrtoc16_l(pc16, s, n, ps, _PDCLIB_threadlocale());\r
+}\r
+\r
+#endif\r
+\r
+#ifdef TEST\r
+#include <_PDCLIB_test.h>\r
+\r
+/* Test entry point, compiled only when TEST is defined.\r
+ * No test driver exists for mbrtoc16 yet: the only test case evaluates\r
+ * NO_TESTDRIVER (presumably a placeholder from _PDCLIB_test.h - confirm)\r
+ * and the function returns TEST_RESULTS.\r
+ */\r
+int main( void )\r
+{\r
+    TESTCASE( NO_TESTDRIVER );\r
+    return TEST_RESULTS;\r
+}\r
+#endif\r