HelenOS sources

root/kernel/generic/common/stdc/uchar.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. _set_ilseq
  2. _is_low_surrogate
  3. _is_high_surrogate
  4. _is_surrogate
  5. c32rtomb
  6. mbrtoc16
  7. c16rtomb

/*
 * Copyright (c) 2025 Jiří Zárevúcky
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <uchar.h>

#if __STDC_HOSTED__
#include <fibril.h>
#endif

static void _set_ilseq()
{
#ifdef errno
        errno = EILSEQ;
#endif
}

static bool _is_low_surrogate(char16_t c)
{
        return c >= 0xDC00 && c < 0xE000;
}

static bool _is_high_surrogate(char16_t c)
{
        return c >= 0xD800 && c < 0xDC00;
}

static bool _is_surrogate(char16_t c)
{
        return c >= 0xD800 && c < 0xE000;
}

#define UTF8_CONT(c, shift) (0x80 | (((c) >> (shift)) & 0x3F))

size_t c32rtomb(char *s, char32_t c, mbstate_t *mb)
{
        if (!s) {
                // Equivalent to c32rtomb(buf, L’\0’, mb).
                return 1;
        }

        /* 1 byte encoding */
        if (c < 0x80) {
                s[0] = c;
                return 1;
        }

        /* 2 byte encoding */
        if (c < 0x800) {
                s[0] = 0b11000000 | (c >> 6);
                s[1] = UTF8_CONT(c, 0);
                return 2;
        }

        /* 3 byte encoding */
        if (c < 0x10000) {
                if (_is_surrogate(c)) {
                        /* illegal range for an unicode code point */
                        _set_ilseq();
                        return UCHAR_ILSEQ;
                }

                s[0] = 0b11100000 | (c >> 12);
                s[1] = UTF8_CONT(c, 6);
                s[2] = UTF8_CONT(c, 0);
                return 3;
        }

        /* 4 byte encoding */
        if (c < 0x110000) {
                s[0] = 0b11110000 | (c >> 18);
                s[1] = UTF8_CONT(c, 12);
                s[2] = UTF8_CONT(c, 6);
                s[3] = UTF8_CONT(c, 0);
                return 4;
        }

        _set_ilseq();
        return UCHAR_ILSEQ;
}

size_t mbrtoc16(char16_t *c, const char *s, size_t n, mbstate_t *mb)
{
#if __STDC_HOSTED__
        static fibril_local mbstate_t global_state = { };

        if (!mb)
                mb = &global_state;
#else
        assert(mb);
#endif

        char16_t dummy;

        if (!c)
                c = &dummy;

        if (!s) {
                /* Equivalent to mbrtoc16(NULL, "", 1, mb). */
                if (mb->state) {
                        _set_ilseq();
                        return UCHAR_ILSEQ;
                } else {
                        return 0;
                }
        }

        if ((mb->state & 0xD000) == 0xD000) {
                /* mbstate_t contains the second surrogate character. */
                /* mbrtoc32() will never set it to such value.        */
                *c = mb->state;
                mb->state = 0;
                return UCHAR_CONTINUED;
        }

        char32_t c32 = 0;
        size_t ret = mbrtoc32(&c32, s, n, mb);
        if (ret < INT_MAX) {
                if (c32 < 0x10000) {
                        *c = c32;
                } else {
                        /* Encode UTF-16 surrogates. */
                        mb->state = (c32 & 0x3FF) + 0xDC00;
                        *c = (c32 >> 10) + 0xD7C0;
                }
                return ret;
        }

        return ret;
}

size_t c16rtomb(char *s, char16_t c, mbstate_t *mb)
{
#if __STDC_HOSTED__
        static fibril_local mbstate_t global_state = { };

        if (!mb)
                mb = &global_state;
#else
        assert(mb);
#endif

        if (!s) {
                // Equivalent to c16rtomb(buf, L’\0’, mb).
                if (mb->state) {
                        _set_ilseq();
                        return UCHAR_ILSEQ;
                } else {
                        return 1;
                }
        }

        if (!_is_surrogate(c)) {
                if (mb->state) {
                        _set_ilseq();
                        return UCHAR_ILSEQ;
                }

                return c32rtomb(s, c, mb);
        }

        if (!mb->state) {
                mb->state = c;
                return 0;
        }

        char32_t c32;

        /* Decode UTF-16 surrogates. */
        if (_is_low_surrogate(mb->state) && _is_high_surrogate(c)) {
                c32 = ((c - 0xD7C0) << 10) | (mb->state - 0xDC00);
        } else if (_is_high_surrogate(mb->state) && _is_low_surrogate(c)) {
                c32 = ((mb->state - 0xD7C0) << 10) | (c - 0xDC00);
        } else {
                _set_ilseq();
                return UCHAR_ILSEQ;
        }

        mb->state = 0;
        return c32rtomb(s, c32, mb);
}

/* [<][>][^][v][top][bottom][index][help] */
HelenOS homepage, sources at GitHub