HelenOS sources
This source file includes the following definitions.
- _set_ilseq
- _is_low_surrogate
- _is_high_surrogate
- _is_surrogate
- _is_continuation
- _is_1_byte
- _is_2_byte
- _is_3_byte
- _is_4_byte
- _is_non_shortest
- mbrtoc32
- c32rtomb
- mbrtoc16
- c16rtomb
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <uchar.h>
#if __STDC_HOSTED__
#include <fibril.h>
#endif
/**
 * Record an "illegal byte sequence" error.
 *
 * Stores EILSEQ into errno, but only when errno is provided as a macro;
 * in builds where it is not, the function does nothing.
 *
 * The parameter list is spelled (void) so that this is a real prototype
 * and calls with stray arguments are diagnosed by the compiler.
 */
static void _set_ilseq(void)
{
#ifdef errno
	errno = EILSEQ;
#endif
}
/** Tell whether a UTF-16 code unit lies in the range 0xDC00..0xDFFF. */
static bool _is_low_surrogate(char16_t c)
{
	const char16_t first = 0xDC00;
	const char16_t last = 0xDFFF;

	return first <= c && c <= last;
}
/** Tell whether a UTF-16 code unit lies in the range 0xD800..0xDBFF. */
static bool _is_high_surrogate(char16_t c)
{
	const char16_t first = 0xD800;
	const char16_t last = 0xDBFF;

	return first <= c && c <= last;
}
/** Tell whether a UTF-16 code unit lies in the range 0xD800..0xDFFF. */
static bool _is_surrogate(char16_t c)
{
	const char16_t first = 0xD800;
	const char16_t last = 0xDFFF;

	return first <= c && c <= last;
}
/** Tell whether a byte has the form 10xxxxxx (UTF-8 continuation byte). */
static bool _is_continuation(uint8_t c)
{
	/* Only the two most significant bits matter. */
	return (c >> 6) == 0x2;
}
/** Tell whether a byte has the form 0xxxxxxx (a complete one-byte character). */
static bool _is_1_byte(uint8_t c)
{
	/* The top bit is clear exactly for values below 0x80. */
	return c < 0x80;
}
/** Tell whether a byte has the form 110xxxxx (lead byte of a 2-byte sequence). */
static bool _is_2_byte(uint8_t c)
{
	/* Compare the three most significant bits against 110. */
	return (c >> 5) == 0x6;
}
/** Tell whether a byte has the form 1110xxxx (lead byte of a 3-byte sequence). */
static bool _is_3_byte(uint8_t c)
{
	/* Compare the four most significant bits against 1110. */
	return (c >> 4) == 0xE;
}
/** Tell whether a byte has the form 11110xxx (lead byte of a 4-byte sequence). */
static bool _is_4_byte(uint8_t c)
{
	/* Compare the five most significant bits against 11110. */
	return (c >> 3) == 0x1E;
}
/**
 * Detect an over-long (non-shortest form) 3-byte or 4-byte sequence from
 * its first continuation byte.
 *
 * @param cont  Decoder state before the byte is consumed. Only two values
 *              are of interest: 0xFC00, which is what mbrtoc32() stores
 *              for lead byte 0xE0, and 0xFFF0, which it stores for lead
 *              byte 0xF0.
 * @param b     The byte that follows.
 *
 * @return true when `cont` is one of those two states and `b` carries none
 *         of the payload bits a shortest-form encoding needs (bit 0x20
 *         after 0xE0, bits 0x30 after 0xF0); false otherwise.
 */
static bool _is_non_shortest(unsigned short cont, uint8_t b)
{
	switch (cont) {
	case 0xFC00:
		return (b & 0x20) == 0;
	case 0xFFF0:
		return (b & 0x30) == 0;
	default:
		return false;
	}
}
/**
 * Decode one UTF-8 encoded character into a UTF-32 code point.
 *
 * The decoder is restartable: when the input ends in the middle of a
 * multi-byte sequence, the partial result is kept in `mb->continuation`
 * and decoding resumes with the next call.
 *
 * Rejected as illegal: stray continuation bytes, lead bytes 0xC0/0xC1 and
 * 0xF5..0xFF, over-long encodings, encoded surrogates (U+D800..U+DFFF) and
 * values above U+10FFFF. The last two match what c32rtomb() refuses to
 * encode.
 *
 * @param c   Where to store the decoded code point; may be NULL.
 * @param s   Input bytes. When NULL, the call behaves as if `s` was ""
 *            and `n` was 1, whatever `n` actually is.
 * @param n   Number of bytes available at `s`.
 * @param mb  Conversion state. In hosted builds NULL selects an internal
 *            fibril-local state; otherwise it must not be NULL.
 *
 * @return Number of bytes consumed by this call when a character was
 *         completed, 0 when that character is NUL, UCHAR_INCOMPLETE when
 *         more input is needed, or UCHAR_ILSEQ (after _set_ilseq()) when
 *         the input is not valid.
 */
size_t mbrtoc32(char32_t *c, const char *s, size_t n, mbstate_t *mb)
{
#if __STDC_HOSTED__
	static fibril_local mbstate_t global_state = { };

	if (!mb)
		mb = &global_state;
#else
	assert(mb);
#endif

	/*
	 * The null-input case has to be handled before looking at `n`,
	 * because `n` is meaningless when there is no input buffer.
	 */
	if (!s) {
		/* Same as decoding "": an unfinished sequence is an error. */
		if (mb->continuation) {
			_set_ilseq();
			return UCHAR_ILSEQ;
		}

		return 0;
	}

	if (n == 0)
		return UCHAR_INCOMPLETE;

	char32_t dummy;

	if (!c)
		c = &dummy;

	size_t i = 0;

	if (!mb->continuation) {
		/* Clean slate, read the lead byte. */
		uint8_t b = s[i++];

		/* Fast exit for the most common case. */
		if (_is_1_byte(b)) {
			*c = b;
			return b == 0 ? 0 : 1;
		}

		/* A continuation byte cannot start a character. */
		if (_is_continuation(b)) {
			_set_ilseq();
			return UCHAR_ILSEQ;
		}

		/*
		 * The value stored into `continuation` has just enough leading
		 * ones that, after shifting in one less than the expected number
		 * of continuation bytes, bit 15 becomes zero. This relies on the
		 * field being 16 bits wide.
		 */
		if (_is_2_byte(b)) {
			/* 0xC0 and 0xC1 can only start over-long encodings. */
			if (!(b & 0b00011110)) {
				_set_ilseq();
				return UCHAR_ILSEQ;
			}

			mb->continuation = b ^ 0b0000000011000000;
		} else if (_is_3_byte(b)) {
			mb->continuation = b ^ 0b1111110011100000;
		} else if (_is_4_byte(b) && b <= 0xF4) {
			mb->continuation = b ^ 0b1111111100000000;
		} else {
			/*
			 * 0xF5..0xF7 would always decode above U+10FFFF and
			 * 0xF8..0xFF are not lead bytes at all. Without this
			 * branch the state would stay zero and the following
			 * continuation byte would be returned as a character.
			 */
			_set_ilseq();
			return UCHAR_ILSEQ;
		}
	}

	for (; i < n; i++) {
		uint8_t b = s[i];

		if (!_is_continuation(b) || _is_non_shortest(mb->continuation, b)) {
			_set_ilseq();
			return UCHAR_ILSEQ;
		}

		/*
		 * 0xFC0D is the state stored for lead byte 0xED. A second byte
		 * of 0xA0 or more would produce a surrogate (U+D800..U+DFFF).
		 */
		if (mb->continuation == 0xFC0D && (b & 0x20)) {
			_set_ilseq();
			return UCHAR_ILSEQ;
		}

		/*
		 * 0xFFF4 is the state stored for lead byte 0xF4. A second byte
		 * of 0x90 or more would produce a value above U+10FFFF.
		 */
		if (mb->continuation == 0xFFF4 && (b & 0x30)) {
			_set_ilseq();
			return UCHAR_ILSEQ;
		}

		/* Bit 15 clear means this is the last byte of the sequence. */
		if (!(mb->continuation & 0x8000)) {
			*c = ((char32_t) mb->continuation) << 6 | (b & 0x3f);
			mb->continuation = 0;
			return ++i;
		}

		mb->continuation = mb->continuation << 6 | (b & 0x3f);
	}

	/* Ran out of input in the middle of a sequence. */
	return UCHAR_INCOMPLETE;
}
/* Build a UTF-8 continuation byte from the six bits of `c` starting at `shift`. */
#define UTF8_CONT(c, shift) (0x80 | (((c) >> (shift)) & 0x3F))

/**
 * Encode one UTF-32 code point as UTF-8.
 *
 * @param s   Output buffer; up to four bytes are written. When NULL,
 *            nothing is written and 1 is returned.
 * @param c   Code point to encode.
 * @param mb  Not used by this function.
 *
 * @return Number of bytes written (1 to 4), or UCHAR_ILSEQ (after
 *         _set_ilseq()) when `c` is a surrogate or is 0x110000 or more.
 */
size_t c32rtomb(char *s, char32_t c, mbstate_t *mb)
{
	if (!s)
		return 1;

	/* Work out the encoded length first; 0 marks an invalid code point. */
	size_t len;

	if (c < 0x80)
		len = 1;
	else if (c < 0x800)
		len = 2;
	else if (c < 0x10000)
		len = _is_surrogate(c) ? 0 : 3;
	else if (c < 0x110000)
		len = 4;
	else
		len = 0;

	switch (len) {
	case 1:
		s[0] = c;
		break;
	case 2:
		s[0] = 0xC0 | (c >> 6);
		s[1] = UTF8_CONT(c, 0);
		break;
	case 3:
		s[0] = 0xE0 | (c >> 12);
		s[1] = UTF8_CONT(c, 6);
		s[2] = UTF8_CONT(c, 0);
		break;
	case 4:
		s[0] = 0xF0 | (c >> 18);
		s[1] = UTF8_CONT(c, 12);
		s[2] = UTF8_CONT(c, 6);
		s[3] = UTF8_CONT(c, 0);
		break;
	default:
		_set_ilseq();
		return UCHAR_ILSEQ;
	}

	return len;
}
/**
 * Decode one UTF-8 encoded character into UTF-16.
 *
 * Code points below 0x10000 produce a single code unit. Anything larger
 * produces a surrogate pair: the high surrogate is returned right away and
 * the low surrogate is parked in `mb->continuation`, to be handed out by
 * the next call without consuming any input.
 *
 * @param c   Where to store the code unit; may be NULL.
 * @param s   Input bytes. When NULL, returns 0 if the state is clear and
 *            UCHAR_ILSEQ otherwise.
 * @param n   Number of bytes available at `s`.
 * @param mb  Conversion state. In hosted builds NULL selects an internal
 *            fibril-local state; otherwise it must not be NULL.
 *
 * @return UCHAR_CONTINUED when the parked low surrogate was stored,
 *         otherwise whatever mbrtoc32() returned for the input.
 */
size_t mbrtoc16(char16_t *c, const char *s, size_t n, mbstate_t *mb)
{
#if __STDC_HOSTED__
	static fibril_local mbstate_t global_state = { };

	if (!mb)
		mb = &global_state;
#else
	assert(mb);
#endif

	char16_t dummy;

	if (!c)
		c = &dummy;

	if (!s) {
		if (mb->continuation) {
			_set_ilseq();
			return UCHAR_ILSEQ;
		}

		return 0;
	}

	/*
	 * A parked low surrogate is in 0xDC00..0xDFFF, i.e. its top six bits
	 * are 110111. The test has to be this exact: mbrtoc32() keeps
	 * unfinished sequences in the same field as 0xFCxx or 0xFFFx, and a
	 * looser mask such as 0xD000 would match those as well and return the
	 * raw decoder state as if it were a code unit.
	 */
	if ((mb->continuation & 0xFC00) == 0xDC00) {
		*c = mb->continuation;
		mb->continuation = 0;
		return UCHAR_CONTINUED;
	}

	char32_t c32 = 0;
	size_t ret = mbrtoc32(&c32, s, n, mb);

	/*
	 * Anything at or above INT_MAX is passed through untouched.
	 * NOTE(review): presumably those are the UCHAR_* status codes;
	 * their definitions are not part of this file.
	 */
	if (ret >= INT_MAX)
		return ret;

	if (c32 < 0x10000) {
		*c = c32;
	} else {
		/* Park the low surrogate and return the high one now. */
		mb->continuation = (c32 & 0x3FF) + 0xDC00;
		/* 0xD7C0 is 0xD800 minus (0x10000 >> 10). */
		*c = (c32 >> 10) + 0xD7C0;
	}

	return ret;
}
/**
 * Encode one UTF-16 code unit as UTF-8.
 *
 * Non-surrogate units are encoded right away. The first surrogate of a
 * pair is only remembered in `mb->continuation` (nothing is written and 0
 * is returned); the second one completes the pair and the combined code
 * point is encoded.
 *
 * NOTE(review): a pair is accepted in either order (high then low, or low
 * then high). The reversed order is not well-formed UTF-16, so confirm
 * that this leniency is intended.
 *
 * @param s   Output buffer. When NULL, returns 1 if no surrogate is
 *            pending and UCHAR_ILSEQ otherwise.
 * @param c   Code unit to encode.
 * @param mb  Conversion state. In hosted builds NULL selects an internal
 *            fibril-local state; otherwise it must not be NULL.
 *
 * @return Number of bytes written by c32rtomb(), 0 when a first surrogate
 *         was stored, or UCHAR_ILSEQ (after _set_ilseq()) when the units
 *         do not form a pair or a non-surrogate follows a pending one.
 */
size_t c16rtomb(char *s, char16_t c, mbstate_t *mb)
{
#if __STDC_HOSTED__
	static fibril_local mbstate_t global_state = { };

	if (!mb)
		mb = &global_state;
#else
	assert(mb);
#endif

	/* The state is not modified before its last use below. */
	bool pending = mb->continuation != 0;

	if (!s) {
		if (!pending)
			return 1;

		_set_ilseq();
		return UCHAR_ILSEQ;
	}

	if (!_is_surrogate(c)) {
		/* An ordinary unit must not interrupt a surrogate pair. */
		if (!pending)
			return c32rtomb(s, c, mb);

		_set_ilseq();
		return UCHAR_ILSEQ;
	}

	if (!pending) {
		/* First half of a pair: remember it, emit nothing yet. */
		mb->continuation = c;
		return 0;
	}

	/* Second half of a pair: combine it with the stored half. */
	char32_t c32;

	if (_is_high_surrogate(c) && _is_low_surrogate(mb->continuation)) {
		c32 = ((c - 0xD7C0) << 10) | (mb->continuation - 0xDC00);
	} else if (_is_low_surrogate(c) && _is_high_surrogate(mb->continuation)) {
		c32 = ((mb->continuation - 0xD7C0) << 10) | (c - 0xDC00);
	} else {
		/* Two halves of the same kind; the state is left as it was. */
		_set_ilseq();
		return UCHAR_ILSEQ;
	}

	mb->continuation = 0;
	return c32rtomb(s, c32, mb);
}
HelenOS homepage, sources at GitHub