HelenOS sources
This source file includes the following definitions.
- tok_init
- tok_fini
- tok_tokenize
- tok_finish_string
- tok_get_char
- tok_look_char
- tok_push_char
- tok_start_token
- tok_push_token
- tok_pending_chars
#include <str.h>
#include <assert.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>
#include "tok.h"
/*
 * Forward declarations of the file-local helpers; their definitions follow
 * the public entry points (tok_init, tok_fini, tok_tokenize) further below.
 */
static char32_t tok_get_char(tokenizer_t *);
static char32_t tok_look_char(tokenizer_t *);
static errno_t tok_push_char(tokenizer_t *, char32_t);
static errno_t tok_push_token(tokenizer_t *);
static bool tok_pending_chars(tokenizer_t *);
static errno_t tok_finish_string(tokenizer_t *);
static void tok_start_token(tokenizer_t *, token_type_t);
/** Initialize a tokenizer for the given input string.
 *
 * Resets every input and output cursor and allocates the buffer in which
 * the text of all produced tokens is stored. The buffer is released by
 * tok_fini().
 *
 * @param tok        Tokenizer state to initialize.
 * @param input      String to tokenize; only the pointer is stored.
 * @param out_tokens Caller-provided array that receives the tokens.
 * @param max_tokens Number of entries available in out_tokens.
 *
 * @return EOK on success, ENOMEM if the text buffer cannot be allocated.
 *         On ENOMEM the output buffer pointer is left NULL, so calling
 *         tok_fini() afterwards is harmless.
 */
errno_t tok_init(tokenizer_t *tok, char *input, token_t *out_tokens,
    size_t max_tokens)
{
	tok->in = input;
	tok->in_offset = 0;
	tok->last_in_offset = 0;
	tok->in_char_offset = 0;
	tok->last_in_char_offset = 0;

	tok->outtok = out_tokens;
	tok->outtok_offset = 0;
	tok->outtok_size = max_tokens;

	/*
	 * Put the output buffer fields into a well-defined empty state before
	 * attempting the allocation, so that a failed tok_init() never leaves
	 * an indeterminate pointer behind for tok_fini() to free.
	 */
	tok->outbuf = NULL;
	tok->outbuf_offset = 0;
	tok->outbuf_size = 0;
	tok->outbuf_last_start = 0;

	/*
	 * Room for str_size(input) bytes of token text plus max_tokens + 1
	 * extra bytes; tok_push_token() appends one terminating zero byte
	 * per token.
	 */
	size_t len = str_size(input) + max_tokens + 1;
	char *tmp = malloc(len);
	if (tmp == NULL) {
		return ENOMEM;
	}

	tok->outbuf = tmp;
	tok->outbuf_size = len;

	return EOK;
}
/** Release the resources held by a tokenizer.
 *
 * Frees the token text buffer and clears the pointer, which makes a
 * repeated call on the same tokenizer harmless.
 *
 * @param tok Tokenizer whose text buffer should be released.
 */
void tok_fini(tokenizer_t *tok)
{
	/* free(NULL) is a no-op, so no explicit NULL guard is needed. */
	free(tok->outbuf);

	/* Do not leave a dangling pointer behind (prevents a double free). */
	tok->outbuf = NULL;
}
/** Split the tokenizer's input into tokens.
 *
 * Recognized tokens:
 *  - a run of spaces becomes one TOKTYPE_SPACE token,
 *  - a '|' character becomes a TOKTYPE_PIPE token,
 *  - everything else is accumulated into TOKTYPE_TEXT tokens; a single
 *    quote starts a quoted section that is handled by tok_finish_string().
 *
 * @param tok           Tokenizer previously set up by tok_init().
 * @param tokens_length Receives the number of tokens produced. It is
 *                      written only when the function returns EOK.
 *
 * @return EOK on success, otherwise the first error reported by
 *         tok_push_char(), tok_push_token() or tok_finish_string().
 */
errno_t tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
{
	errno_t rc;
	char32_t next_char;

	/* Read the input one character at a time until the terminator. */
	while ((next_char = tok_look_char(tok)) != 0) {
		if (next_char == ' ') {
			/* A space ends any token that is currently being built. */
			if (tok_pending_chars(tok)) {
				rc = tok_push_token(tok);
				if (rc != EOK) {
					return rc;
				}
			}

			tok_start_token(tok, TOKTYPE_SPACE);

			/* Collect the whole run of spaces into a single token. */
			while (tok_look_char(tok) == ' ') {
				rc = tok_push_char(tok, tok_get_char(tok));
				if (rc != EOK) {
					return rc;
				}
			}

			/*
			 * Propagate failures here as well; otherwise an overflow
			 * on trailing whitespace would go unnoticed.
			 */
			rc = tok_push_token(tok);
			if (rc != EOK) {
				return rc;
			}
		} else if (next_char == '|') {
			/* A pipe ends any token that is currently being built. */
			if (tok_pending_chars(tok)) {
				rc = tok_push_token(tok);
				if (rc != EOK) {
					return rc;
				}
			}

			tok_start_token(tok, TOKTYPE_PIPE);

			rc = tok_push_char(tok, tok_get_char(tok));
			if (rc != EOK) {
				return rc;
			}

			rc = tok_push_token(tok);
			if (rc != EOK) {
				return rc;
			}
		} else if (next_char == '\'') {
			/*
			 * Quoted text: any characters already pending stay in the
			 * output buffer and become part of the same text token.
			 */
			tok_start_token(tok, TOKTYPE_TEXT);

			/* Consume the opening quote without storing it. */
			tok_get_char(tok);

			rc = tok_finish_string(tok);
			if (rc != EOK) {
				return rc;
			}
		} else {
			/* Ordinary character: open a text token if none is pending. */
			if (!tok_pending_chars(tok)) {
				tok_start_token(tok, TOKTYPE_TEXT);
			}

			rc = tok_push_char(tok, tok_get_char(tok));
			if (rc != EOK) {
				return rc;
			}
		}
	}

	/* Emit whatever is left in the buffer as the final token. */
	if (tok_pending_chars(tok)) {
		rc = tok_push_token(tok);
		if (rc != EOK) {
			return rc;
		}
	}

	*tokens_length = tok->outtok_offset;

	return EOK;
}
/** Consume the remainder of a quoted string.
 *
 * Expects the opening quote to have been consumed already. Characters are
 * copied to the output buffer until the closing quote is found, at which
 * point the current token is pushed. Two consecutive quotes inside the
 * string stand for one literal quote character.
 *
 * @param tok Tokenizer state.
 *
 * @return Result of tok_push_token() once the closing quote is reached,
 *         an error from tok_push_char() if storing a character fails, or
 *         EINVAL when the input ends before the string is closed.
 */
errno_t tok_finish_string(tokenizer_t *tok)
{
	for (;;) {
		char32_t upcoming = tok_look_char(tok);

		/* Input exhausted while still inside the quotes. */
		if (upcoming == 0) {
			return EINVAL;
		}

		if (upcoming != '\'') {
			/* Plain character inside the string: copy it over. */
			errno_t status = tok_push_char(tok, tok_get_char(tok));
			if (status != EOK) {
				return status;
			}
			continue;
		}

		/* Swallow the quote, then decide what it meant. */
		tok_get_char(tok);

		if (tok_look_char(tok) != '\'') {
			/* Lone quote: the string ends here. */
			return tok_push_token(tok);
		}

		/* Doubled quote: store one literal quote and skip the second. */
		errno_t status = tok_push_char(tok, '\'');
		if (status != EOK) {
			return status;
		}
		tok_get_char(tok);
	}
}
/** Read the next character from the input and move past it.
 *
 * The byte offset is handed to str_decode() by pointer so that it can be
 * updated by the decoder; the character counter is bumped by one.
 *
 * @param tok Tokenizer state.
 *
 * @return The value returned by str_decode() for the current position.
 */
char32_t tok_get_char(tokenizer_t *tok)
{
	char32_t decoded = str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT);

	tok->in_char_offset += 1;

	return decoded;
}
/** Peek at the next input character without consuming it.
 *
 * @param tok Tokenizer state; its input position is the same after the
 *            call as before it.
 *
 * @return The character that the next tok_get_char() call would return.
 */
char32_t tok_look_char(tokenizer_t *tok)
{
	/*
	 * Decode through a scratch copy of the byte offset so that neither
	 * the byte position nor the character counter of the tokenizer moves.
	 */
	size_t scratch_offset = tok->in_offset;

	return str_decode(tok->in, &scratch_offset, STR_NO_LIMIT);
}
/** Append one character to the token text buffer.
 *
 * @param tok Tokenizer state.
 * @param ch  Character to encode into the output buffer.
 *
 * @return Whatever chr_encode() reports for the write.
 */
errno_t tok_push_char(tokenizer_t *tok, char32_t ch)
{
	char *dest = tok->outbuf;
	size_t *write_pos = &tok->outbuf_offset;
	size_t capacity = tok->outbuf_size;

	return chr_encode(ch, dest, write_pos, capacity);
}
/** Record the type of the token that is about to be built.
 *
 * The stored type is the one tok_push_token() assigns to the next token
 * it emits.
 *
 * @param tok  Tokenizer state.
 * @param type Type to remember for the upcoming token.
 */
void tok_start_token(tokenizer_t *tok, token_type_t type)
{
	tok->current_type = type;
}
/** Finish the token under construction and add it to the output array.
 *
 * Terminates the token text with a zero byte, fills in the next free
 * token slot (type, text pointer, byte and character extents within the
 * input) and moves the "last start" markers up to the current positions
 * so that the following token begins where this one ended.
 *
 * @param tok Tokenizer state.
 *
 * @return EOK on success, EOVERFLOW if either the token array or the
 *         text buffer has no room left.
 */
errno_t tok_push_token(tokenizer_t *tok)
{
	bool tokens_full = (tok->outtok_offset >= tok->outtok_size);
	bool buffer_full = (tok->outbuf_offset >= tok->outbuf_size);

	if (tokens_full || buffer_full) {
		return EOVERFLOW;
	}

	/* Terminate the text of this token. */
	tok->outbuf[tok->outbuf_offset] = 0;
	tok->outbuf_offset++;

	/* Claim the next free slot in the caller's token array. */
	token_t *slot = tok->outtok + tok->outtok_offset;
	tok->outtok_offset++;

	slot->type = tok->current_type;
	slot->text = &tok->outbuf[tok->outbuf_last_start];

	/* Extent of the token in the input, in bytes and in characters. */
	slot->byte_start = tok->last_in_offset;
	slot->byte_length = tok->in_offset - tok->last_in_offset;
	slot->char_start = tok->last_in_char_offset;
	slot->char_length = tok->in_char_offset - tok->last_in_char_offset;

	/* The next token starts right after this one. */
	tok->outbuf_last_start = tok->outbuf_offset;
	tok->last_in_offset = tok->in_offset;
	tok->last_in_char_offset = tok->in_char_offset;

	return EOK;
}
/** Tell whether any text has been buffered for the current token.
 *
 * @param tok Tokenizer state.
 *
 * @return true if the write position of the text buffer differs from the
 *         position where the current token started, false otherwise.
 */
bool tok_pending_chars(tokenizer_t *tok)
{
	size_t write_pos = tok->outbuf_offset;
	size_t token_start = tok->outbuf_last_start;

	/* The write position must never fall behind the token start. */
	assert(write_pos >= token_start);

	if (write_pos == token_start) {
		return false;
	}
	return true;
}
HelenOS homepage, sources at GitHub