HelenOS sources

root/uspace/srv/fs/udf/udf_osta.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. legal_check
  2. udf_uncompress_unicode
  3. udf_translate_name
  4. udf_to_unix_name

/*
 * Copyright (c) 2012 Julia Medvedeva
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * OSTA compliant Unicode compression, uncompression routines,
 * file name translation routine for OS/2, Windows 95, Windows NT,
 * Macintosh and UNIX.
 * Copyright 1995 Micro Design International, Inc.
 * Written by Jason M. Rinn.
 * Micro Design International gives permission for the free use of the
 * following source code.
 */

/** @addtogroup udf
 * @{
 */
/**
 * @file udf_osta.c
 * @brief OSTA compliant functions.
 */

#include <stdlib.h>
#include <str.h>
#include <macros.h>
#include <errno.h>
#include "udf_osta.h"
#include "udf_cksum.h"

/** Check whether a character may appear in a UNIX file name.
 *
 * The only characters rejected are NUL (0x0000) and slash (0x002F).
 *
 * @param[in] ch Unicode character to check
 *
 * @return True if the character is legal, false otherwise.
 *
 */
static bool legal_check(uint16_t ch)
{
        return (ch != 0x0000) && (ch != 0x002F);
}

/** Convert OSTA CS0 compressed Unicode name to Unicode.
 *
 * The first input byte is the compression ID: 8 means one byte per
 * character, 16 means two bytes per character with the high byte first.
 * The Unicode output will be in the byte order that the local compiler
 * uses for 16-bit values.
 *
 * At most unicode_max_len characters are stored and no terminator is
 * appended. Apart from the compression ID the input is not validated;
 * in particular, when the compression ID is 16 and the input ends after
 * the high byte of a character, that character is still emitted with its
 * low byte left zero.
 *
 * @param[in]  number_of_bytes Number of bytes read from media
 * @param[in]  udf_compressed  Bytes read from media
 * @param[out] unicode         Uncompressed unicode characters
 * @param[in]  unicode_max_len Size of output array (in characters)
 *
 * @return Number of Unicode characters which were uncompressed,
 *         0 if the input is empty or the compression ID is invalid.
 *
 */
static size_t udf_uncompress_unicode(size_t number_of_bytes,
    uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len)
{
        /*
         * Without at least the compression ID byte there is nothing to
         * decode; do not touch the input buffer at all in that case.
         */
        if ((udf_compressed == NULL) || (number_of_bytes == 0))
                return 0;

        uint8_t comp_id = udf_compressed[0];

        /* Only 8-bit and 16-bit compression are recognized. */
        if ((comp_id != 8) && (comp_id != 16))
                return 0;

        size_t unicode_idx = 0;
        size_t byte_idx = 1;

        /* Loop through all the bytes. */
        while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) {
                uint16_t ch = 0;

                /* In 16-bit mode the first byte forms the high bits. */
                if (comp_id == 16)
                        ch = (uint16_t) (udf_compressed[byte_idx++] << 8);

                /* The next byte (if still inside the input) forms the low bits. */
                if (byte_idx < number_of_bytes)
                        ch |= udf_compressed[byte_idx++];

                unicode[unicode_idx++] = ch;
        }

        return unicode_idx;
}

/** Translate a long file name
 *
 * Translate a Unicode file name into one that is at most MAXLEN characters
 * long and free of illegal characters, in accord with the OSTA
 * requirements. Assumes the name has already been translated to Unicode.
 *
 * Every run of illegal or non-displayable characters is collapsed into a
 * single ILLEGAL_CHAR_MARK. A period followed by at least one and at most
 * EXT_SIZE characters of the original name is treated as the start of an
 * extension. If any character had to be replaced, or the name did not fit
 * into MAXLEN characters, CRC_MARK and four upper-case hexadecimal digits
 * of udf_unicode_cksum() of the original name are appended to the
 * (possibly truncated) name, followed by the translated extension if one
 * was found.
 *
 * @param[out] new_name Translated name. Must have room for MAXLEN characters.
 * @param[in]  udf_name Name from UDF volume
 * @param[in]  udf_len  Length of UDF name (in Unicode characters)
 *
 * @return Number of Unicode characters in translated name.
 *
 */
size_t udf_translate_name(uint16_t *new_name, uint16_t *udf_name,
    size_t udf_len)
{
        bool needs_crc = false;
        bool has_ext = false;
        size_t ext_idx = 0;
        size_t new_idx = 0;
        size_t new_ext_idx = 0;

        for (size_t idx = 0; idx < udf_len; idx++) {
                uint16_t current = udf_name[idx];

                if ((!legal_check(current)) || (!ascii_check(current))) {
                        needs_crc = true;

                        /*
                         * Replace Illegal and non-displayable chars with
                         * underscore.
                         */
                        current = ILLEGAL_CHAR_MARK;

                        /*
                         * Skip any other illegal or non-displayable
                         * characters.
                         */
                        while ((idx + 1 < udf_len) &&
                            (!legal_check(udf_name[idx + 1]) ||
                            (!ascii_check(udf_name[idx + 1]))))
                                idx++;
                }

                /* Record position of extension, if one is found. */
                if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) {
                        if (udf_len == idx + 1) {
                                /* A trailing period is NOT an extension. */
                                has_ext = false;
                        } else {
                                has_ext = true;
                                ext_idx = idx;
                                new_ext_idx = new_idx;
                        }
                }

                /* Characters that do not fit are dropped and force a CRC. */
                if (new_idx < MAXLEN)
                        new_name[new_idx++] = current;
                else
                        needs_crc = true;
        }

        if (needs_crc) {
                uint16_t ext[EXT_SIZE];
                size_t local_ext_idx = 0;

                if (has_ext) {
                        size_t max_filename_len;

                        /*
                         * Translate extension, and store it in ext. The
                         * extension is re-read from the original name,
                         * starting right after the recorded period.
                         */
                        for (size_t idx = 0; (idx < EXT_SIZE) &&
                            (ext_idx + idx + 1 < udf_len); idx++) {
                                uint16_t current = udf_name[ext_idx + idx + 1];

                                if ((!legal_check(current)) || (!ascii_check(current))) {
                                        /*
                                         * Replace Illegal and non-displayable
                                         * chars with underscore.
                                         */
                                        current = ILLEGAL_CHAR_MARK;

                                        /*
                                         * Skip any other illegal or
                                         * non-displayable characters. The
                                         * look-ahead must stay inside the
                                         * original name: an extension shorter
                                         * than EXT_SIZE ends before the
                                         * EXT_SIZE limit is reached.
                                         */
                                        while ((idx + 1 < EXT_SIZE) &&
                                            (ext_idx + idx + 2 < udf_len) &&
                                            ((!legal_check(udf_name[ext_idx + idx + 2])) ||
                                            (!ascii_check(udf_name[ext_idx + idx + 2]))))
                                                idx++;
                                }

                                ext[local_ext_idx++] = current;
                        }

                        /*
                         * Truncate filename to leave room for extension and
                         * CRC: CRC mark plus four hex digits (5), the period
                         * (1) and the translated extension.
                         */
                        max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1);
                        if (new_idx > max_filename_len)
                                new_idx = max_filename_len;
                        else
                                new_idx = new_ext_idx;
                } else if (new_idx > MAXLEN - 5) {
                        /* If no extension, make sure to leave room for CRC. */
                        new_idx = MAXLEN - 5;
                }

                /* Add mark for CRC. */
                new_name[new_idx++] = CRC_MARK;

                /* Calculate CRC from original filename. */
                uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len);

                /* Convert 16-bits of CRC to hex characters. */
                const char hex_char[] = "0123456789ABCDEF";

                new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12];
                new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8];
                new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4];
                new_name[new_idx++] = hex_char[(value_crc & 0x000f)];

                /* Place a translated extension at end, if found. */
                if (has_ext) {
                        new_name[new_idx++] = PERIOD;

                        for (size_t idx = 0; idx < local_ext_idx; idx++)
                                new_name[new_idx++] = ext[idx];
                }
        }

        return new_idx;
}

/** Decode from dchar to utf8
 *
 * If the character set is type 0 with the "OSTA Compressed Unicode"
 * identifier, the input is uncompressed, translated by
 * udf_translate_name() and converted to UTF-8. Otherwise the first input
 * byte is skipped and the rest is copied as an 8-bit string.
 *
 * The result is an empty string when the input is empty or when the
 * temporary buffers cannot be allocated. Nothing is written when
 * result_len is zero.
 *
 * @param result     Returned value - utf8 string
 * @param result_len Size of the output buffer in bytes
 * @param id         Input string
 * @param len        Length of input string in bytes
 * @param chsp       Decode method
 *
 */
void udf_to_unix_name(char *result, size_t result_len, char *id, size_t len,
    udf_charspec_t *chsp)
{
        const char *osta_id = "OSTA Compressed Unicode";

        /* There is no room even for the terminator. */
        if (result_len == 0)
                return;

        /* Make sure the caller gets a terminated string on every path. */
        result[0] = '\0';

        /* An empty identifier does not even carry its leading byte. */
        if (len == 0)
                return;

        bool is_osta_typ0 = (chsp->type == 0) &&
            (str_cmp((char *) chsp->info, osta_id) == 0);

        if (!is_osta_typ0) {
                /*
                 * Assume 8 bit char length byte Latin-1. Limit the copy to
                 * the bytes that belong to the identifier instead of
                 * trusting it to be null-terminated.
                 * NOTE(review): relies on str_ncpy() reading at most its
                 * last argument bytes from the source - confirm.
                 */
                str_ncpy(result, result_len, id + 1, len - 1);
                return;
        }

        /* The temporary buffers are only needed for the OSTA path. */
        uint16_t *raw_name = malloc(MAX_BUF * sizeof(uint16_t));
        uint16_t *unix_name = malloc(MAX_BUF * sizeof(uint16_t));

        if ((raw_name == NULL) || (unix_name == NULL)) {
                /* free(NULL) is a no-op, so both can be released blindly. */
                free(raw_name);
                free(unix_name);
                return;
        }

        /*
         * Keep one slot for the terminator so that utf16_wsize() never
         * scans uninitialized memory past the decoded characters.
         */
        size_t ucode_chars =
            udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF - 1);
        raw_name[ucode_chars] = 0;

        /* Stop at an embedded terminator, if there is one. */
        ucode_chars = min(ucode_chars, utf16_wsize(raw_name));

        /*
         * NOTE(review): assumes MAX_BUF is larger than MAXLEN so that the
         * translated name and its terminator fit - confirm in udf_osta.h.
         */
        size_t nice_uchars =
            udf_translate_name(unix_name, raw_name, ucode_chars);

        /* Output UTF-8 */
        unix_name[nice_uchars] = 0;
        utf16_to_str(result, result_len, unix_name);

        free(raw_name);
        free(unix_name);
}

/**
 * @}
 */

/* [<][>][^][v][top][bottom][index][help] */
HelenOS homepage, sources at GitHub