/* * Copyright (c) 2012 Julia Medvedeva * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * OSTA compliant Unicode compression, uncompression routines, * file name translation routine for OS/2, Windows 95, Windows NT, * Macintosh and UNIX. * Copyright 1995 Micro Design International, Inc. * Written by Jason M. Rinn. * Micro Design International gives permission for the free use of the * following source code. */ /** @addtogroup udf * @{ */ /** * @file udf_osta.c * @brief OSTA compliant functions. */ #include <stdlib.h> #include <str.h> #include <macros.h> #include <errno.h> #include "udf_osta.h" #include "udf_cksum.h" /** Illegal UNIX characters are NULL and slash. * */ static bool legal_check(uint16_t ch) { if ((ch == 0x0000) || (ch == 0x002F)) return false; return true; } /** Convert OSTA CS0 compressed Unicode name to Unicode. * * The Unicode output will be in the byte order that the local compiler * uses for 16-bit values. * * NOTE: This routine only performs error checking on the comp_id. * It is up to the user to ensure that the Unicode buffer is large * enough, and that the compressed Unicode name is correct. * * @param[in] number_of_bytes Number of bytes read from media * @param[in] udf_compressed Bytes read from media * @param[out] unicode Uncompressed unicode characters * @param[in] unicode_max_len Size of output array * * @return Number of Unicode characters which were uncompressed. * */ static size_t udf_uncompress_unicode(size_t number_of_bytes, uint8_t *udf_compressed, uint16_t *unicode, size_t unicode_max_len) { /* Use udf_compressed to store current byte being read. */ uint8_t comp_id = udf_compressed[0]; /* First check for valid compID. */ if ((comp_id != 8) && (comp_id != 16)) return 0; size_t unicode_idx = 0; size_t byte_idx = 1; /* Loop through all the bytes. */ while ((byte_idx < number_of_bytes) && (unicode_idx < unicode_max_len)) { if (comp_id == 16) { /* * Move the first byte to the high bits of the * Unicode char. */ unicode[unicode_idx] = udf_compressed[byte_idx++] << 8; } else unicode[unicode_idx] = 0; if (byte_idx < number_of_bytes) { /* Then the next byte to the low bits. */ unicode[unicode_idx] |= udf_compressed[byte_idx++]; } unicode_idx++; } return unicode_idx; } /** Translate a long file name * * Translate a long file name to one using a MAXLEN and an illegal char set * in accord with the OSTA requirements. Assumes the name has already been * translated to Unicode. * * @param[out] new_name Translated name. Must be of length MAXLEN * @param[in] udf_name Name from UDF volume * @param[in] udf_len Length of UDF Name * * @return Number of Unicode characters in translated name. * */ size_t udf_translate_name(uint16_t *new_name, uint16_t *udf_name, size_t udf_len) { bool needs_crc = false; bool has_ext = false; size_t ext_idx = 0; size_t new_idx = 0; size_t new_ext_idx = 0; for (size_t idx = 0; idx < udf_len; idx++) { uint16_t current = udf_name[idx]; if ((!legal_check(current)) || (!ascii_check(current))) { needs_crc = true; /* * Replace Illegal and non-displayable chars with * underscore. */ current = ILLEGAL_CHAR_MARK; /* * Skip any other illegal or non-displayable * characters. */ while ((idx + 1 < udf_len) && (!legal_check(udf_name[idx + 1]) || (!ascii_check(udf_name[idx + 1])))) idx++; } /* Record position of extension, if one is found. */ if ((current == PERIOD) && ((udf_len - idx - 1) <= EXT_SIZE)) { if (udf_len == idx + 1) { /* A trailing period is NOT an extension. */ has_ext = false; } else { has_ext = true; ext_idx = idx; new_ext_idx = new_idx; } } if (new_idx < MAXLEN) new_name[new_idx++] = current; else needs_crc = true; } if (needs_crc) { uint16_t ext[EXT_SIZE]; size_t local_ext_idx = 0; if (has_ext) { size_t max_filename_len; /* Translate extension, and store it in ext. */ for (size_t idx = 0; (idx < EXT_SIZE) && (ext_idx + idx + 1 < udf_len); idx++) { uint16_t current = udf_name[ext_idx + idx + 1]; if ((!legal_check(current)) || (!ascii_check(current))) { needs_crc = true; /* * Replace Illegal and non-displayable * chars with underscore. */ current = ILLEGAL_CHAR_MARK; /* * Skip any other illegal or * non-displayable characters. */ while ((idx + 1 < EXT_SIZE) && ((!legal_check(udf_name[ext_idx + idx + 2])) || (!ascii_check(udf_name[ext_idx + idx + 2])))) idx++; } ext[local_ext_idx++] = current; } /* * Truncate filename to leave room for extension and * CRC. */ max_filename_len = ((MAXLEN - 5) - local_ext_idx - 1); if (new_idx > max_filename_len) new_idx = max_filename_len; else new_idx = new_ext_idx; } else if (new_idx > MAXLEN - 5) { /* If no extension, make sure to leave room for CRC. */ new_idx = MAXLEN - 5; } /* Add mark for CRC. */ new_name[new_idx++] = CRC_MARK; /* Calculate CRC from original filename. */ uint16_t value_crc = udf_unicode_cksum(udf_name, udf_len); /* Convert 16-bits of CRC to hex characters. */ const char hex_char[] = "0123456789ABCDEF"; new_name[new_idx++] = hex_char[(value_crc & 0xf000) >> 12]; new_name[new_idx++] = hex_char[(value_crc & 0x0f00) >> 8]; new_name[new_idx++] = hex_char[(value_crc & 0x00f0) >> 4]; new_name[new_idx++] = hex_char[(value_crc & 0x000f)]; /* Place a translated extension at end, if found. */ if (has_ext) { new_name[new_idx++] = PERIOD; for (size_t idx = 0; idx < local_ext_idx; idx++) new_name[new_idx++] = ext[idx]; } } return new_idx; } /** Decode from dchar to utf8 * * @param result Returned value - utf8 string * @param result_len Length of output string * @param id Input string * @param len Length of input string * @param chsp Decode method * */ void udf_to_unix_name(char *result, size_t result_len, char *id, size_t len, udf_charspec_t *chsp) { const char *osta_id = "OSTA Compressed Unicode"; size_t ucode_chars, nice_uchars; uint16_t *raw_name = malloc(MAX_BUF * sizeof(uint16_t)); uint16_t *unix_name = malloc(MAX_BUF * sizeof(uint16_t)); // FIXME: Check for malloc returning NULL bool is_osta_typ0 = (chsp->type == 0) && (str_cmp((char *) chsp->info, osta_id) == 0); if (is_osta_typ0) { *raw_name = 0; *unix_name = 0; ucode_chars = udf_uncompress_unicode(len, (uint8_t *) id, raw_name, MAX_BUF); ucode_chars = min(ucode_chars, utf16_wsize(raw_name)); nice_uchars = udf_translate_name(unix_name, raw_name, ucode_chars); /* Output UTF-8 */ unix_name[nice_uchars] = 0; utf16_to_str(result, result_len, unix_name); } else { /* Assume 8 bit char length byte Latin-1 */ str_ncpy(result, result_len, (char *) (id + 1), str_size((char *) (id + 1))); } free(raw_name); free(unix_name); } /** * @} */