libx52/util/x52_char_map.c

/*
 * Saitek X52 Pro Character Map
 *
 * This file implements functions to perform a lookup of a UCS-4 character
 * in the lookup table.
 *
 * Copyright (C) 2017 Nirenjan Krishnan (nirenjan@nirenjan.org)
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation, version 2.
 *
 */

#include <stdint.h>
#include <errno.h>

#include "libx52util.h"

/**
 * @brief Determine how long a UTF-8 byte sequence is from its first byte
 *
 * The high bits of the first byte of a UTF-8 sequence encode how many bytes
 * the whole sequence occupies. Lead bytes announcing one through six bytes
 * are recognized.
 *
 * @param   start   First byte of the UTF-8 byte sequence
 *
 * @returns the sequence length (1 to 6), or -EINVAL if the byte cannot start
 * a sequence.
 */
static inline int32_t _utf8_len(uint8_t start)
{
    /* Entry N describes the lead byte of a sequence that is N + 1 bytes long */
    static const struct {
        uint8_t mask;
        uint8_t pattern;
    } lead[] = {
        { 0x80, 0x00 },     /* 0xxxxxxx */
        { 0xe0, 0xc0 },     /* 110xxxxx */
        { 0xf0, 0xe0 },     /* 1110xxxx */
        { 0xf8, 0xf0 },     /* 11110xxx */
        { 0xfc, 0xf8 },     /* 111110xx */
        { 0xfe, 0xfc },     /* 1111110x */
    };
    const int32_t count = (int32_t)(sizeof(lead) / sizeof(lead[0]));
    int32_t n;

    for (n = 0; n < count; n++) {
        if ((start & lead[n].mask) == lead[n].pattern) {
            return n + 1;
        }
    }

    /* Continuation bytes (10xxxxxx), 0xFE and 0xFF end up here */
    return -EINVAL;
}

/** @brief Decode one UTF-8 byte sequence into a Unicode codepoint
 *
 * The expected sequence length is derived from the first byte. If that byte
 * is not a valid starting byte, the reported length is 1. If a byte that is
 * not a continuation byte (0x80..0xBF) is met while decoding, decoding stops
 * and the reported length is the number of bytes consumed before that byte;
 * the offending byte itself (which may be the terminating NUL) is not counted.
 *
 * @param[in]   str     UTF-8 byte sequence to decode
 * @param[out]  len     Receives the number of bytes consumed
 *
 * @returns the decoded Unicode codepoint, or -EINVAL if the starting byte or
 * any continuation byte is invalid.
 */
static inline int32_t _utf8_to_unicode(const uint8_t *str, uint32_t *len)
{
    /* Payload bits carried by the first byte, indexed by sequence length */
    static const uint8_t lead_mask[] = {
        0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
    };
    const int32_t expected = _utf8_len(str[0]);
    int32_t consumed = 1;
    int32_t result;

    if (expected < 1 || expected > 6) {
        /* Not a valid starting byte: consume just this one byte */
        *len = consumed;
        return -EINVAL;
    }

    result = str[0] & lead_mask[expected];

    /* Fold in six payload bits from each continuation byte */
    while (consumed < expected) {
        const uint8_t next = str[consumed];

        if ((next & 0xc0) != 0x80) {
            /* Leave the offending byte unconsumed */
            result = -EINVAL;
            break;
        }

        result = (result << 6) | (next & 0x3f);
        consumed++;
    }

    *len = consumed;
    return result;
}

/**
 * @brief Translate a Unicode codepoint into an X52 character map code
 *
 * Codepoints U+0020 through U+007D map to themselves, except U+005C
 * (REVERSE SOLIDUS), which has no entry. Every other supported codepoint is
 * found by scanning a fixed translation table.
 *
 * @param   chr     Unicode codepoint to translate
 * @param   unrec   Value to return when the codepoint has no entry.
 *                  A negative value will drop the character
 *
 * @returns the X52 character code, or unrec if there is no entry.
 */
static int _unicode_to_x52(int32_t chr, int unrec)
{
    static const struct {
        int32_t codepoint;
        uint8_t glyph;
    } table[] = {
        /* Miscellaneous Symbols */
        { 0x00A7, 0x12 },   /* SECTION SIGN */
        { 0x00B6, 0x13 },   /* PILCROW SIGN */
        { 0x00A9, 0x0F },   /* (C) */
        { 0x00AE, 0x0E },   /* (R) */

        /* Mathematical Symbols */
        { 0x00BD, 0xF5 },   /* VULGAR FRACTION ONE HALF */
        { 0x00BC, 0xF6 },   /* VULGAR FRACTION ONE QUARTER */
        { 0x00D7, 0xF7 },   /* MULTIPLICATION SIGN */
        { 0x00F7, 0xF8 },   /* DIVISION SIGN */
        { 0x2264, 0xF9 },   /* LESS-THAN OR EQUAL TO */
        { 0x2265, 0xFA },   /* GREATER-THAN OR EQUAL TO */
        { 0x226A, 0xFB },   /* MUCH LESS-THAN */
        { 0x226B, 0xFC },   /* MUCH GREATER-THAN */
        { 0x2260, 0xFD },   /* NOT EQUAL TO */
        { 0x221A, 0xFE },   /* SQUARE ROOT */

        /* Accented Latin characters */
        { 0x00C7, 0x80 },   /* LATIN CAPITAL LETTER C WITH CEDILLA */
        { 0x00FC, 0x81 },   /* LATIN SMALL LETTER U WITH DIAERESIS */
        { 0x00E9, 0x82 },   /* LATIN SMALL LETTER E WITH ACUTE */
        { 0x00E2, 0x83 },   /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
        { 0x00E4, 0x84 },   /* LATIN SMALL LETTER A WITH DIAERESIS */
        { 0x00E0, 0x85 },   /* LATIN SMALL LETTER A WITH GRAVE */
        { 0x0227, 0x86 },   /* LATIN SMALL LETTER A WITH DOT ABOVE */
        { 0x00E7, 0x87 },   /* LATIN SMALL LETTER C WITH CEDILLA */
        { 0x00EA, 0x88 },   /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
        { 0x00EB, 0x89 },   /* LATIN SMALL LETTER E WITH DIAERESIS */
        { 0x00E8, 0x8A },   /* LATIN SMALL LETTER E WITH GRAVE */
        { 0x00EF, 0x8B },   /* LATIN SMALL LETTER I WITH DIAERESIS */
        { 0x00EE, 0x8C },   /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
        { 0x00EC, 0x8D },   /* LATIN SMALL LETTER I WITH GRAVE */
        { 0x00C4, 0x8E },   /* LATIN CAPITAL LETTER A WITH DIAERESIS */
        { 0x00C2, 0x8F },   /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */

        { 0x00C9, 0x90 },   /* LATIN CAPITAL LETTER E WITH ACUTE */
        { 0x00E6, 0x91 },   /* LATIN SMALL LETTER AE */
        { 0x00C6, 0x92 },   /* LATIN CAPITAL LETTER AE */
        { 0x00F4, 0x93 },   /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
        { 0x00F6, 0x94 },   /* LATIN SMALL LETTER O WITH DIAERESIS */
        { 0x00F2, 0x95 },   /* LATIN SMALL LETTER O WITH GRAVE */
        { 0x00FB, 0x96 },   /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
        { 0x00F9, 0x97 },   /* LATIN SMALL LETTER U WITH GRAVE */
        { 0x00FF, 0x98 },   /* LATIN SMALL LETTER Y WITH DIAERESIS */
        { 0x00D6, 0x99 },   /* LATIN CAPITAL LETTER O WITH DIAERESIS */
        { 0x00DC, 0x9A },   /* LATIN CAPITAL LETTER U WITH DIAERESIS */
        { 0x00F1, 0x9B },   /* LATIN SMALL LETTER N WITH TILDE */
        { 0x00D1, 0x9C },   /* LATIN CAPITAL LETTER N WITH TILDE */
        { 0x00AA, 0x9D },   /* FEMININE ORDINAL INDICATOR */
        { 0x00BA, 0x9E },   /* MASCULINE ORDINAL INDICATOR */
        { 0x00BF, 0x9F },   /* INVERTED QUESTION MARK */

        { 0x00E1, 0xE0 },   /* LATIN SMALL LETTER A WITH ACUTE */
        { 0x00ED, 0xE1 },   /* LATIN SMALL LETTER I WITH ACUTE */
        { 0x00F3, 0xE2 },   /* LATIN SMALL LETTER O WITH ACUTE */
        { 0x00FA, 0xE3 },   /* LATIN SMALL LETTER U WITH ACUTE */
        { 0x00A2, 0xE4 },   /* CENT SIGN */
        { 0x00A3, 0xE5 },   /* POUND SIGN */
        { 0x00A5, 0xE6 },   /* YEN SIGN */
        /*
         * X52 codes 0xE7 (looks like a "Pt", possibly the peseta symbol)
         * and 0xE8 (looks like a stylized lowercase f) have no Unicode
         * codepoint assigned to them yet.
         */
        { 0x00A1, 0xE9 },   /* INVERTED EXCLAMATION MARK */
        { 0x00C3, 0xEA },   /* LATIN CAPITAL LETTER A WITH TILDE */
        { 0x00E3, 0xEB },   /* LATIN SMALL LETTER A WITH TILDE */
        { 0x00D5, 0xEC },   /* LATIN CAPITAL LETTER O WITH TILDE */
        { 0x00F5, 0xED },   /* LATIN SMALL LETTER O WITH TILDE */
        { 0x00D8, 0xEE },   /* LATIN CAPITAL LETTER O WITH STROKE */
        { 0x00F8, 0xEF },   /* LATIN SMALL LETTER O WITH STROKE */

        /* Greek */
        { 0x0393, 0x14 },   /* GREEK CAPITAL LETTER GAMMA */
        { 0x0394, 0x15 },   /* GREEK CAPITAL LETTER DELTA */
        { 0x0398, 0x16 },   /* GREEK CAPITAL LETTER THETA */
        { 0x039B, 0x17 },   /* GREEK CAPITAL LETTER LAMDA */
        { 0x039E, 0x18 },   /* GREEK CAPITAL LETTER XI */
        { 0x03A0, 0x19 },   /* GREEK CAPITAL LETTER PI */
        { 0x03A3, 0x1A },   /* GREEK CAPITAL LETTER SIGMA */
        { 0x03D2, 0x1B },   /* GREEK UPSILON WITH HOOK SYMBOL */
        { 0x03A6, 0x1C },   /* GREEK CAPITAL LETTER PHI */
        { 0x03A8, 0x1D },   /* GREEK CAPITAL LETTER PSI */
        { 0x03A9, 0x1E },   /* GREEK CAPITAL LETTER OMEGA */
        { 0x03B1, 0x1F },   /* GREEK SMALL LETTER ALPHA */

        /* Box Drawing */
        { 0x250C, 0x09 },   /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
        { 0x2510, 0x0A },   /* BOX DRAWINGS LIGHT DOWN AND LEFT */
        { 0x2514, 0x0B },   /* BOX DRAWINGS LIGHT UP AND RIGHT */
        { 0x2518, 0x0C },   /* BOX DRAWINGS LIGHT UP AND LEFT */
        { 0x2500, 0xFF },   /* BOX DRAWINGS LIGHT HORIZONTAL */

        /* TODO: Japanese Kana */
    };
    unsigned int i;

    /* U+0020..U+007D pass through unchanged; U+005C has no entry */
    if (chr >= 0x0020 && chr <= 0x007D && chr != 0x005C) {
        return chr;
    }

    for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
        if (table[i].codepoint == chr) {
            return table[i].glyph;
        }
    }

    return unrec;
}

/**
 * @brief Convert UTF8 string to X52 character map.
 *
 * This function takes in a UTF-8 string and converts it to the character
 * map used by the X52Pro MFD. Unrecognized characters and invalid UTF-8
 * sequences are silently dropped. The output is not NUL-terminated; the
 * number of bytes written is reported through @p len.
 *
 * @param[in]       input   Input string in UTF-8. Must be NUL-terminated
 * @param[out]      output  Output buffer
 * @param[inout]    len     On entry, the capacity of the output buffer in
 *                          bytes (must be non-zero). On return, the number
 *                          of bytes written to the output buffer.
 *
 * @returns 0 on success, -EINVAL on invalid parameters, -E2BIG if the buffer
 * filled up before converting the entire string. A string whose converted
 * form fits the buffer exactly is a success.
 */
int libx52util_convert_utf8_string(const uint8_t *input,
                                   uint8_t *output, size_t *len)
{
    size_t index = 0;
    uint32_t chr_len;
    int chr;

    if (!input || !output || !len || !*len) {
        return -EINVAL;
    }

    while (*input) {
        chr = _unicode_to_x52(_utf8_to_unicode(input, &chr_len), -1);
        input += chr_len;

        if (chr < 0) {
            /* No X52 equivalent (or invalid sequence): drop it */
            continue;
        }

        /*
         * Check for room before storing, so that -E2BIG is reported only
         * when a displayable character really had to be left out.
         */
        if (index >= *len) {
            *len = index;
            return -E2BIG;
        }

        output[index] = (uint8_t)chr;
        index++;
    }

    *len = index;
    return 0;
}