libx52/util/x52_char_map.c

709 lines
14 KiB
C

/*
* Saitek X52 Pro Character Map
*
* This file implements functions to perform a lookup of a UCS-4 character
* in the lookup table.
*
* Copyright (C) 2017 Nirenjan Krishnan (nirenjan@nirenjan.org)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2.
*
*/
#include <stdint.h>
#include <errno.h>
#include "libx52util.h"
/**
* @brief Return the length of the UTF-8 byte sequence
*
* This function takes the starting character of a UTF-8 byte sequence
* and returns the expected length of the sequence in order to convert
* the sequence to a Unicode codepoint
*
* @param start Starting character of UTF-8 byte sequence
*
* @returns the length of the UTF-8 byte sequence, -EINVAL on invalid character
*/
static inline int32_t _utf8_len(uint8_t start)
{
if ((start & 0x80) == 0) {
return 1;
} else if ((start & 0xe0) == 0xc0) {
return 2;
} else if ((start & 0xf0) == 0xe0) {
return 3;
} else if ((start & 0xf8) == 0xf0) {
return 4;
} else if ((start & 0xfc) == 0xf8) {
return 5;
} else if ((start & 0xfe) == 0xfc) {
return 6;
} else {
return -EINVAL;
}
}
/** @brief Convert a UTF-8 byte sequence to a Unicode codepoint
*
* If the first byte is not a valid starting byte, then the byte sequence is
* treated as being a single byte. However, if an invalid character is
* encountered during decoding, then the length is the number of bytes from the
* start of the sequence including the invalid character.
*
* @param[in] str UTF-8 byte sequence to convert
* @param[out] len Pointer to save the length of the byte sequence
*
* @returns the Unicode codepoint for the sequence, -EINVAL on an invalid
* byte sequence, or invalid length.
*/
static inline int32_t _utf8_to_unicode(const uint8_t *str, uint32_t *len)
{
int32_t comp_len = _utf8_len(str[0]);
int32_t actual_len = 1;
int32_t codepoint;
switch (comp_len) {
case 1:
codepoint = str[0];
break;
case 2:
codepoint = str[0] & 0x1f;
break;
case 3:
codepoint = str[0] & 0x0f;
break;
case 4:
codepoint = str[0] & 0x07;
break;
case 5:
codepoint = str[0] & 0x03;
break;
case 6:
codepoint = str[0] & 0x01;
break;
default:
codepoint = -EINVAL;
goto err_return;
}
/* Add the additional UTF-8 characters */
for (; actual_len < comp_len; actual_len++) {
uint8_t chr = str[actual_len];
if (chr >= 0x80 && chr <= 0xBF) {
/* Valid UTF-8 continuation byte */
codepoint <<= 6;
codepoint |= chr & 0x3f;
} else {
codepoint = -EINVAL;
break;
}
}
err_return:
*len = actual_len;
return codepoint;
}
/**
* @brief Fixed map from Unicode codepoint to X52 character map
*
* @param chr Unicode codepoint to map to X52 character map
* @param unrec Code point to display if not matching any known entry.
* A negative value will drop the character
*
* @returns mapped character, or unrec if not found.
*/
static int _unicode_to_x52(int32_t chr, int unrec)
{
switch (chr) {
case 0x0020:
return 0x0020;
case 0x0021:
return 0x0021;
case 0x0022:
return 0x0022;
case 0x0023:
return 0x0023;
case 0x0024:
return 0x0024;
case 0x0025:
return 0x0025;
case 0x0026:
return 0x0026;
case 0x0027:
return 0x0027;
case 0x0028:
return 0x0028;
case 0x0029:
return 0x0029;
case 0x002A:
return 0x002A;
case 0x002B:
return 0x002B;
case 0x002C:
return 0x002C;
case 0x002D:
return 0x002D;
case 0x002E:
return 0x002E;
case 0x002F:
return 0x002F;
case 0x0030:
return 0x0030;
case 0x0031:
return 0x0031;
case 0x0032:
return 0x0032;
case 0x0033:
return 0x0033;
case 0x0034:
return 0x0034;
case 0x0035:
return 0x0035;
case 0x0036:
return 0x0036;
case 0x0037:
return 0x0037;
case 0x0038:
return 0x0038;
case 0x0039:
return 0x0039;
case 0x003A:
return 0x003A;
case 0x003B:
return 0x003B;
case 0x003C:
return 0x003C;
case 0x003D:
return 0x003D;
case 0x003E:
return 0x003E;
case 0x003F:
return 0x003F;
case 0x0040:
return 0x0040;
case 0x0041:
return 0x0041;
case 0x0042:
return 0x0042;
case 0x0043:
return 0x0043;
case 0x0044:
return 0x0044;
case 0x0045:
return 0x0045;
case 0x0046:
return 0x0046;
case 0x0047:
return 0x0047;
case 0x0048:
return 0x0048;
case 0x0049:
return 0x0049;
case 0x004A:
return 0x004A;
case 0x004B:
return 0x004B;
case 0x004C:
return 0x004C;
case 0x004D:
return 0x004D;
case 0x004E:
return 0x004E;
case 0x004F:
return 0x004F;
case 0x0050:
return 0x0050;
case 0x0051:
return 0x0051;
case 0x0052:
return 0x0052;
case 0x0053:
return 0x0053;
case 0x0054:
return 0x0054;
case 0x0055:
return 0x0055;
case 0x0056:
return 0x0056;
case 0x0057:
return 0x0057;
case 0x0058:
return 0x0058;
case 0x0059:
return 0x0059;
case 0x005A:
return 0x005A;
case 0x005B:
return 0x005B;
case 0x005D:
return 0x005D;
case 0x005E:
return 0x005E;
case 0x005F:
return 0x005F;
case 0x0060:
return 0x0060;
case 0x0061:
return 0x0061;
case 0x0062:
return 0x0062;
case 0x0063:
return 0x0063;
case 0x0064:
return 0x0064;
case 0x0065:
return 0x0065;
case 0x0066:
return 0x0066;
case 0x0067:
return 0x0067;
case 0x0068:
return 0x0068;
case 0x0069:
return 0x0069;
case 0x006A:
return 0x006A;
case 0x006B:
return 0x006B;
case 0x006C:
return 0x006C;
case 0x006D:
return 0x006D;
case 0x006E:
return 0x006E;
case 0x006F:
return 0x006F;
case 0x0070:
return 0x0070;
case 0x0071:
return 0x0071;
case 0x0072:
return 0x0072;
case 0x0073:
return 0x0073;
case 0x0074:
return 0x0074;
case 0x0075:
return 0x0075;
case 0x0076:
return 0x0076;
case 0x0077:
return 0x0077;
case 0x0078:
return 0x0078;
case 0x0079:
return 0x0079;
case 0x007A:
return 0x007A;
case 0x007B:
return 0x007B;
case 0x007C:
return 0x007C;
case 0x007D:
return 0x007D;
// Miscellaneous Symbols
case 0x00A7: /* SECTION SIGN */
return 0x12;
case 0x00B6: /* PILCROW SIGN */
return 0x13;
case 0x00A9: /* (C) */
return 0x0F;
case 0x00AE: /* (R) */
return 0x0E;
// Mathematical Symbols
case 0x00BD: /* VULGAR FRACTION ONE HALF */
return 0xF5;
case 0x00BC: /* VULGAR FRACTION ONE QUARTER */
return 0xF6;
case 0x00D7: /* MULTIPLICATION SIGN */
return 0xF7;
case 0x00F7: /* DIVISION SIGN */
return 0xF8;
case 0x2264: /* LESS-THAN OR EQUAL TO */
return 0xF9;
case 0x2265: /* GREATER-THAN OR EQUAL TO */
return 0xFA;
case 0x226A: /* MUCH LESS-THAN */
return 0xFB;
case 0x226B: /* MUCH GREATER-THAN */
return 0xFC;
case 0x2260: /* NOT EQUAL TO */
return 0xFD;
case 0x221A: /* SQUARE ROOT */
return 0xFE;
// Accented Latin characters
case 0x00C7: /* LATIN CAPITAL LETTER C WITH CEDILLA */
return 0x80;
case 0x00FC: /* LATIN SMALL LETTER U WITH DIAERESIS */
return 0x81;
case 0x00E9: /* LATIN SMALL LETTER E WITH ACUTE */
return 0x82;
case 0x00E2: /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
return 0x83;
case 0x00E4: /* LATIN SMALL LETTER A WITH DIAERESIS */
return 0x84;
case 0x00E0: /* LATIN SMALL LETTER A WITH GRAVE */
return 0x85;
case 0x0227: /* LATIN SMALL LETTER A WITH DOT ABOVE */
return 0x86;
case 0x00E7: /* LATIN SMALL LETTER C WITH CEDILLA */
return 0x87;
case 0x00EA: /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
return 0x88;
case 0x00EB: /* LATIN SMALL LETTER E WITH DIAERESIS */
return 0x89;
case 0x00E8: /* LATIN SMALL LETTER E WITH GRAVE */
return 0x8A;
case 0x00EF: /* LATIN SMALL LETTER I WITH DIAERESIS */
return 0x8B;
case 0x00EE: /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
return 0x8C;
case 0x00EC: /* LATIN SMALL LETTER I WITH GRAVE */
return 0x8D;
case 0x00C4: /* LATIN CAPITAL LETTER A WITH DIAERESIS */
return 0x8E;
case 0x00C2: /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
return 0x8F;
case 0x00C9: /* LATIN CAPITAL LETTER E WITH ACUTE */
return 0x90;
case 0x00E6: /* LATIN SMALL LETTER AE */
return 0x91;
case 0x00C6: /* LATIN CAPITAL LETTER AE */
return 0x92;
case 0x00F4: /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
return 0x93;
case 0x00F6: /* LATIN SMALL LETTER O WITH DIAERESIS */
return 0x94;
case 0x00F2: /* LATIN SMALL LETTER O WITH GRAVE */
return 0x95;
case 0x00FB: /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
return 0x96;
case 0x00F9: /* LATIN SMALL LETTER U WITH GRAVE */
return 0x97;
case 0x00FF: /* LATIN SMALL LETTER Y WITH DIAERESIS */
return 0x98;
case 0x00D6: /* LATIN CAPITAL LETTER O WITH DIAERESIS */
return 0x99;
case 0x00DC: /* LATIN CAPITAL LETTER U WITH DIAERESIS */
return 0x9A;
case 0x00F1: /* LATIN SMALL LETTER N WITH TILDE */
return 0x9B;
case 0x00D1: /* LATIN CAPITAL LETTER N WITH TILDE */
return 0x9C;
case 0x00AA: /* FEMININE ORDINAL INDICATOR */
return 0x9D;
case 0x00BA: /* MASCULINE ORDINAL INDICATOR */
return 0x9E;
case 0x00BF: /* INVERTED QUESTION MARK */
return 0x9F;
case 0x00E1: /* LATIN SMALL LETTER A WITH ACUTE */
return 0xE0;
case 0x00ED: /* LATIN SMALL LETTER I WITH ACUTE */
return 0xE1;
case 0x00F3: /* LATIN SMALL LETTER O WITH ACUTE */
return 0xE2;
case 0x00FA: /* LATIN SMALL LETTER U WITH ACUTE */
return 0xE3;
case 0x00A2: /* CENT SIGN */
return 0xE4;
case 0x00A3: /* POUND SIGN */
return 0xE5;
case 0x00A5: /* YEN SIGN */
return 0xE6;
// case 0x0000: /* This looks like a Pt, I think this is the Pesata symbol? */
// return 0xE7;
// case 0x0000: /* This looks like a stylized lowercase F. */
// return 0xE8;
case 0x00A1: /* INVERTED EXCLAMATION MARK */
return 0xE9;
case 0x00C3: /* LATIN CAPITAL LETTER A WITH TILDE */
return 0xEA;
case 0x00E3: /* LATIN SMALL LETTER A WITH TILDE */
return 0xEB;
case 0x00D5: /* LATIN CAPITAL LETTER O WITH TILDE */
return 0xEC;
case 0x00F5: /* LATIN SMALL LETTER O WITH TILDE */
return 0xED;
case 0x00D8: /* LATIN CAPITAL LETTER O WITH STROKE */
return 0xEE;
case 0x00F8: /* LATIN SMALL LETTER O WITH STROKE */
return 0xEF;
// Greek
case 0x0393: /* GREEK CAPITAL LETTER GAMMA */
return 0x14;
case 0x0394: /* GREEK CAPITAL LETTER DELTA */
return 0x15;
case 0x0398: /* GREEK CAPITAL LETTER THETA */
return 0x16;
case 0x039B: /* GREEK CAPITAL LETTER LAMDA */
return 0x17;
case 0x039E: /* GREEK CAPITAL LETTER XI */
return 0x18;
case 0x03A0: /* GREEK CAPITAL LETTER PI */
return 0x19;
case 0x03A3: /* GREEK CAPITAL LETTER SIGMA */
return 0x1A;
case 0x03D2: /* GREEK UPSILON WITH HOOK SYMBOL */
return 0x1B;
case 0x03A6: /* GREEK CAPITAL LETTER PHI */
return 0x1C;
case 0x03A8: /* GREEK CAPITAL LETTER PSI */
return 0x1D;
case 0x03A9: /* GREEK CAPITAL LETTER OMEGA */
return 0x1E;
case 0x03B1: /* GREEK SMALL LETTER ALPHA */
return 0x1F;
// Box Drawing
case 0x250C: /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
return 0x09;
case 0x2510: /* BOX DRAWINGS LIGHT DOWN AND LEFT */
return 0x0A;
case 0x2514: /* BOX DRAWINGS LIGHT UP AND RIGHT */
return 0x0B;
case 0x2518: /* BOX DRAWINGS LIGHT UP AND LEFT */
return 0x0C;
case 0x2500: /* BOX DRAWINGS LIGHT HORIZONTAL */
return 0xFF;
// TODO: Japanese Kana
default:
return unrec;
}
}
/**
* @brief Convert UTF8 string to X52 character map.
*
* This function takes in a UTF-8 string and converts it to the character
* map used by the X52Pro MFD. Unrecognized characters are silently dropped.
*
* @param[in] input Input string in UTF-8. Must be NUL-terminated
* @param[out] output Output buffer
* @param[inout] len Length of output buffer
*
* @returns 0 on success, -EINVAL on invalid parameters, -E2BIG if the buffer
* filled up before converting the entire string.
*/
int libx52util_convert_utf8_string(const uint8_t *input,
uint8_t *output, size_t *len)
{
size_t index;
uint32_t chr_len;
int chr;
int retval = 0;
if (!input || !output || !len || !*len) {
return -EINVAL;
}
index = 0;
while (*input) {
chr = _unicode_to_x52(_utf8_to_unicode(input, &chr_len), -1);
input += chr_len;
if (chr >= 0) {
output[index] = chr;
index++;
if (index >= *len) {
retval = -E2BIG;
break;
}
}
}
*len = index;
return retval;
}