diff --git a/util/Makefile.am b/util/Makefile.am index 83ba61d..5d1919c 100644 --- a/util/Makefile.am +++ b/util/Makefile.am @@ -1,10 +1,11 @@ ACLOCAL_AMFLAGS = -I m4 -lib_LTLIBRARIES = libx52util.la +lib_LTLIBRARIES = libx52util.la libx52util2.la # libx52 utility library # This library provides extra utilities for ease of use nodist_libx52util_la_SOURCES = util_char_map.c +libx52util_la_SOURCES = x52_char_map_lookup.c libx52util_la_CFLAGS = -I $(top_srcdir)/libx52 libx52util_la_LDFLAGS = -version-info 1:0:0 libx52util_la_LIBADD = ../libx52/libx52.la @@ -22,3 +23,21 @@ EXTRA_DIST = x52_char_map.cfg \ CLEANFILES = util_char_map.c util_char_map.c: $(srcdir)/x52_char_map.cfg x52_char_map_gen.py $(AM_V_GEN) $(srcdir)/x52_char_map_gen.py $(srcdir)/x52_char_map.cfg $@ + +# libx52 utility library v2 +# This library provides extra utilities for ease of use +libx52util2_la_SOURCES = x52_char_map.c +libx52util2_la_CFLAGS = -I $(top_srcdir)/libx52 -O2 +libx52util2_la_LDFLAGS = -version-info 1:0:0 +libx52util2_la_LIBADD = ../libx52/libx52.la + +bin_PROGRAMS = perf1 perf2 + +perf1_SOURCES = x52_lookup_test.c +perf1_CFLAGS = @X52_INCLUDE@ +perf1_LDADD = libx52util.la + +perf2_SOURCES = x52_lookup_test.c +perf2_CFLAGS = @X52_INCLUDE@ +perf2_LDADD = libx52util2.la + diff --git a/util/x52_char_map.c b/util/x52_char_map.c new file mode 100644 index 0000000..a4e67ca --- /dev/null +++ b/util/x52_char_map.c @@ -0,0 +1,708 @@ +/* + * Saitek X52 Pro Character Map + * + * This file implements functions to perform a lookup of a UCS-4 character + * in the lookup table. + * + * Copyright (C) 2017 Nirenjan Krishnan (nirenjan@nirenjan.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. 
/**
 * @brief Classify a UTF-8 lead byte by the length of the sequence it starts
 *
 * The high bits of the first byte of a UTF-8 sequence encode how many bytes
 * the whole sequence occupies. This helper matches the byte against each
 * known lead-byte prefix in turn, shortest sequence first.
 *
 * @param start First byte of a UTF-8 byte sequence
 *
 * @returns the sequence length (1 to 6), or -EINVAL when the byte cannot
 * start a sequence (a continuation byte, 0xFE or 0xFF)
 */
static inline int32_t _utf8_len(uint8_t start)
{
    /* Entry i describes the lead byte of a sequence that is i + 1 bytes long */
    static const struct {
        uint8_t mask;
        uint8_t prefix;
    } lead_bytes[] = {
        { 0x80, 0x00 },
        { 0xe0, 0xc0 },
        { 0xf0, 0xe0 },
        { 0xf8, 0xf0 },
        { 0xfc, 0xf8 },
        { 0xfe, 0xfc },
    };
    enum { LEAD_BYTE_KINDS = sizeof(lead_bytes) / sizeof(lead_bytes[0]) };

    for (int32_t i = 0; i < LEAD_BYTE_KINDS; i++) {
        if ((start & lead_bytes[i].mask) == lead_bytes[i].prefix) {
            return i + 1;
        }
    }

    return -EINVAL;
}
/**
 * @brief Decode one UTF-8 byte sequence into a Unicode codepoint
 *
 * The lead byte determines how many bytes are expected. A lead byte that
 * cannot start a sequence is consumed on its own. When a byte that is not a
 * continuation byte (0x80-0xBF) appears where one is expected, decoding
 * stops and the reported length counts only the bytes accepted before it;
 * the offending byte is left for the caller to examine next. In particular
 * a NUL terminator inside a truncated sequence is never skipped.
 *
 * @param[in]  str UTF-8 byte sequence to decode
 * @param[out] len Receives the number of bytes consumed (always at least 1)
 *
 * @returns the decoded codepoint, or -EINVAL for an invalid lead byte or an
 * invalid continuation byte
 */
static inline int32_t _utf8_to_unicode(const uint8_t *str, uint32_t *len)
{
    /* Payload bits held by the lead byte, indexed by sequence length */
    static const uint8_t lead_payload[] = {
        0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
    };
    const int32_t expected = _utf8_len(str[0]);
    int32_t consumed = 1;
    int32_t result;

    if (expected < 1 || expected > 6) {
        *len = consumed;
        return -EINVAL;
    }

    result = str[0] & lead_payload[expected];

    /* Fold in six payload bits from every continuation byte */
    while (consumed < expected) {
        const uint8_t next = str[consumed];

        if ((next & 0xc0) != 0x80) {
            result = -EINVAL;
            break;
        }

        result = (result << 6) | (next & 0x3f);
        consumed++;
    }

    *len = consumed;
    return result;
}
/**
 * @brief Translate a Unicode codepoint into the X52 display character code
 *
 * Codepoints U+0020 through U+007D map to the same numeric value, with the
 * single exception of U+005C, which has no entry. Every other supported
 * codepoint is listed explicitly below, grouped by script.
 *
 * @param chr   Unicode codepoint to translate
 * @param unrec Value to hand back when the codepoint has no mapping; the
 *              caller may pass a negative value to mean "drop it"
 *
 * @returns the display character code, or unrec when chr is not mapped
 */
static int _unicode_to_x52(int32_t chr, int unrec)
{
    if (chr >= 0x0020 && chr <= 0x007D) {
        return (chr == 0x005C) ? unrec : (int)chr;
    }

    switch (chr) {
    /* Miscellaneous symbols */
    case 0x00A7: return 0x12;   /* SECTION SIGN */
    case 0x00B6: return 0x13;   /* PILCROW SIGN */
    case 0x00A9: return 0x0F;   /* COPYRIGHT SIGN */
    case 0x00AE: return 0x0E;   /* REGISTERED SIGN */

    /* Mathematical symbols */
    case 0x00BD: return 0xF5;   /* VULGAR FRACTION ONE HALF */
    case 0x00BC: return 0xF6;   /* VULGAR FRACTION ONE QUARTER */
    case 0x00D7: return 0xF7;   /* MULTIPLICATION SIGN */
    case 0x00F7: return 0xF8;   /* DIVISION SIGN */
    case 0x2264: return 0xF9;   /* LESS-THAN OR EQUAL TO */
    case 0x2265: return 0xFA;   /* GREATER-THAN OR EQUAL TO */
    case 0x226A: return 0xFB;   /* MUCH LESS-THAN */
    case 0x226B: return 0xFC;   /* MUCH GREATER-THAN */
    case 0x2260: return 0xFD;   /* NOT EQUAL TO */
    case 0x221A: return 0xFE;   /* SQUARE ROOT */

    /* Accented Latin characters, display codes 0x80-0x8F */
    case 0x00C7: return 0x80;   /* LATIN CAPITAL LETTER C WITH CEDILLA */
    case 0x00FC: return 0x81;   /* LATIN SMALL LETTER U WITH DIAERESIS */
    case 0x00E9: return 0x82;   /* LATIN SMALL LETTER E WITH ACUTE */
    case 0x00E2: return 0x83;   /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
    case 0x00E4: return 0x84;   /* LATIN SMALL LETTER A WITH DIAERESIS */
    case 0x00E0: return 0x85;   /* LATIN SMALL LETTER A WITH GRAVE */
    case 0x0227: return 0x86;   /* LATIN SMALL LETTER A WITH DOT ABOVE */
    case 0x00E7: return 0x87;   /* LATIN SMALL LETTER C WITH CEDILLA */
    case 0x00EA: return 0x88;   /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
    case 0x00EB: return 0x89;   /* LATIN SMALL LETTER E WITH DIAERESIS */
    case 0x00E8: return 0x8A;   /* LATIN SMALL LETTER E WITH GRAVE */
    case 0x00EF: return 0x8B;   /* LATIN SMALL LETTER I WITH DIAERESIS */
    case 0x00EE: return 0x8C;   /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
    case 0x00EC: return 0x8D;   /* LATIN SMALL LETTER I WITH GRAVE */
    case 0x00C4: return 0x8E;   /* LATIN CAPITAL LETTER A WITH DIAERESIS */
    case 0x00C2: return 0x8F;   /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */

    /* Accented Latin characters, display codes 0x90-0x9F */
    case 0x00C9: return 0x90;   /* LATIN CAPITAL LETTER E WITH ACUTE */
    case 0x00E6: return 0x91;   /* LATIN SMALL LETTER AE */
    case 0x00C6: return 0x92;   /* LATIN CAPITAL LETTER AE */
    case 0x00F4: return 0x93;   /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
    case 0x00F6: return 0x94;   /* LATIN SMALL LETTER O WITH DIAERESIS */
    case 0x00F2: return 0x95;   /* LATIN SMALL LETTER O WITH GRAVE */
    case 0x00FB: return 0x96;   /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
    case 0x00F9: return 0x97;   /* LATIN SMALL LETTER U WITH GRAVE */
    case 0x00FF: return 0x98;   /* LATIN SMALL LETTER Y WITH DIAERESIS */
    case 0x00D6: return 0x99;   /* LATIN CAPITAL LETTER O WITH DIAERESIS */
    case 0x00DC: return 0x9A;   /* LATIN CAPITAL LETTER U WITH DIAERESIS */
    case 0x00F1: return 0x9B;   /* LATIN SMALL LETTER N WITH TILDE */
    case 0x00D1: return 0x9C;   /* LATIN CAPITAL LETTER N WITH TILDE */
    case 0x00AA: return 0x9D;   /* FEMININE ORDINAL INDICATOR */
    case 0x00BA: return 0x9E;   /* MASCULINE ORDINAL INDICATOR */
    case 0x00BF: return 0x9F;   /* INVERTED QUESTION MARK */

    /*
     * Accented Latin characters and currency, display codes 0xE0-0xEF.
     * Display codes 0xE7 (looks like "Pt", possibly the peseta sign) and
     * 0xE8 (looks like a stylized lowercase f) have no codepoint assigned.
     */
    case 0x00E1: return 0xE0;   /* LATIN SMALL LETTER A WITH ACUTE */
    case 0x00ED: return 0xE1;   /* LATIN SMALL LETTER I WITH ACUTE */
    case 0x00F3: return 0xE2;   /* LATIN SMALL LETTER O WITH ACUTE */
    case 0x00FA: return 0xE3;   /* LATIN SMALL LETTER U WITH ACUTE */
    case 0x00A2: return 0xE4;   /* CENT SIGN */
    case 0x00A3: return 0xE5;   /* POUND SIGN */
    case 0x00A5: return 0xE6;   /* YEN SIGN */
    case 0x00A1: return 0xE9;   /* INVERTED EXCLAMATION MARK */
    case 0x00C3: return 0xEA;   /* LATIN CAPITAL LETTER A WITH TILDE */
    case 0x00E3: return 0xEB;   /* LATIN SMALL LETTER A WITH TILDE */
    case 0x00D5: return 0xEC;   /* LATIN CAPITAL LETTER O WITH TILDE */
    case 0x00F5: return 0xED;   /* LATIN SMALL LETTER O WITH TILDE */
    case 0x00D8: return 0xEE;   /* LATIN CAPITAL LETTER O WITH STROKE */
    case 0x00F8: return 0xEF;   /* LATIN SMALL LETTER O WITH STROKE */

    /* Greek */
    case 0x0393: return 0x14;   /* GREEK CAPITAL LETTER GAMMA */
    case 0x0394: return 0x15;   /* GREEK CAPITAL LETTER DELTA */
    case 0x0398: return 0x16;   /* GREEK CAPITAL LETTER THETA */
    case 0x039B: return 0x17;   /* GREEK CAPITAL LETTER LAMDA */
    case 0x039E: return 0x18;   /* GREEK CAPITAL LETTER XI */
    case 0x03A0: return 0x19;   /* GREEK CAPITAL LETTER PI */
    case 0x03A3: return 0x1A;   /* GREEK CAPITAL LETTER SIGMA */
    case 0x03D2: return 0x1B;   /* GREEK UPSILON WITH HOOK SYMBOL */
    case 0x03A6: return 0x1C;   /* GREEK CAPITAL LETTER PHI */
    case 0x03A8: return 0x1D;   /* GREEK CAPITAL LETTER PSI */
    case 0x03A9: return 0x1E;   /* GREEK CAPITAL LETTER OMEGA */
    case 0x03B1: return 0x1F;   /* GREEK SMALL LETTER ALPHA */

    /* Box drawing */
    case 0x250C: return 0x09;   /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
    case 0x2510: return 0x0A;   /* BOX DRAWINGS LIGHT DOWN AND LEFT */
    case 0x2514: return 0x0B;   /* BOX DRAWINGS LIGHT UP AND RIGHT */
    case 0x2518: return 0x0C;   /* BOX DRAWINGS LIGHT UP AND LEFT */
    case 0x2500: return 0xFF;   /* BOX DRAWINGS LIGHT HORIZONTAL */

    /* TODO: Japanese Kana */

    default:
        return unrec;
    }
}
/**
 * @brief Convert a UTF-8 string to the X52 character map.
 *
 * Each UTF-8 sequence in the input is decoded and translated to the
 * character code used by the X52 Pro MFD. Sequences that fail to decode, and
 * codepoints without a mapping, are silently dropped. The output is not
 * NUL-terminated; the number of bytes written is reported through len.
 *
 * @param[in]    input  Input string in UTF-8. Must be NUL-terminated
 * @param[out]   output Output buffer
 * @param[inout] len    On entry, the capacity of the output buffer in bytes.
 *                      On a 0 or -E2BIG return, the number of bytes written.
 *                      Left untouched on -EINVAL.
 *
 * @returns 0 when the whole string was converted, -EINVAL on a NULL
 * argument or a zero-length buffer, -E2BIG if a mapped character had to be
 * left out because the buffer was already full.
 */
int libx52util_convert_utf8_string(const uint8_t *input,
                                   uint8_t *output, size_t *len)
{
    size_t index = 0;
    int retval = 0;

    if (!input || !output || !len || !*len) {
        return -EINVAL;
    }

    while (*input) {
        uint32_t chr_len;
        int chr = _unicode_to_x52(_utf8_to_unicode(input, &chr_len), -1);

        /* The decoder always consumes at least one byte, so this advances */
        input += chr_len;

        if (chr < 0) {
            /* Undecodable or unmapped: drop it */
            continue;
        }

        /*
         * Check for room before writing rather than after, so an input
         * that fills the buffer exactly is reported as a success.
         */
        if (index >= *len) {
            retval = -E2BIG;
            break;
        }

        output[index] = (uint8_t)chr;
        index++;
    }

    *len = index;
    return retval;
}
/*
 * Single-character UTF-8 sample inputs for the lookup benchmark.
 * An empty string terminates the list.
 */
static const char *const test_strings[] = {
    "\x20", "\x21", "\x22", "\x23", "\x24", "\x25", "\x26", "\x27",
    "\x28", "\x29", "\x2a", "\x2b", "\x2c", "\x2d", "\x2e", "\x2f",
    "\x30", "\x31", "\x32", "\x33", "\x34", "\x35", "\x36", "\x37",
    "\x38", "\x39", "\x3a", "\x3b", "\x3c", "\x3d", "\x3e", "\x3f",
    "\x40", "\x41", "\x42", "\x43", "\x44", "\x45", "\x46", "\x47",
    "\x48", "\x49", "\x4a", "\x4b", "\x4c", "\x4d", "\x4e", "\x4f",
    "\x50", "\x51", "\x52", "\x53", "\x54", "\x55", "\x56", "\x57",
    "\x58", "\x59", "\x5a", "\x5b", "\x5d", "\x5e", "\x5f",
    "\x60", "\x61", "\x62", "\x63", "\x64", "\x65", "\x66", "\x67",
    "\x68", "\x69", "\x6a", "\x6b", "\x6c", "\x6d", "\x6e", "\x6f",
    "\x70", "\x71", "\x72", "\x73", "\x74", "\x75", "\x76", "\x77",
    "\x78", "\x79", "\x7a", "\x7b", "\x7c", "\x7d",
    "\xc2\xa7", "\xc2\xb6", "\xc2\xa9", "\xc2\xae",
    "\xc2\xbd", "\xc2\xbc", "\xc3\x97", "\xc3\xb7",
    "\xe2\x89\xa4", "\xe2\x89\xa5", "\xe2\x89\xaa", "\xe2\x89\xab",
    "\xe2\x89\xa0", "\xe2\x88\x9a",
    "\xc3\x87", "\xc3\xbc", "\xc3\xa9", "\xc3\xa2",
    "\xc3\xa4", "\xc3\xa0", "\xc8\xa7", "\xc3\xa7",
    "\xc3\xaa", "\xc3\xab", "\xc3\xa8", "\xc3\xaf",
    "\xc3\xae", "\xc3\xac", "\xc3\x84", "\xc3\x82",
    "\xc3\x89", "\xc3\xa6", "\xc3\x86", "\xc3\xb4",
    "\xc3\xb6", "\xc3\xb2", "\xc3\xbb", "\xc3\xb9",
    "\xc3\xbf", "\xc3\x96", "\xc3\x9c", "\xc3\xb1",
    "\xc3\x91", "\xc2\xaa", "\xc2\xba", "\xc2\xbf",
    "\xc3\xa1", "\xc3\xad", "\xc3\xb3", "\xc3\xba",
    "\xc2\xa2", "\xc2\xa3", "\xc2\xa5", "\xc2\xa1",
    "\xc3\x83", "\xc3\xa3", "\xc3\x95", "\xc3\xb5",
    "\xc3\x98", "\xc3\xb8",
    "\xce\x93", "\xce\x94", "\xce\x98", "\xce\x9b",
    "\xce\x9e", "\xce\xa0", "\xce\xa3", "\xcf\x92",
    "\xce\xa6", "\xce\xa8", "\xce\xa9", "\xce\xb1",
    "\xe2\x94\x8c", "\xe2\x94\x90", "\xe2\x94\x94", "\xe2\x94\x98",
    "\xe2\x94\x80",
    ""
};

#define ROUNDS 10000000

/**
 * @brief Time repeated conversions of every sample string
 *
 * Runs libx52util_convert_utf8_string over the whole sample list ROUNDS
 * times and prints the processor time consumed, in clock() ticks.
 *
 * @returns 0 always
 */
int main(void)
{
    uint8_t output[8];
    clock_t start;
    clock_t end;

    start = clock();
    for (int i = 0; i < ROUNDS; i++) {
        for (size_t j = 0; test_strings[j][0] != '\0'; j++) {
            /*
             * The length argument is in/out: the converter reads it as the
             * buffer capacity and overwrites it with the number of bytes
             * written, so it has to be reset before every call.
             */
            size_t out_len = sizeof(output);

            /* Run the lookup function; only the elapsed time matters here */
            libx52util_convert_utf8_string((const uint8_t *)test_strings[j],
                                           output, &out_len);
        }
    }
    end = clock();

    /* clock_t has no printf conversion of its own; widen it explicitly */
    printf("Perf test - time used %ld\n", (long)(end - start));

    return 0;
}