diff --git a/libx52util/meson.build b/libx52util/meson.build index b52f344..9950605 100644 --- a/libx52util/meson.build +++ b/libx52util/meson.build @@ -1,13 +1,13 @@ # libx52util -libx52util_version = '1.0.1' +libx52util_version = '1.0.2' gen_script = files('x52_char_map_gen.py')[0] util_char_map = custom_target('util-char-map', build_by_default: false, depend_files: ['x52_char_map_gen.py', 'x52_char_map.cfg'], - command: [python, gen_script, '@INPUT@', '@OUTPUT@'], + command: [python, gen_script, '@INPUT@', '@OUTPUT0@', '@OUTPUT1@'], input: 'x52_char_map.cfg', - output: 'util_char_map.c') + output: ['util_char_map.c', 'x52_char_map.bin']) lib_libx52util = library('x52util', util_char_map, 'x52_char_map_lookup.c', install: true, @@ -23,21 +23,14 @@ pkgconfig.generate(lib_libx52util, version: libx52util_version, ) -test_gen_script = files('x52_map_test_gen.py')[0] - -libx52util_map_test_src = custom_target('libx52util-map-test-src', - build_by_default: false, - depend_files: ['x52_map_test_gen.py', 'x52_char_map.cfg'], - command: [python, test_gen_script, '@INPUT@', '@OUTPUT@'], - input: 'x52_char_map.cfg', - output: 'x52_map_test.c' - ) - -libx52util_map_test = executable('libx52util-map-test', libx52util_map_test_src, - dependencies: [dep_cmocka], - link_with: [lib_libx52util], +libx52util_bmp_test = executable( + 'libx52util-bmp-test', + 'x52_char_map_test.c', build_by_default: false, include_directories: [includes, lib_libx52util.private_dir_include()], + link_with: [lib_libx52util] ) -test('libx52util-map-test', libx52util_map_test, protocol: 'tap') +test('libx52util-bmp-test', libx52util_bmp_test, + protocol: 'tap', + args: [util_char_map[1]]) diff --git a/libx52util/x52_char_map.cfg b/libx52util/x52_char_map.cfg index cf6eda1..26369da 100644 --- a/libx52util/x52_char_map.cfg +++ b/libx52util/x52_char_map.cfg @@ -324,3 +324,13 @@ 0xFF9E 0xDE # HALFWIDTH KATAKANA VOICED SOUND MARK 0xFF9F 0xDF # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +# The following characters are manually added to aid in normalization to the +# X52 character map +0x2215 0x2F # DIVISION SLASH +0x2044 0x2F # FRACTION SLASH +0x00B0 0xDF # DEGREE SIGN +# Note: while Greek letters aren't actually supported by the MFD character map, +# this is manually addded to map the letter 'mu' to ASCII 'u'. This is needed +# in the CJK compatibility page (0x3300-0x33FF) to deal with the square latin +# abbreviations +0x03BC 0x75 # GREEK SMALL LETTER MU diff --git a/libx52util/x52_char_map.h b/libx52util/x52_char_map.h index 6dd4470..3abe175 100644 --- a/libx52util/x52_char_map.h +++ b/libx52util/x52_char_map.h @@ -12,20 +12,7 @@ #include #include -enum { - TYPE_INVALID = 0, /* Invalid type (default) */ - - TYPE_POINTER, /* Pointer target */ - - TYPE_ENTRY /* Map entry value */ -}; - -struct map_entry { - struct map_entry *next; /* Pointer to the next table */ - uint8_t type; /* Type of entry */ - uint8_t value; /* Value is valid if this is of TYPE_ENTRY */ -}; - -extern struct map_entry map_root[]; +extern const uint16_t *root_table[256]; +extern const uint8_t *sequence_table[]; #endif /* !defined X52_CHAR_MAP_H */ diff --git a/libx52util/x52_char_map_gen.py b/libx52util/x52_char_map_gen.py index f52a9d6..eaf8072 100755 --- a/libx52util/x52_char_map_gen.py +++ b/libx52util/x52_char_map_gen.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Character map generator # -# Copyright (C) 2012-2018 Nirenjan Krishnan (nirenjan@nirenjan.org) +# Copyright (C) 2012-2026 Nirenjan Krishnan (nirenjan@nirenjan.org) # # SPDX-License-Identifier: GPL-2.0-only WITH Classpath-exception-2.0 """ @@ -11,191 +11,267 @@ for the X52/X52 Pro MFD import sys import re - -AUTOGEN_HEADER = """ -/* - * Autogenerated character map file for Saitek X52 Pro - * Generated from %s - */ - -#include "x52_char_map.h" - -""" - - -class MapTable(object): - """ - Defines a MapTable entry, with each entry storing the value seen so far, - the type of the entry, and the value, if it's a value node. - """ - # Empty list - root = [None] * 256 - - def __init__(self, value_so_far, map_value=None): - self.next_level = [None] * 256 - self.value_so_far = value_so_far - self.map_value = map_value - - def output_nodes(self): - """ - Output the individual nodes - """ - output_lines = [] - output_count = 0 - for node in self.next_level: - if node is not None: - output_lines.extend(node.output_nodes()) - output_count += 1 - - if output_count != 0: - struct_header = 'static struct map_entry table_%x[64] = {' % \ - self.value_so_far - output_lines.append(struct_header) - - for node_index in range(0, 256): - node = self.next_level[node_index] - if node is not None: - output_lines.append(self.dump_entry_line(0x80, node_index, - node.value_so_far, - node.map_value)) - - output_lines.extend(['};', '']) - - return output_lines - - @staticmethod - def dump_entry_line(offset, node_index, value_so_far, map_value): - """ - Dump the array entry for the current node - """ - if map_value is None: - node_entry_line = '\t[0x%02x] = { table_%x, TYPE_POINTER, 0 },' % \ - (node_index - offset, value_so_far) - else: - node_entry_line = '\t[0x%02x] = { NULL, TYPE_ENTRY, 0x%02x },' % \ - (node_index - offset, map_value) - - return node_entry_line - - @classmethod - def add_to_table(cls, input_val, map_val): - """ - Add a map value to the lookup table - """ - try: - uchr = unichr(input_val) - except NameError: - # Python 3 doesn't have unichr, but chr should work - uchr = chr(input_val) - - utf8_str = uchr.encode('utf-8') - # Python2 returns the encoded result as a string, wheras - # Python3 returns the result as a bytearray. Converting - # the string (or bytearray) into a bytearray ensures that - # this can be run in both Python2 and Python3 - utf8_vals = [c for c in bytearray(utf8_str)] - - value_so_far = 0 - level = cls.root - for index, char in enumerate(utf8_vals): - value_so_far = (value_so_far << 8) | char - if index < (len(utf8_vals) - 1): - node = level[char] - if node is None: - node = cls(value_so_far) - level[char] = node - - level = level[char].next_level - else: - node = cls(value_so_far, map_val) - level[char] = node - - @classmethod - def output_table_as_list(cls): - """ - Output the map table as a list of lines - """ - output_lines = [] - for node in cls.root: - if node is not None: - output_lines.extend(node.output_nodes()) - - output_lines.append('struct map_entry map_root[256] = {') - - for node_index in range(0, 256): - node = cls.root[node_index] - if node is not None: - output_lines.append(cls.dump_entry_line(0x0, node_index, - node.value_so_far, - node.map_value)) - - output_lines.extend(['};', '']) - - return output_lines - +import json +import unicodedata class LineFormatError(ValueError): """ Error class for parser """ -def parse_line(data): +class BMPTable: """ - Parse a line containing a mapping descriptor. The mapping descriptor - must start with a hexadecimal unicode code point, followed by either a - single character, or a hexadecimal integer that corresponds to the map - value. + Sparse table for Basic Multilingual Plane """ - # Strip off comments - data = re.sub(re.compile('#.*$'), '', data) - # Strip off leading and trailing whitespace - data = data.strip() + REPLACEMENT_CHAR = 0xDB - # If the line is empty, it is a comment line - if len(data) == 0: - return None, None + HEADER = f"""/* +* Autogenerated tables for X52 MFD character lookup +* +* DO NOT EDIT +*/ - # Find the code point and the target value - try: - code_point, target = data.strip().split() - except ValueError: - # Raised when there are either too many, or not enough values in - # the string - raise LineFormatError('Invalid descriptor format "%s"' % data) +#include - # Convert the string to its equivalent numeric value - try: - code_point = int(code_point, 0) - except ValueError: - raise LineFormatError('Invalid code point "%s"' % code_point) +""" - # Check if the target is a single character - if len(target) == 1: - target = ord(target) - else: - # Try parsing the target as an integer + TABLE_NAME_FORMAT = 'bmp_page_%02x' + TABLE_NAME_DEFAULT = 'bmp_page_default' + TABLE_FORMAT = 'const uint16_t %s[256] = {' + TABLE_FOOTER = '};\n' + + def __init__(self, input_file, output_file, output_map): + self.input_file = input_file + self.output_file = output_file + self.output_map = output_map + self.mapping = {} + self.pages = {} + self.sequences = {} + self.root_table = [] + + self.read_map() + self.build_extended_map() + self.build_tables() + self.generate_test_tables() + + @staticmethod + def parse_line(data): + """ + Parse a line containing a mapping descriptor. The mapping descriptor + must start with a hexadecimal unicode code point, followed by either a + single character, or a hexadecimal integer that corresponds to the map + value. + """ + # Strip off comments + data = re.sub(re.compile('#.*$'), '', data) + + # Strip off leading and trailing whitespace + data = data.strip() + + # If the line is empty, it is a comment line + if len(data) == 0: + return None, None + + # Find the code point and the target value try: - target = int(target, 0) - except ValueError: - raise LineFormatError('Invalid map value "%s"' % target) + code_point, target = data.strip().split() + except ValueError as exc: + # Raised when there are either too many, or not enough values in + # the string + raise LineFormatError(f'Invalid descriptor format "{data}"') from exc - return code_point, target + # Convert the string to its equivalent numeric value + try: + code_point = int(code_point, 0) + except ValueError as exc: + raise LineFormatError(f'Invalid code point "{code_point}"') from exc + + # Check if the target is a single character + if len(target) == 1: + target = ord(target) + else: + # Try parsing the target as an integer + try: + target = int(target, 0) + except ValueError as exc: + raise LineFormatError(f'Invalid map value "{target}"') from exc + + return code_point, target + + def read_map(self): + """Read the mapping tables from the config file""" + def map_normalized(char, dst): + # Try to normalize the unicode character as NFKC + normalized = unicodedata.normalize('NFKC', chr(char)) + if normalized == char: + # This is already in normalized form + return + + if len(normalized) == 1: + normalized_char = ord(normalized) + + if normalized_char not in self.mapping: + # This is only needed to ensure that we get the normalized + # forms for example, half-width Katakana characters are + # normalized to their corresponding full width versions. + # However, we don't want to overwrite existing mappings, + # since something like Lowercase A with grave could be + # normalized to lowercase A, which would break the translation + self.mapping[normalized_char] = dst + + with open(self.input_file, 'r', encoding='utf-8') as infile: + for line in infile: + src, dst = self.parse_line(line) + if src is None: + continue + + self.mapping[src] = dst + map_normalized(src, dst) + + def build_extended_map(self): + """Build the extended map for every character in the BMP""" + self.mapping[0] = 0 # Handle NUL + for i in range(0x10000): + # Iterate over the basic multilingual plane + if i in self.mapping: + continue + + if 0xD800 <= i <= 0xDFFF: + # UTF16 surrogate pairs - we want to mark it as a box character + self.mapping[i] = self.REPLACEMENT_CHAR + continue + + normalized = unicodedata.normalize('NFKC', chr(i)) + if len(normalized) == 1: + normalized_ord = ord(normalized) + if normalized_ord in self.mapping: + self.mapping[i] = self.mapping[normalized_ord] + else: + # No single character mapping exists + self.mapping[i] = self.REPLACEMENT_CHAR + + continue + + # Check that all characters in the normalized are in the mapping table: + sequence = [] + for c in normalized: + if ord(c) in self.mapping: + sequence.append(self.mapping[ord(c)]) + else: + sequence.append(self.REPLACEMENT_CHAR) + + # Check if it only contains the box character, or box char and space, + # and reduce runs to a single instance + if all(c in (self.REPLACEMENT_CHAR, self.mapping[0x20]) + for c in sequence): + self.mapping[i] = self.REPLACEMENT_CHAR + continue + + sequence = tuple(sequence) + if sequence not in self.sequences: + if not self.sequences: + last_sequence = 256 + else: + last_sequence = max(self.sequences.values()) + 1 + + self.sequences[sequence] = last_sequence + + self.mapping[i] = self.sequences[sequence] + + def output_c_table(self, page_tuple, out_fd): + """Output the C table structure""" + page_name = self.pages[page_tuple] + + print(self.TABLE_FORMAT % (page_name), file=out_fd) + + for i, val in enumerate(page_tuple): + print(f"0x{val:02x}, ", end='', file=out_fd) + if i % 8 == 7: + print(f"// 0x{i-7:02x}-0x{i:02x}", file=out_fd) + + print(self.TABLE_FOOTER, file=out_fd) + + def build_tables(self): + """Build the C Tables""" + with open(self.output_file, 'w', encoding='utf-8') as out_fd: + print(self.HEADER, file=out_fd) + + default_page = tuple([self.REPLACEMENT_CHAR] * 256) + self.pages[default_page] = self.TABLE_NAME_DEFAULT + self.output_c_table(default_page, out_fd) + + for root_idx in range(256): + base_idx = root_idx * 256 + page = [self.mapping[idx] for idx in range(base_idx, base_idx+256)] + page_tuple = tuple(page) + if page_tuple not in self.pages: + page_name = self.TABLE_NAME_FORMAT % (root_idx) + self.pages[page_tuple] = page_name + self.output_c_table(page_tuple, out_fd) + + self.root_table.append(self.pages[page_tuple]) + + print(self.TABLE_FORMAT % ('* root_table'), file=out_fd) + + for page_id, page_name in enumerate(self.root_table): + print(f" {page_name}, // 0x{page_id:02x}", file=out_fd) + + print(self.TABLE_FOOTER, file=out_fd) + + print(f"const uint8_t *sequence_table[{len(self.sequences)}] = {{", file=out_fd) + for sequence, seq_id in self.sequences.items(): + seq_len = len(sequence) + if seq_len >= 256: + raise RuntimeError("Sequence way too long") + + line = [f"0x{seq_len:02X}"] + for seq_elem in sequence: + line.append(f"0x{seq_elem:02X}") + + line = ', '.join(line) + print(f' [{seq_id-256}] = (const uint8_t[]){{ {line} }},', file=out_fd) + + print(self.TABLE_FOOTER, file=out_fd) + + def generate_test_tables(self): + """Build the test tables used by the test suite""" + # Generate the expected output sequences for every table + # Mapping is a dict mapping the code point as a string to the output + # Sequence is a dict of : mappings (seq_id starts from 256) + output = [] + sequences = [item[0] for item in sorted(self.sequences.items(), + key=lambda item: item[1])] + + # The mapping for the NUL byte (\x00) should be an empty sequence + output.append([]) + + for i in range(1, 0x10000): + seq = self.mapping[i] + if seq >= 256: + # Pull from sequence table + seq = sequences[seq - 256] + else: + seq = [seq] + output.append(seq) + + # Find the longest length sequence (add 1 for the length byte) + longest = max(len(seq) for seq in output) + 1 + # Find the next power of two that can hold this sequence + if (longest & (longest - 1)) == 0: + record_length = longest + else: + record_length = 1 << longest.bit_length() + + with open(self.output_map, 'wb') as output_map: + pad = [0] * record_length + for seq in output: + record = [len(seq)] + list(seq) + pad + output_map.write(bytes(record[:record_length])) if __name__ == "__main__": - if len(sys.argv) != 3: - sys.stderr.write('Usage: %s \n' % - sys.argv[0]) + if len(sys.argv) != 4: + sys.stderr.write(f"Usage: {sys.argv[0]} \n") sys.exit(1) - with open(sys.argv[1], 'r') as infile: - for line in infile: - src, dst = parse_line(line) - if src is not None: - MapTable.add_to_table(src, dst) - - with open(sys.argv[2], 'w') as outfile: - outfile.write(AUTOGEN_HEADER % sys.argv[1]) - - for line in MapTable.output_table_as_list(): - outfile.write(line + '\n') + BMPTable(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/libx52util/x52_char_map_lookup.c b/libx52util/x52_char_map_lookup.c index 04a02e7..93fcb51 100644 --- a/libx52util/x52_char_map_lookup.c +++ b/libx52util/x52_char_map_lookup.c @@ -11,11 +11,63 @@ #include "config.h" #include +#include #include #include "libx52util.h" #include "x52_char_map.h" +/** + * @brief Converts a UTF8 stream to a uint32_t + * + * @param[in] utf8in Pointer to UTF8 input stream. Must be NUL-terminated + * @param[out] unichr Output character pointer + * + * @returns number of bytes to advance stream by - 0 if NUL or input pointer is NULL + */ +static int utf8_to_u32(const uint8_t *utf8in, uint32_t *unichr) +{ + if (!utf8in || !*utf8in) return 0; + + uint8_t b = utf8in[0]; + + // 1-byte (0xxxxxxx) + if (b < 0x80) { + *unichr = b; + return 1; + } + + // Invalid leading bytes + if (b < 0xC2 || b > 0xF4) goto error; + + // 2-byte (110xxxxx 10xxxxxx) + if ((b & 0xE0) == 0xC0) { + if ((utf8in[1] & 0xC0) != 0x80) goto error; + *unichr = ((b & 0x1F) << 6) | (utf8in[1] & 0x3F); + return 2; + } + + // 3-byte (1110xxxx 10xxxxxx 10xxxxxx) + if ((b & 0xF0) == 0xE0) { + if ((utf8in[1] & 0xC0) != 0x80 || (utf8in[2] & 0xC0) != 0x80) goto error; + *unichr = ((b & 0x0F) << 12) | ((utf8in[1] & 0x3F) << 6) | (utf8in[2] & 0x3F); + return 3; + } + + // 4-byte (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) + if ((b & 0xF8) == 0xF0) { + if ((utf8in[1] & 0xC0) != 0x80 || (utf8in[2] & 0xC0) != 0x80 || + (utf8in[3] & 0xC0) != 0x80) goto error; + *unichr = ((b & 0x07) << 18) | ((utf8in[1] & 0x3F) << 12) | + ((utf8in[2] & 0x3F) << 6) | (utf8in[3] & 0x3F); + return 4; + } + +error: + *unichr = 0xFFFD; // Unicode Replacement Character + return 1; // Consume lead byte to attempt resync +} + /** * @brief Convert UTF8 string to X52 character map. * @@ -32,52 +84,61 @@ int libx52util_convert_utf8_string(const uint8_t *input, uint8_t *output, size_t *len) { - struct map_entry *entry; size_t index; int retval = 0; - unsigned char local_index; + uint32_t unichr; + int bytes_consumed; + uint16_t translated; if (!input || !output || !len || !*len) { return -EINVAL; } index = 0; - entry = &map_root[*input]; + // Reset the output array + memset(output, 0, *len); + while (*input) { - input++; - if (entry->type == TYPE_ENTRY) { - output[index] = entry->value; + // Length check + if (index >= *len) { + retval = -E2BIG; + break; + } + + bytes_consumed = utf8_to_u32(input, &unichr); + if (bytes_consumed == 0) { + // We should never get here, since the while loop should have + // caught it + retval = 0; + break; + } + input += bytes_consumed; + + // Check for bytes in the Supplementary planes + if (unichr >= 0x10000) { + unichr = 0xFFFD; // Unicode replacement character + } + + translated = root_table[unichr >> 8][unichr & 0xFF]; + if (translated < 256) { + // Table entry, push to output + output[index] = (uint8_t)translated; index++; - if (index >= *len && *input) { + } else { + // We have a sequence, output that + const uint8_t *sequence = sequence_table[translated - 256]; + uint8_t seq_len = sequence[0]; + + // Let's make sure that we can actually output to the buffer + if ((index + seq_len) >= *len) { retval = -E2BIG; break; } - entry = &map_root[*input]; - } else if (entry->type == TYPE_POINTER) { - local_index = *input; - if (local_index < 0x80 || local_index >= 0xC0) { - /* Invalid input, skip till we find the start of another - * valid UTF-8 character - */ - while (*input >= 0x80 && *input < 0xC0) { - input++; /* Skip invalid characters */ - } - /* New UTF-8 character, reset the entry pointer */ - entry = &map_root[*input]; - } else { - /* Mask off the upper bits, we only care about the lower 6 bits */ - local_index &= 0x3F; - entry = &(entry->next[local_index]); + for (int i = 1; i <= seq_len; i++) { + output[index] = sequence[i]; + index++; } - } else { - /* Invalid value, skip */ - while (*input >= 0x80 && *input < 0xC0) { - input++; /* Skip invalid characters */ - } - - /* New UTF-8 character, reset the entry pointer */ - entry = &map_root[*input]; } } diff --git a/libx52util/x52_char_map_test.c b/libx52util/x52_char_map_test.c new file mode 100644 index 0000000..8648fe8 --- /dev/null +++ b/libx52util/x52_char_map_test.c @@ -0,0 +1,195 @@ +/* + * X52 character map lookup test + * + * Copyright (C) 2026 Nirenjan Krishnan + * + * SPDX-License-Identifier: GPL-2.0-only WITH Classpath-exception-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libx52util.h" + +// Fix this if we ever hit longer sequences +#define RECORD_SIZE 8 + +// Blindly encode a string into it's smallest UTF8 representation +static void encode_utf8(uint32_t cp, uint8_t *out) +{ + if (cp <= 0x7F) { + out[0] = (uint8_t)cp; + } else if (cp <= 0x7FF) { + out[0] = (uint8_t)(0xC0 | (cp >> 6)); + out[1] = (uint8_t)(0x80 | (cp & 0x3F)); + } else if (cp <= 0xFFFF) { + out[0] = (uint8_t)(0xE0 | (cp >> 12)); + out[1] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F)); + out[2] = (uint8_t)(0x80 | (cp & 0x3F)); + } else if (cp <= 0x1FFFFF) { + out[0] = (uint8_t)(0xF0 | (cp >> 18)); + out[1] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F)); + out[2] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F)); + out[3] = (uint8_t)(0x80 | (cp & 0x3F)); + } else if (cp <= 0x3FFFFFF) { + out[0] = (uint8_t)(0xF8 | (cp >> 24)); + out[1] = (uint8_t)(0x80 | ((cp >> 18) & 0x3F)); + out[2] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F)); + out[3] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F)); + out[4] = (uint8_t)(0x80 | (cp & 0x3F)); + } else if (cp <= 0x7FFFFFFF) { + out[0] = (uint8_t)(0xFC | (cp >> 30)); + out[1] = (uint8_t)(0x80 | ((cp >> 24) & 0x3F)); + out[2] = (uint8_t)(0x80 | ((cp >> 18) & 0x3F)); + out[3] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F)); + out[4] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F)); + out[5] = (uint8_t)(0x80 | (cp & 0x3F)); + } else { // 0x80000000 to 0xFFFFFFFF (7 bytes) + out[0] = (uint8_t)0xFE; // Binary 11111110 + out[1] = (uint8_t)(0x80 | ((cp >> 30) & 0x3F)); + out[2] = (uint8_t)(0x80 | ((cp >> 24) & 0x3F)); + out[3] = (uint8_t)(0x80 | ((cp >> 18) & 0x3F)); + out[4] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F)); + out[5] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F)); + out[6] = (uint8_t)(0x80 | (cp & 0x3F)); + } +} + +int main(int argc, char *argv[]) +{ + uint8_t input[8] = {0}; + uint8_t output[RECORD_SIZE]; + size_t len; + int result; + + int fd; + uint8_t *expected_blob; + bool smp_pages_ok; + + // Argument check + if (argc != 2) { + puts("Bail out! Invalid number of arguments"); + puts("# Usage: libx52util-bmp-test "); + return 1; + } + + fd = open(argv[1], O_RDONLY); + if (fd < 0) { + printf("Bail out! Error %d opening bin file %s: %s\n", + errno, argv[1], strerror(errno)); + return 1; + } + + expected_blob = mmap(NULL, 0x10000 * RECORD_SIZE, + PROT_READ, MAP_SHARED, fd, 0); + if (expected_blob == MAP_FAILED) { + printf("Bail out! MMAP failed with error %d: %s\n", + errno, strerror(errno)); + } + + puts("TAP version 13"); + // Check the 256 BMP Pages, plus the supplementary pages + puts("1..257"); + + for (uint32_t page = 0; page < 256; page++) { + bool page_ok = true; + + for (uint32_t offset = 0; offset < 256; offset++) { + uint32_t cp = page * 256 + offset; + const uint8_t *rec = &expected_blob[cp * RECORD_SIZE]; + + memset(input, 0, sizeof(input)); + memset(output, 0, sizeof(output)); + encode_utf8(cp, input); + len = sizeof(output); + + result = libx52util_convert_utf8_string(input, output, &len); + if (result != 0) { + page_ok = false; + printf("# Bad result @ %04X: %d\n", cp, result); + break; + } + + // result is OK, check against the expected blob + if (len != rec[0]) { + page_ok = false; + printf("# Length mismatch @ %04X: expected %u, got %zu\n", + cp, rec[0], len); + break; + } + + // Length is OK, check the bytes + if (memcmp(output, &rec[1], rec[0]) != 0) { + page_ok = false; + printf("# Output mismatch @ %04X:\n", cp); + printf("# exp/got:"); + for (int i = 0; i < len; i++) { + printf("%02X/%02X ", rec[i+1], output[i]); + } + puts(""); + break; + } + } + + printf("%sok - %d Page 0x%02x\n", page_ok ? "": "not ", + page + 1, page); + } + + // Handle the supplementary pages + smp_pages_ok = true; + for (uint32_t smp = 0x1; smp <= 0x10; smp++) { + const uint8_t *rec = &expected_blob[0xFFFD * RECORD_SIZE]; + for (uint32_t offset = 0; offset < 0x100; offset += 0xFF) { + uint32_t cp = smp * 256 + offset; + + memset(input, 0, sizeof(input)); + memset(output, 0, sizeof(output)); + len = sizeof(output); + encode_utf8(cp, input); + + result = libx52util_convert_utf8_string(input, output, &len); + if (result != 0) { + smp_pages_ok = false; + printf("# Bad result @ %08X: %d\n", cp, result); + break; + } + + // result is OK, check against the expected blob + if (len != rec[0]) { + smp_pages_ok = false; + printf("# Length mismatch @ %08X: expected %u, got %zu\n", + + cp, rec[0], len); + break; + } + + // Length is OK, check the bytes + if (memcmp(output, &rec[1], rec[0]) != 0) { + smp_pages_ok = false; + printf("# Output mismatch @ %08X:\n", cp); + printf("# exp/got:"); + for (int i = 0; i < len; i++) { + printf("%02X/%02X ", rec[i+1], output[i]); + } + puts(""); + break; + } + } + + if (!smp_pages_ok) { + break; + } + } + printf("%sok - 257 SMP tests\n", smp_pages_ok ? "" : "not "); + + // Cleanup + munmap(expected_blob, 0x10000 * RECORD_SIZE); + close(fd); + return 0; +} diff --git a/libx52util/x52_map_test_gen.py b/libx52util/x52_map_test_gen.py deleted file mode 100755 index 60567d6..0000000 --- a/libx52util/x52_map_test_gen.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python3 -"""Generate a test script for the convert function""" - -import argparse -import re - -def parse_file(map_file): - """Read the map file, strip out comments, and return a dictionary that - maps the UTF-8 encoded string to the X52 MFD character""" - - # If we are running this, then we know that the input map is likely - # in a sane format already. - char_dict = {} - - with open(map_file, 'r', encoding='utf-8') as map_fd: - for line in map_fd: - line = re.sub(r'#.*$', '', line).strip() - - if not line: - # Comment line, skip - continue - - key, out = line.split() - in_char = int(key, 0) - - if len(out) == 1: - out_byte = ord(out) - else: - out_byte = int(out, 0) - - char_dict[in_char] = out_byte - - return char_dict - -def generate_positive_test_cases(char_dict): - """Generate a set of positive test cases""" - # For every string in the dictionary, generate a test case that tests - # the input against the output - TEST_CASE_FMT = """ -static void test_map_{in_char}(void **state) {{ - (void)state; - const uint8_t input_array[] = {{ {in_bytes}, 0 }}; - const uint8_t expected_output[2] = {{ {out_byte}, 0 }}; - size_t out_len = 20; - uint8_t output[20] = {{ 0 }}; - int rc; - - rc = libx52util_convert_utf8_string(input_array, output, &out_len); - assert_int_equal(rc, 0); - assert_int_equal(out_len, 1); - assert_memory_equal(output, expected_output, 2); -}} -""" - - output = "" - for in_char, out_byte in char_dict.items(): - in_bytes = ", ".join(hex(c) for c in chr(in_char).encode('utf-8')) - in_tc = hex(in_char) - - output += TEST_CASE_FMT.format(in_char=in_tc, in_bytes=in_bytes, out_byte=out_byte) - - output += """ -const struct CMUnitTest tests[] = { -""" - - for in_char in sorted(char_dict.keys()): - output += f" cmocka_unit_test(test_map_{hex(in_char)}),\n" - - output += '};\n' - - return output - -TEST_HEADER = """ -#include -#include -#include -#include -#include - -#include "libx52util.h" -""" - -TEST_FOOTER = """ -int main(void) { - cmocka_set_message_output(CM_OUTPUT_TAP); - cmocka_run_group_tests(tests, NULL, NULL); - return 0; -} -""" - -def main(): - """Generate X52 map test suite""" - parser = argparse.ArgumentParser(description='Generate map test cases') - parser.add_argument('INPUT_FILE', help="Input character map file") - parser.add_argument('OUTPUT_FILE', help="Generated test script") - args = parser.parse_args() - - char_dict = parse_file(args.INPUT_FILE) - test_cases = generate_positive_test_cases(char_dict) - - with open(args.OUTPUT_FILE, 'w', encoding='utf-8') as out_fd: - print(TEST_HEADER, file=out_fd) - print(test_cases, file=out_fd) - print(TEST_FOOTER, file=out_fd) - -if __name__ == '__main__': - main()