feat: Handle the entire BMP in libx52util

Prior to this change, the libx52util_convert_utf8_string function could
only convert a limited set of characters to the MFD character map; that
set was taken directly from the x52_char_map.cfg file. However, it is a
tiny subset of the characters in the Basic Multilingual Plane (BMP) that
can actually be displayed, since many characters in the BMP can be
normalized to a different character (or character sequence) that has a
corresponding glyph on the X52 MFD.

One example of this is the half-width Katakana characters: they are
mapped in the display, but the corresponding full-width characters were
not explicitly mapped. With this commit, the generator script
automatically detects that the half-width characters normalize to the
corresponding full-width forms, and maps the full-width forms back to
the correct characters on the MFD.

A second benefit of this change is that the MFD can now show characters
that would otherwise never be seen, for example, the 3/4 symbol or 5/8
symbol have no corresponding glyph in the MFD, but they can be
translated to the sequence `3` `/` `4`, giving us much more flexibility
on the characters that can actually be displayed.

Finally, with this change, the function also maps missing or unsupported
characters to the box character (0xDB in the display), making it clearer
that there was something there that could not be displayed. Earlier, it
would have simply skipped that character.
fix-issue-63
nirenjan 2026-03-16 09:57:55 -07:00
parent 899ea57bf7
commit 0cb137bbe0
7 changed files with 556 additions and 341 deletions

View File

@ -1,13 +1,13 @@
# libx52util
libx52util_version = '1.0.1'
libx52util_version = '1.0.2'
gen_script = files('x52_char_map_gen.py')[0]
util_char_map = custom_target('util-char-map',
build_by_default: false,
depend_files: ['x52_char_map_gen.py', 'x52_char_map.cfg'],
command: [python, gen_script, '@INPUT@', '@OUTPUT@'],
command: [python, gen_script, '@INPUT@', '@OUTPUT0@', '@OUTPUT1@'],
input: 'x52_char_map.cfg',
output: 'util_char_map.c')
output: ['util_char_map.c', 'x52_char_map.bin'])
lib_libx52util = library('x52util', util_char_map, 'x52_char_map_lookup.c',
install: true,
@ -23,21 +23,14 @@ pkgconfig.generate(lib_libx52util,
version: libx52util_version,
)
test_gen_script = files('x52_map_test_gen.py')[0]
libx52util_map_test_src = custom_target('libx52util-map-test-src',
build_by_default: false,
depend_files: ['x52_map_test_gen.py', 'x52_char_map.cfg'],
command: [python, test_gen_script, '@INPUT@', '@OUTPUT@'],
input: 'x52_char_map.cfg',
output: 'x52_map_test.c'
)
libx52util_map_test = executable('libx52util-map-test', libx52util_map_test_src,
dependencies: [dep_cmocka],
link_with: [lib_libx52util],
libx52util_bmp_test = executable(
'libx52util-bmp-test',
'x52_char_map_test.c',
build_by_default: false,
include_directories: [includes, lib_libx52util.private_dir_include()],
link_with: [lib_libx52util]
)
test('libx52util-map-test', libx52util_map_test, protocol: 'tap')
test('libx52util-bmp-test', libx52util_bmp_test,
protocol: 'tap',
args: [util_char_map[1]])

View File

@ -324,3 +324,13 @@
0xFF9E 0xDE # HALFWIDTH KATAKANA VOICED SOUND MARK
0xFF9F 0xDF # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
# The following characters are manually added to aid in normalization to the
# X52 character map
0x2215 0x2F # DIVISION SLASH
0x2044 0x2F # FRACTION SLASH
0x00B0 0xDF # DEGREE SIGN
# Note: while Greek letters aren't actually supported by the MFD character map,
# this is manually added to map the letter 'mu' to ASCII 'u'. This is needed
# in the CJK compatibility page (0x3300-0x33FF) to deal with the square Latin
# abbreviations
0x03BC 0x75 # GREEK SMALL LETTER MU

View File

@ -12,20 +12,7 @@
#include <stddef.h>
#include <stdint.h>
enum {
TYPE_INVALID = 0, /* Invalid type (default) */
TYPE_POINTER, /* Pointer target */
TYPE_ENTRY /* Map entry value */
};
struct map_entry {
struct map_entry *next; /* Pointer to the next table */
uint8_t type; /* Type of entry */
uint8_t value; /* Value is valid if this is of TYPE_ENTRY */
};
extern struct map_entry map_root[];
extern const uint16_t *root_table[256];
extern const uint8_t *sequence_table[];
#endif /* !defined X52_CHAR_MAP_H */

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# Character map generator
#
# Copyright (C) 2012-2018 Nirenjan Krishnan (nirenjan@nirenjan.org)
# Copyright (C) 2012-2026 Nirenjan Krishnan (nirenjan@nirenjan.org)
#
# SPDX-License-Identifier: GPL-2.0-only WITH Classpath-exception-2.0
"""
@ -11,191 +11,267 @@ for the X52/X52 Pro MFD
import sys
import re
AUTOGEN_HEADER = """
/*
* Autogenerated character map file for Saitek X52 Pro
* Generated from %s
*/
#include "x52_char_map.h"
"""
class MapTable(object):
    """
    One node of the byte-indexed lookup tree built from the character map.

    A node remembers the UTF-8 bytes consumed to reach it (packed into
    ``value_so_far``), its children indexed by the next byte, and the mapped
    value when the node terminates a character.
    """

    # Top-level table, indexed by the first byte of the UTF-8 encoding
    root = [None] * 256

    def __init__(self, value_so_far, map_value=None):
        self.next_level = [None] * 256
        self.value_so_far = value_so_far
        self.map_value = map_value

    def output_nodes(self):
        """
        Return the C source lines for this node's subtree, deepest tables first
        """
        children = [(slot, child)
                    for slot, child in enumerate(self.next_level)
                    if child is not None]

        lines = []
        for _, child in children:
            lines.extend(child.output_nodes())

        # A node without children emits no table of its own
        if not children:
            return lines

        lines.append('static struct map_entry table_%x[64] = {' %
                     self.value_so_far)
        for slot, child in children:
            lines.append(self.dump_entry_line(0x80, slot,
                                              child.value_so_far,
                                              child.map_value))
        lines.extend(['};', ''])
        return lines

    @staticmethod
    def dump_entry_line(offset, node_index, value_so_far, map_value):
        """
        Format the designated initializer for a single table slot
        """
        slot = node_index - offset
        if map_value is not None:
            return '\t[0x%02x] = { NULL, TYPE_ENTRY, 0x%02x },' % \
                (slot, map_value)
        return '\t[0x%02x] = { table_%x, TYPE_POINTER, 0 },' % \
            (slot, value_so_far)

    @classmethod
    def add_to_table(cls, input_val, map_val):
        """
        Insert the code point ``input_val`` with mapped value ``map_val``
        """
        try:
            to_char = unichr
        except NameError:
            # Python 3 has no unichr; chr covers the same range
            to_char = chr

        # bytearray yields integers under both Python 2 and Python 3
        encoded = bytearray(to_char(input_val).encode('utf-8'))
        final = len(encoded) - 1

        prefix = 0
        table = cls.root
        for position, byte in enumerate(encoded):
            prefix = (prefix << 8) | byte
            if position == final:
                # Last byte: store the mapped value
                table[byte] = cls(prefix, map_val)
            else:
                # Intermediate byte: descend, creating the node if needed
                if table[byte] is None:
                    table[byte] = cls(prefix)
                table = table[byte].next_level

    @classmethod
    def output_table_as_list(cls):
        """
        Return the complete generated table source as a list of lines
        """
        lines = []
        for node in cls.root:
            if node is not None:
                lines.extend(node.output_nodes())

        lines.append('struct map_entry map_root[256] = {')
        lines.extend(cls.dump_entry_line(0x0, slot,
                                         node.value_so_far,
                                         node.map_value)
                     for slot, node in enumerate(cls.root)
                     if node is not None)
        lines.extend(['};', ''])
        return lines
import json
import unicodedata
class LineFormatError(ValueError):
    """Raised when a mapping descriptor line cannot be parsed"""
def parse_line(data):
class BMPTable:
"""
Parse a line containing a mapping descriptor. The mapping descriptor
must start with a hexadecimal unicode code point, followed by either a
single character, or a hexadecimal integer that corresponds to the map
value.
Sparse table for Basic Multilingual Plane
"""
# Strip off comments
data = re.sub(re.compile('#.*$'), '', data)
# Strip off leading and trailing whitespace
data = data.strip()
REPLACEMENT_CHAR = 0xDB
# If the line is empty, it is a comment line
if len(data) == 0:
return None, None
HEADER = f"""/*
* Autogenerated tables for X52 MFD character lookup
*
* DO NOT EDIT
*/
# Find the code point and the target value
try:
code_point, target = data.strip().split()
except ValueError:
# Raised when there are either too many, or not enough values in
# the string
raise LineFormatError('Invalid descriptor format "%s"' % data)
#include <stdint.h>
# Convert the string to its equivalent numeric value
try:
code_point = int(code_point, 0)
except ValueError:
raise LineFormatError('Invalid code point "%s"' % code_point)
"""
# Check if the target is a single character
if len(target) == 1:
target = ord(target)
else:
# Try parsing the target as an integer
TABLE_NAME_FORMAT = 'bmp_page_%02x'
TABLE_NAME_DEFAULT = 'bmp_page_default'
TABLE_FORMAT = 'const uint16_t %s[256] = {'
TABLE_FOOTER = '};\n'
def __init__(self, input_file, output_file, output_map):
self.input_file = input_file
self.output_file = output_file
self.output_map = output_map
self.mapping = {}
self.pages = {}
self.sequences = {}
self.root_table = []
self.read_map()
self.build_extended_map()
self.build_tables()
self.generate_test_tables()
@staticmethod
def parse_line(data):
"""
Parse a line containing a mapping descriptor. The mapping descriptor
must start with a hexadecimal unicode code point, followed by either a
single character, or a hexadecimal integer that corresponds to the map
value.
"""
# Strip off comments
data = re.sub(re.compile('#.*$'), '', data)
# Strip off leading and trailing whitespace
data = data.strip()
# If the line is empty, it is a comment line
if len(data) == 0:
return None, None
# Find the code point and the target value
try:
target = int(target, 0)
except ValueError:
raise LineFormatError('Invalid map value "%s"' % target)
code_point, target = data.strip().split()
except ValueError as exc:
# Raised when there are either too many, or not enough values in
# the string
raise LineFormatError(f'Invalid descriptor format "{data}"') from exc
return code_point, target
# Convert the string to its equivalent numeric value
try:
code_point = int(code_point, 0)
except ValueError as exc:
raise LineFormatError(f'Invalid code point "{code_point}"') from exc
# Check if the target is a single character
if len(target) == 1:
target = ord(target)
else:
# Try parsing the target as an integer
try:
target = int(target, 0)
except ValueError as exc:
raise LineFormatError(f'Invalid map value "{target}"') from exc
return code_point, target
def read_map(self):
    """
    Read the mapping tables from the config file

    Every descriptor line of ``self.input_file`` is parsed with
    ``self.parse_line`` and stored in ``self.mapping``. When the source
    code point has a single-character NFKC form that is not mapped yet,
    that form is mapped to the same destination as well.
    """

    def map_normalized(char, dst):
        # Try to normalize the unicode character as NFKC
        source = chr(char)
        normalized = unicodedata.normalize('NFKC', source)

        # Compare characters with characters: comparing the normalized
        # string against the integer code point can never be equal.
        if normalized == source:
            # This is already in normalized form
            return

        if len(normalized) != 1:
            # Multi-character forms are not handled while reading the map
            return

        # This is only needed to ensure that we get the normalized forms;
        # for example, half-width Katakana characters are normalized to
        # their corresponding full-width versions. Existing mappings are
        # never overwritten, since something like lowercase A with grave
        # could be normalized to lowercase A, which would break the
        # translation.
        self.mapping.setdefault(ord(normalized), dst)

    with open(self.input_file, 'r', encoding='utf-8') as infile:
        for line in infile:
            src, dst = self.parse_line(line)
            if src is None:
                # Blank or comment-only line
                continue

            self.mapping[src] = dst
            map_normalized(src, dst)
def build_extended_map(self):
    """
    Extend ``self.mapping`` to cover every code point in the BMP

    Code points that are already mapped are left alone. Every other code
    point is mapped through its NFKC form: to the mapping of a
    single-character form, to a sequence ID (256 and up, recorded in
    ``self.sequences``) for a multi-character form, or to the box
    character when nothing useful can be shown.
    """
    box = self.REPLACEMENT_CHAR

    self.mapping[0] = 0  # Handle NUL

    for code_point in range(0x10000):
        if code_point in self.mapping:
            continue

        # UTF-16 surrogates are not characters; show them as a box
        if 0xD800 <= code_point <= 0xDFFF:
            self.mapping[code_point] = box
            continue

        normalized = unicodedata.normalize('NFKC', chr(code_point))

        if len(normalized) == 1:
            # Reuse the mapping of the normalized form, if there is one
            self.mapping[code_point] = self.mapping.get(ord(normalized), box)
            continue

        # Translate each character of the normalized form, using the box
        # character for anything that has no mapping
        sequence = tuple(self.mapping.get(ord(char), box)
                         for char in normalized)

        # A sequence made up only of boxes and spaces collapses to one box
        if all(glyph in (box, self.mapping[0x20]) for glyph in sequence):
            self.mapping[code_point] = box
            continue

        seq_id = self.sequences.get(sequence)
        if seq_id is None:
            # Allocate the next ID; the first sequence gets 256
            seq_id = max(self.sequences.values(), default=255) + 1
            self.sequences[sequence] = seq_id

        self.mapping[code_point] = seq_id
def output_c_table(self, page_tuple, out_fd):
    """
    Write one page as a C array definition to ``out_fd``

    The array name is looked up in ``self.pages``. Values are written
    eight to a line, each full line ending in a comment with the index
    range it covers.
    """
    table_name = self.pages[page_tuple]
    out_fd.write(self.TABLE_FORMAT % table_name + '\n')

    for start in range(0, len(page_tuple), 8):
        row = page_tuple[start:start + 8]
        cells = ''.join(f"0x{val:02x}, " for val in row)
        if len(row) == 8:
            out_fd.write(f"{cells}// 0x{start:02x}-0x{start + 7:02x}\n")
        else:
            # Trailing partial row: no range comment and no line break
            out_fd.write(cells)

    out_fd.write(self.TABLE_FOOTER + '\n')
def build_tables(self):
    """
    Write the generated C source to ``self.output_file``

    The file holds the default (all box characters) page, every distinct
    page of the BMP mapping, the root table of page pointers, and the
    table of multi-byte output sequences. Pages with identical contents
    are emitted once and shared by the root table.
    """
    with open(self.output_file, 'w', encoding='utf-8') as out_fd:
        print(self.HEADER, file=out_fd)

        # The default page always comes first
        fallback = tuple([self.REPLACEMENT_CHAR] * 256)
        self.pages[fallback] = self.TABLE_NAME_DEFAULT
        self.output_c_table(fallback, out_fd)

        for high_byte in range(256):
            first = high_byte * 256
            contents = tuple(self.mapping[cp]
                             for cp in range(first, first + 256))

            if contents not in self.pages:
                # First time these contents are seen: emit a new table
                self.pages[contents] = self.TABLE_NAME_FORMAT % (high_byte)
                self.output_c_table(contents, out_fd)

            self.root_table.append(self.pages[contents])

        print(self.TABLE_FORMAT % ('* root_table'), file=out_fd)
        for high_byte, table_name in enumerate(self.root_table):
            print(f" {table_name}, // 0x{high_byte:02x}", file=out_fd)
        print(self.TABLE_FOOTER, file=out_fd)

        print(f"const uint8_t *sequence_table[{len(self.sequences)}] = {{", file=out_fd)
        for sequence, seq_id in self.sequences.items():
            # The length is stored in a single leading byte
            if len(sequence) >= 256:
                raise RuntimeError("Sequence way too long")

            cells = ', '.join(f"0x{byte:02X}"
                              for byte in (len(sequence), *sequence))
            print(f' [{seq_id-256}] = (const uint8_t[]){{ {cells} }},', file=out_fd)
        print(self.TABLE_FOOTER, file=out_fd)
def generate_test_tables(self):
    """
    Write the expected-output records to ``self.output_map``

    The file holds one fixed-size record per BMP code point, in code
    point order. A record is a length byte followed by the expected
    output bytes, padded with zeros.
    """
    # Sequence tuples ordered by ID, so that (ID - 256) indexes the list
    ordered = sorted(self.sequences.items(), key=lambda item: item[1])
    by_index = [seq for seq, _ in ordered]

    # The mapping for the NUL byte (\x00) should be an empty sequence
    expected = [[]]
    for code_point in range(1, 0x10000):
        value = self.mapping[code_point]
        if value < 256:
            expected.append([value])
        else:
            # Values from 256 up refer to the sequence table
            expected.append(by_index[value - 256])

    # Longest record, including its length byte
    longest = 1 + max(len(seq) for seq in expected)

    # Round up to a power of two
    is_power_of_two = (longest & (longest - 1)) == 0
    record_length = longest if is_power_of_two else 1 << longest.bit_length()

    with open(self.output_map, 'wb') as output_map:
        for seq in expected:
            record = bytes([len(seq), *seq])
            output_map.write(record.ljust(record_length, b'\x00'))
if __name__ == "__main__":
if len(sys.argv) != 3:
sys.stderr.write('Usage: %s <input-map> <output-c-file>\n' %
sys.argv[0])
if len(sys.argv) != 4:
sys.stderr.write(f"Usage: {sys.argv[0]} <input-map> <output-c-file> <output-json-map>\n")
sys.exit(1)
with open(sys.argv[1], 'r') as infile:
for line in infile:
src, dst = parse_line(line)
if src is not None:
MapTable.add_to_table(src, dst)
with open(sys.argv[2], 'w') as outfile:
outfile.write(AUTOGEN_HEADER % sys.argv[1])
for line in MapTable.output_table_as_list():
outfile.write(line + '\n')
BMPTable(sys.argv[1], sys.argv[2], sys.argv[3])

View File

@ -11,11 +11,63 @@
#include "config.h"
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include "libx52util.h"
#include "x52_char_map.h"
/**
 * @brief Converts a UTF8 stream to a uint32_t
 *
 * Decodes the character at the start of the stream. A malformed sequence
 * (stray continuation byte, invalid lead byte, missing continuation byte,
 * or an overlong 3 or 4 byte encoding) yields U+FFFD and consumes only the
 * lead byte, so that the caller can resynchronize on the next byte.
 *
 * Encoded surrogates (U+D800-U+DFFF) and values above U+10FFFF are returned
 * as decoded; range handling is left to the caller.
 *
 * @param[in]   utf8in  Pointer to UTF8 input stream. Must be NUL-terminated
 * @param[out]  unichr  Output character pointer
 *
 * @returns number of bytes to advance stream by - 0 if NUL or input pointer is NULL
 */
static int utf8_to_u32(const uint8_t *utf8in, uint32_t *unichr)
{
    uint8_t b;
    uint32_t cp;

    if (!utf8in || !*utf8in) return 0;

    b = utf8in[0];

    // 1-byte (0xxxxxxx)
    if (b < 0x80) {
        *unichr = b;
        return 1;
    }

    // Invalid leading bytes: continuation bytes, the overlong 2-byte
    // leads 0xC0/0xC1, and anything above 0xF4
    if (b < 0xC2 || b > 0xF4) goto error;

    // 2-byte (110xxxxx 10xxxxxx)
    // The lead byte is at least 0xC2, so this form cannot be overlong
    if ((b & 0xE0) == 0xC0) {
        if ((utf8in[1] & 0xC0) != 0x80) goto error;
        *unichr = ((uint32_t)(b & 0x1F) << 6) | (utf8in[1] & 0x3F);
        return 2;
    }

    // 3-byte (1110xxxx 10xxxxxx 10xxxxxx)
    // The checks short-circuit, so a NUL terminator is never read past
    if ((b & 0xF0) == 0xE0) {
        if ((utf8in[1] & 0xC0) != 0x80 || (utf8in[2] & 0xC0) != 0x80) goto error;
        cp = ((uint32_t)(b & 0x0F) << 12) |
             ((uint32_t)(utf8in[1] & 0x3F) << 6) |
             (utf8in[2] & 0x3F);
        // Overlong: the value fits in a shorter encoding. Without this
        // check, E0 80 80 would decode to U+0000.
        if (cp < 0x800) goto error;
        *unichr = cp;
        return 3;
    }

    // 4-byte (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
    if ((b & 0xF8) == 0xF0) {
        if ((utf8in[1] & 0xC0) != 0x80 || (utf8in[2] & 0xC0) != 0x80 ||
            (utf8in[3] & 0xC0) != 0x80) goto error;
        cp = ((uint32_t)(b & 0x07) << 18) |
             ((uint32_t)(utf8in[1] & 0x3F) << 12) |
             ((uint32_t)(utf8in[2] & 0x3F) << 6) |
             (utf8in[3] & 0x3F);
        // Overlong: the value fits in three bytes or fewer
        if (cp < 0x10000) goto error;
        *unichr = cp;
        return 4;
    }

error:
    *unichr = 0xFFFD; // Unicode Replacement Character
    return 1; // Consume lead byte to attempt resync
}
/**
* @brief Convert UTF8 string to X52 character map.
*
@ -32,52 +84,61 @@
int libx52util_convert_utf8_string(const uint8_t *input,
uint8_t *output, size_t *len)
{
struct map_entry *entry;
size_t index;
int retval = 0;
unsigned char local_index;
uint32_t unichr;
int bytes_consumed;
uint16_t translated;
if (!input || !output || !len || !*len) {
return -EINVAL;
}
index = 0;
entry = &map_root[*input];
// Reset the output array
memset(output, 0, *len);
while (*input) {
input++;
if (entry->type == TYPE_ENTRY) {
output[index] = entry->value;
// Length check
if (index >= *len) {
retval = -E2BIG;
break;
}
bytes_consumed = utf8_to_u32(input, &unichr);
if (bytes_consumed == 0) {
// We should never get here, since the while loop should have
// caught it
retval = 0;
break;
}
input += bytes_consumed;
// Check for bytes in the Supplementary planes
if (unichr >= 0x10000) {
unichr = 0xFFFD; // Unicode replacement character
}
translated = root_table[unichr >> 8][unichr & 0xFF];
if (translated < 256) {
// Table entry, push to output
output[index] = (uint8_t)translated;
index++;
if (index >= *len && *input) {
} else {
// We have a sequence, output that
const uint8_t *sequence = sequence_table[translated - 256];
uint8_t seq_len = sequence[0];
// Let's make sure that we can actually output to the buffer
if ((index + seq_len) >= *len) {
retval = -E2BIG;
break;
}
entry = &map_root[*input];
} else if (entry->type == TYPE_POINTER) {
local_index = *input;
if (local_index < 0x80 || local_index >= 0xC0) {
/* Invalid input, skip till we find the start of another
* valid UTF-8 character
*/
while (*input >= 0x80 && *input < 0xC0) {
input++; /* Skip invalid characters */
}
/* New UTF-8 character, reset the entry pointer */
entry = &map_root[*input];
} else {
/* Mask off the upper bits, we only care about the lower 6 bits */
local_index &= 0x3F;
entry = &(entry->next[local_index]);
for (int i = 1; i <= seq_len; i++) {
output[index] = sequence[i];
index++;
}
} else {
/* Invalid value, skip */
while (*input >= 0x80 && *input < 0xC0) {
input++; /* Skip invalid characters */
}
/* New UTF-8 character, reset the entry pointer */
entry = &map_root[*input];
}
}

View File

@ -0,0 +1,195 @@
/*
* X52 character map lookup test
*
* Copyright (C) 2026 Nirenjan Krishnan <nirenjan@nirenjan.org>
*
* SPDX-License-Identifier: GPL-2.0-only WITH Classpath-exception-2.0
*/
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "libx52util.h"
// Fix this if we ever hit longer sequences
#define RECORD_SIZE 8
// Blindly encode a code point into its smallest UTF8-style representation.
// No validity checks are made: surrogates and values past U+10FFFF are
// encoded as well, using the 5, 6 and 7 byte forms where needed. The caller
// must supply a buffer of at least 7 bytes; no terminator is written.
static void encode_utf8(uint32_t cp, uint8_t *out)
{
    int trailing;   // Number of continuation bytes
    uint8_t lead;   // Marker bits of the lead byte

    if (cp <= 0x7F) {
        // Plain ASCII, stored as is
        out[0] = (uint8_t)cp;
        return;
    }

    if (cp <= 0x7FF) {
        trailing = 1;
        lead = 0xC0;
    } else if (cp <= 0xFFFF) {
        trailing = 2;
        lead = 0xE0;
    } else if (cp <= 0x1FFFFF) {
        trailing = 3;
        lead = 0xF0;
    } else if (cp <= 0x3FFFFFF) {
        trailing = 4;
        lead = 0xF8;
    } else if (cp <= 0x7FFFFFFF) {
        trailing = 5;
        lead = 0xFC;
    } else { // 0x80000000 to 0xFFFFFFFF (7 bytes)
        trailing = 6;
        lead = 0xFE; // Binary 11111110
    }

    // Fill the continuation bytes from the end, six bits at a time
    for (int pos = trailing; pos >= 1; pos--) {
        out[pos] = (uint8_t)(0x80 | (cp & 0x3F));
        cp >>= 6;
    }

    // Whatever is left belongs in the lead byte (nothing for 7 bytes)
    out[0] = (uint8_t)(lead | cp);
}
// Convert a single code point and compare the result against its expected
// record (a length byte followed by the expected output bytes). Failures are
// reported as TAP diagnostics, with the code point printed using `width`
// hexadecimal digits. Returns true when the conversion matches the record.
static bool check_code_point(uint32_t cp, const uint8_t *rec, int width)
{
    uint8_t input[8] = {0};
    uint8_t output[RECORD_SIZE] = {0};
    size_t len = sizeof(output);
    int result;

    encode_utf8(cp, input);
    result = libx52util_convert_utf8_string(input, output, &len);
    if (result != 0) {
        printf("# Bad result @ %0*X: %d\n", width, (unsigned int)cp, result);
        return false;
    }

    // result is OK, check against the expected blob
    if (len != rec[0]) {
        printf("# Length mismatch @ %0*X: expected %u, got %zu\n",
               width, (unsigned int)cp, rec[0], len);
        return false;
    }

    // Length is OK, check the bytes
    if (memcmp(output, &rec[1], rec[0]) != 0) {
        printf("# Output mismatch @ %0*X:\n", width, (unsigned int)cp);
        printf("# exp/got:");
        for (size_t i = 0; i < len; i++) {
            printf("%02X/%02X ", rec[i+1], output[i]);
        }
        puts("");
        return false;
    }

    return true;
}

// Test driver: checks every BMP code point against the records in the bin
// file given as the only argument, then checks that code points in the
// supplementary planes produce the same output as U+FFFD. Results are
// reported in TAP format, one test per BMP page plus one for the
// supplementary planes.
int main(int argc, char *argv[])
{
    const size_t blob_size = (size_t)0x10000 * RECORD_SIZE;
    int fd;
    uint8_t *expected_blob;
    bool smp_pages_ok = true;

    // Argument check
    if (argc != 2) {
        puts("Bail out! Invalid number of arguments");
        puts("# Usage: libx52util-bmp-test <path-to-bin>");
        return 1;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        printf("Bail out! Error %d opening bin file %s: %s\n",
               errno, argv[1], strerror(errno));
        return 1;
    }

    expected_blob = mmap(NULL, blob_size, PROT_READ, MAP_SHARED, fd, 0);
    if (expected_blob == MAP_FAILED) {
        // Report before close(), which may overwrite errno
        printf("Bail out! MMAP failed with error %d: %s\n",
               errno, strerror(errno));
        close(fd);
        // Never fall through: the mapping must not be dereferenced
        return 1;
    }

    puts("TAP version 13");
    // Check the 256 BMP Pages, plus the supplementary pages
    puts("1..257");

    for (uint32_t page = 0; page < 256; page++) {
        bool page_ok = true;

        // Stop at the first failure in a page; one diagnostic is enough
        for (uint32_t offset = 0; offset < 256; offset++) {
            uint32_t cp = page * 256 + offset;

            if (!check_code_point(cp, &expected_blob[cp * RECORD_SIZE], 4)) {
                page_ok = false;
                break;
            }
        }

        printf("%sok - %u Page 0x%02x\n", page_ok ? "": "not ",
               page + 1, page);
    }

    // Handle the supplementary planes (1 to 16). Each plane holds 0x10000
    // code points; the first and last code point of every plane must give
    // the same output as U+FFFD.
    for (uint32_t plane = 0x1; plane <= 0x10 && smp_pages_ok; plane++) {
        const uint8_t *rec = &expected_blob[0xFFFD * RECORD_SIZE];

        for (uint32_t offset = 0; offset < 0x10000; offset += 0xFFFF) {
            uint32_t cp = plane * 0x10000 + offset;

            if (!check_code_point(cp, rec, 8)) {
                smp_pages_ok = false;
                break;
            }
        }
    }
    printf("%sok - 257 SMP tests\n", smp_pages_ok ? "" : "not ");

    // Cleanup
    munmap(expected_blob, blob_size);
    close(fd);
    return 0;
}

View File

@ -1,107 +0,0 @@
#!/usr/bin/env python3
"""Generate a test script for the convert function"""
import argparse
import re
def parse_file(map_file):
    """Parse the character map and return a dictionary that maps each
    code point to its X52 MFD character value.

    Comments and blank lines are skipped. The target is either a single
    literal character or an integer literal."""
    # No validation is done here; the map is expected to be well formed
    comment = re.compile(r'#.*$')
    char_dict = {}

    with open(map_file, 'r', encoding='utf-8') as map_fd:
        for raw_line in map_fd:
            descriptor = comment.sub('', raw_line).strip()
            if not descriptor:
                # Nothing left once the comment is removed
                continue

            key, out = descriptor.split()
            code_point = int(key, 0)
            value = ord(out) if len(out) == 1 else int(out, 0)
            char_dict[code_point] = value

    return char_dict
def generate_positive_test_cases(char_dict):
    """Return the C source for one test function per mapping, followed by
    the cmocka test table that lists them in code point order"""
    # Each test converts the UTF-8 encoding of one code point and checks
    # the result against the mapped byte
    case_template = """
static void test_map_{in_char}(void **state) {{
(void)state;
const uint8_t input_array[] = {{ {in_bytes}, 0 }};
const uint8_t expected_output[2] = {{ {out_byte}, 0 }};
size_t out_len = 20;
uint8_t output[20] = {{ 0 }};
int rc;
rc = libx52util_convert_utf8_string(input_array, output, &out_len);
assert_int_equal(rc, 0);
assert_int_equal(out_len, 1);
assert_memory_equal(output, expected_output, 2);
}}
"""
    pieces = []
    for code_point, mfd_byte in char_dict.items():
        encoded = chr(code_point).encode('utf-8')
        pieces.append(case_template.format(
            in_char=hex(code_point),
            in_bytes=", ".join(map(hex, encoded)),
            out_byte=mfd_byte))

    pieces.append("""
const struct CMUnitTest tests[] = {
""")
    pieces.extend(f" cmocka_unit_test(test_map_{hex(code_point)}),\n"
                  for code_point in sorted(char_dict.keys()))
    pieces.append('};\n')

    return "".join(pieces)
# Includes placed at the top of the generated test source
TEST_HEADER = """
#include <stdint.h>
#include <stddef.h>
#include <stdarg.h>
#include <setjmp.h>
#include <cmocka.h>
#include "libx52util.h"
"""

# Entry point appended to the generated test source
TEST_FOOTER = """
int main(void) {
cmocka_set_message_output(CM_OUTPUT_TAP);
cmocka_run_group_tests(tests, NULL, NULL);
return 0;
}
"""


def main():
    """Read the character map named on the command line and write the
    generated test source to the requested output file"""
    parser = argparse.ArgumentParser(description='Generate map test cases')
    parser.add_argument('INPUT_FILE', help="Input character map file")
    parser.add_argument('OUTPUT_FILE', help="Generated test script")
    options = parser.parse_args()

    mappings = parse_file(options.INPUT_FILE)
    test_cases = generate_positive_test_cases(mappings)

    with open(options.OUTPUT_FILE, 'w', encoding='utf-8') as out_fd:
        for section in (TEST_HEADER, test_cases, TEST_FOOTER):
            print(section, file=out_fd)


if __name__ == '__main__':
    main()