diff --git a/libx52util/meson.build b/libx52util/meson.build
index b52f344..9950605 100644
--- a/libx52util/meson.build
+++ b/libx52util/meson.build
@@ -1,13 +1,13 @@
 # libx52util
-libx52util_version = '1.0.1'
+libx52util_version = '1.0.2'
 gen_script = files('x52_char_map_gen.py')[0]
 
 util_char_map = custom_target('util-char-map',
   build_by_default: false,
   depend_files: ['x52_char_map_gen.py', 'x52_char_map.cfg'],
-  command: [python, gen_script, '@INPUT@', '@OUTPUT@'],
+  command: [python, gen_script, '@INPUT@', '@OUTPUT0@', '@OUTPUT1@'],
   input: 'x52_char_map.cfg',
-  output: 'util_char_map.c')
+  output: ['util_char_map.c', 'x52_char_map.bin'])
 
 lib_libx52util = library('x52util', util_char_map, 'x52_char_map_lookup.c',
   install: true,
@@ -23,21 +23,14 @@ pkgconfig.generate(lib_libx52util,
   version: libx52util_version,
 )
 
-test_gen_script = files('x52_map_test_gen.py')[0]
-
-libx52util_map_test_src = custom_target('libx52util-map-test-src',
-  build_by_default: false,
-  depend_files: ['x52_map_test_gen.py', 'x52_char_map.cfg'],
-  command: [python, test_gen_script, '@INPUT@', '@OUTPUT@'],
-  input: 'x52_char_map.cfg',
-  output: 'x52_map_test.c'
-  )
-
-libx52util_map_test = executable('libx52util-map-test', libx52util_map_test_src,
-  dependencies: [dep_cmocka],
-  link_with: [lib_libx52util],
+libx52util_bmp_test = executable(
+  'libx52util-bmp-test',
+  'x52_char_map_test.c',
   build_by_default: false,
   include_directories: [includes, lib_libx52util.private_dir_include()],
+  link_with: [lib_libx52util]
   )
 
-test('libx52util-map-test', libx52util_map_test, protocol: 'tap')
+test('libx52util-bmp-test', libx52util_bmp_test,
+  protocol: 'tap',
+  args: [util_char_map[1]])
diff --git a/libx52util/x52_char_map.cfg b/libx52util/x52_char_map.cfg
index cf6eda1..26369da 100644
--- a/libx52util/x52_char_map.cfg
+++ b/libx52util/x52_char_map.cfg
@@ -324,3 +324,13 @@
 0xFF9E  0xDE    # HALFWIDTH KATAKANA VOICED SOUND MARK
 0xFF9F  0xDF    # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
 
+# The following characters are manually added to aid in normalization to the
+# X52 character map
+0x2215  0x2F    # DIVISION SLASH
+0x2044  0x2F    # FRACTION SLASH
+0x00B0  0xDF    # DEGREE SIGN
+# Note: while Greek letters aren't actually supported by the MFD character map,
+# this is manually added to map the letter 'mu' to ASCII 'u'. This is needed
+# in the CJK compatibility page (0x3300-0x33FF) to deal with the square latin
+# abbreviations
+0x03BC  0x75    # GREEK SMALL LETTER MU
diff --git a/libx52util/x52_char_map.h b/libx52util/x52_char_map.h
index 6dd4470..3abe175 100644
--- a/libx52util/x52_char_map.h
+++ b/libx52util/x52_char_map.h
@@ -12,20 +12,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
-enum {
-    TYPE_INVALID = 0,   /* Invalid type (default) */
-
-    TYPE_POINTER,       /* Pointer target */
-
-    TYPE_ENTRY          /* Map entry value */
-};
-
-struct map_entry {
-    struct map_entry *next; /* Pointer to the next table */
-    uint8_t type;           /* Type of entry */
-    uint8_t value;          /* Value is valid if this is of TYPE_ENTRY */
-};
-
-extern struct map_entry map_root[];
+extern const uint16_t *root_table[256];
+extern const uint8_t *sequence_table[];
 
 #endif /* !defined X52_CHAR_MAP_H */
diff --git a/libx52util/x52_char_map_gen.py b/libx52util/x52_char_map_gen.py
index f52a9d6..eaf8072 100755
--- a/libx52util/x52_char_map_gen.py
+++ b/libx52util/x52_char_map_gen.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # Character map generator
 #
-# Copyright (C) 2012-2018 Nirenjan Krishnan (nirenjan@nirenjan.org)
+# Copyright (C) 2012-2026 Nirenjan Krishnan (nirenjan@nirenjan.org)
 #
 # SPDX-License-Identifier: GPL-2.0-only WITH Classpath-exception-2.0
 """
@@ -11,191 +11,267 @@ for the X52/X52 Pro MFD
 
 import sys
 import re
-
-AUTOGEN_HEADER = """
-/*
- * Autogenerated character map file for Saitek X52 Pro
- * Generated from %s
- */
-
-#include "x52_char_map.h"
-
-"""
-
-
-class MapTable(object):
-    """
-    Defines a MapTable entry, with each entry storing the value seen so far,
-    the type of the entry, and the value, if it's a value node.
-    """
-    # Empty list
-    root = [None] * 256
-
-    def __init__(self, value_so_far, map_value=None):
-        self.next_level = [None] * 256
-        self.value_so_far = value_so_far
-        self.map_value = map_value
-
-    def output_nodes(self):
-        """
-        Output the individual nodes
-        """
-        output_lines = []
-        output_count = 0
-        for node in self.next_level:
-            if node is not None:
-                output_lines.extend(node.output_nodes())
-                output_count += 1
-
-        if output_count != 0:
-            struct_header = 'static struct map_entry table_%x[64] = {' % \
-                            self.value_so_far
-            output_lines.append(struct_header)
-
-            for node_index in range(0, 256):
-                node = self.next_level[node_index]
-                if node is not None:
-                    output_lines.append(self.dump_entry_line(0x80, node_index,
-                                                             node.value_so_far,
-                                                             node.map_value))
-
-            output_lines.extend(['};', ''])
-
-        return output_lines
-
-    @staticmethod
-    def dump_entry_line(offset, node_index, value_so_far, map_value):
-        """
-        Dump the array entry for the current node
-        """
-        if map_value is None:
-            node_entry_line = '\t[0x%02x] = { table_%x, TYPE_POINTER, 0 },' % \
-                (node_index - offset, value_so_far)
-        else:
-            node_entry_line = '\t[0x%02x] = { NULL, TYPE_ENTRY, 0x%02x },' % \
-                (node_index - offset, map_value)
-
-        return node_entry_line
-
-    @classmethod
-    def add_to_table(cls, input_val, map_val):
-        """
-        Add a map value to the lookup table
-        """
-        try:
-            uchr = unichr(input_val)
-        except NameError:
-            # Python 3 doesn't have unichr, but chr should work
-            uchr = chr(input_val)
-
-        utf8_str = uchr.encode('utf-8')
-        # Python2 returns the encoded result as a string, wheras
-        # Python3 returns the result as a bytearray. Converting
-        # the string (or bytearray) into a bytearray ensures that
-        # this can be run in both Python2 and Python3
-        utf8_vals = [c for c in bytearray(utf8_str)]
-
-        value_so_far = 0
-        level = cls.root
-        for index, char in enumerate(utf8_vals):
-            value_so_far = (value_so_far << 8) | char
-            if index < (len(utf8_vals) - 1):
-                node = level[char]
-                if node is None:
-                    node = cls(value_so_far)
-                    level[char] = node
-
-                level = level[char].next_level
-            else:
-                node = cls(value_so_far, map_val)
-                level[char] = node
-
-    @classmethod
-    def output_table_as_list(cls):
-        """
-        Output the map table as a list of lines
-        """
-        output_lines = []
-        for node in cls.root:
-            if node is not None:
-                output_lines.extend(node.output_nodes())
-
-        output_lines.append('struct map_entry map_root[256] = {')
-
-        for node_index in range(0, 256):
-            node = cls.root[node_index]
-            if node is not None:
-                output_lines.append(cls.dump_entry_line(0x0, node_index,
-                                                        node.value_so_far,
-                                                        node.map_value))
-
-        output_lines.extend(['};', ''])
-
-        return output_lines
-
+import json
+import unicodedata
 
 class LineFormatError(ValueError):
     """
     Error class for parser
     """
 
-def parse_line(data):
+class BMPTable:
     """
-    Parse a line containing a mapping descriptor. The mapping descriptor
-    must start with a hexadecimal unicode code point, followed by either a
-    single character, or a hexadecimal integer that corresponds to the map
-    value.
+    Sparse table for Basic Multilingual Plane
     """
-    # Strip off comments
-    data = re.sub(re.compile('#.*$'), '', data)
 
-    # Strip off leading and trailing whitespace
-    data = data.strip()
+    REPLACEMENT_CHAR = 0xDB
 
-    # If the line is empty, it is a comment line
-    if len(data) == 0:
-        return None, None
+    HEADER = f"""/*
+* Autogenerated tables for X52 MFD character lookup
+*
+* DO NOT EDIT
+*/
 
-    # Find the code point and the target value
-    try:
-        code_point, target = data.strip().split()
-    except ValueError:
-        # Raised when there are either too many, or not enough values in
-        # the string
-        raise LineFormatError('Invalid descriptor format "%s"' % data)
+#include <stdint.h>
 
-    # Convert the string to its equivalent numeric value
-    try:
-        code_point = int(code_point, 0)
-    except ValueError:
-        raise LineFormatError('Invalid code point "%s"' % code_point)
+"""
 
-    # Check if the target is a single character
-    if len(target) == 1:
-        target = ord(target)
-    else:
-        # Try parsing the target as an integer
+    TABLE_NAME_FORMAT = 'bmp_page_%02x'
+    TABLE_NAME_DEFAULT = 'bmp_page_default'
+    TABLE_FORMAT = 'const uint16_t %s[256] = {'
+    TABLE_FOOTER = '};\n'
+
+    def __init__(self, input_file, output_file, output_map):
+        self.input_file = input_file
+        self.output_file = output_file
+        self.output_map = output_map
+        self.mapping = {}
+        self.pages = {}
+        self.sequences = {}
+        self.root_table = []
+
+        self.read_map()
+        self.build_extended_map()
+        self.build_tables()
+        self.generate_test_tables()
+
+    @staticmethod
+    def parse_line(data):
+        """
+        Parse a line containing a mapping descriptor. The mapping descriptor
+        must start with a hexadecimal unicode code point, followed by either a
+        single character, or a hexadecimal integer that corresponds to the map
+        value.
+        """
+        # Strip off comments
+        data = re.sub(re.compile('#.*$'), '', data)
+
+        # Strip off leading and trailing whitespace
+        data = data.strip()
+
+        # If the line is empty, it is a comment line
+        if len(data) == 0:
+            return None, None
+
+        # Find the code point and the target value
         try:
-            target = int(target, 0)
-        except ValueError:
-            raise LineFormatError('Invalid map value "%s"' % target)
+            code_point, target = data.strip().split()
+        except ValueError as exc:
+            # Raised when there are either too many, or not enough values in
+            # the string
+            raise LineFormatError(f'Invalid descriptor format "{data}"') from exc
 
-    return code_point, target
+        # Convert the string to its equivalent numeric value
+        try:
+            code_point = int(code_point, 0)
+        except ValueError as exc:
+            raise LineFormatError(f'Invalid code point "{code_point}"') from exc
+
+        # Check if the target is a single character
+        if len(target) == 1:
+            target = ord(target)
+        else:
+            # Try parsing the target as an integer
+            try:
+                target = int(target, 0)
+            except ValueError as exc:
+                raise LineFormatError(f'Invalid map value "{target}"') from exc
+
+        return code_point, target
+
+    def read_map(self):
+        """Load self.input_file into self.mapping (code point -> MFD byte).
+
+        Each entry is also registered under its single-character NFKC form,
+        unless that form already has a mapping of its own.
+        """
+        def map_normalized(char, dst):
+            """Alias the NFKC form of code point `char` to `dst`."""
+            normalized = unicodedata.normalize('NFKC', chr(char))
+            # Compare str with str: `normalized == char` was always False
+            # because `char` is an int code point.
+            if normalized == chr(char):
+                # This is already in normalized form
+                return
+
+            if len(normalized) == 1:
+                # e.g. half-width Katakana normalizes to the full-width
+                # letter. setdefault never overwrites an existing mapping, so
+                # an explicit entry for the normalized form always wins.
+                self.mapping.setdefault(ord(normalized), dst)
+
+        with open(self.input_file, 'r', encoding='utf-8') as infile:
+            for line in infile:
+                src, dst = self.parse_line(line)
+                if src is None:
+                    continue
+
+                self.mapping[src] = dst
+                map_normalized(src, dst)
+
+    def build_extended_map(self):
+        """Extend self.mapping to cover every code point in the BMP.
+
+        Code points without an explicit mapping are resolved through their
+        NFKC normalization: a single-character form reuses that character's
+        mapping, a multi-character form becomes an entry in self.sequences
+        (IDs start at 256, above any single-byte value). Surrogates and
+        anything that cannot be mapped get REPLACEMENT_CHAR.
+
+        Code points are visited in ascending order, so a normalized form
+        that has no mapping yet at that point is treated as unmapped.
+        """
+        box = self.REPLACEMENT_CHAR  # shown for anything unmappable
+        self.mapping[0] = 0 # Handle NUL
+        for i in range(0x10000):
+            # Iterate over the basic multilingual plane
+            if i in self.mapping:
+                continue
+
+            if 0xD800 <= i <= 0xDFFF:
+                # UTF16 surrogate pairs - we want to mark it as a box character
+                self.mapping[i] = box
+                continue
+
+            normalized = unicodedata.normalize('NFKC', chr(i))
+            if len(normalized) == 1:
+                # Reuse the normalized character's mapping, if there is one
+                self.mapping[i] = self.mapping.get(ord(normalized), box)
+                continue
+
+            # Translate each character of the normalized form, substituting
+            # the box character for anything not in the mapping table
+            sequence = tuple(self.mapping.get(ord(c), box) for c in normalized)
+
+            # A sequence made up only of box characters and spaces carries no
+            # information, so collapse it to a single box character.
+            # mapping[0x20] is read here rather than hoisted above the loop,
+            # because it may only get assigned once the loop reaches U+0020.
+            if all(c in (box, self.mapping[0x20]) for c in sequence):
+                self.mapping[i] = box
+                continue
+
+            if sequence not in self.sequences:
+                # IDs are handed out contiguously from 256 by this method
+                # alone, so the next free ID follows from the count; there is
+                # no need to rescan every value with max()
+                self.sequences[sequence] = 256 + len(self.sequences)
+
+            self.mapping[i] = self.sequences[sequence]
+
+    def output_c_table(self, page_tuple, out_fd):
+        """Output the C table structure"""
+        page_name = self.pages[page_tuple]
+
+        print(self.TABLE_FORMAT % (page_name), file=out_fd)
+
+        for i, val in enumerate(page_tuple):
+            print(f"0x{val:02x}, ", end='', file=out_fd)
+            if i % 8 == 7:
+                print(f"// 0x{i-7:02x}-0x{i:02x}", file=out_fd)
+
+        print(self.TABLE_FOOTER, file=out_fd)
+
+    def build_tables(self):
+        """Build the C Tables"""
+        with open(self.output_file, 'w', encoding='utf-8') as out_fd:
+            print(self.HEADER, file=out_fd)
+
+            default_page = tuple([self.REPLACEMENT_CHAR] * 256)
+            self.pages[default_page] = self.TABLE_NAME_DEFAULT
+            self.output_c_table(default_page, out_fd)
+
+            for root_idx in range(256):
+                base_idx = root_idx * 256
+                page = [self.mapping[idx] for idx in range(base_idx, base_idx+256)]
+                page_tuple = tuple(page)
+                if page_tuple not in self.pages:
+                    page_name = self.TABLE_NAME_FORMAT % (root_idx)
+                    self.pages[page_tuple] = page_name
+                    self.output_c_table(page_tuple, out_fd)
+
+                self.root_table.append(self.pages[page_tuple])
+
+            print(self.TABLE_FORMAT % ('* root_table'), file=out_fd)
+
+            for page_id, page_name in enumerate(self.root_table):
+                print(f"    {page_name}, // 0x{page_id:02x}", file=out_fd)
+
+            print(self.TABLE_FOOTER, file=out_fd)
+
+            print(f"const uint8_t *sequence_table[{len(self.sequences)}] = {{", file=out_fd)
+            for sequence, seq_id in self.sequences.items():
+                seq_len = len(sequence)
+                if seq_len >= 256:
+                    raise RuntimeError("Sequence way too long")
+
+                line = [f"0x{seq_len:02X}"]
+                for seq_elem in sequence:
+                    line.append(f"0x{seq_elem:02X}")
+
+                line = ', '.join(line)
+                print(f'    [{seq_id-256}] = (const uint8_t[]){{ {line} }},', file=out_fd)
+
+            print(self.TABLE_FOOTER, file=out_fd)
+
+    def generate_test_tables(self):
+        """Build the test tables used by the test suite"""
+        # Generate the expected output sequences for every table
+        # Mapping is a dict mapping the code point as a string to the output
+        # Sequence is a dict of <seq_tuple>:<seq_id> mappings (seq_id starts from 256)
+        output = []
+        sequences = [item[0] for item in sorted(self.sequences.items(),
+                                                key=lambda item: item[1])]
+
+        # The mapping for the NUL byte (\x00) should be an empty sequence
+        output.append([])
+
+        for i in range(1, 0x10000):
+            seq = self.mapping[i]
+            if seq >= 256:
+                # Pull from sequence table
+                seq = sequences[seq - 256]
+            else:
+                seq = [seq]
+            output.append(seq)
+
+        # Find the longest length sequence (add 1 for the length byte)
+        longest = max(len(seq) for seq in output) + 1
+        # Find the next power of two that can hold this sequence
+        if (longest & (longest - 1)) == 0:
+            record_length = longest
+        else:
+            record_length = 1 << longest.bit_length()
+
+        with open(self.output_map, 'wb') as output_map:
+            pad = [0] * record_length
+            for seq in output:
+                record = [len(seq)] + list(seq) + pad
+                output_map.write(bytes(record[:record_length]))
 
 if __name__ == "__main__":
-    if len(sys.argv) != 3:
-        sys.stderr.write('Usage: %s <input-map> <output-c-file>\n' %
-                         sys.argv[0])
+    if len(sys.argv) != 4:
+        sys.stderr.write(f"Usage: {sys.argv[0]} <input-map> <output-c-file> <output-bin-map>\n")
         sys.exit(1)
 
-    with open(sys.argv[1], 'r') as infile:
-        for line in infile:
-            src, dst = parse_line(line)
-            if src is not None:
-                MapTable.add_to_table(src, dst)
-
-    with open(sys.argv[2], 'w') as outfile:
-        outfile.write(AUTOGEN_HEADER % sys.argv[1])
-
-        for line in MapTable.output_table_as_list():
-            outfile.write(line + '\n')
+    BMPTable(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/libx52util/x52_char_map_lookup.c b/libx52util/x52_char_map_lookup.c
index 04a02e7..93fcb51 100644
--- a/libx52util/x52_char_map_lookup.c
+++ b/libx52util/x52_char_map_lookup.c
@@ -11,11 +11,63 @@
 
 #include "config.h"
 #include <stdint.h>
+#include <string.h>
 #include <errno.h>
 
 #include "libx52util.h"
 #include "x52_char_map.h"
 
+/**
+ * @brief Converts a UTF8 stream to a uint32_t
+ *
+ * @param[in]       utf8in  Pointer to UTF8 input stream. Must be NUL-terminated
+ * @param[out]      unichr  Output character; U+FFFD if input is malformed or overlong
+ *
+ * @returns number of bytes to advance stream by - 0 if NUL or input pointer is NULL
+ */
+static int utf8_to_u32(const uint8_t *utf8in, uint32_t *unichr)
+{
+    if (!utf8in || !*utf8in) return 0;
+    uint8_t b = utf8in[0];
+
+    // 1-byte (0xxxxxxx)
+    if (b < 0x80) {
+        *unichr = b;
+        return 1;
+    }
+
+    // Invalid leading bytes, or a second byte that makes the form overlong
+    if (b < 0xC2 || b > 0xF4) goto error;
+    if ((b == 0xE0 && utf8in[1] < 0xA0) || (b == 0xF0 && utf8in[1] < 0x90)) goto error;
+
+    // 2-byte (110xxxxx 10xxxxxx)
+    if ((b & 0xE0) == 0xC0) {
+        if ((utf8in[1] & 0xC0) != 0x80) goto error;
+        *unichr = ((b & 0x1F) << 6) | (utf8in[1] & 0x3F);
+        return 2;
+    }
+
+    // 3-byte (1110xxxx 10xxxxxx 10xxxxxx)
+    if ((b & 0xF0) == 0xE0) {
+        if ((utf8in[1] & 0xC0) != 0x80 || (utf8in[2] & 0xC0) != 0x80) goto error;
+        *unichr = ((b & 0x0F) << 12) | ((utf8in[1] & 0x3F) << 6) | (utf8in[2] & 0x3F);
+        return 3;
+    }
+
+    // 4-byte (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    if ((b & 0xF8) == 0xF0) {
+        if ((utf8in[1] & 0xC0) != 0x80 || (utf8in[2] & 0xC0) != 0x80 ||
+                (utf8in[3] & 0xC0) != 0x80) goto error;
+        *unichr = ((b & 0x07) << 18) | ((utf8in[1] & 0x3F) << 12) |
+              ((utf8in[2] & 0x3F) << 6) | (utf8in[3] & 0x3F);
+        return 4;
+    }
+
+error:
+    *unichr = 0xFFFD; // Unicode Replacement Character
+    return 1;     // Consume lead byte to attempt resync
+}
+
 /**
  * @brief Convert UTF8 string to X52 character map.
  *
@@ -32,52 +84,61 @@
 int libx52util_convert_utf8_string(const uint8_t *input,
                                    uint8_t *output, size_t *len)
 {
-    struct map_entry *entry;
     size_t index;
     int retval = 0;
-    unsigned char local_index;
+    uint32_t unichr;
+    int bytes_consumed;
+    uint16_t translated;
 
     if (!input || !output || !len || !*len) {
         return -EINVAL;
     }
 
     index = 0;
-    entry = &map_root[*input];
+    // Reset the output array
+    memset(output, 0, *len);
+
     while (*input) {
-        input++;
-        if (entry->type == TYPE_ENTRY) {
-            output[index] = entry->value;
+        // Length check
+        if (index >= *len) {
+            retval = -E2BIG;
+            break;
+        }
+
+        bytes_consumed = utf8_to_u32(input, &unichr);
+        if (bytes_consumed == 0) {
+            // We should never get here, since the while loop should have
+            // caught it
+            retval = 0;
+            break;
+        }
+        input += bytes_consumed;
+
+        // Check for bytes in the Supplementary planes
+        if (unichr >= 0x10000) {
+            unichr = 0xFFFD; // Unicode replacement character
+        }
+
+        translated = root_table[unichr >> 8][unichr & 0xFF];
+        if (translated < 256) {
+            // Table entry, push to output
+            output[index] = (uint8_t)translated;
             index++;
-            if (index >= *len && *input) {
+        } else {
+            // We have a sequence, output that
+            const uint8_t *sequence = sequence_table[translated - 256];
+            uint8_t seq_len = sequence[0];
+
+            // Let's make sure that we can actually output to the buffer
+            if ((index + seq_len) >= *len) {
                 retval = -E2BIG;
                 break;
             }
-            entry = &map_root[*input];
-        } else if (entry->type == TYPE_POINTER) {
-            local_index = *input;
-            if (local_index < 0x80 || local_index >= 0xC0) {
-                /* Invalid input, skip till we find the start of another
-                 * valid UTF-8 character
-                 */
-                while (*input >= 0x80 && *input < 0xC0) {
-                    input++; /* Skip invalid characters */
-                }
 
-                /* New UTF-8 character, reset the entry pointer */
-                entry = &map_root[*input];
-            } else {
-                /* Mask off the upper bits, we only care about the lower 6 bits */
-                local_index &= 0x3F;
-                entry = &(entry->next[local_index]);
+            for (int i = 1; i <= seq_len; i++) {
+                output[index] = sequence[i];
+                index++;
             }
-        } else {
-            /* Invalid value, skip */
-            while (*input >= 0x80 && *input < 0xC0) {
-                input++; /* Skip invalid characters */
-            }
-
-            /* New UTF-8 character, reset the entry pointer */
-            entry = &map_root[*input];
         }
     }
 
diff --git a/libx52util/x52_char_map_test.c b/libx52util/x52_char_map_test.c
new file mode 100644
index 0000000..8648fe8
--- /dev/null
+++ b/libx52util/x52_char_map_test.c
@@ -0,0 +1,195 @@
+/*
+ * X52 character map lookup test
+ *
+ * Copyright (C) 2026 Nirenjan Krishnan <nirenjan@nirenjan.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-only WITH Classpath-exception-2.0
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "libx52util.h"
+
+// Fix this if we ever hit longer sequences
+#define RECORD_SIZE 8
+
+// Blindly encode a code point into its smallest UTF8 representation.
+//
+// No validity checks are made: surrogates and values above U+10FFFF are
+// encoded as-is, using the obsolete 5-, 6- and 7-byte forms where needed.
+//
+// Only the bytes of the encoding are written, with no NUL terminator, so
+// the caller must pass a zeroed buffer with room for the longest form plus
+// the terminator (8 bytes) if the result is to be used as a C string.
+static void encode_utf8(uint32_t cp, uint8_t *out)
+{
+    // Upper code point bound and lead-byte marker for each encoded length
+    static const struct {
+        uint32_t limit;
+        uint8_t marker;
+    } forms[] = {
+        { 0x7F,       0x00 }, // 1 byte:  0xxxxxxx
+        { 0x7FF,      0xC0 }, // 2 bytes: 110xxxxx
+        { 0xFFFF,     0xE0 }, // 3 bytes: 1110xxxx
+        { 0x1FFFFF,   0xF0 }, // 4 bytes: 11110xxx
+        { 0x3FFFFFF,  0xF8 }, // 5 bytes: 111110xx
+        { 0x7FFFFFFF, 0xFC }, // 6 bytes: 1111110x
+        { 0xFFFFFFFF, 0xFE }, // 7 bytes: 11111110 (0x80000000 to 0xFFFFFFFF)
+    };
+
+    // Pick the shortest form that can hold the code point
+    int length = 1;
+    while (cp > forms[length - 1].limit) {
+        length++;
+    }
+
+    // Fill the continuation bytes from the end, six payload bits at a time
+    uint32_t rest = cp;
+    for (int i = length - 1; i > 0; i--) {
+        out[i] = (uint8_t)(0x80 | (rest & 0x3F));
+        rest >>= 6;
+    }
+
+    // Whatever bits are left belong in the lead byte
+    out[0] = (uint8_t)(forms[length - 1].marker | rest);
+}
+
+/*
+ * TAP test driver: verify libx52util_convert_utf8_string() for every BMP
+ * code point against the expected-output blob given as argv[1].
+ *
+ * The blob holds 0x10000 records of RECORD_SIZE bytes, indexed by code
+ * point: byte 0 is the output length, the following bytes the output.
+ * Supplementary-plane code points must convert like U+FFFD.
+ */
+int main(int argc, char *argv[])
+{
+    uint8_t input[8] = {0};
+    uint8_t output[RECORD_SIZE];
+
+    // Argument check
+    if (argc != 2) {
+        puts("Bail out! Invalid number of arguments");
+        puts("# Usage: libx52util-bmp-test <path-to-bin>");
+        return 1;
+    }
+
+    int fd = open(argv[1], O_RDONLY);
+    if (fd < 0) {
+        printf("Bail out! Error %d opening bin file %s: %s\n",
+                errno, argv[1], strerror(errno));
+        return 1;
+    }
+
+    uint8_t *expected_blob = mmap(NULL, 0x10000 * RECORD_SIZE,
+                                  PROT_READ, MAP_SHARED, fd, 0);
+    if (expected_blob == MAP_FAILED) {
+        printf("Bail out! MMAP failed with error %d: %s\n",
+                errno, strerror(errno));
+        close(fd);
+        return 1;
+    }
+
+    puts("TAP version 13");
+    // Check the 256 BMP Pages, plus the supplementary pages
+    puts("1..257");
+
+    for (uint32_t page = 0; page < 256; page++) {
+        bool page_ok = true;
+
+        for (uint32_t offset = 0; offset < 256; offset++) {
+            uint32_t cp = page * 256 + offset;
+            const uint8_t *rec = &expected_blob[cp * RECORD_SIZE];
+
+            memset(input, 0, sizeof(input));
+            memset(output, 0, sizeof(output));
+            encode_utf8(cp, input);
+            size_t len = sizeof(output);
+
+            int result = libx52util_convert_utf8_string(input, output, &len);
+            if (result != 0) {
+                page_ok = false;
+                printf("# Bad result @ %04X: %d\n", cp, result);
+                break;
+            }
+
+            // result is OK, check against the expected blob
+            if (len != rec[0]) {
+                page_ok = false;
+                printf("# Length mismatch @ %04X: expected %u, got %zu\n",
+                        cp, rec[0], len);
+                break;
+            }
+
+            // Length is OK, check the bytes
+            if (memcmp(output, &rec[1], rec[0]) != 0) {
+                page_ok = false;
+                printf("# Output mismatch @ %04X:\n", cp);
+                printf("# exp/got:");
+                for (size_t i = 0; i < len; i++) {
+                    printf("%02X/%02X ", rec[i+1], output[i]);
+                }
+                puts("");
+                break;
+            }
+        }
+
+        printf("%sok - %u Page 0x%02x\n", page_ok ? "": "not ",
+                page + 1, page);
+    }
+
+    // Supplementary planes (U+10000 and up) must convert like U+FFFD
+    bool smp_pages_ok = true;
+    for (uint32_t smp = 0x1; smp <= 0x10 && smp_pages_ok; smp++) {
+        const uint8_t *rec = &expected_blob[0xFFFD * RECORD_SIZE];
+        for (uint32_t offset = 0; offset < 0x100; offset += 0xFF) {
+            // Plane base is smp * 0x10000; smp * 256 would stay inside the BMP
+            uint32_t cp = smp * 0x10000 + offset;
+
+            memset(input, 0, sizeof(input));
+            memset(output, 0, sizeof(output));
+            size_t len = sizeof(output);
+            encode_utf8(cp, input);
+
+            int result = libx52util_convert_utf8_string(input, output, &len);
+            if (result != 0) {
+                smp_pages_ok = false;
+                printf("# Bad result @ %08X: %d\n", cp, result);
+                break;
+            }
+
+            // result is OK, check against the expected blob
+            if (len != rec[0]) {
+                smp_pages_ok = false;
+                printf("# Length mismatch @ %08X: expected %u, got %zu\n",
+                        cp, rec[0], len);
+                break;
+            }
+
+            // Length is OK, check the bytes
+            if (memcmp(output, &rec[1], rec[0]) != 0) {
+                smp_pages_ok = false;
+                printf("# Output mismatch @ %08X:\n", cp);
+                printf("# exp/got:");
+                for (size_t i = 0; i < len; i++) {
+                    printf("%02X/%02X ", rec[i+1], output[i]);
+                }
+                puts("");
+                break;
+            }
+        }
+    }
+    printf("%sok - 257 SMP tests\n", smp_pages_ok ? "" : "not ");
+
+    // Cleanup
+    munmap(expected_blob, 0x10000 * RECORD_SIZE);
+    close(fd);
+    return 0;
+}
diff --git a/libx52util/x52_map_test_gen.py b/libx52util/x52_map_test_gen.py
deleted file mode 100755
index 60567d6..0000000
--- a/libx52util/x52_map_test_gen.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env python3
-"""Generate a test script for the convert function"""
-
-import argparse
-import re
-
-def parse_file(map_file):
-    """Read the map file, strip out comments, and return a dictionary that
-       maps the UTF-8 encoded string to the X52 MFD character"""
-
-    # If we are running this, then we know that the input map is likely
-    # in a sane format already.
-    char_dict = {}
-
-    with open(map_file, 'r', encoding='utf-8') as map_fd:
-        for line in map_fd:
-            line = re.sub(r'#.*$', '', line).strip()
-
-            if not line:
-                # Comment line, skip
-                continue
-
-            key, out = line.split()
-            in_char = int(key, 0)
-
-            if len(out) == 1:
-                out_byte = ord(out)
-            else:
-                out_byte = int(out, 0)
-
-            char_dict[in_char] = out_byte
-
-    return char_dict
-
-def generate_positive_test_cases(char_dict):
-    """Generate a set of positive test cases"""
-    # For every string in the dictionary, generate a test case that tests
-    # the input against the output
-    TEST_CASE_FMT = """
-static void test_map_{in_char}(void **state) {{
-    (void)state;
-    const uint8_t input_array[] = {{ {in_bytes}, 0 }};
-    const uint8_t expected_output[2] = {{ {out_byte}, 0 }};
-    size_t out_len = 20;
-    uint8_t output[20] = {{ 0 }};
-    int rc;
-
-    rc = libx52util_convert_utf8_string(input_array, output, &out_len);
-    assert_int_equal(rc, 0);
-    assert_int_equal(out_len, 1);
-    assert_memory_equal(output, expected_output, 2);
-}}
-"""
-
-    output = ""
-    for in_char, out_byte in char_dict.items():
-        in_bytes = ", ".join(hex(c) for c in chr(in_char).encode('utf-8'))
-        in_tc = hex(in_char)
-
-        output += TEST_CASE_FMT.format(in_char=in_tc, in_bytes=in_bytes, out_byte=out_byte)
-
-    output += """
-const struct CMUnitTest tests[] = {
-"""
-
-    for in_char in sorted(char_dict.keys()):
-        output += f"    cmocka_unit_test(test_map_{hex(in_char)}),\n"
-
-    output += '};\n'
-
-    return output
-
-TEST_HEADER = """
-#include <stdint.h>
-#include <stddef.h>
-#include <stdarg.h>
-#include <setjmp.h>
-#include <cmocka.h>
-
-#include "libx52util.h"
-"""
-
-TEST_FOOTER = """
-int main(void) {
-    cmocka_set_message_output(CM_OUTPUT_TAP);
-    cmocka_run_group_tests(tests, NULL, NULL);
-    return 0;
-}
-"""
-
-def main():
-    """Generate X52 map test suite"""
-    parser = argparse.ArgumentParser(description='Generate map test cases')
-    parser.add_argument('INPUT_FILE', help="Input character map file")
-    parser.add_argument('OUTPUT_FILE', help="Generated test script")
-    args = parser.parse_args()
-
-    char_dict = parse_file(args.INPUT_FILE)
-    test_cases = generate_positive_test_cases(char_dict)
-
-    with open(args.OUTPUT_FILE, 'w', encoding='utf-8') as out_fd:
-        print(TEST_HEADER, file=out_fd)
-        print(test_cases, file=out_fd)
-        print(TEST_FOOTER, file=out_fd)
-
-if __name__ == '__main__':
-    main()