Creating a diagnostics module for Python software

For Theengs Gateway we regularly got bug reports that were difficult to debug, as they depended on the operating system, Python version, application configuration, and Bluetooth adapter. Only after some back and forth did we get an idea of the user's environment and could we start to identify the issue (or not). Then I discovered that Textual has a convenient solution for this: a diagnose command that prints information about the Textual library and its environment to help diagnose problems.

I borrowed this code from Textual and adapted it for Theengs Gateway's usage. So now we simply ask the user to run this command and include its output in the issue description on GitHub. Theengs Gateway's diagnose module looks like this:

TheengsGateway/diagnose.py (Source)

import asyncio
import json
import os
import platform
import re
import sys

from importlib_metadata import PackageNotFoundError, version

# Path of the configuration file read by _config(): theengsgw.conf in the
# user's home directory.
_conf_path = os.path.expanduser("~") + "/theengsgw.conf"
# Matches six colon-separated pairs of uppercase hexadecimal digits. Group 1
# captures the first three pairs, including their trailing colons.
_ADDR_RE = re.compile(r"^(([0-9A-F]{2}:){3})([0-9A-F]{2}:){2}[0-9A-F]{2}$")


def _anonymize_strings(fields, config) -> None:
    """Mask the values of the given fields in a configuration, in place.

    Args:
        fields: The names of the fields to mask.
        config: The configuration to modify. Fields it lacks are skipped,
            so no new keys are added.
    """
    for name in fields:
        if name not in config:
            continue
        config[name] = "***"


def _anonymize_address(address) -> str:
    """Hide the last three bytes of a Bluetooth address.

    Args:
        address: The address to anonymize.

    Returns:
        The first three bytes of the address followed by XX:XX:XX, or
        "INVALID ADDRESS" when the address doesn't match _ADDR_RE.
    """
    matched = _ADDR_RE.match(address)
    if matched is None:
        return "INVALID ADDRESS"
    return f"{matched.group(1)}XX:XX:XX"


def _anonymize_addresses(field, config) -> None:
    """Anonymize every address stored under a configuration field, in place.

    Args:
        field: The name of the field holding the addresses.
        config: The configuration to modify. Nothing happens when it has no
            such field.
    """
    try:
        addresses = config[field]
    except KeyError:
        # A configuration without this field is left as it is.
        pass
    else:
        config[field] = [_anonymize_address(entry) for entry in addresses]


# This function is taken from Textual
def _section(title, values) -> None:
    """Print a collection of named values within a titled section.

    The section is printed as a Markdown heading followed by a two-column
    table. The columns are never narrower than their "Name" and "Value"
    headers, and an empty collection prints a table with only the header.

    Args:
        title: The title for the section.
        values: The values to print out.
    """
    # Convert the values once: the strings are needed both to compute the
    # column width and to print the rows.
    rows = {name: str(value) for name, value in values.items()}
    # Starting from the header lengths keeps max() from failing on an empty
    # collection and keeps the header row aligned when all entries are short.
    max_name = max([len("Name"), *map(len, rows)])
    max_value = max([len("Value"), *map(len, rows.values())])
    print(f"## {title}")
    print()
    print(f"| {'Name':{max_name}} | {'Value':{max_value}} |")
    print(f"|-{'-' * max_name}-|-{'-' * max_value}-|")
    for name, value in rows.items():
        print(f"| {name:{max_name}} | {value:{max_value}} |")
    print()


def _versions() -> None:
    """Print useful version numbers.

    Each package is looked up on its own, so a missing package only produces
    an installation hint for that package and the versions of the other
    packages are still printed.
    """
    # Maps the name shown in the table to the name of the installed package.
    distributions = {
        "Theengs Gateway": "TheengsGateway",
        "Theengs Decoder": "TheengsDecoder",
        "Bleak": "bleak",
        "Bluetooth Clocks": "bluetooth-clocks",
        "Bluetooth Numbers": "bluetooth-numbers",
        "Paho MQTT": "paho-mqtt",
    }
    # bluetooth-adapters is only looked up on Python 3.9 and later.
    if sys.version_info[:2] >= (3, 9):
        distributions["Bluetooth Adapters"] = "bluetooth-adapters"

    packages = {}
    for label, distribution in distributions.items():
        try:
            packages[label] = version(distribution)
        except PackageNotFoundError as e:
            print(f"Package {e.name} not found. Please install it with:")
            print()
            print(f"    pip install {e.name}")
            print()

    # Skip the table when not a single package was found.
    if packages:
        _section("Package Versions", packages)


def _python() -> None:
    """Print the version, implementation, compiler and executable of Python."""
    details = {
        "Version": platform.python_version(),
        "Implementation": platform.python_implementation(),
        "Compiler": platform.python_compiler(),
        "Executable": sys.executable,
    }
    _section("Python", details)


def _os() -> None:
    """Print information about the operating system.

    On Linux with Python 3.10 or later, the distribution name from the
    os-release file is added when that file can be read.
    """
    os_parameters = {
        "System": platform.system(),
        "Release": platform.release(),
        "Version": platform.version(),
        "Machine type": platform.machine(),
    }
    # platform.freedesktop_os_release() was added in Python 3.10.
    if platform.system() == "Linux" and sys.version_info[:2] >= (3, 10):
        try:
            os_parameters["Distribution"] = platform.freedesktop_os_release()[
                "PRETTY_NAME"
            ]
        except OSError:
            # No os-release file could be read. Leave the distribution out
            # instead of aborting the rest of the diagnostics.
            pass

    _section("Operating System", os_parameters)


def _config() -> None:
    """Print the anonymized contents of the configuration file.

    The user and pass fields are masked and the addresses in the time_sync
    field are anonymized before the configuration is printed as JSON in a
    Markdown code block. A missing or malformed configuration file is
    reported instead of aborting the diagnostics.
    """
    print("## Configuration")
    print()
    try:
        with open(_conf_path, encoding="utf-8") as config_file:
            config = json.load(config_file)
    except FileNotFoundError:
        print(f"Configuration file not found: {_conf_path}")
        print()
        return
    except json.JSONDecodeError as e:
        # The error message only describes the position of the problem, not
        # the contents of the file.
        print(f"Configuration file is not valid JSON: {_conf_path} ({e})")
        print()
        return

    _anonymize_strings(["user", "pass"], config)
    _anonymize_addresses("time_sync", config)
    print("```")
    print(json.dumps(config, sort_keys=True, indent=4))
    print("```")
    print()


async def _adapters() -> None:
    """Print the default Bluetooth adapter and the properties of each adapter.

    The address of every adapter is anonymized before it is printed. Nothing
    is printed on Python versions older than 3.9.
    """
    if sys.version_info[:2] < (3, 9):
        return

    from bluetooth_adapters import get_adapters

    print("## Bluetooth adapters")
    print()
    manager = get_adapters()
    await manager.refresh()
    print(f"Default adapter: {manager.default_adapter}")
    print()

    found = manager.adapters
    for name in sorted(found):
        properties = found[name]
        properties["address"] = _anonymize_address(properties["address"])
        # The extra "#" turns the section's level two title into level three.
        print("#", end="")
        _section(name, properties)


async def diagnostics():
    """Print the complete diagnostics report in Markdown syntax."""
    print("# Theengs Gateway Diagnostics")
    print()
    # The synchronous sections, in the order they appear in the report.
    for print_section in (_versions, _python, _os, _config):
        print_section()
    await _adapters()


# Print the diagnostics when this module is run as a script.
if __name__ == "__main__":
    asyncio.run(diagnostics())

When you run this module, it prints a level one Markdown title (# Theengs Gateway Diagnostics) and then calls several functions. Each of these functions prints a level two Markdown title and some diagnostic information.

First, it displays the version numbers of the Python package for Theengs Gateway and some of its dependencies. This helps us immediately identify outdated versions, and we can suggest an update. Next, it shows information about the Python platform and the operating system. These functions are all borrowed from Textual's diagnose module, including the _section helper function to print a collection of named values within a titled section.

Since many Theengs Gateway issues depend on the exact configuration used, I also added a section that displays the contents of the configuration file (a JSON file). However, this configuration file contains some information that shouldn't be shared publicly, such as a username and password for an MQTT broker, or Bluetooth addresses. I could remove these fields in the code, but then we wouldn't know if the bug might be a result of a configuration file lacking one of these fields. So I created a simple function to anonymize specific fields:

def _anonymize_strings(fields, config) -> None:
    """Mask the values of the given fields in a configuration, in place.

    Args:
        fields: The names of the fields to mask.
        config: The configuration to modify. Fields it lacks are skipped,
            so no new keys are added.
    """
    for name in fields:
        if name not in config:
            continue
        config[name] = "***"

Then I can call this function on the configuration to anonymize the user and pass fields:

_anonymize_strings(["user", "pass"], config)

For Bluetooth addresses, I created a similar function. I want to keep the first three bytes of an address, which can point to the device manufacturer and be helpful for debugging purposes. Using a regular expression, I extract these bytes and add XX:XX:XX. This function looks like this:

# Matches six colon-separated pairs of uppercase hexadecimal digits. Group 1
# captures the first three pairs, including their trailing colons.
_ADDR_RE = re.compile(r"^(([0-9A-F]{2}:){3})([0-9A-F]{2}:){2}[0-9A-F]{2}$")


def _anonymize_address(address) -> str:
    """Hide the last three bytes of a Bluetooth address.

    Args:
        address: The address to anonymize.

    Returns:
        The first three bytes of the address followed by XX:XX:XX, or
        "INVALID ADDRESS" when the address doesn't match _ADDR_RE.
    """
    matched = _ADDR_RE.match(address)
    if matched is None:
        return "INVALID ADDRESS"
    return f"{matched.group(1)}XX:XX:XX"

In the last part of the diagnostic information, where I display the information of the computer's Bluetooth adapters, I can call this function to anonymize the adapter's Bluetooth address:

properties["address"] = _anonymize_address(properties["address"])

Running the python -m TheengsGateway.diagnose command shows output like this:

# Theengs Gateway Diagnostics

## Package Versions

| Name               | Value  |
|--------------------|--------|
| Theengs Gateway    | 3.0    |
| Theengs Decoder    | 1.4.0  |
| Bleak              | 0.20.0 |
| Bluetooth Clocks   | 0.1.0  |
| Bluetooth Numbers  | 1.1.0  |
| Paho MQTT          | 1.6.1  |
| Bluetooth Adapters | 0.15.3 |

## Python

| Name           | Value           |
|----------------|-----------------|
| Version        | 3.10.6          |
| Implementation | CPython         |
| Compiler       | GCC 11.3.0      |
| Executable     | /usr/bin/python |

## Operating System

| Name         | Value                                               |
|--------------|-----------------------------------------------------|
| System       | Linux                                               |
| Release      | 6.2.0-10005-tuxedo                                  |
| Version      | #5 SMP PREEMPT_DYNAMIC Wed Mar 22 12:42:40 UTC 2023 |
| Machine type | x86_64                                              |
| Distribution | Ubuntu 22.04.1 LTS                                  |

## Configuration

```
{
    "adapter": "hci0",
    "ble_scan_time": 1000,
    "ble_time_between_scans": 5,
    "discovery": 1,
    "discovery_device_name": "TheengsGateway",
    "discovery_filter": [
        "IBEACON",
        "GAEN",
        "MS-CDP"
    ],
    "discovery_topic": "homeassistant/sensor",
    "hass_discovery": 1,
    "host": "rhasspy",
    "log_level": "DEBUG",
    "lwt_topic": "home/TheengsGateway/LWT",
    "pass": "***",
    "port": 1883,
    "presence": 0,
    "presence_topic": "home/TheengsGateway/presence",
    "publish_advdata": 1,
    "publish_all": 1,
    "publish_topic": "home/TheengsGateway/BTtoMQTT",
    "scanning_mode": "active",
    "subscribe_topic": "home/+/BTtoMQTT/undecoded",
    "time_format": 1,
    "time_sync": [
        "58:2D:34:XX:XX:XX",
        "E7:2E:00:XX:XX:XX",
        "BC:C7:DA:XX:XX:XX",
        "10:76:36:XX:XX:XX"
    ],
    "user": "***"
}
```

## Bluetooth adapters

Default adapter: hci0

### hci0

| Name         | Value               |
|--------------|---------------------|
| address      | 9C:FC:E8:XX:XX:XX   |
| sw_version   | tux                 |
| hw_version   | usb:v1D6Bp0246d0540 |
| passive_scan | True                |
| manufacturer | Intel Corporate     |
| product      | 0029                |
| vendor_id    | 8087                |
| product_id   | 0029                |

### hci1

| Name         | Value                   |
|--------------|-------------------------|
| address      | 00:01:95:XX:XX:XX       |
| sw_version   | tux #2                  |
| hw_version   | usb:v1D6Bp0246d0540     |
| passive_scan | True                    |
| manufacturer | Sena Technologies, Inc. |
| product      | 0001                    |
| vendor_id    | 0a12                    |
| product_id   | 0001                    |

In the repository's issue template for bug reports, we ask for the output of this command. The user simply has to copy the output, which is already formatted in Markdown syntax. This displays titles, subtitles, and even tables cleanly, providing us with the necessary information:

/images/theengs-gateway-diagnose.png