# IMX6U-Game/tools/debug_kws.py

import sys
import wave
import math
import numpy as np

# Put the project's tools directory first on sys.path so that the
# kws_python_test script can be imported as a module and its functions reused.
# NOTE: the inserted path is relative, so it is resolved against the current
# working directory of the process running this script.
sys.path.insert(0, "src/Apps/Game/tools")
import kws_python_test as kws


def load_wav(path):
    """Read a 16-bit PCM WAV file and return its samples with basic metadata.

    Args:
        path: Location of the WAV file, in any form ``wave.open`` accepts.

    Returns:
        A ``(samples, sample_rate, channels)`` tuple. ``samples`` is a
        one-dimensional ``int16`` array holding every sample in file order
        (channels are not split apart), ``sample_rate`` is the frame rate
        reported by the file and ``channels`` is its channel count.

    Raises:
        ValueError: If the file does not store 2-byte samples. The decoding
            below reinterprets the raw bytes as 16-bit integers, which would
            silently yield garbage for any other sample width.
    """
    with wave.open(path, "rb") as wav:
        sample_width = wav.getsampwidth()
        if sample_width != 2:
            raise ValueError(
                f"unsupported sample width {sample_width} bytes in {path!r}; "
                "only 16-bit PCM WAV files are supported"
            )
        sample_rate = wav.getframerate()
        channels = wav.getnchannels()
        frames = wav.readframes(wav.getnframes())
    # WAV data is little-endian on disk; decode it explicitly as "<i2" and
    # then convert to the native int16 dtype (no copy when they already match).
    samples = np.frombuffer(frames, dtype="<i2").astype(np.int16, copy=False)
    return samples, sample_rate, channels


def main(
    model_path="E:/kws_model/kws_ref_model_float32.tflite",
    wav_path="E:/kws_model/my_up_gain5.wav",
):
    """Trace one WAV file through the KWS pipeline, printing stats per stage.

    The stages are: load the interpreter, load the WAV, prepare the audio,
    normalize the first window, extract features, build the input tensor and
    run a single inference. After each stage a few summary values (sizes,
    min/max, mean) are printed so that the point where the data goes wrong
    can be spotted.

    Args:
        model_path: Path of the model file handed to ``kws.load_interpreter``.
        wav_path: Path of the WAV recording to analyse.

    Returns:
        None. All results are printed. The function returns early, after
        printing an error, when the prepared audio is empty.
    """
    print(f"Loading model: {model_path}")
    backend, interpreter = kws.load_interpreter(model_path)
    print(f"Backend: {backend}")

    print(f"Loading WAV: {wav_path}")
    samples, sample_rate, channels = load_wav(wav_path)
    print(f"  samples: {samples.size}, rate: {sample_rate}, channels: {channels}")
    # Widen before abs(): in int16, abs(-32768) overflows and stays negative,
    # which would under-report the peak of clipped audio. An empty recording
    # has no peak, so report 0 instead of letting np.max raise.
    raw_peak = np.max(np.abs(samples.astype(np.int32))) if samples.size else 0
    print(f"  raw max abs: {raw_peak}")

    print("Preparing audio...")
    prepared = kws.prepare_audio(samples, sample_rate, channels, trim=True)
    print(f"  prepared size: {prepared.size}")
    # Check for emptiness before taking the max: np.max raises on a zero-size
    # array, which would hide the diagnostic message below.
    if prepared.size == 0:
        print("ERROR: prepared audio is empty!")
        return
    print(f"  prepared max abs: {np.max(np.abs(prepared))}")

    print("Extracting features for first window...")
    # The second argument is 0 here; presumably the offset/index of the
    # window inside the prepared audio -- confirm against kws_python_test.
    window = kws.normalize_window(prepared, 0)
    print(f"  window max: {np.max(window)}")
    print(f"  window min: {np.min(window)}")
    print(f"  window non-zero count: {np.count_nonzero(window)}")

    features = kws.extract_mfcc_features(window)
    print(f"  features shape: {features.shape}")
    print(f"  features max: {np.max(features)}")
    print(f"  features min: {np.min(features)}")
    print(f"  features mean: {np.mean(features)}")

    print("Running inference...")
    # Only the first input and first output tensor descriptions are used.
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    tensor = kws.tensor_from_features(features, input_details)
    print(f"  tensor shape: {tensor.shape}")
    print(f"  tensor max: {np.max(tensor)}")

    interpreter.set_tensor(input_details["index"], tensor)
    interpreter.invoke()
    raw_output = interpreter.get_tensor(output_details["index"])
    print(f"  raw output shape: {raw_output.shape}")
    print(f"  raw output dtype: {raw_output.dtype}")
    print(f"  raw output: {raw_output}")
    print(f"  raw output max: {np.max(raw_output)}")
    print(f"  raw output min: {np.min(raw_output)}")

    # Softmax over the whole output, shifted by the maximum so that exp()
    # cannot overflow; the exponentials are computed once and reused.
    exponentials = np.exp(raw_output - np.max(raw_output))
    softmax = exponentials / np.sum(exponentials)
    print(f"  softmax: {softmax}")


# Run the diagnostic only when this file is executed as a script, so that
# importing it does not trigger model loading or inference.
if __name__ == "__main__":
    main()