73 lines
2.6 KiB
Python
73 lines
2.6 KiB
Python
import sys
|
|
import wave
|
|
import math
|
|
import numpy as np
|
|
|
|
# Make the tools directory importable so kws_python_test can be loaded.
# The path is relative, so it resolves against the current working directory.
|
|
sys.path.insert(0, "src/Apps/Game/tools")
|
|
import kws_python_test as kws
|
|
|
|
|
|
def load_wav(path):
    """Read a 16-bit PCM WAV file into a NumPy array.

    Args:
        path: Location of the WAV file, passed straight to ``wave.open``.

    Returns:
        A ``(samples, sample_rate, channels)`` tuple. ``samples`` is a
        one-dimensional ``int16`` array containing the frame data exactly as
        stored in the file (no de-interleaving is done here), ``sample_rate``
        is the frame rate reported by the file and ``channels`` is its
        channel count.

    Raises:
        ValueError: If the file does not store 2-byte samples. The frame
            bytes are decoded as 16-bit integers, so any other width would
            be reinterpreted into meaningless values.
    """
    with wave.open(path, "rb") as wav:
        sample_width = wav.getsampwidth()
        if sample_width != 2:
            raise ValueError(
                f"expected 16-bit PCM (2 bytes per sample), "
                f"got {sample_width} byte(s) per sample"
            )
        sample_rate = wav.getframerate()
        channels = wav.getnchannels()
        frames = wav.readframes(wav.getnframes())

    # Decode explicitly as little-endian 16-bit, then convert to the native
    # int16 dtype; copy=False avoids a copy when the two already match.
    samples = np.frombuffer(frames, dtype="<i2").astype(np.int16, copy=False)
    return samples, sample_rate, channels
|
|
|
|
|
|
def main(
    model_path="E:/kws_model/kws_ref_model_float32.tflite",
    wav_path="E:/kws_model/my_up_gain5.wav",
):
    """Run one window of a WAV file through the model and print diagnostics.

    The steps are: load the interpreter, load the WAV file, prepare the
    audio, normalize the window at offset 0, extract features from it, run a
    single inference and print the raw output together with its softmax.
    Statistics are printed after every stage.

    Args:
        model_path: Model file handed to ``kws.load_interpreter``. Defaults
            to the location this script was originally hard-coded to.
        wav_path: WAV file handed to ``load_wav``. Defaults to the location
            this script was originally hard-coded to.

    Returns:
        None. Returns early, after printing an error, when the prepared
        audio is empty.
    """
    print(f"Loading model: {model_path}")
    backend, interpreter = kws.load_interpreter(model_path)
    print(f"Backend: {backend}")

    print(f"Loading WAV: {wav_path}")
    samples, sample_rate, channels = load_wav(wav_path)
    print(f" samples: {samples.size}, rate: {sample_rate}, channels: {channels}")
    print(f" raw max abs: {_peak_abs(samples)}")

    print("Preparing audio...")
    prepared = kws.prepare_audio(samples, sample_rate, channels, trim=True)
    print(f" prepared size: {prepared.size}")
    # _peak_abs tolerates an empty array, so the emptiness check below is
    # actually reachable (np.max raises ValueError on zero-size input).
    print(f" prepared max abs: {_peak_abs(prepared)}")
    if prepared.size == 0:
        print("ERROR: prepared audio is empty!")
        return

    print("Extracting features for first window...")
    window = kws.normalize_window(prepared, 0)
    print(f" window max: {np.max(window)}")
    print(f" window min: {np.min(window)}")
    print(f" window non-zero count: {np.count_nonzero(window)}")

    features = kws.extract_mfcc_features(window)
    print(f" features shape: {features.shape}")
    print(f" features max: {np.max(features)}")
    print(f" features min: {np.min(features)}")
    print(f" features mean: {np.mean(features)}")

    print("Running inference...")
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    tensor = kws.tensor_from_features(features, input_details)
    print(f" tensor shape: {tensor.shape}")
    print(f" tensor max: {np.max(tensor)}")

    interpreter.set_tensor(input_details["index"], tensor)
    interpreter.invoke()
    raw_output = interpreter.get_tensor(output_details["index"])
    print(f" raw output shape: {raw_output.shape}")
    print(f" raw output dtype: {raw_output.dtype}")
    print(f" raw output: {raw_output}")
    print(f" raw output max: {np.max(raw_output)}")
    print(f" raw output min: {np.min(raw_output)}")

    # Subtract the maximum before exponentiating so exp() cannot overflow.
    exponentials = np.exp(raw_output - np.max(raw_output))
    softmax = exponentials / np.sum(exponentials)
    print(f" softmax: {softmax}")


def _peak_abs(values):
    """Return the largest absolute value in *values*, or 0 when it is empty."""
    values = np.asarray(values)
    if values.size == 0:
        return 0
    if np.issubdtype(values.dtype, np.signedinteger):
        # abs() of the most negative value wraps in a fixed-width signed
        # dtype (for int16, abs(-32768) stays -32768), so widen first.
        values = values.astype(np.int64)
    return np.max(np.abs(values))
|
|
|
|
|
|
# Script entry point: run the diagnostic only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|