From fb216de10780fdc38b09eac4b454f7869088d3c9 Mon Sep 17 00:00:00 2001
From: HP <2726519488@qq.com>
Date: Tue, 9 Jun 2026 18:17:52 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dwindows=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=E4=B8=8B=E9=9F=B3=E9=A2=91=E6=92=AD=E6=94=BE=E9=97=AE=E9=A2=98?=
 =?UTF-8?q?=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored by git am and is not part of the commit message):

 * Subject, decoded: "Fix the audio playback problem on the Windows platform."
 * This copy was rebuilt from a whitespace-collapsed version of the patch in
   which template arguments had been stripped. Indentation, the vector element
   type (int16_t) and the cast target types are inferred from the surrounding
   code. In particular, confirm the cast type of channels_ in SdlAudioInput.cpp,
   the std::max call in SdlAudioOutput.cpp and the parameter type of
   player.update() against the tree before applying.
 * Changes relative to the original patch:
     - TomGameApp.cpp: the per-update request is computed in 64 bits, clamped to
       8192 samples first and only then aligned to whole frames. Previously the
       cap could split an interleaved frame for 3- or 6-channel output, and a
       channel count of zero caused a modulo by zero.
     - VoiceEffect.cpp: resample() tracks its read position in double instead of
       float so interpolation stays accurate for long clips.
     - English comments added on the new API and on non-obvious lines.
 * The post-image blob ids on the "index" lines predate these changes.

 src/Apps/Game/src/app/TomGameApp.cpp    | 43 ++++++++++-
 src/Apps/Game/src/audio/VoiceEffect.cpp | 91 +++++++++++++++++++++++++
 src/Apps/Game/src/audio/VoiceEffect.h   | 19 ++++++
 src/Core/Platform/SdlAudioInput.cpp     |  4 ++
 src/Core/Platform/SdlAudioOutput.cpp    |  4 ++
 5 files changed, 159 insertions(+), 2 deletions(-)

diff --git a/src/Apps/Game/src/app/TomGameApp.cpp b/src/Apps/Game/src/app/TomGameApp.cpp
index 3c35588..ae39e09 100644
--- a/src/Apps/Game/src/app/TomGameApp.cpp
+++ b/src/Apps/Game/src/app/TomGameApp.cpp
@@ -243,7 +243,29 @@ namespace Game
             return;
         }
 
-        player.set_voice(samples, audioSampleRate, audioChannels);
+        // The device may open with a different format than requested, so open it
+        // first and convert the recording to the format that was actually obtained.
+        if (!audioOutput->is_open() &&
+            !audioOutput->init("default", audioSampleRate, audioChannels))
+        {
+            std::cerr << "[WARN] Audio output init failed." << std::endl;
+            back_to_idle();
+            return;
+        }
+
+        const uint32_t playbackSampleRate = audioOutput->get_sample_rate();
+        const uint32_t playbackChannels = audioOutput->get_channels();
+        std::vector<int16_t> playbackSamples = samples;
+        if (playbackChannels != audioChannels)
+        {
+            playbackSamples = VoiceEffect::convert_channels(playbackSamples, audioChannels, playbackChannels);
+        }
+        if (playbackSampleRate != audioSampleRate)
+        {
+            playbackSamples = VoiceEffect::resample(playbackSamples, audioSampleRate, playbackSampleRate, playbackChannels);
+        }
+
+        player.set_voice(playbackSamples, playbackSampleRate, playbackChannels);
         player.play();
         speakingAnimationMs = 0;
         state = TomGameState::Speaking;
@@ -259,7 +281,24 @@ namespace Game
             return;
         }
 
-        player.update(*audioOutput, 1024);
+        const uint64_t maxRequestSamples = 8192u;
+        const uint32_t playbackSampleRate = audioOutput->is_open() ? audioOutput->get_sample_rate() : audioSampleRate;
+        const uint32_t playbackChannels = audioOutput->is_open() ? audioOutput->get_channels() : audioChannels;
+        // 64-bit math: rate * channels * deltaMs does not fit in 32 bits for a long frame.
+        const uint64_t samplesPerSecond = static_cast<uint64_t>(playbackSampleRate) * playbackChannels;
+        uint64_t requestCount = samplesPerSecond * deltaMs / 1000u;
+        // Request at least 1/20 s of audio, and never less than one frame.
+        requestCount = std::max(requestCount, samplesPerSecond / 20u);
+        requestCount = std::max<uint64_t>(requestCount, playbackChannels);
+        requestCount = std::min(requestCount, maxRequestSamples);
+        // Align after clamping so the cap cannot split an interleaved frame
+        // (8192 is not a multiple of 3 or 6 channels); also avoids a modulo by zero.
+        if (playbackChannels > 1)
+        {
+            requestCount -= requestCount % playbackChannels;
+        }
+
+        player.update(*audioOutput, static_cast<size_t>(requestCount));
         if (player.is_finished())
         {
             back_to_idle();
diff --git a/src/Apps/Game/src/audio/VoiceEffect.cpp b/src/Apps/Game/src/audio/VoiceEffect.cpp
index 17860da..4dad037 100644
--- a/src/Apps/Game/src/audio/VoiceEffect.cpp
+++ b/src/Apps/Game/src/audio/VoiceEffect.cpp
@@ -66,6 +66,97 @@ namespace Game
         return output;
     }
 
+    std::vector<int16_t> VoiceEffect::resample(
+        const std::vector<int16_t>& samples,
+        uint32_t sourceRate,
+        uint32_t targetRate,
+        uint32_t channels)
+    {
+        if (samples.empty() || sourceRate == 0 || targetRate == 0 || channels == 0 || sourceRate == targetRate)
+        {
+            return samples;
+        }
+
+        const size_t sourceFrameCount = samples.size() / channels;
+        if (sourceFrameCount == 0)
+        {
+            return std::vector<int16_t>();
+        }
+
+        // Round the output length up so a short input never resamples to nothing.
+        const size_t targetFrameCount = std::max<size_t>(
+            1,
+            (sourceFrameCount * static_cast<size_t>(targetRate) + static_cast<size_t>(sourceRate) - 1) /
+            static_cast<size_t>(sourceRate));
+        std::vector<int16_t> output(targetFrameCount * channels, 0);
+        // Track the read position in double precision: a float position quantizes the
+        // interpolation fraction more and more coarsely as the frame index grows.
+        const double sourceStep = static_cast<double>(sourceRate) / static_cast<double>(targetRate);
+
+        for (size_t outFrame = 0; outFrame < targetFrameCount; ++outFrame)
+        {
+            const double sourceFrame = static_cast<double>(outFrame) * sourceStep;
+            const size_t source0 = std::min(static_cast<size_t>(sourceFrame), sourceFrameCount - 1);
+            const size_t source1 = std::min(source0 + 1, sourceFrameCount - 1);
+            const float t = static_cast<float>(sourceFrame - static_cast<double>(source0));
+
+            for (uint32_t channel = 0; channel < channels; ++channel)
+            {
+                const int16_t a = samples[source0 * channels + channel];
+                const int16_t b = samples[source1 * channels + channel];
+                const float mixed = static_cast<float>(a) + (static_cast<float>(b) - static_cast<float>(a)) * t;
+                output[outFrame * channels + channel] = clamp_to_sample(mixed);
+            }
+        }
+
+        return output;
+    }
+
+    std::vector<int16_t> VoiceEffect::convert_channels(
+        const std::vector<int16_t>& samples,
+        uint32_t sourceChannels,
+        uint32_t targetChannels)
+    {
+        if (samples.empty() || sourceChannels == 0 || targetChannels == 0 || sourceChannels == targetChannels)
+        {
+            return samples;
+        }
+
+        const size_t frameCount = samples.size() / sourceChannels;
+        if (frameCount == 0)
+        {
+            return std::vector<int16_t>();
+        }
+
+        std::vector<int16_t> output(frameCount * targetChannels, 0);
+        for (size_t frame = 0; frame < frameCount; ++frame)
+        {
+            for (uint32_t targetChannel = 0; targetChannel < targetChannels; ++targetChannel)
+            {
+                if (sourceChannels == 1)
+                {
+                    output[frame * targetChannels + targetChannel] = samples[frame * sourceChannels];
+                }
+                else if (targetChannels == 1)
+                {
+                    int32_t mixed = 0;
+                    for (uint32_t sourceChannel = 0; sourceChannel < sourceChannels; ++sourceChannel)
+                    {
+                        mixed += samples[frame * sourceChannels + sourceChannel];
+                    }
+                    output[frame] = static_cast<int16_t>(mixed / static_cast<int32_t>(sourceChannels));
+                }
+                else
+                {
+                    const uint32_t sourceChannel = std::min(targetChannel, sourceChannels - 1);
+                    output[frame * targetChannels + targetChannel] = samples[frame * sourceChannels + sourceChannel];
+                }
+            }
+        }
+
+        return output;
+    }
+
     std::vector<int16_t> VoiceEffect::trim_silence(
         const std::vector<int16_t>& samples,
         float threshold,
diff --git a/src/Apps/Game/src/audio/VoiceEffect.h b/src/Apps/Game/src/audio/VoiceEffect.h
index c3242d4..36668ca 100644
--- a/src/Apps/Game/src/audio/VoiceEffect.h
+++ b/src/Apps/Game/src/audio/VoiceEffect.h
@@ -21,6 +21,25 @@ namespace Game
             const std::vector<int16_t>& samples,
             float gain);
 
+        // Linearly resamples interleaved 16-bit samples from sourceRate to targetRate.
+        // Returns the input unchanged when it is empty, when a rate or the channel
+        // count is zero, or when the two rates already match.
+        static std::vector<int16_t> resample(
+            const std::vector<int16_t>& samples,
+            uint32_t sourceRate,
+            uint32_t targetRate,
+            uint32_t channels = 1);
+
+        // Converts interleaved frames between channel counts: mono input is copied to
+        // every output channel, a mono target receives the average of the source
+        // channels, and otherwise channels map one-to-one (surplus source channels are
+        // dropped, the last source channel is repeated for extra outputs). Returns the
+        // input unchanged when it is empty, a count is zero, or the counts match.
+        static std::vector<int16_t> convert_channels(
+            const std::vector<int16_t>& samples,
+            uint32_t sourceChannels,
+            uint32_t targetChannels);
+
         static std::vector<int16_t> trim_silence(
             const std::vector<int16_t>& samples,
             float threshold = 0.02f,
diff --git a/src/Core/Platform/SdlAudioInput.cpp b/src/Core/Platform/SdlAudioInput.cpp
index fe04da8..1c39c41 100644
--- a/src/Core/Platform/SdlAudioInput.cpp
+++ b/src/Core/Platform/SdlAudioInput.cpp
@@ -94,6 +94,10 @@ namespace Platform
         channels_ = static_cast<uint32_t>(obtained.channels);
         opened_ = true;
 
+        std::cout << "SdlAudioInput opened: requested "
+                  << sample_rate << " Hz/" << channels << " ch, obtained "
+                  << sample_rate_ << " Hz/" << channels_ << " ch." << std::endl;
+
         SDL_PauseAudioDevice(device_id_, 0);
         return true;
     }
diff --git a/src/Core/Platform/SdlAudioOutput.cpp b/src/Core/Platform/SdlAudioOutput.cpp
index 97feca8..c421580 100644
--- a/src/Core/Platform/SdlAudioOutput.cpp
+++ b/src/Core/Platform/SdlAudioOutput.cpp
@@ -125,6 +125,10 @@ namespace Platform
         max_queued_samples_ = std::max(channels_, sample_rate_ * channels_ / 8);
         opened_ = true;
 
+        std::cout << "SdlAudioOutput opened: requested "
+                  << sample_rate << " Hz/" << channels << " ch, obtained "
+                  << sample_rate_ << " Hz/" << channels_ << " ch." << std::endl;
+
         SDL_PauseAudioDevice(device_id_, 0);
         return true;
     }