From fb216de10780fdc38b09eac4b454f7869088d3c9 Mon Sep 17 00:00:00 2001
From: HP <2726519488@qq.com>
Date: Tue, 9 Jun 2026 18:17:52 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dwindows=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=E4=B8=8B=E9=9F=B3=E9=A2=91=E6=92=AD=E6=94=BE=E9=97=AE=E9=A2=98?=
 =?UTF-8?q?=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Apps/Game/src/app/TomGameApp.cpp    | 37 ++++++++++-
 src/Apps/Game/src/audio/VoiceEffect.cpp | 88 +++++++++++++++++++++++++
 src/Apps/Game/src/audio/VoiceEffect.h   | 11 ++++
 src/Core/Platform/SdlAudioInput.cpp     |  4 ++
 src/Core/Platform/SdlAudioOutput.cpp    |  4 ++
 5 files changed, 142 insertions(+), 2 deletions(-)
diff --git a/src/Apps/Game/src/app/TomGameApp.cpp b/src/Apps/Game/src/app/TomGameApp.cpp
index 3c35588..ae39e09 100644
--- a/src/Apps/Game/src/app/TomGameApp.cpp
+++ b/src/Apps/Game/src/app/TomGameApp.cpp
@@ -243,7 +243,27 @@ namespace Game
 			return;
 		}
 
-		player.set_voice(samples, audioSampleRate, audioChannels);
+		if (!audioOutput->is_open() && // only initialise the output when it is not already open
+			!audioOutput->init("default", audioSampleRate, audioChannels))
+		{
+			std::cerr << "[WARN] Audio output init failed." << std::endl;
+			back_to_idle();
+			return; // give up on playback: nothing is handed to the player
+		}
+
+		uint32_t playbackSampleRate = audioOutput->get_sample_rate(); // format reported by the output, compared with the recording's below
+		uint32_t playbackChannels = audioOutput->get_channels();
+		std::vector<int16_t> playbackSamples = samples;
+		if (playbackChannels != audioChannels)
+		{
+			playbackSamples = VoiceEffect::convert_channels(playbackSamples, audioChannels, playbackChannels); // channels first, so resample() below runs on the final channel count
+		}
+		if (playbackSampleRate != audioSampleRate)
+		{
+			playbackSamples = VoiceEffect::resample(playbackSamples, audioSampleRate, playbackSampleRate, playbackChannels);
+		}
+
+		player.set_voice(playbackSamples, playbackSampleRate, playbackChannels); // player receives samples already in the output's rate/channels
 		player.play();
 		speakingAnimationMs = 0;
 		state = TomGameState::Speaking;
@@ -259,7 +279,20 @@ namespace Game
 			return;
 		}
 
-		player.update(*audioOutput, 1024);
+		// Size the request from deltaMs, but never below 1/20 s of audio or one frame, and never above 8192 samples.
+		const uint32_t playbackSampleRate = audioOutput->is_open() ? audioOutput->get_sample_rate() : audioSampleRate;
+		// Treat a zero channel count as mono so the frame-alignment modulo below cannot divide by zero.
+		const uint32_t playbackChannels =
+			std::max<uint32_t>(1u, audioOutput->is_open() ? audioOutput->get_channels() : audioChannels);
+		const uint32_t samplesPerSecond = playbackSampleRate * playbackChannels;
+		uint32_t requestCount = (samplesPerSecond * deltaMs) / 1000u;
+		requestCount = std::max<uint32_t>(requestCount, samplesPerSecond / 20u);
+		requestCount = std::max<uint32_t>(requestCount, playbackChannels);
+		// Clamp BEFORE aligning: 8192 is not a multiple of every channel count (e.g. 6), so clamping last breaks alignment.
+		requestCount = std::min<uint32_t>(requestCount, 8192u);
+		requestCount -= requestCount % playbackChannels;
+
+		player.update(*audioOutput, static_cast<int>(requestCount));
 		if (player.is_finished())
 		{
 			back_to_idle();
diff --git a/src/Apps/Game/src/audio/VoiceEffect.cpp b/src/Apps/Game/src/audio/VoiceEffect.cpp
index 17860da..4dad037 100644
--- a/src/Apps/Game/src/audio/VoiceEffect.cpp
+++ b/src/Apps/Game/src/audio/VoiceEffect.cpp
@@ -66,6 +66,94 @@ namespace Game
 		return output;
 	}
 
+	// Resamples interleaved 16-bit PCM from sourceRate to targetRate by linear interpolation between
+	// neighbouring source frames. Returns the input unchanged when it is empty, when a rate or the channel
+	// count is zero, or when the rates already match; returns an empty vector when no complete frame exists.
+	std::vector<int16_t> VoiceEffect::resample(
+		const std::vector<int16_t>& samples, uint32_t sourceRate, uint32_t targetRate, uint32_t channels)
+	{
+		if (samples.empty() || sourceRate == 0 || targetRate == 0 || channels == 0 || sourceRate == targetRate)
+		{
+			return samples;
+		}
+
+		const size_t sourceFrameCount = samples.size() / channels;
+		if (sourceFrameCount == 0)
+		{
+			return std::vector<int16_t>();
+		}
+
+		// ceil(sourceFrameCount * targetRate / sourceRate), in 64 bits so the product cannot wrap a 32-bit size_t.
+		const uint64_t scaledFrames = static_cast<uint64_t>(sourceFrameCount) * targetRate;
+		const size_t targetFrameCount = std::max<size_t>(1, static_cast<size_t>((scaledFrames + sourceRate - 1) / sourceRate));
+		std::vector<int16_t> output(targetFrameCount * channels, 0);
+
+		for (size_t outFrame = 0; outFrame < targetFrameCount; ++outFrame)
+		{
+			// Track the source position as an exact integer ratio: a float position loses fractional
+			// precision as the clip gets longer, which makes the interpolation weight jitter.
+			const uint64_t position = static_cast<uint64_t>(outFrame) * sourceRate;
+			const size_t source0 = std::min(static_cast<size_t>(position / targetRate), sourceFrameCount - 1);
+			const size_t source1 = std::min(source0 + 1, sourceFrameCount - 1);
+			const float t = static_cast<float>(position % targetRate) / static_cast<float>(targetRate);
+
+			for (uint32_t channel = 0; channel < channels; ++channel)
+			{
+				const float a = static_cast<float>(samples[source0 * channels + channel]);
+				const float b = static_cast<float>(samples[source1 * channels + channel]);
+				const float mixed = a + (b - a) * t;
+				output[outFrame * channels + channel] = clamp_to_sample(mixed);
+			}
+		}
+
+		return output;
+	}
+
+	// Re-maps interleaved PCM frames from sourceChannels to targetChannels. A mono target receives the
+	// integer average of the frame's source channels; any other target channel i copies source channel
+	// min(i, sourceChannels - 1). The input is returned untouched when it is empty, when either channel
+	// count is zero, or when both counts match; an empty vector is returned when no whole frame exists.
+	std::vector<int16_t> VoiceEffect::convert_channels(
+		const std::vector<int16_t>& samples, uint32_t sourceChannels, uint32_t targetChannels)
+	{
+		if (samples.empty() || sourceChannels == 0 || targetChannels == 0 || sourceChannels == targetChannels)
+		{
+			return samples;
+		}
+
+		const size_t totalFrames = samples.size() / sourceChannels;
+		if (totalFrames == 0)
+		{
+			return {};
+		}
+
+		std::vector<int16_t> converted(totalFrames * targetChannels, 0);
+		for (size_t f = 0; f < totalFrames; ++f)
+		{
+			const int16_t* in = &samples[f * sourceChannels];
+			int16_t* out = &converted[f * targetChannels];
+			if (targetChannels == 1)
+			{
+				// Down-mix to mono: sum in 32 bits, then divide by the channel count.
+				int32_t sum = 0;
+				for (uint32_t c = 0; c < sourceChannels; ++c)
+				{
+					sum += in[c];
+				}
+				out[0] = static_cast<int16_t>(sum / static_cast<int32_t>(sourceChannels));
+				continue;
+			}
+
+			// Also covers a mono source: min(c, 0) is always 0, so every target channel copies in[0].
+			for (uint32_t c = 0; c < targetChannels; ++c)
+			{
+				out[c] = in[std::min(c, sourceChannels - 1)];
+			}
+		}
+
+		return converted;
+	}
+
 	std::vector<int16_t> VoiceEffect::trim_silence(
 		const std::vector<int16_t>& samples,
 		float threshold,
diff --git a/src/Apps/Game/src/audio/VoiceEffect.h b/src/Apps/Game/src/audio/VoiceEffect.h
index c3242d4..36668ca 100644
--- a/src/Apps/Game/src/audio/VoiceEffect.h
+++ b/src/Apps/Game/src/audio/VoiceEffect.h
@@ -21,6 +21,17 @@ namespace Game
 			const std::vector<int16_t>& samples,
 			float gain);
 
+		static std::vector<int16_t> resample(
+			const std::vector<int16_t>& samples,
+			uint32_t sourceRate,
+			uint32_t targetRate,
+			uint32_t channels = 1);
+
+		static std::vector<int16_t> convert_channels(
+			const std::vector<int16_t>& samples,
+			uint32_t sourceChannels,
+			uint32_t targetChannels);
+
 		static std::vector<int16_t> trim_silence(
 			const std::vector<int16_t>& samples,
 			float threshold = 0.02f,
diff --git a/src/Core/Platform/SdlAudioInput.cpp b/src/Core/Platform/SdlAudioInput.cpp
index fe04da8..1c39c41 100644
--- a/src/Core/Platform/SdlAudioInput.cpp
+++ b/src/Core/Platform/SdlAudioInput.cpp
@@ -94,6 +94,10 @@ namespace Platform
 		channels_ = static_cast<uint32_t>(obtained.channels);
 		opened_ = true;
 
+		std::cout << "SdlAudioInput opened: requested "
+			<< sample_rate << " Hz/" << channels << " ch, obtained "
+			<< sample_rate_ << " Hz/" << channels_ << " ch." << std::endl;
+
 		SDL_PauseAudioDevice(device_id_, 0);
 		return true;
 	}
diff --git a/src/Core/Platform/SdlAudioOutput.cpp b/src/Core/Platform/SdlAudioOutput.cpp
index 97feca8..c421580 100644
--- a/src/Core/Platform/SdlAudioOutput.cpp
+++ b/src/Core/Platform/SdlAudioOutput.cpp
@@ -125,6 +125,10 @@ namespace Platform
 		max_queued_samples_ = std::max<uint32_t>(channels_, sample_rate_ * channels_ / 8);
 		opened_ = true;
 
+		std::cout << "SdlAudioOutput opened: requested "
+			<< sample_rate << " Hz/" << channels << " ch, obtained "
+			<< sample_rate_ << " Hz/" << channels_ << " ch." << std::endl;
+
 		SDL_PauseAudioDevice(device_id_, 0);
 		return true;
 	}