Initial commit

This commit is contained in:
feie9456 2026-03-29 01:19:51 +08:00
commit 8199769093
17 changed files with 3315 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

1762
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

26
Cargo.toml Normal file
View File

@ -0,0 +1,26 @@
[package]
name = "voice-ime"
version = "0.1.0"
edition = "2024"

[dependencies]
# Win32 bindings: tray icon, global hotkey, SendInput text injection,
# window/dialog creation. Only the feature gates actually used are enabled.
windows = { version = "0.61", features = [
    "Win32_UI_WindowsAndMessaging",
    "Win32_UI_Input_KeyboardAndMouse",
    "Win32_UI_Shell",
    "Win32_Graphics_Gdi",
    "Win32_System_LibraryLoader",
    "Win32_Foundation",
    "Win32_UI_Controls",
] }
# Microphone capture via the system audio host.
cpal = "0.15"
# Async runtime + WebSocket client for the streaming ASR connection.
tokio = { version = "1", features = ["rt-multi-thread", "sync", "macros", "time"] }
tokio-tungstenite = { version = "0.26", features = ["native-tls"] }
futures-util = "0.3"
# Config (de)serialization and ASR JSON protocol messages.
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# Audio payloads are base64-encoded over the websocket.
base64 = "0.22"
# Start/stop cue playback.
rodio = "0.20"

[build-dependencies]
# Embeds the .ico into the Windows executable (see build.rs).
winres = "0.1"

79
README.md Normal file
View File

@ -0,0 +1,79 @@
# Voice IME 语音输入法
Windows 系统托盘语音输入工具。按下快捷键开始录音,通过阿里云 Qwen ASR 实时语音识别 API 将语音转为文字,自动输入到当前光标位置。
## 功能
- **快捷键切换录音**:默认 F10,按一次开始,再按一次停止
- **流式语音识别**:使用 Qwen3 ASR Realtime API,支持 VAD 自动断句,边说边输入
- **增量文本插入**:对识别结果做 diff,仅输入变化部分,不影响输入框已有内容
- **系统托盘**:托盘图标显示当前状态(空闲/录音中),右键菜单提供设置
- **音效提示**:录音开始和停止时播放提示音
- **暂停媒体播放**:录音时可自动暂停系统媒体播放(可关闭)
- **可自定义配置**
- 快捷键
- API Key
- ASR 模型
- 媒体暂停开关
## 使用方法
### 获取 API Key
前往 [阿里云百炼](https://bailian.console.aliyun.com/) 开通 Qwen ASR 服务并获取 API Key。
### 运行
```
cargo build --release
./target/release/voice-ime.exe
```
首次启动会弹窗要求输入 API Key。输入后程序最小化到系统托盘。
### 操作
| 操作 | 说明 |
|------|------|
| 按下快捷键(默认 F10) | 开始/停止录音 |
| 右键托盘图标 | 打开设置菜单 |
### 右键菜单
- **设置快捷键** — 按下任意键即可更换
- **录音时暂停媒体播放** — 勾选开关
- **设置 API Key** — 修改 ASR 服务密钥
- **设置模型** — 修改 ASR 模型名称
- **退出**
### 配置文件
配置保存在 `%APPDATA%\voice-ime\config.json`,格式示例:
```json
{
"hotkey_vk": 121,
"media_pause_enabled": true,
"api_key": "sk-xxxxxxxx",
"model": "qwen3-asr-flash-realtime-2026-02-10"
}
```
## 技术栈
- **Rust 2024 Edition**
- **windows** crate — Win32 API托盘图标、热键、SendInput 文字输入)
- **cpal** — WASAPI 麦克风采集
- **tokio + tokio-tungstenite** — 异步 WebSocket 客户端
- **rodio** — 音效播放
- **serde** — 配置序列化
## 系统要求
- Windows 10/11
- 麦克风
- 网络连接(用于访问阿里云 ASR API)
## 许可证
MIT

BIN
assets/idle.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

BIN
assets/recording.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

BIN
assets/start.mp3 Normal file

Binary file not shown.

BIN
assets/stop.mp3 Normal file

Binary file not shown.

7
build.rs Normal file
View File

@ -0,0 +1,7 @@
/// Build script: embed the idle tray icon as the executable's resource icon.
/// Does nothing when the target OS is not Windows (e.g. CI cross-checks).
fn main() {
    let target_os = std::env::var_os("CARGO_CFG_TARGET_OS");
    let building_for_windows =
        target_os.as_deref() == Some(std::ffi::OsStr::new("windows"));
    if !building_for_windows {
        return;
    }
    let mut resource = winres::WindowsResource::new();
    resource.set_icon("assets/idle.ico");
    resource.compile().expect("Failed to compile Windows resources");
}

6
convert_icon.py Normal file
View File

@ -0,0 +1,6 @@
# One-off asset tool: convert idle.png / recording.png into multi-resolution
# Windows .ico files. Requires Pillow; run from the directory containing the
# PNG artwork.
from PIL import Image
for name in ['idle', 'recording']:
    img = Image.open(f'{name}.png').convert('RGBA')
    # Resolutions embedded in the .ico; the tray uses the small sizes,
    # Explorer and alt-tab use the larger ones.
    sizes = [(16,16),(24,24),(32,32),(48,48),(64,64),(128,128),(256,256)]
    img.save(f'{name}.ico', format='ICO', sizes=sizes)
    print(f'{name}.ico created')

140
src/audio.rs Normal file
View File

@ -0,0 +1,140 @@
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::{SampleFormat, Stream, StreamConfig};
use tokio::sync::mpsc;
/// Everything sent downstream is 16 kHz mono PCM, regardless of device format.
const TARGET_SAMPLE_RATE: u32 = 16000;
/// RAII handle owning the live input stream; dropping it pauses capture.
pub struct AudioCapture {
    stream: Stream,
}
/// Format of the PCM produced by `AudioCapture` (always 16 kHz here).
pub struct AudioCaptureConfig {
    #[allow(dead_code)]
    pub sample_rate: u32,
}
/// Resample `samples` from `from_rate` to `to_rate` by linear interpolation.
///
/// Returns a copy of the input when the rates already match. The output
/// length is truncated toward zero, so a trailing fractional sample may be
/// dropped; the last real sample is held when interpolation would read past
/// the end of the input.
fn resample(samples: &[i16], from_rate: u32, to_rate: u32) -> Vec<i16> {
    if from_rate == to_rate {
        return samples.to_vec();
    }
    let step = from_rate as f64 / to_rate as f64;
    let total = (samples.len() as f64 / step) as usize;
    (0..total)
        .map(|n| {
            let pos = n as f64 * step;
            let base = pos as usize;
            let t = pos - base as f64;
            if base + 1 < samples.len() {
                let lo = samples[base] as f64;
                let hi = samples[base + 1] as f64;
                (lo + t * (hi - lo)) as i16
            } else {
                // Clamp: hold the final sample instead of reading out of bounds.
                samples[base.min(samples.len().saturating_sub(1))]
            }
        })
        .collect()
}
/// Mix interleaved multi-channel i16 samples to mono, resample to 16 kHz,
/// and return the result as little-endian PCM bytes.
fn process_i16(data: &[i16], channels: u16, source_rate: u32) -> Vec<u8> {
    let ch = channels as usize;
    // Guard degenerate input: `chunks(0)` panics, and empty data yields nothing.
    if ch == 0 || data.is_empty() {
        return Vec::new();
    }
    let mono: Vec<i16> = data
        .chunks(ch)
        .map(|frame| {
            // Average over the actual frame length so a trailing partial frame
            // (fewer than `ch` samples) is mixed correctly instead of being
            // under-weighted by dividing by the full channel count.
            let sum: i32 = frame.iter().map(|&s| s as i32).sum();
            (sum / frame.len() as i32) as i16
        })
        .collect();
    let resampled = resample(&mono, source_rate, TARGET_SAMPLE_RATE);
    resampled.iter().flat_map(|s| s.to_le_bytes()).collect()
}
/// Mix interleaved multi-channel f32 samples to mono, convert to i16,
/// resample to 16 kHz, and return little-endian PCM bytes.
fn process_f32(data: &[f32], channels: u16, source_rate: u32) -> Vec<u8> {
    let ch = channels as usize;
    // Guard degenerate input: `chunks(0)` panics, and empty data yields nothing.
    if ch == 0 || data.is_empty() {
        return Vec::new();
    }
    let mono: Vec<i16> = data
        .chunks(ch)
        .map(|frame| {
            // Average over the actual frame length so a trailing partial frame
            // is mixed correctly instead of being under-weighted.
            let sum: f32 = frame.iter().sum();
            let m = sum / frame.len() as f32;
            // Scale [-1.0, 1.0] to i16; clamp so +1.0 maps to 32767, not 32768.
            (m * 32768.0).clamp(-32768.0, 32767.0) as i16
        })
        .collect();
    let resampled = resample(&mono, source_rate, TARGET_SAMPLE_RATE);
    resampled.iter().flat_map(|s| s.to_le_bytes()).collect()
}
impl AudioCapture {
    /// Start capturing audio from the default input device.
    /// Audio data is always resampled to 16kHz mono PCM i16 LE.
    ///
    /// The returned `AudioCapture` owns the cpal stream; dropping it stops
    /// capture. Converted PCM chunks are pushed into `tx` from the audio
    /// callback thread; send errors are ignored because the receiver goes
    /// away when the session ends.
    pub fn start(tx: mpsc::UnboundedSender<Vec<u8>>) -> Result<(Self, AudioCaptureConfig), String> {
        let host = cpal::default_host();
        let device = host
            .default_input_device()
            .ok_or_else(|| "No input device available".to_string())?;
        // Open the device in its native format and convert ourselves rather
        // than requesting 16 kHz, which the hardware may not support.
        let default_config = device
            .default_input_config()
            .map_err(|e| format!("Failed to get default input config: {e}"))?;
        let source_rate = default_config.sample_rate().0;
        let channels = default_config.channels();
        let sample_format = default_config.sample_format();
        let config: StreamConfig = default_config.into();
        eprintln!("[voice-ime] Audio device: {source_rate}Hz, {channels}ch, {sample_format:?} → resampling to {TARGET_SAMPLE_RATE}Hz mono");
        // Only I16 and F32 device formats are handled; anything else is an error.
        let stream = match sample_format {
            SampleFormat::I16 => {
                // Copy rate/channels into the 'static callback closure.
                let ch = channels;
                let rate = source_rate;
                device
                    .build_input_stream(
                        &config,
                        move |data: &[i16], _: &cpal::InputCallbackInfo| {
                            let bytes = process_i16(data, ch, rate);
                            let _ = tx.send(bytes);
                        },
                        |err| eprintln!("Audio capture error: {err}"),
                        None,
                    )
                    .map_err(|e| format!("Failed to build i16 input stream: {e}"))?
            }
            SampleFormat::F32 => {
                let ch = channels;
                let rate = source_rate;
                device
                    .build_input_stream(
                        &config,
                        move |data: &[f32], _: &cpal::InputCallbackInfo| {
                            let bytes = process_f32(data, ch, rate);
                            let _ = tx.send(bytes);
                        },
                        |err| eprintln!("Audio capture error: {err}"),
                        None,
                    )
                    .map_err(|e| format!("Failed to build f32 input stream: {e}"))?
            }
            _ => return Err(format!("Unsupported sample format: {sample_format:?}")),
        };
        stream
            .play()
            .map_err(|e| format!("Failed to start audio stream: {e}"))?;
        Ok((
            AudioCapture { stream },
            AudioCaptureConfig {
                sample_rate: TARGET_SAMPLE_RATE,
            },
        ))
    }
}
impl Drop for AudioCapture {
    // Pause the stream on drop; the result is ignored because there is no
    // meaningful recovery during teardown.
    fn drop(&mut self) {
        let _ = self.stream.pause();
    }
}

116
src/config.rs Normal file
View File

@ -0,0 +1,116 @@
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::fs;
use std::path::PathBuf;
/// Model used when the config file does not specify one.
pub const DEFAULT_MODEL: &str = "qwen3-asr-flash-realtime-2026-02-10";
/// Persisted user settings, stored as JSON (see `config_path`).
#[derive(Serialize, Deserialize, Clone)]
pub struct Config {
    /// Virtual key code for the hotkey (default: VK_F10 = 0x79)
    pub hotkey_vk: u16,
    /// Whether to send media play/pause when toggling recording
    pub media_pause_enabled: bool,
    /// API key for Qwen3 ASR service
    #[serde(default)]
    pub api_key: String,
    /// ASR model name
    #[serde(default = "default_model")]
    pub model: String,
}
/// serde default hook for `Config::model`, used when an older config file
/// lacks the field.
fn default_model() -> String {
    DEFAULT_MODEL.to_string()
}
impl Default for Config {
    /// Baseline settings: F10 toggle key, media pausing enabled, no API key,
    /// and the stock ASR model.
    fn default() -> Self {
        Self {
            model: DEFAULT_MODEL.to_string(),
            api_key: String::new(),
            media_pause_enabled: true,
            hotkey_vk: 0x79, // VK_F10
        }
    }
}
/// Path of the JSON config file, creating its parent directory on the way.
fn config_path() -> PathBuf {
    let base = match std::env::var_os("APPDATA") {
        Some(appdata) => PathBuf::from(appdata),
        None => dirs_fallback_appdata(),
    };
    let dir = base.join("voice-ime");
    // Best-effort: if creation fails, the later read/write will fail too.
    let _ = fs::create_dir_all(&dir);
    dir.join("config.json")
}
/// Fallback when %APPDATA% is unset: %USERPROFILE%\AppData\Roaming, or the
/// current directory as a last resort.
fn dirs_fallback_appdata() -> PathBuf {
    match std::env::var_os("USERPROFILE") {
        Some(home) => PathBuf::from(home).join("AppData").join("Roaming"),
        None => PathBuf::from("."),
    }
}
thread_local! {
    // In-memory copy of the settings. A thread-local RefCell is enough here
    // so long as config is only touched from the UI thread — NOTE(review):
    // RecordingSession clones the values before spawning its worker thread,
    // which keeps that assumption true in the code visible in this file.
    static CONFIG: RefCell<Config> = RefCell::new(Config::default());
}
/// Load settings from disk into the thread-local copy. Missing or
/// unparseable files silently keep the defaults.
pub fn load() {
    let path = config_path();
    if let Ok(data) = fs::read_to_string(&path) {
        if let Ok(cfg) = serde_json::from_str::<Config>(&data) {
            CONFIG.with(|c| *c.borrow_mut() = cfg);
        }
    }
}
/// Persist the current settings as pretty-printed JSON. Write failures are
/// ignored (best-effort persistence).
pub fn save() {
    let path = config_path();
    CONFIG.with(|c| {
        if let Ok(json) = serde_json::to_string_pretty(&*c.borrow()) {
            let _ = fs::write(&path, json);
        }
    });
}
/// Snapshot of the current settings (cloned so callers hold no borrow).
pub fn get() -> Config {
    CONFIG.with(|c| c.borrow().clone())
}
/// Update the hotkey and persist immediately.
pub fn set_hotkey_vk(vk: u16) {
    CONFIG.with(|c| c.borrow_mut().hotkey_vk = vk);
    save();
}
/// Toggle the media-pause behavior and persist immediately.
pub fn set_media_pause(enabled: bool) {
    CONFIG.with(|c| c.borrow_mut().media_pause_enabled = enabled);
    save();
}
/// Store a new API key and persist immediately.
pub fn set_api_key(key: String) {
    CONFIG.with(|c| c.borrow_mut().api_key = key);
    save();
}
/// Store a new ASR model name and persist immediately.
pub fn set_model(model: String) {
    CONFIG.with(|c| c.borrow_mut().model = model);
    save();
}
/// Human-readable display name for a Windows virtual-key code.
/// Function keys F1–F24 occupy the contiguous range 0x70–0x87; a handful of
/// navigation keys get friendly names; everything else falls back to a hex
/// `VK(0xNN)` form.
pub fn vk_name(vk: u16) -> String {
    if (0x70..=0x87).contains(&vk) {
        return format!("F{}", vk - 0x6F);
    }
    let fixed = match vk {
        0x21 => "PageUp",
        0x22 => "PageDown",
        0x23 => "End",
        0x24 => "Home",
        0x2D => "Insert",
        0x2E => "Delete",
        0x13 => "Pause",
        0x91 => "ScrollLock",
        0xC0 => "`",
        _ => return format!("VK(0x{vk:02X})"),
    };
    fixed.to_string()
}

139
src/input.rs Normal file
View File

@ -0,0 +1,139 @@
use std::mem;
use windows::Win32::UI::Input::KeyboardAndMouse::{
SendInput, INPUT, INPUT_KEYBOARD, KEYBDINPUT, KEYEVENTF_KEYUP, KEYEVENTF_UNICODE,
VIRTUAL_KEY, VK_BACK, VK_MEDIA_PLAY_PAUSE,
};
/// Inject a batch of synthesized input events with a single SendInput call.
fn send_inputs(inputs: &[INPUT]) {
    if inputs.is_empty() {
        return;
    }
    unsafe {
        // NOTE(review): the return value (count of events actually injected)
        // is discarded; injection can partially fail if another process has
        // blocked input — confirm best-effort behavior is intended.
        SendInput(inputs, mem::size_of::<INPUT>() as i32);
    }
}
/// Key-down event carrying a UTF-16 code unit. With KEYEVENTF_UNICODE the
/// character travels in `wScan` and `wVk` must be 0.
fn make_unicode_key_down(scan: u16) -> INPUT {
    INPUT {
        r#type: INPUT_KEYBOARD,
        Anonymous: windows::Win32::UI::Input::KeyboardAndMouse::INPUT_0 {
            ki: KEYBDINPUT {
                wVk: VIRTUAL_KEY(0),
                wScan: scan,
                dwFlags: KEYEVENTF_UNICODE,
                time: 0,
                dwExtraInfo: 0,
            },
        },
    }
}
/// Matching key-up event for `make_unicode_key_down`.
fn make_unicode_key_up(scan: u16) -> INPUT {
    INPUT {
        r#type: INPUT_KEYBOARD,
        Anonymous: windows::Win32::UI::Input::KeyboardAndMouse::INPUT_0 {
            ki: KEYBDINPUT {
                wVk: VIRTUAL_KEY(0),
                wScan: scan,
                dwFlags: KEYEVENTF_UNICODE | KEYEVENTF_KEYUP,
                time: 0,
                dwExtraInfo: 0,
            },
        },
    }
}
/// Key-down event for a real virtual key (e.g. Backspace, media keys).
fn make_vk_key_down(vk: VIRTUAL_KEY) -> INPUT {
    INPUT {
        r#type: INPUT_KEYBOARD,
        Anonymous: windows::Win32::UI::Input::KeyboardAndMouse::INPUT_0 {
            ki: KEYBDINPUT {
                wVk: vk,
                wScan: 0,
                // No flags = plain key-down.
                dwFlags: windows::Win32::UI::Input::KeyboardAndMouse::KEYBD_EVENT_FLAGS(0),
                time: 0,
                dwExtraInfo: 0,
            },
        },
    }
}
/// Matching key-up event for `make_vk_key_down`.
fn make_vk_key_up(vk: VIRTUAL_KEY) -> INPUT {
    INPUT {
        r#type: INPUT_KEYBOARD,
        Anonymous: windows::Win32::UI::Input::KeyboardAndMouse::INPUT_0 {
            ki: KEYBDINPUT {
                wVk: vk,
                wScan: 0,
                dwFlags: KEYEVENTF_KEYUP,
                time: 0,
                dwExtraInfo: 0,
            },
        },
    }
}
/// Send N backspace key presses.
fn send_backspaces(count: usize) {
if count == 0 {
return;
}
let mut inputs = Vec::with_capacity(count * 2);
for _ in 0..count {
inputs.push(make_vk_key_down(VK_BACK));
inputs.push(make_vk_key_up(VK_BACK));
}
send_inputs(&inputs);
}
/// Type a string using SendInput with KEYEVENTF_UNICODE.
/// Handles surrogate pairs for characters outside BMP.
fn send_unicode_string(text: &str) {
if text.is_empty() {
return;
}
let mut inputs = Vec::new();
for c in text.chars() {
let mut buf = [0u16; 2];
let encoded = c.encode_utf16(&mut buf);
for &code_unit in encoded.iter() {
inputs.push(make_unicode_key_down(code_unit));
inputs.push(make_unicode_key_up(code_unit));
}
}
send_inputs(&inputs);
}
/// Length (in chars, not bytes) of the longest common prefix of `a` and `b`.
fn common_prefix_chars(a: &str, b: &str) -> usize {
    let mut shared = 0;
    let mut rhs = b.chars();
    for ca in a.chars() {
        match rhs.next() {
            Some(cb) if cb == ca => shared += 1,
            _ => break,
        }
    }
    shared
}
/// Reconcile the input field with a new full ASR transcript.
///
/// Given the text we previously typed (`last`) and the latest full text
/// (`current`), erase only the diverging tail with backspaces and then type
/// the new suffix, leaving the shared prefix (and any pre-existing field
/// content) untouched. Returns `current`, the new "last inserted" text.
pub fn apply_text_update(last: &str, current: &str) -> String {
    let shared = common_prefix_chars(last, current);
    // Erase everything we typed past the shared prefix…
    let erase = last.chars().count() - shared;
    send_backspaces(erase);
    // …then type the portion of `current` beyond that prefix.
    let addition: String = current.chars().skip(shared).collect();
    send_unicode_string(&addition);
    current.to_string()
}
/// Simulate pressing the media Play/Pause key.
/// NOTE(review): this key is a toggle handled by whatever media app is
/// active — the caller assumes pressing it twice restores the prior state,
/// which depends on that app's behavior.
pub fn send_media_play_pause() {
    let inputs = [
        make_vk_key_down(VK_MEDIA_PLAY_PAUSE),
        make_vk_key_up(VK_MEDIA_PLAY_PAUSE),
    ];
    send_inputs(&inputs);
}

679
src/main.rs Normal file
View File

@ -0,0 +1,679 @@
#![windows_subsystem = "windows"]
mod audio;
mod config;
mod input;
mod session;
mod sound;
mod ws;
use session::RecordingSession;
use std::cell::RefCell;
use windows::core::{w, PCWSTR};
use windows::Win32::Foundation::{HWND, LPARAM, LRESULT, WPARAM};
use windows::Win32::Graphics::Gdi::{GetStockObject, BLACK_BRUSH};
use windows::Win32::System::LibraryLoader::GetModuleHandleW;
use windows::Win32::UI::Input::KeyboardAndMouse::{
RegisterHotKey, UnregisterHotKey, HOT_KEY_MODIFIERS,
};
use windows::Win32::UI::Shell::{
Shell_NotifyIconW, NIF_ICON, NIF_MESSAGE, NIF_TIP, NIM_ADD, NIM_DELETE, NIM_MODIFY,
NOTIFYICONDATAW,
};
use windows::Win32::UI::WindowsAndMessaging::*;
// Icons are embedded in the binary so the exe is self-contained.
static ICO_IDLE: &[u8] = include_bytes!("../assets/idle.ico");
static ICO_RECORDING: &[u8] = include_bytes!("../assets/recording.ico");
/// Parse an ICO file and load the best-matching icon entry as an HICON.
/// Returns a null HICON on any malformed input rather than panicking.
fn load_icon_from_ico(data: &[u8]) -> HICON {
    // ICO header: reserved(2) + type(2) + count(2) = 6 bytes
    // Each entry: width(1) + height(1) + colorCount(1) + reserved(1)
    // + planes(2) + bitCount(2) + bytesInRes(4) + imageOffset(4) = 16 bytes
    if data.len() < 6 {
        return HICON::default();
    }
    let count = u16::from_le_bytes([data[4], data[5]]) as usize;
    if count == 0 || data.len() < 6 + count * 16 {
        return HICON::default();
    }
    // Get system small icon size for tray
    let desired = unsafe { GetSystemMetrics(SM_CXSMICON) } as u32;
    // Find best entry: prefer exact match on desired size, else closest larger, else largest
    let mut best_idx = 0;
    let mut best_w = 0u32;
    for i in 0..count {
        let off = 6 + i * 16;
        // A width byte of 0 in an ICO entry means 256 pixels.
        let w = if data[off] == 0 { 256 } else { data[off] as u32 };
        if w == desired {
            best_idx = i;
            best_w = w;
            break;
        }
        if (w >= desired && (best_w < desired || w < best_w))
            || (best_w < desired && w > best_w)
        {
            best_idx = i;
            best_w = w;
        }
    }
    let off = 6 + best_idx * 16;
    let bytes_in_res = u32::from_le_bytes([data[off + 8], data[off + 9], data[off + 10], data[off + 11]]) as usize;
    let image_offset = u32::from_le_bytes([data[off + 12], data[off + 13], data[off + 14], data[off + 15]]) as usize;
    // Bounds-check the entry's payload before slicing.
    if data.len() < image_offset + bytes_in_res {
        return HICON::default();
    }
    let image_data = &data[image_offset..image_offset + bytes_in_res];
    unsafe {
        // 0x00030000 is the icon/cursor resource format version expected by
        // CreateIconFromResourceEx; `true` selects icon (not cursor).
        CreateIconFromResourceEx(
            image_data,
            true,
            0x00030000,
            desired as i32,
            desired as i32,
            LR_DEFAULTCOLOR,
        )
        .unwrap_or_default()
    }
}
/// Private tray callback message; WM_APP range avoids clashing with system messages.
const WM_TRAYICON: u32 = WM_APP + 1;
/// Identifier shared by RegisterHotKey/UnregisterHotKey calls.
const HOTKEY_ID: i32 = 1;
// Context-menu command IDs, dispatched in `wnd_proc`'s WM_COMMAND arm.
const IDM_EXIT: usize = 1001;
const IDM_CHANGE_HOTKEY: usize = 1002;
const IDM_TOGGLE_MEDIA_PAUSE: usize = 1003;
const IDM_SET_API_KEY: usize = 1004;
const IDM_SET_MODEL: usize = 1005;
thread_local! {
    /// Active recording session; `None` means idle.
    static SESSION: RefCell<Option<RecordingSession>> = RefCell::new(None);
    /// Handle of the hidden main window.
    static HWND_MAIN: RefCell<HWND> = RefCell::new(HWND::default());
    /// When true, next key press in the dialog sets the hotkey.
    static PICKING_HOTKEY: RefCell<bool> = RefCell::new(false);
}
/// Update the existing tray icon's tooltip and swap its icon to match the
/// recording state. Assumes the icon was already added with NIM_ADD.
fn set_tray_tooltip(hwnd: HWND, tip: &str, recording: bool) {
    let mut nid = NOTIFYICONDATAW {
        cbSize: std::mem::size_of::<NOTIFYICONDATAW>() as u32,
        hWnd: hwnd,
        uID: 1,
        uFlags: NIF_TIP | NIF_ICON,
        ..Default::default()
    };
    // Set tooltip
    // NOTE(review): if `tip` exceeds szTip's capacity the copy is truncated
    // and may end without a NUL / mid-surrogate — long tips should be avoided.
    let tip_wide: Vec<u16> = tip.encode_utf16().chain(std::iter::once(0)).collect();
    let len = tip_wide.len().min(nid.szTip.len());
    nid.szTip[..len].copy_from_slice(&tip_wide[..len]);
    nid.hIcon = if recording {
        load_icon_from_ico(ICO_RECORDING)
    } else {
        load_icon_from_ico(ICO_IDLE)
    };
    unsafe {
        let _ = Shell_NotifyIconW(NIM_MODIFY, &nid);
    }
}
/// Toggle between idle and recording: tears down the current session if one
/// exists, otherwise starts a new one. Also plays the audio cue, updates the
/// tray state, and (optionally) sends media play/pause on each transition.
fn toggle_recording(hwnd: HWND) {
    SESSION.with(|s| {
        let mut session = s.borrow_mut();
        let cfg = config::get();
        if session.is_some() {
            // Stop recording. `stop()` blocks until the worker thread joins.
            session.take().unwrap().stop();
            set_tray_tooltip(hwnd, "语音输入 - 空闲", false);
            sound::play_stop();
            // Second play/pause press resumes whatever was paused at start.
            if cfg.media_pause_enabled {
                input::send_media_play_pause();
            }
        } else {
            // Start recording
            if cfg.media_pause_enabled {
                input::send_media_play_pause();
            }
            match RecordingSession::start() {
                Ok(s) => {
                    *session = Some(s);
                    set_tray_tooltip(hwnd, "语音输入 - 录音中...", true);
                    sound::play_start();
                }
                Err(e) => {
                    // Surface the failure via stderr and the tray tooltip;
                    // we stay in the idle state.
                    eprintln!("[voice-ime] Failed to start recording: {e}");
                    set_tray_tooltip(hwnd, &format!("语音输入 - 错误: {e}"), false);
                }
            }
        }
    });
}
/// Show a generic single-line text input dialog. Returns Some(text) if confirmed, None if cancelled.
///
/// The dialog is driven by a nested (modal-style) message loop on the UI
/// thread; state is passed to `textinput_wnd_proc` via thread-locals.
/// An empty confirmed string is treated the same as cancel (filtered below).
fn show_text_input_dialog(hwnd: HWND, title: &str, label: &str, initial: &str) -> Option<String> {
    unsafe {
        let instance = GetModuleHandleW(None).unwrap();
        // Registering an already-registered class fails harmlessly.
        let wc = WNDCLASSEXW {
            cbSize: std::mem::size_of::<WNDCLASSEXW>() as u32,
            lpfnWndProc: Some(textinput_wnd_proc),
            hInstance: instance.into(),
            lpszClassName: w!("VoiceIMETextInput"),
            hbrBackground: std::mem::transmute(GetStockObject(
                windows::Win32::Graphics::Gdi::WHITE_BRUSH,
            )),
            ..Default::default()
        };
        RegisterClassExW(&wc);
        let screen_w = GetSystemMetrics(SM_CXSCREEN);
        let screen_h = GetSystemMetrics(SM_CYSCREEN);
        let dlg_w = 420;
        let dlg_h = 160;
        // Stash dialog inputs/outputs where the window procedure can see them.
        TEXTINPUT_RESULT.with(|r| *r.borrow_mut() = None);
        TEXTINPUT_LABEL.with(|r| *r.borrow_mut() = label.to_string());
        TEXTINPUT_INITIAL.with(|r| *r.borrow_mut() = initial.to_string());
        let mut title_w: Vec<u16> = title.encode_utf16().collect();
        title_w.push(0);
        // Centered, topmost tool window so it appears above everything.
        let dlg = CreateWindowExW(
            WS_EX_TOPMOST | WS_EX_TOOLWINDOW,
            w!("VoiceIMETextInput"),
            PCWSTR(title_w.as_ptr()),
            WS_POPUP | WS_CAPTION | WS_SYSMENU,
            (screen_w - dlg_w) / 2,
            (screen_h - dlg_h) / 2,
            dlg_w,
            dlg_h,
            Some(hwnd),
            None,
            Some(instance.into()),
            None,
        )
        .unwrap();
        let _ = ShowWindow(dlg, SW_SHOW);
        let _ = SetForegroundWindow(dlg);
        // Nested message loop until the dialog window is destroyed.
        let mut msg = MSG::default();
        while GetMessageW(&mut msg, None, 0, 0).as_bool() {
            if !IsWindow(Some(dlg)).as_bool() {
                break;
            }
            let _ = TranslateMessage(&msg);
            DispatchMessageW(&msg);
        }
        // Some("") (set on WM_CLOSE) is filtered to None = cancelled.
        TEXTINPUT_RESULT.with(|r| r.borrow().clone()).filter(|s| !s.is_empty())
    }
}
thread_local! {
    /// Dialog outcome: None = still open, Some("") = cancelled/closed,
    /// Some(text) = confirmed.
    static TEXTINPUT_RESULT: RefCell<Option<String>> = RefCell::new(None);
    /// Label text shown above the edit box (set before the dialog opens).
    static TEXTINPUT_LABEL: RefCell<String> = RefCell::new(String::new());
    /// Initial contents of the edit box.
    static TEXTINPUT_INITIAL: RefCell<String> = RefCell::new(String::new());
    /// Handle of the EDIT control, captured in WM_CREATE for later reads.
    static TEXTINPUT_EDIT_HWND: RefCell<HWND> = RefCell::new(HWND::default());
}
// Child-control IDs for the text-input dialog.
const IDC_TEXTINPUT_EDIT: i32 = 3001;
const IDC_TEXTINPUT_OK: i32 = 3002;
/// Window procedure for the single-line text-input dialog. Builds the label,
/// edit box, and OK button on WM_CREATE, and publishes the result through
/// `TEXTINPUT_RESULT` when the user confirms or closes the window.
unsafe extern "system" fn textinput_wnd_proc(
    hwnd: HWND,
    msg: u32,
    wparam: WPARAM,
    lparam: LPARAM,
) -> LRESULT {
    match msg {
        WM_CREATE => {
            let instance = unsafe { GetModuleHandleW(None).unwrap() };
            let label_text = TEXTINPUT_LABEL.with(|r| r.borrow().clone());
            let mut label_w: Vec<u16> = label_text.encode_utf16().collect();
            label_w.push(0);
            unsafe {
                let _ = CreateWindowExW(
                    WINDOW_EX_STYLE::default(),
                    w!("STATIC"),
                    PCWSTR(label_w.as_ptr()),
                    WS_CHILD | WS_VISIBLE,
                    15, 15, 380, 20,
                    Some(hwnd), None, Some(instance.into()), None,
                );
            }
            let init_text = TEXTINPUT_INITIAL.with(|r| r.borrow().clone());
            let mut init_w: Vec<u16> = init_text.encode_utf16().collect();
            init_w.push(0);
            let edit = unsafe {
                CreateWindowExW(
                    WS_EX_CLIENTEDGE,
                    w!("EDIT"),
                    PCWSTR(init_w.as_ptr()),
                    // 0x0080 = ES_AUTOHSCROLL: scroll text as the caret moves.
                    WS_CHILD | WS_VISIBLE | WINDOW_STYLE(0x0080),
                    15, 42, 375, 25,
                    Some(hwnd), Some(HMENU(IDC_TEXTINPUT_EDIT as _)),
                    Some(instance.into()), None,
                ).unwrap()
            };
            TEXTINPUT_EDIT_HWND.with(|h| *h.borrow_mut() = edit);
            unsafe {
                let _ = CreateWindowExW(
                    WINDOW_EX_STYLE::default(),
                    w!("BUTTON"),
                    w!("确定"),
                    // 0x0001 = BS_DEFPUSHBUTTON: the default (Enter) button.
                    WS_CHILD | WS_VISIBLE | WINDOW_STYLE(0x0001),
                    160, 80, 90, 30,
                    Some(hwnd), Some(HMENU(IDC_TEXTINPUT_OK as _)),
                    Some(instance.into()), None,
                );
            }
            unsafe {
                // 0x00B1 = EM_SETSEL with (0, -1): select all existing text,
                // then focus the edit box so typing replaces it.
                let _ = SendMessageW(edit, 0x00B1, Some(WPARAM(0)), Some(LPARAM(-1)));
                let _ = windows::Win32::UI::Input::KeyboardAndMouse::SetFocus(Some(edit));
            }
            LRESULT(0)
        }
        WM_COMMAND => {
            // Low word of wParam is the child-control ID.
            let id = (wparam.0 & 0xFFFF) as i32;
            if id == IDC_TEXTINPUT_OK {
                let edit = TEXTINPUT_EDIT_HWND.with(|h| *h.borrow());
                let len = unsafe { GetWindowTextLengthW(edit) } as usize;
                let mut buf = vec![0u16; len + 1];
                unsafe { GetWindowTextW(edit, &mut buf) };
                let text = String::from_utf16_lossy(&buf[..len]);
                TEXTINPUT_RESULT.with(|r| *r.borrow_mut() = Some(text.trim().to_string()));
                unsafe { let _ = DestroyWindow(hwnd); }
            }
            LRESULT(0)
        }
        WM_CLOSE => {
            // Closing without OK counts as cancel (empty string is filtered
            // to None by the caller).
            TEXTINPUT_RESULT.with(|r| *r.borrow_mut() = Some(String::new()));
            unsafe { let _ = DestroyWindow(hwnd); }
            LRESULT(0)
        }
        WM_DESTROY => {
            // Wakes the nested GetMessageW loop in show_text_input_dialog.
            unsafe { PostQuitMessage(0); }
            LRESULT(0)
        }
        _ => unsafe { DefWindowProcW(hwnd, msg, wparam, lparam) },
    }
}
/// Prompt for the Qwen ASR API key. Persists the entered value and returns
/// true only when the user confirmed a non-empty key.
fn show_api_key_dialog(hwnd: HWND) -> bool {
    let existing = config::get().api_key;
    match show_text_input_dialog(hwnd, "设置 API Key", "请输入 Qwen ASR API Key", &existing) {
        Some(key) => {
            config::set_api_key(key);
            true
        }
        None => false,
    }
}
/// Prompt for the ASR model name and persist it when the user confirms.
fn show_model_dialog(hwnd: HWND) {
    let existing = config::get().model;
    let Some(model) = show_text_input_dialog(hwnd, "设置模型", "请输入 ASR 模型名称:", &existing) else {
        return;
    };
    config::set_model(model);
}
/// Build and display the tray right-click menu at the cursor position.
/// Selections arrive back at `wnd_proc` as WM_COMMAND with the IDM_* id.
fn show_context_menu(hwnd: HWND) {
    unsafe {
        let cfg = config::get();
        let menu = CreatePopupMenu().unwrap();
        // Hotkey item
        let hotkey_label: Vec<u16> = format!("设置快捷键 (当前: {})\0", config::vk_name(cfg.hotkey_vk))
            .encode_utf16()
            .collect();
        AppendMenuW(menu, MF_STRING, IDM_CHANGE_HOTKEY, PCWSTR(hotkey_label.as_ptr())).unwrap();
        // Media pause toggle
        let pause_label: Vec<u16> = "录音时暂停媒体播放\0".encode_utf16().collect();
        let flags = if cfg.media_pause_enabled {
            MF_STRING | MF_CHECKED
        } else {
            MF_STRING | MF_UNCHECKED
        };
        AppendMenuW(menu, flags, IDM_TOGGLE_MEDIA_PAUSE, PCWSTR(pause_label.as_ptr())).unwrap();
        // API Key
        let api_key_label: Vec<u16> = "设置 API Key...\0".encode_utf16().collect();
        AppendMenuW(menu, MF_STRING, IDM_SET_API_KEY, PCWSTR(api_key_label.as_ptr())).unwrap();
        // Model
        let model_label: Vec<u16> = format!("设置模型 ({})...\0", cfg.model).encode_utf16().collect();
        AppendMenuW(menu, MF_STRING, IDM_SET_MODEL, PCWSTR(model_label.as_ptr())).unwrap();
        // Separator
        AppendMenuW(menu, MF_SEPARATOR, 0, None).unwrap();
        // Exit
        let exit_label: Vec<u16> = "退出\0".encode_utf16().collect();
        AppendMenuW(menu, MF_STRING, IDM_EXIT, PCWSTR(exit_label.as_ptr())).unwrap();
        let mut pt = windows::Win32::Foundation::POINT::default();
        let _ = GetCursorPos(&mut pt);
        // Required for the menu to disappear when clicking outside
        let _ = SetForegroundWindow(hwnd);
        let _ = TrackPopupMenu(menu, TPM_BOTTOMALIGN | TPM_LEFTALIGN, pt.x, pt.y, Some(0), hwnd, None);
        let _ = DestroyMenu(menu);
    }
}
/// Register the global hotkey using the current config.
/// No modifier keys are used — the bare key alone triggers toggling.
/// Returns false if registration fails (e.g. the key is taken by another app).
fn register_configured_hotkey(hwnd: HWND) -> bool {
    let vk = config::get().hotkey_vk;
    unsafe {
        RegisterHotKey(
            Some(hwnd),
            HOTKEY_ID,
            HOT_KEY_MODIFIERS(0),
            vk as u32,
        )
        .is_ok()
    }
}
/// Show a small dialog that captures the next key press as the new hotkey.
/// Temporarily unregisters the current hotkey so the picker can capture it,
/// then re-registers on exit (warning the user if re-registration fails).
fn show_hotkey_picker(hwnd: HWND) {
    // Create a small popup window for key capture
    unsafe {
        let instance = GetModuleHandleW(None).unwrap();
        // Register class for picker window (once is fine, re-register is harmless)
        let wc = WNDCLASSEXW {
            cbSize: std::mem::size_of::<WNDCLASSEXW>() as u32,
            lpfnWndProc: Some(picker_wnd_proc),
            hInstance: instance.into(),
            lpszClassName: w!("VoiceIMEPicker"),
            hbrBackground: std::mem::transmute(GetStockObject(
                windows::Win32::Graphics::Gdi::WHITE_BRUSH,
            )),
            ..Default::default()
        };
        RegisterClassExW(&wc);
        // Get screen center
        let screen_w = GetSystemMetrics(SM_CXSCREEN);
        let screen_h = GetSystemMetrics(SM_CYSCREEN);
        let dlg_w = 320;
        let dlg_h = 120;
        let picker = CreateWindowExW(
            WS_EX_TOPMOST | WS_EX_TOOLWINDOW,
            w!("VoiceIMEPicker"),
            w!("设置快捷键"),
            WS_POPUP | WS_CAPTION | WS_SYSMENU,
            (screen_w - dlg_w) / 2,
            (screen_h - dlg_h) / 2,
            dlg_w,
            dlg_h,
            Some(hwnd),
            None,
            Some(instance.into()),
            None,
        )
        .unwrap();
        // Unregister current hotkey so the key can be captured
        let _ = UnregisterHotKey(Some(hwnd), HOTKEY_ID);
        PICKING_HOTKEY.with(|p| *p.borrow_mut() = true);
        let _ = ShowWindow(picker, SW_SHOW);
        let _ = SetForegroundWindow(picker);
        // Run a modal message loop for the picker; picker_wnd_proc clears
        // PICKING_HOTKEY when a key is chosen or the window closes.
        let mut msg = MSG::default();
        while GetMessageW(&mut msg, None, 0, 0).as_bool() {
            let _ = TranslateMessage(&msg);
            DispatchMessageW(&msg);
            if !PICKING_HOTKEY.with(|p| *p.borrow()) {
                break;
            }
        }
        // Re-register hotkey with (possibly new) config
        if !register_configured_hotkey(hwnd) {
            let text: Vec<u16> = format!(
                "无法注册快捷键 {},可能被其他程序占用。\0",
                config::vk_name(config::get().hotkey_vk)
            )
            .encode_utf16()
            .collect();
            let title: Vec<u16> = "语音输入\0".encode_utf16().collect();
            MessageBoxW(
                Some(hwnd),
                PCWSTR(text.as_ptr()),
                PCWSTR(title.as_ptr()),
                MB_OK | MB_ICONWARNING,
            );
        }
    }
}
/// Window procedure for the hotkey picker: waits for the first non-modifier
/// key press, saves it as the new hotkey, and closes itself. Clearing
/// PICKING_HOTKEY is what ends the modal loop in `show_hotkey_picker`.
unsafe extern "system" fn picker_wnd_proc(
    hwnd: HWND,
    msg: u32,
    wparam: WPARAM,
    lparam: LPARAM,
) -> LRESULT {
    match msg {
        WM_CREATE => {
            // Create a static label
            let text: Vec<u16> = "请按下新的快捷键...\0".encode_utf16().collect();
            let instance = unsafe { GetModuleHandleW(None).unwrap() };
            unsafe {
                let _ = CreateWindowExW(
                    WINDOW_EX_STYLE::default(),
                    w!("STATIC"),
                    PCWSTR(text.as_ptr()),
                    // 0x01 = SS_CENTER: center the prompt text.
                    WS_CHILD | WS_VISIBLE | WINDOW_STYLE(0x01),
                    0,
                    25,
                    320,
                    40,
                    Some(hwnd),
                    None,
                    Some(instance.into()),
                    None,
                );
            }
            LRESULT(0)
        }
        WM_KEYDOWN | WM_SYSKEYDOWN => {
            let vk = (wparam.0 & 0xFF) as u16;
            // Ignore modifier-only keys
            // (0x10-0x12 = Shift/Ctrl/Alt, 0xA0-0xA5 = their L/R variants).
            if !matches!(vk, 0x10 | 0x11 | 0x12 | 0xA0..=0xA5) {
                config::set_hotkey_vk(vk);
                PICKING_HOTKEY.with(|p| *p.borrow_mut() = false);
                unsafe { let _ = DestroyWindow(hwnd); }
            }
            LRESULT(0)
        }
        WM_CLOSE => {
            // Cancel: keep the previous hotkey.
            PICKING_HOTKEY.with(|p| *p.borrow_mut() = false);
            unsafe { let _ = DestroyWindow(hwnd); }
            LRESULT(0)
        }
        WM_DESTROY => {
            PICKING_HOTKEY.with(|p| *p.borrow_mut() = false);
            LRESULT(0)
        }
        _ => unsafe { DefWindowProcW(hwnd, msg, wparam, lparam) },
    }
}
/// Main window procedure: dispatches the global hotkey, tray-icon mouse
/// events, context-menu commands, and app shutdown.
unsafe extern "system" fn wnd_proc(
    hwnd: HWND,
    msg: u32,
    wparam: WPARAM,
    lparam: LPARAM,
) -> LRESULT {
    match msg {
        WM_HOTKEY => {
            if wparam.0 == HOTKEY_ID as usize {
                toggle_recording(hwnd);
            }
            LRESULT(0)
        }
        WM_COMMAND => {
            // Low word of wParam carries the menu-item id.
            let id = (wparam.0 & 0xFFFF) as usize;
            if id == IDM_EXIT {
                // Clean up and exit
                SESSION.with(|s| {
                    let mut session = s.borrow_mut();
                    if let Some(mut sess) = session.take() {
                        sess.stop();
                    }
                });
                unsafe { DestroyWindow(hwnd).unwrap() };
            } else if id == IDM_CHANGE_HOTKEY {
                show_hotkey_picker(hwnd);
                // Update tray tooltip with new hotkey name
                set_tray_tooltip(hwnd, &format!("语音输入 ({})", config::vk_name(config::get().hotkey_vk)), false);
            } else if id == IDM_TOGGLE_MEDIA_PAUSE {
                let enabled = config::get().media_pause_enabled;
                config::set_media_pause(!enabled);
            } else if id == IDM_SET_API_KEY {
                show_api_key_dialog(hwnd);
            } else if id == IDM_SET_MODEL {
                show_model_dialog(hwnd);
            }
            LRESULT(0)
        }
        x if x == WM_TRAYICON => {
            // Low word of lParam is the original mouse message from the tray.
            let event = (lparam.0 & 0xFFFF) as u32;
            if event == WM_RBUTTONUP {
                show_context_menu(hwnd);
            }
            LRESULT(0)
        }
        WM_DESTROY => {
            // Remove tray icon
            let nid = NOTIFYICONDATAW {
                cbSize: std::mem::size_of::<NOTIFYICONDATAW>() as u32,
                hWnd: hwnd,
                uID: 1,
                ..Default::default()
            };
            unsafe { let _ = Shell_NotifyIconW(NIM_DELETE, &nid); };
            // Unregister hotkey
            unsafe { let _ = UnregisterHotKey(Some(hwnd), HOTKEY_ID); };
            unsafe { PostQuitMessage(0) };
            LRESULT(0)
        }
        _ => unsafe { DefWindowProcW(hwnd, msg, wparam, lparam) },
    }
}
/// Create the app's invisible message-only window. It never shows on screen;
/// it exists to receive hotkey, tray, and menu messages for `wnd_proc`.
fn create_hidden_window() -> HWND {
    unsafe {
        let instance = GetModuleHandleW(None).unwrap();
        let wc = WNDCLASSEXW {
            cbSize: std::mem::size_of::<WNDCLASSEXW>() as u32,
            lpfnWndProc: Some(wnd_proc),
            hInstance: instance.into(),
            lpszClassName: w!("VoiceIMEWindow"),
            hbrBackground: std::mem::transmute(GetStockObject(BLACK_BRUSH)),
            ..Default::default()
        };
        RegisterClassExW(&wc);
        // HWND_MESSAGE parent makes this a message-only window: zero-sized,
        // never painted, but able to own the tray icon and hotkey.
        let hwnd = CreateWindowExW(
            WINDOW_EX_STYLE::default(),
            w!("VoiceIMEWindow"),
            w!("Voice IME"),
            WS_OVERLAPPEDWINDOW,
            0,
            0,
            0,
            0,
            Some(HWND_MESSAGE), // Message-only window
            None,
            Some(instance.into()),
            None,
        )
        .unwrap();
        hwnd
    }
}
fn create_tray_icon(hwnd: HWND) {
let mut nid = NOTIFYICONDATAW {
cbSize: std::mem::size_of::<NOTIFYICONDATAW>() as u32,
hWnd: hwnd,
uID: 1,
uFlags: NIF_MESSAGE | NIF_ICON | NIF_TIP,
uCallbackMessage: WM_TRAYICON,
..Default::default()
};
nid.hIcon = load_icon_from_ico(ICO_IDLE);
let tip = "语音输入 - 空闲 (F10)";
let tip_wide: Vec<u16> = tip.encode_utf16().chain(std::iter::once(0)).collect();
let len = tip_wide.len().min(nid.szTip.len());
nid.szTip[..len].copy_from_slice(&tip_wide[..len]);
unsafe {
let _ = Shell_NotifyIconW(NIM_ADD, &nid);
}
}
/// Application entry point: load config, create the hidden window, ensure an
/// API key exists, register the hotkey, add the tray icon, then pump messages
/// until WM_QUIT.
fn main() {
    config::load();
    let hwnd = create_hidden_window();
    HWND_MAIN.with(|h| *h.borrow_mut() = hwnd);
    // Prompt for API key if not configured
    if config::get().api_key.is_empty() {
        // First-run flow: without a key the app is useless, so exit on cancel.
        if !show_api_key_dialog(hwnd) {
            return;
        }
    }
    // Register global hotkey from config
    if !register_configured_hotkey(hwnd) {
        eprintln!("[voice-ime] Failed to register hotkey. Is another instance running?");
        return;
    }
    create_tray_icon(hwnd);
    let hotkey_name = config::vk_name(config::get().hotkey_vk);
    set_tray_tooltip(hwnd, &format!("语音输入 ({})", hotkey_name), false);
    eprintln!("[voice-ime] Running. Press {} to toggle recording.", hotkey_name);
    // Message loop
    unsafe {
        let mut msg = MSG::default();
        while GetMessageW(&mut msg, None, 0, 0).as_bool() {
            let _ = TranslateMessage(&msg);
            DispatchMessageW(&msg);
        }
    }
    eprintln!("[voice-ime] Exiting.");
}

99
src/session.rs Normal file
View File

@ -0,0 +1,99 @@
use std::sync::{Arc, Mutex};
use std::thread;
use tokio::sync::mpsc;
use crate::audio::AudioCapture;
use crate::config;
use crate::input;
use crate::ws;
/// Tracks text state across multiple speech segments in a VAD session.
struct TextState {
    /// Concatenation of all completed segment transcripts.
    completed_text: String,
    /// Current segment's partial preview (text + stash from latest .text event).
    current_partial: String,
    /// The full text we last typed into the input field.
    last_displayed: String,
}
/// One active recording: the audio capture stream plus the worker thread
/// running the WebSocket session. Fields are Options so `stop()` can take
/// ownership and be safely called more than once (e.g. again from Drop).
pub struct RecordingSession {
    stop_tx: Option<mpsc::Sender<()>>,
    thread_handle: Option<thread::JoinHandle<()>>,
    _capture: Option<AudioCapture>,
}
impl RecordingSession {
    /// Start capturing audio and streaming it to the ASR service.
    ///
    /// Audio flows: cpal callback → `audio_tx` → worker thread → WebSocket.
    /// ASR events come back on the worker thread, where `on_event` updates
    /// the text state and types incremental diffs into the focused window.
    pub fn start() -> Result<Self, String> {
        let (stop_tx, stop_rx) = mpsc::channel::<()>(1);
        let (audio_tx, audio_rx) = mpsc::unbounded_channel::<Vec<u8>>();
        let (capture, _audio_cfg) = AudioCapture::start(audio_tx)?;
        // Snapshot config on this thread; the worker must not touch the
        // thread-local config store.
        let cfg = config::get();
        let api_key = cfg.api_key.clone();
        let model = cfg.model.clone();
        let thread_handle = thread::spawn(move || {
            // A dedicated single-threaded runtime per session keeps the main
            // UI thread free of async machinery.
            let rt = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .expect("Failed to create tokio runtime");
            let state = Arc::new(Mutex::new(TextState {
                completed_text: String::new(),
                current_partial: String::new(),
                last_displayed: String::new(),
            }));
            let on_event = {
                let state = state.clone();
                move |event: ws::AsrEvent| {
                    let mut st = state.lock().unwrap();
                    match event {
                        // Partial: preview = completed segments + live partial.
                        ws::AsrEvent::Partial { text, stash } => {
                            st.current_partial = format!("{text}{stash}");
                            let full = format!("{}{}", st.completed_text, st.current_partial);
                            st.last_displayed = input::apply_text_update(&st.last_displayed, &full);
                        }
                        // Segment done: fold it into the committed text and
                        // drop the partial preview.
                        ws::AsrEvent::SegmentCompleted { transcript } => {
                            st.completed_text.push_str(&transcript);
                            st.current_partial.clear();
                            let full = st.completed_text.clone();
                            st.last_displayed = input::apply_text_update(&st.last_displayed, &full);
                        }
                    }
                }
            };
            let result = rt.block_on(ws::run_ws_session(&api_key, &model, audio_rx, stop_rx, on_event));
            if let Err(e) = result {
                eprintln!("[voice-ime] Recording session error: {e}");
            }
        });
        Ok(RecordingSession {
            stop_tx: Some(stop_tx),
            thread_handle: Some(thread_handle),
            _capture: Some(capture),
        })
    }
    /// Stop the session: tear down audio capture first (closing the audio
    /// channel), signal the worker, then block until it joins. Idempotent —
    /// every step is guarded by `take()`.
    pub fn stop(&mut self) {
        self._capture.take();
        if let Some(tx) = self.stop_tx.take() {
            let _ = tx.blocking_send(());
        }
        if let Some(handle) = self.thread_handle.take() {
            let _ = handle.join();
        }
    }
}
impl Drop for RecordingSession {
    /// Ensure the worker thread and audio capture are torn down even if the
    /// caller never calls `stop()` explicitly. `stop()` is idempotent, so an
    /// explicit stop followed by drop is safe.
    fn drop(&mut self) {
        self.stop();
    }
}

31
src/sound.rs Normal file
View File

@ -0,0 +1,31 @@
use rodio::{Decoder, OutputStream, Sink};
use std::io::Cursor;
// Notification sounds embedded into the binary at compile time.
static SND_START: &[u8] = include_bytes!("../assets/start.mp3");
static SND_STOP: &[u8] = include_bytes!("../assets/stop.mp3");
/// Play an embedded sound in a background thread (non-blocking).
///
/// Playback is best-effort: any failure (no output device, sink creation,
/// undecodable data) silently skips the sound.
fn play_bytes(data: &'static [u8]) {
    std::thread::spawn(move || {
        // `_stream` must stay alive for the whole playback; it lives until
        // this thread finishes sleeping on the sink.
        if let Ok((_stream, handle)) = OutputStream::try_default() {
            if let Ok(sink) = Sink::try_new(&handle) {
                if let Ok(source) = Decoder::new(Cursor::new(data)) {
                    sink.append(source);
                    // Block this background thread until playback completes.
                    sink.sleep_until_end();
                }
            }
        }
    });
}
/// Play the recording-start sound (non-blocking, best-effort).
pub fn play_start() {
    play_bytes(SND_START);
}
/// Play the recording-stop sound (non-blocking, best-effort).
pub fn play_stop() {
    play_bytes(SND_STOP);
}

230
src/ws.rs Normal file
View File

@ -0,0 +1,230 @@
use base64::{engine::general_purpose::STANDARD, Engine as _};
use futures_util::{SinkExt, StreamExt};
use serde_json::Value;
use tokio::sync::mpsc;
use tokio_tungstenite::{
connect_async_tls_with_config,
tungstenite::{http::Request, Message},
};
const WS_BASE_URL: &str = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model=";
/// Events emitted by the ASR WebSocket session.
///
/// Derives `Debug`/`Clone`/`PartialEq` so events can be logged, buffered,
/// and compared in tests; this is additive and backward-compatible.
#[derive(Debug, Clone, PartialEq)]
pub enum AsrEvent {
    /// Partial result for current speech segment. Full preview = text + stash.
    Partial { text: String, stash: String },
    /// Final result for a completed speech segment.
    SegmentCompleted { transcript: String },
}
/// Run a WebSocket ASR session using the Qwen3 ASR Realtime API.
///
/// Protocol flow:
/// 1. Connect with bearer-token auth and wait for `session.created`.
/// 2. Send `session.update` (text output, pcm16 @ 16 kHz, Chinese
///    transcription, server-side VAD) and wait for `session.updated`.
/// 3. Sender task forwards audio chunks from `audio_rx` as base64
///    `input_audio_buffer.append` events. On a stop signal — or when the
///    audio channel closes — it drains pending audio, sends
///    `session.finish`, and notifies the receiver task via `finish_tx`.
/// 4. Receiver task dispatches transcription events to `on_event` until
///    the server replies `session.finished` (or an error / EOF occurs).
///
/// # Errors
/// Returns `Err` for handshake or session-configuration failures. Errors
/// after audio streaming has started are logged to stderr, and the function
/// returns `Ok(())` once both tasks have exited.
pub async fn run_ws_session(
    api_key: &str,
    model: &str,
    mut audio_rx: mpsc::UnboundedReceiver<Vec<u8>>,
    mut stop_rx: mpsc::Receiver<()>,
    on_event: impl Fn(AsrEvent) + Send + 'static,
) -> Result<(), String> {
    let ws_url = format!("{WS_BASE_URL}{model}");
    // Build request with auth headers. The WebSocket upgrade headers are set
    // manually because the request is constructed by hand rather than via
    // tungstenite's IntoClientRequest.
    let request = Request::builder()
        .uri(&ws_url)
        .header("Authorization", format!("Bearer {api_key}"))
        .header("OpenAI-Beta", "realtime=v1")
        .header("Host", "dashscope.aliyuncs.com")
        .header("Connection", "Upgrade")
        .header("Upgrade", "websocket")
        .header("Sec-WebSocket-Version", "13")
        .header(
            "Sec-WebSocket-Key",
            tokio_tungstenite::tungstenite::handshake::client::generate_key(),
        )
        .body(())
        .map_err(|e| format!("Build request failed: {e}"))?;
    let (ws_stream, _) = connect_async_tls_with_config(request, None, false, None)
        .await
        .map_err(|e| format!("WebSocket connect failed: {e}"))?;
    let (mut write, mut read) = ws_stream.split();
    // 1. Wait for session.created
    wait_for_type(&mut read, "session.created").await?;
    eprintln!("[voice-ime] WS: session.created");
    // 2. Send session.update (VAD mode): server-side voice activity
    //    detection with a 400 ms silence cutoff per segment.
    let session_update = serde_json::json!({
        "event_id": "evt_session_update",
        "type": "session.update",
        "session": {
            "modalities": ["text"],
            "input_audio_format": "pcm16",
            "sample_rate": 16000,
            "input_audio_transcription": {
                "language": "zh"
            },
            "turn_detection": {
                "type": "server_vad",
                "threshold": 0.0,
                "silence_duration_ms": 400
            }
        }
    });
    write
        .send(Message::Text(session_update.to_string().into()))
        .await
        .map_err(|e| format!("Send session.update failed: {e}"))?;
    // 3. Wait for session.updated
    wait_for_type(&mut read, "session.updated").await?;
    eprintln!("[voice-ime] WS: session.updated, streaming audio...");
    // 4. Spawn sender task: forwards audio as base64 JSON events.
    //    `finish_tx` tells the receiver task that session.finish was sent.
    let (finish_tx, mut finish_rx) = mpsc::channel::<()>(1);
    let send_task = tokio::spawn(async move {
        let mut seq = 0u64;
        loop {
            tokio::select! {
                // `biased` so a pending stop signal always wins over more audio.
                biased;
                _ = stop_rx.recv() => {
                    // Drain remaining audio already queued in the channel.
                    while let Ok(chunk) = audio_rx.try_recv() {
                        let _ = send_audio(&mut write, &chunk, &mut seq).await;
                    }
                    // Send session.finish
                    let finish = serde_json::json!({
                        "event_id": "evt_finish",
                        "type": "session.finish"
                    });
                    let _ = write.send(Message::Text(finish.to_string().into())).await;
                    let _ = finish_tx.send(()).await;
                    break;
                }
                chunk = audio_rx.recv() => {
                    match chunk {
                        Some(data) => {
                            if send_audio(&mut write, &data, &mut seq).await.is_err() {
                                break;
                            }
                        }
                        None => {
                            // Audio channel closed (capture dropped) — finish
                            // the session the same way as an explicit stop.
                            let finish = serde_json::json!({
                                "event_id": "evt_finish",
                                "type": "session.finish"
                            });
                            let _ = write.send(Message::Text(finish.to_string().into())).await;
                            let _ = finish_tx.send(()).await;
                            break;
                        }
                    }
                }
            }
        }
    });
    // 5. Receive task: process server events until session.finished, a
    //    server error, a read error, or EOF.
    let recv_task = tokio::spawn(async move {
        let mut finish_sent = false;
        loop {
            tokio::select! {
                biased;
                _ = finish_rx.recv(), if !finish_sent => {
                    finish_sent = true;
                    // Keep reading until session.finished
                }
                msg = read.next() => {
                    match msg {
                        Some(Ok(Message::Text(text))) => {
                            // Non-JSON frames are silently skipped.
                            let v: Value = match serde_json::from_str(&text) {
                                Ok(v) => v,
                                Err(_) => continue,
                            };
                            let event_type = v["type"].as_str().unwrap_or("");
                            match event_type {
                                "conversation.item.input_audio_transcription.text" => {
                                    // Partial result; full preview = text + stash.
                                    let text_part = v["text"].as_str().unwrap_or("").to_string();
                                    let stash = v["stash"].as_str().unwrap_or("").to_string();
                                    on_event(AsrEvent::Partial { text: text_part, stash });
                                }
                                "conversation.item.input_audio_transcription.completed" => {
                                    let transcript = v["transcript"].as_str().unwrap_or("").to_string();
                                    eprintln!("[voice-ime] Segment completed: {transcript}");
                                    on_event(AsrEvent::SegmentCompleted { transcript });
                                }
                                "session.finished" => {
                                    eprintln!("[voice-ime] WS: session.finished");
                                    return;
                                }
                                "error" => {
                                    let msg = v["error"]["message"].as_str().unwrap_or("unknown");
                                    eprintln!("[voice-ime] ASR error: {msg}");
                                    return;
                                }
                                _ => {} // ignore speech_started, speech_stopped, committed, etc.
                            }
                        }
                        Some(Ok(_)) => {} // ping/pong/binary
                        Some(Err(e)) => {
                            eprintln!("[voice-ime] WS read error: {e}");
                            return;
                        }
                        None => return,
                    }
                }
            }
        }
    });
    let _ = send_task.await;
    let _ = recv_task.await;
    Ok(())
}
/// Send a PCM audio chunk as a base64-encoded `input_audio_buffer.append`
/// event, incrementing `seq` to produce a unique event id per chunk.
///
/// # Errors
/// Returns a formatted error string if the sink rejects the message.
async fn send_audio<S>(write: &mut S, pcm_bytes: &[u8], seq: &mut u64) -> Result<(), String>
where
    S: futures_util::Sink<Message> + Unpin,
    S::Error: std::fmt::Display,
{
    *seq += 1;
    let payload = serde_json::json!({
        "event_id": format!("evt_audio_{seq}"),
        "type": "input_audio_buffer.append",
        "audio": STANDARD.encode(pcm_bytes)
    });
    match write.send(Message::Text(payload.to_string().into())).await {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("Send audio failed: {e}")),
    }
}
/// Consume messages until one with `type == expected` arrives, returning it.
///
/// Non-text frames and unrelated event types are skipped. An `error` event
/// received while waiting aborts with the server's message, as does a read
/// error, a JSON parse failure, or the stream ending.
async fn wait_for_type<S>(read: &mut S, expected: &str) -> Result<Value, String>
where
    S: futures_util::Stream<Item = Result<Message, tokio_tungstenite::tungstenite::Error>> + Unpin,
{
    while let Some(item) = read.next().await {
        let msg = item.map_err(|e| format!("WS read error: {e}"))?;
        let Message::Text(text) = msg else {
            continue; // ignore ping/pong/binary frames
        };
        let v: Value =
            serde_json::from_str(&text).map_err(|e| format!("Parse JSON failed: {e}"))?;
        match v["type"].as_str().unwrap_or("") {
            t if t == expected => return Ok(v),
            "error" => {
                let msg = v["error"]["message"].as_str().unwrap_or("unknown");
                return Err(format!("Server error: {msg}"));
            }
            _ => {} // ignore other event types while waiting
        }
    }
    Err("Connection closed unexpectedly".to_string())
}