// gestura_core/speech.rs

1//! Speech processing integration.
2//!
3//! Speech domain types are defined in `gestura-core-audio` and re-exported
4//! here.  This module adds the [`SpeechProcessorCoreExt`] extension trait
5//! which provides methods that depend on core-only types (secure config
6//! loading, LLM provider selection).
7
8pub use gestura_core_audio::speech::*;
9
use crate::config::{AppConfig, AppConfigSecurityExt};
use crate::error::AppError;
use crate::llm_provider::{AgentContext, select_provider};
use std::path::Path;
use std::sync::LazyLock;
14
/// Extension trait providing integration methods on [`SpeechProcessor`]
/// that depend on core-only types (secure storage, LLM provider selection).
///
/// Callers must `use gestura_core::speech::SpeechProcessorCoreExt` (or
/// `use gestura_core::SpeechProcessorCoreExt`) to access these methods.
#[async_trait::async_trait]
pub trait SpeechProcessorCoreExt {
    /// Transcribe audio file to text using core-owned STT provider selection.
    ///
    /// Uses the unified STT provider abstraction from `stt_provider` module.
    /// The provider is selected based on `AppConfig.voice.provider` and respects
    /// configured base_url and model settings.
    ///
    /// # Errors
    ///
    /// Returns an [`AppError`] when the selected STT provider fails to
    /// transcribe the file at `audio_path`.
    async fn transcribe_audio(&self, audio_path: &Path) -> Result<TranscriptionResult, AppError>;

    /// Process transcribed text with configured LLM provider.
    ///
    /// # Errors
    ///
    /// Returns [`AppError::Llm`] when the underlying LLM call fails.
    async fn process_with_llm(&self, text: &str) -> Result<LlmResponse, AppError>;
}
32
33#[async_trait::async_trait]
34impl SpeechProcessorCoreExt for SpeechProcessor {
35    async fn transcribe_audio(&self, audio_path: &Path) -> Result<TranscriptionResult, AppError> {
36        let app_config = AppConfig::load_async().await;
37        // Use secure storage (keychain when enabled) for API key fallback chains.
38        let secret_provider = crate::secrets::SecureStorageSecretProvider::new(
39            crate::security::create_secure_storage(),
40        );
41        let provider =
42            crate::stt_provider::select_provider(&app_config, Some(&secret_provider)).await;
43
44        tracing::info!(
45            "Transcribing audio with provider: {} (config.voice.provider={})",
46            provider.provider_id(),
47            app_config.voice.provider
48        );
49
50        provider.transcribe_file(audio_path).await
51    }
52
53    async fn process_with_llm(&self, text: &str) -> Result<LlmResponse, AppError> {
54        let app_config = AppConfig::load_async().await;
55        let provider = select_provider(
56            &app_config,
57            &AgentContext {
58                agent_id: "speech".into(),
59            },
60        );
61
62        tracing::info!("Processing with LLM provider: {}", app_config.llm.primary);
63
64        let response = provider
65            .call(text)
66            .await
67            .map_err(|e| AppError::Llm(format!("LLM processing failed: {}", e)))?;
68
69        Ok(LlmResponse {
70            text: response,
71            provider: app_config.llm.primary.clone(),
72            cached: false,
73        })
74    }
75}
76
77// Global speech processor instance
78lazy_static::lazy_static! {
79    static ref SPEECH_PROCESSOR: SpeechProcessor = SpeechProcessor::new();
80}
81
82/// Get the global speech processor instance
83pub fn get_speech_processor() -> &'static SpeechProcessor {
84    &SPEECH_PROCESSOR
85}
86
87/// Check if speech is currently being recorded
88pub fn is_speech_recording() -> bool {
89    SPEECH_PROCESSOR.is_recording()
90}
91
92/// Update the global speech processor configuration
93pub fn update_speech_config(config: SpeechConfig) {
94    SPEECH_PROCESSOR.update_config(config);
95}