1use std::collections::HashMap;
28
29use chrono::{DateTime, Utc};
30use gestura_core_pipeline::RequestSource;
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
/// `true` when the crate is compiled with the `advanced-primitives` Cargo
/// feature, `false` otherwise.
///
/// NOTE(review): nothing in the visible part of this file reads this flag;
/// callers are presumably expected to gate on it — confirm.
pub const INTENT_NORMALIZATION_ENABLED: bool = cfg!(feature = "advanced-primitives");
37
/// The channel through which a piece of user input arrived.
///
/// Serialized through serde with `snake_case` variant names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InputModality {
    /// Input handled by the voice normalizer (filler stripping applies).
    Voice,
    /// Typed text input.
    Chat,
    /// Input described by a gesture type rather than free text.
    Gesture,
    /// A modality without a dedicated variant; the payload is its name and
    /// is what [`InputModality::label`] returns.
    Future(String),
}
55
56impl InputModality {
57 pub fn from_request_source(source: &RequestSource) -> Self {
59 match source {
60 RequestSource::GuiVoice => Self::Voice,
61 RequestSource::GuiText | RequestSource::CliTui | RequestSource::CliBasic => Self::Chat,
62 RequestSource::Orchestrator | RequestSource::Unknown => Self::Chat,
65 }
66 }
67
68 pub fn label(&self) -> &str {
70 match self {
71 Self::Voice => "voice",
72 Self::Chat => "chat",
73 Self::Gesture => "gesture",
74 Self::Future(name) => name.as_str(),
75 }
76 }
77}
78
/// Structured payload accompanying a gesture input.
///
/// The optional sensor fields are omitted from serialized output when
/// absent and default to `None` when missing on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GestureData {
    /// Name of the recognized gesture (e.g. `"tap"`, `"double_tap"`).
    pub gesture_type: String,
    /// Three-component acceleration sample, if supplied.
    /// NOTE(review): axis order and units are not established in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub acceleration: Option<[f32; 3]>,
    /// Three-component gyroscope sample, if supplied.
    /// NOTE(review): axis order and units are not established in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gyroscope: Option<[f32; 3]>,
    /// Recognition confidence reported with the gesture; falls back to
    /// `default_gesture_confidence()` when missing during deserialization.
    #[serde(default = "default_gesture_confidence")]
    pub confidence: f32,
}
98
/// Serde default for [`GestureData::confidence`] when the field is missing.
fn default_gesture_confidence() -> f32 {
    const FALLBACK_CONFIDENCE: f32 = 0.9;
    FALLBACK_CONFIDENCE
}
102
/// Un-normalized input handed to [`normalize_input_to_intent`].
#[derive(Debug, Clone)]
pub struct RawInput {
    /// Text of the input; for gestures without `gesture_data` it is used as
    /// the gesture type.
    pub text: String,
    /// Selects which normalizer processes this input.
    pub modality: InputModality,
    /// Optional session identifier. None of the normalizers in this file
    /// read it.
    pub session_id: Option<String>,
    /// Structured gesture payload; only consulted for gesture inputs.
    pub gesture_data: Option<GestureData>,
}
119
/// Modality-independent result of normalizing a [`RawInput`].
///
/// `parameters` and `context_hints` are skipped during serialization when
/// empty and default to empty when missing on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Intent {
    /// Identifier for this intent; the normalizers fill it with a fresh
    /// UUID v4 string.
    pub id: String,
    /// Time at which the intent was created (UTC).
    pub timestamp: DateTime<Utc>,
    /// Modality the intent originated from.
    pub modality: InputModality,
    /// The original, unmodified input text.
    pub raw_source: String,
    /// The extracted action: the first sentence of the text for textual
    /// modalities, or a mapped action name for gestures.
    pub primary_action: String,
    /// Extra structured data; the gesture normalizer stores the gesture type
    /// and any sensor samples here.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub parameters: HashMap<String, serde_json::Value>,
    /// Heuristic confidence assigned by the normalizer.
    pub confidence: f32,
    /// Free-form tags describing how the intent was produced
    /// (e.g. `"source:chat_text"`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub context_hints: Vec<String>,
}
150
151pub fn normalize_input_to_intent(raw_input: RawInput) -> Intent {
160 match &raw_input.modality {
161 InputModality::Voice => normalize_voice(raw_input),
162 InputModality::Chat => normalize_chat(raw_input),
163 InputModality::Gesture => normalize_gesture(raw_input),
164 InputModality::Future(_) => normalize_future(raw_input),
165 }
166}
167
168fn extract_primary_action(text: &str) -> String {
174 let trimmed = text.trim();
175 if trimmed.is_empty() {
176 return String::new();
177 }
178
179 let end = find_sentence_boundary(trimmed);
180 let first_sentence = trimmed[..end].trim();
181
182 first_sentence.chars().take(128).collect()
184}
185
/// Returns the byte index of the first sentence terminator in `text`, or
/// `text.len()` when there is none.
///
/// * `!`, `?` and a newline always end the sentence.
/// * `.` ends the sentence only when it is the last byte or is followed by
///   ASCII whitespace (space, tab, `\n`, `\r`, form feed). A dot followed by
///   anything else — as in `foo.rs`, `1.5` or `example.com` — is treated as
///   part of a token.
///
/// All terminators are single ASCII bytes, so the returned index is always a
/// valid `char` boundary and can be used to slice `text`.
fn find_sentence_boundary(text: &str) -> usize {
    let bytes = text.as_bytes();
    bytes
        .iter()
        .enumerate()
        .find(|&(i, &b)| match b {
            b'!' | b'?' | b'\n' => true,
            // `None` means the dot is the final byte of the text.
            b'.' => bytes.get(i + 1).map_or(true, u8::is_ascii_whitespace),
            _ => false,
        })
        .map_or(text.len(), |(i, _)| i)
}
225
/// Filler words and phrases removed from voice transcripts by
/// `strip_fillers`.
///
/// Entries are processed in the order listed and are lowercased before
/// matching, so their casing here does not matter. Keep a longer phrase ahead
/// of any shorter entry it contains (e.g. "so basically" before "basically")
/// so the whole phrase is removed in one piece.
const VOICE_FILLER_WORDS: &[&str] = &[
    "um",
    "uh",
    "er",
    "ah",
    "like",
    "you know",
    "so basically",
    "basically",
    "I mean",
    "well",
    "okay so",
    "right so",
];
243
244fn strip_fillers(text: &str) -> String {
245 let mut result = text.to_string();
246 for filler in VOICE_FILLER_WORDS {
247 let pattern_lower = filler.to_lowercase();
253 while let Some((pos, end)) = find_filler_in_original(&result, &pattern_lower) {
254 result = format!("{}{}", &result[..pos], &result[end..]);
255 }
256 }
257 collapse_whitespace(&result)
259}
260
/// Finds the first whole-word, case-insensitive occurrence of `needle` in
/// `haystack` and returns its `(start, end)` byte range within `haystack`.
///
/// `needle` must already be lowercase. Each haystack character is lowercased
/// individually for the comparison, so a character whose lowercase form
/// expands to several characters never matches. The returned offsets always
/// fall on `char` boundaries of `haystack`, which makes them safe to slice
/// with even when the text contains multi-byte characters.
///
/// A candidate is rejected when it is embedded in a larger word — that is,
/// when a word character (alphanumeric or `_`) sits directly next to a
/// word-character edge of the needle. This keeps fillers such as "um" or
/// "er" from being cut out of "summary" or "user".
///
/// An empty `needle` yields `Some((0, 0))`; `None` means no occurrence.
fn find_filler_in_original(haystack: &str, needle: &str) -> Option<(usize, usize)> {
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    if needle.is_empty() {
        return Some((0, 0));
    }

    let needle_chars: Vec<char> = needle.chars().collect();
    let needle_len = needle_chars.len();
    // A boundary is only required on a side where the needle itself begins or
    // ends with a word character.
    let needs_left_boundary = needle_chars.first().copied().map_or(false, is_word_char);
    let needs_right_boundary = needle_chars.last().copied().map_or(false, is_word_char);

    // Pair every character with its byte offset so matches can be reported in
    // terms of the original string.
    let chars: Vec<(usize, char)> = haystack.char_indices().collect();

    chars
        .windows(needle_len)
        .enumerate()
        .find_map(|(i, window)| {
            let text_matches = window
                .iter()
                .zip(&needle_chars)
                .all(|(&(_, hc), &nc)| hc.to_lowercase().eq(std::iter::once(nc)));
            if !text_matches {
                return None;
            }

            let following = chars.get(i + needle_len);
            let left_ok =
                !needs_left_boundary || i == 0 || !is_word_char(chars[i - 1].1);
            let right_ok =
                !needs_right_boundary || following.map_or(true, |&(_, c)| !is_word_char(c));
            if !(left_ok && right_ok) {
                return None;
            }

            let start = window[0].0;
            let end = following.map_or(haystack.len(), |&(offset, _)| offset);
            Some((start, end))
        })
}
313
/// Replaces every run of whitespace in `text` with a single space and drops
/// leading and trailing whitespace.
fn collapse_whitespace(text: &str) -> String {
    let words: Vec<&str> = text.split_whitespace().collect();
    words.join(" ")
}
330
/// Heuristic confidence for a cleaned voice transcript, based solely on its
/// whitespace-separated word count: no words → 0.0, one or two → 0.6,
/// three to seven → 0.75, eight or more → 0.85.
fn voice_confidence(text: &str) -> f32 {
    match text.split_whitespace().count() {
        0 => 0.0,
        1..=2 => 0.6,
        3..=7 => 0.75,
        _ => 0.85,
    }
}
345
346fn normalize_voice(raw: RawInput) -> Intent {
347 let cleaned = strip_fillers(&raw.text);
348 let primary_action = extract_primary_action(&cleaned);
349 let confidence = voice_confidence(&cleaned);
350
351 let mut context_hints = Vec::new();
352 context_hints.push("source:voice_transcript".to_string());
353 if cleaned.len() < raw.text.len() {
354 context_hints.push("fillers_stripped".to_string());
355 }
356
357 Intent {
358 id: Uuid::new_v4().to_string(),
359 timestamp: Utc::now(),
360 modality: InputModality::Voice,
361 raw_source: raw.text,
362 primary_action,
363 parameters: HashMap::new(),
364 confidence,
365 context_hints,
366 }
367}
368
369fn normalize_chat(raw: RawInput) -> Intent {
372 let trimmed = raw.text.trim().to_string();
373 let primary_action = extract_primary_action(&trimmed);
374
375 Intent {
376 id: Uuid::new_v4().to_string(),
377 timestamp: Utc::now(),
378 modality: InputModality::Chat,
379 raw_source: raw.text,
380 primary_action,
381 parameters: HashMap::new(),
382 confidence: 0.95,
383 context_hints: vec!["source:chat_text".to_string()],
384 }
385}
386
/// Maps a gesture name (compared case-insensitively) to an action name and
/// the confidence assigned to that mapping.
///
/// Unrecognized gestures yield `("unknown_gesture", 0.5)`.
fn gesture_to_action(gesture_type: &str) -> (&'static str, f32) {
    // (gesture name, action, mapping confidence)
    const GESTURE_ACTIONS: [(&str, &str, f32); 11] = [
        ("tap", "confirm", 0.9),
        ("double_tap", "execute", 0.92),
        ("triple_tap", "cancel", 0.88),
        ("tilt_left", "previous", 0.85),
        ("tilt_right", "next", 0.85),
        ("tilt_up", "scroll_up", 0.8),
        ("tilt_down", "scroll_down", 0.8),
        ("twist_cw", "increase", 0.82),
        ("twist_ccw", "decrease", 0.82),
        ("shake", "dismiss", 0.78),
        ("hold", "select", 0.88),
    ];

    let wanted = gesture_type.to_lowercase();
    GESTURE_ACTIONS
        .iter()
        .find(|(name, _, _)| *name == wanted.as_str())
        .map_or(("unknown_gesture", 0.5), |&(_, action, confidence)| {
            (action, confidence)
        })
}
406
407fn normalize_gesture(raw: RawInput) -> Intent {
408 let gesture_type = raw
409 .gesture_data
410 .as_ref()
411 .map(|g| g.gesture_type.as_str())
412 .unwrap_or_else(|| raw.text.trim());
413
414 let device_confidence = raw
415 .gesture_data
416 .as_ref()
417 .map(|g| g.confidence)
418 .unwrap_or(0.9);
419
420 let (action, mapping_confidence) = gesture_to_action(gesture_type);
421
422 let confidence = device_confidence * mapping_confidence;
424
425 let mut parameters = HashMap::new();
426 parameters.insert(
427 "gesture_type".to_string(),
428 serde_json::Value::String(gesture_type.to_string()),
429 );
430
431 if let Some(ref gesture) = raw.gesture_data {
432 if let Some(accel) = gesture.acceleration {
433 parameters.insert("acceleration".to_string(), serde_json::json!(accel));
434 }
435 if let Some(gyro) = gesture.gyroscope {
436 parameters.insert("gyroscope".to_string(), serde_json::json!(gyro));
437 }
438 }
439
440 let mut context_hints = vec!["source:gesture_ring".to_string()];
441 if action == "unknown_gesture" {
442 context_hints.push("unmapped_gesture".to_string());
443 }
444
445 Intent {
446 id: Uuid::new_v4().to_string(),
447 timestamp: Utc::now(),
448 modality: InputModality::Gesture,
449 raw_source: raw.text,
450 primary_action: action.to_string(),
451 parameters,
452 confidence,
453 context_hints,
454 }
455}
456
457fn normalize_future(raw: RawInput) -> Intent {
460 let primary_action = extract_primary_action(&raw.text);
461
462 Intent {
463 id: Uuid::new_v4().to_string(),
464 timestamp: Utc::now(),
465 modality: raw.modality.clone(),
466 raw_source: raw.text,
467 primary_action,
468 parameters: HashMap::new(),
469 confidence: 0.7,
470 context_hints: vec!["source:future_modality".to_string()],
471 }
472}
473
/// Unit tests for the normalizers and their text helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- End-to-end normalization per modality ----

    /// A voice transcript yields a populated intent and keeps the raw text.
    #[test]
    fn voice_produces_valid_intent() {
        let raw = RawInput {
            text: "Um, like, please create a new file called foo.rs".to_string(),
            modality: InputModality::Voice,
            session_id: Some("session-1".to_string()),
            gesture_data: None,
        };

        let intent = normalize_input_to_intent(raw);

        assert_eq!(intent.modality, InputModality::Voice);
        assert!(!intent.id.is_empty());
        assert!(!intent.primary_action.is_empty());
        assert!(
            !intent.primary_action.to_lowercase().contains("um,"),
            "Filler 'um' should be stripped"
        );
        assert!(intent.confidence > 0.0 && intent.confidence <= 1.0);
        assert!(
            intent
                .context_hints
                .contains(&"source:voice_transcript".to_string())
        );
        // The original transcript must be preserved verbatim in `raw_source`.
        assert!(intent.raw_source.contains("Um"));
    }

    /// Chat text passes through unchanged with the fixed chat confidence.
    #[test]
    fn chat_produces_valid_intent() {
        let raw = RawInput {
            text: "Refactor the authentication module to use OAuth2".to_string(),
            modality: InputModality::Chat,
            session_id: None,
            gesture_data: None,
        };

        let intent = normalize_input_to_intent(raw);

        assert_eq!(intent.modality, InputModality::Chat);
        assert!(!intent.id.is_empty());
        assert_eq!(
            intent.primary_action,
            "Refactor the authentication module to use OAuth2"
        );
        assert!(
            (intent.confidence - 0.95).abs() < f32::EPSILON,
            "Chat confidence should be 0.95"
        );
        assert!(
            intent
                .context_hints
                .contains(&"source:chat_text".to_string())
        );
    }

    /// A known gesture with sensor data maps to its action and parameters.
    #[test]
    fn gesture_produces_valid_intent() {
        let raw = RawInput {
            text: "double_tap".to_string(),
            modality: InputModality::Gesture,
            session_id: Some("session-2".to_string()),
            gesture_data: Some(GestureData {
                gesture_type: "double_tap".to_string(),
                acceleration: Some([0.1, 9.8, 0.3]),
                gyroscope: None,
                confidence: 0.95,
            }),
        };

        let intent = normalize_input_to_intent(raw);

        assert_eq!(intent.modality, InputModality::Gesture);
        assert_eq!(intent.primary_action, "execute");
        assert!(intent.confidence > 0.8);
        assert!(intent.parameters.contains_key("gesture_type"));
        assert!(intent.parameters.contains_key("acceleration"));
        assert!(
            intent
                .context_hints
                .contains(&"source:gesture_ring".to_string())
        );
    }

    /// Without `gesture_data`, the input text is used as the gesture type.
    #[test]
    fn gesture_without_data_falls_back_to_text() {
        let raw = RawInput {
            text: "tap".to_string(),
            modality: InputModality::Gesture,
            session_id: None,
            gesture_data: None,
        };

        let intent = normalize_input_to_intent(raw);
        assert_eq!(intent.primary_action, "confirm");
    }

    /// Unmapped gestures are flagged and receive a reduced confidence.
    #[test]
    fn unknown_gesture_has_low_confidence() {
        let raw = RawInput {
            text: "backflip".to_string(),
            modality: InputModality::Gesture,
            session_id: None,
            gesture_data: Some(GestureData {
                gesture_type: "backflip".to_string(),
                acceleration: None,
                gyroscope: None,
                confidence: 0.9,
            }),
        };

        let intent = normalize_input_to_intent(raw);
        assert_eq!(intent.primary_action, "unknown_gesture");
        assert!(intent.confidence < 0.6);
        assert!(
            intent
                .context_hints
                .contains(&"unmapped_gesture".to_string())
        );
    }

    /// A `Future` modality keeps its name and gets the fixed 0.7 confidence.
    #[test]
    fn future_modality_passes_through() {
        let raw = RawInput {
            text: "Neural signal: focus next element".to_string(),
            modality: InputModality::Future("neural".to_string()),
            session_id: None,
            gesture_data: None,
        };

        let intent = normalize_input_to_intent(raw);
        assert_eq!(intent.modality, InputModality::Future("neural".to_string()));
        assert!(!intent.primary_action.is_empty());
        assert!((intent.confidence - 0.7).abs() < f32::EPSILON);
    }

    /// Every modality fills in the same core fields of `Intent`.
    #[test]
    fn voice_chat_gesture_produce_equivalent_structs() {
        let voice = normalize_input_to_intent(RawInput {
            text: "hello world".to_string(),
            modality: InputModality::Voice,
            session_id: None,
            gesture_data: None,
        });
        let chat = normalize_input_to_intent(RawInput {
            text: "hello world".to_string(),
            modality: InputModality::Chat,
            session_id: None,
            gesture_data: None,
        });
        let gesture = normalize_input_to_intent(RawInput {
            text: "tap".to_string(),
            modality: InputModality::Gesture,
            session_id: None,
            gesture_data: None,
        });

        for intent in [&voice, &chat, &gesture] {
            assert!(!intent.id.is_empty());
            assert!(!intent.primary_action.is_empty());
            assert!(intent.confidence > 0.0);
            assert!(!intent.context_hints.is_empty());
        }
    }

    /// Only the GUI voice source maps to `Voice`; the others checked here map to `Chat`.
    #[test]
    fn modality_from_request_source() {
        assert_eq!(
            InputModality::from_request_source(&RequestSource::GuiVoice),
            InputModality::Voice
        );
        assert_eq!(
            InputModality::from_request_source(&RequestSource::GuiText),
            InputModality::Chat
        );
        assert_eq!(
            InputModality::from_request_source(&RequestSource::CliTui),
            InputModality::Chat
        );
        assert_eq!(
            InputModality::from_request_source(&RequestSource::Orchestrator),
            InputModality::Chat
        );
    }

    /// An intent survives a JSON round trip with its key fields intact.
    #[test]
    fn intent_serialization_roundtrip() {
        let intent = normalize_input_to_intent(RawInput {
            text: "Build the project".to_string(),
            modality: InputModality::Chat,
            session_id: Some("s-1".to_string()),
            gesture_data: None,
        });

        let json = serde_json::to_string(&intent).expect("serialize");
        let parsed: Intent = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(parsed.id, intent.id);
        assert_eq!(parsed.primary_action, intent.primary_action);
        assert_eq!(parsed.modality, intent.modality);
    }

    /// An empty transcript has no words, hence zero confidence.
    #[test]
    fn empty_text_has_zero_voice_confidence() {
        let intent = normalize_input_to_intent(RawInput {
            text: "".to_string(),
            modality: InputModality::Voice,
            session_id: None,
            gesture_data: None,
        });

        assert!((intent.confidence - 0.0).abs() < f32::EPSILON);
    }

    // ---- Filler stripping ----

    /// Fillers are removed regardless of their casing in the input.
    #[test]
    fn strip_fillers_removes_ascii_filler_case_insensitively() {
        assert_eq!(
            strip_fillers("Um please open the file"),
            "please open the file"
        );
        assert_eq!(
            strip_fillers("like please open the file"),
            "please open the file"
        );
        assert_eq!(strip_fillers("UM like uh do it"), "do it");
    }

    /// Text containing none of the filler strings is returned unchanged.
    #[test]
    fn strip_fillers_preserves_non_filler_content() {
        let input = "Create a new Rust project";
        assert_eq!(strip_fillers(input), input);
    }

    /// A multi-byte character before the filler must not break the byte
    /// offsets used to cut the filler out.
    #[test]
    fn strip_fillers_with_non_ascii_prefix_does_not_panic() {
        let input = "İ um please do this";
        let result = strip_fillers(input);
        assert!(
            !result.contains("um"),
            "filler 'um' should be removed, got: {result:?}"
        );
        assert!(
            result.contains('İ'),
            "non-ASCII prefix should be preserved, got: {result:?}"
        );
    }

    /// Same as above, with several multi-byte characters inside a word.
    #[test]
    fn strip_fillers_with_non_ascii_interleaved_does_not_panic() {
        let input = "Ñoño um test";
        let result = strip_fillers(input);
        assert!(
            !result.contains("um"),
            "filler 'um' should be removed, got: {result:?}"
        );
        assert!(
            result.contains("Ñoño"),
            "non-ASCII word should survive, got: {result:?}"
        );
    }

    /// The reported range must be usable to slice the original haystack.
    #[test]
    fn find_filler_returns_valid_original_byte_range() {
        let haystack = "İ um test";
        let needle = "um";
        let (start, end) = find_filler_in_original(haystack, needle).expect("should find 'um'");
        let before = &haystack[..start];
        let after = &haystack[end..];
        assert!(before.contains('İ'));
        assert_eq!(after.trim(), "test");
    }

    /// No occurrence yields `None`.
    #[test]
    fn find_filler_returns_none_when_not_present() {
        assert!(find_filler_in_original("hello world", "um").is_none());
    }

    /// An empty needle is reported as the empty range at the start.
    #[test]
    fn find_filler_empty_needle_returns_zero_range() {
        assert_eq!(find_filler_in_original("hello", ""), Some((0, 0)));
    }

    // ---- Primary-action extraction: dots inside tokens are not sentence ends ----

    #[test]
    fn extract_primary_action_does_not_split_on_filename_dot() {
        assert_eq!(
            extract_primary_action("please create foo.rs and add tests"),
            "please create foo.rs and add tests",
        );
        assert_eq!(
            extract_primary_action("open lib.rs for editing"),
            "open lib.rs for editing",
        );
        assert_eq!(
            extract_primary_action("edit Cargo.toml to add the dependency"),
            "edit Cargo.toml to add the dependency",
        );
    }

    #[test]
    fn extract_primary_action_does_not_split_on_version_number() {
        assert_eq!(
            extract_primary_action("upgrade to version 1.5 of the SDK"),
            "upgrade to version 1.5 of the SDK",
        );
        assert_eq!(
            extract_primary_action("pin gestura-core to 2.0.1 in Cargo.toml"),
            "pin gestura-core to 2.0.1 in Cargo.toml",
        );
    }

    #[test]
    fn extract_primary_action_does_not_split_on_url() {
        assert_eq!(
            extract_primary_action("visit https://example.com/path for the docs"),
            "visit https://example.com/path for the docs",
        );
    }

    #[test]
    fn extract_primary_action_does_not_split_on_method_call_dot() {
        assert_eq!(
            extract_primary_action("call vec.push() and return the result"),
            "call vec.push() and return the result",
        );
    }

    // ---- Primary-action extraction: real sentence ends ----

    /// A dot followed by a space ends the first sentence.
    #[test]
    fn extract_primary_action_splits_on_sentence_ending_dot() {
        assert_eq!(
            extract_primary_action("Fix the bug. Add tests afterwards."),
            "Fix the bug",
        );
    }

    /// A dot at the very end is a terminator; the dot inside `file.rs` is not.
    #[test]
    fn extract_primary_action_splits_on_dot_at_end_of_string() {
        assert_eq!(extract_primary_action("Check file.rs."), "Check file.rs",);
    }

    #[test]
    fn extract_primary_action_splits_on_exclamation_and_question() {
        assert_eq!(
            extract_primary_action("Do it now! Please hurry."),
            "Do it now",
        );
        assert_eq!(
            extract_primary_action("What should I do? Maybe this."),
            "What should I do",
        );
    }

    #[test]
    fn extract_primary_action_splits_on_newline() {
        assert_eq!(
            extract_primary_action("First line\nSecond line"),
            "First line",
        );
    }

    #[test]
    fn extract_primary_action_no_punctuation_returns_whole_text() {
        let input = "update the authentication module to use OAuth2";
        assert_eq!(extract_primary_action(input), input);
    }

    /// The action is capped at 128 characters.
    #[test]
    fn extract_primary_action_caps_at_128_chars() {
        let long_input = "a".repeat(200);
        let result = extract_primary_action(&long_input);
        assert_eq!(result.chars().count(), 128);
    }

    /// The byte after the dot is the lead byte of a multi-byte character,
    /// not whitespace, so the dot is not a sentence end.
    #[test]
    fn find_sentence_boundary_dot_before_non_ascii_is_not_a_boundary() {
        assert_eq!(extract_primary_action("foo.über alles"), "foo.über alles",);
    }
}