1use gestura_core_foundation::context::{
7 ContextCategory, EntityType, ExtractedEntity, RequestAnalysis,
8};
9use regex::Regex;
10use std::collections::HashMap;
11use std::sync::LazyLock;
12
13struct CategoryPattern {
15 keywords: &'static [&'static str],
16 phrases: &'static [&'static str],
17 category: ContextCategory,
18}
19
20const CATEGORY_PATTERNS: &[CategoryPattern] = &[
21 CategoryPattern {
22 keywords: &[
23 "file", "read", "write", "edit", "create", "delete", "save", "open", "path",
24 ],
25 phrases: &[
26 "show me",
27 "look at",
28 "what's in",
29 "create a",
30 "edit the",
31 "modify",
32 ],
33 category: ContextCategory::FileSystem,
34 },
35 CategoryPattern {
36 keywords: &[
37 "run", "execute", "shell", "command", "terminal", "bash", "sh", "npm", "cargo",
38 "build", "test", "compile", "check", "scaffold",
39 ],
40 phrases: &[
41 "run this",
42 "execute the",
43 "in terminal",
44 "run the",
45 "build and test",
46 "build it",
47 "run tests",
48 "compile it",
49 "scaffold the",
50 ],
51 category: ContextCategory::Shell,
52 },
53 CategoryPattern {
54 keywords: &[
55 "git", "commit", "branch", "merge", "push", "pull", "diff", "log", "status",
56 ],
57 phrases: &[
58 "commit the",
59 "push to",
60 "pull from",
61 "merge into",
62 "git status",
63 ],
64 category: ContextCategory::Git,
65 },
66 CategoryPattern {
67 keywords: &[
68 "code", "function", "class", "struct", "impl", "method", "variable", "symbol",
69 ],
70 phrases: &[
71 "find the",
72 "where is",
73 "definition of",
74 "references to",
75 "usage of",
76 ],
77 category: ContextCategory::Code,
78 },
79 CategoryPattern {
80 keywords: &[
81 "search", "web", "google", "url", "fetch", "download", "http", "api", "lookup",
82 "browse", "website", "page", "online", "internet",
83 "locate", "retrieve", "navigate", "domain", "link",
85 ],
86 phrases: &[
87 "search for",
88 "look up",
89 "lookup",
90 "find online",
91 "on the web",
92 "browse to",
93 "visit",
94 "check the",
95 "go to",
96 "open the",
97 "locate the",
99 "retrieve the",
100 "retrieve from",
101 "navigate to",
102 "from the web",
103 "on the site",
104 ],
105 category: ContextCategory::Web,
106 },
107 CategoryPattern {
108 keywords: &[
109 "voice",
110 "speak",
111 "listen",
112 "audio",
113 "microphone",
114 "transcribe",
115 "whisper",
116 ],
117 phrases: &["say this", "read aloud", "voice command", "start listening"],
118 category: ContextCategory::Voice,
119 },
120 CategoryPattern {
121 keywords: &["config", "setting", "configure", "preference", "option"],
122 phrases: &["change the", "set the", "update config", "configure the"],
123 category: ContextCategory::Config,
124 },
125 CategoryPattern {
126 keywords: &[
127 "session", "history", "previous", "earlier", "last", "before",
128 ],
129 phrases: &["what did", "earlier we", "last time", "in this session"],
130 category: ContextCategory::Session,
131 },
132 CategoryPattern {
133 keywords: &["tool", "tools", "capability", "available", "can you"],
134 phrases: &["what tools", "show tools", "list tools", "available tools"],
135 category: ContextCategory::Tools,
136 },
137 CategoryPattern {
138 keywords: &["agent", "delegate", "orchestrate", "supervisor", "worker"],
139 phrases: &["delegate to", "have an agent", "multi-agent"],
140 category: ContextCategory::Agent,
141 },
142 CategoryPattern {
143 keywords: &["mcp", "protocol", "server", "client", "capability"],
144 phrases: &["mcp server", "protocol message", "mcp client"],
145 category: ContextCategory::Mcp,
146 },
147 CategoryPattern {
148 keywords: &[
149 "a2a",
150 "agent-to-agent",
151 "remote agent",
152 "agent communication",
153 ],
154 phrases: &["send to agent", "agent profile", "a2a protocol"],
155 category: ContextCategory::A2a,
156 },
157 CategoryPattern {
158 keywords: &["task", "todo", "track", "checklist", "reminder"],
159 phrases: &[
160 "add a task",
161 "create a task",
162 "task list",
163 "my tasks",
164 "mark as done",
165 "complete this task",
166 ],
167 category: ContextCategory::Task,
168 },
169 CategoryPattern {
170 keywords: &[
171 "screenshot",
172 "screen_record",
173 "record",
174 "video",
175 "capture",
176 "recording",
177 "screencast",
178 "screengrab",
179 ],
180 phrases: &[
181 "take a screenshot",
182 "record the screen",
183 "record yourself",
184 "create a video",
185 "make a video",
186 "screen capture",
187 "screen recording",
188 "record a video",
189 "capture the screen",
190 "video of yourself",
191 "video of the screen",
192 ],
193 category: ContextCategory::Screen,
194 },
195];
196
197static FILE_PATH_REGEX: LazyLock<Regex> = LazyLock::new(|| {
200 Regex::new(r"(?:^|[\s\(\[])([./~]?(?:[\w-]+/)*[\w.-]+\.[a-zA-Z0-9]+)(?:[\s\)\].,;:!?]|$)")
201 .expect("Invalid file path regex")
202});
203
/// File names that are recognised anywhere in a request, compared
/// case-insensitively, even when they do not look like a path with an
/// extension (for example `Makefile` or `.env`).
const WELL_KNOWN_FILES: &[&str] = &[
    // Agent / editor instructions
    "AGENTS.md", "CLAUDE.md", ".cursorrules",
    // Project documentation
    "README.md", "README.rst", "README.txt", "CONTRIBUTING.md", "CHANGELOG.md",
    // Rust
    "Cargo.toml", "Cargo.lock",
    // Node package managers
    "package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml",
    // Repository / environment
    ".gitignore", ".env", ".env.example",
    // Build and container files
    "Makefile", "Justfile", "justfile", "Dockerfile",
    "docker-compose.yml", "docker-compose.yaml",
    // Go
    "go.mod", "go.sum",
    // Python
    "pyproject.toml", "requirements.txt",
    // TypeScript / JavaScript tooling
    "tsconfig.json", "vite.config.ts", "vitest.config.ts",
    "eslint.config.js", ".eslintrc.json",
];
240
241static BARE_FILENAME_REGEX: LazyLock<Regex> = LazyLock::new(|| {
245 Regex::new(
246 r#"(?:^|[\s\(\[,;'"\`])([A-Za-z][A-Za-z0-9_.-]*\.[a-zA-Z0-9]+)(?:[\s\)\].,;:!?'"\`]|$)"#,
247 )
248 .expect("Invalid bare filename regex")
249});
250
251static URL_REGEX: LazyLock<Regex> =
254 LazyLock::new(|| Regex::new(r"https?://[\w.-]+(?:/[\w./?%&=-]*)?").expect("Invalid URL regex"));
255
256static BARE_DOMAIN_REGEX: LazyLock<Regex> = LazyLock::new(|| {
262 Regex::new(
263 r"(?:^|[\s\(\[,;'`])((?:[A-Za-z0-9][A-Za-z0-9-]*\.)+(?:com|org|net|io|ai|dev|co|app|tech|edu|gov|info|biz|online|site|web|so|me|tv|us|uk|ca|de|fr|au|jp|cn))(?:[/\s,;:!?\)\]'`]|$)"
264 )
265 .expect("Invalid bare domain regex")
266});
267
268static GIT_BRANCH_REGEX: LazyLock<Regex> = LazyLock::new(|| {
271 Regex::new(r"\b(?:main|master|develop|feature/[\w-]+|bugfix/[\w-]+|release/[\w-]+)\b")
272 .expect("Invalid git branch regex")
273});
274
275pub struct RequestAnalyzer {
277 tool_categories: HashMap<String, ContextCategory>,
279 followup_patterns: Vec<&'static str>,
281}
282
283impl RequestAnalyzer {
284 pub fn new() -> Self {
286 let mut tool_categories = HashMap::new();
287 tool_categories.insert("file".to_string(), ContextCategory::FileSystem);
290 tool_categories.insert("shell".to_string(), ContextCategory::Shell);
291 tool_categories.insert("git".to_string(), ContextCategory::Git);
292 tool_categories.insert("code".to_string(), ContextCategory::Code);
293 tool_categories.insert("web".to_string(), ContextCategory::Web);
294 tool_categories.insert("web_search".to_string(), ContextCategory::Web);
295 tool_categories.insert("permissions".to_string(), ContextCategory::Tools);
296 tool_categories.insert("a2a".to_string(), ContextCategory::A2a);
297 tool_categories.insert("mcp".to_string(), ContextCategory::Mcp);
298 tool_categories.insert("screenshot".to_string(), ContextCategory::Screen);
299 tool_categories.insert("screen_record".to_string(), ContextCategory::Screen);
300 tool_categories.insert("task".to_string(), ContextCategory::Task);
301
302 Self {
303 tool_categories,
304 followup_patterns: vec![
305 "and also",
306 "also",
307 "additionally",
308 "another thing",
309 "one more",
310 "what about",
311 "how about",
312 "can you also",
313 "please also",
314 ],
315 }
316 }
317
318 pub fn analyze(&self, request: &str) -> RequestAnalysis {
320 let lower = request.to_lowercase();
321 let mut analysis = RequestAnalysis::new(request);
322
323 for pattern in CATEGORY_PATTERNS {
325 let mut score = 0;
326 for kw in pattern.keywords {
327 if lower.contains(kw) {
328 score += 1;
329 }
330 }
331 for phrase in pattern.phrases {
332 if lower.contains(phrase) {
333 score += 2;
334 }
335 }
336 if score > 0 {
337 analysis.categories.insert(pattern.category);
338 analysis.confidence += score as f32 * 0.1;
339 }
340 }
341
342 self.extract_entities(request, &mut analysis);
344
345 for pattern in &self.followup_patterns {
347 if lower.contains(pattern) {
348 analysis.is_followup = true;
349 analysis.categories.insert(ContextCategory::Session);
350 break;
351 }
352 }
353
354 for category in &analysis.categories {
356 for (tool, cat) in &self.tool_categories {
357 if cat == category && !analysis.suggested_tools.contains(tool) {
358 analysis.suggested_tools.push(tool.clone());
359 }
360 }
361 }
362
363 analysis.needs_tools = !analysis.categories.is_empty()
364 && !analysis.categories.contains(&ContextCategory::General);
365
366 if analysis.categories.is_empty() {
368 analysis.categories.insert(ContextCategory::General);
369 analysis.confidence = 0.8;
370 }
371
372 analysis.confidence = analysis.confidence.min(1.0);
374
375 analysis
376 }
377
378 fn extract_entities(&self, request: &str, analysis: &mut RequestAnalysis) {
381 let mut extracted_ranges: Vec<(usize, usize)> = Vec::new();
383
384 for cap in URL_REGEX.find_iter(request) {
386 let start = cap.start();
387 let end = cap.end();
388 if !Self::overlaps_any(&extracted_ranges, start, end) {
389 analysis.entities.push(ExtractedEntity {
390 entity_type: EntityType::Url,
391 value: cap.as_str().to_string(),
392 start,
393 end,
394 });
395 analysis.categories.insert(ContextCategory::Web);
396 analysis.confidence += 0.4;
400 extracted_ranges.push((start, end));
401 }
402 }
403
404 for cap in BARE_DOMAIN_REGEX.captures_iter(request) {
408 if let Some(m) = cap.get(1) {
409 let start = m.start();
410 let end = m.end();
411 if !Self::overlaps_any(&extracted_ranges, start, end) {
412 analysis.entities.push(ExtractedEntity {
413 entity_type: EntityType::Url,
414 value: m.as_str().to_string(),
415 start,
416 end,
417 });
418 analysis.categories.insert(ContextCategory::Web);
419 analysis.confidence += 0.3;
423 extracted_ranges.push((start, end));
424 }
425 }
426 }
427
428 for cap in FILE_PATH_REGEX.captures_iter(request) {
430 if let Some(m) = cap.get(1) {
431 let start = m.start();
432 let end = m.end();
433 let value = m.as_str();
434
435 if Self::overlaps_any(&extracted_ranges, start, end) {
437 continue;
438 }
439
440 let entity_type = if value.ends_with('/') {
442 EntityType::DirectoryPath
443 } else {
444 EntityType::FilePath
445 };
446
447 analysis.entities.push(ExtractedEntity {
448 entity_type,
449 value: value.to_string(),
450 start,
451 end,
452 });
453 analysis.categories.insert(ContextCategory::FileSystem);
454 extracted_ranges.push((start, end));
455 }
456 }
457
458 let lower = request.to_lowercase();
462 for &well_known in WELL_KNOWN_FILES {
463 let needle = well_known.to_lowercase();
464 if lower.contains(&needle) {
465 let already_extracted = analysis
467 .entities
468 .iter()
469 .any(|e| e.value.to_lowercase().ends_with(&needle));
470 if !already_extracted {
471 let start = lower.find(&needle).unwrap_or(0);
473 let end = start + well_known.len();
474 analysis.entities.push(ExtractedEntity {
475 entity_type: EntityType::FilePath,
476 value: well_known.to_string(),
477 start,
478 end,
479 });
480 analysis.categories.insert(ContextCategory::FileSystem);
481 extracted_ranges.push((start, end));
482 }
483 }
484 }
485
486 for cap in BARE_FILENAME_REGEX.captures_iter(request) {
489 if let Some(m) = cap.get(1) {
490 let start = m.start();
491 let end = m.end();
492 let value = m.as_str();
493
494 if Self::overlaps_any(&extracted_ranges, start, end) {
495 continue;
496 }
497
498 if !value.contains('.') {
502 continue;
503 }
504
505 analysis.entities.push(ExtractedEntity {
506 entity_type: EntityType::FilePath,
507 value: value.to_string(),
508 start,
509 end,
510 });
511 analysis.categories.insert(ContextCategory::FileSystem);
512 extracted_ranges.push((start, end));
513 }
514 }
515
516 for cap in GIT_BRANCH_REGEX.find_iter(request) {
518 let start = cap.start();
519 let end = cap.end();
520 if !Self::overlaps_any(&extracted_ranges, start, end) {
521 analysis.entities.push(ExtractedEntity {
522 entity_type: EntityType::GitBranch,
523 value: cap.as_str().to_string(),
524 start,
525 end,
526 });
527 analysis.categories.insert(ContextCategory::Git);
528 extracted_ranges.push((start, end));
529 }
530 }
531
532 for word in request.split_whitespace() {
535 if let Some(start) = request.find(word) {
536 let end = start + word.len();
537
538 if Self::overlaps_any(&extracted_ranges, start, end) {
540 continue;
541 }
542
543 if word.contains('/')
545 && !word.starts_with("http")
546 && !word.contains('.')
547 && word.len() > 2
548 {
549 analysis.entities.push(ExtractedEntity {
550 entity_type: EntityType::DirectoryPath,
551 value: word.to_string(),
552 start,
553 end,
554 });
555 analysis.categories.insert(ContextCategory::FileSystem);
556 extracted_ranges.push((start, end));
557 }
558 }
559 }
560 }
561
562 fn overlaps_any(ranges: &[(usize, usize)], start: usize, end: usize) -> bool {
564 ranges.iter().any(|(s, e)| start < *e && end > *s)
566 }
567}
568
569impl Default for RequestAnalyzer {
570 fn default() -> Self {
571 Self::new()
572 }
573}
574
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh analyzer over `request`.
    fn analyze(request: &str) -> RequestAnalysis {
        RequestAnalyzer::new().analyze(request)
    }

    /// True when `tool` is among the suggested tools.
    fn suggests(analysis: &RequestAnalysis, tool: &str) -> bool {
        analysis.suggested_tools.contains(&tool.to_string())
    }

    /// True when a URL entity whose lower-cased value contains `needle` exists.
    fn has_url_mentioning(analysis: &RequestAnalysis, needle: &str) -> bool {
        analysis.entities.iter().any(|entity| {
            entity.entity_type == EntityType::Url
                && entity.value.to_lowercase().contains(needle)
        })
    }

    #[test]
    fn test_file_request_analysis() {
        let result = analyze("Read the file src/main.rs and show me its contents");

        assert!(result.categories.contains(&ContextCategory::FileSystem));
        assert!(suggests(&result, "file"));
        assert!(!result.entities.is_empty());
        assert!(result.needs_tools);
    }

    #[test]
    fn test_git_request_analysis() {
        let result = analyze("Show me the git status and recent commits");

        assert!(result.categories.contains(&ContextCategory::Git));
        assert!(result.needs_tools);
    }

    #[test]
    fn test_general_conversation() {
        let result = analyze("What is the meaning of life?");

        assert!(result.categories.contains(&ContextCategory::General));
        assert!(!result.needs_tools);
    }

    #[test]
    fn test_url_extraction() {
        let result = analyze("Fetch https://example.com/api");

        assert!(result.categories.contains(&ContextCategory::Web));
        assert!(
            result
                .entities
                .iter()
                .any(|entity| entity.entity_type == EntityType::Url)
        );
    }

    #[test]
    fn test_followup_detection() {
        let result = analyze("And also show me the tests");

        assert!(result.is_followup);
        assert!(result.categories.contains(&ContextCategory::Session));
    }

    #[test]
    fn test_web_lookup_detection() {
        let result = analyze(
            "please lookup the langchain landing page and tell me the main links it talks about",
        );

        assert!(result.categories.contains(&ContextCategory::Web));
        assert!(result.needs_tools);
    }

    #[test]
    fn test_web_browse_detection() {
        let result = analyze("browse to the documentation website");

        assert!(result.categories.contains(&ContextCategory::Web));
        assert!(result.needs_tools);
    }

    #[test]
    fn test_bare_domain_detection() {
        let result = analyze("please find llm.txt from Gestura.ai");

        assert!(
            result.categories.contains(&ContextCategory::Web),
            "Expected Web category for bare domain Gestura.ai, got: {:?}",
            result.categories
        );
        assert!(result.needs_tools);
        assert!(
            has_url_mentioning(&result, "gestura"),
            "Expected Gestura.ai to be extracted as a URL entity"
        );
        assert!(
            result.confidence >= 0.2,
            "Bare domain detection should boost confidence above the 0.2 fallback threshold, got {}",
            result.confidence
        );
    }

    #[test]
    fn test_locate_web_resource_regression() {
        let result = analyze("please locate the llm.txt for Gestura.ai");

        assert!(
            result.categories.contains(&ContextCategory::Web),
            "Expected Web category — 'locate' keyword + bare domain should both fire, got: {:?}",
            result.categories
        );
        assert!(result.needs_tools, "Request requires tools");
        assert!(
            result.confidence >= 0.2,
            "Confidence must clear 0.2 so category-based routing is used instead of all-tools \
             fallback; got {}",
            result.confidence
        );
        assert!(
            has_url_mentioning(&result, "gestura"),
            "Gestura.ai should be extracted as a URL entity"
        );
    }

    #[test]
    fn test_screen_record_detection() {
        let result = analyze(
            "I want you to create a video of yourself requesting the creation of a hello.txt",
        );

        assert!(
            result.categories.contains(&ContextCategory::Screen),
            "Expected Screen category for video/recording request, got: {:?}",
            result.categories
        );
        assert!(result.needs_tools);
    }

    #[test]
    fn test_screenshot_detection() {
        let result = analyze("take a screenshot of the current window");

        assert!(
            result.categories.contains(&ContextCategory::Screen),
            "Expected Screen category for screenshot request, got: {:?}",
            result.categories
        );
        assert!(result.needs_tools);
    }

    #[test]
    fn test_build_and_test_requests_include_shell() {
        let result = analyze(
            "I want to create a small tauri gui that says hello world. Please carefully plan and implement then build and test it.",
        );

        assert!(result.categories.contains(&ContextCategory::Shell));
        assert!(result.categories.contains(&ContextCategory::FileSystem));
        for tool in ["file", "shell", "code"] {
            assert!(suggests(&result, tool));
        }
        assert!(result.needs_tools);
        assert!(result.confidence >= 0.2);
    }
}