gestura_core_tools/
code.rs

1//! Code analysis and navigation tool
2//!
3//! Provides code analysis operations with structured output.
4
5use crate::error::{AppError, Result};
6use crate::shell::CommandResult;
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::fs;
11use std::path::{Path, PathBuf};
12use std::process::Command;
13use std::sync::OnceLock;
14use std::time::Instant;
15use toml::Value;
16
/// Code symbol information.
///
/// As built by [`CodeTools::symbols`], `line` and `column` are 1-based and
/// `column` is counted in characters rather than bytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Symbol {
    /// Identifier text of the symbol.
    pub name: String,
    /// Category of the symbol.
    pub kind: SymbolKind,
    /// File the symbol was found in.
    pub path: PathBuf,
    /// Line on which the symbol name starts.
    pub line: usize,
    /// Column at which the symbol name starts.
    pub column: usize,
}
26
/// Kind of code symbol.
///
/// [`CodeTools::definition`] only ever reports `Function`, `Struct`, `Enum`,
/// `Type` and `Const`.
// NOTE(review): the producers of the remaining variants are not visible in this
// part of the file; their meanings below are inferred from the names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SymbolKind {
    /// A `fn` item.
    Function,
    /// A `struct` item.
    Struct,
    /// An `enum` item.
    Enum,
    /// Presumably a `trait` item.
    Trait,
    /// Presumably an `impl` block.
    Impl,
    /// Presumably a `mod` item.
    Module,
    /// A `const` item.
    Const,
    /// Presumably a `static` item.
    Static,
    /// A `type` alias.
    Type,
    /// Presumably a macro definition.
    Macro,
    /// Presumably a fallback for anything not classified above.
    Unknown,
}
42
/// Code statistics.
///
/// [`CodeTools::stats`] starts every counter at zero and lets the traversal
/// helpers accumulate into it.
// NOTE(review): the per-file accounting lives in `analyze_file`, which is not
// visible here; the field descriptions below are inferred from names — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeStats {
    /// Presumably the number of files analyzed.
    pub total_files: usize,
    /// Presumably the number of lines across all analyzed files.
    pub total_lines: usize,
    /// Presumably the number of lines classified as code.
    pub code_lines: usize,
    /// Presumably the number of lines classified as comments.
    pub comment_lines: usize,
    /// Presumably the number of empty / whitespace-only lines.
    pub blank_lines: usize,
    /// Per-language breakdown; the key is presumably a language name.
    pub by_language: HashMap<String, LanguageStats>,
}
53
/// Repository map output.
///
/// This is intended to be presentation-agnostic. Callers (CLI/GUI) can render
/// the information however they like. Produced by [`CodeTools::repository_map`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepositoryMap {
    /// Root that was analyzed (after resolution against the working directory).
    pub root: PathBuf,
    /// Maximum directory depth included in the map.
    ///
    /// The root itself is depth 0; directories deeper than this are not read.
    pub max_depth: usize,
    /// File extension -> count.
    ///
    /// Files without an extension use the key `(none)`.
    pub file_types: HashMap<String, usize>,
    /// Common "key" files found at the root.
    ///
    /// Only names from a fixed list (e.g. `README.md`, `Cargo.toml`) are
    /// checked, directly under `root`.
    pub key_files_found: Vec<String>,
}
71
/// A single reference hit (line-level match).
///
/// Produced by [`CodeTools::references`]; one hit per matching line, even if
/// the symbol occurs several times on that line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceHit {
    /// File containing the matching line.
    pub path: PathBuf,
    /// 1-based line number of the match.
    pub line: usize,
    /// The matching line with leading and trailing whitespace trimmed.
    pub content: String,
}
79
/// A single definition hit.
///
/// Produced by [`CodeTools::definition`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DefinitionHit {
    /// Kind associated with the pattern that matched.
    pub kind: SymbolKind,
    /// The symbol name that was searched for.
    pub name: String,
    /// File containing the definition.
    pub path: PathBuf,
    /// 1-based line number of the definition.
    pub line: usize,
    /// The full matching line, not trimmed.
    pub content: String,
}
89
/// Statistics for a specific language.
///
/// Used as the value type of [`CodeStats::by_language`].
// NOTE(review): the code that fills these counters is not visible in this part
// of the file; the field descriptions are inferred from names — confirm.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LanguageStats {
    /// Presumably the number of files attributed to this language.
    pub files: usize,
    /// Presumably the total number of lines in those files.
    pub lines: usize,
    /// Presumably the number of those lines classified as code.
    pub code_lines: usize,
}
97
/// Dependency information.
///
/// As filled in by [`CodeTools::cargo_dependencies`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dependency {
    /// Dependency name (the key in the manifest section).
    pub name: String,
    /// Version requirement string; empty when the manifest entry has none.
    pub version: String,
    /// Where the dependency comes from: `crates.io`, `workspace`,
    /// `path:<path>`, `git:<url>`, `registry:<name>`, or `unknown` when the
    /// manifest value is neither a string nor a table.
    pub source: String,
}
105
/// A group of dependencies from a single manifest section (e.g. `[dependencies]`).
///
/// [`CodeTools::cargo_dependencies`] emits groups for `dependencies`,
/// `dev-dependencies` and `build-dependencies`, skipping sections that are absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyGroup {
    /// Section name as it appears in the manifest (e.g. `dependencies`).
    pub section: String,
    /// Dependencies listed under that section, sorted by name.
    pub dependencies: Vec<Dependency>,
}
114
/// Lint result.
// NOTE(review): nothing in the visible part of this file constructs this type;
// the field descriptions are inferred from names — confirm against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LintResult {
    /// File the diagnostic refers to.
    pub path: PathBuf,
    /// Line of the diagnostic (base not established here — TODO confirm).
    pub line: usize,
    /// Column of the diagnostic (base not established here — TODO confirm).
    pub column: usize,
    /// Severity of the diagnostic.
    pub level: LintLevel,
    /// Diagnostic message text.
    pub message: String,
    /// Optional lint identifier, when one is available.
    pub code: Option<String>,
}
125
/// Severity attached to a [`LintResult`].
// NOTE(review): how external tool levels map onto these variants is not
// visible in this part of the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LintLevel {
    Error,
    Warning,
    Info,
    Hint,
}
133
/// Test result.
// NOTE(review): nothing in the visible part of this file constructs this type;
// the field descriptions are inferred from names — confirm against the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestResult {
    /// Name of the test.
    pub name: String,
    /// Whether the test passed.
    pub passed: bool,
    /// Duration of the test in milliseconds.
    pub duration_ms: u64,
    /// Optional captured output of the test.
    pub output: Option<String>,
}
142
/// A single file match returned by a glob search.
///
/// Produced by [`CodeTools::glob_search`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobMatch {
    /// Path to the matched file as reached from the resolved search root.
    ///
    /// This is absolute only when the resolved root is absolute.
    pub path: PathBuf,
    /// Path relative to the search root, using forward slashes.
    ///
    /// This is the string the glob is matched against.
    pub relative_path: String,
}
151
/// A single line match returned by a grep search, with optional surrounding context.
///
/// Produced by [`CodeTools::grep`]; the amount of context is controlled by its
/// `context_lines` argument.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrepMatch {
    /// File the match was found in.
    pub path: PathBuf,
    /// 1-based line number of the matching line.
    pub line: usize,
    /// The matching line content.
    pub content: String,
    /// Lines before the match: `(1-based line number, line content)`.
    ///
    /// Holds fewer than `context_lines` entries when the match is near the
    /// start of the file.
    pub context_before: Vec<(usize, String)>,
    /// Lines after the match: `(1-based line number, line content)`.
    pub context_after: Vec<(usize, String)>,
}
166
/// A single entry in a [`CodeTools::batch_read`] response.
///
/// Exactly one of `content` and `error` is `Some`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchReadEntry {
    /// The path that was requested, exactly as passed in (not the resolved path).
    pub path: String,
    /// File content, or `None` if the read failed.
    pub content: Option<String>,
    /// Number of lines in the file (0 on error).
    pub line_count: usize,
    /// Error message if the read failed.
    pub error: Option<String>,
}
179
/// A single str-replace edit operation for [`CodeTools::batch_edit`].
///
/// Every occurrence of `old_str` in the file is replaced, not just the first.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EditOp {
    /// Path of the file to edit; relative paths are resolved against the
    /// configured working directory.
    pub path: String,
    /// Exact string to find (plain text, not a regex).
    pub old_str: String,
    /// Replacement string.
    pub new_str: String,
}
190
/// Result of applying a single [`EditOp`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EditOpResult {
    /// The path that was edited, as given in the [`EditOp`].
    pub path: String,
    /// Whether the operation succeeded.
    ///
    /// This is `false` when the file cannot be read, when `old_str` does not
    /// occur in it, or when writing the result back fails.
    pub success: bool,
    /// Number of replacements made (0 when `success` is false).
    pub replacements: usize,
    /// Error message when `success` is false.
    pub error: Option<String>,
}
203
/// A lightweight symbol entry for file outlines.
///
/// Produced by [`CodeTools::outline`]; it carries the same data as a
/// [`Symbol`] minus the file path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutlineNode {
    /// Identifier text of the symbol.
    pub name: String,
    /// Category of the symbol.
    pub kind: SymbolKind,
    /// 1-based line number.
    pub line: usize,
    /// 1-based column.
    pub column: usize,
}
214
/// Code analysis service.
///
/// Holds only an optional working directory; every method that takes a path
/// passes it through [`CodeTools::resolve_path`] first.
pub struct CodeTools {
    /// Working directory for relative path resolution.
    /// Used to resolve relative paths in code analysis operations.
    /// When `None`, relative paths are used as given.
    work_dir: Option<PathBuf>,
}
221
222impl Default for CodeTools {
223    fn default() -> Self {
224        Self::new(None)
225    }
226}
227
228impl CodeTools {
229    /// Create a new [`CodeTools`].
230    ///
231    /// If `work_dir` is set, relative paths passed to methods will be resolved
232    /// against it.
233    pub fn new(work_dir: Option<PathBuf>) -> Self {
234        Self { work_dir }
235    }
236
237    /// Resolve a path, making it absolute if relative and work_dir is set.
238    /// Returns the original path if it's already absolute or no work_dir is configured.
239    pub fn resolve_path(&self, path: &Path) -> PathBuf {
240        if path.is_absolute() {
241            path.to_path_buf()
242        } else if let Some(ref work_dir) = self.work_dir {
243            work_dir.join(path)
244        } else {
245            path.to_path_buf()
246        }
247    }
248
249    /// Get the configured working directory
250    pub fn work_dir(&self) -> Option<&Path> {
251        self.work_dir.as_deref()
252    }
253
254    /// Get code statistics for a directory.
255    /// Resolves relative paths using the configured work_dir.
256    pub fn stats(&self, path: &Path) -> Result<CodeStats> {
257        let resolved_path = self.resolve_path(path);
258        let mut stats = CodeStats {
259            total_files: 0,
260            total_lines: 0,
261            code_lines: 0,
262            comment_lines: 0,
263            blank_lines: 0,
264            by_language: HashMap::new(),
265        };
266
267        self.collect_stats(&resolved_path, &mut stats)?;
268        Ok(stats)
269    }
270
271    /// Generate a repository map for `root` up to `max_depth`.
272    ///
273    /// Hidden entries and common non-source directories (`target`, `node_modules`)
274    /// are skipped.
275    pub fn repository_map(&self, root: &Path, max_depth: usize) -> Result<RepositoryMap> {
276        let resolved_root = self.resolve_path(root);
277        let mut file_types: HashMap<String, usize> = HashMap::new();
278        Self::count_files_by_extension(&resolved_root, &mut file_types, max_depth, 0)?;
279
280        let key_files = [
281            "README.md",
282            "Cargo.toml",
283            "package.json",
284            "pyproject.toml",
285            "Makefile",
286            "Justfile",
287            ".gitignore",
288            "LICENSE",
289        ];
290
291        let mut key_files_found = Vec::new();
292        for file in key_files {
293            let file_path = resolved_root.join(file);
294            if file_path.exists() {
295                key_files_found.push(file.to_string());
296            }
297        }
298
299        Ok(RepositoryMap {
300            root: resolved_root,
301            max_depth,
302            file_types,
303            key_files_found,
304        })
305    }
306
307    /// Extract top-level Rust-like symbols from a single file.
308    ///
309    /// This is a lightweight, regex-based approach meant for quick inspection.
310    /// It is not a full parser.
311    pub fn symbols(&self, path: &Path) -> Result<Vec<Symbol>> {
312        let path = self.resolve_path(path);
313        let content = fs::read_to_string(&path)?;
314
315        let mut out = Vec::new();
316        for (kind, re) in symbol_patterns().iter() {
317            for cap in re.captures_iter(&content) {
318                let name = cap.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
319                if name.is_empty() {
320                    continue;
321                }
322
323                // Determine line/column based on the match start.
324                let start = cap.get(1).map(|m| m.start()).unwrap_or(0);
325                let prefix = &content[..start];
326                let line = prefix.lines().count().max(1);
327                let col = prefix
328                    .lines()
329                    .last()
330                    .map(|l| l.chars().count() + 1)
331                    .unwrap_or(1);
332
333                out.push(Symbol {
334                    name,
335                    kind: *kind,
336                    path: path.clone(),
337                    line,
338                    column: col,
339                });
340            }
341        }
342
343        Ok(out)
344    }
345
346    /// Find references to `symbol` under `root`.
347    ///
348    /// This performs a simple word-boundary search (`\bSYMBOL\b`) and returns
349    /// line-level hits.
350    pub fn references(&self, symbol: &str, root: &Path) -> Result<Vec<ReferenceHit>> {
351        let root = self.resolve_path(root);
352        let pattern = format!(r"\b{}\b", regex::escape(symbol));
353        let re = Regex::new(&pattern).map_err(|e| {
354            crate::error::AppError::InvalidInput(format!("Invalid symbol regex: {e}"))
355        })?;
356
357        let mut hits = Vec::new();
358        Self::search_references(&root, &re, &mut hits)?;
359        Ok(hits)
360    }
361
362    /// Find the first definition of `symbol` under `root`.
363    ///
364    /// The search is regex-based (functions/structs/enums/types/consts). If multiple
365    /// definitions exist, the first encountered in directory traversal order is returned.
366    pub fn definition(&self, symbol: &str, root: &Path) -> Result<Option<DefinitionHit>> {
367        let root = self.resolve_path(root);
368
369        let patterns: Vec<(SymbolKind, Regex)> = vec![
370            (
371                SymbolKind::Function,
372                Regex::new(&format!(
373                    r"(?m)^(?:pub\s+)?(?:async\s+)?fn\s+{}\s*[<(]",
374                    regex::escape(symbol)
375                ))
376                .map_err(|e| {
377                    crate::error::AppError::InvalidInput(format!("Invalid definition regex: {e}"))
378                })?,
379            ),
380            (
381                SymbolKind::Struct,
382                Regex::new(&format!(
383                    r"(?m)^(?:pub\s+)?struct\s+{}\s*[<{{]",
384                    regex::escape(symbol)
385                ))
386                .map_err(|e| {
387                    crate::error::AppError::InvalidInput(format!("Invalid definition regex: {e}"))
388                })?,
389            ),
390            (
391                SymbolKind::Enum,
392                Regex::new(&format!(
393                    r"(?m)^(?:pub\s+)?enum\s+{}\s*[<{{]",
394                    regex::escape(symbol)
395                ))
396                .map_err(|e| {
397                    crate::error::AppError::InvalidInput(format!("Invalid definition regex: {e}"))
398                })?,
399            ),
400            (
401                SymbolKind::Type,
402                Regex::new(&format!(
403                    r"(?m)^(?:pub\s+)?type\s+{}\s*=",
404                    regex::escape(symbol)
405                ))
406                .map_err(|e| {
407                    crate::error::AppError::InvalidInput(format!("Invalid definition regex: {e}"))
408                })?,
409            ),
410            (
411                SymbolKind::Const,
412                Regex::new(&format!(
413                    r"(?m)^(?:pub\s+)?const\s+{}\s*:",
414                    regex::escape(symbol)
415                ))
416                .map_err(|e| {
417                    crate::error::AppError::InvalidInput(format!("Invalid definition regex: {e}"))
418                })?,
419            ),
420        ];
421
422        Self::find_definition(&root, symbol, &patterns)
423    }
424
425    /// Parse Rust/Cargo dependencies from a `Cargo.toml` at `root`.
426    ///
427    /// If `root` is a directory, this looks for `root/Cargo.toml`.
428    /// If `root` is a file, it is treated as the manifest.
429    pub fn cargo_dependencies(&self, root: &Path) -> Result<Vec<DependencyGroup>> {
430        let root = self.resolve_path(root);
431        let manifest_path = if root.is_dir() {
432            root.join("Cargo.toml")
433        } else {
434            root.clone()
435        };
436        if !manifest_path.exists() {
437            return Err(AppError::NotFound(format!(
438                "Cargo.toml not found at {}",
439                manifest_path.display()
440            )));
441        }
442
443        let content = fs::read_to_string(&manifest_path)?;
444        let parsed: Value = content.parse()?;
445        let Some(table) = parsed.as_table() else {
446            return Err(AppError::InvalidInput(
447                "Cargo.toml is not a table".to_string(),
448            ));
449        };
450
451        let sections = ["dependencies", "dev-dependencies", "build-dependencies"];
452        let mut out = Vec::new();
453        for section in sections {
454            let Some(deps_table) = table.get(section).and_then(|v| v.as_table()) else {
455                continue;
456            };
457
458            let mut deps = Vec::new();
459            for (name, value) in deps_table {
460                let (version, source) = match value {
461                    Value::String(v) => (v.clone(), "crates.io".to_string()),
462                    Value::Table(t) => {
463                        let version = t
464                            .get("version")
465                            .and_then(|v| v.as_str())
466                            .unwrap_or("")
467                            .to_string();
468
469                        let source = if t
470                            .get("workspace")
471                            .and_then(|v| v.as_bool())
472                            .unwrap_or(false)
473                        {
474                            "workspace".to_string()
475                        } else if let Some(p) = t.get("path").and_then(|v| v.as_str()) {
476                            format!("path:{p}")
477                        } else if let Some(g) = t.get("git").and_then(|v| v.as_str()) {
478                            format!("git:{g}")
479                        } else if let Some(r) = t.get("registry").and_then(|v| v.as_str()) {
480                            format!("registry:{r}")
481                        } else {
482                            "crates.io".to_string()
483                        };
484
485                        (version, source)
486                    }
487                    _ => ("".to_string(), "unknown".to_string()),
488                };
489
490                deps.push(Dependency {
491                    name: name.clone(),
492                    version,
493                    source,
494                });
495            }
496
497            deps.sort_by(|a, b| a.name.cmp(&b.name));
498            out.push(DependencyGroup {
499                section: section.to_string(),
500                dependencies: deps,
501            });
502        }
503
504        Ok(out)
505    }
506
507    /// Run `cargo clippy` within `root` and return captured stdout/stderr.
508    ///
509    /// This is intended for local developer tooling (CLI) and should not be used
510    /// for untrusted inputs.
511    pub fn cargo_clippy(&self, root: &Path, fix: bool) -> Result<CommandResult> {
512        let mut args = vec!["clippy"];
513        if fix {
514            args.push("--fix");
515        }
516        self.run_cargo(root, &args)
517    }
518
519    /// Run `cargo test` within `root` and return captured stdout/stderr.
520    ///
521    /// The optional `filter` is passed as the standard cargo test filter argument.
522    pub fn cargo_test(&self, root: &Path, filter: Option<&str>) -> Result<CommandResult> {
523        let mut args = vec!["test"];
524        if let Some(f) = filter {
525            args.push(f);
526        }
527        self.run_cargo(root, &args)
528    }
529
530    /// Run a `cargo` subcommand with the given args in `root`.
531    fn run_cargo(&self, root: &Path, args: &[&str]) -> Result<CommandResult> {
532        let root = self.resolve_path(root);
533        let start = Instant::now();
534
535        let output = Command::new("cargo")
536            .args(args)
537            .current_dir(&root)
538            .output()
539            .map_err(AppError::Io)?;
540
541        let duration_ms = start.elapsed().as_millis() as u64;
542        let exit_code = output.status.code().unwrap_or(-1);
543
544        Ok(CommandResult {
545            command: format!("cargo {}", args.join(" ")),
546            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
547            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
548            exit_code,
549            success: output.status.success(),
550            duration_ms,
551        })
552    }
553
554    /// Find all files whose paths (relative to `root`) match the glob `pattern`.
555    ///
556    /// Supports `**` (any path depth), `*` (any filename chars), `?` (one char).
557    /// Hidden directories and common build directories (`target`, `node_modules`) are skipped.
558    pub fn glob_search(
559        &self,
560        pattern: &str,
561        root: &Path,
562        max_results: usize,
563    ) -> Result<Vec<GlobMatch>> {
564        let root = self.resolve_path(root);
565        let regex_str = glob_to_regex_string(pattern);
566        let re = Regex::new(&regex_str).map_err(|e| {
567            AppError::InvalidInput(format!("Invalid glob pattern '{pattern}': {e}"))
568        })?;
569        let mut out = Vec::new();
570        Self::walk_for_glob(&root, &root, &re, max_results, &mut out)?;
571        Ok(out)
572    }
573
574    /// Search file contents under `root` for lines matching the regex `pattern`.
575    ///
576    /// - `file_glob`: optional glob to restrict which files are searched (e.g. `*.rs`)
577    /// - `context_lines`: number of context lines to include before and after each match
578    /// - `case_sensitive`: whether the match is case-sensitive
579    /// - `max_matches`: maximum number of [`GrepMatch`] entries to return
580    pub fn grep(
581        &self,
582        pattern: &str,
583        root: &Path,
584        file_glob: Option<&str>,
585        context_lines: usize,
586        case_sensitive: bool,
587        max_matches: usize,
588    ) -> Result<Vec<GrepMatch>> {
589        let root = self.resolve_path(root);
590        let pattern_str = if case_sensitive {
591            pattern.to_string()
592        } else {
593            format!("(?i){pattern}")
594        };
595        let re = Regex::new(&pattern_str).map_err(|e| {
596            AppError::InvalidInput(format!("Invalid grep pattern '{pattern}': {e}"))
597        })?;
598        let file_re: Option<Regex> =
599            match file_glob {
600                Some(g) => {
601                    let s = glob_to_regex_string(g);
602                    Some(Regex::new(&s).map_err(|e| {
603                        AppError::InvalidInput(format!("Invalid file glob '{g}': {e}"))
604                    })?)
605                }
606                None => None,
607            };
608        let mut out = Vec::new();
609        Self::walk_for_grep(
610            &root,
611            &re,
612            file_re.as_ref(),
613            context_lines,
614            max_matches,
615            &mut out,
616        )?;
617        Ok(out)
618    }
619
620    /// Read multiple files in one call.
621    ///
622    /// Each path is resolved through the configured `work_dir`. Failures are
623    /// captured per-entry rather than aborting the batch.
624    pub fn batch_read(&self, paths: &[&str]) -> Vec<BatchReadEntry> {
625        paths
626            .iter()
627            .map(|p| {
628                let resolved = self.resolve_path(Path::new(p));
629                match fs::read_to_string(&resolved) {
630                    Ok(content) => {
631                        let line_count = content.lines().count();
632                        BatchReadEntry {
633                            path: p.to_string(),
634                            content: Some(content),
635                            line_count,
636                            error: None,
637                        }
638                    }
639                    Err(e) => BatchReadEntry {
640                        path: p.to_string(),
641                        content: None,
642                        line_count: 0,
643                        error: Some(e.to_string()),
644                    },
645                }
646            })
647            .collect()
648    }
649
650    /// Apply multiple str-replace edits across files in one call.
651    ///
652    /// Each [`EditOp`] replaces all occurrences of `old_str` with `new_str` in
653    /// the target file.  Failures are captured per-entry rather than aborting
654    /// the batch, so callers must inspect [`EditOpResult::success`] for each entry.
655    pub fn batch_edit(&self, edits: &[EditOp]) -> Vec<EditOpResult> {
656        edits
657            .iter()
658            .map(|op| {
659                let resolved = self.resolve_path(Path::new(&op.path));
660                match fs::read_to_string(&resolved) {
661                    Ok(content) => {
662                        let replacements = content.matches(op.old_str.as_str()).count();
663                        if replacements == 0 {
664                            return EditOpResult {
665                                path: op.path.clone(),
666                                success: false,
667                                replacements: 0,
668                                error: Some(format!("old_str not found in '{}'", op.path)),
669                            };
670                        }
671                        let new_content = content.replace(op.old_str.as_str(), op.new_str.as_str());
672                        match fs::write(&resolved, new_content) {
673                            Ok(()) => EditOpResult {
674                                path: op.path.clone(),
675                                success: true,
676                                replacements,
677                                error: None,
678                            },
679                            Err(e) => EditOpResult {
680                                path: op.path.clone(),
681                                success: false,
682                                replacements: 0,
683                                error: Some(e.to_string()),
684                            },
685                        }
686                    }
687                    Err(e) => EditOpResult {
688                        path: op.path.clone(),
689                        success: false,
690                        replacements: 0,
691                        error: Some(e.to_string()),
692                    },
693                }
694            })
695            .collect()
696    }
697
698    /// Return a structured outline of all top-level symbols in `path`.
699    ///
700    /// This is a lightweight wrapper around [`Self::symbols`] that strips the
701    /// absolute path from each entry so the result is presentation-friendly.
702    pub fn outline(&self, path: &Path) -> Result<Vec<OutlineNode>> {
703        let syms = self.symbols(path)?;
704        Ok(syms
705            .into_iter()
706            .map(|s| OutlineNode {
707                name: s.name,
708                kind: s.kind,
709                line: s.line,
710                column: s.column,
711            })
712            .collect())
713    }
714
715    fn collect_stats(&self, path: &Path, stats: &mut CodeStats) -> Result<()> {
716        if path.is_file() {
717            self.analyze_file(path, stats)?;
718        } else if path.is_dir() {
719            for entry in fs::read_dir(path)? {
720                let entry = entry?;
721                let entry_path = entry.path();
722                let name = entry.file_name().to_string_lossy().to_string();
723
724                // Skip hidden and common non-source directories
725                if name.starts_with('.') || name == "target" || name == "node_modules" {
726                    continue;
727                }
728
729                self.collect_stats(&entry_path, stats)?;
730            }
731        }
732        Ok(())
733    }
734
735    /// Return `true` if a directory entry name should be skipped during filesystem traversal.
736    ///
737    /// This filters out hidden entries and common build/dependency directories.
738    fn should_skip_name(name: &str) -> bool {
739        name.starts_with('.') || name == "target" || name == "node_modules"
740    }
741
742    /// Recursively count files by extension under `path` up to `max_depth`.
743    ///
744    /// This is a traversal helper used by [`Self::repository_map`].
745    fn count_files_by_extension(
746        path: &Path,
747        counts: &mut HashMap<String, usize>,
748        max_depth: usize,
749        depth: usize,
750    ) -> Result<()> {
751        if depth > max_depth || !path.is_dir() {
752            return Ok(());
753        }
754
755        for entry in fs::read_dir(path)? {
756            let entry = entry?;
757            let entry_path = entry.path();
758            let name = entry.file_name().to_string_lossy().to_string();
759            if Self::should_skip_name(&name) {
760                continue;
761            }
762
763            if entry_path.is_file() {
764                let ext = entry_path
765                    .extension()
766                    .and_then(|e| e.to_str())
767                    .unwrap_or("(none)")
768                    .to_string();
769                *counts.entry(ext).or_insert(0) += 1;
770            } else if entry_path.is_dir() {
771                Self::count_files_by_extension(&entry_path, counts, max_depth, depth + 1)?;
772            }
773        }
774        Ok(())
775    }
776
777    /// Recursively search for `re` under `path` and append line hits into `out`.
778    ///
779    /// This is a traversal helper used by [`Self::references`].
780    fn search_references(path: &Path, re: &Regex, out: &mut Vec<ReferenceHit>) -> Result<()> {
781        if path.is_file() {
782            if let Ok(content) = fs::read_to_string(path) {
783                for (idx, line) in content.lines().enumerate() {
784                    if re.is_match(line) {
785                        out.push(ReferenceHit {
786                            path: path.to_path_buf(),
787                            line: idx + 1,
788                            content: line.trim().to_string(),
789                        });
790                    }
791                }
792            }
793        } else if path.is_dir() {
794            for entry in fs::read_dir(path)? {
795                let entry = entry?;
796                let p = entry.path();
797                let name = entry.file_name().to_string_lossy().to_string();
798                if Self::should_skip_name(&name) {
799                    continue;
800                }
801                Self::search_references(&p, re, out)?;
802            }
803        }
804        Ok(())
805    }
806
807    /// Recursively search for the first definition matching `patterns` under `path`.
808    ///
809    /// This is a traversal helper used by [`Self::definition`].
810    fn find_definition(
811        path: &Path,
812        symbol: &str,
813        patterns: &[(SymbolKind, Regex)],
814    ) -> Result<Option<DefinitionHit>> {
815        if path.is_file() {
816            if let Ok(content) = fs::read_to_string(path) {
817                for (line_num, line) in content.lines().enumerate() {
818                    for (kind, pattern) in patterns {
819                        if pattern.is_match(line) {
820                            return Ok(Some(DefinitionHit {
821                                kind: *kind,
822                                name: symbol.to_string(),
823                                path: path.to_path_buf(),
824                                line: line_num + 1,
825                                content: line.to_string(),
826                            }));
827                        }
828                    }
829                }
830            }
831        } else if path.is_dir() {
832            for entry in fs::read_dir(path)? {
833                let entry = entry?;
834                let p = entry.path();
835                let name = entry.file_name().to_string_lossy().to_string();
836                if Self::should_skip_name(&name) {
837                    continue;
838                }
839                if let Some(hit) = Self::find_definition(&p, symbol, patterns)? {
840                    return Ok(Some(hit));
841                }
842            }
843        }
844        Ok(None)
845    }
846
847    /// Recursive traversal helper for [`Self::glob_search`].
848    fn walk_for_glob(
849        root: &Path,
850        current: &Path,
851        re: &Regex,
852        limit: usize,
853        out: &mut Vec<GlobMatch>,
854    ) -> Result<()> {
855        if out.len() >= limit {
856            return Ok(());
857        }
858        if current.is_file() {
859            let rel = current.strip_prefix(root).unwrap_or(current);
860            let rel_str = rel.to_string_lossy().replace('\\', "/");
861            if re.is_match(&rel_str) {
862                out.push(GlobMatch {
863                    path: current.to_path_buf(),
864                    relative_path: rel_str,
865                });
866            }
867        } else if current.is_dir() {
868            let mut entries: Vec<_> = fs::read_dir(current)?.filter_map(|e| e.ok()).collect();
869            entries.sort_by_key(|e| e.file_name());
870            for entry in entries {
871                if out.len() >= limit {
872                    break;
873                }
874                let name = entry.file_name().to_string_lossy().to_string();
875                if Self::should_skip_name(&name) {
876                    continue;
877                }
878                Self::walk_for_glob(root, &entry.path(), re, limit, out)?;
879            }
880        }
881        Ok(())
882    }
883
884    /// Recursive traversal helper for [`Self::grep`].
885    fn walk_for_grep(
886        path: &Path,
887        re: &Regex,
888        file_re: Option<&Regex>,
889        context_lines: usize,
890        limit: usize,
891        out: &mut Vec<GrepMatch>,
892    ) -> Result<()> {
893        if out.len() >= limit {
894            return Ok(());
895        }
896        if path.is_file() {
897            // Filter by file name glob when one is provided.
898            if let Some(fre) = file_re {
899                let name = path
900                    .file_name()
901                    .map(|n| n.to_string_lossy().into_owned())
902                    .unwrap_or_default();
903                if !fre.is_match(&name) {
904                    return Ok(());
905                }
906            }
907            if let Ok(content) = fs::read_to_string(path) {
908                let lines: Vec<&str> = content.lines().collect();
909                for (idx, line) in lines.iter().enumerate() {
910                    if out.len() >= limit {
911                        break;
912                    }
913                    if re.is_match(line) {
914                        let ctx_before = (0..context_lines)
915                            .filter_map(|d| {
916                                let li = idx.checked_sub(context_lines - d)?;
917                                Some((li + 1, lines[li].to_string()))
918                            })
919                            .collect();
920                        let ctx_after = (1..=context_lines)
921                            .filter_map(|d| {
922                                let li = idx + d;
923                                if li < lines.len() {
924                                    Some((li + 1, lines[li].to_string()))
925                                } else {
926                                    None
927                                }
928                            })
929                            .collect();
930                        out.push(GrepMatch {
931                            path: path.to_path_buf(),
932                            line: idx + 1,
933                            content: line.to_string(),
934                            context_before: ctx_before,
935                            context_after: ctx_after,
936                        });
937                    }
938                }
939            }
940        } else if path.is_dir() {
941            let mut entries: Vec<_> = fs::read_dir(path)?.filter_map(|e| e.ok()).collect();
942            entries.sort_by_key(|e| e.file_name());
943            for entry in entries {
944                if out.len() >= limit {
945                    break;
946                }
947                let name = entry.file_name().to_string_lossy().to_string();
948                if Self::should_skip_name(&name) {
949                    continue;
950                }
951                Self::walk_for_grep(&entry.path(), re, file_re, context_lines, limit, out)?;
952            }
953        }
954        Ok(())
955    }
956
957    fn analyze_file(&self, path: &Path, stats: &mut CodeStats) -> Result<()> {
958        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
959        let lang = match ext {
960            "rs" => "Rust",
961            "ts" | "tsx" => "TypeScript",
962            "js" | "jsx" => "JavaScript",
963            "py" => "Python",
964            "go" => "Go",
965            "java" => "Java",
966            "c" | "h" => "C",
967            "cpp" | "hpp" | "cc" => "C++",
968            "md" => "Markdown",
969            "json" => "JSON",
970            "toml" => "TOML",
971            "yaml" | "yml" => "YAML",
972            _ => return Ok(()),
973        };
974
975        if let Ok(content) = fs::read_to_string(path) {
976            let lines: Vec<&str> = content.lines().collect();
977            let total = lines.len();
978            let blank = lines.iter().filter(|l| l.trim().is_empty()).count();
979            let comments = lines
980                .iter()
981                .filter(|l| {
982                    let t = l.trim();
983                    t.starts_with("//")
984                        || t.starts_with('#')
985                        || t.starts_with("/*")
986                        || t.starts_with('*')
987                })
988                .count();
989            let code = total.saturating_sub(blank + comments);
990
991            stats.total_files += 1;
992            stats.total_lines += total;
993            stats.blank_lines += blank;
994            stats.comment_lines += comments;
995            stats.code_lines += code;
996
997            let lang_stats = stats.by_language.entry(lang.to_string()).or_default();
998            lang_stats.files += 1;
999            lang_stats.lines += total;
1000            lang_stats.code_lines += code;
1001        }
1002        Ok(())
1003    }
1004}
1005
/// Translate a glob pattern into an anchored regex source string that can be
/// handed to [`Regex::new`].
///
/// Glob syntax understood, checked in this order at each position:
/// - `**/` — zero or more whole path segments (`(?:[^/]+/)*`)
/// - `**`  — anything, separators included (`.*`)
/// - `*`   — any run of non-separator characters (`[^/]*`)
/// - `?`   — exactly one non-separator character (`[^/]`)
///
/// Every other character is copied through literally; the regex
/// metacharacters `. + ^ $ { } ( ) | [ ] \` are backslash-escaped first.
/// The result always starts with `^` and ends with `$`.
fn glob_to_regex_string(pattern: &str) -> String {
    /// Characters that must be escaped to be matched literally.
    const ESCAPED: &str = ".+^${}()|[]\\";

    let glyphs: Vec<char> = pattern.chars().collect();
    let mut regex = String::with_capacity(pattern.len() + 2);
    regex.push('^');

    // Peel one glob token off the front of `rest` per iteration.
    let mut rest = glyphs.as_slice();
    loop {
        rest = match rest {
            [] => break,
            ['*', '*', '/', tail @ ..] => {
                regex.push_str("(?:[^/]+/)*");
                tail
            }
            ['*', '*', tail @ ..] => {
                regex.push_str(".*");
                tail
            }
            ['*', tail @ ..] => {
                regex.push_str("[^/]*");
                tail
            }
            ['?', tail @ ..] => {
                regex.push_str("[^/]");
                tail
            }
            [literal, tail @ ..] => {
                if ESCAPED.contains(*literal) {
                    regex.push('\\');
                }
                regex.push(*literal);
                tail
            }
        };
    }

    regex.push('$');
    regex
}
1052
1053fn symbol_patterns() -> &'static Vec<(SymbolKind, Regex)> {
1054    static PATTERNS: OnceLock<Vec<(SymbolKind, Regex)>> = OnceLock::new();
1055    PATTERNS.get_or_init(|| {
1056        vec![
1057            (
1058                SymbolKind::Function,
1059                Regex::new(r"(?m)^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)")
1060                    .expect("valid function regex"),
1061            ),
1062            (
1063                SymbolKind::Struct,
1064                Regex::new(r"(?m)^(?:pub\s+)?struct\s+(\w+)").expect("valid struct regex"),
1065            ),
1066            (
1067                SymbolKind::Enum,
1068                Regex::new(r"(?m)^(?:pub\s+)?enum\s+(\w+)").expect("valid enum regex"),
1069            ),
1070            (
1071                SymbolKind::Impl,
1072                Regex::new(r"(?m)^impl(?:<[^>]+>)?\s+(\w+)").expect("valid impl regex"),
1073            ),
1074        ]
1075    })
1076}
1077
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// The repository map honours the requested depth and does not count
    /// files that live under the ignored `target` directory.
    #[test]
    fn repository_map_respects_depth_and_ignores_common_dirs() {
        let sandbox = tempfile::tempdir().unwrap();
        let root = sandbox.path();

        fs::create_dir_all(root.join("src/nested")).unwrap();
        fs::create_dir_all(root.join("target")).unwrap();
        for (relative, body) in [
            ("Cargo.toml", "[package]\nname='x'\n"),
            ("src/lib.rs", "pub fn a() {}\n"),
            ("src/nested/mod.rs", "pub fn b() {}\n"),
            ("target/ignored.rs", "pub fn nope() {}\n"),
        ] {
            fs::write(root.join(relative), body).unwrap();
        }

        // Missing extensions count as zero files.
        let count =
            |types: &HashMap<String, usize>, ext: &str| types.get(ext).copied().unwrap_or(0);
        let tools = CodeTools::default();

        let shallow = tools.repository_map(root, 1).unwrap();
        assert_eq!(count(&shallow.file_types, "toml"), 1);
        assert_eq!(count(&shallow.file_types, "rs"), 1);

        let deeper = tools.repository_map(root, 2).unwrap();
        assert_eq!(count(&deeper.file_types, "rs"), 2);
        assert_eq!(count(&deeper.file_types, "toml"), 1);
    }

    /// Functions, structs, enums and impl blocks at column zero are reported
    /// with the expected kind and name.
    #[test]
    fn symbols_extracts_basic_rust_like_items() {
        let sandbox = tempfile::tempdir().unwrap();
        let source = sandbox.path().join("test.rs");
        fs::write(
            &source,
            "pub async fn foo() {}\nstruct Bar {}\nenum E { A }\nimpl Bar {}\n",
        )
        .unwrap();

        let tools = CodeTools::default();
        let symbols = tools.symbols(&source).unwrap();
        let mut found: Vec<_> = symbols
            .iter()
            .map(|sym| (sym.kind, sym.name.clone()))
            .collect();
        found.sort_by(|left, right| left.1.cmp(&right.1));

        for (kind, name) in [
            (SymbolKind::Function, "foo"),
            (SymbolKind::Struct, "Bar"),
            (SymbolKind::Enum, "E"),
            (SymbolKind::Impl, "Bar"),
        ] {
            assert!(found.contains(&(kind, name.to_string())));
        }
    }

    /// Reference and definition lookups walk the tree, tolerate files that
    /// are not valid UTF-8, and stay out of the ignored `target` directory.
    #[test]
    fn references_and_definition_work_and_skip_non_utf8_files() {
        let sandbox = tempfile::tempdir().unwrap();
        let root = sandbox.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(
            root.join("src/main.rs"),
            "fn main() { let _x = MyType; }\n// MyType used here\n",
        )
        .unwrap();

        // Non-UTF8 file should not cause an error for recursive searches.
        let mut binary = fs::File::create(root.join("src/binary.bin")).unwrap();
        binary.write_all(&[0xff, 0xfe, 0xfd]).unwrap();

        // Ignored directory should be skipped.
        fs::create_dir_all(root.join("target")).unwrap();
        fs::write(root.join("target/ignored.rs"), "MyType\n").unwrap();

        let tools = CodeTools::default();

        let hits = tools.references("MyType", root).unwrap();
        assert_eq!(hits.len(), 2);
        assert!(
            !hits
                .iter()
                .any(|hit| hit.path.to_string_lossy().contains("target"))
        );

        let located = tools.definition("main", root).unwrap();
        assert!(located.is_some());
        let located = located.unwrap();
        assert_eq!(located.kind, SymbolKind::Function);
        assert!(located.path.to_string_lossy().contains("main.rs"));
    }

    /// Both `[dependencies]` and `[dev-dependencies]` are parsed, covering
    /// plain version strings, inline tables and workspace-inherited entries.
    #[test]
    fn cargo_dependencies_parses_common_sections() {
        let sandbox = tempfile::tempdir().unwrap();
        let root = sandbox.path();
        fs::write(
            root.join("Cargo.toml"),
            r#"[package]
name = "x"
version = "0.1.0"

[dependencies]
serde = "1"
tokio = { version = "1", features = ["rt"] }

[dev-dependencies]
tempfile = { workspace = true }
"#,
        )
        .unwrap();

        let tools = CodeTools::default();
        let groups = tools.cargo_dependencies(root).unwrap();
        assert_eq!(groups.len(), 2);

        let runtime = groups
            .iter()
            .find(|group| group.section == "dependencies")
            .unwrap();
        for expected in ["serde", "tokio"] {
            assert!(
                runtime
                    .dependencies
                    .iter()
                    .any(|dep| dep.name == expected && dep.version == "1")
            );
        }

        let dev_only = groups
            .iter()
            .find(|group| group.section == "dev-dependencies")
            .unwrap();
        assert!(
            dev_only
                .dependencies
                .iter()
                .any(|dep| dep.name == "tempfile" && dep.source == "workspace")
        );
    }
}