gestura_core_pipeline/
reflection.rs

//! ERL-inspired experiential reflection types and pure helpers.
//!
//! This module adapts the high-level loop from Experiential Reinforcement
//! Learning (ERL) into Gestura's pipeline model:
//!
//! 1. **Experience** — the agent makes an initial attempt and observes tool
//!    outcomes plus any obvious failure signals.
//! 2. **Reflection** — when the response quality score falls below a configured
//!    threshold, the runtime asks the model for a structured explanation of what
//!    went wrong and how to improve.
//! 3. **Consolidation** — the resulting reflection can be reused within the same
//!    turn, stored in session working memory, and promoted into long-term memory
//!    for later prompt injection.
//!
//! This crate owns the *portable* pieces of that design:
//!
//! - reflection configuration and data structures
//! - quality-signal extraction and heuristic scoring
//! - prompt construction for reflection generation
//! - parsing of the structured reflection response format
//!
//! The concrete runtime integration lives in
//! `gestura-core/src/pipeline/reflection.rs`, which wires these helpers into the
//! agent loop, streaming events, session storage, and memory-bank promotion.

26use chrono::{DateTime, Utc};
27use gestura_core_foundation::OutcomeSignal;
28use serde::{Deserialize, Serialize};
29
30use crate::types::{AgentResponse, ToolResult};
31
/// Configuration for the experiential reflection system.
///
/// The settings map the ERL-inspired design onto Gestura's runtime behavior:
///
/// - `enabled` keeps the feature opt-in because it adds an extra LLM call on
///   weak turns and can therefore increase latency/cost.
/// - `quality_threshold` maps to ERL's τ-style gate for deciding when a turn is
///   poor enough to merit reflection.
/// - `max_injected_reflections` limits how much cross-episode corrective memory
///   can be injected back into future prompts.
/// - `max_retry_attempts` bounds same-turn corrective retries. A retry may be a
///   text-only revision or one safe read-only re-execution driven by the
///   reflection strategy, but the runtime still caps it to a single retry.
/// - `promotion_confidence` gates whether a reflection is strong enough to move
///   from short-term/session memory into long-term memory-bank storage.
///
/// See the `Default` impl for the shipped values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReflectionConfig {
    /// Enable the reflection phase in the agent loop.
    pub enabled: bool,
    /// Quality threshold (0.0–1.0). Reflection only triggers when
    /// the response quality score falls below this value.
    /// Maps to ERL's τ parameter (gated reflection).
    pub quality_threshold: f32,
    /// Maximum number of past reflections to inject into prompt context.
    pub max_injected_reflections: usize,
    /// Maximum number of reflection-guided corrective retries per turn.
    ///
    /// The runtime currently applies at most one bounded retry. That retry may
    /// be a text-only revision or one safe re-execution with read-only tool
    /// policy.
    pub max_retry_attempts: usize,
    /// Minimum confidence for a reflection to be promoted to long-term memory.
    pub promotion_confidence: f32,
}
66
67impl Default for ReflectionConfig {
68    fn default() -> Self {
69        Self {
70            enabled: true,          // On by default
71            quality_threshold: 0.6, // Trigger reflection below 60% quality
72            max_injected_reflections: 3,
73            max_retry_attempts: 1,
74            promotion_confidence: 0.75,
75        }
76    }
77}
78
/// A structured reflection generated after a suboptimal agent turn.
///
/// This is Gestura's durable representation of ERL's corrective reflection: a
/// concise summary of the attempted action, the failure mode, and the strategy
/// the agent should apply next time.
///
/// The runtime can:
///
/// - use it immediately for a same-turn retry,
/// - store it in session working memory as short-term corrective context, and
/// - promote it into `MemoryType::Reflection` for retrieval in future turns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentReflection {
    /// Stable identifier so downstream outcomes can update the same reflection.
    pub reflection_id: String,
    /// What the agent attempted.
    pub attempt_summary: String,
    /// What went wrong or was suboptimal.
    pub failure_analysis: String,
    /// Concrete corrective strategy for future attempts.
    pub corrective_strategy: String,
    /// Quality improvement score (0.0–1.0) — did the reflection help?
    /// `None` until a subsequent attempt has been scored.
    pub improvement_score: Option<f32>,
    /// Tags for retrieval (tool names, error categories, task types).
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Durable outcome signals linked back from retries, gates, and task outcomes.
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub outcome_signals: Vec<OutcomeSignal>,
    /// Session context.
    pub session_id: String,
    /// Task ID if available.
    pub task_id: Option<String>,
    /// When this reflection was created.
    pub timestamp: DateTime<Utc>,
}
116
117impl AgentReflection {
118    /// Create a new reflection.
119    pub fn new(
120        session_id: impl Into<String>,
121        attempt_summary: impl Into<String>,
122        failure_analysis: impl Into<String>,
123        corrective_strategy: impl Into<String>,
124    ) -> Self {
125        let session_id = session_id.into();
126        Self {
127            reflection_id: build_reflection_id(&session_id),
128            attempt_summary: attempt_summary.into(),
129            failure_analysis: failure_analysis.into(),
130            corrective_strategy: corrective_strategy.into(),
131            improvement_score: None,
132            tags: Vec::new(),
133            outcome_signals: Vec::new(),
134            session_id,
135            task_id: None,
136            timestamp: Utc::now(),
137        }
138    }
139
140    /// Attach tags for retrieval.
141    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
142        self.tags = tags;
143        self
144    }
145
146    /// Attach a task ID.
147    pub fn with_task(mut self, task_id: impl Into<String>) -> Self {
148        self.task_id = Some(task_id.into());
149        self
150    }
151
152    /// Attach durable outcome signals for corrective-learning provenance.
153    #[must_use]
154    pub fn with_outcome_signals(mut self, outcome_signals: Vec<OutcomeSignal>) -> Self {
155        self.outcome_signals = outcome_signals;
156        self
157    }
158
159    /// Record a single durable outcome signal.
160    pub fn push_outcome_signal(&mut self, signal: OutcomeSignal) {
161        self.outcome_signals = merge_outcome_signals(&self.outcome_signals, &[signal]);
162    }
163
164    /// Score how strongly this reflection should be promoted into durable memory.
165    #[must_use]
166    pub fn promotion_confidence(&self) -> f32 {
167        reflection_promotion_confidence(self.improvement_score, &self.outcome_signals)
168    }
169
170    /// Format as a prompt section for context injection.
171    pub fn to_prompt_section(&self) -> String {
172        let improvement = self.improvement_score.map(|score| {
173            format!(
174                "             - Observed improvement after retry: {:.0}%\n",
175                score * 100.0
176            )
177        });
178        let outcomes = if self.outcome_signals.is_empty() {
179            String::new()
180        } else {
181            format!(
182                "             - Outcomes: {}\n",
183                self.outcome_signals
184                    .iter()
185                    .map(|signal| signal.kind.label())
186                    .collect::<Vec<_>>()
187                    .join(", ")
188            )
189        };
190
191        format!(
192            "**Reflection** ({})\n\
193             - Attempted: {}\n\
194             - Issue: {}\n\
195             - Strategy: {}\n{}{}",
196            self.timestamp.format("%Y-%m-%d %H:%M UTC"),
197            self.attempt_summary,
198            self.failure_analysis,
199            self.corrective_strategy,
200            improvement.unwrap_or_default(),
201            outcomes,
202        )
203    }
204}
205
206fn build_reflection_id(session_id: &str) -> String {
207    let nanos = Utc::now()
208        .timestamp_nanos_opt()
209        .unwrap_or_else(|| Utc::now().timestamp_micros() * 1_000);
210    let session_fragment = session_id
211        .chars()
212        .filter(|ch| ch.is_ascii_alphanumeric())
213        .take(12)
214        .collect::<String>();
215    if session_fragment.is_empty() {
216        format!("reflection-{nanos}")
217    } else {
218        format!("reflection-{session_fragment}-{nanos}")
219    }
220}
221
222/// Score how strongly a reflection should be promoted into durable memory.
223#[must_use]
224pub fn reflection_promotion_confidence(
225    improvement_score: Option<f32>,
226    outcome_signals: &[OutcomeSignal],
227) -> f32 {
228    let base = improvement_score
229        .map(|score| (0.55 + (score * 0.35)).clamp(0.55, 0.90))
230        .unwrap_or(0.62);
231    let delta: f32 = outcome_signals
232        .iter()
233        .map(|signal| signal.kind.confidence_delta())
234        .sum();
235    (base + delta).clamp(0.30, 0.97)
236}
237
238/// Merge outcome signals, replacing older entries of the same kind with the newest.
239#[must_use]
240pub fn merge_outcome_signals(
241    existing: &[OutcomeSignal],
242    incoming: &[OutcomeSignal],
243) -> Vec<OutcomeSignal> {
244    let mut merged = existing.to_vec();
245    for signal in incoming {
246        if let Some(slot) = merged
247            .iter_mut()
248            .find(|current| current.kind == signal.kind)
249        {
250            *slot = signal.clone();
251        } else {
252            merged.push(signal.clone());
253        }
254    }
255    merged.sort_by_key(|signal| signal.observed_at);
256    merged
257}
258
/// Score how much a reflection-guided retry improved quality.
///
/// Returns a normalized 0.0–1.0 value where:
/// - `0.0` means no measurable improvement
/// - `1.0` means the retry closed the entire remaining quality gap
pub fn score_reflection_improvement(initial_quality: f32, retry_quality: f32) -> f32 {
    let gain = retry_quality - initial_quality;
    if gain <= 0.0 {
        return 0.0;
    }

    // Normalize by the headroom left above the initial quality; the epsilon
    // floor avoids dividing by zero when the first attempt was already ~1.0.
    let headroom = (1.0 - initial_quality).max(f32::EPSILON);
    (gain / headroom).clamp(0.0, 1.0)
}
272
273/// Build heuristic quality signals from an agent response.
274///
275/// These signals stand in for the explicit reward signal used in ERL. Gestura
276/// does not have a verifiable task reward for most agent turns, so the runtime
277/// falls back to observable quality proxies such as tool errors, iteration
278/// pressure, truncation, and explicit failure language.
279pub fn quality_signals_for_response(
280    response: &AgentResponse,
281    max_iterations: usize,
282) -> QualitySignals {
283    let total_tool_calls = response.tool_calls.len();
284    let error_count = response
285        .tool_calls
286        .iter()
287        .filter(|tc| matches!(&tc.result, ToolResult::Error(_)))
288        .count();
289
290    let tool_error_rate = if total_tool_calls > 0 {
291        error_count as f32 / total_tool_calls as f32
292    } else {
293        0.0
294    };
295
296    QualitySignals {
297        tool_error_rate,
298        iterations_used: response.iterations,
299        max_iterations,
300        was_truncated: response.truncated,
301        has_failure_patterns: detect_failure_patterns(&response.content),
302        is_empty_response: response.content.trim().is_empty(),
303    }
304}
305
/// Heuristic quality signals extracted from an agent response.
///
/// These signals drive the quality gate (ERL's τ threshold) that
/// determines whether a reflection should be triggered. See
/// `QualitySignals::score` for how they combine into a single value.
#[derive(Debug, Clone)]
pub struct QualitySignals {
    /// Fraction of tool calls that resulted in errors (0.0–1.0).
    pub tool_error_rate: f32,
    /// Number of agentic loop iterations used.
    pub iterations_used: usize,
    /// Maximum iterations configured.
    pub max_iterations: usize,
    /// Whether the response was truncated due to token limits.
    pub was_truncated: bool,
    /// Whether the response contains apology/failure patterns.
    pub has_failure_patterns: bool,
    /// Whether the response is empty or near-empty.
    pub is_empty_response: bool,
}
325
326impl QualitySignals {
327    /// Compute an aggregate quality score (0.0–1.0, higher is better).
328    ///
329    /// This is the heuristic that replaces ERL's verifiable reward signal.
330    /// In our setting we don't have a ground-truth reward, so we use
331    /// observable proxy signals from the agent's execution.
332    pub fn score(&self) -> f32 {
333        if self.is_empty_response {
334            return 0.0;
335        }
336
337        let mut score: f32 = 1.0;
338
339        // Tool errors are a strong negative signal
340        score -= self.tool_error_rate * 0.4;
341
342        // Using many iterations suggests the agent is struggling
343        if self.max_iterations > 0 {
344            let iteration_ratio = self.iterations_used as f32 / self.max_iterations as f32;
345            if iteration_ratio > 0.7 {
346                score -= (iteration_ratio - 0.7) * 0.5;
347            }
348        }
349
350        // Truncation indicates context overflow issues
351        if self.was_truncated {
352            score -= 0.15;
353        }
354
355        // Explicit failure/apology patterns
356        if self.has_failure_patterns {
357            score -= 0.25;
358        }
359
360        score.clamp(0.0, 1.0)
361    }
362}
363
/// Check if response text contains common failure/apology patterns.
pub fn detect_failure_patterns(text: &str) -> bool {
    // Known apology/failure phrases, all lowercase for case-insensitive matching.
    const FAILURE_PATTERNS: [&str; 9] = [
        "i'm sorry, i can't",
        "i cannot",
        "i'm unable to",
        "unfortunately, i",
        "i don't have the ability",
        "i apologize, but i",
        "i'm not able to",
        "error occurred",
        "failed to execute",
    ];

    let haystack = text.to_lowercase();
    FAILURE_PATTERNS
        .iter()
        .any(|pattern| haystack.contains(pattern))
}
380
381/// Build the reflection prompt that asks the LLM to analyze a suboptimal turn.
382///
383/// This is the pure prompt-construction step for reflection generation:
384///
385/// - the original user request becomes the task context,
386/// - the agent response becomes the failed/suboptimal attempt,
387/// - tool errors and quality signals become the environment feedback,
388/// - and the output contract forces the model into the structured
389///   `ATTEMPT`/`ISSUE`/`STRATEGY`/`TAGS` format expected by the parser.
390pub fn build_reflection_prompt(
391    user_request: &str,
392    agent_response: &str,
393    quality_signals: &QualitySignals,
394    tool_errors: &[String],
395) -> String {
396    let mut prompt = String::from(
397        "System: You are a self-reflective AI assistant analyzing a previous interaction \
398         that was suboptimal. Generate a structured reflection to improve future responses.\n\n",
399    );
400
401    prompt.push_str(&format!("User request: {}\n\n", user_request));
402    prompt.push_str(&format!(
403        "Agent response (quality score: {:.2}):\n{}\n\n",
404        quality_signals.score(),
405        agent_response
406    ));
407
408    if !tool_errors.is_empty() {
409        prompt.push_str("Tool errors encountered:\n");
410        for error in tool_errors {
411            prompt.push_str(&format!("- {}\n", error));
412        }
413        prompt.push('\n');
414    }
415
416    prompt.push_str(
417        "Provide a brief, structured reflection in the following format:\n\
418         ATTEMPT: [1-2 sentence summary of what was attempted]\n\
419         ISSUE: [1-2 sentence analysis of what went wrong]\n\
420         STRATEGY: [1-2 sentence corrective strategy for future attempts]\n\
421         TAGS: [comma-separated relevant tags]\n\
422         Important:\n\
423         - Output plain text only.\n\
424         - Do not wrap the reflection in Markdown code fences.\n\
425         - Do not add any preamble, explanation, or extra sections before or after the four fields.\n",
426    );
427
428    prompt
429}
430
/// Canonical field labels recognized in a structured reflection response.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReflectionField {
    /// `ATTEMPT:` — what the agent tried.
    Attempt,
    /// `ISSUE:` — what went wrong.
    Issue,
    /// `STRATEGY:` — corrective strategy for next time.
    Strategy,
    /// `TAGS:` — retrieval tags.
    Tags,
}
438
/// Remove every `open…close` delimited block from `input`.
///
/// An unterminated opening delimiter drops the remainder of the input, which
/// fails closed for e.g. a dangling `<think>` section.
fn strip_tag_blocks(input: &str, open: &str, close: &str) -> String {
    let mut kept = String::new();
    let mut rest = input;

    while let Some(open_at) = rest.find(open) {
        kept.push_str(&rest[..open_at]);
        let after_open = &rest[open_at + open.len()..];
        match after_open.find(close) {
            Some(close_at) => rest = &after_open[close_at + close.len()..],
            // No closing tag: discard everything after the opener.
            None => return kept.trim().to_string(),
        }
    }

    kept.push_str(rest);
    kept.trim().to_string()
}
456
457fn sanitize_reflection_response(response: &str) -> String {
458    strip_tag_blocks(response, "<think>", "</think>")
459        .lines()
460        .filter(|line| !line.trim_start().starts_with("```"))
461        .collect::<Vec<_>>()
462        .join("\n")
463        .trim()
464        .to_string()
465}
466
/// Collapse all whitespace runs (including newlines) into single spaces.
fn compact_reflection_value(value: &str) -> String {
    let words: Vec<&str> = value.split_whitespace().collect();
    words.join(" ")
}
470
/// Strip trailing commas and surrounding Markdown/quote decoration from a
/// parsed field value.
fn clean_reflection_field_value(value: &str) -> String {
    let is_decoration = |c: char| matches!(c, '*' | '_' | '`' | '"' | '\'');
    value
        .trim()
        .trim_end_matches(',')
        .trim_matches(is_decoration)
        .trim()
        .to_string()
}
479
480fn push_reflection_segment(target: &mut String, segment: &str) {
481    let segment = compact_reflection_value(&clean_reflection_field_value(segment));
482    if segment.is_empty() {
483        return;
484    }
485    if !target.is_empty() {
486        target.push(' ');
487    }
488    target.push_str(&segment);
489}
490
491fn normalize_reflection_label(label: &str) -> Option<ReflectionField> {
492    let normalized = label
493        .trim()
494        .trim_matches(|c: char| matches!(c, '*' | '_' | '`' | '[' | ']' | '(' | ')' | '#'))
495        .chars()
496        .filter_map(|ch| {
497            if ch.is_ascii_alphanumeric() {
498                Some(ch.to_ascii_lowercase())
499            } else if matches!(ch, ' ' | '-' | '_') {
500                Some('_')
501            } else {
502                None
503            }
504        })
505        .collect::<String>();
506
507    let normalized = normalized.trim_matches('_');
508
509    match normalized {
510        "attempt" | "attempt_summary" | "summary" | "what_was_attempted" => {
511            Some(ReflectionField::Attempt)
512        }
513        "issue" | "failure" | "failure_analysis" | "problem" | "analysis" | "what_went_wrong" => {
514            Some(ReflectionField::Issue)
515        }
516        "strategy"
517        | "corrective_strategy"
518        | "correction"
519        | "fix"
520        | "improvement_strategy"
521        | "next_time" => Some(ReflectionField::Strategy),
522        "tags" | "labels" => Some(ReflectionField::Tags),
523        _ => None,
524    }
525}
526
/// Strip leading blockquote markers, list bullets, and ordered-list numbers
/// (e.g. `> `, `- `, `* `, `• `, `1. `, `2) `) from a line, repeatedly, until
/// none remain.
fn strip_reflection_line_prefix(line: &str) -> &str {
    let mut rest = line.trim_start();

    'outer: loop {
        // Blockquote marker needs no trailing space.
        if let Some(tail) = rest.strip_prefix('>') {
            rest = tail.trim_start();
            continue;
        }

        // Unordered-list bullets require a trailing space.
        for marker in ["- ", "* ", "• "] {
            if let Some(tail) = rest.strip_prefix(marker) {
                rest = tail.trim_start();
                continue 'outer;
            }
        }

        // Ordered-list prefixes: one or more digits followed by ". " or ") ".
        let digit_len = rest.chars().take_while(char::is_ascii_digit).count();
        if digit_len > 0 {
            let after_digits = &rest[digit_len..];
            for marker in [". ", ") "] {
                if let Some(tail) = after_digits.strip_prefix(marker) {
                    rest = tail.trim_start();
                    continue 'outer;
                }
            }
        }

        break;
    }

    rest
}
566
567fn parse_reflection_field_line(line: &str) -> Option<(ReflectionField, String)> {
568    let candidate = strip_reflection_line_prefix(line);
569    let colon_idx = candidate.find(':')?;
570    let label = candidate[..colon_idx].trim();
571    let value = candidate[colon_idx + 1..].trim();
572    let field = normalize_reflection_label(label)?;
573    Some((field, value.to_string()))
574}
575
576fn parse_tag_values(value: &str) -> Vec<String> {
577    let trimmed = strip_reflection_line_prefix(value).trim();
578    let trimmed = clean_reflection_field_value(trimmed);
579    let trimmed = trimmed.trim_matches(|c: char| matches!(c, '[' | ']' | '{' | '}'));
580    if trimmed.is_empty() {
581        return Vec::new();
582    }
583    trimmed
584        .split(',')
585        .map(clean_reflection_field_value)
586        .filter(|tag| !tag.is_empty())
587        .collect()
588}
589
/// Extract the string value for `key` from loosely JSON-shaped text.
///
/// Handles the common escapes (`\n`, `\r`, `\t`, `\"`, `\\`); any other
/// escaped character is kept verbatim. Returns `None` when the key is absent,
/// the value is not a string, or the string is unterminated.
fn extract_jsonish_string_value(source: &str, key: &str) -> Option<String> {
    let marker = format!("\"{key}\"");
    let after_key = &source[source.find(&marker)? + marker.len()..];
    let body = after_key
        .trim_start()
        .strip_prefix(':')?
        .trim_start()
        .strip_prefix('"')?;

    let mut value = String::new();
    let mut chars = body.chars();
    while let Some(ch) = chars.next() {
        match ch {
            // Closing quote: the value is complete.
            '"' => return Some(value.trim().to_string()),
            // Escape sequence: decode the next character (or bail at EOF).
            '\\' => match chars.next()? {
                'n' => value.push('\n'),
                'r' => value.push('\r'),
                't' => value.push('\t'),
                other => value.push(other),
            },
            other => value.push(other),
        }
    }

    // Unterminated string literal.
    None
}
621
622fn extract_jsonish_tags(source: &str) -> Option<Vec<String>> {
623    if let Some(tags_str) = extract_jsonish_string_value(source, "tags") {
624        return Some(parse_tag_values(&tags_str));
625    }
626
627    let needle = "\"tags\"";
628    let start = source.find(needle)? + needle.len();
629    let remainder = source[start..].trim_start();
630    let remainder = remainder.strip_prefix(':')?.trim_start();
631    let remainder = remainder.strip_prefix('[')?;
632    let end = remainder.find(']')?;
633    let body = &remainder[..end];
634
635    Some(
636        body.split(',')
637            .map(|item| item.trim().trim_matches(|c: char| matches!(c, '"' | '\'')))
638            .filter(|item| !item.is_empty())
639            .map(ToOwned::to_owned)
640            .collect(),
641    )
642}
643
644fn parse_jsonish_reflection_response(response: &str, session_id: &str) -> Option<AgentReflection> {
645    let attempt = extract_jsonish_string_value(response, "attempt_summary")
646        .or_else(|| extract_jsonish_string_value(response, "attempt"))?;
647    let issue = extract_jsonish_string_value(response, "failure_analysis")
648        .or_else(|| extract_jsonish_string_value(response, "issue"))?;
649    let strategy = extract_jsonish_string_value(response, "corrective_strategy")
650        .or_else(|| extract_jsonish_string_value(response, "strategy"))?;
651    let tags = extract_jsonish_tags(response).unwrap_or_default();
652
653    Some(
654        AgentReflection::new(session_id, attempt, issue, strategy)
655            .with_tags(tags.into_iter().filter(|tag| !tag.is_empty()).collect()),
656    )
657}
658
659/// Parse a structured reflection from an LLM response.
660///
661/// Expects the format produced by `build_reflection_prompt` and intentionally
662/// fails closed if any of the core fields are missing. The runtime treats an
663/// unparsable reflection as non-durable so it does not enter session or
664/// long-term memory in a malformed shape.
665pub fn parse_reflection_response(response: &str, session_id: &str) -> Option<AgentReflection> {
666    let response = sanitize_reflection_response(response);
667    let mut attempt = String::new();
668    let mut issue = String::new();
669    let mut strategy = String::new();
670    let mut tags = Vec::new();
671    let mut current_field = None;
672
673    for line in response.lines() {
674        let trimmed = line.trim();
675        if trimmed.is_empty() {
676            continue;
677        }
678
679        if let Some((field, value)) = parse_reflection_field_line(trimmed) {
680            current_field = Some(field);
681            match field {
682                ReflectionField::Attempt => push_reflection_segment(&mut attempt, &value),
683                ReflectionField::Issue => push_reflection_segment(&mut issue, &value),
684                ReflectionField::Strategy => push_reflection_segment(&mut strategy, &value),
685                ReflectionField::Tags => tags.extend(parse_tag_values(&value)),
686            }
687            continue;
688        }
689
690        match current_field {
691            Some(ReflectionField::Attempt) => push_reflection_segment(&mut attempt, trimmed),
692            Some(ReflectionField::Issue) => push_reflection_segment(&mut issue, trimmed),
693            Some(ReflectionField::Strategy) => push_reflection_segment(&mut strategy, trimmed),
694            Some(ReflectionField::Tags) => tags.extend(parse_tag_values(trimmed)),
695            None => {}
696        }
697    }
698
699    let mut deduped_tags = Vec::new();
700    for tag in tags {
701        if !tag.is_empty() && !deduped_tags.contains(&tag) {
702            deduped_tags.push(tag);
703        }
704    }
705
706    if !attempt.is_empty() && !issue.is_empty() && !strategy.is_empty() {
707        return Some(
708            AgentReflection::new(session_id, attempt, issue, strategy).with_tags(deduped_tags),
709        );
710    }
711
712    parse_jsonish_reflection_response(&response, session_id)
713}
714
715#[cfg(test)]
716mod tests {
717    use super::*;
718    use gestura_core_foundation::OutcomeSignalKind;
719
    // One iteration of ten, no errors, no truncation, no failure language:
    // no penalty applies, so the score stays at 1.0 (> 0.9).
    #[test]
    fn test_quality_scoring_high_quality_response() {
        let signals = QualitySignals {
            tool_error_rate: 0.0,
            iterations_used: 1,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(score > 0.9, "Good response should score > 0.9, got {score}");
    }
733
    // A 50% tool-error rate costs 0.5 * 0.4 = 0.2, dropping the score to 0.8,
    // below the 0.85 bound asserted here.
    #[test]
    fn test_quality_scoring_tool_errors() {
        let signals = QualitySignals {
            tool_error_rate: 0.5, // Half the tools errored
            iterations_used: 3,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(
            score < 0.85,
            "50% tool errors should lower score, got {score}"
        );
    }
750
    // 9/10 iterations exceeds the 0.7 budget threshold; the overshoot penalty
    // is (0.9 - 0.7) * 0.5 = 0.1, giving a score of 0.9 (< 0.95).
    #[test]
    fn test_quality_scoring_many_iterations() {
        let signals = QualitySignals {
            tool_error_rate: 0.0,
            iterations_used: 9,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(
            score < 0.95,
            "Using 90% iterations should lower score, got {score}"
        );
    }
767
    // An empty response short-circuits `score()` to 0.0 regardless of all
    // other (otherwise perfect) signals.
    #[test]
    fn test_quality_scoring_empty_response() {
        let signals = QualitySignals {
            tool_error_rate: 0.0,
            iterations_used: 1,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: true,
        };
        assert_eq!(signals.score(), 0.0);
    }
780
    // Penalties stack: 0.3*0.4 + (0.8-0.7)*0.5 + 0.15 + 0.25 = 0.57, so the
    // score lands at 0.43 (< 0.5).
    #[test]
    fn test_quality_scoring_combined_issues() {
        let signals = QualitySignals {
            tool_error_rate: 0.3,
            iterations_used: 8,
            max_iterations: 10,
            was_truncated: true,
            has_failure_patterns: true,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(
            score < 0.5,
            "Multiple issues should produce low score, got {score}"
        );
    }
797
    // Failure/apology phrasing is matched case-insensitively; neutral text
    // with no listed pattern is not flagged.
    #[test]
    fn test_detect_failure_patterns() {
        assert!(detect_failure_patterns("I'm sorry, I can't do that"));
        assert!(detect_failure_patterns(
            "Unfortunately, I cannot access that file"
        ));
        assert!(!detect_failure_patterns(
            "Here is the file content you requested"
        ));
    }
808
    // The built prompt must embed the user request, the flawed response, each
    // tool error, and the structured ATTEMPT/ISSUE/STRATEGY output contract.
    #[test]
    fn test_reflection_prompt_construction() {
        let signals = QualitySignals {
            tool_error_rate: 0.5,
            iterations_used: 3,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let prompt = build_reflection_prompt(
            "Read the file",
            "Error: file not found",
            &signals,
            &["FileNotFound: /tmp/missing.txt".to_string()],
        );
        assert!(prompt.contains("Read the file"));
        assert!(prompt.contains("Error: file not found"));
        assert!(prompt.contains("FileNotFound"));
        assert!(prompt.contains("ATTEMPT:"));
        assert!(prompt.contains("ISSUE:"));
        assert!(prompt.contains("STRATEGY:"));
    }
832
    // The canonical ATTEMPT/ISSUE/STRATEGY/TAGS layout parses into the
    // corresponding reflection fields, with tags split on commas.
    #[test]
    fn test_reflection_response_parsing() {
        let response = "\
            ATTEMPT: Tried to read the file at /tmp/missing.txt\n\
            ISSUE: The file path was incorrect; the file does not exist\n\
            STRATEGY: Verify file existence before attempting to read; suggest alternatives\n\
            TAGS: file, read, path-error\n";

        let reflection = parse_reflection_response(response, "session-123").unwrap();
        assert_eq!(
            reflection.attempt_summary,
            "Tried to read the file at /tmp/missing.txt"
        );
        assert!(reflection.failure_analysis.contains("incorrect"));
        assert!(reflection.corrective_strategy.contains("Verify"));
        assert_eq!(reflection.tags, vec!["file", "read", "path-error"]);
        assert_eq!(reflection.session_id, "session-123");
    }
851
    // Parsing fails closed: a response missing a core field (STRATEGY here)
    // yields None rather than a partially filled reflection.
    #[test]
    fn test_reflection_response_parsing_incomplete() {
        let response = "ATTEMPT: Something\nISSUE: Something else\n";
        let reflection = parse_reflection_response(response, "s1");
        assert!(reflection.is_none(), "Missing STRATEGY should return None");
    }
858
859    #[test]
860    fn test_reflection_response_parsing_markdown_and_multiline() {
861        let response = "<think>diagnosing tool output</think>\n\
862            - **Attempt:** Tried to inspect the missing config file.\n\
863              I answered before verifying the real path.\n\
864            - **Issue:** The response relied on an assumed file location\n\
865              instead of repository evidence.\n\
866            - **Strategy:** Search for the config file first, then answer\n\
867              only from the verified path and contents.\n\
868            - **Tags:** file, verification\n";
869
870        let reflection = parse_reflection_response(response, "session-md").unwrap();
871        assert!(
872            reflection
873                .attempt_summary
874                .contains("inspect the missing config file")
875        );
876        assert!(
877            reflection
878                .attempt_summary
879                .contains("verifying the real path")
880        );
881        assert!(reflection.failure_analysis.contains("repository evidence"));
882        assert!(
883            reflection
884                .corrective_strategy
885                .contains("verified path and contents")
886        );
887        assert_eq!(reflection.tags, vec!["file", "verification"]);
888    }
889
890    #[test]
891    fn test_reflection_response_parsing_aliases_and_tag_list() {
892        let response = "attempt_summary: Investigated a build failure without reading the actual error output.\n\
893            failure_analysis: The explanation guessed at causes instead of grounding them in the logs.\n\
894            corrective_strategy: Read the concrete stderr output first, then explain only the confirmed failure mode.\n\
895            tags:\n\
896            - shell\n\
897            - validation\n";
898
899        let reflection = parse_reflection_response(response, "session-alias").unwrap();
900        assert!(reflection.attempt_summary.contains("build failure"));
901        assert!(
902            reflection
903                .failure_analysis
904                .contains("grounding them in the logs")
905        );
906        assert!(
907            reflection
908                .corrective_strategy
909                .contains("concrete stderr output")
910        );
911        assert_eq!(reflection.tags, vec!["shell", "validation"]);
912    }
913
914    #[test]
915    fn test_reflection_response_parsing_jsonish_payload() {
916        let response = "```json\n{\n  \"attempt_summary\": \"Tried to edit the wrong file\",\n  \"failure_analysis\": \"The response assumed the target path without confirming it\",\n  \"corrective_strategy\": \"Locate the file first, then apply the edit to the verified path\",\n  \"tags\": [\"file\", \"path\"]\n}\n```";
917
918        let reflection = parse_reflection_response(response, "session-json").unwrap();
919        assert_eq!(reflection.attempt_summary, "Tried to edit the wrong file");
920        assert!(
921            reflection
922                .failure_analysis
923                .contains("assumed the target path")
924        );
925        assert!(
926            reflection
927                .corrective_strategy
928                .contains("Locate the file first")
929        );
930        assert_eq!(reflection.tags, vec!["file", "path"]);
931    }
932
933    #[test]
934    fn test_reflection_to_prompt_section() {
935        let reflection = AgentReflection::new(
936            "s1",
937            "Read missing file",
938            "File did not exist",
939            "Check file existence first",
940        )
941        .with_outcome_signals(vec![
942            OutcomeSignal::new(OutcomeSignalKind::RetryImproved)
943                .with_summary("The revised answer used the correct path."),
944        ]);
945        let section = reflection.to_prompt_section();
946        assert!(section.contains("Read missing file"));
947        assert!(section.contains("File did not exist"));
948        assert!(section.contains("Check file existence first"));
949        assert!(section.contains("Retry improved"));
950    }
951
952    #[test]
953    fn test_reflection_improvement_score_increases_with_retry_quality() {
954        let score = score_reflection_improvement(0.40, 0.76);
955        assert!(
956            score > 0.5,
957            "Expected strong improvement signal, got {score}"
958        );
959    }
960
961    #[test]
962    fn test_reflection_improvement_score_zero_when_retry_is_not_better() {
963        assert_eq!(score_reflection_improvement(0.65, 0.65), 0.0);
964        assert_eq!(score_reflection_improvement(0.65, 0.52), 0.0);
965    }
966
967    #[test]
968    fn test_promotion_confidence_uses_outcome_signals() {
969        let baseline = AgentReflection::new(
970            "s1",
971            "Attempted a retry",
972            "The first answer was weak",
973            "Revise with the missing evidence",
974        );
975        let stronger = baseline.clone().with_outcome_signals(vec![
976            OutcomeSignal::new(OutcomeSignalKind::RetryImproved),
977            OutcomeSignal::new(OutcomeSignalKind::ReviewApproved),
978        ]);
979        let weaker = baseline.with_outcome_signals(vec![
980            OutcomeSignal::new(OutcomeSignalKind::RetryDidNotImprove),
981            OutcomeSignal::new(OutcomeSignalKind::ReviewNeedsRevision),
982        ]);
983
984        assert!(stronger.promotion_confidence() > 0.70);
985        assert!(weaker.promotion_confidence() < 0.50);
986    }
987
988    #[test]
989    fn test_merge_outcome_signals_replaces_existing_kind() {
990        let first = OutcomeSignal::new(OutcomeSignalKind::ReviewApproved)
991            .with_summary("Initial approval note");
992        let replacement = OutcomeSignal::new(OutcomeSignalKind::ReviewApproved)
993            .with_summary("Final approval note");
994
995        let merged = merge_outcome_signals(&[first], std::slice::from_ref(&replacement));
996
997        assert_eq!(merged.len(), 1);
998        assert_eq!(merged[0].summary.as_deref(), replacement.summary.as_deref());
999    }
1000}