1use chrono::{DateTime, Utc};
27use gestura_core_foundation::OutcomeSignal;
28use serde::{Deserialize, Serialize};
29
30use crate::types::{AgentResponse, ToolResult};
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct ReflectionConfig {
49 pub enabled: bool,
51 pub quality_threshold: f32,
55 pub max_injected_reflections: usize,
57 pub max_retry_attempts: usize,
63 pub promotion_confidence: f32,
65}
66
67impl Default for ReflectionConfig {
68 fn default() -> Self {
69 Self {
70 enabled: true, quality_threshold: 0.6, max_injected_reflections: 3,
73 max_retry_attempts: 1,
74 promotion_confidence: 0.75,
75 }
76 }
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct AgentReflection {
92 pub reflection_id: String,
94 pub attempt_summary: String,
96 pub failure_analysis: String,
98 pub corrective_strategy: String,
100 pub improvement_score: Option<f32>,
103 #[serde(default, skip_serializing_if = "Vec::is_empty")]
105 pub tags: Vec<String>,
106 #[serde(default, skip_serializing_if = "Vec::is_empty")]
108 pub outcome_signals: Vec<OutcomeSignal>,
109 pub session_id: String,
111 pub task_id: Option<String>,
113 pub timestamp: DateTime<Utc>,
115}
116
117impl AgentReflection {
118 pub fn new(
120 session_id: impl Into<String>,
121 attempt_summary: impl Into<String>,
122 failure_analysis: impl Into<String>,
123 corrective_strategy: impl Into<String>,
124 ) -> Self {
125 let session_id = session_id.into();
126 Self {
127 reflection_id: build_reflection_id(&session_id),
128 attempt_summary: attempt_summary.into(),
129 failure_analysis: failure_analysis.into(),
130 corrective_strategy: corrective_strategy.into(),
131 improvement_score: None,
132 tags: Vec::new(),
133 outcome_signals: Vec::new(),
134 session_id,
135 task_id: None,
136 timestamp: Utc::now(),
137 }
138 }
139
140 pub fn with_tags(mut self, tags: Vec<String>) -> Self {
142 self.tags = tags;
143 self
144 }
145
146 pub fn with_task(mut self, task_id: impl Into<String>) -> Self {
148 self.task_id = Some(task_id.into());
149 self
150 }
151
152 #[must_use]
154 pub fn with_outcome_signals(mut self, outcome_signals: Vec<OutcomeSignal>) -> Self {
155 self.outcome_signals = outcome_signals;
156 self
157 }
158
159 pub fn push_outcome_signal(&mut self, signal: OutcomeSignal) {
161 self.outcome_signals = merge_outcome_signals(&self.outcome_signals, &[signal]);
162 }
163
164 #[must_use]
166 pub fn promotion_confidence(&self) -> f32 {
167 reflection_promotion_confidence(self.improvement_score, &self.outcome_signals)
168 }
169
170 pub fn to_prompt_section(&self) -> String {
172 let improvement = self.improvement_score.map(|score| {
173 format!(
174 " - Observed improvement after retry: {:.0}%\n",
175 score * 100.0
176 )
177 });
178 let outcomes = if self.outcome_signals.is_empty() {
179 String::new()
180 } else {
181 format!(
182 " - Outcomes: {}\n",
183 self.outcome_signals
184 .iter()
185 .map(|signal| signal.kind.label())
186 .collect::<Vec<_>>()
187 .join(", ")
188 )
189 };
190
191 format!(
192 "**Reflection** ({})\n\
193 - Attempted: {}\n\
194 - Issue: {}\n\
195 - Strategy: {}\n{}{}",
196 self.timestamp.format("%Y-%m-%d %H:%M UTC"),
197 self.attempt_summary,
198 self.failure_analysis,
199 self.corrective_strategy,
200 improvement.unwrap_or_default(),
201 outcomes,
202 )
203 }
204}
205
206fn build_reflection_id(session_id: &str) -> String {
207 let nanos = Utc::now()
208 .timestamp_nanos_opt()
209 .unwrap_or_else(|| Utc::now().timestamp_micros() * 1_000);
210 let session_fragment = session_id
211 .chars()
212 .filter(|ch| ch.is_ascii_alphanumeric())
213 .take(12)
214 .collect::<String>();
215 if session_fragment.is_empty() {
216 format!("reflection-{nanos}")
217 } else {
218 format!("reflection-{session_fragment}-{nanos}")
219 }
220}
221
222#[must_use]
224pub fn reflection_promotion_confidence(
225 improvement_score: Option<f32>,
226 outcome_signals: &[OutcomeSignal],
227) -> f32 {
228 let base = improvement_score
229 .map(|score| (0.55 + (score * 0.35)).clamp(0.55, 0.90))
230 .unwrap_or(0.62);
231 let delta: f32 = outcome_signals
232 .iter()
233 .map(|signal| signal.kind.confidence_delta())
234 .sum();
235 (base + delta).clamp(0.30, 0.97)
236}
237
238#[must_use]
240pub fn merge_outcome_signals(
241 existing: &[OutcomeSignal],
242 incoming: &[OutcomeSignal],
243) -> Vec<OutcomeSignal> {
244 let mut merged = existing.to_vec();
245 for signal in incoming {
246 if let Some(slot) = merged
247 .iter_mut()
248 .find(|current| current.kind == signal.kind)
249 {
250 *slot = signal.clone();
251 } else {
252 merged.push(signal.clone());
253 }
254 }
255 merged.sort_by_key(|signal| signal.observed_at);
256 merged
257}
258
/// Scores how much a retry improved on the initial attempt, in `0.0..=1.0`.
///
/// The score is the fraction of the remaining headroom (`1.0 - initial_quality`)
/// that the retry recovered. A retry that is not strictly better scores `0.0`.
pub fn score_reflection_improvement(initial_quality: f32, retry_quality: f32) -> f32 {
    if retry_quality <= initial_quality {
        0.0
    } else {
        // Guard the division when the initial quality is already at (or above) 1.0.
        let headroom = (1.0 - initial_quality).max(f32::EPSILON);
        let gain = retry_quality - initial_quality;
        (gain / headroom).clamp(0.0, 1.0)
    }
}
272
273pub fn quality_signals_for_response(
280 response: &AgentResponse,
281 max_iterations: usize,
282) -> QualitySignals {
283 let total_tool_calls = response.tool_calls.len();
284 let error_count = response
285 .tool_calls
286 .iter()
287 .filter(|tc| matches!(&tc.result, ToolResult::Error(_)))
288 .count();
289
290 let tool_error_rate = if total_tool_calls > 0 {
291 error_count as f32 / total_tool_calls as f32
292 } else {
293 0.0
294 };
295
296 QualitySignals {
297 tool_error_rate,
298 iterations_used: response.iterations,
299 max_iterations,
300 was_truncated: response.truncated,
301 has_failure_patterns: detect_failure_patterns(&response.content),
302 is_empty_response: response.content.trim().is_empty(),
303 }
304}
305
/// Heuristic observations about one agent response, reducible to a single
/// quality score via `QualitySignals::score`.
#[derive(Debug, Clone)]
pub struct QualitySignals {
    /// Fraction of tool calls that ended in an error.
    pub tool_error_rate: f32,
    /// Number of iterations the agent actually used.
    pub iterations_used: usize,
    /// Iteration budget; `0` disables the iteration penalty.
    pub max_iterations: usize,
    /// Whether the response was truncated.
    pub was_truncated: bool,
    /// Whether the response text contains a known failure/refusal phrase.
    pub has_failure_patterns: bool,
    /// Whether the response text is empty after trimming whitespace.
    pub is_empty_response: bool,
}

impl QualitySignals {
    /// Collapses the signals into a score in `0.0..=1.0` (higher is better).
    ///
    /// An empty response scores `0.0` outright. Otherwise the score starts at
    /// `1.0` and loses:
    /// * `0.4 × tool_error_rate`,
    /// * `0.5 ×` the part of the iteration ratio that exceeds `0.7`,
    /// * `0.15` when the response was truncated,
    /// * `0.25` when failure phrases were detected.
    pub fn score(&self) -> f32 {
        if self.is_empty_response {
            return 0.0;
        }

        let error_penalty = self.tool_error_rate * 0.4;

        let iteration_penalty = if self.max_iterations == 0 {
            0.0
        } else {
            let used_ratio = self.iterations_used as f32 / self.max_iterations as f32;
            if used_ratio > 0.7 {
                (used_ratio - 0.7) * 0.5
            } else {
                0.0
            }
        };

        let truncation_penalty = if self.was_truncated { 0.15 } else { 0.0 };
        let failure_penalty = if self.has_failure_patterns { 0.25 } else { 0.0 };

        // Penalties are subtracted in a fixed order so rounding is reproducible.
        (1.0 - error_penalty - iteration_penalty - truncation_penalty - failure_penalty)
            .clamp(0.0, 1.0)
    }
}
363
/// Returns `true` when `text` contains a known refusal or failure phrase.
///
/// Matching is a case-insensitive substring search. Typographic apostrophes
/// (`‘`, `’`, `ʼ`) are folded to the ASCII `'` first, so a response written
/// with "smart quotes" ("I’m unable to …") is detected just like its ASCII
/// spelling.
pub fn detect_failure_patterns(text: &str) -> bool {
    const FAILURE_PHRASES: [&str; 9] = [
        "i'm sorry, i can't",
        "i cannot",
        "i'm unable to",
        "unfortunately, i",
        "i don't have the ability",
        "i apologize, but i",
        "i'm not able to",
        "error occurred",
        "failed to execute",
    ];

    let normalized = text
        .chars()
        .map(|ch| match ch {
            '\u{2018}' | '\u{2019}' | '\u{02BC}' => '\'',
            other => other,
        })
        .collect::<String>()
        .to_lowercase();

    FAILURE_PHRASES
        .iter()
        .any(|phrase| normalized.contains(*phrase))
}
380
381pub fn build_reflection_prompt(
391 user_request: &str,
392 agent_response: &str,
393 quality_signals: &QualitySignals,
394 tool_errors: &[String],
395) -> String {
396 let mut prompt = String::from(
397 "System: You are a self-reflective AI assistant analyzing a previous interaction \
398 that was suboptimal. Generate a structured reflection to improve future responses.\n\n",
399 );
400
401 prompt.push_str(&format!("User request: {}\n\n", user_request));
402 prompt.push_str(&format!(
403 "Agent response (quality score: {:.2}):\n{}\n\n",
404 quality_signals.score(),
405 agent_response
406 ));
407
408 if !tool_errors.is_empty() {
409 prompt.push_str("Tool errors encountered:\n");
410 for error in tool_errors {
411 prompt.push_str(&format!("- {}\n", error));
412 }
413 prompt.push('\n');
414 }
415
416 prompt.push_str(
417 "Provide a brief, structured reflection in the following format:\n\
418 ATTEMPT: [1-2 sentence summary of what was attempted]\n\
419 ISSUE: [1-2 sentence analysis of what went wrong]\n\
420 STRATEGY: [1-2 sentence corrective strategy for future attempts]\n\
421 TAGS: [comma-separated relevant tags]\n\
422 Important:\n\
423 - Output plain text only.\n\
424 - Do not wrap the reflection in Markdown code fences.\n\
425 - Do not add any preamble, explanation, or extra sections before or after the four fields.\n",
426 );
427
428 prompt
429}
430
/// The labelled sections recognised in a model-written reflection.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReflectionField {
    /// What was attempted; feeds the attempt summary.
    Attempt,
    /// What went wrong; feeds the failure analysis.
    Issue,
    /// What to do differently; feeds the corrective strategy.
    Strategy,
    /// Comma-separated or bulleted tags.
    Tags,
}
438
/// Removes every `open … close` block from `input` and trims the result.
///
/// Delimiters are matched exactly (case-sensitive). If an `open` has no
/// matching `close`, everything from that `open` onwards is dropped. When
/// either delimiter is empty there is nothing meaningful to strip, so the
/// trimmed input is returned unchanged — this also prevents the scan from
/// looping forever on two empty delimiters, which never advance the cursor.
fn strip_tag_blocks(input: &str, open: &str, close: &str) -> String {
    if open.is_empty() || close.is_empty() {
        return input.trim().to_string();
    }

    let mut kept = String::with_capacity(input.len());
    let mut rest = input;

    while let Some((before, after_open)) = rest.split_once(open) {
        kept.push_str(before);
        match after_open.split_once(close) {
            Some((_block, after_close)) => rest = after_close,
            // Unterminated block: discard the remainder of the input.
            None => return kept.trim().to_string(),
        }
    }

    kept.push_str(rest);
    kept.trim().to_string()
}
456
457fn sanitize_reflection_response(response: &str) -> String {
458 strip_tag_blocks(response, "<think>", "</think>")
459 .lines()
460 .filter(|line| !line.trim_start().starts_with("```"))
461 .collect::<Vec<_>>()
462 .join("\n")
463 .trim()
464 .to_string()
465}
466
/// Collapses every run of whitespace in `value` into a single space and drops
/// leading and trailing whitespace.
fn compact_reflection_value(value: &str) -> String {
    let mut compacted = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        if !compacted.is_empty() {
            compacted.push(' ');
        }
        compacted.push_str(word);
    }
    compacted
}
470
/// Strips decoration from a field value.
///
/// Steps, in order: trim whitespace, drop trailing commas, peel Markdown
/// emphasis / code / quote characters (`*`, `_`, `` ` ``, `"`, `'`) from both
/// ends, then trim whitespace again.
fn clean_reflection_field_value(value: &str) -> String {
    const DECORATION: &[char] = &['*', '_', '`', '"', '\''];

    let trimmed = value.trim();
    let without_commas = trimmed.trim_end_matches(',');
    let undecorated = without_commas.trim_matches(DECORATION);
    undecorated.trim().to_string()
}
479
480fn push_reflection_segment(target: &mut String, segment: &str) {
481 let segment = compact_reflection_value(&clean_reflection_field_value(segment));
482 if segment.is_empty() {
483 return;
484 }
485 if !target.is_empty() {
486 target.push(' ');
487 }
488 target.push_str(&segment);
489}
490
491fn normalize_reflection_label(label: &str) -> Option<ReflectionField> {
492 let normalized = label
493 .trim()
494 .trim_matches(|c: char| matches!(c, '*' | '_' | '`' | '[' | ']' | '(' | ')' | '#'))
495 .chars()
496 .filter_map(|ch| {
497 if ch.is_ascii_alphanumeric() {
498 Some(ch.to_ascii_lowercase())
499 } else if matches!(ch, ' ' | '-' | '_') {
500 Some('_')
501 } else {
502 None
503 }
504 })
505 .collect::<String>();
506
507 let normalized = normalized.trim_matches('_');
508
509 match normalized {
510 "attempt" | "attempt_summary" | "summary" | "what_was_attempted" => {
511 Some(ReflectionField::Attempt)
512 }
513 "issue" | "failure" | "failure_analysis" | "problem" | "analysis" | "what_went_wrong" => {
514 Some(ReflectionField::Issue)
515 }
516 "strategy"
517 | "corrective_strategy"
518 | "correction"
519 | "fix"
520 | "improvement_strategy"
521 | "next_time" => Some(ReflectionField::Strategy),
522 "tags" | "labels" => Some(ReflectionField::Tags),
523 _ => None,
524 }
525}
526
/// Removes leading list / quote decoration from a line.
///
/// Repeatedly strips, in any combination: block-quote markers (`>`), bullets
/// (`- `, `* `, `• `) and numbered-list markers (`1. `, `2) `), together with
/// the whitespace around them. The returned slice borrows from `line`.
fn strip_reflection_line_prefix(line: &str) -> &str {
    let mut rest = line.trim_start();
    while let Some(stripped) = strip_single_list_marker(rest) {
        rest = stripped.trim_start();
    }
    rest
}

/// Strips exactly one leading quote, bullet or numbered-list marker from
/// `text`, returning `None` when `text` does not start with one.
fn strip_single_list_marker(text: &str) -> Option<&str> {
    if let Some(after_quote) = text.strip_prefix('>') {
        return Some(after_quote);
    }

    for bullet in ["- ", "* ", "• "] {
        if let Some(after_bullet) = text.strip_prefix(bullet) {
            return Some(after_bullet);
        }
    }

    // ASCII digits are one byte each, so the byte count is a valid char boundary.
    let digits = text.bytes().take_while(u8::is_ascii_digit).count();
    if digits == 0 {
        return None;
    }
    let after_number = &text[digits..];
    after_number
        .strip_prefix(". ")
        .or_else(|| after_number.strip_prefix(") "))
}
566
567fn parse_reflection_field_line(line: &str) -> Option<(ReflectionField, String)> {
568 let candidate = strip_reflection_line_prefix(line);
569 let colon_idx = candidate.find(':')?;
570 let label = candidate[..colon_idx].trim();
571 let value = candidate[colon_idx + 1..].trim();
572 let field = normalize_reflection_label(label)?;
573 Some((field, value.to_string()))
574}
575
576fn parse_tag_values(value: &str) -> Vec<String> {
577 let trimmed = strip_reflection_line_prefix(value).trim();
578 let trimmed = clean_reflection_field_value(trimmed);
579 let trimmed = trimmed.trim_matches(|c: char| matches!(c, '[' | ']' | '{' | '}'));
580 if trimmed.is_empty() {
581 return Vec::new();
582 }
583 trimmed
584 .split(',')
585 .map(clean_reflection_field_value)
586 .filter(|tag| !tag.is_empty())
587 .collect()
588}
589
/// Leniently extracts the string value stored under `key` in JSON-like text.
///
/// Every occurrence of the quoted key is tried in order; the first one that is
/// followed by `:` and a terminated string literal wins. This skips places
/// where the same text merely appears as a *value* (e.g.
/// `{"kind": "issue", "issue": "…"}`). Returns `None` when no occurrence
/// qualifies. The decoded value is trimmed.
fn extract_jsonish_string_value(source: &str, key: &str) -> Option<String> {
    let needle = format!("\"{key}\"");
    source
        .match_indices(needle.as_str())
        .find_map(|(at, matched)| {
            let after_key = source[at + matched.len()..].trim_start();
            let after_colon = after_key.strip_prefix(':')?.trim_start();
            decode_jsonish_string_literal(after_colon.strip_prefix('"')?)
        })
}

/// Decodes the body of a string literal (the text right after the opening
/// quote) up to its closing quote; `None` if the literal is never closed.
///
/// Handles `\n`, `\r`, `\t` and `\uXXXX` (including surrogate pairs). Any other
/// escaped character — `\"`, `\\`, `\/`, … — is taken literally.
fn decode_jsonish_string_literal(literal: &str) -> Option<String> {
    let mut value = String::new();
    let mut chars = literal.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '"' => return Some(value.trim().to_string()),
            '\\' => match chars.next()? {
                'n' => value.push('\n'),
                'r' => value.push('\r'),
                't' => value.push('\t'),
                'u' => {
                    // Decode on a copy so a malformed escape degrades to a
                    // literal `u` without consuming the characters after it.
                    let mut probe = chars.clone();
                    match decode_unicode_escape(&mut probe) {
                        Some(decoded) => {
                            value.push(decoded);
                            chars = probe;
                        }
                        None => value.push('u'),
                    }
                }
                other => value.push(other),
            },
            other => value.push(other),
        }
    }

    None
}

/// Decodes the hex digits of a `\uXXXX` escape (the `\u` is already consumed),
/// combining a high surrogate with the `\uXXXX` low surrogate that follows it.
fn decode_unicode_escape(chars: &mut std::str::Chars<'_>) -> Option<char> {
    let first = read_hex4(chars)?;
    if !(0xD800..=0xDBFF).contains(&first) {
        // `char::from_u32` rejects a lone low surrogate.
        return char::from_u32(first);
    }

    if chars.next()? != '\\' || chars.next()? != 'u' {
        return None;
    }
    let second = read_hex4(chars)?;
    if !(0xDC00..=0xDFFF).contains(&second) {
        return None;
    }
    char::from_u32(0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00))
}

/// Reads exactly four hexadecimal digits as a number.
fn read_hex4(chars: &mut std::str::Chars<'_>) -> Option<u32> {
    let mut code = 0u32;
    for _ in 0..4 {
        code = code * 16 + chars.next()?.to_digit(16)?;
    }
    Some(code)
}
621
622fn extract_jsonish_tags(source: &str) -> Option<Vec<String>> {
623 if let Some(tags_str) = extract_jsonish_string_value(source, "tags") {
624 return Some(parse_tag_values(&tags_str));
625 }
626
627 let needle = "\"tags\"";
628 let start = source.find(needle)? + needle.len();
629 let remainder = source[start..].trim_start();
630 let remainder = remainder.strip_prefix(':')?.trim_start();
631 let remainder = remainder.strip_prefix('[')?;
632 let end = remainder.find(']')?;
633 let body = &remainder[..end];
634
635 Some(
636 body.split(',')
637 .map(|item| item.trim().trim_matches(|c: char| matches!(c, '"' | '\'')))
638 .filter(|item| !item.is_empty())
639 .map(ToOwned::to_owned)
640 .collect(),
641 )
642}
643
644fn parse_jsonish_reflection_response(response: &str, session_id: &str) -> Option<AgentReflection> {
645 let attempt = extract_jsonish_string_value(response, "attempt_summary")
646 .or_else(|| extract_jsonish_string_value(response, "attempt"))?;
647 let issue = extract_jsonish_string_value(response, "failure_analysis")
648 .or_else(|| extract_jsonish_string_value(response, "issue"))?;
649 let strategy = extract_jsonish_string_value(response, "corrective_strategy")
650 .or_else(|| extract_jsonish_string_value(response, "strategy"))?;
651 let tags = extract_jsonish_tags(response).unwrap_or_default();
652
653 Some(
654 AgentReflection::new(session_id, attempt, issue, strategy)
655 .with_tags(tags.into_iter().filter(|tag| !tag.is_empty()).collect()),
656 )
657}
658
659pub fn parse_reflection_response(response: &str, session_id: &str) -> Option<AgentReflection> {
666 let response = sanitize_reflection_response(response);
667 let mut attempt = String::new();
668 let mut issue = String::new();
669 let mut strategy = String::new();
670 let mut tags = Vec::new();
671 let mut current_field = None;
672
673 for line in response.lines() {
674 let trimmed = line.trim();
675 if trimmed.is_empty() {
676 continue;
677 }
678
679 if let Some((field, value)) = parse_reflection_field_line(trimmed) {
680 current_field = Some(field);
681 match field {
682 ReflectionField::Attempt => push_reflection_segment(&mut attempt, &value),
683 ReflectionField::Issue => push_reflection_segment(&mut issue, &value),
684 ReflectionField::Strategy => push_reflection_segment(&mut strategy, &value),
685 ReflectionField::Tags => tags.extend(parse_tag_values(&value)),
686 }
687 continue;
688 }
689
690 match current_field {
691 Some(ReflectionField::Attempt) => push_reflection_segment(&mut attempt, trimmed),
692 Some(ReflectionField::Issue) => push_reflection_segment(&mut issue, trimmed),
693 Some(ReflectionField::Strategy) => push_reflection_segment(&mut strategy, trimmed),
694 Some(ReflectionField::Tags) => tags.extend(parse_tag_values(trimmed)),
695 None => {}
696 }
697 }
698
699 let mut deduped_tags = Vec::new();
700 for tag in tags {
701 if !tag.is_empty() && !deduped_tags.contains(&tag) {
702 deduped_tags.push(tag);
703 }
704 }
705
706 if !attempt.is_empty() && !issue.is_empty() && !strategy.is_empty() {
707 return Some(
708 AgentReflection::new(session_id, attempt, issue, strategy).with_tags(deduped_tags),
709 );
710 }
711
712 parse_jsonish_reflection_response(&response, session_id)
713}
714
#[cfg(test)]
mod tests {
    use super::*;
    use gestura_core_foundation::OutcomeSignalKind;

    /// Signals for a response with no quality problems; individual tests
    /// override only the fields they exercise.
    fn healthy_signals() -> QualitySignals {
        QualitySignals {
            tool_error_rate: 0.0,
            iterations_used: 1,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        }
    }

    #[test]
    fn test_quality_scoring_high_quality_response() {
        let score = healthy_signals().score();
        assert!(score > 0.9, "Good response should score > 0.9, got {score}");
    }

    #[test]
    fn test_quality_scoring_tool_errors() {
        let score = QualitySignals {
            tool_error_rate: 0.5,
            iterations_used: 3,
            ..healthy_signals()
        }
        .score();
        assert!(
            score < 0.85,
            "50% tool errors should lower score, got {score}"
        );
    }

    #[test]
    fn test_quality_scoring_many_iterations() {
        let score = QualitySignals {
            iterations_used: 9,
            ..healthy_signals()
        }
        .score();
        assert!(
            score < 0.95,
            "Using 90% iterations should lower score, got {score}"
        );
    }

    #[test]
    fn test_quality_scoring_empty_response() {
        let signals = QualitySignals {
            is_empty_response: true,
            ..healthy_signals()
        };
        assert_eq!(signals.score(), 0.0);
    }

    #[test]
    fn test_quality_scoring_combined_issues() {
        let score = QualitySignals {
            tool_error_rate: 0.3,
            iterations_used: 8,
            was_truncated: true,
            has_failure_patterns: true,
            ..healthy_signals()
        }
        .score();
        assert!(
            score < 0.5,
            "Multiple issues should produce low score, got {score}"
        );
    }

    #[test]
    fn test_detect_failure_patterns() {
        assert!(detect_failure_patterns("I'm sorry, I can't do that"));
        assert!(detect_failure_patterns(
            "Unfortunately, I cannot access that file"
        ));
        assert!(!detect_failure_patterns(
            "Here is the file content you requested"
        ));
    }

    #[test]
    fn test_reflection_prompt_construction() {
        let signals = QualitySignals {
            tool_error_rate: 0.5,
            iterations_used: 3,
            ..healthy_signals()
        };
        let tool_errors = ["FileNotFound: /tmp/missing.txt".to_string()];
        let prompt = build_reflection_prompt(
            "Read the file",
            "Error: file not found",
            &signals,
            &tool_errors,
        );

        for expected in [
            "Read the file",
            "Error: file not found",
            "FileNotFound",
            "ATTEMPT:",
            "ISSUE:",
            "STRATEGY:",
        ] {
            assert!(prompt.contains(expected));
        }
    }

    #[test]
    fn test_reflection_response_parsing() {
        let response = "\
            ATTEMPT: Tried to read the file at /tmp/missing.txt\n\
            ISSUE: The file path was incorrect; the file does not exist\n\
            STRATEGY: Verify file existence before attempting to read; suggest alternatives\n\
            TAGS: file, read, path-error\n";

        let reflection = parse_reflection_response(response, "session-123").unwrap();
        assert_eq!(
            reflection.attempt_summary,
            "Tried to read the file at /tmp/missing.txt"
        );
        assert!(reflection.failure_analysis.contains("incorrect"));
        assert!(reflection.corrective_strategy.contains("Verify"));
        assert_eq!(reflection.tags, vec!["file", "read", "path-error"]);
        assert_eq!(reflection.session_id, "session-123");
    }

    #[test]
    fn test_reflection_response_parsing_incomplete() {
        let response = "ATTEMPT: Something\nISSUE: Something else\n";
        let reflection = parse_reflection_response(response, "s1");
        assert!(reflection.is_none(), "Missing STRATEGY should return None");
    }

    #[test]
    fn test_reflection_response_parsing_markdown_and_multiline() {
        let response = "<think>diagnosing tool output</think>\n\
            - **Attempt:** Tried to inspect the missing config file.\n\
            I answered before verifying the real path.\n\
            - **Issue:** The response relied on an assumed file location\n\
            instead of repository evidence.\n\
            - **Strategy:** Search for the config file first, then answer\n\
            only from the verified path and contents.\n\
            - **Tags:** file, verification\n";

        let reflection = parse_reflection_response(response, "session-md").unwrap();
        assert!(
            reflection
                .attempt_summary
                .contains("inspect the missing config file")
        );
        assert!(
            reflection
                .attempt_summary
                .contains("verifying the real path")
        );
        assert!(reflection.failure_analysis.contains("repository evidence"));
        assert!(
            reflection
                .corrective_strategy
                .contains("verified path and contents")
        );
        assert_eq!(reflection.tags, vec!["file", "verification"]);
    }

    #[test]
    fn test_reflection_response_parsing_aliases_and_tag_list() {
        let response = "attempt_summary: Investigated a build failure without reading the actual error output.\n\
            failure_analysis: The explanation guessed at causes instead of grounding them in the logs.\n\
            corrective_strategy: Read the concrete stderr output first, then explain only the confirmed failure mode.\n\
            tags:\n\
            - shell\n\
            - validation\n";

        let reflection = parse_reflection_response(response, "session-alias").unwrap();
        assert!(reflection.attempt_summary.contains("build failure"));
        assert!(
            reflection
                .failure_analysis
                .contains("grounding them in the logs")
        );
        assert!(
            reflection
                .corrective_strategy
                .contains("concrete stderr output")
        );
        assert_eq!(reflection.tags, vec!["shell", "validation"]);
    }

    #[test]
    fn test_reflection_response_parsing_jsonish_payload() {
        let response = "```json\n{\n \"attempt_summary\": \"Tried to edit the wrong file\",\n \"failure_analysis\": \"The response assumed the target path without confirming it\",\n \"corrective_strategy\": \"Locate the file first, then apply the edit to the verified path\",\n \"tags\": [\"file\", \"path\"]\n}\n```";

        let reflection = parse_reflection_response(response, "session-json").unwrap();
        assert_eq!(reflection.attempt_summary, "Tried to edit the wrong file");
        assert!(
            reflection
                .failure_analysis
                .contains("assumed the target path")
        );
        assert!(
            reflection
                .corrective_strategy
                .contains("Locate the file first")
        );
        assert_eq!(reflection.tags, vec!["file", "path"]);
    }

    #[test]
    fn test_reflection_to_prompt_section() {
        let retry_improved = OutcomeSignal::new(OutcomeSignalKind::RetryImproved)
            .with_summary("The revised answer used the correct path.");
        let reflection = AgentReflection::new(
            "s1",
            "Read missing file",
            "File did not exist",
            "Check file existence first",
        )
        .with_outcome_signals(vec![retry_improved]);

        let section = reflection.to_prompt_section();
        for expected in [
            "Read missing file",
            "File did not exist",
            "Check file existence first",
            "Retry improved",
        ] {
            assert!(section.contains(expected));
        }
    }

    #[test]
    fn test_reflection_improvement_score_increases_with_retry_quality() {
        let score = score_reflection_improvement(0.40, 0.76);
        assert!(
            score > 0.5,
            "Expected strong improvement signal, got {score}"
        );
    }

    #[test]
    fn test_reflection_improvement_score_zero_when_retry_is_not_better() {
        assert_eq!(score_reflection_improvement(0.65, 0.65), 0.0);
        assert_eq!(score_reflection_improvement(0.65, 0.52), 0.0);
    }

    #[test]
    fn test_promotion_confidence_uses_outcome_signals() {
        let baseline = AgentReflection::new(
            "s1",
            "Attempted a retry",
            "The first answer was weak",
            "Revise with the missing evidence",
        );

        let positive_signals = vec![
            OutcomeSignal::new(OutcomeSignalKind::RetryImproved),
            OutcomeSignal::new(OutcomeSignalKind::ReviewApproved),
        ];
        let negative_signals = vec![
            OutcomeSignal::new(OutcomeSignalKind::RetryDidNotImprove),
            OutcomeSignal::new(OutcomeSignalKind::ReviewNeedsRevision),
        ];

        let stronger = baseline.clone().with_outcome_signals(positive_signals);
        let weaker = baseline.with_outcome_signals(negative_signals);

        assert!(stronger.promotion_confidence() > 0.70);
        assert!(weaker.promotion_confidence() < 0.50);
    }

    #[test]
    fn test_merge_outcome_signals_replaces_existing_kind() {
        let first = OutcomeSignal::new(OutcomeSignalKind::ReviewApproved)
            .with_summary("Initial approval note");
        let replacement = OutcomeSignal::new(OutcomeSignalKind::ReviewApproved)
            .with_summary("Final approval note");

        let merged = merge_outcome_signals(&[first], std::slice::from_ref(&replacement));

        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].summary.as_deref(), replacement.summary.as_deref());
    }
}