1use chrono::{DateTime, Utc};
27use gestura_core_foundation::OutcomeSignal;
28use serde::{Deserialize, Serialize};
29
30use crate::types::{AgentResponse, ToolResult};
31
/// Tunable settings for the reflection feature.
///
/// NOTE(review): none of these fields are read in the visible part of this
/// file, so the per-field notes below are inferred from the field names and
/// the `Default` values — confirm against the callers that consume them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReflectionConfig {
    /// Master switch for the feature; `Default` sets it to `true`.
    pub enabled: bool,
    /// Presumably the quality score below which a response is considered worth
    /// reflecting on; `Default` sets it to `0.6` — TODO confirm.
    pub quality_threshold: f32,
    /// Presumably the cap on how many stored reflections are injected into a
    /// prompt; `Default` sets it to `3` — TODO confirm.
    pub max_injected_reflections: usize,
    /// Presumably the number of retries allowed after a reflection;
    /// `Default` sets it to `1` — TODO confirm.
    pub max_retry_attempts: usize,
    /// Presumably the promotion-confidence level a reflection must reach to be
    /// promoted; `Default` sets it to `0.75` — TODO confirm.
    pub promotion_confidence: f32,
}
66
67impl Default for ReflectionConfig {
68 fn default() -> Self {
69 Self {
70 enabled: true, quality_threshold: 0.6, max_injected_reflections: 3,
73 max_retry_attempts: 1,
74 promotion_confidence: 0.75,
75 }
76 }
77}
78
/// A structured record of one reflection: what was attempted, what went
/// wrong, and what to do differently.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentReflection {
    /// Identifier generated by `build_reflection_id` when the reflection is
    /// created through [`AgentReflection::new`].
    pub reflection_id: String,
    /// Summary of what was attempted.
    pub attempt_summary: String,
    /// Analysis of what went wrong.
    pub failure_analysis: String,
    /// Strategy to apply on future attempts.
    pub corrective_strategy: String,
    /// Optional improvement score; `to_prompt_section` renders it as a
    /// percentage (`score * 100`), so it is presumably in `0.0..=1.0`.
    pub improvement_score: Option<f32>,
    /// Free-form tags. Defaults to empty when missing on deserialization and
    /// is omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Outcome signals recorded for this reflection. Defaults to empty when
    /// missing on deserialization and is omitted from serialized output when
    /// empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub outcome_signals: Vec<OutcomeSignal>,
    /// Session the reflection belongs to.
    pub session_id: String,
    /// Optional task identifier, set through `with_task`.
    pub task_id: Option<String>,
    /// Creation time; `AgentReflection::new` sets it to `Utc::now()`.
    pub timestamp: DateTime<Utc>,
}
116
117impl AgentReflection {
118 pub fn new(
120 session_id: impl Into<String>,
121 attempt_summary: impl Into<String>,
122 failure_analysis: impl Into<String>,
123 corrective_strategy: impl Into<String>,
124 ) -> Self {
125 let session_id = session_id.into();
126 Self {
127 reflection_id: build_reflection_id(&session_id),
128 attempt_summary: attempt_summary.into(),
129 failure_analysis: failure_analysis.into(),
130 corrective_strategy: corrective_strategy.into(),
131 improvement_score: None,
132 tags: Vec::new(),
133 outcome_signals: Vec::new(),
134 session_id,
135 task_id: None,
136 timestamp: Utc::now(),
137 }
138 }
139
140 pub fn with_tags(mut self, tags: Vec<String>) -> Self {
142 self.tags = tags;
143 self
144 }
145
146 pub fn with_task(mut self, task_id: impl Into<String>) -> Self {
148 self.task_id = Some(task_id.into());
149 self
150 }
151
152 #[must_use]
154 pub fn with_outcome_signals(mut self, outcome_signals: Vec<OutcomeSignal>) -> Self {
155 self.outcome_signals = outcome_signals;
156 self
157 }
158
159 pub fn push_outcome_signal(&mut self, signal: OutcomeSignal) {
161 self.outcome_signals = merge_outcome_signals(&self.outcome_signals, &[signal]);
162 }
163
164 #[must_use]
166 pub fn promotion_confidence(&self) -> f32 {
167 reflection_promotion_confidence(self.improvement_score, &self.outcome_signals)
168 }
169
170 pub fn to_prompt_section(&self) -> String {
172 let improvement = self.improvement_score.map(|score| {
173 format!(
174 " - Observed improvement after retry: {:.0}%\n",
175 score * 100.0
176 )
177 });
178 let outcomes = if self.outcome_signals.is_empty() {
179 String::new()
180 } else {
181 format!(
182 " - Outcomes: {}\n",
183 self.outcome_signals
184 .iter()
185 .map(|signal| signal.kind.label())
186 .collect::<Vec<_>>()
187 .join(", ")
188 )
189 };
190
191 format!(
192 "**Reflection** ({})\n\
193 - Attempted: {}\n\
194 - Issue: {}\n\
195 - Strategy: {}\n{}{}",
196 self.timestamp.format("%Y-%m-%d %H:%M UTC"),
197 self.attempt_summary,
198 self.failure_analysis,
199 self.corrective_strategy,
200 improvement.unwrap_or_default(),
201 outcomes,
202 )
203 }
204}
205
206fn build_reflection_id(session_id: &str) -> String {
207 let nanos = Utc::now()
208 .timestamp_nanos_opt()
209 .unwrap_or_else(|| Utc::now().timestamp_micros() * 1_000);
210 let session_fragment = session_id
211 .chars()
212 .filter(|ch| ch.is_ascii_alphanumeric())
213 .take(12)
214 .collect::<String>();
215 if session_fragment.is_empty() {
216 format!("reflection-{nanos}")
217 } else {
218 format!("reflection-{session_fragment}-{nanos}")
219 }
220}
221
222#[must_use]
224pub fn reflection_promotion_confidence(
225 improvement_score: Option<f32>,
226 outcome_signals: &[OutcomeSignal],
227) -> f32 {
228 let base = improvement_score
229 .map(|score| (0.55 + (score * 0.35)).clamp(0.55, 0.90))
230 .unwrap_or(0.62);
231 let delta: f32 = outcome_signals
232 .iter()
233 .map(|signal| signal.kind.confidence_delta())
234 .sum();
235 (base + delta).clamp(0.30, 0.97)
236}
237
238#[must_use]
240pub fn merge_outcome_signals(
241 existing: &[OutcomeSignal],
242 incoming: &[OutcomeSignal],
243) -> Vec<OutcomeSignal> {
244 let mut merged = existing.to_vec();
245 for signal in incoming {
246 if let Some(slot) = merged
247 .iter_mut()
248 .find(|current| current.kind == signal.kind)
249 {
250 *slot = signal.clone();
251 } else {
252 merged.push(signal.clone());
253 }
254 }
255 merged.sort_by_key(|signal| signal.observed_at);
256 merged
257}
258
/// Scores how much of the remaining quality headroom a retry recovered.
///
/// Returns `0.0` when the retry is not strictly better than the first
/// attempt. Otherwise returns `(retry - initial) / (1 - initial)`, with the
/// denominator floored at `f32::EPSILON`, clamped to `0.0..=1.0`.
pub fn score_reflection_improvement(initial_quality: f32, retry_quality: f32) -> f32 {
    let flat_or_worse = retry_quality <= initial_quality;
    if flat_or_worse {
        0.0
    } else {
        // Floor the headroom so an initial quality at (or above) 1.0 cannot
        // divide by zero or by a negative number.
        let headroom = f32::max(1.0 - initial_quality, f32::EPSILON);
        let gained = retry_quality - initial_quality;
        (gained / headroom).clamp(0.0, 1.0)
    }
}
272
273pub fn quality_signals_for_response(
280 response: &AgentResponse,
281 max_iterations: usize,
282) -> QualitySignals {
283 let total_tool_calls = response.tool_calls.len();
284 let error_count = response
285 .tool_calls
286 .iter()
287 .filter(|tc| matches!(&tc.result, ToolResult::Error(_)))
288 .count();
289
290 let tool_error_rate = if total_tool_calls > 0 {
291 error_count as f32 / total_tool_calls as f32
292 } else {
293 0.0
294 };
295
296 QualitySignals {
297 tool_error_rate,
298 iterations_used: response.iterations,
299 max_iterations,
300 was_truncated: response.truncated,
301 has_failure_patterns: detect_failure_patterns(&response.content)
302 || detect_assertive_uncertainty(&response.content)
303 || detect_missing_debug_structure(&response.content),
304 is_empty_response: response.content.trim().is_empty(),
305 }
306}
307
/// Observable signals used to estimate the quality of one agent response.
#[derive(Debug, Clone)]
pub struct QualitySignals {
    /// Share of tool calls that returned an error.
    pub tool_error_rate: f32,
    /// Number of iterations the response consumed.
    pub iterations_used: usize,
    /// Iteration budget; `0` disables the iteration penalty in `score`.
    pub max_iterations: usize,
    /// Whether the response was truncated.
    pub was_truncated: bool,
    /// Whether a text heuristic flagged the response.
    pub has_failure_patterns: bool,
    /// Whether the response content was blank.
    pub is_empty_response: bool,
}

impl QualitySignals {
    /// Collapses the signals into a single score in `0.0..=1.0`.
    ///
    /// An empty response scores `0.0`. Otherwise the score starts at `1.0`
    /// and loses `tool_error_rate * 0.4`, then half of whatever share of the
    /// iteration budget exceeds 70%, then `0.15` if truncated and `0.25` if
    /// failure patterns were found. The result is clamped to `0.0..=1.0`.
    pub fn score(&self) -> f32 {
        if self.is_empty_response {
            return 0.0;
        }

        let tool_penalty = self.tool_error_rate * 0.4;

        let iteration_penalty = match self.max_iterations {
            0 => 0.0,
            budget => {
                let used_fraction = self.iterations_used as f32 / budget as f32;
                if used_fraction > 0.7 {
                    (used_fraction - 0.7) * 0.5
                } else {
                    0.0
                }
            }
        };

        let truncation_penalty = if self.was_truncated { 0.15 } else { 0.0 };
        let failure_penalty = if self.has_failure_patterns { 0.25 } else { 0.0 };

        // Penalties are subtracted one at a time, in this fixed order, so the
        // floating-point result does not depend on how they are grouped.
        [
            tool_penalty,
            iteration_penalty,
            truncation_penalty,
            failure_penalty,
        ]
        .iter()
        .fold(1.0_f32, |remaining, penalty| remaining - penalty)
        .clamp(0.0, 1.0)
    }
}
365
/// Returns `true` when `text` contains a known refusal or failure phrase.
///
/// Matching is case-insensitive substring search. Typographic single quotes
/// (U+2018 / U+2019) are folded to the ASCII apostrophe first, so text such
/// as "I’m sorry, I can’t" is detected the same way as its ASCII spelling.
pub fn detect_failure_patterns(text: &str) -> bool {
    const FAILURE_PHRASES: &[&str] = &[
        "i'm sorry, i can't",
        "i cannot",
        "i'm unable to",
        "unfortunately, i",
        "i don't have the ability",
        "i apologize, but i",
        "i'm not able to",
        "error occurred",
        "failed to execute",
        "i was not able to",
        "i was unable to",
        "as an ai, i",
    ];

    // The phrases only use ASCII apostrophes, so folding curly quotes cannot
    // remove a match that the plain lowercase text would have produced.
    let normalized: String = text
        .to_lowercase()
        .chars()
        .map(|ch| match ch {
            '\u{2018}' | '\u{2019}' => '\'',
            other => other,
        })
        .collect();

    FAILURE_PHRASES
        .iter()
        .any(|phrase| normalized.contains(phrase))
}
385
/// Returns `true` when `text` contains one of a fixed set of over-assertive
/// phrases (for example "the fact is that"), compared case-insensitively.
pub fn detect_assertive_uncertainty(text: &str) -> bool {
    const ASSERTIVE_PHRASES: &[&str] = &[
        "the fact is that",
        "it is a fact that",
        "it is definitely the case",
        "there is no question that",
        "it is absolutely certain",
        "without any doubt",
    ];

    let haystack = text.to_lowercase();
    for phrase in ASSERTIVE_PHRASES {
        if haystack.contains(phrase) {
            return true;
        }
    }
    false
}
404
/// Returns `true` when `response` looks like debugging output but lacks
/// either a root-cause statement or a verification step.
///
/// All checks are case-insensitive substring matches. A response with no
/// debugging keyword at all is never flagged.
pub fn detect_missing_debug_structure(response: &str) -> bool {
    const DEBUG_CONTEXT_MARKERS: &[&str] = &[
        "error",
        "bug",
        "fix",
        "crash",
        "failure",
        "exception",
        "traceback",
        "stack trace",
        "panicked",
        "undefined",
        "null pointer",
        "segfault",
        "diagnos",
        "debug",
        "root cause",
    ];
    const ROOT_CAUSE_MARKERS: &[&str] = &[
        "root cause",
        "caused by",
        "because",
        "the reason",
        "this happens when",
        "this is because",
        "due to",
        "stems from",
        "originates from",
    ];
    const VERIFICATION_MARKERS: &[&str] = &[
        "verify",
        "verif",
        "to confirm",
        "run ",
        "check ",
        "test ",
        "validate",
        "you can confirm",
        "to verify",
        "make sure",
        "ensure",
    ];

    let haystack = response.to_lowercase();
    let mentions_any =
        |markers: &[&str]| markers.iter().any(|marker| haystack.contains(marker));

    // Outside a debugging context there is no structure to demand.
    if !mentions_any(DEBUG_CONTEXT_MARKERS) {
        return false;
    }

    let explains_cause = mentions_any(ROOT_CAUSE_MARKERS);
    let offers_verification = mentions_any(VERIFICATION_MARKERS);
    !(explains_cause && offers_verification)
}
471
472pub fn build_reflection_prompt(
482 user_request: &str,
483 agent_response: &str,
484 quality_signals: &QualitySignals,
485 tool_errors: &[String],
486) -> String {
487 let mut prompt = String::from(
488 "System: You are a self-reflective AI assistant analyzing a previous interaction \
489 that was suboptimal. Generate a structured reflection to improve future responses.\n\n",
490 );
491
492 prompt.push_str(&format!("User request: {}\n\n", user_request));
493 prompt.push_str(&format!(
494 "Agent response (quality score: {:.2}):\n{}\n\n",
495 quality_signals.score(),
496 agent_response
497 ));
498
499 if !tool_errors.is_empty() {
500 prompt.push_str("Tool errors encountered:\n");
501 for error in tool_errors {
502 prompt.push_str(&format!("- {}\n", error));
503 }
504 prompt.push('\n');
505 }
506
507 prompt.push_str(
508 "Provide a brief, structured reflection in the following format:\n\
509 ATTEMPT: [1-2 sentence summary of what was attempted]\n\
510 ISSUE: [1-2 sentence analysis of what went wrong]\n\
511 STRATEGY: [1-2 sentence corrective strategy for future attempts]\n\
512 TAGS: [comma-separated relevant tags]\n\
513 Important:\n\
514 - Output plain text only.\n\
515 - Do not wrap the reflection in Markdown code fences.\n\
516 - Do not add any preamble, explanation, or extra sections before or after the four fields.\n",
517 );
518
519 prompt
520}
521
/// The four labelled fields recognised when parsing a reflection response.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReflectionField {
    /// Summary of what was attempted (labels such as `ATTEMPT`).
    Attempt,
    /// Analysis of what went wrong (labels such as `ISSUE`).
    Issue,
    /// Corrective strategy (labels such as `STRATEGY`).
    Strategy,
    /// Tag list (labels `TAGS` / `LABELS`).
    Tags,
}
529
/// Removes every `open … close` block from `input` and trims the result.
///
/// Text outside the blocks is kept in order. If an opening tag has no
/// matching closing tag, everything from that opening tag onward is dropped.
/// An empty `open` delimiter cannot mark a block, so the input is returned
/// trimmed and otherwise untouched (previously this case could loop forever
/// or silently discard text).
fn strip_tag_blocks(input: &str, open: &str, close: &str) -> String {
    if open.is_empty() {
        return input.trim().to_string();
    }

    let mut output = String::with_capacity(input.len());
    let mut rest = input;

    while let Some((before, after_open)) = rest.split_once(open) {
        output.push_str(before);
        match after_open.split_once(close) {
            Some((_block, after_close)) => rest = after_close,
            // Unterminated block: discard it together with the remaining text.
            None => return output.trim().to_string(),
        }
    }

    output.push_str(rest);
    output.trim().to_string()
}
547
548fn sanitize_reflection_response(response: &str) -> String {
549 strip_tag_blocks(response, "<think>", "</think>")
550 .lines()
551 .filter(|line| !line.trim_start().starts_with("```"))
552 .collect::<Vec<_>>()
553 .join("\n")
554 .trim()
555 .to_string()
556}
557
/// Collapses every run of whitespace in `value` into a single space and
/// removes leading and trailing whitespace.
fn compact_reflection_value(value: &str) -> String {
    let mut words = value.split_whitespace();
    let mut compacted = String::with_capacity(value.len());

    if let Some(first) = words.next() {
        compacted.push_str(first);
        for word in words {
            compacted.push(' ');
            compacted.push_str(word);
        }
    }

    compacted
}
561
/// Strips decoration from a parsed field value: surrounding whitespace,
/// trailing commas, then any leading/trailing run of `*`, `_`, backtick,
/// `"` or `'`, and finally whitespace again.
fn clean_reflection_field_value(value: &str) -> String {
    const WRAPPER_CHARS: &[char] = &['*', '_', '`', '"', '\''];

    let without_commas = value.trim().trim_end_matches(',');
    let unwrapped = without_commas.trim_matches(WRAPPER_CHARS);
    unwrapped.trim().to_owned()
}
570
571fn push_reflection_segment(target: &mut String, segment: &str) {
572 let segment = compact_reflection_value(&clean_reflection_field_value(segment));
573 if segment.is_empty() {
574 return;
575 }
576 if !target.is_empty() {
577 target.push(' ');
578 }
579 target.push_str(&segment);
580}
581
582fn normalize_reflection_label(label: &str) -> Option<ReflectionField> {
583 let normalized = label
584 .trim()
585 .trim_matches(|c: char| matches!(c, '*' | '_' | '`' | '[' | ']' | '(' | ')' | '#'))
586 .chars()
587 .filter_map(|ch| {
588 if ch.is_ascii_alphanumeric() {
589 Some(ch.to_ascii_lowercase())
590 } else if matches!(ch, ' ' | '-' | '_') {
591 Some('_')
592 } else {
593 None
594 }
595 })
596 .collect::<String>();
597
598 let normalized = normalized.trim_matches('_');
599
600 match normalized {
601 "attempt" | "attempt_summary" | "summary" | "what_was_attempted" => {
602 Some(ReflectionField::Attempt)
603 }
604 "issue" | "failure" | "failure_analysis" | "problem" | "analysis" | "what_went_wrong" => {
605 Some(ReflectionField::Issue)
606 }
607 "strategy"
608 | "corrective_strategy"
609 | "correction"
610 | "fix"
611 | "improvement_strategy"
612 | "next_time" => Some(ReflectionField::Strategy),
613 "tags" | "labels" => Some(ReflectionField::Tags),
614 _ => None,
615 }
616}
617
/// Removes leading list and quote decoration from a line.
///
/// Repeatedly strips, after leading whitespace: a `>` quote marker, a `- `,
/// `* ` or `• ` bullet, or an ASCII number followed by `. ` or `) `. Stops at
/// the first position where none of these apply and returns the remainder.
fn strip_reflection_line_prefix(line: &str) -> &str {
    const BULLET_MARKERS: [&str; 4] = [">", "- ", "* ", "• "];
    const ORDINAL_TERMINATORS: [&str; 2] = [". ", ") "];

    let mut remaining = line.trim_start();
    loop {
        if let Some(after) = BULLET_MARKERS
            .iter()
            .find_map(|marker| remaining.strip_prefix(*marker))
        {
            remaining = after.trim_start();
            continue;
        }

        // ASCII digits are one byte each, so the count doubles as a byte offset.
        let digits_len = remaining.bytes().take_while(u8::is_ascii_digit).count();
        if digits_len > 0 {
            let tail = &remaining[digits_len..];
            if let Some(after) = ORDINAL_TERMINATORS
                .iter()
                .find_map(|terminator| tail.strip_prefix(*terminator))
            {
                remaining = after.trim_start();
                continue;
            }
        }

        return remaining;
    }
}
657
658fn parse_reflection_field_line(line: &str) -> Option<(ReflectionField, String)> {
659 let candidate = strip_reflection_line_prefix(line);
660 let colon_idx = candidate.find(':')?;
661 let label = candidate[..colon_idx].trim();
662 let value = candidate[colon_idx + 1..].trim();
663 let field = normalize_reflection_label(label)?;
664 Some((field, value.to_string()))
665}
666
667fn parse_tag_values(value: &str) -> Vec<String> {
668 let trimmed = strip_reflection_line_prefix(value).trim();
669 let trimmed = clean_reflection_field_value(trimmed);
670 let trimmed = trimmed.trim_matches(|c: char| matches!(c, '[' | ']' | '{' | '}'));
671 if trimmed.is_empty() {
672 return Vec::new();
673 }
674 trimmed
675 .split(',')
676 .map(clean_reflection_field_value)
677 .filter(|tag| !tag.is_empty())
678 .collect()
679}
680
/// Extracts the string value that follows the first `"key"` in JSON-like text.
///
/// This is a tolerant scanner, not a JSON parser: it finds the first
/// occurrence of the quoted key, expects `:` and an opening `"` after it, and
/// reads up to the next unescaped `"`. Returns `None` when the key is missing,
/// the value is not a string, or the string is unterminated.
///
/// Escapes `\n`, `\r`, `\t`, `\"` and `\\` are decoded, and `\uXXXX` escapes
/// (including surrogate pairs) are decoded to the character they denote. Any
/// other escaped character, or a `\u` not followed by valid hex digits, is
/// kept literally without the backslash. The returned value is trimmed.
fn extract_jsonish_string_value(source: &str, key: &str) -> Option<String> {
    let needle = format!("\"{key}\"");
    let start = source.find(&needle)? + needle.len();
    let remainder = source[start..].trim_start();
    let remainder = remainder.strip_prefix(':')?.trim_start();
    let remainder = remainder.strip_prefix('"')?;

    let mut value = String::new();
    let mut chars = remainder.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '"' => return Some(value.trim().to_string()),
            // A dangling backslash at the end of input means the string is
            // unterminated, hence the `?`.
            '\\' => match chars.next()? {
                'n' => value.push('\n'),
                'r' => value.push('\r'),
                't' => value.push('\t'),
                'u' => match decode_json_unicode_escape(chars.as_str()) {
                    Some((decoded, rest)) => {
                        value.push(decoded);
                        chars = rest.chars();
                    }
                    // Not a valid escape: keep the lenient literal behaviour.
                    None => value.push('u'),
                },
                other => value.push(other),
            },
            other => value.push(other),
        }
    }

    None
}

/// Decodes the payload of a `\u` escape at the start of `rest` (the text right
/// after `\u`), returning the character and the text that follows it.
/// Handles a high surrogate followed by a `\uXXXX` low surrogate.
fn decode_json_unicode_escape(rest: &str) -> Option<(char, &str)> {
    let (first, after_first) = take_json_hex4(rest)?;

    if (0xD800..0xDC00).contains(&first) {
        let after_marker = after_first.strip_prefix("\\u")?;
        let (second, after_second) = take_json_hex4(after_marker)?;
        if !(0xDC00..0xE000).contains(&second) {
            return None;
        }
        let combined = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
        return char::from_u32(combined).map(|decoded| (decoded, after_second));
    }

    // Lone low surrogates are rejected by `char::from_u32`.
    char::from_u32(first).map(|decoded| (decoded, after_first))
}

/// Reads exactly four ASCII hex digits from the start of `text`.
fn take_json_hex4(text: &str) -> Option<(u32, &str)> {
    let hex = text.get(..4)?;
    if !hex.bytes().all(|byte| byte.is_ascii_hexdigit()) {
        return None;
    }
    let code = u32::from_str_radix(hex, 16).ok()?;
    Some((code, &text[4..]))
}
712
713fn extract_jsonish_tags(source: &str) -> Option<Vec<String>> {
714 if let Some(tags_str) = extract_jsonish_string_value(source, "tags") {
715 return Some(parse_tag_values(&tags_str));
716 }
717
718 let needle = "\"tags\"";
719 let start = source.find(needle)? + needle.len();
720 let remainder = source[start..].trim_start();
721 let remainder = remainder.strip_prefix(':')?.trim_start();
722 let remainder = remainder.strip_prefix('[')?;
723 let end = remainder.find(']')?;
724 let body = &remainder[..end];
725
726 Some(
727 body.split(',')
728 .map(|item| item.trim().trim_matches(|c: char| matches!(c, '"' | '\'')))
729 .filter(|item| !item.is_empty())
730 .map(ToOwned::to_owned)
731 .collect(),
732 )
733}
734
735fn parse_jsonish_reflection_response(response: &str, session_id: &str) -> Option<AgentReflection> {
736 let attempt = extract_jsonish_string_value(response, "attempt_summary")
737 .or_else(|| extract_jsonish_string_value(response, "attempt"))?;
738 let issue = extract_jsonish_string_value(response, "failure_analysis")
739 .or_else(|| extract_jsonish_string_value(response, "issue"))?;
740 let strategy = extract_jsonish_string_value(response, "corrective_strategy")
741 .or_else(|| extract_jsonish_string_value(response, "strategy"))?;
742 let tags = extract_jsonish_tags(response).unwrap_or_default();
743
744 Some(
745 AgentReflection::new(session_id, attempt, issue, strategy)
746 .with_tags(tags.into_iter().filter(|tag| !tag.is_empty()).collect()),
747 )
748}
749
750pub fn parse_reflection_response(response: &str, session_id: &str) -> Option<AgentReflection> {
757 let response = sanitize_reflection_response(response);
758 let mut attempt = String::new();
759 let mut issue = String::new();
760 let mut strategy = String::new();
761 let mut tags = Vec::new();
762 let mut current_field = None;
763
764 for line in response.lines() {
765 let trimmed = line.trim();
766 if trimmed.is_empty() {
767 continue;
768 }
769
770 if let Some((field, value)) = parse_reflection_field_line(trimmed) {
771 current_field = Some(field);
772 match field {
773 ReflectionField::Attempt => push_reflection_segment(&mut attempt, &value),
774 ReflectionField::Issue => push_reflection_segment(&mut issue, &value),
775 ReflectionField::Strategy => push_reflection_segment(&mut strategy, &value),
776 ReflectionField::Tags => tags.extend(parse_tag_values(&value)),
777 }
778 continue;
779 }
780
781 match current_field {
782 Some(ReflectionField::Attempt) => push_reflection_segment(&mut attempt, trimmed),
783 Some(ReflectionField::Issue) => push_reflection_segment(&mut issue, trimmed),
784 Some(ReflectionField::Strategy) => push_reflection_segment(&mut strategy, trimmed),
785 Some(ReflectionField::Tags) => tags.extend(parse_tag_values(trimmed)),
786 None => {}
787 }
788 }
789
790 let mut deduped_tags = Vec::new();
791 for tag in tags {
792 if !tag.is_empty() && !deduped_tags.contains(&tag) {
793 deduped_tags.push(tag);
794 }
795 }
796
797 if !attempt.is_empty() && !issue.is_empty() && !strategy.is_empty() {
798 return Some(
799 AgentReflection::new(session_id, attempt, issue, strategy).with_tags(deduped_tags),
800 );
801 }
802
803 parse_jsonish_reflection_response(&response, session_id)
804}
805
/// Unit tests for quality scoring, the text heuristics, prompt construction,
/// reflection parsing, and outcome-signal handling.
#[cfg(test)]
mod tests {
    use super::*;
    use gestura_core_foundation::OutcomeSignalKind;

    // A response with no negative signals scores above 0.9.
    #[test]
    fn test_quality_scoring_high_quality_response() {
        let signals = QualitySignals {
            tool_error_rate: 0.0,
            iterations_used: 1,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(score > 0.9, "Good response should score > 0.9, got {score}");
    }

    // A 50% tool error rate pulls the score below 0.85.
    #[test]
    fn test_quality_scoring_tool_errors() {
        let signals = QualitySignals {
            tool_error_rate: 0.5,
            iterations_used: 3,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(
            score < 0.85,
            "50% tool errors should lower score, got {score}"
        );
    }

    // Using 9 of 10 iterations exceeds the 70% mark and is penalised.
    #[test]
    fn test_quality_scoring_many_iterations() {
        let signals = QualitySignals {
            tool_error_rate: 0.0,
            iterations_used: 9,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(
            score < 0.95,
            "Using 90% iterations should lower score, got {score}"
        );
    }

    // An empty response always scores exactly 0.0.
    #[test]
    fn test_quality_scoring_empty_response() {
        let signals = QualitySignals {
            tool_error_rate: 0.0,
            iterations_used: 1,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: true,
        };
        assert_eq!(signals.score(), 0.0);
    }

    // Several penalties together push the score below 0.5.
    #[test]
    fn test_quality_scoring_combined_issues() {
        let signals = QualitySignals {
            tool_error_rate: 0.3,
            iterations_used: 8,
            max_iterations: 10,
            was_truncated: true,
            has_failure_patterns: true,
            is_empty_response: false,
        };
        let score = signals.score();
        assert!(
            score < 0.5,
            "Multiple issues should produce low score, got {score}"
        );
    }

    // Refusal phrases are detected; a normal answer is not.
    #[test]
    fn test_detect_failure_patterns() {
        assert!(detect_failure_patterns("I'm sorry, I can't do that"));
        assert!(detect_failure_patterns(
            "Unfortunately, I cannot access that file"
        ));
        assert!(!detect_failure_patterns(
            "Here is the file content you requested"
        ));
    }

    #[test]
    fn test_detect_missing_debug_structure() {
        // Debug context with neither a root cause nor a verification step.
        assert!(detect_missing_debug_structure(
            "There is a bug in the config parser."
        ));
        // Root cause present, verification step missing.
        assert!(detect_missing_debug_structure(
            "The crash is caused by a null pointer in the config parser."
        ));
        // Both root cause and verification present.
        assert!(!detect_missing_debug_structure(
            "The crash is caused by a null pointer. To verify, run `cargo test` and check the output."
        ));
        // No debugging keyword at all, so the check does not apply.
        assert!(!detect_missing_debug_structure(
            "The deployment was successful and all services are running."
        ));
    }

    // Over-assertive phrasing is flagged; neutral statements are not.
    #[test]
    fn test_detect_assertive_uncertainty() {
        assert!(detect_assertive_uncertainty(
            "The fact is that Python was invented in 1989."
        ));
        assert!(detect_assertive_uncertainty(
            "There is no question that this is the correct approach."
        ));
        assert!(!detect_assertive_uncertainty(
            "Python is generally credited as a language designed for readability."
        ));
        assert!(!detect_assertive_uncertainty(
            "The file was found at the expected path."
        ));
    }

    // The prompt embeds the request, the response, the tool errors and the
    // required output labels.
    #[test]
    fn test_reflection_prompt_construction() {
        let signals = QualitySignals {
            tool_error_rate: 0.5,
            iterations_used: 3,
            max_iterations: 10,
            was_truncated: false,
            has_failure_patterns: false,
            is_empty_response: false,
        };
        let prompt = build_reflection_prompt(
            "Read the file",
            "Error: file not found",
            &signals,
            &["FileNotFound: /tmp/missing.txt".to_string()],
        );
        assert!(prompt.contains("Read the file"));
        assert!(prompt.contains("Error: file not found"));
        assert!(prompt.contains("FileNotFound"));
        assert!(prompt.contains("ATTEMPT:"));
        assert!(prompt.contains("ISSUE:"));
        assert!(prompt.contains("STRATEGY:"));
    }

    // The canonical four-line format parses into all fields and tags.
    #[test]
    fn test_reflection_response_parsing() {
        let response = "\
            ATTEMPT: Tried to read the file at /tmp/missing.txt\n\
            ISSUE: The file path was incorrect; the file does not exist\n\
            STRATEGY: Verify file existence before attempting to read; suggest alternatives\n\
            TAGS: file, read, path-error\n";

        let reflection = parse_reflection_response(response, "session-123").unwrap();
        assert_eq!(
            reflection.attempt_summary,
            "Tried to read the file at /tmp/missing.txt"
        );
        assert!(reflection.failure_analysis.contains("incorrect"));
        assert!(reflection.corrective_strategy.contains("Verify"));
        assert_eq!(reflection.tags, vec!["file", "read", "path-error"]);
        assert_eq!(reflection.session_id, "session-123");
    }

    // A response without a strategy yields no reflection.
    #[test]
    fn test_reflection_response_parsing_incomplete() {
        let response = "ATTEMPT: Something\nISSUE: Something else\n";
        let reflection = parse_reflection_response(response, "s1");
        assert!(reflection.is_none(), "Missing STRATEGY should return None");
    }

    // Reasoning blocks are dropped, Markdown bullets/bold labels are accepted,
    // and unlabelled lines continue the current field.
    #[test]
    fn test_reflection_response_parsing_markdown_and_multiline() {
        let response = "<think>diagnosing tool output</think>\n\
            - **Attempt:** Tried to inspect the missing config file.\n\
            I answered before verifying the real path.\n\
            - **Issue:** The response relied on an assumed file location\n\
            instead of repository evidence.\n\
            - **Strategy:** Search for the config file first, then answer\n\
            only from the verified path and contents.\n\
            - **Tags:** file, verification\n";

        let reflection = parse_reflection_response(response, "session-md").unwrap();
        assert!(
            reflection
                .attempt_summary
                .contains("inspect the missing config file")
        );
        assert!(
            reflection
                .attempt_summary
                .contains("verifying the real path")
        );
        assert!(reflection.failure_analysis.contains("repository evidence"));
        assert!(
            reflection
                .corrective_strategy
                .contains("verified path and contents")
        );
        assert_eq!(reflection.tags, vec!["file", "verification"]);
    }

    // Long-form label aliases and a bulleted tag list are both understood.
    #[test]
    fn test_reflection_response_parsing_aliases_and_tag_list() {
        let response = "attempt_summary: Investigated a build failure without reading the actual error output.\n\
            failure_analysis: The explanation guessed at causes instead of grounding them in the logs.\n\
            corrective_strategy: Read the concrete stderr output first, then explain only the confirmed failure mode.\n\
            tags:\n\
            - shell\n\
            - validation\n";

        let reflection = parse_reflection_response(response, "session-alias").unwrap();
        assert!(reflection.attempt_summary.contains("build failure"));
        assert!(
            reflection
                .failure_analysis
                .contains("grounding them in the logs")
        );
        assert!(
            reflection
                .corrective_strategy
                .contains("concrete stderr output")
        );
        assert_eq!(reflection.tags, vec!["shell", "validation"]);
    }

    // A fenced JSON payload is parsed into the same fields.
    #[test]
    fn test_reflection_response_parsing_jsonish_payload() {
        let response = "```json\n{\n \"attempt_summary\": \"Tried to edit the wrong file\",\n \"failure_analysis\": \"The response assumed the target path without confirming it\",\n \"corrective_strategy\": \"Locate the file first, then apply the edit to the verified path\",\n \"tags\": [\"file\", \"path\"]\n}\n```";

        let reflection = parse_reflection_response(response, "session-json").unwrap();
        assert_eq!(reflection.attempt_summary, "Tried to edit the wrong file");
        assert!(
            reflection
                .failure_analysis
                .contains("assumed the target path")
        );
        assert!(
            reflection
                .corrective_strategy
                .contains("Locate the file first")
        );
        assert_eq!(reflection.tags, vec!["file", "path"]);
    }

    // The rendered prompt section contains the narrative fields and the
    // label of the attached outcome signal.
    #[test]
    fn test_reflection_to_prompt_section() {
        let reflection = AgentReflection::new(
            "s1",
            "Read missing file",
            "File did not exist",
            "Check file existence first",
        )
        .with_outcome_signals(vec![
            OutcomeSignal::new(OutcomeSignalKind::RetryImproved)
                .with_summary("The revised answer used the correct path."),
        ]);
        let section = reflection.to_prompt_section();
        assert!(section.contains("Read missing file"));
        assert!(section.contains("File did not exist"));
        assert!(section.contains("Check file existence first"));
        assert!(section.contains("Retry improved"));
    }

    // Recovering 0.36 of 0.60 available headroom counts as strong improvement.
    #[test]
    fn test_reflection_improvement_score_increases_with_retry_quality() {
        let score = score_reflection_improvement(0.40, 0.76);
        assert!(
            score > 0.5,
            "Expected strong improvement signal, got {score}"
        );
    }

    // Equal or worse retries score exactly zero.
    #[test]
    fn test_reflection_improvement_score_zero_when_retry_is_not_better() {
        assert_eq!(score_reflection_improvement(0.65, 0.65), 0.0);
        assert_eq!(score_reflection_improvement(0.65, 0.52), 0.0);
    }

    // Positive outcome signals raise promotion confidence; negative ones
    // lower it.
    #[test]
    fn test_promotion_confidence_uses_outcome_signals() {
        let baseline = AgentReflection::new(
            "s1",
            "Attempted a retry",
            "The first answer was weak",
            "Revise with the missing evidence",
        );
        let stronger = baseline.clone().with_outcome_signals(vec![
            OutcomeSignal::new(OutcomeSignalKind::RetryImproved),
            OutcomeSignal::new(OutcomeSignalKind::ReviewApproved),
        ]);
        let weaker = baseline.with_outcome_signals(vec![
            OutcomeSignal::new(OutcomeSignalKind::RetryDidNotImprove),
            OutcomeSignal::new(OutcomeSignalKind::ReviewNeedsRevision),
        ]);

        assert!(stronger.promotion_confidence() > 0.70);
        assert!(weaker.promotion_confidence() < 0.50);
    }

    // Merging a signal of an existing kind replaces it instead of adding a
    // second entry.
    #[test]
    fn test_merge_outcome_signals_replaces_existing_kind() {
        let first = OutcomeSignal::new(OutcomeSignalKind::ReviewApproved)
            .with_summary("Initial approval note");
        let replacement = OutcomeSignal::new(OutcomeSignalKind::ReviewApproved)
            .with_summary("Final approval note");

        let merged = merge_outcome_signals(&[first], std::slice::from_ref(&replacement));

        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].summary.as_deref(), replacement.summary.as_deref());
    }
}