gestura_core/
compaction.rs

//! Context compaction for managing conversation history within token limits
//!
//! This module provides automatic history trimming and summarization when
//! context approaches the token limit. Based on Block Goose architecture patterns.

6use crate::pipeline::types::Message;
7use serde::{Deserialize, Serialize};
8
/// Configuration for context compaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionConfig {
    /// Maximum tokens before triggering compaction (hard limit).
    pub max_context_tokens: usize,
    /// Target tokens after compaction (should be < `max_context_tokens`).
    pub target_context_tokens: usize,
    /// Minimum number of most-recent messages to always keep.
    pub min_recent_messages: usize,
    /// Whether to preserve tool call/result pairs (consulted by the
    /// importance-based strategy's scoring).
    pub preserve_tool_calls: bool,
    /// Whether to preserve messages marked as important.
    pub preserve_important: bool,
    /// Strategy applied when compaction is triggered.
    pub strategy: CompactionStrategy,
}
25
impl Default for CompactionConfig {
    /// Defaults sized for a ~100k-token context window: compact down to
    /// 80% of the limit, always keeping the 4 most recent messages and
    /// preferring the sliding-window strategy.
    fn default() -> Self {
        Self {
            max_context_tokens: 100_000,
            target_context_tokens: 80_000,
            min_recent_messages: 4,
            preserve_tool_calls: true,
            preserve_important: true,
            strategy: CompactionStrategy::SlidingWindow,
        }
    }
}
38
/// Strategy for compacting context.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CompactionStrategy {
    /// Keep most recent messages, drop oldest.
    SlidingWindow,
    /// Summarize older messages into a single synthetic system message.
    Summarize,
    /// Score messages and keep the most important ones, dropping the rest.
    ImportanceBased,
}
49
/// Result of a compaction operation (statistics plus an optional summary).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionResult {
    /// Number of messages before compaction.
    pub messages_before: usize,
    /// Number of messages after compaction.
    pub messages_after: usize,
    /// Estimated tokens before compaction.
    pub tokens_before: usize,
    /// Estimated tokens after compaction.
    pub tokens_after: usize,
    /// Summary of dropped content, if any content was dropped.
    pub summary: Option<String>,
    /// Whether compaction was actually performed.
    pub compacted: bool,
}
66
/// Event emitted during compaction for user notification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionEvent {
    /// Type of compaction event.
    pub event_type: CompactionEventType,
    /// Human-readable message describing the event.
    pub message: String,
    /// Number of messages affected by the event.
    pub messages_affected: usize,
    /// Estimated tokens saved.
    pub tokens_saved: usize,
}
79
/// Types of compaction events.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CompactionEventType {
    /// Context is approaching the limit; compaction will occur soon.
    Warning,
    /// Compaction is starting.
    Started,
    /// Compaction completed successfully.
    Completed,
    /// Compaction failed.
    Failed,
}
92
/// Manages context compaction for conversation history.
///
/// Stateless apart from its configuration: all compaction methods operate
/// on message slices passed in by the caller.
#[derive(Debug, Clone)]
pub struct ContextCompactor {
    // Immutable after construction; exposed read-only via `config()`.
    config: CompactionConfig,
}
98
99impl Default for ContextCompactor {
100    fn default() -> Self {
101        Self::new(CompactionConfig::default())
102    }
103}
104
105impl ContextCompactor {
106    /// Create a new context compactor with the given configuration
107    pub fn new(config: CompactionConfig) -> Self {
108        Self { config }
109    }
110
111    /// Create with default config for a specific max token limit
112    pub fn with_max_tokens(max_tokens: usize) -> Self {
113        Self::new(CompactionConfig {
114            max_context_tokens: max_tokens,
115            target_context_tokens: (max_tokens as f64 * 0.8) as usize,
116            ..Default::default()
117        })
118    }
119
120    /// Check if compaction is needed based on current token count
121    pub fn needs_compaction(&self, current_tokens: usize) -> bool {
122        current_tokens >= self.config.max_context_tokens
123    }
124
125    /// Check if we're approaching the limit (warning threshold at 90%)
126    pub fn approaching_limit(&self, current_tokens: usize) -> bool {
127        let warning_threshold = (self.config.max_context_tokens as f64 * 0.9) as usize;
128        current_tokens >= warning_threshold && current_tokens < self.config.max_context_tokens
129    }
130
131    /// Estimate token count for a message
132    fn estimate_message_tokens(msg: &Message) -> usize {
133        let content_tokens = estimate_tokens(&msg.content);
134        let thinking_tokens = msg
135            .thinking
136            .as_ref()
137            .map(|t| estimate_tokens(t))
138            .unwrap_or(0);
139        let role_tokens = 4; // Approximate overhead for role
140        content_tokens + thinking_tokens + role_tokens
141    }
142
143    /// Estimate total tokens for a list of messages
144    pub fn estimate_total_tokens(messages: &[Message]) -> usize {
145        messages.iter().map(Self::estimate_message_tokens).sum()
146    }
147
148    /// Compact messages to fit within token limit
149    pub fn compact(&self, messages: &[Message]) -> CompactionResult {
150        let tokens_before = Self::estimate_total_tokens(messages);
151        let messages_before = messages.len();
152
153        // Check if compaction is needed
154        if tokens_before < self.config.max_context_tokens {
155            return CompactionResult {
156                messages_before,
157                messages_after: messages_before,
158                tokens_before,
159                tokens_after: tokens_before,
160                summary: None,
161                compacted: false,
162            };
163        }
164
165        // Apply compaction strategy
166        let (compacted_messages, summary) = match self.config.strategy {
167            CompactionStrategy::SlidingWindow => self.compact_sliding_window(messages),
168            CompactionStrategy::Summarize => self.compact_with_summary(messages),
169            CompactionStrategy::ImportanceBased => self.compact_importance_based(messages),
170        };
171
172        let tokens_after = Self::estimate_total_tokens(&compacted_messages);
173
174        CompactionResult {
175            messages_before,
176            messages_after: compacted_messages.len(),
177            tokens_before,
178            tokens_after,
179            summary,
180            compacted: true,
181        }
182    }
183
184    /// Compact messages and return the compacted list
185    pub fn compact_messages(&self, messages: Vec<Message>) -> (Vec<Message>, CompactionResult) {
186        let tokens_before = Self::estimate_total_tokens(&messages);
187        let messages_before = messages.len();
188
189        if tokens_before < self.config.max_context_tokens {
190            let result = CompactionResult {
191                messages_before,
192                messages_after: messages_before,
193                tokens_before,
194                tokens_after: tokens_before,
195                summary: None,
196                compacted: false,
197            };
198            return (messages, result);
199        }
200
201        let (compacted, summary) = match self.config.strategy {
202            CompactionStrategy::SlidingWindow => self.compact_sliding_window(&messages),
203            CompactionStrategy::Summarize => self.compact_with_summary(&messages),
204            CompactionStrategy::ImportanceBased => self.compact_importance_based(&messages),
205        };
206
207        let tokens_after = Self::estimate_total_tokens(&compacted);
208        let result = CompactionResult {
209            messages_before,
210            messages_after: compacted.len(),
211            tokens_before,
212            tokens_after,
213            summary,
214            compacted: true,
215        };
216
217        (compacted, result)
218    }
219
220    /// Sliding window compaction: keep most recent messages
221    fn compact_sliding_window(&self, messages: &[Message]) -> (Vec<Message>, Option<String>) {
222        let mut result = Vec::new();
223        let mut current_tokens = 0;
224        let target = self.config.target_context_tokens;
225
226        // Always include minimum recent messages
227        let min_recent = self.config.min_recent_messages.min(messages.len());
228
229        // Add messages from the end until we hit the target
230        for msg in messages.iter().rev() {
231            let msg_tokens = Self::estimate_message_tokens(msg);
232            if current_tokens + msg_tokens > target && result.len() >= min_recent {
233                break;
234            }
235            result.push(msg.clone());
236            current_tokens += msg_tokens;
237        }
238
239        result.reverse();
240
241        let dropped = messages.len() - result.len();
242        let summary = if dropped > 0 {
243            Some(format!(
244                "[Context compacted: {} earlier messages removed to stay within token limit]",
245                dropped
246            ))
247        } else {
248            None
249        };
250
251        (result, summary)
252    }
253
254    /// Summarize older messages into a single summary message
255    fn compact_with_summary(&self, messages: &[Message]) -> (Vec<Message>, Option<String>) {
256        let target = self.config.target_context_tokens;
257        let min_recent = self.config.min_recent_messages.min(messages.len());
258
259        // Calculate how many recent messages to keep
260        let mut recent_tokens = 0;
261        let mut keep_from = messages.len();
262
263        for (i, msg) in messages.iter().enumerate().rev() {
264            let msg_tokens = Self::estimate_message_tokens(msg);
265            if recent_tokens + msg_tokens > target / 2 && messages.len() - i >= min_recent {
266                keep_from = i + 1;
267                break;
268            }
269            recent_tokens += msg_tokens;
270            keep_from = i;
271        }
272
273        // Create summary of older messages
274        let older_messages = &messages[..keep_from];
275        let summary_text = self.create_summary(older_messages);
276
277        // Build result with summary + recent messages
278        let mut result = Vec::new();
279
280        if !summary_text.is_empty() {
281            result.push(Message {
282                role: "system".to_string(),
283                content: format!("[Conversation summary: {}]", summary_text),
284                tool_call_id: None,
285                thinking: None,
286            });
287        }
288
289        result.extend(messages[keep_from..].iter().cloned());
290
291        (result, Some(summary_text))
292    }
293
294    /// Importance-based compaction: keep important messages, drop less important
295    fn compact_importance_based(&self, messages: &[Message]) -> (Vec<Message>, Option<String>) {
296        let target = self.config.target_context_tokens;
297        let min_recent = self.config.min_recent_messages;
298
299        // Score each message by importance
300        let scored: Vec<(usize, i32, &Message)> = messages
301            .iter()
302            .enumerate()
303            .map(|(i, msg)| (i, self.score_importance(msg, i, messages.len()), msg))
304            .collect();
305
306        // Sort by importance (descending), keeping original order for ties
307        let mut sorted = scored.clone();
308        sorted.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
309
310        // Select messages until we hit target
311        let mut selected_indices: Vec<usize> = Vec::new();
312        let mut current_tokens = 0;
313
314        for (idx, _score, msg) in sorted {
315            let msg_tokens = Self::estimate_message_tokens(msg);
316            if current_tokens + msg_tokens <= target || selected_indices.len() < min_recent {
317                selected_indices.push(idx);
318                current_tokens += msg_tokens;
319            }
320        }
321
322        // Sort indices to maintain original order
323        selected_indices.sort();
324
325        // Build result
326        let result: Vec<Message> = selected_indices
327            .iter()
328            .map(|&i| messages[i].clone())
329            .collect();
330
331        let dropped = messages.len() - result.len();
332        let summary = if dropped > 0 {
333            Some(format!(
334                "[Context compacted: {} less important messages removed]",
335                dropped
336            ))
337        } else {
338            None
339        };
340
341        (result, summary)
342    }
343
344    /// Score message importance (higher = more important)
345    fn score_importance(&self, msg: &Message, index: usize, total: usize) -> i32 {
346        let mut score = 0i32;
347
348        // Recent messages are more important
349        let recency = (index as f64 / total as f64 * 50.0) as i32;
350        score += recency;
351
352        // Tool calls and results are important if configured
353        if self.config.preserve_tool_calls {
354            if msg.role == "tool" || msg.tool_call_id.is_some() {
355                score += 30;
356            }
357            // Assistant messages with tool calls
358            if msg.role == "assistant" && msg.content.contains("tool_call") {
359                score += 20;
360            }
361        }
362
363        // User messages are generally important
364        if msg.role == "user" {
365            score += 15;
366        }
367
368        // System messages are very important
369        if msg.role == "system" {
370            score += 40;
371        }
372
373        // Longer messages might contain more context
374        let length_bonus = (msg.content.len() / 100).min(10) as i32;
375        score += length_bonus;
376
377        score
378    }
379
380    /// Create a simple summary of messages
381    fn create_summary(&self, messages: &[Message]) -> String {
382        if messages.is_empty() {
383            return String::new();
384        }
385
386        let user_count = messages.iter().filter(|m| m.role == "user").count();
387        let assistant_count = messages.iter().filter(|m| m.role == "assistant").count();
388        let tool_count = messages.iter().filter(|m| m.role == "tool").count();
389
390        let mut summary_parts = Vec::new();
391
392        if user_count > 0 {
393            summary_parts.push(format!("{} user messages", user_count));
394        }
395        if assistant_count > 0 {
396            summary_parts.push(format!("{} assistant responses", assistant_count));
397        }
398        if tool_count > 0 {
399            summary_parts.push(format!("{} tool interactions", tool_count));
400        }
401
402        // Extract key topics from user messages
403        let topics: Vec<&str> = messages
404            .iter()
405            .filter(|m| m.role == "user")
406            .take(3)
407            .filter_map(|m| m.content.split_whitespace().take(5).last())
408            .collect();
409
410        if !topics.is_empty() {
411            summary_parts.push(format!("Topics: {}", topics.join(", ")));
412        }
413
414        summary_parts.join("; ")
415    }
416
417    /// Get the current configuration
418    pub fn config(&self) -> &CompactionConfig {
419        &self.config
420    }
421}
422
/// Estimate token count for a string (same heuristic as context/manager.rs).
///
/// Uses the rough rule of thumb of ~4 characters per token, with a floor of
/// one token so even an empty string is charged something.
pub fn estimate_tokens(s: &str) -> usize {
    std::cmp::max(s.len() / 4, 1)
}
428
429#[cfg(test)]
430mod tests {
431    use super::*;
432
433    fn make_message(role: &str, content: &str) -> Message {
434        Message {
435            role: role.to_string(),
436            content: content.to_string(),
437            tool_call_id: None,
438            thinking: None,
439        }
440    }
441
    #[test]
    fn test_no_compaction_needed() {
        // Two tiny messages are far below a 10k-token limit, so the
        // compactor must report a no-op with counts unchanged.
        let compactor = ContextCompactor::with_max_tokens(10000);
        let messages = vec![
            make_message("user", "Hello"),
            make_message("assistant", "Hi there!"),
        ];

        let result = compactor.compact(&messages);
        assert!(!result.compacted);
        assert_eq!(result.messages_before, result.messages_after);
    }
454
    #[test]
    fn test_sliding_window_compaction() {
        // Tiny budget (100-token limit, 50-token target) so ten short
        // messages are guaranteed to overflow and trigger compaction.
        let config = CompactionConfig {
            max_context_tokens: 100,
            target_context_tokens: 50,
            min_recent_messages: 2,
            strategy: CompactionStrategy::SlidingWindow,
            ..Default::default()
        };
        let compactor = ContextCompactor::new(config);

        // Create messages that exceed the limit
        let messages: Vec<Message> = (0..10)
            .map(|i| make_message("user", &format!("Message number {} with some content", i)))
            .collect();

        let (compacted, result) = compactor.compact_messages(messages);
        assert!(result.compacted);
        assert!(compacted.len() < 10);
        assert!(compacted.len() >= 2); // min_recent_messages floor must hold
    }
476
    #[test]
    fn test_approaching_limit() {
        let compactor = ContextCompactor::with_max_tokens(1000);

        // 90% of limit should trigger the warning band
        assert!(compactor.approaching_limit(900));
        assert!(compactor.approaching_limit(950));

        // Below 90% should not trigger
        assert!(!compactor.approaching_limit(800));

        // At or above limit should not trigger (needs_compaction instead)
        assert!(!compactor.approaching_limit(1000));
    }
491
492    #[test]
493    fn test_needs_compaction() {
494        let compactor = ContextCompactor::with_max_tokens(1000);
495
496        assert!(!compactor.needs_compaction(500));
497        assert!(!compactor.needs_compaction(999));
498        assert!(compactor.needs_compaction(1000));
499        assert!(compactor.needs_compaction(1500));
500    }
501
    #[test]
    fn test_estimate_tokens() {
        // 5 chars / 4 = 1.25 -> 1 (integer division), max(1) = 1
        assert_eq!(estimate_tokens("hello"), 1);
        // 16 chars / 4 = 4
        assert_eq!(estimate_tokens("hello world test"), 4);
        // Empty string -> 0/4 = 0, floored to 1 by max(1)
        assert_eq!(estimate_tokens(""), 1);
    }
511}