컨텍스트 전송 뷰와 압축 트리거를 claw-code 기준으로 정리

claw-code의 query.ts, autoCompact.ts, sessionMemoryCompact.ts 흐름을 참고해 AX Agent의 컨텍스트 관리와 압축 동작을 해당 흐름에 더 가깝게 맞췄다.

- AgentQueryContextBuilder를 추가해 저장된 전체 대화와 실제 LLM 전송용 query view를 분리
- compact boundary 이후만 전송하고 tool_result/tool_use 짝이 끊기지 않도록 start index를 보정
- 오래된 tool_result는 query view에서만 별도 budget으로 축약하도록 조정
- ContextCondenser의 자동 압축 시작점을 effective context window, summary reserve, buffer 기준으로 재계산
- 미사용 입력 높이 캐시 필드를 제거해 빌드 경고를 해소
- README.md, docs/DEVELOPMENT.md에 2026-04-12 21:34 (KST) 기준 작업 이력 반영

검증: dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify\\ -p:IntermediateOutputPath=obj\\verify\\ / 경고 0, 오류 0
2026-04-12 21:36:50 +09:00
parent 9175dfe657
commit 0f83dc802c
6 changed files with 844 additions and 42 deletions
--- a/src/AxCopilot/Services/Agent/ContextCondenser.cs
+++ b/src/AxCopilot/Services/Agent/ContextCondenser.cs
@@ -40,8 +40,10 @@ public static class ContextCondenser

    /// <summary>요약 시 유지할 최근 메시지 수</summary>
    private const int RecentKeepCount = 6;
+    private const int AutoCompactBufferTokens = 13_000;
+    private const int SummaryReserveTokens = 20_000;

-    /// <summary>모델별 입력 토큰 한도 (대략). 정확한 값은 중요하지 않음 — 안전 마진으로 70% 적용.</summary>
+    /// <summary>모델별 입력 토큰 한도 (대략).</summary>
    private static int GetModelInputLimit(string service, string model)
    {
        var key = $"{service}:{model}".ToLowerInvariant();
@@ -59,6 +61,13 @@ public static class ContextCondenser
        };
    }

+    /// <summary>
+    /// Computes the context window available for conversation content after
+    /// setting aside room for the compaction summary.
+    /// </summary>
+    /// <param name="service">Service identifier, passed to <c>GetModelInputLimit</c> when no limit is configured.</param>
+    /// <param name="model">Model identifier, passed to <c>GetModelInputLimit</c> when no limit is configured.</param>
+    /// <param name="configuredLimit">Explicit context window size; a value of 0 or less falls back to the per-model limit.</param>
+    /// <returns>
+    /// The window minus the summary reserve, floored at 8,000 tokens but never
+    /// larger than the resolved (positive) context window itself.
+    /// </returns>
+    private static int GetEffectiveContextWindowSize(string service, string model, int configuredLimit)
+    {
+        var contextWindow = configuredLimit > 0 ? configuredLimit : GetModelInputLimit(service, model);
+
+        // Reserve one eighth of the window for the summary, bounded to [4,000, SummaryReserveTokens].
+        var reservedForSummary = Math.Min(SummaryReserveTokens, Math.Max(4_000, contextWindow / 8));
+        var effectiveWindow = Math.Max(8_000, contextWindow - reservedForSummary);
+
+        // The 8,000 floor must not report more room than the window really has:
+        // for windows below 8,000 tokens (e.g. a 4,096-token local model) the floor
+        // alone would push the compaction threshold to or past the real limit.
+        // Windows of 8,000 or more are unaffected by this cap.
+        return contextWindow > 0 ? Math.Min(contextWindow, effectiveWindow) : effectiveWindow;
+    }
+
    /// <summary>
    /// 메시지 목록의 토큰이 모델 한도에 근접하면 자동 압축합니다.
    /// 1단계: 도구 결과 축약 (빠르고 LLM 호출 없음)
@@ -67,7 +76,7 @@ public static class ContextCondenser
    /// </summary>
    public static async Task<bool> CondenseIfNeededAsync(
        List<ChatMessage> messages,
-        LlmService llm,
+        ILlmService llm,
        int maxOutputTokens,
        bool proactiveEnabled = true,
        int triggerPercent = 80,
@@ -80,7 +89,7 @@ public static class ContextCondenser

    public static async Task<ContextCompactionResult> CondenseWithStatsAsync(
        List<ChatMessage> messages,
-        LlmService llm,
+        ILlmService llm,
        int maxOutputTokens,
        bool proactiveEnabled = true,
        int triggerPercent = 80,
@@ -94,10 +103,11 @@ public static class ContextCondenser
        // 현재 모델의 입력 토큰 한도
        var settings = llm.GetCurrentModelInfo();
        // 사용자가 설정한 컨텍스트 크기를 우선 사용. 미설정 시 모델별 기본값 적용.
-        var inputLimit = GetModelInputLimit(settings.service, settings.model);
-        var effectiveMax = maxOutputTokens > 0 ? maxOutputTokens : inputLimit;
+        var effectiveWindow = GetEffectiveContextWindowSize(settings.service, settings.model, maxOutputTokens);
        var percent = Math.Clamp(triggerPercent, 50, 95);
-        var threshold = (int)(effectiveMax * (percent / 100.0)); // 설정 임계치에서 압축 시작
+        var percentThreshold = (int)(effectiveWindow * (percent / 100.0));
+        var bufferedThreshold = Math.Max(4_000, effectiveWindow - AutoCompactBufferTokens);
+        var threshold = Math.Min(percentThreshold, bufferedThreshold);

        var currentTokens = TokenEstimator.EstimateMessages(messages);
        result.BeforeTokens = currentTokens;
@@ -668,7 +678,7 @@ public static class ContextCondenser
    /// 시스템 메시지 + 최근 N개는 유지하고, 나머지를 요약으로 교체합니다.
    /// </summary>
    private static async Task<bool> SummarizeOldMessagesAsync(
-        List<ChatMessage> messages, LlmService llm, CancellationToken ct)
+        List<ChatMessage> messages, ILlmService llm, CancellationToken ct)
    {
        var systemMsg = messages.FirstOrDefault(m => m.Role == "system");
        var systemCount = systemMsg != null ? 1 : 0;