모델 프로파일 기반 Cowork/Code 루프와 진행 UX 고도화 반영

- 등록 모델 실행 프로파일을 검증 게이트, 문서 fallback, post-tool verification까지 확장 적용
- Cowork/Code 진행 카드에 계획/도구/검증/압축/폴백/재시도 단계 메타를 추가해 대기 상태 가시성 강화
- OpenAI/vLLM tool 요청에 병렬 도구 호출 힌트를 추가하고 회귀 프롬프트 문서를 프로파일 기준으로 전면 정리
- 검증: dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify\\ -p:IntermediateOutputPath=obj\\verify\\ (경고 0 / 오류 0)
2026-04-08 13:41:57 +09:00
parent b391dfdfb3
commit a2c952879d
552 changed files with 8094 additions and 13595 deletions
--- a/src/AxCopilot/Services/Agent/ContextCondenser.cs
+++ b/src/AxCopilot/Services/Agent/ContextCondenser.cs
@@ -52,7 +52,10 @@ public static class ContextCondenser
            _ when key.Contains("gemini-2.0") => 900_000,
            _ when key.Contains("gemini") => 900_000,
            _ when key.Contains("gpt-4") => 120_000,      // GPT-4 128K
-            _ => 16_000, // Ollama/vLLM 로컬 모델 기본값
+            _ when key.Contains("deepseek") => 128_000,   // DeepSeek-V3/R1 128K
+            _ when key.Contains("qwen") => 32_000,        // Qwen 계열 32K
+            _ when key.Contains("llama") => 32_000,       // LLaMA 계열 32K
+            _ => 32_000, // vLLM/Ollama 알 수 없는 모델 기본값 (보수적으로 32K)
        };
    }

@@ -90,8 +93,9 @@ public static class ContextCondenser

        // 현재 모델의 입력 토큰 한도
        var settings = llm.GetCurrentModelInfo();
+        // 사용자가 설정한 컨텍스트 크기를 우선 사용. 미설정 시 모델별 기본값 적용.
        var inputLimit = GetModelInputLimit(settings.service, settings.model);
-        var effectiveMax = maxOutputTokens > 0 ? Math.Min(inputLimit, maxOutputTokens) : inputLimit;
+        var effectiveMax = maxOutputTokens > 0 ? maxOutputTokens : inputLimit;
        var percent = Math.Clamp(triggerPercent, 50, 95);
        var threshold = (int)(effectiveMax * (percent / 100.0)); // 설정 임계치에서 압축 시작