claude-code 기준 provider 호환성과 compact 후속 흐름을 보강한다

- OpenAI 호환 tool_choice 400 오류에 대한 일반 fallback을 추가하고 Qwen·LLaMA·DeepSeek·Mistral 계열 vLLM의 도구 호출 프로파일을 더 보수적으로 조정
- compact 이후 branch context와 최근 tool state를 query view에 재주입하고 UI 표현 수준에 맞춰 compact 카드/컨텍스트 사용 팝업/최종 보고 밀도를 세분화
- README와 DEVELOPMENT 문서 이력을 2026-04-12 23:45 KST 기준으로 갱신
- 검증: dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify\\ -p:IntermediateOutputPath=obj\\verify\\ 경고 0 / 오류 0
2026-04-12 22:32:40 +09:00
parent 58b798d3e4
commit da11029284
7 changed files with 215 additions and 27 deletions
--- a/src/AxCopilot/Services/LlmService.ToolUse.cs
+++ b/src/AxCopilot/Services/LlmService.ToolUse.cs
@@ -545,10 +545,14 @@ public partial class LlmService
            var detail = ExtractErrorDetail(errBody);
            LogService.Warn($"[ToolUse] {activeService} API 오류 ({resp.StatusCode}): {errBody}");

-            if (isIbmDeployment && forceToolCall && (int)resp.StatusCode == 400)
+            if (forceToolCall && (int)resp.StatusCode == 400)
            {
-                LogService.Warn("[ToolUse] IBM 배포형 경로에서 tool_choice가 거부되어 대체 강제 전략으로 재시도합니다.");
-                var fallbackBody = BuildIbmToolBody(messages, tools, forceToolCall: true, useToolChoice: false);
+                LogService.Warn(isIbmDeployment
+                    ? "[ToolUse] IBM 배포형 경로에서 tool_choice가 거부되어 대체 강제 전략으로 재시도합니다."
+                    : "[ToolUse] OpenAI 호환 경로에서 tool_choice가 거부되어 텍스트 지시 기반 강제 전략으로 재시도합니다.");
+                var fallbackBody = isIbmDeployment
+                    ? BuildIbmToolBody(messages, tools, forceToolCall: true, useToolChoice: false)
+                    : BuildOpenAiToolBody(messages, tools, forceToolCall: true, useToolChoice: false);
                var fallbackJson = JsonSerializer.Serialize(fallbackBody);
                WorkflowLogService.LogLlmRawRequestFromContext(url, fallbackJson);
                using var retryReq = new HttpRequestMessage(HttpMethod.Post, url)
@@ -558,7 +562,7 @@ public partial class LlmService
                await ApplyAuthHeaderAsync(retryReq, ct);
                using var retryResp = await SendWithTlsAsync(retryReq, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead);
                if (retryResp.IsSuccessStatusCode)
-                    return await ReadOpenAiToolBlocksFromStreamAsync(retryResp, true, prefetchToolCallAsync, ct);
+                    return await ReadOpenAiToolBlocksFromStreamAsync(retryResp, isIbmDeployment, prefetchToolCallAsync, ct);
            }

            // 400 BadRequest → 도구 없이 일반 응답으로 폴백 시도
@@ -686,11 +690,59 @@ public partial class LlmService
        }
    }

-    private object BuildOpenAiToolBody(List<ChatMessage> messages, IReadOnlyCollection<IAgentTool> tools, bool forceToolCall = false)
+    private sealed record OpenAiToolCompatibilityProfile( // Per-service/model switches consumed by BuildOpenAiToolBody.
+        int StructuredHistoryRecentWindow, // Recent-message count handed to GetStructuredToolHistoryStartIndex.
+        bool AllowParallelToolCalls, // ANDed with the execution policy's EnableParallelReadBatch for "parallel_tool_calls".
+        bool IncludeReasoningEffort, // When false, "reasoning_effort" is never added to the request body.
+        bool AddToolOnlyDirectiveOnFallback); // Appends the tool-only user message when forcing a tool call without tool_choice.
+
+    /// <summary>
+    /// Chooses the request-shaping profile for an OpenAI-compatible tool call.
+    /// A vLLM service whose model name mentions qwen, llama, deepseek or mistral
+    /// gets a recent-history window of 4 with parallel tool calls and reasoning
+    /// effort switched off; every other combination keeps the window-of-8 defaults.
+    /// </summary>
+    private OpenAiToolCompatibilityProfile GetOpenAiToolCompatibilityProfile(string service, string model)
+    {
+        var serviceKey = NormalizeServiceName(service);
+        var modelKey = (model ?? "").Trim().ToLowerInvariant();
+        string[] fragileFamilies = { "qwen", "llama", "deepseek", "mistral" };
+        // The service check comes first so model names are only scanned for vLLM.
+        var useConservativeProfile = serviceKey == "vllm"
+            && fragileFamilies.Any(family => modelKey.Contains(family, StringComparison.OrdinalIgnoreCase));
+
+        // Both profiles append the tool-only directive on the fallback retry.
+        var defaults = new OpenAiToolCompatibilityProfile(
+            StructuredHistoryRecentWindow: 8,
+            AllowParallelToolCalls: true,
+            IncludeReasoningEffort: true,
+            AddToolOnlyDirectiveOnFallback: true);
+
+        return useConservativeProfile
+            ? defaults with { StructuredHistoryRecentWindow = 4, AllowParallelToolCalls = false, IncludeReasoningEffort = false }
+            : defaults;
+    }
+
+    // Builds the "[TOOL_ONLY]" user prompt sent on the no-tool_choice retry, naming at most 12 tools.
+    private static string BuildOpenAiToolOnlyDirective(IReadOnlyCollection<IAgentTool> tools)
+    {
+        var listedNames = tools.Take(12).Select(tool => tool.Name);
+        return "[TOOL_ONLY] 텍스트로 설명하지 말고 지금 바로 도구를 호출하세요. "
+            + $"사용 가능한 도구: {string.Join(", ", listedNames)}. plain text 대신 function/tool call을 사용하세요.";
+    }
+
+    private object BuildOpenAiToolBody(
+        List<ChatMessage> messages,
+        IReadOnlyCollection<IAgentTool> tools,
+        bool forceToolCall = false,
+        bool useToolChoice = true)
    {
        var llm = _settings.Settings.Llm;
+        var activeService = ResolveService();
+        var activeModel = ResolveModel();
+        var compatibilityProfile = GetOpenAiToolCompatibilityProfile(activeService, activeModel);
        var msgs = new List<object>();
-        var structuredHistoryStart = GetStructuredToolHistoryStartIndex(messages);
+        var structuredHistoryStart = GetStructuredToolHistoryStartIndex(messages, compatibilityProfile.StructuredHistoryRecentWindow);

        for (var messageIndex = 0; messageIndex < messages.Count; messageIndex++)
        {
@@ -790,6 +842,15 @@ public partial class LlmService
            }
        }

+        if (forceToolCall && !useToolChoice && compatibilityProfile.AddToolOnlyDirectiveOnFallback)
+        {
+            msgs.Add(new
+            {
+                role = "user",
+                content = BuildOpenAiToolOnlyDirective(tools),
+            });
+        }
+
        // ── tool_calls ↔ tool 메시지 쌍 검증 ──
        // 컨텍스트 압축 후 tool_calls assistant 메시지는 남아있는데
        // 대응하는 tool result 메시지가 누락되면 vLLM이 400 에러를 반환함.
@@ -822,8 +883,6 @@ public partial class LlmService
            };
        }).ToArray();

-        var activeService = ResolveService();
-        var activeModel = ResolveModel();
        var executionPolicy = GetActiveExecutionPolicy();
        var isOllama = activeService.Equals("ollama", StringComparison.OrdinalIgnoreCase);
        if (isOllama)
@@ -839,7 +898,7 @@ public partial class LlmService
                ["options"] = new { temperature = ResolveToolTemperature() }
            };
            // Ollama에도 tool_choice 전달 — 프로파일 ForceInitialToolCall 적용
-            if (forceToolCall)
+            if (forceToolCall && useToolChoice)
                ollamaBody["tool_choice"] = "required";
            return ollamaBody;
        }
@@ -852,23 +911,22 @@ public partial class LlmService
            ["stream"] = true,
            ["temperature"] = ResolveToolTemperature(),
            ["max_tokens"] = ResolveOpenAiCompatibleMaxTokens(),
-            ["parallel_tool_calls"] = executionPolicy.EnableParallelReadBatch,
+            ["parallel_tool_calls"] = executionPolicy.EnableParallelReadBatch && compatibilityProfile.AllowParallelToolCalls,
        };
        // 스트리밍 시 마지막 청크에 토큰 사용량 포함 요청 (vLLM/OpenAI 호환)
        body["stream_options"] = new { include_usage = true };
        // tool_choice: "required" — 모델이 반드시 도구를 호출하도록 강제
        // 아직 한 번도 도구를 호출하지 않은 첫 번째 요청에서만 사용 (chatty 모델 대응)
-        if (forceToolCall)
+        if (forceToolCall && useToolChoice)
            body["tool_choice"] = "required";
        var effort = ResolveReasoningEffort();
-        if (!string.IsNullOrWhiteSpace(effort))
+        if (compatibilityProfile.IncludeReasoningEffort && !string.IsNullOrWhiteSpace(effort))
            body["reasoning_effort"] = effort;
        return body;
    }

-    private static int GetStructuredToolHistoryStartIndex(IReadOnlyList<ChatMessage> messages)
+    private static int GetStructuredToolHistoryStartIndex(IReadOnlyList<ChatMessage> messages, int protectedRecentNonSystemMessages)
    {
-        const int protectedRecentNonSystemMessages = 8;
        var nonSystemMessages = messages
            .Select((message, index) => new { message, index })
            .Where(x => !string.Equals(x.message.Role, "system", StringComparison.OrdinalIgnoreCase))
@@ -1229,9 +1287,14 @@ public partial class LlmService
        {
            var errBody = await resp.Content.ReadAsStringAsync(ct);
            var detail = ExtractErrorDetail(errBody);
-            if (isIbmDeployment && forceToolCall && (int)resp.StatusCode == 400)
+            if (forceToolCall && (int)resp.StatusCode == 400)
            {
-                var fallbackBody = BuildIbmToolBody(messages, tools, forceToolCall: true, useToolChoice: false);
+                LogService.Warn(isIbmDeployment
+                    ? "[ToolUse] IBM 배포형 스트리밍 경로에서 tool_choice가 거부되어 대체 강제 전략으로 재시도합니다."
+                    : "[ToolUse] OpenAI 호환 스트리밍 경로에서 tool_choice가 거부되어 텍스트 지시 기반 강제 전략으로 재시도합니다.");
+                var fallbackBody = isIbmDeployment
+                    ? BuildIbmToolBody(messages, tools, forceToolCall: true, useToolChoice: false)
+                    : BuildOpenAiToolBody(messages, tools, forceToolCall: true, useToolChoice: false);
                var fallbackJson = JsonSerializer.Serialize(fallbackBody);
                WorkflowLogService.LogLlmRawRequestFromContext(url, fallbackJson);
                using var retryReq = new HttpRequestMessage(HttpMethod.Post, url)