From 606ecbe6cd8a58a3d1e8b24f5a2b315907dd2211 Mon Sep 17 00:00:00 2001 From: lacvet Date: Mon, 6 Apr 2026 17:49:48 +0900 Subject: [PATCH] =?UTF-8?q?IBM=20vLLM=20=EB=B0=B0=ED=8F=AC=ED=98=95=20?= =?UTF-8?q?=EC=B1=84=ED=8C=85=20=EC=9A=94=EC=B2=AD=20=EC=8A=A4=ED=82=A4?= =?UTF-8?q?=EB=A7=88=20=EB=B6=84=EA=B8=B0=EC=99=80=20=EB=AC=B8=EC=84=9C=20?= =?UTF-8?q?=EB=B0=98=EC=98=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IBM/CP4D 인증을 사용하는 vLLM 등록 모델에서 배포형 /ml/v1/deployments/.../text/chat 계열 엔드포인트를 감지하도록 정리했다. 일반 OpenAI 호환 body 대신 messages+parameters 형태의 IBM deployment chat body를 사용하고 /v1/chat/completions를 강제로 붙이지 않도록 수정했다. IBM 배포형 응답은 results.generated_text, output_text, choices.message.content를 함께 파싱하도록 보강했고 도구 호출 경로는 안전하게 일반 응답 폴백을 유도하도록 정리했다. README와 DEVELOPMENT 문서를 2026-04-06 18:02 (KST) 기준으로 갱신했고 dotnet build 검증에서 경고 0 / 오류 0을 확인했다. --- README.md | 4 + docs/DEVELOPMENT.md | 3 + src/AxCopilot/Services/LlmService.ToolUse.cs | 9 +- src/AxCopilot/Services/LlmService.cs | 164 ++++++++++++++++++- 4 files changed, 170 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a0d255a..75382d2 100644 --- a/README.md +++ b/README.md @@ -1311,3 +1311,7 @@ MIT License - 업데이트: 2026-04-06 17:52 (KST) - 런처 표시 체감 속도를 유지하기 위해 [LauncherWindow](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/Views/LauncherWindow.xaml.cs) 사전 생성은 다시 복원했다. 대신 무거운 후보를 색인으로 더 좁히기 위해, [App.xaml.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/App.xaml.cs)의 인덱스 워밍업 진입점을 런처 표시 시점이 아니라 실제 검색 시점으로 옮겼다. - [LauncherViewModel.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/ViewModels/LauncherViewModel.cs) 의 `SearchAsync(...)` 시작 시에만 `EnsureIndexWarmupStarted()`를 호출하도록 바꿔, 사용자가 런처를 단순 호출만 할 때는 전체 인덱스 스캔과 파일 감시가 돌지 않게 정리했다. +- 업데이트: 2026-04-06 18:02 (KST) + - IBM 연결형 vLLM에서 `model_id` 또는 `mode`를 body에 넣지 말라는 응답이 오던 문제를 수정했다. 
[LlmService.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/Services/LlmService.cs)에 IBM/CP4D 인증 + `/ml/v1/deployments/.../text/chat` 계열 엔드포인트를 감지하는 분기를 추가하고, 이 경우 일반 OpenAI 호환 body 대신 `messages + parameters` 형태의 IBM deployment chat body를 사용하도록 바꿨다. + - 같은 파일에서 IBM deployment chat 경로는 `/v1/chat/completions`를 더 이상 강제로 붙이지 않고, 스트리밍 여부에 따라 `/text/chat` 또는 `/text/chat_stream` URL을 사용하도록 정리했다. 응답 파싱도 `results[].generated_text`, `output_text`, `choices[].message.content`를 함께 지원하게 확장했다. + - [LlmService.ToolUse.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/Services/LlmService.ToolUse.cs) 에서는 IBM deployment chat API가 감지되면 OpenAI function-calling body를 그대로 보내지 않고 `ToolCallNotSupportedException`으로 일반 응답 경로 폴백을 유도하도록 안전장치를 추가했다. diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index a702a00..505efa4 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -4994,3 +4994,6 @@ ow + toggle ?쒓컖 ?몄뼱濡??ㅼ떆 ?뺣젹?덈떎. - Document update: 2026-04-06 17:43 (KST) - Removed the eager tray-menu warmup path (`PrepareForDisplay()`) from startup. This avoids doing popup layout/measure work for the tray menu until the user actually opens it, reducing idle desktop overhead further. - Document update: 2026-04-06 17:52 (KST) - Restored eager `LauncherWindow` construction so launcher open latency stays low, but kept the tray-menu warmup removed. This keeps the launcher responsive while continuing to trim startup work elsewhere. - Document update: 2026-04-06 17:52 (KST) - Moved index warmup from launcher-show time to actual search time. `LauncherViewModel.SearchAsync(...)` now triggers `App.EnsureIndexWarmupStarted()` only when the user performs a real search, so opening the launcher by itself no longer starts a full file scan and watcher bootstrap. +- Document update: 2026-04-06 18:02 (KST) - Added IBM deployment-chat detection in `LlmService.cs` for vLLM registered models using `ibm_iam` or `cp4d_*` auth with `/ml/v1/deployments/...`-style endpoints. 
These requests now use IBM deployment chat URLs (`/text/chat` or `/text/chat_stream`) instead of appending `/v1/chat/completions`. +- Document update: 2026-04-06 18:02 (KST) - Added an IBM deployment request body builder in `LlmService.cs` that omits OpenAI-style `model` and `stream` fields and sends `messages + parameters` instead. This directly addresses IBM responses complaining that `model_id` or `mode` must not be specified in the request body. +- Document update: 2026-04-06 18:02 (KST) - Hardened vLLM response handling for IBM deployment endpoints by accepting `results[].generated_text`, `output_text`, and `choices[].message.content`, and by short-circuiting tool-use requests in `LlmService.ToolUse.cs` with a `ToolCallNotSupportedException` so IBM deployment chat connections do not receive an incompatible OpenAI function-calling payload. diff --git a/src/AxCopilot/Services/LlmService.ToolUse.cs b/src/AxCopilot/Services/LlmService.ToolUse.cs index f366174..6629783 100644 --- a/src/AxCopilot/Services/LlmService.ToolUse.cs +++ b/src/AxCopilot/Services/LlmService.ToolUse.cs @@ -431,13 +431,20 @@ public partial class LlmService List messages, IReadOnlyCollection tools, CancellationToken ct) { var activeService = ResolveService(); - var body = BuildOpenAiToolBody(messages, tools); // 등록 모델의 커스텀 엔드포인트 우선 사용 (ResolveServerInfo) var (resolvedEp, _, allowInsecureTls) = ResolveServerInfo(); var endpoint = string.IsNullOrEmpty(resolvedEp) ? ResolveEndpointForService(activeService) : resolvedEp; + var registered = GetActiveRegisteredModel(); + if (UsesIbmDeploymentChatApi(activeService, registered, endpoint)) + { + throw new ToolCallNotSupportedException( + "IBM 배포형 vLLM 연결은 OpenAI 도구 호출 형식과 다를 수 있어 일반 대화 경로로 폴백합니다."); + } + + var body = BuildOpenAiToolBody(messages, tools); var url = activeService.ToLowerInvariant() == "ollama" ? 
endpoint.TrimEnd('/') + "/api/chat"
diff --git a/src/AxCopilot/Services/LlmService.cs b/src/AxCopilot/Services/LlmService.cs
index 04bde33..93eb1c5 100644
--- a/src/AxCopilot/Services/LlmService.cs
+++ b/src/AxCopilot/Services/LlmService.cs
@@ -320,6 +320,162 @@ public partial class LlmService : IDisposable
             m.Alias == modelName));
     }
 
+    /// <summary>
+    /// Looks up the registered model for the currently resolved service and model via FindRegisteredModel.
+    /// </summary>
+    private Models.RegisteredModel? GetActiveRegisteredModel()
+    {
+        var llm = _settings.Settings.Llm;
+        return FindRegisteredModel(llm, ResolveService(), ResolveModel());
+    }
+
+    /// <summary>
+    /// Returns true when the service normalizes to "vllm", a registered model with IBM IAM / CP4D
+    /// auth is present, and the endpoint contains "/ml/", "/deployments/" or "/text/chat".
+    /// </summary>
+    private static bool UsesIbmDeploymentChatApi(string service, Models.RegisteredModel? registered, string? endpoint)
+    {
+        if (!string.Equals(NormalizeServiceName(service), "vllm", StringComparison.OrdinalIgnoreCase))
+            return false;
+        if (registered == null)
+            return false;
+
+        var authType = (registered.AuthType ?? "").Trim().ToLowerInvariant();
+        if (authType is not ("ibm_iam" or "cp4d" or "cp4d_password" or "cp4d_api_key"))
+            return false;
+
+        var normalizedEndpoint = (endpoint ?? "").Trim().ToLowerInvariant();
+        return normalizedEndpoint.Contains("/ml/") ||
+               normalizedEndpoint.Contains("/deployments/") ||
+               normalizedEndpoint.Contains("/text/chat");
+    }
+
+    /// <summary>
+    /// Builds the IBM deployment chat URL, switching between "/text/chat" and
+    /// "/text/chat_stream" according to <paramref name="stream"/>.
+    /// </summary>
+    /// <param name="endpoint">Registered endpoint; may already contain a chat route and may carry a query string.</param>
+    /// <param name="stream">True to target the streaming route.</param>
+    /// <exception cref="InvalidOperationException">The endpoint is empty or whitespace.</exception>
+    private string BuildIbmDeploymentChatUrl(string endpoint, bool stream)
+    {
+        var trimmed = (endpoint ?? "").Trim();
+        if (string.IsNullOrWhiteSpace(trimmed))
+            throw new InvalidOperationException("IBM 배포형 vLLM 엔드포인트가 비어 있습니다.");
+
+        // Rewrite the path only, so a "?version=..." query or "#fragment" stays at the end
+        // instead of having the chat route appended after it.
+        var suffixStart = trimmed.IndexOfAny(new[] { '?', '#' });
+        var path = suffixStart >= 0 ? trimmed[..suffixStart] : trimmed;
+        var suffix = suffixStart >= 0 ? trimmed[suffixStart..] : "";
+
+        if (path.Contains("/text/chat_stream", StringComparison.OrdinalIgnoreCase))
+        {
+            return stream
+                ? trimmed
+                : path.Replace("/text/chat_stream", "/text/chat", StringComparison.OrdinalIgnoreCase) + suffix;
+        }
+
+        if (path.Contains("/text/chat", StringComparison.OrdinalIgnoreCase))
+        {
+            return stream
+                ? path.Replace("/text/chat", "/text/chat_stream", StringComparison.OrdinalIgnoreCase) + suffix
+                : trimmed;
+        }
+
+        if (path.Contains("/deployments/", StringComparison.OrdinalIgnoreCase))
+            return path.TrimEnd('/') + (stream ? "/text/chat_stream" : "/text/chat") + suffix;
+
+        return trimmed;
+    }
+
+    /// <summary>
+    /// Builds the IBM deployment chat body: <c>messages</c> plus a <c>parameters</c> object,
+    /// without the OpenAI-style <c>model</c> and <c>stream</c> fields.
+    /// </summary>
+    private object BuildIbmDeploymentBody(List messages)
+    {
+        var msgs = new List();
+        if (!string.IsNullOrWhiteSpace(_systemPrompt))
+            msgs.Add(new { role = "system", content = _systemPrompt });
+
+        foreach (var m in messages)
+        {
+            // System messages in the history are skipped; only _systemPrompt is sent as system.
+            if (m.Role == "system")
+                continue;
+
+            msgs.Add(new
+            {
+                role = m.Role == "assistant" ? "assistant" : "user",
+                content = m.Content
+            });
+        }
+
+        return new
+        {
+            messages = msgs,
+            parameters = new
+            {
+                temperature = ResolveTemperature(),
+                max_new_tokens = ResolveOpenAiCompatibleMaxTokens()
+            }
+        };
+    }
+
+    /// <summary>
+    /// Extracts the reply text from an IBM deployment response. Tries, in order,
+    /// <c>choices[0].message.content</c>, <c>results[0].generated_text</c>,
+    /// <c>results[0].output_text</c>, top-level <c>generated_text</c> and top-level <c>message</c>.
+    /// </summary>
+    /// <returns>The first string value found, or an empty string when none is present.</returns>
+    private static string ExtractIbmDeploymentText(JsonElement root)
+    {
+        // GetString() throws for arrays, objects and numbers, so only String-valued
+        // properties are accepted; any other shape falls through to the next candidate.
+        static bool TryReadString(JsonElement owner, string name, out string value)
+        {
+            value = "";
+            if (owner.ValueKind != JsonValueKind.Object ||
+                !owner.TryGetProperty(name, out var property) ||
+                property.ValueKind != JsonValueKind.String)
+                return false;
+
+            value = property.GetString() ?? "";
+            return true;
+        }
+
+        // TryGetProperty itself throws when the element is not a JSON object.
+        if (root.ValueKind != JsonValueKind.Object)
+            return "";
+
+        if (root.TryGetProperty("choices", out var choices) && choices.ValueKind == JsonValueKind.Array && choices.GetArrayLength() > 0)
+        {
+            var firstChoice = choices[0];
+            if (firstChoice.ValueKind == JsonValueKind.Object &&
+                firstChoice.TryGetProperty("message", out var choiceMessage) &&
+                TryReadString(choiceMessage, "content", out var content))
+                return content;
+        }
+
+        if (root.TryGetProperty("results", out var results) && results.ValueKind == JsonValueKind.Array && results.GetArrayLength() > 0)
+        {
+            var first = results[0];
+            if (TryReadString(first, "generated_text", out var generatedText))
+                return generatedText;
+            if (TryReadString(first, "output_text", out var outputText))
+                return outputText;
+        }
+
+        if (TryReadString(root, "generated_text", out var generated))
+            return generated;
+
+        if (TryReadString(root, "message", out var messageValue))
+            return messageValue;
+
+        return "";
+    }
+
     ///
     /// 현재 활성 모델의 인증 헤더 값을 반환합니다.
     /// IBM IAM / CP4D 인증인 경우 토큰을 자동 발급/캐싱하여 반환합니다.
@@ -606,8 +703,14 @@ public partial class LlmService : IDisposable var llm = _settings.Settings.Llm; var (endpoint, _, allowInsecureTls) = ResolveServerInfo(); var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint; - var body = BuildOpenAiBody(messages, stream: false); - var url = ep.TrimEnd('/') + "/v1/chat/completions"; + var registered = GetActiveRegisteredModel(); + var usesIbmDeploymentApi = UsesIbmDeploymentChatApi("vllm", registered, ep); + var body = usesIbmDeploymentApi + ? BuildIbmDeploymentBody(messages) + : BuildOpenAiBody(messages, stream: false); + var url = usesIbmDeploymentApi + ? BuildIbmDeploymentChatUrl(ep, stream: false) + : ep.TrimEnd('/') + "/v1/chat/completions"; var json = JsonSerializer.Serialize(body); using var req = new HttpRequestMessage(HttpMethod.Post, url) @@ -621,6 +724,12 @@ public partial class LlmService : IDisposable return SafeParseJson(respBody, root => { TryParseOpenAiUsage(root); + if (usesIbmDeploymentApi) + { + var parsed = ExtractIbmDeploymentText(root); + return string.IsNullOrWhiteSpace(parsed) ? "(빈 응답)" : parsed; + } + var choices = root.GetProperty("choices"); if (choices.GetArrayLength() == 0) return "(빈 응답)"; return choices[0].GetProperty("message").GetProperty("content").GetString() ?? ""; @@ -634,8 +743,14 @@ public partial class LlmService : IDisposable var llm = _settings.Settings.Llm; var (endpoint, _, allowInsecureTls) = ResolveServerInfo(); var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint; - var body = BuildOpenAiBody(messages, stream: true); - var url = ep.TrimEnd('/') + "/v1/chat/completions"; + var registered = GetActiveRegisteredModel(); + var usesIbmDeploymentApi = UsesIbmDeploymentChatApi("vllm", registered, ep); + var body = usesIbmDeploymentApi + ? BuildIbmDeploymentBody(messages) + : BuildOpenAiBody(messages, stream: true); + var url = usesIbmDeploymentApi + ? 
BuildIbmDeploymentChatUrl(ep, stream: true) + : ep.TrimEnd('/') + "/v1/chat/completions"; using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) }; await ApplyAuthHeaderAsync(req, ct); @@ -657,12 +772,43 @@ public partial class LlmService : IDisposable { using var doc = JsonDocument.Parse(data); TryParseOpenAiUsage(doc.RootElement); - var choices = doc.RootElement.GetProperty("choices"); - if (choices.GetArrayLength() > 0) + if (usesIbmDeploymentApi) { - var delta = choices[0].GetProperty("delta"); - if (delta.TryGetProperty("content", out var c)) - text = c.GetString(); + if (doc.RootElement.TryGetProperty("status", out var status) && + string.Equals(status.GetString(), "error", StringComparison.OrdinalIgnoreCase)) + { + var detail = doc.RootElement.TryGetProperty("message", out var message) + ? message.GetString() + : "IBM vLLM 스트리밍 오류"; + throw new InvalidOperationException(detail); + } + + if (doc.RootElement.TryGetProperty("results", out var results) && + results.ValueKind == JsonValueKind.Array && + results.GetArrayLength() > 0) + { + var first = results[0]; + if (first.TryGetProperty("generated_text", out var generatedText)) + text = generatedText.GetString(); + else if (first.TryGetProperty("output_text", out var outputText)) + text = outputText.GetString(); + } + else if (doc.RootElement.TryGetProperty("choices", out var ibmChoices) && ibmChoices.GetArrayLength() > 0) + { + var delta = ibmChoices[0].GetProperty("delta"); + if (delta.TryGetProperty("content", out var c)) + text = c.GetString(); + } + } + else + { + var choices = doc.RootElement.GetProperty("choices"); + if (choices.GetArrayLength() > 0) + { + var delta = choices[0].GetProperty("delta"); + if (delta.TryGetProperty("content", out var c)) + text = c.GetString(); + } } } catch (JsonException ex)