IBM vLLM 배포형 채팅 요청 스키마 분기와 문서 반영
IBM/CP4D 인증을 사용하는 vLLM 등록 모델에서 배포형 /ml/v1/deployments/.../text/chat 계열 엔드포인트를 감지하도록 정리했다. 일반 OpenAI 호환 body 대신 messages+parameters 형태의 IBM deployment chat body를 사용하고 /v1/chat/completions를 강제로 붙이지 않도록 수정했다. IBM 배포형 응답은 results.generated_text, output_text, choices.message.content를 함께 파싱하도록 보강했고 도구 호출 경로는 안전하게 일반 응답 폴백을 유도하도록 정리했다. README와 DEVELOPMENT 문서를 2026-04-06 18:02 (KST) 기준으로 갱신했고 dotnet build 검증에서 경고 0 / 오류 0을 확인했다.
This commit is contained in:
@@ -431,13 +431,20 @@ public partial class LlmService
|
||||
List<ChatMessage> messages, IReadOnlyCollection<IAgentTool> tools, CancellationToken ct)
|
||||
{
|
||||
var activeService = ResolveService();
|
||||
var body = BuildOpenAiToolBody(messages, tools);
|
||||
|
||||
// 등록 모델의 커스텀 엔드포인트 우선 사용 (ResolveServerInfo)
|
||||
var (resolvedEp, _, allowInsecureTls) = ResolveServerInfo();
|
||||
var endpoint = string.IsNullOrEmpty(resolvedEp)
|
||||
? ResolveEndpointForService(activeService)
|
||||
: resolvedEp;
|
||||
var registered = GetActiveRegisteredModel();
|
||||
if (UsesIbmDeploymentChatApi(activeService, registered, endpoint))
|
||||
{
|
||||
throw new ToolCallNotSupportedException(
|
||||
"IBM 배포형 vLLM 연결은 OpenAI 도구 호출 형식과 다를 수 있어 일반 대화 경로로 폴백합니다.");
|
||||
}
|
||||
|
||||
var body = BuildOpenAiToolBody(messages, tools);
|
||||
|
||||
var url = activeService.ToLowerInvariant() == "ollama"
|
||||
? endpoint.TrimEnd('/') + "/api/chat"
|
||||
|
||||
@@ -320,6 +320,103 @@ public partial class LlmService : IDisposable
|
||||
m.Alias == modelName));
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the registered-model entry matching the currently resolved
/// service and model name, or null when nothing matches.
/// </summary>
private Models.RegisteredModel? GetActiveRegisteredModel()
    => FindRegisteredModel(_settings.Settings.Llm, ResolveService(), ResolveModel());
|
||||
|
||||
/// <summary>
/// Decides whether the active connection should use the IBM deployment
/// chat API instead of the OpenAI-compatible /v1/chat/completions route.
/// True only when all three hold: the service normalizes to "vllm", the
/// registered model uses an IBM/CP4D auth type, and the endpoint path
/// looks like a watsonx deployment URL.
/// </summary>
private static bool UsesIbmDeploymentChatApi(string service, Models.RegisteredModel? registered, string? endpoint)
{
    // Only vLLM-service registrations are candidates at all.
    if (!string.Equals(NormalizeServiceName(service), "vllm", StringComparison.OrdinalIgnoreCase) ||
        registered == null)
    {
        return false;
    }

    // The IBM deployment API is tied to IBM IAM / CP4D authentication modes.
    var auth = (registered.AuthType ?? "").Trim().ToLowerInvariant();
    var isIbmAuth = auth switch
    {
        "ibm_iam" or "cp4d" or "cp4d_password" or "cp4d_api_key" => true,
        _ => false,
    };
    if (!isIbmAuth)
        return false;

    // Heuristic path markers used by watsonx deployment endpoints.
    var path = (endpoint ?? "").Trim().ToLowerInvariant();
    return path.Contains("/ml/")
        || path.Contains("/deployments/")
        || path.Contains("/text/chat");
}
|
||||
|
||||
/// <summary>
/// Normalizes an IBM deployment endpoint into the concrete chat URL for
/// the requested mode: "/text/chat" for non-streaming, "/text/chat_stream"
/// for streaming. An endpoint already carrying either suffix is rewritten
/// to the other when needed; a bare "/deployments/..." URL gets the suffix
/// appended; anything else is returned unchanged.
/// </summary>
private string BuildIbmDeploymentChatUrl(string endpoint, bool stream)
{
    const string ChatSegment = "/text/chat";
    const string ChatStreamSegment = "/text/chat_stream";

    var target = (endpoint ?? "").Trim();
    if (string.IsNullOrWhiteSpace(target))
        throw new InvalidOperationException("IBM 배포형 vLLM 엔드포인트가 비어 있습니다.");

    // Probe case-insensitively but rewrite the original-cased string.
    // "_stream" must be checked first: it contains the plain segment.
    var probe = target.ToLowerInvariant();
    if (probe.Contains(ChatStreamSegment))
    {
        return stream
            ? target
            : target.Replace(ChatStreamSegment, ChatSegment, StringComparison.OrdinalIgnoreCase);
    }
    if (probe.Contains(ChatSegment))
    {
        return stream
            ? target.Replace(ChatSegment, ChatStreamSegment, StringComparison.OrdinalIgnoreCase)
            : target;
    }
    if (probe.Contains("/deployments/"))
        return target.TrimEnd('/') + (stream ? ChatStreamSegment : ChatSegment);

    return target;
}
|
||||
|
||||
/// <summary>
/// Builds the IBM deployment chat request body: a "messages" array plus a
/// "parameters" object (temperature, max_new_tokens). The configured system
/// prompt, when present, is prepended as a single system message; system
/// entries already inside the history are dropped, and every other role is
/// collapsed to "assistant" or "user".
/// </summary>
private object BuildIbmDeploymentBody(List<ChatMessage> messages)
{
    var payloadMessages = new List<object>();

    if (!string.IsNullOrWhiteSpace(_systemPrompt))
        payloadMessages.Add(new { role = "system", content = _systemPrompt });

    foreach (var message in messages)
    {
        // History-level system messages are skipped; only _systemPrompt wins.
        if (message.Role == "system")
            continue;

        var role = message.Role == "assistant" ? "assistant" : "user";
        payloadMessages.Add(new { role, content = message.Content });
    }

    return new
    {
        messages = payloadMessages,
        parameters = new
        {
            temperature = ResolveTemperature(),
            max_new_tokens = ResolveOpenAiCompatibleMaxTokens()
        }
    };
}
|
||||
|
||||
/// <summary>
/// Extracts the assistant text from an IBM deployment chat response.
/// Candidates are tried in order: OpenAI-style choices[0].message.content,
/// watsonx-style results[0].generated_text then results[0].output_text,
/// a top-level "generated_text", and finally a top-level string "message".
/// Returns "" when no candidate holds a JSON string.
/// </summary>
private static string ExtractIbmDeploymentText(JsonElement root)
{
    // Guard every read: JsonElement.GetString() throws InvalidOperationException
    // when the property exists but is not a JSON string (e.g. a numeric
    // generated_text or content delivered as an array of parts). A non-string
    // candidate now falls through to the next one instead of throwing.
    static bool TryReadString(JsonElement owner, string name, out string value)
    {
        if (owner.ValueKind == JsonValueKind.Object &&
            owner.TryGetProperty(name, out var prop) &&
            prop.ValueKind == JsonValueKind.String)
        {
            value = prop.GetString() ?? "";
            return true;
        }

        value = "";
        return false;
    }

    if (root.TryGetProperty("choices", out var choices) &&
        choices.ValueKind == JsonValueKind.Array &&
        choices.GetArrayLength() > 0 &&
        choices[0].TryGetProperty("message", out var choiceMessage) &&
        TryReadString(choiceMessage, "content", out var chatContent))
    {
        return chatContent;
    }

    if (root.TryGetProperty("results", out var results) &&
        results.ValueKind == JsonValueKind.Array &&
        results.GetArrayLength() > 0)
    {
        var first = results[0];
        if (TryReadString(first, "generated_text", out var generatedText))
            return generatedText;
        if (TryReadString(first, "output_text", out var outputText))
            return outputText;
    }

    if (TryReadString(root, "generated_text", out var topLevelText))
        return topLevelText;

    if (TryReadString(root, "message", out var messageText))
        return messageText;

    return "";
}
|
||||
|
||||
/// <summary>
|
||||
/// 현재 활성 모델의 인증 헤더 값을 반환합니다.
|
||||
/// IBM IAM / CP4D 인증인 경우 토큰을 자동 발급/캐싱하여 반환합니다.
|
||||
@@ -606,8 +703,14 @@ public partial class LlmService : IDisposable
|
||||
var llm = _settings.Settings.Llm;
|
||||
var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
|
||||
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
|
||||
var body = BuildOpenAiBody(messages, stream: false);
|
||||
var url = ep.TrimEnd('/') + "/v1/chat/completions";
|
||||
var registered = GetActiveRegisteredModel();
|
||||
var usesIbmDeploymentApi = UsesIbmDeploymentChatApi("vllm", registered, ep);
|
||||
var body = usesIbmDeploymentApi
|
||||
? BuildIbmDeploymentBody(messages)
|
||||
: BuildOpenAiBody(messages, stream: false);
|
||||
var url = usesIbmDeploymentApi
|
||||
? BuildIbmDeploymentChatUrl(ep, stream: false)
|
||||
: ep.TrimEnd('/') + "/v1/chat/completions";
|
||||
var json = JsonSerializer.Serialize(body);
|
||||
|
||||
using var req = new HttpRequestMessage(HttpMethod.Post, url)
|
||||
@@ -621,6 +724,12 @@ public partial class LlmService : IDisposable
|
||||
return SafeParseJson(respBody, root =>
|
||||
{
|
||||
TryParseOpenAiUsage(root);
|
||||
if (usesIbmDeploymentApi)
|
||||
{
|
||||
var parsed = ExtractIbmDeploymentText(root);
|
||||
return string.IsNullOrWhiteSpace(parsed) ? "(빈 응답)" : parsed;
|
||||
}
|
||||
|
||||
var choices = root.GetProperty("choices");
|
||||
if (choices.GetArrayLength() == 0) return "(빈 응답)";
|
||||
return choices[0].GetProperty("message").GetProperty("content").GetString() ?? "";
|
||||
@@ -634,8 +743,14 @@ public partial class LlmService : IDisposable
|
||||
var llm = _settings.Settings.Llm;
|
||||
var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
|
||||
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
|
||||
var body = BuildOpenAiBody(messages, stream: true);
|
||||
var url = ep.TrimEnd('/') + "/v1/chat/completions";
|
||||
var registered = GetActiveRegisteredModel();
|
||||
var usesIbmDeploymentApi = UsesIbmDeploymentChatApi("vllm", registered, ep);
|
||||
var body = usesIbmDeploymentApi
|
||||
? BuildIbmDeploymentBody(messages)
|
||||
: BuildOpenAiBody(messages, stream: true);
|
||||
var url = usesIbmDeploymentApi
|
||||
? BuildIbmDeploymentChatUrl(ep, stream: true)
|
||||
: ep.TrimEnd('/') + "/v1/chat/completions";
|
||||
|
||||
using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
|
||||
await ApplyAuthHeaderAsync(req, ct);
|
||||
@@ -657,12 +772,43 @@ public partial class LlmService : IDisposable
|
||||
{
|
||||
using var doc = JsonDocument.Parse(data);
|
||||
TryParseOpenAiUsage(doc.RootElement);
|
||||
var choices = doc.RootElement.GetProperty("choices");
|
||||
if (choices.GetArrayLength() > 0)
|
||||
if (usesIbmDeploymentApi)
|
||||
{
|
||||
var delta = choices[0].GetProperty("delta");
|
||||
if (delta.TryGetProperty("content", out var c))
|
||||
text = c.GetString();
|
||||
if (doc.RootElement.TryGetProperty("status", out var status) &&
|
||||
string.Equals(status.GetString(), "error", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var detail = doc.RootElement.TryGetProperty("message", out var message)
|
||||
? message.GetString()
|
||||
: "IBM vLLM 스트리밍 오류";
|
||||
throw new InvalidOperationException(detail);
|
||||
}
|
||||
|
||||
if (doc.RootElement.TryGetProperty("results", out var results) &&
|
||||
results.ValueKind == JsonValueKind.Array &&
|
||||
results.GetArrayLength() > 0)
|
||||
{
|
||||
var first = results[0];
|
||||
if (first.TryGetProperty("generated_text", out var generatedText))
|
||||
text = generatedText.GetString();
|
||||
else if (first.TryGetProperty("output_text", out var outputText))
|
||||
text = outputText.GetString();
|
||||
}
|
||||
else if (doc.RootElement.TryGetProperty("choices", out var ibmChoices) && ibmChoices.GetArrayLength() > 0)
|
||||
{
|
||||
var delta = ibmChoices[0].GetProperty("delta");
|
||||
if (delta.TryGetProperty("content", out var c))
|
||||
text = c.GetString();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
var choices = doc.RootElement.GetProperty("choices");
|
||||
if (choices.GetArrayLength() > 0)
|
||||
{
|
||||
var delta = choices[0].GetProperty("delta");
|
||||
if (delta.TryGetProperty("content", out var c))
|
||||
text = c.GetString();
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (JsonException ex)
|
||||
|
||||
Reference in New Issue
Block a user