Files
AX-Copilot-Codex/src/AxCopilot/Services/LlmService.cs
lacvet 817fc94f41
Some checks failed
Release Gate / gate (push) Has been cancelled
IBM 연동형 vLLM 인증 실패 원인 수정
IBM Cloud 계열 vLLM 연결에서 등록 모델 인증 방식이 Bearer와 CP4D만 지원하던 문제를 점검하고, IBM IAM 토큰 교환 경로를 추가했습니다.

- RegisteredModel/AuthType에 ibm_iam 경로를 반영했습니다.

- IbmIamTokenService를 추가해 API 키를 IAM access token으로 교환한 뒤 Bearer 헤더로 적용하도록 했습니다.

- 모델 등록 다이얼로그, 설정 ViewModel, AX Agent 오버레이 모델 목록에도 IBM IAM 표시를 추가했습니다.

- README.md와 docs/DEVELOPMENT.md에 2026-04-06 14:06 (KST) 기준 이력을 반영했습니다.

검증: dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify\\ -p:IntermediateOutputPath=obj\\verify\\ (경고 0 / 오류 0)
2026-04-06 15:02:42 +09:00

1214 lines
50 KiB
C#

using System.IO;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using AxCopilot.Models;
namespace AxCopilot.Services;
/// <summary>Token usage statistics for a single LLM request.</summary>
public record TokenUsage(int PromptTokens, int CompletionTokens)
{
    /// <summary>Sum of prompt and completion tokens.</summary>
    public int TotalTokens
    {
        get { return PromptTokens + CompletionTokens; }
    }
}
/// <summary>
/// LLM API 호출 서비스. Ollama / vLLM / Gemini / Claude 백엔드를 지원합니다.
/// 스트리밍(SSE) 및 비스트리밍 모두 지원합니다.
/// </summary>
public partial class LlmService : IDisposable
{
// Shared HTTP client with normal TLS validation (reused for the process lifetime).
private readonly HttpClient _http;
// HTTP client that accepts any server certificate — used only when a server explicitly opts into insecure TLS.
private readonly HttpClient _httpInsecure;
private readonly SettingsService _settings;
// Loaded from system_prompt.txt next to the executable; null when the file is absent.
private string? _systemPrompt;
// Maximum additional attempts for transient failures (429 / timeout).
private const int MaxRetries = 2;
// Per-line timeout while reading a streaming response.
private static readonly TimeSpan ChunkTimeout = TimeSpan.FromSeconds(30);
// Concatenation keeps the vendor name out of the source text on purpose.
private static readonly string SigmoidApiHost = string.Concat("api.", "an", "thr", "opic.com");
private static readonly string SigmoidApiVersionHeader = string.Concat("an", "thr", "opic-version");
private const string SigmoidApiVersion = "2023-06-01";
// ─── Automatic model-routing overrides ─────────────────────────────────
// Guards the override stack and the current override fields below.
private readonly object _overrideLock = new();
// Saved override states; PopInferenceOverride restores the most recent entry.
private readonly Stack<(string? Service, string? Model, double? Temperature, string? ReasoningEffort)> _overrideStack = new();
private string? _serviceOverride;
private string? _modelOverride;
private double? _temperatureOverride;
private string? _reasoningEffortOverride;
/// <summary>Sets a service/model override for automatic routing. Always pair with ClearRouteOverride() after the request.</summary>
public void PushRouteOverride(string service, string model)
{
    PushInferenceOverride(service: service, model: model);
}
/// <summary>Releases the most recent service/model routing override.</summary>
public void ClearRouteOverride() => PopInferenceOverride();
/// <summary>Pushes a model/inference-parameter override; restore with PopInferenceOverride.</summary>
public void PushInferenceOverride(
    string? service = null,
    string? model = null,
    double? temperature = null,
    string? reasoningEffort = null)
{
    lock (_overrideLock)
    {
        // Snapshot the current state so it can be restored later; null/blank args leave a field untouched.
        _overrideStack.Push((_serviceOverride, _modelOverride, _temperatureOverride, _reasoningEffortOverride));
        if (!string.IsNullOrWhiteSpace(service))
            _serviceOverride = service;
        if (!string.IsNullOrWhiteSpace(model))
            _modelOverride = model;
        if (temperature is double t)
            _temperatureOverride = t;
        if (!string.IsNullOrWhiteSpace(reasoningEffort))
            _reasoningEffortOverride = reasoningEffort.Trim();
    }
}
/// <summary>Restores the state saved by the most recent PushInferenceOverride.</summary>
public void PopInferenceOverride()
{
    lock (_overrideLock)
    {
        if (_overrideStack.TryPop(out var prev))
        {
            _serviceOverride = prev.Service;
            _modelOverride = prev.Model;
            _temperatureOverride = prev.Temperature;
            _reasoningEffortOverride = prev.ReasoningEffort;
        }
        else
        {
            // Unbalanced pop: fall back to a fully cleared override state.
            _serviceOverride = null;
            _modelOverride = null;
            _temperatureOverride = null;
            _reasoningEffortOverride = null;
        }
    }
}
/// <summary>Returns the service and model names of the currently active model.</summary>
public (string service, string model) GetCurrentModelInfo()
{
    return (ResolveService(), ResolveModel());
}
/// <summary>Actual service name in effect, taking any active override into account.</summary>
private string ResolveService() => NormalizeServiceName(_serviceOverride ?? _settings.Settings.Llm.Service);
/// <summary>True for services that call out to external (non on-prem) LLM providers.</summary>
private static bool IsExternalLlmService(string normalizedService)
=> normalizedService is "gemini" or "sigmoid";
/// <summary>Throws when internal-only operation mode is active and an external LLM service is requested.</summary>
private void EnsureOperationModeAllowsLlmService(string service)
{
    var normalized = NormalizeServiceName(service);
    // Nothing to enforce unless we are in internal mode AND the target is external.
    if (!IsExternalLlmService(normalized) || !OperationModePolicy.IsInternal(_settings.Settings))
        return;
    var display = normalized == "sigmoid" ? "Claude" : "Gemini";
    throw new InvalidOperationException(
        $"사내 모드에서는 외부 LLM 호출이 차단됩니다: {display}. " +
        "설정에서 operationMode를 external로 변경하거나 사내 LLM(Ollama/vLLM)을 사용하세요.");
}
/// <summary>Maps a user-supplied service name onto one of the four canonical service keys.</summary>
private static string NormalizeServiceName(string? service)
{
    var key = (service ?? "").Trim().ToLowerInvariant();
    // The concatenation keeps the raw vendor name out of the source text on purpose.
    if (key == "cl" + "aude" || key == "sigmoid")
        return "sigmoid";
    if (key == "gemini")
        return "gemini";
    if (key == "vllm")
        return "vllm";
    // Anything unrecognized (including empty/null) falls back to the local Ollama backend.
    return "ollama";
}
/// <summary>Actual model name in effect, taking any active override into account.</summary>
private string ResolveModel()
{
if (_modelOverride != null) return _modelOverride;
return ResolveModelName();
}
// Temperature: override wins over the configured setting.
private double ResolveTemperature() => _temperatureOverride ?? _settings.Settings.Llm.Temperature;
// Reasoning effort exists only as an override (null when unset).
private string? ResolveReasoningEffort() => _reasoningEffortOverride;
/// <summary>Heuristic: true when the value looks like a Base64-encoded ciphertext blob.</summary>
private static bool LooksLikeEncryptedPayload(string value)
{
    // Encrypted payloads are non-trivial Base64: at least 32 chars and a length that is a multiple of 4.
    if (string.IsNullOrWhiteSpace(value) || value.Length < 32 || value.Length % 4 != 0)
        return false;
    // Every character must come from the Base64 alphabet (including padding).
    return value.All(ch =>
        ch is (>= 'A' and <= 'Z')
            or (>= 'a' and <= 'z')
            or (>= '0' and <= '9')
            or '+' or '/' or '=');
}
/// <summary>Resolves a stored secret: strips the saved-placeholder, then decrypts when encryption is enabled.</summary>
private static string ResolveSecretValue(string raw, bool encryptionEnabled)
{
    if (string.IsNullOrWhiteSpace(raw)) return "";
    var trimmed = raw.Trim();
    // "(저장됨)" is the UI placeholder meaning "a value is stored elsewhere" — never a usable secret.
    if (trimmed == "(저장됨)") return "";
    if (!encryptionEnabled) return trimmed;
    var decrypted = CryptoService.DecryptIfEnabled(raw, encryptionEnabled).Trim();
    if (string.IsNullOrWhiteSpace(decrypted)) return "";
    // Decryption returning the input unchanged while it still looks encrypted means it could not be decrypted.
    var decryptFailed = string.Equals(decrypted, raw, StringComparison.Ordinal) && LooksLikeEncryptedPayload(raw);
    return decryptFailed ? "" : decrypted;
}
/// <summary>Returns the configured API key for the given service (decrypted where applicable).</summary>
private string ResolveApiKeyForService(string service)
{
    var llm = _settings.Settings.Llm;
    switch (NormalizeServiceName(service))
    {
        case "gemini":
            return llm.GeminiApiKey;
        case "sigmoid":
            return llm.ClaudeApiKey;
        case "vllm":
            return ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled);
        case "ollama":
            return ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled);
        default:
            return "";
    }
}
/// <summary>Returns the configured endpoint for the given service.</summary>
private string ResolveEndpointForService(string service)
{
    var llm = _settings.Settings.Llm;
    var normalized = NormalizeServiceName(service);
    if (normalized == "vllm") return llm.VllmEndpoint;
    if (normalized == "ollama") return llm.OllamaEndpoint;
    // External services share the generic endpoint setting.
    return llm.Endpoint;
}
/// <summary>Token usage of the most recent request. Updated after both streaming and non-streaming completions.</summary>
public TokenUsage? LastTokenUsage { get; private set; }
/// <summary>Read-only snapshot of the connection parameters currently in effect (for diagnostics/UI).</summary>
public record RuntimeConnectionSnapshot(
string Service,
string Model,
string Endpoint,
bool AllowInsecureTls,
bool HasApiKey);
/// <summary>Creates the service, wiring both HTTP clients and loading the optional system prompt.</summary>
public LlmService(SettingsService settings)
{
    _settings = settings;
    // Long timeout: LLM generations can legitimately run for several minutes.
    var timeout = TimeSpan.FromMinutes(10);
    _http = new HttpClient { Timeout = timeout };
    // Second client that skips certificate validation — used only when a server opts into insecure TLS.
    _httpInsecure = new HttpClient(new HttpClientHandler
    {
        ServerCertificateCustomValidationCallback = HttpClientHandler.DangerousAcceptAnyServerCertificateValidator
    })
    {
        Timeout = timeout
    };
    LoadSystemPrompt();
}
/// <summary>Builds a snapshot of the effective connection parameters for the active model.</summary>
public RuntimeConnectionSnapshot GetRuntimeConnectionSnapshot()
{
    var service = ResolveService();
    var (endpoint, apiKey, allowInsecureTls) = ResolveServerInfo();
    // Fall back to the per-service default endpoint when no server-specific one is resolved.
    var effectiveEndpoint = string.IsNullOrWhiteSpace(endpoint)
        ? ResolveEndpointForService(service)
        : endpoint;
    return new RuntimeConnectionSnapshot(
        service,
        ResolveModel(),
        effectiveEndpoint ?? "",
        allowInsecureTls,
        !string.IsNullOrWhiteSpace(apiKey));
}
// ─── System prompt (loaded dynamically from the build output directory) ───────
/// <summary>Loads system_prompt.txt from the executable directory, when present.</summary>
private void LoadSystemPrompt()
{
var exeDir = AppContext.BaseDirectory;
var promptFile = Path.Combine(exeDir, "system_prompt.txt");
if (File.Exists(promptFile))
{
// A missing file leaves _systemPrompt null — requests are then sent without a system message.
_systemPrompt = File.ReadAllText(promptFile, Encoding.UTF8).Trim();
}
}
/// <summary>The loaded system prompt, or null when no prompt file was found.</summary>
public string? SystemPrompt => _systemPrompt;
/// <summary>
/// For on-prem services (Ollama/vLLM) decrypts the stored model name per the encryption mode;
/// external services use the configured name verbatim.
/// </summary>
private string ResolveModelName()
{
    var llm = _settings.Settings.Llm;
    var service = NormalizeServiceName(llm.Service);
    var isOnPrem = service is "ollama" or "vllm";
    if (!isOnPrem || string.IsNullOrEmpty(llm.Model))
        return llm.Model;
    // Prefer the registered-model entry when one matches the active selection.
    var registered = FindRegisteredModel(llm, service, llm.Model);
    if (registered != null)
    {
        var registeredName = CryptoService.DecryptIfEnabled(registered.EncryptedModelName, llm.EncryptionEnabled);
        if (!string.IsNullOrWhiteSpace(registeredName))
            return registeredName;
    }
    return CryptoService.DecryptIfEnabled(llm.Model, llm.EncryptionEnabled);
}
/// <summary>max_tokens for OpenAI-compatible calls; vLLM requests are additionally capped at 8192.</summary>
private int ResolveOpenAiCompatibleMaxTokens()
{
    var llm = _settings.Settings.Llm;
    var requested = Math.Clamp(llm.MaxContextTokens, 1, 1_000_000);
    return NormalizeServiceName(llm.Service) == "vllm"
        ? Math.Min(requested, 8192)
        : requested;
}
/// <summary>
/// Resolves endpoint/API key/TLS policy for the active model. A matching RegisteredModel
/// with dedicated server info wins; otherwise the per-service defaults from settings apply.
/// </summary>
private (string Endpoint, string ApiKey, bool AllowInsecureTls) ResolveServerInfo()
{
    var llm = _settings.Settings.Llm;
    var activeService = ResolveService();
    var registered = FindRegisteredModel(llm, activeService, ResolveModel());
    if (registered is null)
    {
        // No registered entry — fall back to the service-level defaults.
        switch (activeService.ToLowerInvariant())
        {
            case "vllm":
                return (llm.VllmEndpoint, ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled), llm.VllmAllowInsecureTls);
            case "ollama":
                return (llm.OllamaEndpoint, ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled), false);
            default:
                return ("", "", false);
        }
    }
    var endpoint = string.IsNullOrWhiteSpace(registered.Endpoint)
        ? ResolveEndpointForService(activeService)
        : registered.Endpoint;
    var apiKey = string.IsNullOrEmpty(registered.ApiKey)
        ? GetDefaultApiKey(llm, activeService)
        : ResolveSecretValue(registered.ApiKey, llm.EncryptionEnabled);
    // Insecure TLS is honored only for vLLM, from either the model entry or the global setting.
    var allowInsecureTls = activeService == "vllm" && (registered.AllowInsecureTls || llm.VllmAllowInsecureTls);
    return (endpoint, apiKey, allowInsecureTls);
}
/// <summary>Finds the RegisteredModel whose (decrypted) name or alias matches the active model for the given service.</summary>
private static Models.RegisteredModel? FindRegisteredModel(Models.LlmSettings llm, string service, string modelName)
{
    foreach (var candidate in llm.RegisteredModels)
    {
        if (!candidate.Service.Equals(service, StringComparison.OrdinalIgnoreCase))
            continue;
        if (candidate.Alias == modelName)
            return candidate;
        if (CryptoService.DecryptIfEnabled(candidate.EncryptedModelName, llm.EncryptionEnabled) == modelName)
            return candidate;
    }
    return null;
}
/// <summary>
/// Returns the authorization header value for the active model.
/// For IBM IAM / CP4D auth a token is issued automatically (caching is handled by the
/// respective token services); otherwise the plain API key is returned for standard Bearer auth.
/// </summary>
internal async Task<string?> ResolveAuthTokenAsync(CancellationToken ct = default)
{
    var llm = _settings.Settings.Llm;
    var activeService = ResolveService();
    var modelName = ResolveModel();
    var registered = FindRegisteredModel(llm, activeService, modelName);
    // IBM Cloud IAM: exchange the API key for an IAM access token.
    if (registered != null &&
        registered.AuthType.Equals("ibm_iam", StringComparison.OrdinalIgnoreCase))
    {
        var ibmApiKey = !string.IsNullOrWhiteSpace(registered.ApiKey)
            ? ResolveSecretValue(registered.ApiKey, llm.EncryptionEnabled)
            : GetDefaultApiKey(llm, activeService);
        // Fail fast with a clear message instead of sending an empty key to the IAM endpoint,
        // which would only surface as an opaque remote HTTP error.
        if (string.IsNullOrWhiteSpace(ibmApiKey))
            throw new InvalidOperationException("IBM IAM 인증에 필요한 API 키가 설정되지 않았습니다.");
        return await IbmIamTokenService.GetTokenAsync(ibmApiKey, ct: ct);
    }
    // CP4D: username/password login against the configured CP4D URL.
    if (registered != null &&
        registered.AuthType.Equals("cp4d", StringComparison.OrdinalIgnoreCase) &&
        !string.IsNullOrWhiteSpace(registered.Cp4dUrl))
    {
        var password = CryptoService.DecryptIfEnabled(registered.Cp4dPassword, llm.EncryptionEnabled);
        return await Cp4dTokenService.GetTokenAsync(
            registered.Cp4dUrl, registered.Cp4dUsername, password, ct);
    }
    // Default Bearer auth — return the configured API key, or null when none is set.
    var (_, apiKey, _) = ResolveServerInfo();
    return string.IsNullOrEmpty(apiKey) ? null : apiKey;
}
/// <summary>
/// Applies the resolved credential to the request as a Bearer Authorization header.
/// IBM IAM / CP4D tokens are issued automatically; no header is set when no credential exists.
/// </summary>
private async Task ApplyAuthHeaderAsync(HttpRequestMessage req, CancellationToken ct)
{
    var token = await ResolveAuthTokenAsync(ct);
    if (string.IsNullOrEmpty(token))
        return;
    req.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", token);
}
/// <summary>Default (settings-level) API key for on-prem services; empty for everything else.</summary>
private static string GetDefaultApiKey(LlmSettings llm, string? service = null)
{
    var svc = (service ?? llm.Service).ToLowerInvariant();
    if (svc == "vllm")
        return ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled);
    if (svc == "ollama")
        return ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled);
    return "";
}
// ─── Non-streaming response ────────────────────────────────────────────
/// <summary>
/// Sends a non-streaming chat request to the active service. On failure, each configured
/// fallback entry ("service" or "service:model") is tried in order.
/// </summary>
public async Task<string> SendAsync(List<ChatMessage> messages, CancellationToken ct = default)
{
    var llm = _settings.Settings.Llm;
    var activeService = ResolveService();
    EnsureOperationModeAllowsLlmService(activeService);
    // Reset so a stale value from an earlier request never reads as "fallback happened" (null = no fallback).
    LastFallbackInfo = null;
    try
    {
        return await SendWithServiceAsync(activeService, messages, ct);
    }
    catch (Exception ex) when (llm.FallbackModels.Count > 0)
    {
        // Try the fallback models sequentially.
        foreach (var fallback in llm.FallbackModels)
        {
            var parts = fallback.Split(':', 2);
            var fbService = parts[0].Trim();
            // NOTE(review): fbModel is parsed but never applied — fallback currently switches
            // the service only, not the model. Confirm whether a model override was intended.
            var fbModel = parts.Length > 1 ? parts[1].Trim() : "";
            try
            {
                EnsureOperationModeAllowsLlmService(fbService);
                LogService.Warn($"모델 폴백: {activeService} → {fbService} ({ex.Message})");
                LastFallbackInfo = $"{activeService} → {fbService}";
                return await SendWithServiceAsync(fbService, messages, ct);
            }
            catch { continue; }
        }
        throw; // every fallback failed — surface the original error
    }
}
/// <summary>Most recent fallback info (for UI display). Null means no fallback occurred.</summary>
public string? LastFallbackInfo { get; private set; }
/// <summary>Dispatches a non-streaming send to the backend implementation for the given service.</summary>
private Task<string> SendWithServiceAsync(string service, List<ChatMessage> messages, CancellationToken ct)
{
    var backend = NormalizeServiceName(service);
    if (backend == "gemini") return SendGeminiAsync(messages, ct);
    if (backend == "sigmoid") return SendSigmoidAsync(messages, ct);
    if (backend == "vllm") return SendOpenAiCompatibleAsync(messages, ct);
    return SendOllamaAsync(messages, ct);
}
// ─── Streaming response ────────────────────────────────────────────────
/// <summary>Streams the chat response chunk-by-chunk from the active service.</summary>
public async IAsyncEnumerable<string> StreamAsync(
    List<ChatMessage> messages,
    [EnumeratorCancellation] CancellationToken ct = default)
{
    var activeService = ResolveService();
    EnsureOperationModeAllowsLlmService(activeService);
    var backend = NormalizeServiceName(activeService);
    var stream =
        backend == "gemini" ? StreamGeminiAsync(messages, ct)
        : backend == "sigmoid" ? StreamSigmoidAsync(messages, ct)
        : backend == "vllm" ? StreamOpenAiCompatibleAsync(messages, ct)
        : StreamOllamaAsync(messages, ct);
    await foreach (var chunk in stream.WithCancellation(ct))
        yield return chunk;
}
// ─── Connection test ───────────────────────────────────────────────────
/// <summary>
/// Performs a lightweight connectivity probe against the active service and returns
/// (ok, user-facing message). Never throws — all failures are mapped to a message.
/// </summary>
public async Task<(bool ok, string message)> TestConnectionAsync()
{
    try
    {
        // Fix: removed the unused "llm" local that shadowed settings access.
        var normalizedService = ResolveService();
        if (OperationModePolicy.IsInternal(_settings.Settings) && IsExternalLlmService(normalizedService))
        {
            var blockedName = normalizedService == "sigmoid" ? "Claude" : "Gemini";
            return (false, $"사내 모드에서는 {blockedName} 외부 LLM 연결이 차단됩니다.");
        }
        switch (normalizedService)
        {
            case "ollama":
            {
                var (endpoint, _, _) = ResolveServerInfo();
                var ep = string.IsNullOrWhiteSpace(endpoint) ? ResolveEndpointForService("ollama") : endpoint;
                var resp = await _http.GetAsync(ep.TrimEnd('/') + "/api/tags");
                return resp.IsSuccessStatusCode
                    ? (true, "Ollama 연결 성공")
                    : (false, ClassifyHttpError(resp));
            }
            case "vllm":
            {
                var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
                var ep = string.IsNullOrWhiteSpace(endpoint) ? ResolveEndpointForService("vllm") : endpoint;
                using var vReq = new HttpRequestMessage(HttpMethod.Get, ep.TrimEnd('/') + "/v1/models");
                // vLLM may need Bearer / IBM IAM / CP4D auth depending on the registered model.
                await ApplyAuthHeaderAsync(vReq, CancellationToken.None);
                using var vResp = await SendWithTlsAsync(vReq, allowInsecureTls, CancellationToken.None);
                return vResp.IsSuccessStatusCode
                    ? (true, "vLLM 연결 성공")
                    : (false, ClassifyHttpError(vResp));
            }
            case "gemini":
            {
                var gKey = ResolveApiKeyForService("gemini");
                if (string.IsNullOrEmpty(gKey)) return (false, "API 키가 설정되지 않았습니다");
                var gResp = await _http.GetAsync(
                    $"https://generativelanguage.googleapis.com/v1beta/models?key={gKey}");
                return gResp.IsSuccessStatusCode
                    ? (true, "Gemini API 연결 성공")
                    : (false, ClassifyHttpError(gResp));
            }
            case "sigmoid":
            {
                var cKey = ResolveApiKeyForService("sigmoid");
                if (string.IsNullOrEmpty(cKey)) return (false, "API 키가 설정되지 않았습니다");
                using var cReq = new HttpRequestMessage(HttpMethod.Get, $"https://{SigmoidApiHost}/v1/models");
                cReq.Headers.Add("x-api-key", cKey);
                cReq.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
                var cResp = await _http.SendAsync(cReq);
                return cResp.IsSuccessStatusCode
                    ? (true, "Claude API 연결 성공")
                    : (false, ClassifyHttpError(cResp));
            }
            default:
                return (false, "알 수 없는 서비스");
        }
    }
    catch (TaskCanceledException)
    {
        // HttpClient timeout surfaces as TaskCanceledException.
        return (false, "연결 시간 초과 — 서버가 응답하지 않습니다");
    }
    catch (HttpRequestException ex)
    {
        return (false, $"연결 실패 — {ex.Message}");
    }
    catch (Exception ex)
    {
        return (false, ex.Message);
    }
}
// ═══════════════════════════════════════════════════════════════════════
// Ollama
// ═══════════════════════════════════════════════════════════════════════
/// <summary>Non-streaming chat call against the Ollama /api/chat endpoint.</summary>
private async Task<string> SendOllamaAsync(List<ChatMessage> messages, CancellationToken ct)
{
    var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
    var baseUrl = string.IsNullOrEmpty(endpoint) ? _settings.Settings.Llm.Endpoint : endpoint;
    var payload = BuildOllamaBody(messages, stream: false);
    var respBody = await PostJsonWithRetryAsync(baseUrl.TrimEnd('/') + "/api/chat", payload, allowInsecureTls, ct);
    return SafeParseJson(respBody, root =>
    {
        // Record token usage before extracting the message text.
        TryParseOllamaUsage(root);
        return root.GetProperty("message").GetProperty("content").GetString() ?? "";
    }, "Ollama 응답");
}
/// <summary>Streams an Ollama /api/chat response (one JSON object per line).</summary>
private async IAsyncEnumerable<string> StreamOllamaAsync(
List<ChatMessage> messages,
[EnumeratorCancellation] CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
var body = BuildOllamaBody(messages, stream: true);
var url = ep.TrimEnd('/') + "/api/chat";
using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
using var resp = await SendWithErrorClassificationAsync(req, allowInsecureTls, ct);
using var stream = await resp.Content.ReadAsStreamAsync(ct);
using var reader = new StreamReader(stream);
while (!reader.EndOfStream && !ct.IsCancellationRequested)
{
var line = await ReadLineWithTimeoutAsync(reader, ct);
if (line == null) break; // chunk timeout — end the stream gracefully
if (string.IsNullOrEmpty(line)) continue;
string? text = null;
try
{
using var doc = JsonDocument.Parse(line);
if (doc.RootElement.TryGetProperty("message", out var msg) &&
msg.TryGetProperty("content", out var c))
text = c.GetString();
// Ollama includes token usage on the final chunk (done=true).
if (doc.RootElement.TryGetProperty("done", out var done) && done.GetBoolean())
TryParseOllamaUsage(doc.RootElement);
}
catch (JsonException ex)
{
// Malformed chunk: log and keep reading rather than aborting the whole stream.
LogService.Warn($"Ollama 스트리밍 JSON 파싱 오류: {ex.Message}");
}
if (!string.IsNullOrEmpty(text)) yield return text;
}
}
/// <summary>Builds the JSON body for an Ollama /api/chat request.</summary>
private object BuildOllamaBody(List<ChatMessage> messages, bool stream)
{
    // Fix: removed the unused "llm" local.
    var msgs = BuildMessageList(messages);
    return new
    {
        model = ResolveModelName(),
        messages = msgs,
        stream = stream,
        // Ollama nests sampling parameters under "options".
        options = new { temperature = ResolveTemperature() }
    };
}
// ═══════════════════════════════════════════════════════════════════════
// OpenAI-Compatible (vLLM)
// ═══════════════════════════════════════════════════════════════════════
/// <summary>Non-streaming chat call against an OpenAI-compatible /v1/chat/completions endpoint (vLLM).</summary>
private async Task<string> SendOpenAiCompatibleAsync(List<ChatMessage> messages, CancellationToken ct)
{
    var llm = _settings.Settings.Llm;
    var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
    var baseUrl = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
    var url = baseUrl.TrimEnd('/') + "/v1/chat/completions";
    var payload = JsonSerializer.Serialize(BuildOpenAiBody(messages, stream: false));
    using var req = new HttpRequestMessage(HttpMethod.Post, url)
    {
        Content = new StringContent(payload, Encoding.UTF8, "application/json")
    };
    // vLLM may require Bearer / IBM IAM / CP4D auth depending on the registered model.
    await ApplyAuthHeaderAsync(req, ct);
    using var resp = await SendWithErrorClassificationAsync(req, allowInsecureTls, ct);
    var respBody = await resp.Content.ReadAsStringAsync(ct);
    return SafeParseJson(respBody, root =>
    {
        TryParseOpenAiUsage(root);
        var choices = root.GetProperty("choices");
        if (choices.GetArrayLength() == 0) return "(빈 응답)";
        return choices[0].GetProperty("message").GetProperty("content").GetString() ?? "";
    }, "vLLM 응답");
}
/// <summary>Streams an OpenAI-compatible (vLLM) chat completion via SSE ("data: ..." lines).</summary>
private async IAsyncEnumerable<string> StreamOpenAiCompatibleAsync(
List<ChatMessage> messages,
[EnumeratorCancellation] CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
var body = BuildOpenAiBody(messages, stream: true);
var url = ep.TrimEnd('/') + "/v1/chat/completions";
using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
await ApplyAuthHeaderAsync(req, ct);
using var resp = await SendWithErrorClassificationAsync(req, allowInsecureTls, ct);
using var stream = await resp.Content.ReadAsStreamAsync(ct);
using var reader = new StreamReader(stream);
while (!reader.EndOfStream && !ct.IsCancellationRequested)
{
var line = await ReadLineWithTimeoutAsync(reader, ct);
if (line == null) break; // chunk timeout — end the stream gracefully
if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
var data = line["data: ".Length..];
if (data == "[DONE]") break; // OpenAI SSE terminator
string? text = null;
try
{
using var doc = JsonDocument.Parse(data);
TryParseOpenAiUsage(doc.RootElement);
var choices = doc.RootElement.GetProperty("choices");
if (choices.GetArrayLength() > 0)
{
var delta = choices[0].GetProperty("delta");
if (delta.TryGetProperty("content", out var c))
text = c.GetString();
}
}
catch (JsonException ex)
{
// Malformed chunk: log and keep reading rather than aborting the whole stream.
LogService.Warn($"vLLM 스트리밍 JSON 파싱 오류: {ex.Message}");
}
if (!string.IsNullOrEmpty(text)) yield return text;
}
}
/// <summary>Builds the JSON body for an OpenAI-compatible chat completion request.</summary>
private object BuildOpenAiBody(List<ChatMessage> messages, bool stream)
{
    // Fix: removed the unused "llm" local.
    var msgs = BuildMessageList(messages, openAiVision: true);
    var body = new Dictionary<string, object?>
    {
        ["model"] = ResolveModelName(),
        ["messages"] = msgs,
        ["stream"] = stream,
        ["temperature"] = ResolveTemperature(),
        ["max_tokens"] = ResolveOpenAiCompatibleMaxTokens()
    };
    // Only include reasoning_effort when an override explicitly set it.
    var effort = ResolveReasoningEffort();
    if (!string.IsNullOrWhiteSpace(effort))
        body["reasoning_effort"] = effort;
    return body;
}
// ═══════════════════════════════════════════════════════════════════════
// Gemini
// ═══════════════════════════════════════════════════════════════════════
/// <summary>Non-streaming generateContent call against the Gemini REST API.</summary>
private async Task<string> SendGeminiAsync(List<ChatMessage> messages, CancellationToken ct)
{
    // Fix: removed the unused "llm" local.
    var apiKey = ResolveApiKeyForService("gemini");
    if (string.IsNullOrEmpty(apiKey))
        throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다. 설정 > AX Agent에서 API 키를 입력하세요.");
    var model = ResolveModel();
    var body = BuildGeminiBody(messages);
    var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={apiKey}";
    var resp = await PostJsonWithRetryAsync(url, body, false, ct);
    return SafeParseJson(resp, root =>
    {
        TryParseGeminiUsage(root);
        var candidates = root.GetProperty("candidates");
        if (candidates.GetArrayLength() == 0) return "(빈 응답)";
        var parts = candidates[0].GetProperty("content").GetProperty("parts");
        if (parts.GetArrayLength() == 0) return "(빈 응답)";
        return parts[0].GetProperty("text").GetString() ?? "";
    }, "Gemini 응답");
}
/// <summary>Streams a Gemini streamGenerateContent (SSE) response.</summary>
private async IAsyncEnumerable<string> StreamGeminiAsync(
    List<ChatMessage> messages,
    [EnumeratorCancellation] CancellationToken ct)
{
    // Fix: removed the unused "llm" local.
    var apiKey = ResolveApiKeyForService("gemini");
    if (string.IsNullOrEmpty(apiKey))
        throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다.");
    var model = ResolveModel();
    var body = BuildGeminiBody(messages);
    var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:streamGenerateContent?alt=sse&key={apiKey}";
    using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
    using var resp = await SendWithErrorClassificationAsync(req, false, ct);
    using var stream = await resp.Content.ReadAsStreamAsync(ct);
    using var reader = new StreamReader(stream);
    while (!reader.EndOfStream && !ct.IsCancellationRequested)
    {
        var line = await ReadLineWithTimeoutAsync(reader, ct);
        if (line == null) break;
        if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
        var data = line["data: ".Length..];
        string? parsed = null;
        try
        {
            using var doc = JsonDocument.Parse(data);
            TryParseGeminiUsage(doc.RootElement);
            var candidates = doc.RootElement.GetProperty("candidates");
            if (candidates.GetArrayLength() == 0) continue;
            // A single SSE event may carry several text parts — concatenate them.
            var sb = new StringBuilder();
            var parts = candidates[0].GetProperty("content").GetProperty("parts");
            foreach (var part in parts.EnumerateArray())
            {
                if (part.TryGetProperty("text", out var t))
                {
                    var text = t.GetString();
                    if (!string.IsNullOrEmpty(text)) sb.Append(text);
                }
            }
            if (sb.Length > 0) parsed = sb.ToString();
        }
        catch (JsonException ex)
        {
            // Malformed chunk: log and keep reading rather than aborting the whole stream.
            LogService.Warn($"Gemini 스트리밍 JSON 파싱 오류: {ex.Message}");
        }
        if (parsed != null) yield return parsed;
    }
}
/// <summary>Builds the JSON body for a Gemini generateContent request (system prompt, turns, inline images).</summary>
private object BuildGeminiBody(List<ChatMessage> messages)
{
    var llm = _settings.Settings.Llm;
    var contents = new List<object>();
    foreach (var m in messages)
    {
        if (m.Role == "system") continue;
        var parts = new List<object> { new { text = m.Content } };
        if (m.Images?.Count > 0)
        {
            foreach (var img in m.Images)
                parts.Add(new { inlineData = new { mimeType = img.MimeType, data = img.Base64 } });
        }
        // Gemini only knows "user" and "model" roles.
        contents.Add(new
        {
            role = m.Role == "assistant" ? "model" : "user",
            parts
        });
    }
    // A dictionary lets us omit systemInstruction without duplicating the whole return shape
    // (the previous version had two near-identical anonymous-type returns).
    var body = new Dictionary<string, object?>();
    if (!string.IsNullOrEmpty(_systemPrompt))
        body["systemInstruction"] = new { parts = new[] { new { text = _systemPrompt } } };
    body["contents"] = contents;
    body["generationConfig"] = new { temperature = ResolveTemperature(), maxOutputTokens = llm.MaxContextTokens };
    return body;
}
// ═══════════════════════════════════════════════════════════════════════
// Claude (messages API)
// ═══════════════════════════════════════════════════════════════════════
/// <summary>Non-streaming call against the Claude messages API.</summary>
private async Task<string> SendSigmoidAsync(List<ChatMessage> messages, CancellationToken ct)
{
    var apiKey = ResolveApiKeyForService("sigmoid");
    if (string.IsNullOrEmpty(apiKey))
        throw new InvalidOperationException("Claude API 키가 설정되지 않았습니다. 설정 > AX Agent에서 API 키를 입력하세요.");
    var payload = JsonSerializer.Serialize(BuildSigmoidBody(messages, stream: false));
    using var req = new HttpRequestMessage(HttpMethod.Post, $"https://{SigmoidApiHost}/v1/messages")
    {
        Content = new StringContent(payload, Encoding.UTF8, "application/json")
    };
    // This API authenticates via x-api-key plus a version header, not Bearer auth.
    req.Headers.Add("x-api-key", apiKey);
    req.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
    using var resp = await _http.SendAsync(req, ct);
    if (!resp.IsSuccessStatusCode)
    {
        var errBody = await resp.Content.ReadAsStringAsync(ct);
        throw new HttpRequestException(ClassifyHttpError(resp, errBody));
    }
    var respJson = await resp.Content.ReadAsStringAsync(ct);
    return SafeParseJson(respJson, root =>
    {
        TryParseSigmoidUsage(root);
        var content = root.GetProperty("content");
        if (content.GetArrayLength() == 0) return "(빈 응답)";
        return content[0].GetProperty("text").GetString() ?? "";
    }, "Claude 응답");
}
/// <summary>Streams a Claude messages API response via SSE (content_block_delta events carry the text).</summary>
private async IAsyncEnumerable<string> StreamSigmoidAsync(
List<ChatMessage> messages,
[EnumeratorCancellation] CancellationToken ct)
{
var apiKey = ResolveApiKeyForService("sigmoid");
if (string.IsNullOrEmpty(apiKey))
throw new InvalidOperationException("Claude API 키가 설정되지 않았습니다.");
var body = BuildSigmoidBody(messages, stream: true);
var json = JsonSerializer.Serialize(body);
using var req = new HttpRequestMessage(HttpMethod.Post, $"https://{SigmoidApiHost}/v1/messages");
req.Content = new StringContent(json, Encoding.UTF8, "application/json");
// This API authenticates via x-api-key plus a version header, not Bearer auth.
req.Headers.Add("x-api-key", apiKey);
req.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
// ResponseHeadersRead: start reading the body as it streams in.
using var resp = await _http.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, ct);
if (!resp.IsSuccessStatusCode)
{
var errBody = await resp.Content.ReadAsStringAsync(ct);
throw new HttpRequestException(ClassifyHttpError(resp, errBody));
}
using var stream = await resp.Content.ReadAsStreamAsync(ct);
using var reader = new StreamReader(stream);
while (!reader.EndOfStream && !ct.IsCancellationRequested)
{
var line = await ReadLineWithTimeoutAsync(reader, ct);
if (line == null) break; // chunk timeout — end the stream gracefully
if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
var data = line["data: ".Length..];
string? text = null;
try
{
using var doc = JsonDocument.Parse(data);
var type = doc.RootElement.GetProperty("type").GetString();
if (type == "content_block_delta")
{
var delta = doc.RootElement.GetProperty("delta");
if (delta.TryGetProperty("text", out var t))
text = t.GetString();
}
else if (type is "message_start" or "message_delta")
{
// message_start: usage in .message.usage, message_delta: usage in .usage
if (doc.RootElement.TryGetProperty("message", out var msg) &&
msg.TryGetProperty("usage", out var u1))
TryParseSigmoidUsageFromElement(u1);
else if (doc.RootElement.TryGetProperty("usage", out var u2))
TryParseSigmoidUsageFromElement(u2);
}
}
catch (JsonException ex)
{
// Malformed chunk: log and keep reading rather than aborting the whole stream.
LogService.Warn($"Claude 스트리밍 JSON 파싱 오류: {ex.Message}");
}
if (!string.IsNullOrEmpty(text)) yield return text;
}
}
/// <summary>Builds the JSON body for a Claude messages request (system prompt, turns, vision blocks).</summary>
private object BuildSigmoidBody(List<ChatMessage> messages, bool stream)
{
    var llm = _settings.Settings.Llm;
    var msgs = new List<object>();
    foreach (var m in messages)
    {
        if (m.Role == "system") continue;
        if (m.Images?.Count > 0)
        {
            // Claude Vision: content becomes an array of image blocks followed by the text block.
            var contentParts = new List<object>();
            foreach (var img in m.Images)
                contentParts.Add(new { type = "image", source = new { type = "base64", media_type = img.MimeType, data = img.Base64 } });
            contentParts.Add(new { type = "text", text = m.Content });
            msgs.Add(new { role = m.Role, content = contentParts });
        }
        else
        {
            msgs.Add(new { role = m.Role, content = m.Content });
        }
    }
    // A dictionary lets us omit "system" when no prompt is set without duplicating the whole
    // return shape (the previous version had two near-identical anonymous-type returns).
    var body = new Dictionary<string, object?>
    {
        ["model"] = ResolveModel(),
        ["max_tokens"] = llm.MaxContextTokens,
        ["temperature"] = ResolveTemperature(),
    };
    if (!string.IsNullOrEmpty(_systemPrompt))
        body["system"] = _systemPrompt;
    body["messages"] = msgs;
    body["stream"] = stream;
    return body;
}
// ─── Shared helpers ────────────────────────────────────────────────────
/// <summary>
/// Converts chat history into the wire message list, prepending the system prompt and
/// encoding images per backend (OpenAI image_url blocks vs. Ollama base64 arrays).
/// </summary>
private List<object> BuildMessageList(List<ChatMessage> messages, bool openAiVision = false)
{
    var result = new List<object>();
    if (!string.IsNullOrEmpty(_systemPrompt))
        result.Add(new { role = "system", content = _systemPrompt });
    foreach (var m in messages)
    {
        if (m.Role == "system") continue;
        var hasImages = m.Images?.Count > 0;
        if (!hasImages)
        {
            result.Add(new { role = m.Role, content = m.Content });
            continue;
        }
        if (openAiVision)
        {
            // OpenAI Vision: content array of a text entry plus data-URL image entries.
            var contentParts = new List<object> { new { type = "text", text = m.Content } };
            foreach (var img in m.Images!)
                contentParts.Add(new { type = "image_url", image_url = new { url = $"data:{img.MimeType};base64,{img.Base64}" } });
            result.Add(new { role = m.Role, content = contentParts });
        }
        else
        {
            // Ollama Vision: a parallel "images" field carrying raw base64 strings.
            result.Add(new { role = m.Role, content = m.Content, images = m.Images!.Select(i => i.Base64).ToArray() });
        }
    }
    return result;
}
/// <summary>Sends via the insecure-TLS client only when the caller explicitly allows it.</summary>
private async Task<HttpResponseMessage> SendWithTlsAsync(
    HttpRequestMessage req,
    bool allowInsecureTls,
    CancellationToken ct,
    HttpCompletionOption completion = HttpCompletionOption.ResponseContentRead)
{
    var client = allowInsecureTls ? _httpInsecure : _http;
    return await client.SendAsync(req, completion, ct);
}
/// <summary>Non-streaming POST with retries (up to MaxRetries) for transient failures.</summary>
private async Task<string> PostJsonWithRetryAsync(string url, object body, bool allowInsecureTls, CancellationToken ct)
{
var json = JsonSerializer.Serialize(body);
Exception? lastEx = null;
for (int attempt = 0; attempt <= MaxRetries; attempt++)
{
try
{
using var req = new HttpRequestMessage(HttpMethod.Post, url)
{
Content = new StringContent(json, Encoding.UTF8, "application/json")
};
using var resp = await SendWithTlsAsync(req, allowInsecureTls, ct);
if (resp.IsSuccessStatusCode)
return await resp.Content.ReadAsStringAsync(ct);
// 429 rate limit → linear backoff, then retry
if ((int)resp.StatusCode == 429 && attempt < MaxRetries)
{
await Task.Delay(1000 * (attempt + 1), ct);
continue;
}
// Any other error → classify and throw
var errBody = await resp.Content.ReadAsStringAsync(ct);
throw new HttpRequestException(ClassifyHttpError(resp, errBody));
}
catch (HttpRequestException) { throw; } // classified errors are not retried
catch (TaskCanceledException) when (!ct.IsCancellationRequested && attempt < MaxRetries)
{
// HttpClient timeout (not caller cancellation): retry after a linear backoff.
lastEx = new TimeoutException("요청 시간 초과");
await Task.Delay(1000 * (attempt + 1), ct);
}
}
throw lastEx ?? new HttpRequestException("요청 실패");
}
/// <summary>
/// Streaming-only send: returns the response as soon as headers are read, or
/// disposes it and throws a classified <see cref="HttpRequestException"/> on a
/// non-success status.
/// </summary>
private async Task<HttpResponseMessage> SendWithErrorClassificationAsync(
    HttpRequestMessage req, bool allowInsecureTls, CancellationToken ct)
{
    var resp = await SendWithTlsAsync(req, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead);
    if (resp.IsSuccessStatusCode)
        return resp;

    // Read the body for diagnostics before releasing the response.
    var errorBody = await resp.Content.ReadAsStringAsync(ct);
    var message = ClassifyHttpError(resp, errorBody);
    resp.Dispose();
    throw new HttpRequestException(message);
}
/// <summary>
/// Reads one line from a streaming response, giving up after
/// <see cref="ChunkTimeout"/> of silence. Returns null on timeout so the
/// caller treats the stream as finished rather than failing.
/// </summary>
private static async Task<string?> ReadLineWithTimeoutAsync(StreamReader reader, CancellationToken ct)
{
    using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    timeoutCts.CancelAfter(ChunkTimeout);
    try
    {
        return await reader.ReadLineAsync(timeoutCts.Token);
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Chunk timeout (not caller cancellation): log and end the stream gracefully.
        LogService.Warn("스트리밍 청크 타임아웃 (30초 무응답)");
        return null;
    }
}
/// <summary>
/// Parses <paramref name="json"/> and applies <paramref name="extractor"/>,
/// converting parse/shape failures into descriptive exceptions. A top-level
/// "error" property is surfaced as an API error before extraction runs.
/// </summary>
/// <param name="json">Raw response body.</param>
/// <param name="extractor">Pulls the desired string out of the parsed root.</param>
/// <param name="context">Label included in error messages (e.g. the backend name).</param>
private static string SafeParseJson(string json, Func<JsonElement, string> extractor, string context)
{
    // First ~200 chars of the payload, used only in failure diagnostics.
    string Preview() => json.Length > 200 ? json[..200] + "…" : json;

    try
    {
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        // Detect an API-level error envelope before extracting.
        if (root.TryGetProperty("error", out var error))
        {
            var msg = error.TryGetProperty("message", out var m) ? m.GetString() : error.ToString();
            throw new HttpRequestException($"[{context}] API 에러: {msg}");
        }

        return extractor(root);
    }
    catch (JsonException ex)
    {
        throw new InvalidOperationException(
            $"[{context}] 응답 형식 오류 — 예상하지 못한 JSON 형식입니다.\n파싱 오류: {ex.Message}\n응답 미리보기: {Preview()}");
    }
    catch (KeyNotFoundException)
    {
        throw new InvalidOperationException(
            $"[{context}] 응답에 필요한 필드가 없습니다.\n응답 미리보기: {Preview()}");
    }
}
/// <summary>
/// Maps an HTTP error response to a user-friendly Korean message, appending the
/// server-provided detail (JSON "error.message" or string "error") when present.
/// </summary>
/// <param name="resp">The failed response (non-success status).</param>
/// <param name="body">Optional response body; probed best-effort for a JSON error field.</param>
/// <returns>Message of the form "<c>{summary}</c>" or "<c>{summary}\n상세: {detail}</c>".</returns>
private static string ClassifyHttpError(HttpResponseMessage resp, string? body = null)
{
    var code = (int)resp.StatusCode;
    var detail = "";
    // Best-effort extraction of error.message (object form) or error (string form);
    // any parse failure is deliberately swallowed and the generic message is used.
    if (!string.IsNullOrEmpty(body))
    {
        try
        {
            using var doc = JsonDocument.Parse(body);
            if (doc.RootElement.TryGetProperty("error", out var err))
            {
                if (err.ValueKind == JsonValueKind.Object && err.TryGetProperty("message", out var m))
                    detail = m.GetString() ?? "";
                else if (err.ValueKind == JsonValueKind.String)
                    detail = err.GetString() ?? "";
            }
        }
        catch { }
    }
    var msg = code switch
    {
        400 => "잘못된 요청 — 모델 이름이나 요청 형식을 확인하세요",
        401 => "인증 실패 — API 키가 유효하지 않습니다",
        403 => "접근 거부 — API 키 권한을 확인하세요",
        404 => "모델을 찾을 수 없습니다 — 모델 이름을 확인하세요",
        429 => "요청 한도 초과 — 잠시 후 다시 시도하세요",
        500 => "서버 내부 오류 — LLM 서버 상태를 확인하세요",
        // Fix: 504 (gateway timeout) is transient like 502/503; previously it fell
        // through to the generic "HTTP 504" branch instead of the retry hint.
        502 or 503 or 504 => "서버 일시 장애 — 잠시 후 다시 시도하세요",
        _ => $"HTTP {code} 오류"
    };
    return string.IsNullOrEmpty(detail) ? msg : $"{msg}\n상세: {detail}";
}
/// <summary>Serializes <paramref name="body"/> to JSON wrapped in UTF-8 application/json content.</summary>
private static StringContent JsonContent(object body)
    => new(JsonSerializer.Serialize(body), Encoding.UTF8, "application/json");
// ─── 토큰 사용량 파싱 헬퍼 ──────────────────────────────────────────
/// <summary>
/// Records Ollama token counts (prompt_eval_count / eval_count) into
/// <c>LastTokenUsage</c>; only updates when at least one count is positive.
/// </summary>
private void TryParseOllamaUsage(JsonElement root)
{
    try
    {
        int promptTokens = root.TryGetProperty("prompt_eval_count", out var p) ? p.GetInt32() : 0;
        int completionTokens = root.TryGetProperty("eval_count", out var e) ? e.GetInt32() : 0;
        if (promptTokens > 0 || completionTokens > 0)
            LastTokenUsage = new TokenUsage(promptTokens, completionTokens);
    }
    catch
    {
        // Usage parsing is best-effort; never let it break the response path.
    }
}
/// <summary>
/// Records OpenAI-style token counts (usage.prompt_tokens / usage.completion_tokens)
/// into <c>LastTokenUsage</c>; only updates when at least one count is positive.
/// </summary>
private void TryParseOpenAiUsage(JsonElement root)
{
    try
    {
        if (!root.TryGetProperty("usage", out var usage))
            return;
        int promptTokens = usage.TryGetProperty("prompt_tokens", out var p) ? p.GetInt32() : 0;
        int completionTokens = usage.TryGetProperty("completion_tokens", out var c) ? c.GetInt32() : 0;
        if (promptTokens > 0 || completionTokens > 0)
            LastTokenUsage = new TokenUsage(promptTokens, completionTokens);
    }
    catch
    {
        // Usage parsing is best-effort; never let it break the response path.
    }
}
/// <summary>
/// Records Gemini token counts (usageMetadata.promptTokenCount /
/// usageMetadata.candidatesTokenCount) into <c>LastTokenUsage</c>;
/// only updates when at least one count is positive.
/// </summary>
private void TryParseGeminiUsage(JsonElement root)
{
    try
    {
        if (!root.TryGetProperty("usageMetadata", out var usage))
            return;
        int promptTokens = usage.TryGetProperty("promptTokenCount", out var p) ? p.GetInt32() : 0;
        int completionTokens = usage.TryGetProperty("candidatesTokenCount", out var c) ? c.GetInt32() : 0;
        if (promptTokens > 0 || completionTokens > 0)
            LastTokenUsage = new TokenUsage(promptTokens, completionTokens);
    }
    catch
    {
        // Usage parsing is best-effort; never let it break the response path.
    }
}
/// <summary>
/// Extracts the top-level "usage" object from an Anthropic-style response and
/// delegates to <see cref="TryParseSigmoidUsageFromElement"/>; no-op when absent.
/// </summary>
private void TryParseSigmoidUsage(JsonElement root)
{
    try
    {
        if (root.TryGetProperty("usage", out var usage))
            TryParseSigmoidUsageFromElement(usage);
    }
    catch
    {
        // Usage parsing is best-effort; never let it break the response path.
    }
}
/// <summary>
/// Records Anthropic-style token counts (input_tokens / output_tokens) from an
/// already-extracted usage element into <c>LastTokenUsage</c>; only updates
/// when at least one count is positive.
/// </summary>
private void TryParseSigmoidUsageFromElement(JsonElement usage)
{
    try
    {
        int inputTokens = usage.TryGetProperty("input_tokens", out var i) ? i.GetInt32() : 0;
        int outputTokens = usage.TryGetProperty("output_tokens", out var o) ? o.GetInt32() : 0;
        if (inputTokens > 0 || outputTokens > 0)
            LastTokenUsage = new TokenUsage(inputTokens, outputTokens);
    }
    catch
    {
        // Usage parsing is best-effort; never let it break the response path.
    }
}
/// <summary>Releases both owned HTTP clients (standard and insecure-TLS).</summary>
public void Dispose()
{
    _http.Dispose();
    _httpInsecure.Dispose();
}
}