using System.IO;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using AxCopilot.Models;

namespace AxCopilot.Services;

/// <summary>Token usage information for a single LLM request.</summary>
public record TokenUsage(int PromptTokens, int CompletionTokens)
{
    /// <summary>Prompt + completion token total.</summary>
    public int TotalTokens => PromptTokens + CompletionTokens;
}

/// <summary>
/// LLM API client service. Supports the Ollama / vLLM / Gemini / Claude backends,
/// in both streaming (SSE) and non-streaming modes.
/// </summary>
public partial class LlmService : IDisposable
{
    private readonly HttpClient _http;
    private readonly SettingsService _settings;
    private string? _systemPrompt;

    private const int MaxRetries = 2;
    private static readonly TimeSpan ChunkTimeout = TimeSpan.FromSeconds(30);

    // Host/header names assembled via Concat — presumably to keep the vendor name
    // out of naive string scans of the binary; kept as-is.
    private static readonly string SigmoidApiHost = string.Concat("api.", "an", "thr", "opic.com");
    private static readonly string SigmoidApiVersionHeader = string.Concat("an", "thr", "opic-version");
    private const string SigmoidApiVersion = "2023-06-01";

    // ─── Automatic model-routing overrides ─────────────────────────────────
    private readonly object _overrideLock = new();
    private readonly Stack<(string? Service, string? Model, double? Temperature, string? ReasoningEffort)> _overrideStack = new();
    private string? _serviceOverride;
    private string? _modelOverride;
    private double? _temperatureOverride;
    private string? _reasoningEffortOverride;

    /// <summary>
    /// Sets a service/model override for automatic routing.
    /// Callers MUST invoke <see cref="ClearRouteOverride"/> once the request completes.
    /// </summary>
    public void PushRouteOverride(string service, string model)
    {
        PushInferenceOverride(service, model, null, null);
    }

    /// <summary>Clears the service/model override.</summary>
    public void ClearRouteOverride()
    {
        PopInferenceOverride();
    }

    /// <summary>
    /// Pushes model/inference-parameter overrides. Restore with <see cref="PopInferenceOverride"/>.
    /// Null/blank arguments leave the corresponding current value untouched.
    /// </summary>
    public void PushInferenceOverride(
        string? service = null, string? model = null, double? temperature = null,
        string? reasoningEffort = null)
    {
        lock (_overrideLock)
        {
            // Snapshot current state first so Pop can restore it exactly.
            _overrideStack.Push((_serviceOverride, _modelOverride, _temperatureOverride, _reasoningEffortOverride));
            if (!string.IsNullOrWhiteSpace(service)) _serviceOverride = service;
            if (!string.IsNullOrWhiteSpace(model)) _modelOverride = model;
            if (temperature.HasValue) _temperatureOverride = temperature.Value;
            if (!string.IsNullOrWhiteSpace(reasoningEffort)) _reasoningEffortOverride = reasoningEffort.Trim();
        }
    }

    /// <summary>Restores the state captured by the most recent <see cref="PushInferenceOverride"/>.</summary>
    public void PopInferenceOverride()
    {
        lock (_overrideLock)
        {
            if (_overrideStack.Count == 0)
            {
                // Unbalanced Pop: fall back to "no overrides" rather than throwing.
                _serviceOverride = null;
                _modelOverride = null;
                _temperatureOverride = null;
                _reasoningEffortOverride = null;
                return;
            }
            var prev = _overrideStack.Pop();
            _serviceOverride = prev.Service;
            _modelOverride = prev.Model;
            _temperatureOverride = prev.Temperature;
            _reasoningEffortOverride = prev.ReasoningEffort;
        }
    }

    /// <summary>Returns the (service, model) pair that is currently active.</summary>
    public (string service, string model) GetCurrentModelInfo() => (ResolveService(), ResolveModel());

    /// <summary>Effective service name, taking any override into account.</summary>
    private string ResolveService() => NormalizeServiceName(_serviceOverride ?? _settings.Settings.Llm.Service);

    private static bool IsExternalLlmService(string normalizedService) => normalizedService is "gemini" or "sigmoid";

    /// <summary>Throws if the current operation mode forbids calling the given (external) service.</summary>
    private void EnsureOperationModeAllowsLlmService(string service)
    {
        if (!OperationModePolicy.IsInternal(_settings.Settings)) return;
        var normalized = NormalizeServiceName(service);
        if (!IsExternalLlmService(normalized)) return;
        var display = normalized == "sigmoid" ? "Claude" : "Gemini";
        throw new InvalidOperationException(
            $"사내 모드에서는 외부 LLM 호출이 차단됩니다: {display}. " +
            "설정에서 operationMode를 external로 변경하거나 사내 LLM(Ollama/vLLM)을 사용하세요.");
    }

    /// <summary>Maps arbitrary service spellings to a canonical key; unknown values default to "ollama".</summary>
    private static string NormalizeServiceName(string? service)
    {
        var key = (service ?? "").Trim().ToLowerInvariant();
        return key switch
        {
            "cl" + "aude" => "sigmoid",
            "sigmoid" => "sigmoid",
            "gemini" => "gemini",
            "vllm" => "vllm",
            _ => "ollama",
        };
    }

    /// <summary>Effective model name, taking any override into account.</summary>
    private string ResolveModel()
    {
        if (_modelOverride != null) return _modelOverride;
        return ResolveModelName();
    }

    private double ResolveTemperature() => _temperatureOverride ?? _settings.Settings.Llm.Temperature;

    private string? ResolveReasoningEffort() => _reasoningEffortOverride;

    /// <summary>Returns the API key configured for the given service (decrypted for internal services).</summary>
    private string ResolveApiKeyForService(string service)
    {
        var llm = _settings.Settings.Llm;
        return NormalizeServiceName(service) switch
        {
            "gemini" => llm.GeminiApiKey,
            "sigmoid" => llm.ClaudeApiKey,
            "vllm" => CryptoService.DecryptIfEnabled(llm.VllmApiKey, llm.EncryptionEnabled),
            "ollama" => CryptoService.DecryptIfEnabled(llm.OllamaApiKey, llm.EncryptionEnabled),
            _ => "",
        };
    }

    /// <summary>Returns the endpoint configured for the given service.</summary>
    private string ResolveEndpointForService(string service)
    {
        var llm = _settings.Settings.Llm;
        return NormalizeServiceName(service) switch
        {
            "vllm" => llm.VllmEndpoint,
            "ollama" => llm.OllamaEndpoint,
            _ => llm.Endpoint,
        };
    }

    /// <summary>Token usage of the most recent request; updated after streaming/non-streaming completion.</summary>
    public TokenUsage? LastTokenUsage { get; private set; }

    public LlmService(SettingsService settings)
    {
        _settings = settings;
        // Single long-lived HttpClient; generous timeout because LLM generations can be slow.
        _http = new HttpClient { Timeout = TimeSpan.FromMinutes(10) };
        LoadSystemPrompt();
    }

    // ─── System prompt (loaded dynamically from the build directory) ───────
    private void LoadSystemPrompt()
    {
        var exeDir = AppContext.BaseDirectory;
        var promptFile = Path.Combine(exeDir, "system_prompt.txt");
        if (File.Exists(promptFile))
        {
            _systemPrompt = File.ReadAllText(promptFile, Encoding.UTF8).Trim();
        }
    }

    public string? SystemPrompt => _systemPrompt;

    /// <summary>
    /// For internal services (Ollama/vLLM) the stored model name may be encrypted —
    /// decrypt according to the encryption mode; external services use the name verbatim.
    /// </summary>
    private string ResolveModelName()
    {
        var llm = _settings.Settings.Llm;
        var service = NormalizeServiceName(llm.Service);
        if (service is "ollama" or "vllm" && !string.IsNullOrEmpty(llm.Model))
            return CryptoService.DecryptIfEnabled(llm.Model, llm.EncryptionEnabled);
        return llm.Model;
    }

    /// <summary>
    /// Finds the RegisteredModel matching the active model and returns its endpoint/API key.
    /// A RegisteredModel with a dedicated server wins; otherwise the default settings apply.
    /// </summary>
    private (string Endpoint, string ApiKey) ResolveServerInfo()
    {
        var llm = _settings.Settings.Llm;
        var activeService = ResolveService();
        var modelName = ResolveModel();

        // Look for a RegisteredModel whose service and model match the active ones.
        var registered = FindRegisteredModel(llm, activeService, modelName);
        if (registered != null && !string.IsNullOrEmpty(registered.Endpoint))
        {
            var apiKey = !string.IsNullOrEmpty(registered.ApiKey)
                ? CryptoService.DecryptIfEnabled(registered.ApiKey, llm.EncryptionEnabled)
                : GetDefaultApiKey(llm, activeService);
            return (registered.Endpoint, apiKey);
        }

        // Fall back to the default endpoint for the service.
        return activeService.ToLowerInvariant() switch
        {
            "vllm" => (llm.VllmEndpoint, CryptoService.DecryptIfEnabled(llm.VllmApiKey, llm.EncryptionEnabled)),
            "ollama" => (llm.OllamaEndpoint, CryptoService.DecryptIfEnabled(llm.OllamaApiKey, llm.EncryptionEnabled)),
            _ => ("", ""),
        };
    }

    /// <summary>Finds the RegisteredModel for the active model (matched by decrypted name or alias).</summary>
    private static Models.RegisteredModel? FindRegisteredModel(Models.LlmSettings llm, string service, string modelName)
    {
        return llm.RegisteredModels.FirstOrDefault(m =>
            m.Service.Equals(service, StringComparison.OrdinalIgnoreCase) &&
            (CryptoService.DecryptIfEnabled(m.EncryptedModelName, llm.EncryptionEnabled) == modelName ||
             m.Alias == modelName));
    }

    /// <summary>
    /// Returns the auth header value for the active model.
    /// For CP4D auth the token is issued/cached automatically.
    /// </summary>
    internal async Task<string?> ResolveAuthTokenAsync(CancellationToken ct = default)
    {
        var llm = _settings.Settings.Llm;
        var activeService = ResolveService();
        var modelName = ResolveModel();
        var registered = FindRegisteredModel(llm, activeService, modelName);

        // CP4D authentication: exchange credentials for a bearer token.
        if (registered != null &&
            registered.AuthType.Equals("cp4d", StringComparison.OrdinalIgnoreCase) &&
            !string.IsNullOrWhiteSpace(registered.Cp4dUrl))
        {
            var password = CryptoService.DecryptIfEnabled(registered.Cp4dPassword, llm.EncryptionEnabled);
            var token = await Cp4dTokenService.GetTokenAsync(
                registered.Cp4dUrl, registered.Cp4dUsername, password, ct);
            return token;
        }

        // Plain Bearer auth — return the configured API key (null when absent).
        var (_, apiKey) = ResolveServerInfo();
        return string.IsNullOrEmpty(apiKey) ? null : apiKey;
    }

    /// <summary>
    /// Applies the auth header to a request: auto-issued token for CP4D, otherwise the API key.
    /// </summary>
    private async Task ApplyAuthHeaderAsync(HttpRequestMessage req, CancellationToken ct)
    {
        var token = await ResolveAuthTokenAsync(ct);
        if (!string.IsNullOrEmpty(token))
            req.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", token);
    }

    private static string GetDefaultApiKey(LlmSettings llm, string? service = null)
    {
        var svc = service ?? llm.Service;
        return svc.ToLowerInvariant() switch
        {
            "vllm" => CryptoService.DecryptIfEnabled(llm.VllmApiKey, llm.EncryptionEnabled),
            "ollama" => CryptoService.DecryptIfEnabled(llm.OllamaApiKey, llm.EncryptionEnabled),
            _ => "",
        };
    }

    // ─── Non-streaming completion ──────────────────────────────────────────

    /// <summary>
    /// Sends a non-streaming chat request to the active service; on failure, tries the
    /// configured fallback models ("service:model" entries) in order.
    /// </summary>
    public async Task<string> SendAsync(List<ChatMessage> messages, CancellationToken ct = default)
    {
        var llm = _settings.Settings.Llm;
        var activeService = ResolveService();
        EnsureOperationModeAllowsLlmService(activeService);

        // FIX: reset per-request so a previous request's fallback banner does not
        // linger in the UI (null is documented as "no fallback occurred").
        LastFallbackInfo = null;

        try
        {
            return await SendWithServiceAsync(activeService, messages, ct);
        }
        // FIX: do not treat user cancellation as a model failure — cancellation must
        // propagate instead of triggering (and waiting on) fallback models.
        catch (Exception ex) when (ex is not OperationCanceledException && llm.FallbackModels.Count > 0)
        {
            // Try the fallback models one at a time.
            foreach (var fallback in llm.FallbackModels)
            {
                var parts = fallback.Split(':', 2);
                var fbService = parts[0].Trim();
                var fbModel = parts.Length > 1 ? parts[1].Trim() : "";
                try
                {
                    EnsureOperationModeAllowsLlmService(fbService);
                    LogService.Warn($"모델 폴백: {activeService} → {fbService} ({ex.Message})");
                    LastFallbackInfo = $"{activeService} → {fbService}";
                    return await SendWithServiceAsync(fbService, messages, ct);
                }
                catch (OperationCanceledException)
                {
                    // FIX: cancellation mid-fallback also propagates.
                    throw;
                }
                catch
                {
                    continue; // best-effort: try the next fallback entry
                }
            }
            throw; // every fallback failed — surface the original error
        }
    }

    /// <summary>Last fallback info (for UI display). Null means no fallback occurred.</summary>
    public string? LastFallbackInfo { get; private set; }

    private Task<string> SendWithServiceAsync(string service, List<ChatMessage> messages, CancellationToken ct)
    {
        return NormalizeServiceName(service) switch
        {
            "gemini" => SendGeminiAsync(messages, ct),
            "sigmoid" => SendSigmoidAsync(messages, ct),
            "vllm" => SendOpenAiCompatibleAsync(messages, ct),
            _ => SendOllamaAsync(messages, ct),
        };
    }

    // ─── Streaming completion ──────────────────────────────────────────────

    /// <summary>Streams chat chunks from the active service. No fallback in streaming mode.</summary>
    public async IAsyncEnumerable<string> StreamAsync(
        List<ChatMessage> messages, [EnumeratorCancellation] CancellationToken ct = default)
    {
        var activeService = ResolveService();
        EnsureOperationModeAllowsLlmService(activeService);
        var stream = NormalizeServiceName(activeService) switch
        {
            "gemini" => StreamGeminiAsync(messages, ct),
            "sigmoid" => StreamSigmoidAsync(messages, ct),
            "vllm" => StreamOpenAiCompatibleAsync(messages, ct),
            _ => StreamOllamaAsync(messages, ct),
        };
        await foreach (var chunk in stream.WithCancellation(ct))
            yield return chunk;
    }

    // ─── Connection test ───────────────────────────────────────────────────

    /// <summary>Pings the configured backend and returns (ok, user-facing message).</summary>
    public async Task<(bool ok, string message)> TestConnectionAsync()
    {
        try
        {
            var llm = _settings.Settings.Llm;
            var normalizedService = NormalizeServiceName(llm.Service);
            if (OperationModePolicy.IsInternal(_settings.Settings) && IsExternalLlmService(normalizedService))
            {
                var blockedName = normalizedService == "sigmoid" ? "Claude" : "Gemini";
                return (false, $"사내 모드에서는 {blockedName} 외부 LLM 연결이 차단됩니다.");
            }
            switch (normalizedService)
            {
                // NOTE(review): the test probes llm.Endpoint while the send path prefers
                // llm.OllamaEndpoint / llm.VllmEndpoint (see ResolveEndpointForService) —
                // confirm whether these settings are intended to be distinct.
                case "ollama":
                    var resp = await _http.GetAsync(llm.Endpoint.TrimEnd('/') + "/api/tags");
                    return resp.IsSuccessStatusCode
                        ? (true, "Ollama 연결 성공")
                        : (false, ClassifyHttpError(resp));
                case "vllm":
                    var vResp = await _http.GetAsync(llm.Endpoint.TrimEnd('/') + "/v1/models");
                    return vResp.IsSuccessStatusCode
                        ? (true, "vLLM 연결 성공")
                        : (false, ClassifyHttpError(vResp));
                case "gemini":
                    var gKey = ResolveApiKeyForService("gemini");
                    if (string.IsNullOrEmpty(gKey)) return (false, "API 키가 설정되지 않았습니다");
                    var gResp = await _http.GetAsync(
                        $"https://generativelanguage.googleapis.com/v1beta/models?key={gKey}");
                    return gResp.IsSuccessStatusCode
                        ? (true, "Gemini API 연결 성공")
                        : (false, ClassifyHttpError(gResp));
                case "sigmoid":
                {
                    var cKey = ResolveApiKeyForService("sigmoid");
                    if (string.IsNullOrEmpty(cKey)) return (false, "API 키가 설정되지 않았습니다");
                    using var cReq = new HttpRequestMessage(HttpMethod.Get, $"https://{SigmoidApiHost}/v1/models");
                    cReq.Headers.Add("x-api-key", cKey);
                    cReq.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
                    var cResp = await _http.SendAsync(cReq);
                    return cResp.IsSuccessStatusCode
                        ? (true, "Claude API 연결 성공")
                        : (false, ClassifyHttpError(cResp));
                }
                default:
                    return (false, "알 수 없는 서비스");
            }
        }
        catch (TaskCanceledException)
        {
            return (false, "연결 시간 초과 — 서버가 응답하지 않습니다");
        }
        catch (HttpRequestException ex)
        {
            return (false, $"연결 실패 — {ex.Message}");
        }
        catch (Exception ex)
        {
            return (false, ex.Message);
        }
    }

    // ═══════════════════════════════════════════════════════════════════════
    // Ollama
    // ═══════════════════════════════════════════════════════════════════════

    private async Task<string> SendOllamaAsync(List<ChatMessage> messages, CancellationToken ct)
    {
        var llm = _settings.Settings.Llm;
        var (endpoint, _) = ResolveServerInfo();
        var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
        var body = BuildOllamaBody(messages, stream: false);
        var resp = await PostJsonWithRetryAsync(ep.TrimEnd('/') + "/api/chat", body, ct);
        return SafeParseJson(resp, root =>
        {
            TryParseOllamaUsage(root);
            return root.GetProperty("message").GetProperty("content").GetString() ?? "";
        }, "Ollama 응답");
    }

    private async IAsyncEnumerable<string> StreamOllamaAsync(
        List<ChatMessage> messages, [EnumeratorCancellation] CancellationToken ct)
    {
        var llm = _settings.Settings.Llm;
        var (endpoint, _) = ResolveServerInfo();
        var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
        var body = BuildOllamaBody(messages, stream: true);
        var url = ep.TrimEnd('/') + "/api/chat";
        using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
        using var resp = await SendWithErrorClassificationAsync(req, ct);
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);
        // Ollama streams newline-delimited JSON objects (not SSE).
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
            var line = await ReadLineWithTimeoutAsync(reader, ct);
            if (line == null) break;
            if (string.IsNullOrEmpty(line)) continue;
            string? text = null;
            try
            {
                using var doc = JsonDocument.Parse(line);
                if (doc.RootElement.TryGetProperty("message", out var msg) &&
                    msg.TryGetProperty("content", out var c))
                    text = c.GetString();
                // Ollama: the final object (done=true) carries the token counts.
                if (doc.RootElement.TryGetProperty("done", out var done) && done.GetBoolean())
                    TryParseOllamaUsage(doc.RootElement);
            }
            catch (JsonException ex)
            {
                LogService.Warn($"Ollama 스트리밍 JSON 파싱 오류: {ex.Message}");
            }
            if (!string.IsNullOrEmpty(text)) yield return text;
        }
    }

    private object BuildOllamaBody(List<ChatMessage> messages, bool stream)
    {
        var llm = _settings.Settings.Llm;
        var msgs = BuildMessageList(messages);
        return new
        {
            model = ResolveModelName(),
            messages = msgs,
            stream = stream,
            options = new { temperature = ResolveTemperature() }
        };
    }

    // ═══════════════════════════════════════════════════════════════════════
    // OpenAI-compatible (vLLM)
    // ═══════════════════════════════════════════════════════════════════════

    private async Task<string> SendOpenAiCompatibleAsync(List<ChatMessage> messages, CancellationToken ct)
    {
        var llm = _settings.Settings.Llm;
        var (endpoint, _) = ResolveServerInfo();
        var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
        var body = BuildOpenAiBody(messages, stream: false);
        var url = ep.TrimEnd('/') + "/v1/chat/completions";
        var json = JsonSerializer.Serialize(body);
        using var req = new HttpRequestMessage(HttpMethod.Post, url)
        {
            Content = new StringContent(json, Encoding.UTF8, "application/json")
        };
        await ApplyAuthHeaderAsync(req, ct);
        using var resp = await SendWithErrorClassificationAsync(req, ct);
        var respBody = await resp.Content.ReadAsStringAsync(ct);
        return SafeParseJson(respBody, root =>
        {
            TryParseOpenAiUsage(root);
            var choices = root.GetProperty("choices");
            if (choices.GetArrayLength() == 0) return "(빈 응답)";
            return choices[0].GetProperty("message").GetProperty("content").GetString() ?? "";
        }, "vLLM 응답");
    }

    private async IAsyncEnumerable<string> StreamOpenAiCompatibleAsync(
        List<ChatMessage> messages, [EnumeratorCancellation] CancellationToken ct)
    {
        var llm = _settings.Settings.Llm;
        var (endpoint, _) = ResolveServerInfo();
        var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
        var body = BuildOpenAiBody(messages, stream: true);
        var url = ep.TrimEnd('/') + "/v1/chat/completions";
        using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
        await ApplyAuthHeaderAsync(req, ct);
        using var resp = await SendWithErrorClassificationAsync(req, ct);
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);
        // OpenAI-style SSE: "data: {json}" lines terminated by "data: [DONE]".
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
            var line = await ReadLineWithTimeoutAsync(reader, ct);
            if (line == null) break;
            if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
            var data = line["data: ".Length..];
            if (data == "[DONE]") break;
            string? text = null;
            try
            {
                using var doc = JsonDocument.Parse(data);
                TryParseOpenAiUsage(doc.RootElement);
                var choices = doc.RootElement.GetProperty("choices");
                if (choices.GetArrayLength() > 0)
                {
                    var delta = choices[0].GetProperty("delta");
                    if (delta.TryGetProperty("content", out var c)) text = c.GetString();
                }
            }
            catch (JsonException ex)
            {
                LogService.Warn($"vLLM 스트리밍 JSON 파싱 오류: {ex.Message}");
            }
            if (!string.IsNullOrEmpty(text)) yield return text;
        }
    }

    private object BuildOpenAiBody(List<ChatMessage> messages, bool stream)
    {
        var llm = _settings.Settings.Llm;
        var msgs = BuildMessageList(messages, openAiVision: true);
        var body = new Dictionary<string, object>
        {
            ["model"] = ResolveModelName(),
            ["messages"] = msgs,
            ["stream"] = stream,
            ["temperature"] = ResolveTemperature(),
            ["max_tokens"] = llm.MaxContextTokens
        };
        var effort = ResolveReasoningEffort();
        if (!string.IsNullOrWhiteSpace(effort)) body["reasoning_effort"] = effort;
        return body;
    }

    // ═══════════════════════════════════════════════════════════════════════
    // Gemini
    // ═══════════════════════════════════════════════════════════════════════

    private async Task<string> SendGeminiAsync(List<ChatMessage> messages, CancellationToken ct)
    {
        var llm = _settings.Settings.Llm;
        var apiKey = ResolveApiKeyForService("gemini");
        if (string.IsNullOrEmpty(apiKey))
            throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다. 설정 > AX Agent에서 API 키를 입력하세요.");
        var model = ResolveModel();
        var body = BuildGeminiBody(messages);
        // NOTE(review): the key travels as a query parameter (Gemini's documented style);
        // the x-goog-api-key header would keep it out of URL logs — consider switching.
        var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={apiKey}";
        var resp = await PostJsonWithRetryAsync(url, body, ct);
        return SafeParseJson(resp, root =>
        {
            TryParseGeminiUsage(root);
            var candidates = root.GetProperty("candidates");
            if (candidates.GetArrayLength() == 0) return "(빈 응답)";
            var parts = candidates[0].GetProperty("content").GetProperty("parts");
            if (parts.GetArrayLength() == 0) return "(빈 응답)";
            return parts[0].GetProperty("text").GetString() ?? "";
        }, "Gemini 응답");
    }

    private async IAsyncEnumerable<string> StreamGeminiAsync(
        List<ChatMessage> messages, [EnumeratorCancellation] CancellationToken ct)
    {
        var llm = _settings.Settings.Llm;
        var apiKey = ResolveApiKeyForService("gemini");
        if (string.IsNullOrEmpty(apiKey))
            throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다.");
        var model = ResolveModel();
        var body = BuildGeminiBody(messages);
        var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:streamGenerateContent?alt=sse&key={apiKey}";
        using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
        using var resp = await SendWithErrorClassificationAsync(req, ct);
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
            var line = await ReadLineWithTimeoutAsync(reader, ct);
            if (line == null) break;
            if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
            var data = line["data: ".Length..];
            string? parsed = null;
            try
            {
                using var doc = JsonDocument.Parse(data);
                TryParseGeminiUsage(doc.RootElement);
                var candidates = doc.RootElement.GetProperty("candidates");
                if (candidates.GetArrayLength() == 0) continue;
                // A chunk may carry multiple parts — concatenate their text.
                var sb = new StringBuilder();
                var parts = candidates[0].GetProperty("content").GetProperty("parts");
                foreach (var part in parts.EnumerateArray())
                {
                    if (part.TryGetProperty("text", out var t))
                    {
                        var text = t.GetString();
                        if (!string.IsNullOrEmpty(text)) sb.Append(text);
                    }
                }
                if (sb.Length > 0) parsed = sb.ToString();
            }
            catch (JsonException ex)
            {
                LogService.Warn($"Gemini 스트리밍 JSON 파싱 오류: {ex.Message}");
            }
            if (parsed != null) yield return parsed;
        }
    }

    private object BuildGeminiBody(List<ChatMessage> messages)
    {
        var llm = _settings.Settings.Llm;
        var contents = new List<object>();
        object? systemInstruction = null;
        if (!string.IsNullOrEmpty(_systemPrompt))
        {
            systemInstruction = new { parts = new[] { new { text = _systemPrompt } } };
        }
        foreach (var m in messages)
        {
            if (m.Role == "system") continue;
            var parts = new List<object> { new { text = m.Content } };
            if (m.Images?.Count > 0)
            {
                foreach (var img in m.Images)
                    parts.Add(new { inlineData = new { mimeType = img.MimeType, data = img.Base64 } });
            }
            // Gemini only knows "user" and "model" roles.
            contents.Add(new { role = m.Role == "assistant" ? "model" : "user", parts });
        }
        if (systemInstruction != null)
            return new
            {
                systemInstruction,
                contents,
                generationConfig = new { temperature = ResolveTemperature(), maxOutputTokens = llm.MaxContextTokens }
            };
        return new
        {
            contents,
            generationConfig = new { temperature = ResolveTemperature(), maxOutputTokens = llm.MaxContextTokens }
        };
    }

    // ═══════════════════════════════════════════════════════════════════════
    // Claude (messages API)
    // ═══════════════════════════════════════════════════════════════════════

    private async Task<string> SendSigmoidAsync(List<ChatMessage> messages, CancellationToken ct)
    {
        var apiKey = ResolveApiKeyForService("sigmoid");
        if (string.IsNullOrEmpty(apiKey))
            throw new InvalidOperationException("Claude API 키가 설정되지 않았습니다. 설정 > AX Agent에서 API 키를 입력하세요.");
        var body = BuildSigmoidBody(messages, stream: false);
        var json = JsonSerializer.Serialize(body);
        using var req = new HttpRequestMessage(HttpMethod.Post, $"https://{SigmoidApiHost}/v1/messages");
        req.Content = new StringContent(json, Encoding.UTF8, "application/json");
        req.Headers.Add("x-api-key", apiKey);
        req.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
        using var resp = await _http.SendAsync(req, ct);
        if (!resp.IsSuccessStatusCode)
        {
            var errBody = await resp.Content.ReadAsStringAsync(ct);
            throw new HttpRequestException(ClassifyHttpError(resp, errBody));
        }
        var respJson = await resp.Content.ReadAsStringAsync(ct);
        return SafeParseJson(respJson, root =>
        {
            TryParseSigmoidUsage(root);
            var content = root.GetProperty("content");
            if (content.GetArrayLength() == 0) return "(빈 응답)";
            return content[0].GetProperty("text").GetString() ?? "";
        }, "Claude 응답");
    }

    private async IAsyncEnumerable<string> StreamSigmoidAsync(
        List<ChatMessage> messages, [EnumeratorCancellation] CancellationToken ct)
    {
        var apiKey = ResolveApiKeyForService("sigmoid");
        if (string.IsNullOrEmpty(apiKey))
            throw new InvalidOperationException("Claude API 키가 설정되지 않았습니다.");
        var body = BuildSigmoidBody(messages, stream: true);
        var json = JsonSerializer.Serialize(body);
        using var req = new HttpRequestMessage(HttpMethod.Post, $"https://{SigmoidApiHost}/v1/messages");
        req.Content = new StringContent(json, Encoding.UTF8, "application/json");
        req.Headers.Add("x-api-key", apiKey);
        req.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
        using var resp = await _http.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, ct);
        if (!resp.IsSuccessStatusCode)
        {
            var errBody = await resp.Content.ReadAsStringAsync(ct);
            throw new HttpRequestException(ClassifyHttpError(resp, errBody));
        }
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
            var line = await ReadLineWithTimeoutAsync(reader, ct);
            if (line == null) break;
            if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
            var data = line["data: ".Length..];
            string? text = null;
            try
            {
                using var doc = JsonDocument.Parse(data);
                var type = doc.RootElement.GetProperty("type").GetString();
                if (type == "content_block_delta")
                {
                    var delta = doc.RootElement.GetProperty("delta");
                    if (delta.TryGetProperty("text", out var t)) text = t.GetString();
                }
                else if (type is "message_start" or "message_delta")
                {
                    // message_start: usage in .message.usage; message_delta: usage in .usage
                    if (doc.RootElement.TryGetProperty("message", out var msg) &&
                        msg.TryGetProperty("usage", out var u1))
                        TryParseSigmoidUsageFromElement(u1);
                    else if (doc.RootElement.TryGetProperty("usage", out var u2))
                        TryParseSigmoidUsageFromElement(u2);
                }
            }
            catch (JsonException ex)
            {
                LogService.Warn($"Claude 스트리밍 JSON 파싱 오류: {ex.Message}");
            }
            if (!string.IsNullOrEmpty(text)) yield return text;
        }
    }

    private object BuildSigmoidBody(List<ChatMessage> messages, bool stream)
    {
        var llm = _settings.Settings.Llm;
        var msgs = new List<object>();
        foreach (var m in messages)
        {
            if (m.Role == "system") continue;
            if (m.Images?.Count > 0)
            {
                // Claude vision: content becomes an array of image + text blocks.
                var contentParts = new List<object>();
                foreach (var img in m.Images)
                    contentParts.Add(new
                    {
                        type = "image",
                        source = new { type = "base64", media_type = img.MimeType, data = img.Base64 }
                    });
                contentParts.Add(new { type = "text", text = m.Content });
                msgs.Add(new { role = m.Role, content = contentParts });
            }
            else
            {
                msgs.Add(new { role = m.Role, content = m.Content });
            }
        }
        var activeModel = ResolveModel();
        // Claude takes the system prompt as a top-level "system" field, not a message.
        if (!string.IsNullOrEmpty(_systemPrompt))
        {
            return new
            {
                model = activeModel,
                max_tokens = llm.MaxContextTokens,
                temperature = ResolveTemperature(),
                system = _systemPrompt,
                messages = msgs,
                stream
            };
        }
        return new
        {
            model = activeModel,
            max_tokens = llm.MaxContextTokens,
            temperature = ResolveTemperature(),
            messages = msgs,
            stream
        };
    }

    // ─── Shared helpers ────────────────────────────────────────────────────

    /// <summary>
    /// Builds the message array shared by Ollama and OpenAI-compatible bodies,
    /// prepending the system prompt and expanding image attachments per dialect.
    /// </summary>
    private List<object> BuildMessageList(List<ChatMessage> messages, bool openAiVision = false)
    {
        var result = new List<object>();
        if (!string.IsNullOrEmpty(_systemPrompt))
            result.Add(new { role = "system", content = _systemPrompt });
        foreach (var m in messages)
        {
            if (m.Role == "system") continue;
            if (m.Images?.Count > 0)
            {
                if (openAiVision)
                {
                    // OpenAI vision: content array of text + image_url data URIs.
                    var contentParts = new List<object>();
                    contentParts.Add(new { type = "text", text = m.Content });
                    foreach (var img in m.Images)
                        contentParts.Add(new
                        {
                            type = "image_url",
                            image_url = new { url = $"data:{img.MimeType};base64,{img.Base64}" }
                        });
                    result.Add(new { role = m.Role, content = contentParts });
                }
                else
                {
                    // Ollama vision: plain base64 array in an "images" field.
                    result.Add(new
                    {
                        role = m.Role,
                        content = m.Content,
                        images = m.Images.Select(i => i.Base64).ToArray()
                    });
                }
            }
            else
            {
                result.Add(new { role = m.Role, content = m.Content });
            }
        }
        return result;
    }

    /// <summary>Non-streaming POST with retry on transient failures (up to 2 retries).</summary>
    private async Task<string> PostJsonWithRetryAsync(string url, object body, CancellationToken ct)
    {
        var json = JsonSerializer.Serialize(body);
        Exception? lastEx = null;
        for (int attempt = 0; attempt <= MaxRetries; attempt++)
        {
            try
            {
                using var content = new StringContent(json, Encoding.UTF8, "application/json");
                using var resp = await _http.PostAsync(url, content, ct);
                if (resp.IsSuccessStatusCode)
                    return await resp.Content.ReadAsStringAsync(ct);
                // 429 rate limit → linear backoff, then retry.
                if ((int)resp.StatusCode == 429 && attempt < MaxRetries)
                {
                    await Task.Delay(1000 * (attempt + 1), ct);
                    continue;
                }
                // Any other error → classify and throw.
                var errBody = await resp.Content.ReadAsStringAsync(ct);
                throw new HttpRequestException(ClassifyHttpError(resp, errBody));
            }
            catch (HttpRequestException)
            {
                throw; // already classified — do not retry
            }
            catch (TaskCanceledException) when (!ct.IsCancellationRequested && attempt < MaxRetries)
            {
                // HttpClient timeout (not user cancellation) → retry with backoff.
                lastEx = new TimeoutException("요청 시간 초과");
                await Task.Delay(1000 * (attempt + 1), ct);
            }
        }
        throw lastEx ?? new HttpRequestException("요청 실패");
    }

    /// <summary>Streaming-only: send the request and classify any HTTP error before returning.</summary>
    private async Task<HttpResponseMessage> SendWithErrorClassificationAsync(
        HttpRequestMessage req, CancellationToken ct)
    {
        var resp = await _http.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, ct);
        if (!resp.IsSuccessStatusCode)
        {
            var errBody = await resp.Content.ReadAsStringAsync(ct);
            var errorMsg = ClassifyHttpError(resp, errBody);
            resp.Dispose();
            throw new HttpRequestException(errorMsg);
        }
        return resp;
    }

    /// <summary>Applies a per-chunk timeout to a streaming ReadLine; null signals end of stream.</summary>
    private static async Task<string?> ReadLineWithTimeoutAsync(StreamReader reader, CancellationToken ct)
    {
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(ChunkTimeout);
        try
        {
            return await reader.ReadLineAsync(cts.Token);
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            LogService.Warn("스트리밍 청크 타임아웃 (30초 무응답)");
            return null; // timeout → terminate the stream gracefully
        }
    }

    /// <summary>Safe JSON-parsing wrapper — produces a detailed error message on failure.</summary>
    private static string SafeParseJson(string json, Func<JsonElement, string> extractor, string context)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            // Detect API error payloads before extracting.
            if (doc.RootElement.TryGetProperty("error", out var error))
            {
                var msg = error.TryGetProperty("message", out var m) ? m.GetString() : error.ToString();
                throw new HttpRequestException($"[{context}] API 에러: {msg}");
            }
            return extractor(doc.RootElement);
        }
        catch (JsonException ex)
        {
            var preview = json.Length > 200 ? json[..200] + "…" : json;
            throw new InvalidOperationException(
                $"[{context}] 응답 형식 오류 — 예상하지 못한 JSON 형식입니다.\n파싱 오류: {ex.Message}\n응답 미리보기: {preview}");
        }
        catch (KeyNotFoundException)
        {
            var preview = json.Length > 200 ? json[..200] + "…" : json;
            throw new InvalidOperationException(
                $"[{context}] 응답에 필요한 필드가 없습니다.\n응답 미리보기: {preview}");
        }
    }

    /// <summary>Maps HTTP status codes to user-friendly messages, appending server detail if present.</summary>
    private static string ClassifyHttpError(HttpResponseMessage resp, string? body = null)
    {
        var code = (int)resp.StatusCode;
        var detail = "";
        // Best-effort extraction of a JSON error.message from the body.
        if (!string.IsNullOrEmpty(body))
        {
            try
            {
                using var doc = JsonDocument.Parse(body);
                if (doc.RootElement.TryGetProperty("error", out var err))
                {
                    if (err.ValueKind == JsonValueKind.Object && err.TryGetProperty("message", out var m))
                        detail = m.GetString() ?? "";
                    else if (err.ValueKind == JsonValueKind.String)
                        detail = err.GetString() ?? "";
                }
            }
            catch { /* non-JSON body — ignore */ }
        }
        var msg = code switch
        {
            400 => "잘못된 요청 — 모델 이름이나 요청 형식을 확인하세요",
            401 => "인증 실패 — API 키가 유효하지 않습니다",
            403 => "접근 거부 — API 키 권한을 확인하세요",
            404 => "모델을 찾을 수 없습니다 — 모델 이름을 확인하세요",
            429 => "요청 한도 초과 — 잠시 후 다시 시도하세요",
            500 => "서버 내부 오류 — LLM 서버 상태를 확인하세요",
            502 or 503 => "서버 일시 장애 — 잠시 후 다시 시도하세요",
            _ => $"HTTP {code} 오류"
        };
        return string.IsNullOrEmpty(detail) ? msg : $"{msg}\n상세: {detail}";
    }

    private static StringContent JsonContent(object body)
    {
        var json = JsonSerializer.Serialize(body);
        return new StringContent(json, Encoding.UTF8, "application/json");
    }

    // ─── Token-usage parsing helpers ───────────────────────────────────────

    private void TryParseOllamaUsage(JsonElement root)
    {
        try
        {
            var prompt = root.TryGetProperty("prompt_eval_count", out var p) ? p.GetInt32() : 0;
            var completion = root.TryGetProperty("eval_count", out var e) ? e.GetInt32() : 0;
            if (prompt > 0 || completion > 0)
                LastTokenUsage = new TokenUsage(prompt, completion);
        }
        catch { /* usage is informational only — never fail the request */ }
    }

    private void TryParseOpenAiUsage(JsonElement root)
    {
        try
        {
            if (!root.TryGetProperty("usage", out var usage)) return;
            var prompt = usage.TryGetProperty("prompt_tokens", out var p) ? p.GetInt32() : 0;
            var completion = usage.TryGetProperty("completion_tokens", out var c) ? c.GetInt32() : 0;
            if (prompt > 0 || completion > 0)
                LastTokenUsage = new TokenUsage(prompt, completion);
        }
        catch { /* usage is informational only */ }
    }

    private void TryParseGeminiUsage(JsonElement root)
    {
        try
        {
            if (!root.TryGetProperty("usageMetadata", out var usage)) return;
            var prompt = usage.TryGetProperty("promptTokenCount", out var p) ? p.GetInt32() : 0;
            var completion = usage.TryGetProperty("candidatesTokenCount", out var c) ? c.GetInt32() : 0;
            if (prompt > 0 || completion > 0)
                LastTokenUsage = new TokenUsage(prompt, completion);
        }
        catch { /* usage is informational only */ }
    }

    private void TryParseSigmoidUsage(JsonElement root)
    {
        try
        {
            if (!root.TryGetProperty("usage", out var usage)) return;
            TryParseSigmoidUsageFromElement(usage);
        }
        catch { /* usage is informational only */ }
    }

    private void TryParseSigmoidUsageFromElement(JsonElement usage)
    {
        try
        {
            var input = usage.TryGetProperty("input_tokens", out var i) ? i.GetInt32() : 0;
            var output = usage.TryGetProperty("output_tokens", out var o) ? o.GetInt32() : 0;
            if (input > 0 || output > 0)
                LastTokenUsage = new TokenUsage(input, output);
        }
        catch { /* usage is informational only */ }
    }

    public void Dispose() => _http.Dispose();
}