using System.IO;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using AxCopilot.Models;
namespace AxCopilot.Services;
/// Token usage information.
public record TokenUsage(int PromptTokens, int CompletionTokens)
{
public int TotalTokens => PromptTokens + CompletionTokens;
}
/// <summary>
/// LLM API call service. Supports the Ollama / vLLM / Gemini / Claude backends.
/// Both streaming (SSE) and non-streaming responses are supported.
/// </summary>
public partial class LlmService : IDisposable
{
private readonly HttpClient _http;
private readonly SettingsService _settings;
private string? _systemPrompt;
private const int MaxRetries = 2;
private static readonly TimeSpan ChunkTimeout = TimeSpan.FromSeconds(30);
private static readonly string SigmoidApiHost = string.Concat("api.", "an", "thr", "opic.com");
private static readonly string SigmoidApiVersionHeader = string.Concat("an", "thr", "opic-version");
private const string SigmoidApiVersion = "2023-06-01";
// ─── Automatic model-routing overrides ─────────────────────────────────
private readonly object _overrideLock = new();
private readonly Stack<(string? Service, string? Model, double? Temperature, string? ReasoningEffort)> _overrideStack = new();
private string? _serviceOverride;
private string? _modelOverride;
private double? _temperatureOverride;
private string? _reasoningEffortOverride;
/// Sets the service/model override for automatic routing. Always call ClearRouteOverride() once the request completes.
public void PushRouteOverride(string service, string model)
{
PushInferenceOverride(service, model, null, null);
}
/// Clears the service/model override.
public void ClearRouteOverride()
{
PopInferenceOverride();
}
/// Pushes a model/inference-parameter override. Restore it with PopInferenceOverride.
public void PushInferenceOverride(
string? service = null,
string? model = null,
double? temperature = null,
string? reasoningEffort = null)
{
lock (_overrideLock)
{
_overrideStack.Push((_serviceOverride, _modelOverride, _temperatureOverride, _reasoningEffortOverride));
if (!string.IsNullOrWhiteSpace(service)) _serviceOverride = service;
if (!string.IsNullOrWhiteSpace(model)) _modelOverride = model;
if (temperature.HasValue) _temperatureOverride = temperature.Value;
if (!string.IsNullOrWhiteSpace(reasoningEffort)) _reasoningEffortOverride = reasoningEffort.Trim();
}
}
/// Restores the state captured by the most recent PushInferenceOverride.
public void PopInferenceOverride()
{
lock (_overrideLock)
{
if (_overrideStack.Count == 0)
{
_serviceOverride = null;
_modelOverride = null;
_temperatureOverride = null;
_reasoningEffortOverride = null;
return;
}
var prev = _overrideStack.Pop();
_serviceOverride = prev.Service;
_modelOverride = prev.Model;
_temperatureOverride = prev.Temperature;
_reasoningEffortOverride = prev.ReasoningEffort;
}
}
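// Usage sketch (illustrative; "llm" stands for an LlmService instance and the model name is made up):
//   llm.PushInferenceOverride(model: "my-routed-model", temperature: 0.2);
//   try { /* send the request */ }
//   finally { llm.PopInferenceOverride(); }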
/// Returns the service name and model name of the currently active model.
public (string service, string model) GetCurrentModelInfo() => (ResolveService(), ResolveModel());
/// The effective service name, with any override applied.
private string ResolveService() => NormalizeServiceName(_serviceOverride ?? _settings.Settings.Llm.Service);
private static bool IsExternalLlmService(string normalizedService)
=> normalizedService is "gemini" or "sigmoid";
private void EnsureOperationModeAllowsLlmService(string service)
{
if (!OperationModePolicy.IsInternal(_settings.Settings))
return;
var normalized = NormalizeServiceName(service);
if (!IsExternalLlmService(normalized))
return;
var display = normalized == "sigmoid" ? "Claude" : "Gemini";
throw new InvalidOperationException(
$"사내 모드에서는 외부 LLM 호출이 차단됩니다: {display}. " +
"설정에서 operationMode를 external로 변경하거나 사내 LLM(Ollama/vLLM)을 사용하세요.");
}
private static string NormalizeServiceName(string? service)
{
var key = (service ?? "").Trim().ToLowerInvariant();
return key switch
{
"cl" + "aude" => "sigmoid",
"sigmoid" => "sigmoid",
"gemini" => "gemini",
"vllm" => "vllm",
_ => "ollama",
};
}
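// Examples of the normalization above: "Claude" → "sigmoid", "GEMINI" → "gemini", "vLLM" → "vllm";
// anything unrecognized (including null/blank) falls back to "ollama".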
/// The effective model name, with any override applied.
private string ResolveModel()
{
if (_modelOverride != null) return _modelOverride;
return ResolveModelName();
}
private double ResolveTemperature() => _temperatureOverride ?? _settings.Settings.Llm.Temperature;
private string? ResolveReasoningEffort() => _reasoningEffortOverride;
/// Returns the API key for the given service.
private string ResolveApiKeyForService(string service)
{
var llm = _settings.Settings.Llm;
return NormalizeServiceName(service) switch
{
"gemini" => llm.GeminiApiKey,
"sigmoid" => llm.ClaudeApiKey,
"vllm" => CryptoService.DecryptIfEnabled(llm.VllmApiKey, llm.EncryptionEnabled),
"ollama" => CryptoService.DecryptIfEnabled(llm.OllamaApiKey, llm.EncryptionEnabled),
_ => "",
};
}
/// Returns the endpoint for the given service.
private string ResolveEndpointForService(string service)
{
var llm = _settings.Settings.Llm;
return NormalizeServiceName(service) switch
{
"vllm" => llm.VllmEndpoint,
"ollama" => llm.OllamaEndpoint,
_ => llm.Endpoint,
};
}
/// Token usage of the most recent request. Updated after streaming and non-streaming calls complete.
public TokenUsage? LastTokenUsage { get; private set; }
public LlmService(SettingsService settings)
{
_settings = settings;
_http = new HttpClient { Timeout = TimeSpan.FromMinutes(10) };
LoadSystemPrompt();
}
// ─── System prompt (loaded dynamically from the build output directory) ─────────────────────────
private void LoadSystemPrompt()
{
var exeDir = AppContext.BaseDirectory;
var promptFile = Path.Combine(exeDir, "system_prompt.txt");
if (File.Exists(promptFile))
{
_systemPrompt = File.ReadAllText(promptFile, Encoding.UTF8).Trim();
}
}
public string? SystemPrompt => _systemPrompt;
/// For internal services (Ollama/vLLM) the model name is decrypted according to the encryption mode; for external services it is returned as-is.
private string ResolveModelName()
{
var llm = _settings.Settings.Llm;
var service = NormalizeServiceName(llm.Service);
if (service is "ollama" or "vllm" && !string.IsNullOrEmpty(llm.Model))
return CryptoService.DecryptIfEnabled(llm.Model, llm.EncryptionEnabled);
return llm.Model;
}
/// <summary>
/// Finds the RegisteredModel matching the currently active model and returns its endpoint/API key.
/// If the RegisteredModel carries dedicated server info, that is used; otherwise the default settings apply.
/// </summary>
private (string Endpoint, string ApiKey) ResolveServerInfo()
{
var llm = _settings.Settings.Llm;
var activeService = ResolveService();
var modelName = ResolveModel();
// Look up the RegisteredModel entry whose model and service match the current ones
var registered = FindRegisteredModel(llm, activeService, modelName);
if (registered != null && !string.IsNullOrEmpty(registered.Endpoint))
{
var apiKey = !string.IsNullOrEmpty(registered.ApiKey)
? CryptoService.DecryptIfEnabled(registered.ApiKey, llm.EncryptionEnabled)
: GetDefaultApiKey(llm, activeService);
return (registered.Endpoint, apiKey);
}
// Fall back to the default endpoint
return activeService.ToLowerInvariant() switch
{
"vllm" => (llm.VllmEndpoint, CryptoService.DecryptIfEnabled(llm.VllmApiKey, llm.EncryptionEnabled)),
"ollama" => (llm.OllamaEndpoint, CryptoService.DecryptIfEnabled(llm.OllamaApiKey, llm.EncryptionEnabled)),
_ => ("", ""),
};
}
/// Finds the RegisteredModel for the currently active model.
private static Models.RegisteredModel? FindRegisteredModel(Models.LlmSettings llm, string service, string modelName)
{
return llm.RegisteredModels.FirstOrDefault(m =>
m.Service.Equals(service, StringComparison.OrdinalIgnoreCase) &&
(CryptoService.DecryptIfEnabled(m.EncryptedModelName, llm.EncryptionEnabled) == modelName ||
m.Alias == modelName));
}
/// <summary>
/// Returns the auth header value for the currently active model.
/// For CP4D authentication, a token is issued and cached automatically.
/// </summary>
internal async Task<string?> ResolveAuthTokenAsync(CancellationToken ct = default)
{
var llm = _settings.Settings.Llm;
var activeService = ResolveService();
var modelName = ResolveModel();
var registered = FindRegisteredModel(llm, activeService, modelName);
// CP4D authentication
if (registered != null &&
registered.AuthType.Equals("cp4d", StringComparison.OrdinalIgnoreCase) &&
!string.IsNullOrWhiteSpace(registered.Cp4dUrl))
{
var password = CryptoService.DecryptIfEnabled(registered.Cp4dPassword, llm.EncryptionEnabled);
var token = await Cp4dTokenService.GetTokenAsync(
registered.Cp4dUrl, registered.Cp4dUsername, password, ct);
return token;
}
// Default Bearer auth: return the existing API key
var (_, apiKey) = ResolveServerInfo();
return string.IsNullOrEmpty(apiKey) ? null : apiKey;
}
/// <summary>
/// Applies the auth header to an HttpRequestMessage.
/// CP4D auth gets an auto-issued token; plain Bearer auth uses the API key.
/// </summary>
private async Task ApplyAuthHeaderAsync(HttpRequestMessage req, CancellationToken ct)
{
var token = await ResolveAuthTokenAsync(ct);
if (!string.IsNullOrEmpty(token))
req.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", token);
}
private static string GetDefaultApiKey(LlmSettings llm, string? service = null)
{
var svc = service ?? llm.Service;
return svc.ToLowerInvariant() switch
{
"vllm" => CryptoService.DecryptIfEnabled(llm.VllmApiKey, llm.EncryptionEnabled),
"ollama" => CryptoService.DecryptIfEnabled(llm.OllamaApiKey, llm.EncryptionEnabled),
_ => "",
};
}
// ─── Non-streaming responses ───────────────────────────────────────────
public async Task<string> SendAsync(List<ChatMessage> messages, CancellationToken ct = default)
{
var llm = _settings.Settings.Llm;
var activeService = ResolveService();
EnsureOperationModeAllowsLlmService(activeService);
try
{
return await SendWithServiceAsync(activeService, messages, ct);
}
catch (Exception ex) when (llm.FallbackModels.Count > 0)
{
// Try the fallback models in order
foreach (var fallback in llm.FallbackModels)
{
var parts = fallback.Split(':', 2);
var fbService = parts[0].Trim();
var fbModel = parts.Length > 1 ? parts[1].Trim() : "";
try
{
EnsureOperationModeAllowsLlmService(fbService);
LogService.Warn($"모델 폴백: {activeService} → {fbService} ({ex.Message})");
LastFallbackInfo = $"{activeService} → {fbService}";
// Route the retry through the override stack so the fallback model (if any) is actually used
PushInferenceOverride(fbService, string.IsNullOrWhiteSpace(fbModel) ? null : fbModel);
try { return await SendWithServiceAsync(fbService, messages, ct); }
finally { PopInferenceOverride(); }
}
catch { continue; }
}
throw; // all fallback attempts failed
}
}
/// Information about the most recent fallback (for UI display). Null if no fallback occurred.
public string? LastFallbackInfo { get; private set; }
private Task<string> SendWithServiceAsync(string service, List<ChatMessage> messages, CancellationToken ct)
{
return NormalizeServiceName(service) switch
{
"gemini" => SendGeminiAsync(messages, ct),
"sigmoid" => SendSigmoidAsync(messages, ct),
"vllm" => SendOpenAiCompatibleAsync(messages, ct),
_ => SendOllamaAsync(messages, ct),
};
}
// ─── Streaming responses ────────────────────────────────────────────
public async IAsyncEnumerable<string> StreamAsync(
List<ChatMessage> messages,
[EnumeratorCancellation] CancellationToken ct = default)
{
var activeService = ResolveService();
EnsureOperationModeAllowsLlmService(activeService);
var stream = NormalizeServiceName(activeService) switch
{
"gemini" => StreamGeminiAsync(messages, ct),
"sigmoid" => StreamSigmoidAsync(messages, ct),
"vllm" => StreamOpenAiCompatibleAsync(messages, ct),
_ => StreamOllamaAsync(messages, ct),
};
await foreach (var chunk in stream.WithCancellation(ct))
yield return chunk;
}
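// Consumption sketch (illustrative; "llm", "messages", and "sb" are assumed to exist in the caller):
//   await foreach (var chunk in llm.StreamAsync(messages, ct))
//       sb.Append(chunk); // chunks arrive as partial text deltas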
// ─── Connection test ──────────────────────────────────────────────────
public async Task<(bool ok, string message)> TestConnectionAsync()
{
try
{
var llm = _settings.Settings.Llm;
var normalizedService = NormalizeServiceName(llm.Service);
if (OperationModePolicy.IsInternal(_settings.Settings) && IsExternalLlmService(normalizedService))
{
var blockedName = normalizedService == "sigmoid" ? "Claude" : "Gemini";
return (false, $"사내 모드에서는 {blockedName} 외부 LLM 연결이 차단됩니다.");
}
switch (normalizedService)
{
case "ollama":
var resp = await _http.GetAsync(llm.Endpoint.TrimEnd('/') + "/api/tags");
return resp.IsSuccessStatusCode
? (true, "Ollama 연결 성공")
: (false, ClassifyHttpError(resp));
case "vllm":
var vResp = await _http.GetAsync(llm.Endpoint.TrimEnd('/') + "/v1/models");
return vResp.IsSuccessStatusCode
? (true, "vLLM 연결 성공")
: (false, ClassifyHttpError(vResp));
case "gemini":
var gKey = ResolveApiKeyForService("gemini");
if (string.IsNullOrEmpty(gKey)) return (false, "API 키가 설정되지 않았습니다");
var gResp = await _http.GetAsync(
$"https://generativelanguage.googleapis.com/v1beta/models?key={gKey}");
return gResp.IsSuccessStatusCode
? (true, "Gemini API 연결 성공")
: (false, ClassifyHttpError(gResp));
case "sigmoid":
{
var cKey = ResolveApiKeyForService("sigmoid");
if (string.IsNullOrEmpty(cKey)) return (false, "API 키가 설정되지 않았습니다");
using var cReq = new HttpRequestMessage(HttpMethod.Get, $"https://{SigmoidApiHost}/v1/models");
cReq.Headers.Add("x-api-key", cKey);
cReq.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
var cResp = await _http.SendAsync(cReq);
return cResp.IsSuccessStatusCode
? (true, "Claude API 연결 성공")
: (false, ClassifyHttpError(cResp));
}
default:
return (false, "알 수 없는 서비스");
}
}
catch (TaskCanceledException)
{
return (false, "연결 시간 초과 — 서버가 응답하지 않습니다");
}
catch (HttpRequestException ex)
{
return (false, $"연결 실패 — {ex.Message}");
}
catch (Exception ex)
{
return (false, ex.Message);
}
}
// ═══════════════════════════════════════════════════════════════════════
// Ollama
// ═══════════════════════════════════════════════════════════════════════
private async Task<string> SendOllamaAsync(List<ChatMessage> messages, CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var (endpoint, _) = ResolveServerInfo();
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
var body = BuildOllamaBody(messages, stream: false);
var resp = await PostJsonWithRetryAsync(ep.TrimEnd('/') + "/api/chat", body, ct);
return SafeParseJson(resp, root =>
{
TryParseOllamaUsage(root);
return root.GetProperty("message").GetProperty("content").GetString() ?? "";
}, "Ollama 응답");
}
private async IAsyncEnumerable<string> StreamOllamaAsync(
List<ChatMessage> messages,
[EnumeratorCancellation] CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var (endpoint, _) = ResolveServerInfo();
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
var body = BuildOllamaBody(messages, stream: true);
var url = ep.TrimEnd('/') + "/api/chat";
using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
using var resp = await SendWithErrorClassificationAsync(req, ct);
using var stream = await resp.Content.ReadAsStreamAsync(ct);
using var reader = new StreamReader(stream);
while (!reader.EndOfStream && !ct.IsCancellationRequested)
{
var line = await ReadLineWithTimeoutAsync(reader, ct);
if (line == null) break;
if (string.IsNullOrEmpty(line)) continue;
string? text = null;
try
{
using var doc = JsonDocument.Parse(line);
if (doc.RootElement.TryGetProperty("message", out var msg) &&
msg.TryGetProperty("content", out var c))
text = c.GetString();
// Ollama includes token usage once done=true
if (doc.RootElement.TryGetProperty("done", out var done) && done.GetBoolean())
TryParseOllamaUsage(doc.RootElement);
}
catch (JsonException ex)
{
LogService.Warn($"Ollama 스트리밍 JSON 파싱 오류: {ex.Message}");
}
if (!string.IsNullOrEmpty(text)) yield return text;
}
}
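// Each streamed line above is NDJSON, roughly of the form (illustrative):
//   {"message":{"content":"partial text"},"done":false}
// and the final object carries done=true plus the token counts read by TryParseOllamaUsage.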
private object BuildOllamaBody(List<ChatMessage> messages, bool stream)
{
var llm = _settings.Settings.Llm;
var msgs = BuildMessageList(messages);
return new
{
model = ResolveModel(), // override-aware model name (falls back to ResolveModelName)
messages = msgs,
stream = stream,
options = new { temperature = ResolveTemperature() }
};
}
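// Serialized request shape sent to POST /api/chat (values illustrative):
//   { "model": "...", "messages": [ { "role": "user", "content": "..." } ],
//     "stream": false, "options": { "temperature": 0.7 } }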
// ═══════════════════════════════════════════════════════════════════════
// OpenAI-Compatible (vLLM)
// ═══════════════════════════════════════════════════════════════════════
private async Task<string> SendOpenAiCompatibleAsync(List<ChatMessage> messages, CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var (endpoint, _) = ResolveServerInfo();
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
var body = BuildOpenAiBody(messages, stream: false);
var url = ep.TrimEnd('/') + "/v1/chat/completions";
var json = JsonSerializer.Serialize(body);
using var req = new HttpRequestMessage(HttpMethod.Post, url)
{
Content = new StringContent(json, Encoding.UTF8, "application/json")
};
await ApplyAuthHeaderAsync(req, ct);
using var resp = await SendWithErrorClassificationAsync(req, ct);
var respBody = await resp.Content.ReadAsStringAsync(ct);
return SafeParseJson(respBody, root =>
{
TryParseOpenAiUsage(root);
var choices = root.GetProperty("choices");
if (choices.GetArrayLength() == 0) return "(빈 응답)";
return choices[0].GetProperty("message").GetProperty("content").GetString() ?? "";
}, "vLLM 응답");
}
private async IAsyncEnumerable<string> StreamOpenAiCompatibleAsync(
List<ChatMessage> messages,
[EnumeratorCancellation] CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var (endpoint, _) = ResolveServerInfo();
var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
var body = BuildOpenAiBody(messages, stream: true);
var url = ep.TrimEnd('/') + "/v1/chat/completions";
using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
await ApplyAuthHeaderAsync(req, ct);
using var resp = await SendWithErrorClassificationAsync(req, ct);
using var stream = await resp.Content.ReadAsStreamAsync(ct);
using var reader = new StreamReader(stream);
while (!reader.EndOfStream && !ct.IsCancellationRequested)
{
var line = await ReadLineWithTimeoutAsync(reader, ct);
if (line == null) break;
if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
var data = line["data: ".Length..];
if (data == "[DONE]") break;
string? text = null;
try
{
using var doc = JsonDocument.Parse(data);
TryParseOpenAiUsage(doc.RootElement);
var choices = doc.RootElement.GetProperty("choices");
if (choices.GetArrayLength() > 0)
{
var delta = choices[0].GetProperty("delta");
if (delta.TryGetProperty("content", out var c))
text = c.GetString();
}
}
catch (JsonException ex)
{
LogService.Warn($"vLLM 스트리밍 JSON 파싱 오류: {ex.Message}");
}
if (!string.IsNullOrEmpty(text)) yield return text;
}
}
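// Each SSE line above looks roughly like (illustrative):
//   data: {"choices":[{"delta":{"content":"partial text"}}]}
// and the stream terminates with:  data: [DONE]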
private object BuildOpenAiBody(List<ChatMessage> messages, bool stream)
{
var llm = _settings.Settings.Llm;
var msgs = BuildMessageList(messages, openAiVision: true);
var body = new Dictionary<string, object>
{
["model"] = ResolveModelName(),
["messages"] = msgs,
["stream"] = stream,
["temperature"] = ResolveTemperature(),
["max_tokens"] = llm.MaxContextTokens
};
var effort = ResolveReasoningEffort();
if (!string.IsNullOrWhiteSpace(effort))
body["reasoning_effort"] = effort;
return body;
}
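// Serialized request shape sent to POST /v1/chat/completions (values illustrative):
//   { "model": "...", "messages": [...], "stream": true, "temperature": 0.7,
//     "max_tokens": 8192, "reasoning_effort": "high" }  // reasoning_effort only when an effort override is set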
// ═══════════════════════════════════════════════════════════════════════
// Gemini
// ═══════════════════════════════════════════════════════════════════════
private async Task<string> SendGeminiAsync(List<ChatMessage> messages, CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var apiKey = ResolveApiKeyForService("gemini");
if (string.IsNullOrEmpty(apiKey))
throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다. 설정 > AX Agent에서 API 키를 입력하세요.");
var model = ResolveModel();
var body = BuildGeminiBody(messages);
var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={apiKey}";
var resp = await PostJsonWithRetryAsync(url, body, ct);
return SafeParseJson(resp, root =>
{
TryParseGeminiUsage(root);
var candidates = root.GetProperty("candidates");
if (candidates.GetArrayLength() == 0) return "(빈 응답)";
var parts = candidates[0].GetProperty("content").GetProperty("parts");
if (parts.GetArrayLength() == 0) return "(빈 응답)";
return parts[0].GetProperty("text").GetString() ?? "";
}, "Gemini 응답");
}
private async IAsyncEnumerable<string> StreamGeminiAsync(
List<ChatMessage> messages,
[EnumeratorCancellation] CancellationToken ct)
{
var llm = _settings.Settings.Llm;
var apiKey = ResolveApiKeyForService("gemini");
if (string.IsNullOrEmpty(apiKey))
throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다.");
var model = ResolveModel();
var body = BuildGeminiBody(messages);
var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:streamGenerateContent?alt=sse&key={apiKey}";
using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
using var resp = await SendWithErrorClassificationAsync(req, ct);
using var stream = await resp.Content.ReadAsStreamAsync(ct);
using var reader = new StreamReader(stream);
while (!reader.EndOfStream && !ct.IsCancellationRequested)
{
var line = await ReadLineWithTimeoutAsync(reader, ct);
if (line == null) break;
if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
var data = line["data: ".Length..];
string? parsed = null;
try
{
using var doc = JsonDocument.Parse(data);
TryParseGeminiUsage(doc.RootElement);
var candidates = doc.RootElement.GetProperty("candidates");
if (candidates.GetArrayLength() == 0) continue;
var sb = new StringBuilder();
var parts = candidates[0].GetProperty("content").GetProperty("parts");
foreach (var part in parts.EnumerateArray())
{
if (part.TryGetProperty("text", out var t))
{
var text = t.GetString();
if (!string.IsNullOrEmpty(text)) sb.Append(text);
}
}
if (sb.Length > 0) parsed = sb.ToString();
}
catch (JsonException ex)
{
LogService.Warn($"Gemini 스트리밍 JSON 파싱 오류: {ex.Message}");
}
if (parsed != null) yield return parsed;
}
}
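// Each SSE line above looks roughly like (illustrative):
//   data: {"candidates":[{"content":{"parts":[{"text":"partial text"}]}}]}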
private object BuildGeminiBody(List<ChatMessage> messages)
{
var llm = _settings.Settings.Llm;
var contents = new List<object>