using System.IO;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using AxCopilot.Models;
namespace AxCopilot.Services;
/// <summary>
/// Token usage of a single LLM request.
/// </summary>
/// <param name="PromptTokens">Tokens consumed by the prompt (input side).</param>
/// <param name="CompletionTokens">Tokens produced by the completion (output side).</param>
public record TokenUsage(int PromptTokens, int CompletionTokens)
{
    /// <summary>Total token count: prompt plus completion.</summary>
    public int TotalTokens => PromptTokens + CompletionTokens;
}
///
/// LLM API 호출 서비스. Ollama / vLLM / Gemini / Claude 백엔드를 지원합니다.
/// 스트리밍(SSE) 및 비스트리밍 모두 지원합니다.
///
public partial class LlmService : IDisposable
{
// HTTP client with default (validated) TLS, shared for the lifetime of the service.
private readonly HttpClient _http;
// HTTP client that accepts any server certificate; see the constructor's security note.
private readonly HttpClient _httpInsecure;
private readonly SettingsService _settings;
// Loaded from system_prompt.txt next to the executable; null when the file is absent.
private string? _systemPrompt;
private const int MaxRetries = 2;
// First chunk: time for the model to process the context (up to 3 minutes for large contexts).
private static readonly TimeSpan FirstChunkTimeout = TimeSpan.FromSeconds(180);
// Subsequent chunks: maximum gap between chunks once streaming has started.
private static readonly TimeSpan SubsequentChunkTimeout = TimeSpan.FromSeconds(45);
// NOTE(review): host/header literals are assembled via Concat — presumably to keep the
// vendor name out of plain-text source scans; confirm before inlining the literals.
private static readonly string SigmoidApiHost = string.Concat("api.", "an", "thr", "opic.com");
private static readonly string SigmoidApiVersionHeader = string.Concat("an", "thr", "opic-version");
private const string SigmoidApiVersion = "2023-06-01";
// ─── Automatic model-routing overrides ─────────────────────────────────
// Stack of saved override snapshots; PushInferenceOverride pushes, PopInferenceOverride restores (LIFO).
private readonly object _overrideLock = new();
private readonly Stack<(string? Service, string? Model, double? Temperature, string? ReasoningEffort)> _overrideStack = new();
private string? _serviceOverride;
private string? _modelOverride;
private double? _temperatureOverride;
private string? _reasoningEffortOverride;
/// <summary>
/// Pushes a service/model override for automatic routing.
/// Always call ClearRouteOverride() once the request completes.
/// </summary>
public void PushRouteOverride(string service, string model)
    => PushInferenceOverride(service, model, null, null);
/// <summary>Releases the most recent service/model override.</summary>
public void ClearRouteOverride() => PopInferenceOverride();
/// <summary>
/// Pushes model/inference parameter overrides; restore with PopInferenceOverride.
/// Null or blank arguments leave the corresponding override unchanged.
/// </summary>
public void PushInferenceOverride(
    string? service = null,
    string? model = null,
    double? temperature = null,
    string? reasoningEffort = null)
{
    lock (_overrideLock)
    {
        // Snapshot the current override state so Pop can restore it later.
        var snapshot = (_serviceOverride, _modelOverride, _temperatureOverride, _reasoningEffortOverride);
        _overrideStack.Push(snapshot);
        if (!string.IsNullOrWhiteSpace(service))
            _serviceOverride = service;
        if (!string.IsNullOrWhiteSpace(model))
            _modelOverride = model;
        if (temperature is double t)
            _temperatureOverride = t;
        if (!string.IsNullOrWhiteSpace(reasoningEffort))
            _reasoningEffortOverride = reasoningEffort.Trim();
    }
}
/// <summary>Restores the state saved by the most recent PushInferenceOverride.</summary>
public void PopInferenceOverride()
{
    lock (_overrideLock)
    {
        if (_overrideStack.TryPop(out var prev))
        {
            // Restore the snapshot taken at push time.
            _serviceOverride = prev.Service;
            _modelOverride = prev.Model;
            _temperatureOverride = prev.Temperature;
            _reasoningEffortOverride = prev.ReasoningEffort;
        }
        else
        {
            // Unbalanced pop: fall back to "no overrides active".
            _serviceOverride = null;
            _modelOverride = null;
            _temperatureOverride = null;
            _reasoningEffortOverride = null;
        }
    }
}
/// <summary>Returns the service and model name currently in effect (overrides applied).</summary>
public (string service, string model) GetCurrentModelInfo() => (ResolveService(), ResolveModel());
/// <summary>Effective service name, taking any active override into account.</summary>
private string ResolveService()
{
    string? overridden;
    lock (_overrideLock)
    {
        overridden = _serviceOverride;
    }
    return NormalizeServiceName(overridden ?? _settings.Settings.Llm.Service);
}
// True for services whose endpoints live outside the internal network.
private static bool IsExternalLlmService(string normalizedService)
{
    return normalizedService == "gemini" || normalizedService == "sigmoid";
}
/// <summary>Throws when internal operation mode forbids calling the given (external) LLM service.</summary>
private void EnsureOperationModeAllowsLlmService(string service)
{
    // Only internal mode restricts which services may be called.
    var internalMode = OperationModePolicy.IsInternal(_settings.Settings);
    var normalized = NormalizeServiceName(service);
    if (!internalMode || !IsExternalLlmService(normalized))
        return;
    var display = normalized == "sigmoid" ? "Claude" : "Gemini";
    throw new InvalidOperationException(
        $"사내 모드에서는 외부 LLM 호출이 차단됩니다: {display}. " +
        "설정에서 operationMode를 external로 변경하거나 사내 LLM(Ollama/vLLM)을 사용하세요.");
}
/// <summary>
/// Maps a user-facing service name to its canonical lowercase key.
/// Unknown, null, or empty values default to "ollama".
/// </summary>
private static string NormalizeServiceName(string? service)
{
    var key = (service ?? "").Trim().ToLowerInvariant();
    // NOTE(review): the "cl" + "aude" split mirrors the Concat-built vendor constants,
    // presumably to keep the vendor name out of plain-text scans — confirm before simplifying.
    if (key == "cl" + "aude" || key == "sigmoid")
        return "sigmoid";
    if (key == "gemini" || key == "vllm")
        return key;
    return "ollama";
}
/// <summary>Effective model name, taking any active override into account.</summary>
private string ResolveModel()
{
    string? overridden;
    lock (_overrideLock)
    {
        overridden = _modelOverride;
    }
    return overridden ?? ResolveModelName();
}
// Effective sampling temperature: override wins over the configured value.
// NOTE(review): _temperatureOverride is read here without taking _overrideLock,
// unlike ResolveService/ResolveModel — confirm this is intentional.
private double ResolveTemperature() => _temperatureOverride ?? _settings.Settings.Llm.Temperature;
// Normalized execution-profile key of the currently active registered model.
internal string GetActiveExecutionProfileKey()
    => Agent.ModelExecutionProfileCatalog.Normalize(GetActiveRegisteredModel()?.ExecutionProfile);
// Execution policy looked up from the active profile key.
internal Agent.ModelExecutionProfileCatalog.ExecutionPolicy GetActiveExecutionPolicy()
    => Agent.ModelExecutionProfileCatalog.Get(GetActiveExecutionProfileKey());
// Temperature used for tool calls; optionally capped by the active execution profile.
internal double ResolveToolTemperature()
{
    var temperature = ResolveTemperature();
    if (!_settings.Settings.Llm.UseAutomaticProfileTemperature)
        return temperature;
    var cap = GetActiveExecutionPolicy().ToolTemperatureCap;
    return cap is double limit ? Math.Min(temperature, limit) : temperature;
}
// Effective reasoning-effort hint; null when no override is active.
private string? ResolveReasoningEffort() => _reasoningEffortOverride;
/// <summary>
/// Heuristic: does the string look like a base64-encoded encrypted payload?
/// Requires length >= 32, a multiple of 4, and base64 characters only.
/// </summary>
private static bool LooksLikeEncryptedPayload(string value)
{
    if (string.IsNullOrWhiteSpace(value) || value.Length < 32 || value.Length % 4 != 0)
        return false;
    for (var i = 0; i < value.Length; i++)
    {
        // Reject the first character outside the base64 alphabet.
        if (value[i] is not ((>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or (>= '0' and <= '9') or '+' or '/' or '='))
            return false;
    }
    return true;
}
/// <summary>
/// Resolves a stored secret: empty for blanks or the saved-placeholder, decrypted when
/// encryption is enabled, and empty when decryption appears to have failed (the output
/// equals an encrypted-looking input).
/// </summary>
private static string ResolveSecretValue(string raw, bool encryptionEnabled)
{
    if (string.IsNullOrWhiteSpace(raw))
        return "";
    var trimmed = raw.Trim();
    // "(저장됨)" is presumably the UI placeholder for an already-saved secret — never a real value.
    if (trimmed == "(저장됨)")
        return "";
    if (!encryptionEnabled)
        return trimmed;
    var decrypted = CryptoService.DecryptIfEnabled(raw, encryptionEnabled).Trim();
    if (string.IsNullOrWhiteSpace(decrypted))
        return "";
    // An unchanged round-trip on a base64-looking value means decryption did not happen.
    var unchanged = string.Equals(decrypted, raw, StringComparison.Ordinal);
    return unchanged && LooksLikeEncryptedPayload(raw) ? "" : decrypted;
}
/// <summary>Returns the API key configured for the given service ("" when none exists).</summary>
private string ResolveApiKeyForService(string service)
{
    var llm = _settings.Settings.Llm;
    switch (NormalizeServiceName(service))
    {
        case "gemini":
            return llm.GeminiApiKey;
        case "sigmoid":
            return llm.ClaudeApiKey;
        case "vllm":
            return ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled);
        case "ollama":
            return ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled);
        default:
            return "";
    }
}
/// <summary>Returns the endpoint configured for the given service.</summary>
private string ResolveEndpointForService(string service)
{
    var llm = _settings.Settings.Llm;
    var normalized = NormalizeServiceName(service);
    if (normalized == "vllm")
        return llm.VllmEndpoint;
    if (normalized == "ollama")
        return llm.OllamaEndpoint;
    return llm.Endpoint;
}
/// <summary>Token usage of the most recent request; updated after both streaming and non-streaming calls.</summary>
public TokenUsage? LastTokenUsage { get; private set; }
/// <summary>Read-only view of the connection parameters currently in effect.</summary>
public record RuntimeConnectionSnapshot(
    string Service,
    string Model,
    string Endpoint,
    bool AllowInsecureTls,
    bool HasApiKey);
/// <summary>
/// Creates the service with two long-lived HttpClients (10-minute overall timeout each):
/// one with default TLS validation and one that accepts any server certificate.
/// Also loads the system prompt from disk if present.
/// </summary>
public LlmService(SettingsService settings)
{
    _settings = settings;
    _http = new HttpClient { Timeout = TimeSpan.FromMinutes(10) };
    // SECURITY: this handler disables certificate validation entirely; presumably it is
    // only selected for endpoints that opt in via AllowInsecureTls — confirm at call sites.
    var insecureHandler = new HttpClientHandler
    {
        ServerCertificateCustomValidationCallback = HttpClientHandler.DangerousAcceptAnyServerCertificateValidator
    };
    _httpInsecure = new HttpClient(insecureHandler) { Timeout = TimeSpan.FromMinutes(10) };
    LoadSystemPrompt();
}
/// <summary>Builds a snapshot of the connection settings currently in effect.</summary>
public RuntimeConnectionSnapshot GetRuntimeConnectionSnapshot()
{
    var service = ResolveService();
    var model = ResolveModel();
    var (endpoint, apiKey, allowInsecureTls) = ResolveServerInfo();
    // Fall back to the per-service default endpoint when none was resolved.
    var effectiveEndpoint = string.IsNullOrWhiteSpace(endpoint)
        ? ResolveEndpointForService(service)
        : endpoint;
    var hasApiKey = !string.IsNullOrWhiteSpace(apiKey);
    return new RuntimeConnectionSnapshot(service, model, effectiveEndpoint ?? "", allowInsecureTls, hasApiKey);
}
// ─── System prompt (loaded dynamically from the build output directory) ─────────
private void LoadSystemPrompt()
{
    var promptFile = Path.Combine(AppContext.BaseDirectory, "system_prompt.txt");
    if (!File.Exists(promptFile))
        return;
    _systemPrompt = File.ReadAllText(promptFile, Encoding.UTF8).Trim();
}
/// <summary>System prompt loaded from system_prompt.txt, or null when the file is absent.</summary>
public string? SystemPrompt => _systemPrompt;
/// <summary>
/// Resolves the configured model name. For internal services (Ollama/vLLM) the stored
/// name may be encrypted and is decrypted according to the encryption mode; other
/// services use the value as-is.
/// </summary>
private string ResolveModelName()
{
    var llm = _settings.Settings.Llm;
    var service = NormalizeServiceName(llm.Service);
    var isInternal = service is "ollama" or "vllm";
    if (!isInternal || string.IsNullOrEmpty(llm.Model))
        return llm.Model;
    // Prefer the decrypted name of a matching registered model, if any.
    var registered = FindRegisteredModel(llm, service, llm.Model);
    if (registered != null)
    {
        var decryptedName = CryptoService.DecryptIfEnabled(registered.EncryptedModelName, llm.EncryptionEnabled);
        if (!string.IsNullOrWhiteSpace(decryptedName))
            return decryptedName;
    }
    return CryptoService.DecryptIfEnabled(llm.Model, llm.EncryptionEnabled);
}
// Max tokens sent to OpenAI-compatible backends; vLLM is additionally capped at 8192.
private int ResolveOpenAiCompatibleMaxTokens()
{
    var llm = _settings.Settings.Llm;
    var requested = Math.Clamp(llm.MaxContextTokens, 1, 1_000_000);
    var isVllm = NormalizeServiceName(llm.Service) == "vllm";
    return isVllm ? Math.Min(requested, 8192) : requested;
}
/// <summary>
/// Finds the RegisteredModel matching the currently active model and returns its
/// endpoint / API key / TLS policy. A registered model's dedicated server info takes
/// precedence; otherwise the default settings for the active service are used.
/// </summary>
private (string Endpoint, string ApiKey, bool AllowInsecureTls) ResolveServerInfo()
{
    var llm = _settings.Settings.Llm;
    var activeService = ResolveService();
    var modelName = ResolveModel();
    // Look up a RegisteredModel whose service and model match the active ones.
    var registered = FindRegisteredModel(llm, activeService, modelName);
    if (registered != null)
    {
        var endpoint = string.IsNullOrWhiteSpace(registered.Endpoint)
            ? ResolveEndpointForService(activeService)
            : registered.Endpoint;
        var apiKey = !string.IsNullOrEmpty(registered.ApiKey)
            ? ResolveSecretValue(registered.ApiKey, llm.EncryptionEnabled)
            : GetDefaultApiKey(llm, activeService);
        // Insecure TLS is only ever honored for vLLM (per-model OR global flag).
        var allowInsecureTls = activeService == "vllm"
            ? (registered.AllowInsecureTls || llm.VllmAllowInsecureTls)
            : false;
        return (endpoint, apiKey, allowInsecureTls);
    }
    // No registered match: use the default endpoint for the service.
    // (Non-internal services yield empty strings here.)
    return activeService.ToLowerInvariant() switch
    {
        "vllm" => (llm.VllmEndpoint, ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled), llm.VllmAllowInsecureTls),
        "ollama" => (llm.OllamaEndpoint, ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled), false),
        _ => ("", "", false),
    };
}
/// <summary>Finds the RegisteredModel matching the given service and model name (or alias).</summary>
private static Models.RegisteredModel? FindRegisteredModel(Models.LlmSettings llm, string service, string modelName)
{
    foreach (var candidate in llm.RegisteredModels)
    {
        if (!candidate.Service.Equals(service, StringComparison.OrdinalIgnoreCase))
            continue;
        var decryptedName = CryptoService.DecryptIfEnabled(candidate.EncryptedModelName, llm.EncryptionEnabled);
        if (decryptedName == modelName || candidate.Alias == modelName)
            return candidate;
    }
    return null;
}
// RegisteredModel entry matching the currently active service/model, if any.
private Models.RegisteredModel? GetActiveRegisteredModel()
    => FindRegisteredModel(_settings.Settings.Llm, ResolveService(), ResolveModel());
/// <summary>
/// True when the vLLM service is actually an IBM-style deployment chat API,
/// detected from the registered model's auth type plus the endpoint path.
/// </summary>
private static bool UsesIbmDeploymentChatApi(string service, Models.RegisteredModel? registered, string? endpoint)
{
    if (registered == null || !string.Equals(NormalizeServiceName(service), "vllm", StringComparison.OrdinalIgnoreCase))
        return false;
    var authType = (registered.AuthType ?? "").Trim().ToLowerInvariant();
    var isIbmAuth = authType switch
    {
        "ibm_iam" or "cp4d" or "cp4d_password" or "cp4d_api_key" => true,
        _ => false,
    };
    if (!isIbmAuth)
        return false;
    var path = (endpoint ?? "").Trim().ToLowerInvariant();
    return path.Contains("/ml/") || path.Contains("/deployments/") || path.Contains("/text/chat");
}
/// <summary>
/// Maps an IBM deployment endpoint to its chat URL, switching between the
/// /text/chat (non-streaming) and /text/chat_stream (streaming) variants.
/// Unknown shapes are returned unchanged.
/// </summary>
private string BuildIbmDeploymentChatUrl(string endpoint, bool stream)
{
    var trimmed = (endpoint ?? "").Trim();
    if (string.IsNullOrWhiteSpace(trimmed))
        throw new InvalidOperationException("IBM 배포형 vLLM 엔드포인트가 비어 있습니다.");
    var lower = trimmed.ToLowerInvariant();
    if (lower.Contains("/text/chat_stream"))
    {
        // Already the streaming URL; downgrade it for non-streaming calls.
        return stream
            ? trimmed
            : trimmed.Replace("/text/chat_stream", "/text/chat", StringComparison.OrdinalIgnoreCase);
    }
    if (lower.Contains("/text/chat"))
    {
        return stream
            ? trimmed.Replace("/text/chat", "/text/chat_stream", StringComparison.OrdinalIgnoreCase)
            : trimmed;
    }
    if (lower.Contains("/deployments/"))
    {
        // Bare deployment URL: append the appropriate chat suffix.
        var suffix = stream ? "/text/chat_stream" : "/text/chat";
        return trimmed.TrimEnd('/') + suffix;
    }
    return trimmed;
}
private object BuildIbmDeploymentBody(List messages)
{
var msgs = new List