- AxAgentExecutionEngine에서 시스템 프롬프트 중복을 제거하고 structured tool_use/tool_result 전사본을 conversation.Messages로 동기화해 다음 턴과 저장 이력에서도 코드 작업 컨텍스트가 유지되도록 수정 - AgentQueryContextBuilder와 ContextCondenser에 post-compact tool snippet 복원, recent window 확대, tool result 보존 강화 로직을 추가해 장기 코드 실행 중 빌드/파일 근거 손실을 줄임 - MaxContextTokens=0 Auto 모드를 AppSettings, SettingsService 마이그레이션, 설정 UI, 오버레이 UI, 컨텍스트 사용량 표시, LLM 요청 본문에 연결하고 Auto 모드에서는 provider output cap 강제 주입을 제거 - 관련 회귀 테스트와 문서 README/DEVELOPMENT/CODE_CONTEXT_RELIABILITY_PLAN을 갱신하고 깨진 진단 문자열 기대값을 영어 기준으로 정리 검증: - dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify_context_reliability_followup\\ -p:IntermediateOutputPath=obj\\verify_context_reliability_followup\\ - dotnet test src/AxCopilot.Tests/AxCopilot.Tests.csproj -c Release -v minimal --filter "AxAgentExecutionEngineTests|AgentQueryContextBuilderTests|ContextCondenserTests|SettingsServiceTests|AgentLoopDiagnosticsFormatterTests" -p:OutputPath=bin\\verify_context_reliability_followup_tests\\ -p:IntermediateOutputPath=obj\\verify_context_reliability_followup_tests\\ - dotnet test src/AxCopilot.Tests/AxCopilot.Tests.csproj -c Release -v minimal --filter "AgentLoopQueryAssemblyServiceTests|AgentLoopPreLlmStageServiceTests|AgentLoopLlmRequestPreparationServiceTests|AgentMessageInvariantHelperTests|CodeTaskWorkingSetServiceTests|AgentLoopE2ETests" -p:OutputPath=bin\\verify_context_reliability_followup_tests2\\ -p:IntermediateOutputPath=obj\\verify_context_reliability_followup_tests2\\ - dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify_context_reliability_final\\ -p:IntermediateOutputPath=obj\\verify_context_reliability_final\\
1752 lines
76 KiB
C#
1752 lines
76 KiB
C#
using System.IO;
|
|
using System.Net.Http;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using AxCopilot.Models;
|
|
using AxCopilot.Services.Agent;
|
|
|
|
namespace AxCopilot.Services;
|
|
|
|
/// <summary>Token usage reported for a single LLM request.</summary>
/// <param name="PromptTokens">Number of tokens consumed by the prompt.</param>
/// <param name="CompletionTokens">Number of tokens produced in the completion.</param>
public record TokenUsage(int PromptTokens, int CompletionTokens)
{
    /// <summary>Sum of <see cref="PromptTokens"/> and <see cref="CompletionTokens"/>.</summary>
    public int TotalTokens
    {
        get { return PromptTokens + CompletionTokens; }
    }
}
|
|
|
|
/// <summary>
|
|
/// LLM API 호출 서비스. Ollama / vLLM / Gemini / Claude 백엔드를 지원합니다.
|
|
/// 스트리밍(SSE) 및 비스트리밍 모두 지원합니다.
|
|
/// </summary>
|
|
public partial class LlmService : ILlmService
|
|
{
|
|
// HTTP client used for regular requests; created in the constructor with a 10-minute timeout.
private readonly HttpClient _http;
|
|
// HTTP client whose handler accepts any server certificate (see the constructor).
// NOTE(review): presumably only used when insecure TLS is explicitly allowed — confirm against SendWithTlsAsync.
private readonly HttpClient _httpInsecure;
|
|
// Settings provider injected through the constructor; LLM configuration is read from _settings.Settings.Llm.
private readonly SettingsService _settings;
|
|
// System prompt text loaded by LoadSystemPrompt(); stays null when system_prompt.txt does not exist.
private string? _systemPrompt;
|
|
|
|
// NOTE(review): retry limit; its consumer is not visible in this part of the file (presumably PostJsonWithRetryAsync).
private const int MaxRetries = 2;
|
|
|
|
/// <summary>Whether IBM+Qwen diagnostic logging is enabled (mirrors the EnableIbmDiagnosticLog setting).</summary>
private bool IsIbmDiagEnabled
{
    get { return _settings.Settings.Llm.EnableIbmDiagnosticLog; }
}
|
|
|
|
/// <summary>
/// IBM diagnostic "debug" log. Emits only when EnableIbmDiagnosticLog is true.
/// The message is written through <c>LogService.Info</c> with an "[IBM진단:DBG]" prefix.
/// </summary>
/// <param name="msg">Message text to log.</param>
private void IbmDiagDebug(string msg)
{
    if (!IsIbmDiagEnabled)
    {
        return;
    }

    LogService.Info($"[IBM진단:DBG] {msg}");
}
|
|
|
|
/// <summary>IBM diagnostic info log. Emits only when EnableIbmDiagnosticLog is true.</summary>
/// <param name="msg">Message text to log verbatim.</param>
private void IbmDiagInfo(string msg)
{
    if (!IsIbmDiagEnabled)
    {
        return;
    }

    LogService.Info(msg);
}
|
|
|
|
/// <summary>
/// IBM diagnostic error log. Always written, regardless of the diagnostic setting
/// (errors are always recorded).
/// </summary>
/// <param name="msg">Message text to log verbatim.</param>
private static void IbmDiagError(string msg)
{
    LogService.Error(msg);
}
|
|
|
|
// First chunk: time the model spends processing the context (up to 3 minutes allowed for large contexts).
|
|
private static readonly TimeSpan FirstChunkTimeout = TimeSpan.FromSeconds(180);
|
|
// Subsequent chunks: maximum allowed gap between chunks once streaming has started.
|
|
private static readonly TimeSpan SubsequentChunkTimeout = TimeSpan.FromSeconds(45);
|
|
// Host name of the "sigmoid" backend API, assembled from string fragments.
private static readonly string SigmoidApiHost = string.Concat("api.", "an", "thr", "opic.com");
|
|
// Name of the API-version request header sent to the "sigmoid" backend (used in TestConnectionAsync).
private static readonly string SigmoidApiVersionHeader = string.Concat("an", "thr", "opic-version");
|
|
// Value sent in the API-version header.
private const string SigmoidApiVersion = "2023-06-01";
|
|
|
|
// ─── Automatic model routing overrides ─────────────────────────────────
|
|
// Guards the override stack and the four override fields below in Push/PopInferenceOverride.
private readonly object _overrideLock = new();
|
|
// Snapshots of the override fields taken by PushInferenceOverride and restored by PopInferenceOverride.
private readonly Stack<(string? Service, string? Model, double? Temperature, string? ReasoningEffort)> _overrideStack = new();
|
|
// Currently active service override; null means "use the configured service".
private string? _serviceOverride;
|
|
// Currently active model override; null means "use the configured model".
private string? _modelOverride;
|
|
// Currently active temperature override; null means "use the configured temperature".
private double? _temperatureOverride;
|
|
// Currently active reasoning-effort override; null when none was pushed.
private string? _reasoningEffortOverride;
|
|
|
|
/// <summary>
/// Sets the service/model override used for automatic routing.
/// Callers must invoke <see cref="ClearRouteOverride"/> once the request completes.
/// </summary>
/// <param name="service">Service name to route to.</param>
/// <param name="model">Model name to route to.</param>
public void PushRouteOverride(string service, string model)
    => PushInferenceOverride(service, model, temperature: null, reasoningEffort: null);
|
|
|
|
/// <summary>Removes the most recently pushed service/model override.</summary>
public void ClearRouteOverride()
    => PopInferenceOverride();
|
|
|
|
/// <summary>
/// Pushes a model/inference-parameter override. The previous override state is saved on a stack
/// and can be restored with <see cref="PopInferenceOverride"/>.
/// </summary>
/// <param name="service">Service override; ignored when null or whitespace.</param>
/// <param name="model">Model override; ignored when null or whitespace.</param>
/// <param name="temperature">Temperature override; ignored when null.</param>
/// <param name="reasoningEffort">Reasoning-effort override (stored trimmed); ignored when null or whitespace.</param>
public void PushInferenceOverride(
    string? service = null,
    string? model = null,
    double? temperature = null,
    string? reasoningEffort = null)
{
    // Normalize outside the lock; only field reads/writes need to be guarded.
    var trimmedEffort = string.IsNullOrWhiteSpace(reasoningEffort) ? null : reasoningEffort.Trim();

    lock (_overrideLock)
    {
        // Save the current state first so Pop can restore exactly what was active before this push.
        _overrideStack.Push((_serviceOverride, _modelOverride, _temperatureOverride, _reasoningEffortOverride));

        if (!string.IsNullOrWhiteSpace(service))
        {
            _serviceOverride = service;
        }

        if (!string.IsNullOrWhiteSpace(model))
        {
            _modelOverride = model;
        }

        if (temperature is { } requestedTemperature)
        {
            _temperatureOverride = requestedTemperature;
        }

        if (trimmedEffort is not null)
        {
            _reasoningEffortOverride = trimmedEffort;
        }
    }
}
|
|
|
|
/// <summary>
/// Restores the override state saved by the most recent <see cref="PushInferenceOverride"/>.
/// When nothing was pushed, all overrides are cleared.
/// </summary>
public void PopInferenceOverride()
{
    lock (_overrideLock)
    {
        // An empty stack restores the "no override" state: default(tuple) is all nulls.
        var restored = _overrideStack.Count > 0 ? _overrideStack.Pop() : default;

        _serviceOverride = restored.Service;
        _modelOverride = restored.Model;
        _temperatureOverride = restored.Temperature;
        _reasoningEffortOverride = restored.ReasoningEffort;
    }
}
|
|
|
|
/// <summary>Returns the service name and model name that are currently active (overrides included).</summary>
/// <returns>Tuple of the normalized service name and the resolved model name.</returns>
public (string service, string model) GetCurrentModelInfo()
{
    var activeService = ResolveService();
    var activeModel = ResolveModel();
    return (activeService, activeModel);
}
|
|
|
|
/// <summary>Effective service name, taking the pushed override into account.</summary>
/// <returns>The override (if any) or the configured service, passed through <see cref="NormalizeServiceName"/>.</returns>
private string ResolveService()
{
    string? overridden;
    lock (_overrideLock)
    {
        overridden = _serviceOverride;
    }

    var effective = overridden ?? _settings.Settings.Llm.Service;
    return NormalizeServiceName(effective);
}
|
|
|
|
/// <summary>Tells whether an already-normalized service name refers to an external LLM backend.</summary>
/// <param name="normalizedService">Service name as returned by <c>NormalizeServiceName</c>.</param>
/// <returns><c>true</c> for "gemini" and "sigmoid"; <c>false</c> for anything else.</returns>
private static bool IsExternalLlmService(string normalizedService)
{
    switch (normalizedService)
    {
        case "gemini":
        case "sigmoid":
            return true;
        default:
            return false;
    }
}
|
|
|
|
/// <summary>
/// Blocks calls to external LLM services while the application runs in internal operation mode.
/// </summary>
/// <param name="service">Service name (normalized internally).</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the operation mode is internal and <paramref name="service"/> is an external service.
/// </exception>
private void EnsureOperationModeAllowsLlmService(string service)
{
    if (OperationModePolicy.IsInternal(_settings.Settings))
    {
        var normalized = NormalizeServiceName(service);
        if (IsExternalLlmService(normalized))
        {
            // Only two external services exist, so anything that is not "sigmoid" is Gemini.
            var display = normalized == "sigmoid" ? "Claude" : "Gemini";
            throw new InvalidOperationException(
                $"사내 모드에서는 외부 LLM 호출이 차단됩니다: {display}. " +
                "설정에서 operationMode를 external로 변경하거나 사내 LLM(Ollama/vLLM)을 사용하세요.");
        }
    }
}
|
|
|
|
/// <summary>
/// Maps a user/config supplied service name onto one of the canonical keys
/// "sigmoid", "gemini", "vllm" or "ollama".
/// </summary>
/// <param name="service">Raw service name; may be null, padded, or in any letter case.</param>
/// <returns>The canonical key. Unknown, empty and null inputs all map to "ollama".</returns>
private static string NormalizeServiceName(string? service)
{
    var key = (service ?? "").Trim().ToLowerInvariant();

    if (key == "cl" + "aude" || key == "sigmoid")
    {
        return "sigmoid";
    }

    if (key == "gemini" || key == "vllm")
    {
        return key;
    }

    return "ollama";
}
|
|
|
|
/// <summary>Effective model name, taking the pushed override into account.</summary>
/// <returns>The model override when one is active; otherwise the result of <see cref="ResolveModelName"/>.</returns>
private string ResolveModel()
{
    string? overridden;
    lock (_overrideLock)
    {
        overridden = _modelOverride;
    }

    return overridden ?? ResolveModelName();
}
|
|
|
|
/// <summary>Effective sampling temperature, taking the pushed override into account.</summary>
/// <returns>The temperature override when one is active; otherwise the configured temperature.</returns>
private double ResolveTemperature()
{
    // The override is a double? written under _overrideLock by Push/PopInferenceOverride.
    // Reading it under the same lock (as ResolveService/ResolveModel do) avoids observing a
    // half-written Nullable<double>, whose reads are not guaranteed to be atomic.
    double? overridden;
    lock (_overrideLock)
    {
        overridden = _temperatureOverride;
    }

    return overridden ?? _settings.Settings.Llm.Temperature;
}
|
|
|
|
/// <summary>
/// Returns the execution-profile key of the active registered model, passed through
/// <c>ModelExecutionProfileCatalog.Normalize</c>. A null profile is forwarded when no registered model matches.
/// </summary>
internal string GetActiveExecutionProfileKey()
{
    var activeModel = GetActiveRegisteredModel();
    return Agent.ModelExecutionProfileCatalog.Normalize(activeModel?.ExecutionProfile);
}
|
|
|
|
/// <summary>Looks up the execution policy registered for the active execution-profile key.</summary>
public Agent.ModelExecutionProfileCatalog.ExecutionPolicy GetActiveExecutionPolicy()
{
    var profileKey = GetActiveExecutionProfileKey();
    return Agent.ModelExecutionProfileCatalog.Get(profileKey);
}
|
|
|
|
/// <summary>
/// Temperature to use for tool-calling requests. When UseAutomaticProfileTemperature is enabled and
/// the active execution policy defines a ToolTemperatureCap, the resolved temperature is limited to that cap.
/// </summary>
/// <returns>The resolved temperature, possibly lowered to the policy cap.</returns>
internal double ResolveToolTemperature()
{
    var baseTemperature = ResolveTemperature();

    if (_settings.Settings.Llm.UseAutomaticProfileTemperature
        && GetActiveExecutionPolicy().ToolTemperatureCap is { } cap)
    {
        return Math.Min(baseTemperature, cap);
    }

    return baseTemperature;
}
|
|
|
|
/// <summary>Currently active reasoning-effort override, or null when none was pushed.</summary>
private string? ResolveReasoningEffort()
{
    // Read under the same lock that Push/PopInferenceOverride write under, matching
    // ResolveService/ResolveModel, so the value is consistent with a concurrent push/pop.
    lock (_overrideLock)
    {
        return _reasoningEffortOverride;
    }
}
|
|
|
|
/// <summary>
/// Heuristic check for "this string looks like a Base64 payload": at least 32 characters,
/// length divisible by 4, and made up only of A-Z, a-z, 0-9, '+', '/' and '='.
/// </summary>
/// <param name="value">Candidate text.</param>
/// <returns><c>true</c> when every condition above holds; otherwise <c>false</c>.</returns>
private static bool LooksLikeEncryptedPayload(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return false;
    }

    if (value.Length < 32 || value.Length % 4 != 0)
    {
        return false;
    }

    for (var i = 0; i < value.Length; i++)
    {
        var inAlphabet = value[i] is (>= 'A' and <= 'Z')
            or (>= 'a' and <= 'z')
            or (>= '0' and <= '9')
            or '+' or '/' or '=';
        if (!inAlphabet)
        {
            return false;
        }
    }

    return true;
}
|
|
|
|
/// <summary>
/// Turns a stored secret into the value to send on the wire.
/// </summary>
/// <param name="raw">Stored value; may be blank, the "(저장됨)" placeholder, plain text or an encrypted payload.</param>
/// <param name="encryptionEnabled">Whether stored secrets are expected to be encrypted.</param>
/// <returns>
/// An empty string for blank input, for the placeholder, for a blank decryption result, and for a value
/// that came back unchanged from decryption while still looking like an encrypted payload.
/// Otherwise the trimmed plain or decrypted value.
/// </returns>
private static string ResolveSecretValue(string raw, bool encryptionEnabled)
{
    if (string.IsNullOrWhiteSpace(raw) || raw.Trim() == "(저장됨)")
    {
        return "";
    }

    if (!encryptionEnabled)
    {
        return raw.Trim();
    }

    var plain = CryptoService.DecryptIfEnabled(raw, encryptionEnabled).Trim();
    if (string.IsNullOrWhiteSpace(plain))
    {
        return "";
    }

    // Decryption that returns its input unchanged while the input still looks like ciphertext
    // is treated as "no usable secret".
    var cameBackUnchanged = string.Equals(plain, raw, StringComparison.Ordinal);
    return cameBackUnchanged && LooksLikeEncryptedPayload(raw) ? "" : plain;
}
|
|
|
|
/// <summary>Returns the configured API key for the given service.</summary>
/// <param name="service">Service name (normalized internally).</param>
/// <returns>
/// The Gemini or Claude key as stored, or the vLLM/Ollama key passed through <c>ResolveSecretValue</c>.
/// </returns>
private string ResolveApiKeyForService(string service)
{
    var llm = _settings.Settings.Llm;

    switch (NormalizeServiceName(service))
    {
        case "gemini":
            return llm.GeminiApiKey;
        case "sigmoid":
            return llm.ClaudeApiKey;
        case "vllm":
            return ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled);
        case "ollama":
            return ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled);
        default:
            return "";
    }
}
|
|
|
|
/// <summary>Returns the configured endpoint for the given service.</summary>
/// <param name="service">Service name (normalized internally).</param>
/// <returns>The vLLM or Ollama endpoint for those services; the generic <c>Endpoint</c> setting otherwise.</returns>
private string ResolveEndpointForService(string service)
{
    var llm = _settings.Settings.Llm;
    var key = NormalizeServiceName(service);

    if (key == "vllm")
    {
        return llm.VllmEndpoint;
    }

    if (key == "ollama")
    {
        return llm.OllamaEndpoint;
    }

    return llm.Endpoint;
}
|
|
|
|
/// <summary>Token usage of the most recent request. Updated after a streaming or non-streaming request completes.</summary>
|
|
public TokenUsage? LastTokenUsage { get; private set; }
|
|
|
|
/// <summary>
/// Creates the service, sets up the two HTTP clients (10-minute timeout each) and loads the system prompt.
/// </summary>
/// <param name="settings">Settings provider that all LLM configuration is read from.</param>
public LlmService(SettingsService settings)
{
    var requestTimeout = TimeSpan.FromMinutes(10);

    _settings = settings;
    _http = new HttpClient { Timeout = requestTimeout };

    // SECURITY: this client skips server-certificate validation entirely.
    _httpInsecure = new HttpClient(new HttpClientHandler
    {
        ServerCertificateCustomValidationCallback = HttpClientHandler.DangerousAcceptAnyServerCertificateValidator
    })
    {
        Timeout = requestTimeout
    };

    LoadSystemPrompt();
}
|
|
|
|
/// <summary>
/// Captures the connection parameters that would be used right now: service, model, endpoint,
/// whether insecure TLS is allowed, and whether an API key is present.
/// </summary>
public RuntimeConnectionSnapshot GetRuntimeConnectionSnapshot()
{
    var service = ResolveService();
    var model = ResolveModel();
    var server = ResolveServerInfo();

    // Fall back to the per-service endpoint when the server info carries none.
    var endpoint = string.IsNullOrWhiteSpace(server.Endpoint)
        ? ResolveEndpointForService(service)
        : server.Endpoint;
    var hasApiKey = !string.IsNullOrWhiteSpace(server.ApiKey);

    return new RuntimeConnectionSnapshot(
        service,
        model,
        endpoint ?? "",
        server.AllowInsecureTls,
        hasApiKey);
}
|
|
|
|
// ─── 시스템 프롬프트 (빌드 경로에서 동적 로딩) ─────────────────────────
|
|
|
|
/// <summary>
/// Loads the optional <c>system_prompt.txt</c> from the application base directory into
/// <c>_systemPrompt</c>. The prompt stays null when the file is missing or cannot be read.
/// </summary>
private void LoadSystemPrompt()
{
    var promptFile = Path.Combine(AppContext.BaseDirectory, "system_prompt.txt");
    if (!File.Exists(promptFile))
    {
        return;
    }

    try
    {
        _systemPrompt = File.ReadAllText(promptFile, Encoding.UTF8).Trim();
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // This runs inside the constructor: an unreadable optional file (locked, no permission)
        // must not prevent the whole service from being created.
        LogService.Warn($"시스템 프롬프트 로드 실패: {ex.Message}");
    }
}
|
|
|
|
/// <summary>System prompt loaded at construction time, or null when no prompt file was found.</summary>
public string? SystemPrompt
{
    get { return _systemPrompt; }
}
|
|
|
|
/// <summary>
/// Resolves the configured model name. For internal services (Ollama/vLLM) the name is decrypted
/// according to the encryption setting; for external services it is returned as stored.
/// </summary>
/// <returns>
/// For Ollama/vLLM with a non-empty model: the decrypted name of the matching registered model when
/// that is non-blank, otherwise the decrypted configured model. In all other cases the configured model as-is.
/// </returns>
private string ResolveModelName()
{
    var llm = _settings.Settings.Llm;
    var service = NormalizeServiceName(llm.Service);

    var isInternalService = service == "ollama" || service == "vllm";
    if (!isInternalService || string.IsNullOrEmpty(llm.Model))
    {
        return llm.Model;
    }

    var registered = FindRegisteredModel(llm, service, llm.Model);
    var registeredModelName = registered is null
        ? null
        : CryptoService.DecryptIfEnabled(registered.EncryptedModelName, llm.EncryptionEnabled);

    return string.IsNullOrWhiteSpace(registeredModelName)
        ? CryptoService.DecryptIfEnabled(llm.Model, llm.EncryptionEnabled)
        : registeredModelName;
}
|
|
|
|
/// <summary>
/// Reads the MaxContextTokens setting as an explicit token limit.
/// </summary>
/// <returns>
/// Null when the setting is zero or negative; otherwise the setting clamped to the range 1..1,000,000.
/// </returns>
private int? ResolveConfiguredMaxOutputTokens()
{
    var configured = _settings.Settings.Llm.MaxContextTokens;
    if (configured <= 0)
    {
        return null;
    }

    return Math.Clamp(configured, 1, 1_000_000);
}
|
|
|
|
/// <summary>
/// Token limit to send to OpenAI-compatible backends.
/// </summary>
/// <returns>
/// Null when no explicit limit is configured; the configured limit capped at 8192 when the active
/// service is vLLM; the configured limit unchanged otherwise.
/// </returns>
private int? ResolveOpenAiCompatibleMaxTokens()
{
    var requested = ResolveConfiguredMaxOutputTokens();
    if (!requested.HasValue)
    {
        return null;
    }

    // Decide on the service that is actually being called. ResolveService() honours a pushed
    // route override, whereas the raw setting would apply (or skip) the vLLM cap for the wrong backend.
    return ResolveService() == "vllm"
        ? Math.Min(requested.Value, 8192)
        : requested;
}
|
|
|
|
/// <summary>
/// Finds the registered model matching the active service/model and returns its endpoint and API key.
/// A registered model's own server information wins; anything it leaves blank falls back to the
/// default settings of the active service.
/// </summary>
/// <returns>
/// Endpoint, API key and the insecure-TLS flag. Insecure TLS can only be true for vLLM.
/// Without a registered model, services other than vLLM/Ollama yield empty strings and false.
/// </returns>
private (string Endpoint, string ApiKey, bool AllowInsecureTls) ResolveServerInfo()
{
    var llm = _settings.Settings.Llm;
    var activeService = ResolveService();
    var modelName = ResolveModel();

    // Look for a registered model whose service and name/alias match the active ones.
    var registered = FindRegisteredModel(llm, activeService, modelName);

    if (registered is null)
    {
        // No dedicated entry: use the service-wide defaults.
        var serviceKey = activeService.ToLowerInvariant();
        if (serviceKey == "vllm")
        {
            return (llm.VllmEndpoint, ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled), llm.VllmAllowInsecureTls);
        }

        if (serviceKey == "ollama")
        {
            return (llm.OllamaEndpoint, ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled), false);
        }

        return ("", "", false);
    }

    string endpoint;
    if (string.IsNullOrWhiteSpace(registered.Endpoint))
    {
        endpoint = ResolveEndpointForService(activeService);
    }
    else
    {
        endpoint = registered.Endpoint;
    }

    var apiKey = string.IsNullOrEmpty(registered.ApiKey)
        ? GetDefaultApiKey(llm, activeService)
        : ResolveSecretValue(registered.ApiKey, llm.EncryptionEnabled);

    var allowInsecureTls = activeService == "vllm"
        && (registered.AllowInsecureTls || llm.VllmAllowInsecureTls);

    return (endpoint, apiKey, allowInsecureTls);
}
|
|
|
|
/// <summary>Finds the registered model that matches the given service and model name.</summary>
/// <param name="llm">LLM settings holding the registered models and the encryption flag.</param>
/// <param name="service">Service name, compared case-insensitively with each model's service.</param>
/// <param name="modelName">Name compared case-insensitively with the decrypted model name and with the alias.</param>
/// <returns>The first matching registered model, or null when none matches.</returns>
private static Models.RegisteredModel? FindRegisteredModel(Models.LlmSettings llm, string service, string modelName)
{
    bool Matches(Models.RegisteredModel candidate)
    {
        if (!candidate.Service.Equals(service, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        var decryptedName = CryptoService.DecryptIfEnabled(candidate.EncryptedModelName, llm.EncryptionEnabled);
        return string.Equals(decryptedName, modelName, StringComparison.OrdinalIgnoreCase)
            || string.Equals(candidate.Alias, modelName, StringComparison.OrdinalIgnoreCase);
    }

    return llm.RegisteredModels.FirstOrDefault(m => Matches(m));
}
|
|
|
|
/// <summary>Returns the registered model matching the active service and model, or null when there is none.</summary>
private Models.RegisteredModel? GetActiveRegisteredModel()
    => FindRegisteredModel(_settings.Settings.Llm, ResolveService(), ResolveModel());
|
|
|
|
/// <summary>
/// Decides whether a request must use the IBM deployment chat API instead of the OpenAI-compatible one.
/// </summary>
/// <param name="service">Service name (normalized internally); only vLLM qualifies.</param>
/// <param name="registered">Registered model whose auth type is inspected; null never qualifies.</param>
/// <param name="endpoint">Endpoint URL inspected for IBM-style path markers.</param>
/// <returns>
/// True when the service is vLLM, the auth type is one of ibm_iam/cp4d/cp4d_password/cp4d_api_key and
/// the endpoint contains "/ml/", "/deployments/" or "/text/chat" (case-insensitive).
/// </returns>
private static bool UsesIbmDeploymentChatApi(string service, Models.RegisteredModel? registered, string? endpoint)
{
    var isVllm = string.Equals(NormalizeServiceName(service), "vllm", StringComparison.OrdinalIgnoreCase);
    if (!isVllm || registered == null)
    {
        return false;
    }

    var authType = (registered.AuthType ?? "").Trim().ToLowerInvariant();
    switch (authType)
    {
        case "ibm_iam":
        case "cp4d":
        case "cp4d_password":
        case "cp4d_api_key":
            break;
        default:
            return false;
    }

    var normalizedEndpoint = (endpoint ?? "").Trim().ToLowerInvariant();
    var result = false;
    foreach (var marker in new[] { "/ml/", "/deployments/", "/text/chat" })
    {
        if (normalizedEndpoint.Contains(marker))
        {
            result = true;
            break;
        }
    }

    // Only the endpoint length is logged, not the endpoint itself.
    LogService.Debug($"[IBM진단] UsesIbmDeploymentChatApi: service={service}, authType={authType}, endpoint={endpoint?.Length ?? 0}자, result={result}");
    return result;
}
|
|
|
|
/// <summary>
/// Builds the IBM deployment chat URL for the requested mode from a user-configured endpoint.
/// </summary>
/// <param name="endpoint">Configured endpoint; may already end in /text/chat or /text/chat_stream.</param>
/// <param name="stream">True to target the streaming route (/text/chat_stream), false for /text/chat.</param>
/// <returns>
/// The endpoint with its chat segment switched to the requested mode; a "/deployments/" endpoint
/// without a chat segment gets the segment appended; any other endpoint is returned trimmed but unchanged.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="endpoint"/> is null or blank.</exception>
private string BuildIbmDeploymentChatUrl(string endpoint, bool stream)
{
    var trimmed = (endpoint ?? "").Trim();
    if (string.IsNullOrWhiteSpace(trimmed))
        throw new InvalidOperationException("IBM 배포형 vLLM 엔드포인트가 비어 있습니다.");

    var normalized = trimmed.ToLowerInvariant();
    string url;

    // "/text/chat_stream" must be tested first because it also contains "/text/chat".
    if (normalized.Contains("/text/chat_stream"))
    {
        url = stream ? trimmed : trimmed.Replace("/text/chat_stream", "/text/chat", StringComparison.OrdinalIgnoreCase);
    }
    else if (normalized.Contains("/text/chat"))
    {
        url = stream ? trimmed.Replace("/text/chat", "/text/chat_stream", StringComparison.OrdinalIgnoreCase) : trimmed;
    }
    else if (normalized.Contains("/deployments/"))
    {
        // The chat segment belongs to the path. Insert it before any query string or fragment
        // (e.g. "?version=...") instead of blindly appending it to the end of the URL.
        var suffixStart = trimmed.IndexOfAny(new[] { '?', '#' });
        var path = suffixStart >= 0 ? trimmed[..suffixStart] : trimmed;
        var suffix = suffixStart >= 0 ? trimmed[suffixStart..] : "";
        url = path.TrimEnd('/') + (stream ? "/text/chat_stream" : "/text/chat") + suffix;
    }
    else
    {
        url = trimmed;
    }

    IbmDiagDebug($"[IBM진단] BuildUrl: stream={stream}, url={url}");
    return url;
}
|
|
|
|
/// <summary>
/// Builds the request body for the IBM deployment chat API.
/// </summary>
/// <remarks>
/// The loaded system prompt (if any) is sent first and system messages in <paramref name="messages"/>
/// are dropped. Structured tool_use / tool_result transcripts are flattened to plain text because
/// the IBM vLLM deployment does not understand the OpenAI tool_use format. Every role other than
/// "assistant" is sent as "user".
/// </remarks>
/// <param name="messages">Conversation messages to convert.</param>
/// <returns>A dictionary with "messages", "parameters" and "chat_template_kwargs" entries.</returns>
private object BuildIbmDeploymentBody(List<ChatMessage> messages)
{
    var msgs = new List<object>();
    if (!string.IsNullOrWhiteSpace(_systemPrompt))
        msgs.Add(new { role = "system", content = _systemPrompt });
    IbmDiagDebug($"[IBM진단] BuildIbmDeploymentBody: messages={messages.Count}건, systemPrompt={(_systemPrompt?.Length ?? 0)}자");

    foreach (var m in messages)
    {
        if (m.Role == "system")
            continue;

        // Assistant message carrying _tool_use_blocks: keep only the readable text.
        if (TryFlattenIbmToolUseMessage(m, out var assistantText))
        {
            // A transcript that flattens to nothing is dropped rather than sent as an empty message.
            if (!string.IsNullOrEmpty(assistantText))
                msgs.Add(new { role = "assistant", content = assistantText });
            continue;
        }

        // User message carrying a tool_result JSON payload: convert to plain text.
        if (TryFlattenIbmToolResultMessage(m, out var toolResultText))
        {
            msgs.Add(new { role = "user", content = toolResultText });
            continue;
        }

        msgs.Add(new
        {
            role = m.Role == "assistant" ? "assistant" : "user",
            content = m.Content
        });
    }

    var temperature = ResolveTemperature();
    var maxTokens = ResolveOpenAiCompatibleMaxTokens();
    IbmDiagDebug($"[IBM진단] BuildIbmDeploymentBody 완료: finalMessages={msgs.Count}건, temp={temperature}, maxTokens={maxTokens}");

    var parameters = new Dictionary<string, object?>
    {
        ["temperature"] = temperature,
    };
    // max_new_tokens is only sent when an explicit limit is configured.
    if (maxTokens.HasValue)
        parameters["max_new_tokens"] = maxTokens.Value;

    return new Dictionary<string, object?>
    {
        ["messages"] = msgs,
        ["parameters"] = parameters,
        // Qwen3.5 thinking mode is disabled to keep content in the normal response field.
        ["chat_template_kwargs"] = new { enable_thinking = false },
    };
}

/// <summary>
/// Flattens an assistant message whose content is a JSON document with a "_tool_use_blocks" array
/// into plain text: text blocks verbatim, tool_use blocks as "[도구 호출: name]".
/// </summary>
/// <returns>
/// True when the message was recognized and flattened (the text may be empty). False when the
/// message is not such a transcript or parsing failed, in which case the caller sends the raw content.
/// </returns>
private static bool TryFlattenIbmToolUseMessage(ChatMessage message, out string text)
{
    text = "";
    if (message.Role != "assistant"
        || string.IsNullOrEmpty(message.Content)
        || !message.Content.Contains("_tool_use_blocks"))
    {
        return false;
    }

    try
    {
        using var doc = JsonDocument.Parse(message.Content);
        if (!doc.RootElement.SafeTryGetProperty("_tool_use_blocks", out var blocks))
            return false;

        var parts = new List<string>();
        foreach (var block in blocks.EnumerateArray())
        {
            if (!block.SafeTryGetProperty("type", out var typeEl)) continue;
            var type = typeEl.SafeGetString();
            if (type == "text" && block.SafeTryGetProperty("text", out var textEl))
                parts.Add(textEl.SafeGetString() ?? "");
            else if (type == "tool_use" && block.SafeTryGetProperty("name", out var nameEl))
                parts.Add($"[도구 호출: {nameEl.SafeGetString()}]");
        }

        text = string.Join("\n", parts).Trim();
        return true;
    }
    catch
    {
        // Deliberate best effort: on any parse failure the caller falls back to the raw content.
        text = "";
        return false;
    }
}

/// <summary>
/// Flattens a user message whose content starts with a tool_result JSON object into
/// "[tool_name 결과]" followed by the tool output on the next line.
/// </summary>
/// <returns>
/// True when the message was recognized and converted. False when it is not a tool_result payload
/// or parsing failed, in which case the caller sends the raw content.
/// </returns>
private static bool TryFlattenIbmToolResultMessage(ChatMessage message, out string text)
{
    text = "";
    if (message.Role != "user"
        || string.IsNullOrEmpty(message.Content)
        || !message.Content.StartsWith("{\"type\":\"tool_result\"", StringComparison.Ordinal))
    {
        return false;
    }

    try
    {
        using var doc = JsonDocument.Parse(message.Content);
        var root = doc.RootElement;
        var toolName = root.SafeTryGetProperty("tool_name", out var tn) ? tn.SafeGetString() ?? "tool" : "tool";
        var toolContent = root.SafeTryGetProperty("content", out var tc) ? tc.SafeGetString() ?? "" : "";
        text = $"[{toolName} 결과]\n{toolContent}";
        return true;
    }
    catch
    {
        // Deliberate best effort: on any parse failure the caller falls back to the raw content.
        text = "";
        return false;
    }
}
|
|
|
|
/// <summary>
/// Extracts the generated text from an IBM deployment response, trying the known response shapes in order.
/// </summary>
/// <remarks>
/// Order of precedence: (1) <c>choices[0].message.content</c>, then <c>reasoning_content</c> as the
/// Qwen3.5 thinking-mode fallback, each only when non-empty; (2) <c>results[0].generated_text</c>,
/// then <c>results[0].output_text</c>; (3) root <c>generated_text</c>; (4) root <c>message</c> when
/// it is a JSON string.
/// </remarks>
/// <param name="root">Root element of the parsed response.</param>
/// <returns>The extracted text, or an empty string when no known shape matches.</returns>
private static string ExtractIbmDeploymentText(JsonElement root)
{
    // Fetches the first item of a non-empty array property.
    static bool TryGetFirstItem(JsonElement parent, string name, out JsonElement first)
    {
        if (parent.SafeTryGetProperty(name, out var array)
            && array.ValueKind == JsonValueKind.Array
            && array.GetArrayLength() > 0)
        {
            first = array[0];
            return true;
        }

        first = default;
        return false;
    }

    if (TryGetFirstItem(root, "choices", out var firstChoice)
        && firstChoice.SafeTryGetProperty("message", out var choiceMessage)
        && choiceMessage.ValueKind == JsonValueKind.Object)
    {
        // Empty values do not count, so an empty "content" falls through to "reasoning_content".
        foreach (var field in new[] { "content", "reasoning_content" })
        {
            if (!choiceMessage.SafeTryGetProperty(field, out var value))
            {
                continue;
            }

            var text = value.SafeGetString();
            if (!string.IsNullOrEmpty(text))
            {
                return text;
            }
        }
    }

    if (TryGetFirstItem(root, "results", out var firstResult))
    {
        // Unlike the choices branch, the first property that is present wins even if it is empty.
        if (firstResult.SafeTryGetProperty("generated_text", out var generatedText))
        {
            return generatedText.SafeGetString() ?? "";
        }

        if (firstResult.SafeTryGetProperty("output_text", out var outputText))
        {
            return outputText.SafeGetString() ?? "";
        }
    }

    if (root.SafeTryGetProperty("generated_text", out var rootGenerated))
    {
        return rootGenerated.SafeGetString() ?? "";
    }

    if (root.SafeTryGetProperty("message", out var rootMessage) && rootMessage.ValueKind == JsonValueKind.String)
    {
        return rootMessage.SafeGetString() ?? "";
    }

    return "";
}
|
|
|
|
/// <summary>
/// Returns the value to put in the Authorization header for the active model.
/// For IBM IAM and CP4D auth types a token is obtained from the respective token service;
/// otherwise the plain API key is used.
/// </summary>
/// <param name="ct">Cancellation token forwarded to the token services.</param>
/// <returns>The bearer token or API key, or null when no API key is available.</returns>
/// <remarks>Token acquisition failures are logged and rethrown unchanged.</remarks>
internal async Task<string?> ResolveAuthTokenAsync(CancellationToken ct = default)
{
    var llm = _settings.Settings.Llm;
    var activeService = ResolveService();
    var modelName = ResolveModel();
    var registered = FindRegisteredModel(llm, activeService, modelName);

    // AuthType is treated as nullable and trimmed, exactly like UsesIbmDeploymentChatApi does, so
    // a missing value cannot throw and both methods agree on which models use IBM authentication.
    var authType = (registered?.AuthType ?? "").Trim();
    bool IsAuthType(string candidate) => string.Equals(authType, candidate, StringComparison.OrdinalIgnoreCase);

    // IBM Cloud IAM authentication.
    if (registered != null && IsAuthType("ibm_iam"))
    {
        IbmDiagDebug($"[IBM진단] IBM IAM 인증 시도: model={modelName}, hasApiKey={!string.IsNullOrWhiteSpace(registered.ApiKey)}");
        try
        {
            var ibmApiKey = !string.IsNullOrWhiteSpace(registered.ApiKey)
                ? ResolveSecretValue(registered.ApiKey, llm.EncryptionEnabled)
                : GetDefaultApiKey(llm, activeService);
            var token = await IbmIamTokenService.GetTokenAsync(ibmApiKey, ct: ct);
            IbmDiagDebug($"[IBM진단] IBM IAM 토큰 발급 성공: tokenLen={token?.Length ?? 0}");
            return token;
        }
        catch (Exception ex)
        {
            IbmDiagError($"[IBM진단] IBM IAM 토큰 발급 실패: {ex.GetType().Name}: {ex.Message}");
            throw;
        }
    }

    // CP4D authentication; requires a CP4D URL, otherwise it falls through to the API key below.
    if (registered != null &&
        (IsAuthType("cp4d") || IsAuthType("cp4d_password") || IsAuthType("cp4d_api_key")) &&
        !string.IsNullOrWhiteSpace(registered.Cp4dUrl))
    {
        IbmDiagDebug($"[IBM진단] CP4D 인증 시도: authType={registered.AuthType}, cp4dUrl={registered.Cp4dUrl}, user={registered.Cp4dUsername}");
        try
        {
            var password = CryptoService.DecryptIfEnabled(registered.Cp4dPassword, llm.EncryptionEnabled);
            var token = await Cp4dTokenService.GetTokenAsync(
                registered.Cp4dUrl, registered.Cp4dUsername, password, ct);
            IbmDiagDebug($"[IBM진단] CP4D 토큰 발급 성공: tokenLen={token?.Length ?? 0}");
            return token;
        }
        catch (Exception ex)
        {
            IbmDiagError($"[IBM진단] CP4D 토큰 발급 실패: {ex.GetType().Name}: {ex.Message}");
            throw;
        }
    }

    // Default Bearer authentication: return the configured API key.
    var (_, apiKey, _) = ResolveServerInfo();
    return string.IsNullOrEmpty(apiKey) ? null : apiKey;
}
|
|
|
|
/// <summary>
/// Applies the Authorization header to a request. The value comes from
/// <see cref="ResolveAuthTokenAsync"/>: an automatically issued token for IBM IAM / CP4D,
/// or the API key for plain Bearer authentication. No header is set when no value is available.
/// </summary>
/// <param name="req">Request to decorate.</param>
/// <param name="ct">Cancellation token forwarded to token resolution.</param>
private async Task ApplyAuthHeaderAsync(HttpRequestMessage req, CancellationToken ct)
{
    var bearer = await ResolveAuthTokenAsync(ct);
    if (string.IsNullOrEmpty(bearer))
    {
        return;
    }

    req.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", bearer);
}
|
|
|
|
/// <summary>Returns the service-wide default API key for an internal service.</summary>
/// <param name="llm">LLM settings holding the stored keys and the encryption flag.</param>
/// <param name="service">Service name; when null the configured service is used.</param>
/// <returns>
/// The vLLM or Ollama key passed through <c>ResolveSecretValue</c>; an empty string for external services.
/// </returns>
private static string GetDefaultApiKey(LlmSettings llm, string? service = null)
{
    // Normalize the name the same way request dispatch does: null, padded or unknown names
    // resolve to the backend that will actually be called instead of throwing or returning no key.
    return NormalizeServiceName(service ?? llm.Service) switch
    {
        "vllm" => ResolveSecretValue(llm.VllmApiKey, llm.EncryptionEnabled),
        "ollama" => ResolveSecretValue(llm.OllamaApiKey, llm.EncryptionEnabled),
        _ => "",
    };
}
|
|
|
|
// ─── 비스트리밍 응답 ───────────────────────────────────────────────────
|
|
|
|
/// <summary>
/// Sends a non-streaming chat request to the active service and returns the response text.
/// When the request fails and fallback models are configured ("service:model" entries),
/// they are tried in order.
/// </summary>
/// <param name="messages">Conversation messages to send.</param>
/// <param name="ct">Cancellation token. A cancelled request never triggers fallback.</param>
/// <returns>The response text of the first service that succeeds.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the active service is blocked by the operation mode.
/// </exception>
/// <remarks>
/// When every fallback fails, the original exception of the primary request is rethrown.
/// </remarks>
public async Task<string> SendAsync(List<ChatMessage> messages, CancellationToken ct = default)
{
    var llm = _settings.Settings.Llm;
    var activeService = ResolveService();
    EnsureOperationModeAllowsLlmService(activeService);
    try
    {
        return await SendWithServiceAsync(activeService, messages, ct).ConfigureAwait(false);
    }
    // Check the token rather than the exception type: HttpClient timeouts also surface as
    // TaskCanceledException and should still fall back, whereas a caller-initiated cancel must not.
    catch (Exception ex) when (!ct.IsCancellationRequested && llm.FallbackModels.Count > 0)
    {
        // Try the fallback models in order.
        foreach (var fallback in llm.FallbackModels)
        {
            if (string.IsNullOrWhiteSpace(fallback))
                continue;

            var parts = fallback.Split(':', 2);
            var fbService = parts[0].Trim();
            var fbModel = parts.Length > 1 ? parts[1].Trim() : "";
            if (fbService.Length == 0)
                continue;

            try
            {
                EnsureOperationModeAllowsLlmService(fbService);
                LogService.Warn($"모델 폴백: {activeService} → {fbService} ({ex.Message})");
                LastFallbackInfo = $"{activeService} → {fbService}";

                // Route endpoint/key/model resolution to the fallback for the duration of the call.
                // An empty fbModel leaves the current model untouched.
                PushInferenceOverride(fbService, fbModel);
                try
                {
                    return await SendWithServiceAsync(fbService, messages, ct).ConfigureAwait(false);
                }
                finally
                {
                    PopInferenceOverride();
                }
            }
            catch (Exception fbEx) when (!ct.IsCancellationRequested)
            {
                // Record why this fallback was skipped, then move on to the next one.
                LogService.Warn($"폴백 실패: {fbService} ({fbEx.Message})");
            }
        }

        throw; // every fallback failed
    }
}
|
|
|
|
/// <summary>Information about the last fallback (for display in the UI). Null means no fallback has occurred.</summary>
|
|
public string? LastFallbackInfo { get; private set; }
|
|
|
|
/// <summary>Dispatches a non-streaming request to the backend implementation for the given service.</summary>
/// <param name="service">Service name (normalized internally); unknown names go to Ollama.</param>
/// <param name="messages">Conversation messages to send.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The task produced by the selected backend.</returns>
private Task<string> SendWithServiceAsync(string service, List<ChatMessage> messages, CancellationToken ct)
{
    switch (NormalizeServiceName(service))
    {
        case "gemini":
            return SendGeminiAsync(messages, ct);
        case "sigmoid":
            return SendSigmoidAsync(messages, ct);
        case "vllm":
            return SendOpenAiCompatibleAsync(messages, ct);
        default:
            return SendOllamaAsync(messages, ct);
    }
}
|
|
|
|
// ─── 스트리밍 응답 ────────────────────────────────────────────────────
|
|
|
|
/// <summary>
/// Streams a chat response from the active service, yielding text chunks as the backend produces them.
/// </summary>
/// <param name="messages">Conversation messages to send.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>An asynchronous sequence of response text chunks.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown on first enumeration when the active service is blocked by the operation mode.
/// </exception>
public async IAsyncEnumerable<string> StreamAsync(
    List<ChatMessage> messages,
    [EnumeratorCancellation] CancellationToken ct = default)
{
    var activeService = ResolveService();
    EnsureOperationModeAllowsLlmService(activeService);

    IAsyncEnumerable<string> source;
    switch (NormalizeServiceName(activeService))
    {
        case "gemini":
            source = StreamGeminiAsync(messages, ct);
            break;
        case "sigmoid":
            source = StreamSigmoidAsync(messages, ct);
            break;
        case "vllm":
            source = StreamOpenAiCompatibleAsync(messages, ct);
            break;
        default:
            source = StreamOllamaAsync(messages, ct);
            break;
    }

    await foreach (var piece in source.WithCancellation(ct))
    {
        yield return piece;
    }
}
|
|
|
|
// ─── 연결 테스트 ──────────────────────────────────────────────────────
|
|
|
|
/// <summary>
/// Checks connectivity to the active service by calling its model-listing endpoint.
/// </summary>
/// <returns>
/// <c>ok</c> is true when the endpoint answered with a success status; <c>message</c> carries a
/// user-facing description of the outcome. This method does not throw: all failures are reported
/// through the returned tuple.
/// </returns>
public async Task<(bool ok, string message)> TestConnectionAsync()
{
    try
    {
        var normalizedService = ResolveService();
        if (OperationModePolicy.IsInternal(_settings.Settings) && IsExternalLlmService(normalizedService))
        {
            var blockedName = normalizedService == "sigmoid" ? "Claude" : "Gemini";
            return (false, $"사내 모드에서는 {blockedName} 외부 LLM 연결이 차단됩니다.");
        }

        // Every response below is disposed once its status has been inspected so the
        // underlying connection is released promptly.
        switch (normalizedService)
        {
            case "ollama":
            {
                var (endpoint, _, _) = ResolveServerInfo();
                var ep = string.IsNullOrWhiteSpace(endpoint) ? ResolveEndpointForService("ollama") : endpoint;
                using var resp = await _http.GetAsync(ep.TrimEnd('/') + "/api/tags");
                return resp.IsSuccessStatusCode
                    ? (true, "Ollama 연결 성공")
                    : (false, ClassifyHttpError(resp));
            }

            case "vllm":
            {
                var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
                var ep = string.IsNullOrWhiteSpace(endpoint) ? ResolveEndpointForService("vllm") : endpoint;
                using var vReq = new HttpRequestMessage(HttpMethod.Get, ep.TrimEnd('/') + "/v1/models");
                await ApplyAuthHeaderAsync(vReq, CancellationToken.None);
                using var vResp = await SendWithTlsAsync(vReq, allowInsecureTls, CancellationToken.None);
                return vResp.IsSuccessStatusCode
                    ? (true, "vLLM 연결 성공")
                    : (false, ClassifyHttpError(vResp));
            }

            case "gemini":
            {
                var gKey = ResolveApiKeyForService("gemini");
                if (string.IsNullOrEmpty(gKey)) return (false, "API 키가 설정되지 않았습니다");
                using var gResp = await _http.GetAsync(
                    $"https://generativelanguage.googleapis.com/v1beta/models?key={gKey}");
                return gResp.IsSuccessStatusCode
                    ? (true, "Gemini API 연결 성공")
                    : (false, ClassifyHttpError(gResp));
            }

            case "sigmoid":
            {
                var cKey = ResolveApiKeyForService("sigmoid");
                if (string.IsNullOrEmpty(cKey)) return (false, "API 키가 설정되지 않았습니다");
                using var cReq = new HttpRequestMessage(HttpMethod.Get, $"https://{SigmoidApiHost}/v1/models");
                cReq.Headers.Add("x-api-key", cKey);
                cReq.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);
                using var cResp = await _http.SendAsync(cReq);
                return cResp.IsSuccessStatusCode
                    ? (true, "Claude API 연결 성공")
                    : (false, ClassifyHttpError(cResp));
            }

            default:
                return (false, "알 수 없는 서비스");
        }
    }
    catch (TaskCanceledException)
    {
        return (false, "연결 시간 초과 — 서버가 응답하지 않습니다");
    }
    catch (HttpRequestException ex)
    {
        return (false, $"연결 실패 — {ex.Message}");
    }
    catch (Exception ex)
    {
        return (false, ex.Message);
    }
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
// Ollama
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
|
|
/// <summary>Sends a non-streaming chat request to Ollama (<c>/api/chat</c>) and returns the reply text.</summary>
/// <param name="messages">Conversation messages to send.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// The <c>message</c> value when it is a JSON string, otherwise <c>message.content</c> (empty when
/// absent). Without a <c>message</c> property, the root's string value or "(빈 응답)".
/// </returns>
private async Task<string> SendOllamaAsync(List<ChatMessage> messages, CancellationToken ct)
{
    var llm = _settings.Settings.Llm;
    var server = ResolveServerInfo();

    // Use the generic endpoint setting when the resolved server info carries no endpoint.
    var baseUrl = string.IsNullOrEmpty(server.Endpoint) ? llm.Endpoint : server.Endpoint;
    var body = BuildOllamaBody(messages, stream: false);
    var chatUrl = baseUrl.TrimEnd('/') + "/api/chat";

    var resp = await PostJsonWithRetryAsync(chatUrl, body, server.AllowInsecureTls, ct);
    return SafeParseJson(resp, root =>
    {
        TryParseOllamaUsage(root);

        var message = root.SafeGetProperty("message");
        if (message is not { } payload)
        {
            return root.SafeGetString() ?? "(빈 응답)";
        }

        return payload.ValueKind == JsonValueKind.String
            ? payload.SafeGetString() ?? ""
            : payload.SafeGetProperty("content")?.SafeGetString() ?? "";
    }, "Ollama 응답");
}
|
|
|
|
/// <summary>
/// Streams a chat response from Ollama (<c>/api/chat</c>), which sends one JSON object per line.
/// </summary>
/// <param name="messages">Conversation messages to send.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// The non-empty <c>message.content</c> fragments in arrival order. When the stream stalls after
/// it has started, a final notice chunk is yielded. When not even the first chunk arrives in time,
/// a warning is logged and the sequence ends without yielding anything.
/// </returns>
private async IAsyncEnumerable<string> StreamOllamaAsync(
    List<ChatMessage> messages,
    [EnumeratorCancellation] CancellationToken ct)
{
    var llm = _settings.Settings.Llm;
    var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
    var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
    var body = BuildOllamaBody(messages, stream: true);
    var url = ep.TrimEnd('/') + "/api/chat";

    using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
    using var resp = await SendWithErrorClassificationAsync(req, allowInsecureTls, ct);

    using var stream = await resp.Content.ReadAsStreamAsync(ct);
    using var reader = new StreamReader(stream);

    var firstChunkReceived = false;
    // NOTE(review): StreamReader.EndOfStream can block synchronously on a network stream. It is kept
    // because a null line from ReadLineWithTimeoutAsync is interpreted as a timeout below — confirm
    // how that helper reports end-of-stream before removing this check.
    while (!reader.EndOfStream && !ct.IsCancellationRequested)
    {
        // The first chunk gets a much longer allowance because the model must process the whole context.
        var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;
        var line = await ReadLineWithTimeoutAsync(reader, ct, timeout);
        if (line == null)
        {
            if (!firstChunkReceived)
                LogService.Warn($"Ollama 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초) — 모델이 응답하지 않습니다");
            else
                yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
            break;
        }
        firstChunkReceived = true;
        if (string.IsNullOrEmpty(line)) continue;

        // The text is captured here and yielded after the try block: C# does not allow
        // "yield return" inside a try block that has a catch clause.
        string? text = null;
        try
        {
            using var doc = JsonDocument.Parse(line);
            if (doc.RootElement.SafeTryGetProperty("message", out var msg) &&
                msg.SafeTryGetProperty("content", out var c))
                text = c.SafeGetString();

            // Compare the value kind instead of calling GetBoolean(): a non-boolean "done" would
            // throw InvalidOperationException, which the JsonException handler below does not catch.
            if (doc.RootElement.SafeTryGetProperty("done", out var done) && done.ValueKind == JsonValueKind.True)
                TryParseOllamaUsage(doc.RootElement);
        }
        catch (JsonException ex)
        {
            LogService.Warn($"Ollama 스트리밍 JSON 파싱 오류: {ex.Message}");
        }
        if (!string.IsNullOrEmpty(text)) yield return text;
    }
}
|
|
|
|
/// <summary>Builds the JSON body for an Ollama <c>/api/chat</c> request.</summary>
/// <param name="messages">Conversation messages, converted through <c>BuildMessageList</c>.</param>
/// <param name="stream">Whether Ollama should stream the response.</param>
/// <returns>An anonymous object with <c>model</c>, <c>messages</c>, <c>stream</c> and <c>options.temperature</c>.</returns>
private object BuildOllamaBody(List<ChatMessage> messages, bool stream)
{
    var msgs = BuildMessageList(messages);
    return new
    {
        // ResolveModel() honours a model pushed via PushRouteOverride/PushInferenceOverride and
        // falls back to the configured model name, so routed requests ask for the routed model.
        model = ResolveModel(),
        messages = msgs,
        stream = stream,
        options = new { temperature = ResolveTemperature() }
    };
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
// OpenAI-Compatible (vLLM)
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
|
|
/// <summary>
/// Sends a non-streaming chat request to an OpenAI-compatible (vLLM) server, or to the IBM
/// deployment chat API when <c>UsesIbmDeploymentChatApi</c> says so, and returns the reply text.
/// </summary>
/// <param name="messages">Conversation history to send.</param>
/// <param name="ct">Cancels the request and the response read.</param>
/// <returns>The reply text, or <c>"(빈 응답)"</c> when no usable choice/text is present.</returns>
private async Task<string> SendOpenAiCompatibleAsync(List<ChatMessage> messages, CancellationToken ct)
{
    var llmSettings = _settings.Settings.Llm;
    var (resolvedEndpoint, _, allowInsecureTls) = ResolveServerInfo();
    var baseUrl = string.IsNullOrEmpty(resolvedEndpoint) ? llmSettings.Endpoint : resolvedEndpoint;
    var activeModel = GetActiveRegisteredModel();
    var isIbmDeployment = UsesIbmDeploymentChatApi("vllm", activeModel, baseUrl);

    // Pick the payload shape and URL for the selected API flavor.
    object payload;
    string requestUrl;
    if (isIbmDeployment)
    {
        payload = BuildIbmDeploymentBody(messages);
        requestUrl = BuildIbmDeploymentChatUrl(baseUrl, stream: false);
    }
    else
    {
        payload = BuildOpenAiBody(messages, stream: false);
        requestUrl = baseUrl.TrimEnd('/') + "/v1/chat/completions";
    }

    var requestJson = JsonSerializer.Serialize(payload);

    if (isIbmDeployment)
        IbmDiagInfo($"[IBM진단] SendOpenAi(비스트리밍): url={requestUrl}, bodyLen={requestJson.Length}자, messages={messages.Count}건");

    using var request = new HttpRequestMessage(HttpMethod.Post, requestUrl)
    {
        Content = new StringContent(requestJson, Encoding.UTF8, "application/json")
    };
    await ApplyAuthHeaderAsync(request, ct);

    HttpResponseMessage response;
    try
    {
        response = await SendWithErrorClassificationAsync(request, allowInsecureTls, ct);
    }
    catch (Exception ex)
    {
        // Diagnostics only; the original exception is rethrown untouched.
        if (isIbmDeployment)
            IbmDiagError($"[IBM진단] SendOpenAi 요청 실패: {ex.GetType().Name}: {ex.Message}");
        throw;
    }

    using var responseScope = response;

    var responseText = await response.Content.ReadAsStringAsync(ct);

    if (isIbmDeployment)
    {
        var mediaType = response.Content.Headers.ContentType?.MediaType ?? "(null)";
        var preview = responseText.Length > 500 ? responseText[..500] + "…" : responseText;
        IbmDiagInfo($"[IBM진단] SendOpenAi 응답: HTTP {(int)response.StatusCode}, ContentType={mediaType}, bodyLen={responseText.Length}자");
        IbmDiagDebug($"[IBM진단] SendOpenAi 응답본문: {preview}");
    }

    // IBM vLLM may answer a stream:false request in SSE form (id:/event:/data: lines);
    // reduce that to a single JSON document before parsing.
    var jsonText = ExtractJsonFromSseIfNeeded(responseText);

    return SafeParseJson(jsonText, ReadReply, "vLLM 응답");

    // Pulls the reply text out of the parsed response root.
    string ReadReply(JsonElement root)
    {
        TryParseOpenAiUsage(root);

        if (isIbmDeployment)
        {
            // Text accumulated from SSE may not be a single JSON value, so re-extract it.
            var ibmText = ExtractIbmDeploymentText(root);
            return string.IsNullOrWhiteSpace(ibmText) ? "(빈 응답)" : ibmText;
        }

        var hasChoices = root.SafeTryGetProperty("choices", out var choices)
            && choices.ValueKind == JsonValueKind.Array
            && choices.GetArrayLength() != 0;
        if (!hasChoices)
            return "(빈 응답)";

        var firstChoice = choices[0];
        if (firstChoice.SafeGetProperty("message") is not JsonElement message)
            return firstChoice.SafeGetString() ?? "(빈 응답)";

        return message.ValueKind == JsonValueKind.String
            ? message.SafeGetString() ?? ""
            : message.SafeGetProperty("content")?.SafeGetString() ?? "";
    }
}
|
|
|
|
/// <summary>
/// Normalizes a response body that may have arrived in SSE form (<c>id:</c>/<c>event:</c>/<c>data:</c>
/// lines) even though <c>stream:false</c> was requested, reducing it to one JSON document.
/// </summary>
/// <param name="raw">Raw response body.</param>
/// <returns>
/// <paramref name="raw"/> unchanged when it is blank or already starts with <c>{</c> or <c>[</c>;
/// the first <c>data:</c> payload that carries <c>choices[0].message</c> or a non-empty
/// <c>results</c> array; otherwise a synthesized <c>{"choices":[{"message":{"content":…}}]}</c>
/// document built from accumulated <c>choices[0].delta</c> text; otherwise the last
/// <c>data:</c> payload seen, or <paramref name="raw"/> when there was none.
/// </returns>
private static string ExtractJsonFromSseIfNeeded(string raw)
{
    if (string.IsNullOrWhiteSpace(raw)) return raw;

    // Plain JSON passes through untouched.
    var trimmed = raw.TrimStart();
    if (trimmed.StartsWith('{') || trimmed.StartsWith('['))
        return raw;

    // The space after the colon is optional in SSE, so match on "data:" and trim the rest.
    const string DataField = "data:";

    string? lastDataJson = null;
    var deltaText = new StringBuilder();

    foreach (var rawLine in raw.Split('\n'))
    {
        var line = rawLine.Trim();
        if (!line.StartsWith(DataField, StringComparison.Ordinal)) continue;

        var data = line[DataField.Length..].Trim();
        if (data == "[DONE]") break;
        if (string.IsNullOrEmpty(data)) continue;

        lastDataJson = data;
        try
        {
            using var doc = JsonDocument.Parse(data);
            var root = doc.RootElement;

            if (root.SafeTryGetProperty("choices", out var choices)
                && choices.ValueKind == JsonValueKind.Array
                && choices.GetArrayLength() > 0)
            {
                var first = choices[0];
                if (first.SafeTryGetProperty("delta", out var delta))
                {
                    // Streaming chunk: accumulate the text.
                    string? txt = null;
                    if (delta.SafeTryGetProperty("content", out var cnt))
                        txt = cnt.SafeGetString();
                    // Qwen3.5 thinking-mode fallback: use reasoning_content when content is empty.
                    if (string.IsNullOrEmpty(txt) && delta.SafeTryGetProperty("reasoning_content", out var rc))
                        txt = rc.SafeGetString();
                    if (!string.IsNullOrEmpty(txt)) deltaText.Append(txt);
                }
                else if (first.SafeTryGetProperty("message", out _))
                {
                    // Complete response: use this JSON document as-is.
                    return data;
                }
            }
            else if (root.SafeTryGetProperty("results", out var results)
                && results.ValueKind == JsonValueKind.Array
                && results.GetArrayLength() > 0)
            {
                // IBM results[] shape: use this JSON document as-is.
                return data;
            }
        }
        catch (Exception ex) when (ex is JsonException or InvalidOperationException)
        {
            // Best effort: a data line that is not usable JSON is skipped.
        }
    }

    // Accumulated delta chunks are re-assembled into the OpenAI message shape.
    if (deltaText.Length > 0)
    {
        return JsonSerializer.Serialize(new
        {
            choices = new[]
            {
                new { message = new { content = deltaText.ToString() } }
            }
        });
    }

    // Fall back to the last data payload (or the raw body when none was found).
    return lastDataJson ?? raw;
}
|
|
|
|
/// <summary>
/// Streams a chat completion from an OpenAI-compatible (vLLM) server, or from the IBM deployment
/// chat API when <c>UsesIbmDeploymentChatApi</c> says so, yielding text fragments parsed from
/// the <c>data: </c> lines of the response.
/// </summary>
/// <param name="messages">Conversation history to send.</param>
/// <param name="ct">Cancels the HTTP request and the read loop.</param>
/// <returns>
/// Text fragments in arrival order. If the stream stalls after at least one line was received,
/// a final "response was cut off" notice is yielded before the sequence ends.
/// </returns>
/// <exception cref="InvalidOperationException">
/// An IBM chunk carried <c>status == "error"</c>.
/// </exception>
private async IAsyncEnumerable<string> StreamOpenAiCompatibleAsync(
    List<ChatMessage> messages,
    [EnumeratorCancellation] CancellationToken ct)
{
    var llm = _settings.Settings.Llm;
    var (endpoint, _, allowInsecureTls) = ResolveServerInfo();
    var ep = string.IsNullOrEmpty(endpoint) ? llm.Endpoint : endpoint;
    var registered = GetActiveRegisteredModel();
    var usesIbmDeploymentApi = UsesIbmDeploymentChatApi("vllm", registered, ep);
    var body = usesIbmDeploymentApi
        ? BuildIbmDeploymentBody(messages)
        : BuildOpenAiBody(messages, stream: true);
    var url = usesIbmDeploymentApi
        ? BuildIbmDeploymentChatUrl(ep, stream: true)
        : ep.TrimEnd('/') + "/v1/chat/completions";

    if (usesIbmDeploymentApi)
    {
        var bodyJson = JsonSerializer.Serialize(body);
        IbmDiagInfo($"[IBM진단] StreamOpenAi: url={url}, bodyLen={bodyJson.Length}자, messages={messages.Count}건");
        IbmDiagDebug($"[IBM진단] StreamOpenAi 요청본문(앞500자): {(bodyJson.Length > 500 ? bodyJson[..500] + "…" : bodyJson)}");
    }

    using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
    await ApplyAuthHeaderAsync(req, ct);

    HttpResponseMessage resp;
    try
    {
        resp = await SendWithErrorClassificationAsync(req, allowInsecureTls, ct);
    }
    catch (Exception ex)
    {
        if (usesIbmDeploymentApi)
            IbmDiagError($"[IBM진단] StreamOpenAi 요청 실패: {ex.GetType().Name}: {ex.Message}");
        throw;
    }

    // The response was opened with ResponseHeadersRead, so it must be disposed on every exit
    // path (normal end, timeout break, exception) to release the underlying connection.
    using var respScope = resp;

    if (usesIbmDeploymentApi)
    {
        var contentType = resp.Content.Headers.ContentType?.MediaType ?? "(null)";
        IbmDiagInfo($"[IBM진단] StreamOpenAi 연결 성공: HTTP {(int)resp.StatusCode}, ContentType={contentType}");
    }

    using var stream2 = await resp.Content.ReadAsStreamAsync(ct);
    using var reader = new StreamReader(stream2);

    var firstChunkReceived = false;
    var ibmChunkCount = 0;

    // StreamReader.EndOfStream is deliberately not consulted here: it can block synchronously
    // while waiting for data, which would keep the chunk timeouts below from ever firing.
    while (!ct.IsCancellationRequested)
    {
        var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;

        string? line;
        var timedOut = false;
        using (var readCts = CancellationTokenSource.CreateLinkedTokenSource(ct))
        {
            readCts.CancelAfter(timeout);
            try
            {
                line = await reader.ReadLineAsync(readCts.Token);
            }
            catch (OperationCanceledException) when (!ct.IsCancellationRequested)
            {
                // Only the per-chunk timer fired; caller cancellation still propagates.
                line = null;
                timedOut = true;
            }
        }

        if (timedOut)
        {
            if (!firstChunkReceived)
                LogService.Warn($"vLLM 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초) — 모델이 응답하지 않습니다");
            else
                yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
            break;
        }

        // null without a timeout means the server closed the stream.
        if (line is null) break;

        firstChunkReceived = true;
        if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
        var data = line["data: ".Length..];
        if (data == "[DONE]")
        {
            if (usesIbmDeploymentApi)
                IbmDiagDebug($"[IBM진단] StreamOpenAi 완료: 총 {ibmChunkCount}개 청크 수신");
            break;
        }

        string? text = null;
        try
        {
            using var doc = JsonDocument.Parse(data);
            var root = doc.RootElement;
            TryParseOpenAiUsage(root);

            if (usesIbmDeploymentApi)
            {
                ibmChunkCount++;
                // Log the first 3 chunks, then every 50th, to avoid flooding the log.
                if (ibmChunkCount <= 3 || ibmChunkCount % 50 == 0)
                {
                    var preview = data.Length > 300 ? data[..300] + "…" : data;
                    IbmDiagDebug($"[IBM진단] StreamOpenAi chunk#{ibmChunkCount}: {preview}");
                }

                if (root.SafeTryGetProperty("status", out var status) &&
                    string.Equals(status.SafeGetString(), "error", StringComparison.OrdinalIgnoreCase))
                {
                    // Fall back to the generic text when "message" is missing or not a string,
                    // so the log entry and the exception never carry a null message.
                    var detail = (root.SafeTryGetProperty("message", out var message)
                        ? message.SafeGetString()
                        : null) ?? "IBM vLLM 스트리밍 오류";
                    IbmDiagError($"[IBM진단] StreamOpenAi 서버 오류 응답: {detail}");
                    throw new InvalidOperationException(detail);
                }

                if (root.SafeTryGetProperty("results", out var results) &&
                    results.ValueKind == JsonValueKind.Array &&
                    results.GetArrayLength() > 0)
                {
                    var first = results[0];
                    if (first.SafeTryGetProperty("generated_text", out var generatedText))
                        text = generatedText.SafeGetString();
                    else if (first.SafeTryGetProperty("output_text", out var outputText))
                        text = outputText.SafeGetString();
                }
                else if (root.SafeTryGetProperty("choices", out var ibmChoices)
                    && ibmChoices.ValueKind == JsonValueKind.Array
                    && ibmChoices.GetArrayLength() > 0)
                {
                    text = ExtractChoiceText(ibmChoices[0]);
                }
            }
            else if (root.SafeTryGetProperty("choices", out var choices)
                && choices.ValueKind == JsonValueKind.Array
                && choices.GetArrayLength() > 0)
            {
                text = ExtractChoiceText(choices[0]);
            }
        }
        catch (JsonException ex)
        {
            if (usesIbmDeploymentApi)
            {
                var preview = data.Length > 500 ? data[..500] + "…" : data;
                IbmDiagError($"[IBM진단] StreamOpenAi JSON 파싱 오류: {ex.Message}\n 청크 내용: {preview}");
            }
            else
                LogService.Warn($"vLLM 스트리밍 JSON 파싱 오류: {ex.Message}");
        }

        if (!string.IsNullOrEmpty(text)) yield return text;
    }

    // Reads the text of one choices[] element: delta as a bare string, else delta.content with
    // delta.reasoning_content as the fallback when content is empty, else the choice itself
    // when it is a bare string.
    static string? ExtractChoiceText(JsonElement choice)
    {
        if (choice.SafeTryGetProperty("delta", out var delta))
        {
            if (delta.ValueKind == JsonValueKind.String)
                return delta.SafeGetString();

            string? content = null;
            if (delta.SafeTryGetProperty("content", out var c))
                content = c.SafeGetString();
            if (string.IsNullOrEmpty(content) && delta.SafeTryGetProperty("reasoning_content", out var rc))
                content = rc.SafeGetString();
            return content;
        }

        return choice.ValueKind == JsonValueKind.String ? choice.SafeGetString() : null;
    }
}
|
|
|
|
/// <summary>
/// Builds the request body for the OpenAI-compatible <c>/v1/chat/completions</c> call.
/// </summary>
/// <param name="messages">Conversation history, converted by <see cref="BuildMessageList"/> in OpenAI vision form.</param>
/// <param name="stream">Value of the <c>stream</c> field; when true, <c>stream_options.include_usage</c> is also set.</param>
/// <returns>A dictionary holding the body fields in insertion order.</returns>
private object BuildOpenAiBody(List<ChatMessage> messages, bool stream)
{
    var messageList = BuildMessageList(messages, openAiVision: true);

    var payload = new Dictionary<string, object?>();
    payload["model"] = ResolveModelName();
    payload["messages"] = messageList;
    payload["stream"] = stream;
    payload["temperature"] = ResolveTemperature();

    // max_tokens is only sent when a cap was resolved.
    if (ResolveOpenAiCompatibleMaxTokens() is { } tokenCap)
        payload["max_tokens"] = tokenCap;

    // When streaming, ask for token usage to be included in the last chunk (vLLM/OpenAI compatible).
    if (stream)
        payload["stream_options"] = new { include_usage = true };

    var reasoningEffort = ResolveReasoningEffort();
    if (!string.IsNullOrWhiteSpace(reasoningEffort))
        payload["reasoning_effort"] = reasoningEffort;

    return payload;
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
// Gemini
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
|
|
/// <summary>
/// Sends a non-streaming <c>generateContent</c> request to the Gemini API and returns the reply text.
/// </summary>
/// <param name="messages">Conversation history, converted by <see cref="BuildGeminiBody"/>.</param>
/// <param name="ct">Cancels the request.</param>
/// <returns>
/// The concatenated text of every part of the first candidate, or <c>"(빈 응답)"</c> when the
/// response has no candidates or no parts.
/// </returns>
/// <exception cref="InvalidOperationException">No Gemini API key is configured.</exception>
private async Task<string> SendGeminiAsync(List<ChatMessage> messages, CancellationToken ct)
{
    var apiKey = ResolveApiKeyForService("gemini");
    if (string.IsNullOrEmpty(apiKey))
        throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다. 설정 > AX Agent에서 API 키를 입력하세요.");

    var model = ResolveModel();
    var body = BuildGeminiBody(messages);
    var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={apiKey}";
    var resp = await PostJsonWithRetryAsync(url, body, false, ct);
    return SafeParseJson(resp, root =>
    {
        TryParseGeminiUsage(root);
        if (!root.SafeTryGetProperty("candidates", out var candidates)
            || candidates.ValueKind != JsonValueKind.Array
            || candidates.GetArrayLength() == 0)
            return "(빈 응답)";

        var first = candidates[0];
        var content = first.SafeGetProperty("content");
        if (content == null || !content.Value.SafeTryGetProperty("parts", out var parts)
            || parts.ValueKind != JsonValueKind.Array || parts.GetArrayLength() == 0)
            return first.SafeGetString() ?? "(빈 응답)";

        // Join every text part, matching StreamGeminiAsync; reading only parts[0]
        // silently dropped the rest of a multi-part reply.
        var sb = new StringBuilder();
        foreach (var part in parts.EnumerateArray())
        {
            if (part.SafeTryGetProperty("text", out var t))
            {
                var text = t.SafeGetString();
                if (!string.IsNullOrEmpty(text)) sb.Append(text);
            }
        }
        return sb.ToString();
    }, "Gemini 응답");
}
|
|
|
|
/// <summary>
/// Streams a Gemini <c>streamGenerateContent</c> response (requested with <c>alt=sse</c>) and
/// yields, per <c>data: </c> line, the concatenated text parts of the first candidate.
/// </summary>
/// <param name="messages">Conversation history, converted by <see cref="BuildGeminiBody"/>.</param>
/// <param name="ct">Cancels the HTTP request and the read loop.</param>
/// <returns>
/// Text fragments in arrival order. If the stream stalls after at least one line was received,
/// a final "response was cut off" notice is yielded before the sequence ends.
/// </returns>
/// <exception cref="InvalidOperationException">No Gemini API key is configured.</exception>
private async IAsyncEnumerable<string> StreamGeminiAsync(
    List<ChatMessage> messages,
    [EnumeratorCancellation] CancellationToken ct)
{
    var apiKey = ResolveApiKeyForService("gemini");
    if (string.IsNullOrEmpty(apiKey))
        throw new InvalidOperationException("Gemini API 키가 설정되지 않았습니다.");

    var model = ResolveModel();
    var body = BuildGeminiBody(messages);
    var url = $"https://generativelanguage.googleapis.com/v1beta/models/{model}:streamGenerateContent?alt=sse&key={apiKey}";

    using var req = new HttpRequestMessage(HttpMethod.Post, url) { Content = JsonContent(body) };
    using var resp = await SendWithErrorClassificationAsync(req, false, ct);

    using var stream = await resp.Content.ReadAsStreamAsync(ct);
    using var reader = new StreamReader(stream);

    var firstChunkReceived = false;

    // StreamReader.EndOfStream is deliberately not consulted here: it can block synchronously
    // while waiting for data, which would keep the chunk timeouts below from ever firing.
    while (!ct.IsCancellationRequested)
    {
        var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;

        string? line;
        var timedOut = false;
        using (var readCts = CancellationTokenSource.CreateLinkedTokenSource(ct))
        {
            readCts.CancelAfter(timeout);
            try
            {
                line = await reader.ReadLineAsync(readCts.Token);
            }
            catch (OperationCanceledException) when (!ct.IsCancellationRequested)
            {
                // Only the per-chunk timer fired; caller cancellation still propagates.
                line = null;
                timedOut = true;
            }
        }

        if (timedOut)
        {
            if (!firstChunkReceived)
                LogService.Warn($"Gemini 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초)");
            else
                yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
            break;
        }

        // null without a timeout means the server closed the stream.
        if (line is null) break;

        firstChunkReceived = true;
        if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
        var data = line["data: ".Length..];

        string? parsed = null;
        try
        {
            using var doc = JsonDocument.Parse(data);
            TryParseGeminiUsage(doc.RootElement);
            if (!doc.RootElement.SafeTryGetProperty("candidates", out var candidates)
                || candidates.ValueKind != JsonValueKind.Array
                || candidates.GetArrayLength() == 0) continue;

            var firstCand = candidates[0];
            var contentEl = firstCand.SafeGetProperty("content");
            if (contentEl == null || !contentEl.Value.SafeTryGetProperty("parts", out var parts)
                || parts.ValueKind != JsonValueKind.Array) continue;

            // Join every text part of this chunk into one fragment.
            var sb = new StringBuilder();
            foreach (var part in parts.EnumerateArray())
            {
                if (part.SafeTryGetProperty("text", out var t))
                {
                    var text = t.SafeGetString();
                    if (!string.IsNullOrEmpty(text)) sb.Append(text);
                }
            }
            if (sb.Length > 0) parsed = sb.ToString();
        }
        catch (JsonException ex)
        {
            LogService.Warn($"Gemini 스트리밍 JSON 파싱 오류: {ex.Message}");
        }

        if (parsed != null) yield return parsed;
    }
}
|
|
|
|
/// <summary>
/// Builds the Gemini request body: <c>contents</c>, <c>generationConfig</c> and, when a system
/// prompt is set, <c>systemInstruction</c>.
/// </summary>
/// <param name="messages">
/// Conversation history. Messages with role <c>"system"</c> are skipped; <c>"assistant"</c> is
/// sent as role <c>"model"</c> and every other role as <c>"user"</c>.
/// </param>
/// <returns>A dictionary ready for JSON serialization.</returns>
private object BuildGeminiBody(List<ChatMessage> messages)
{
    // The previously unused settings lookup was removed; nothing in this body depends on it.
    var contents = new List<object>();

    // The system prompt travels in its own field, not as a message.
    object? systemInstruction = null;
    if (!string.IsNullOrEmpty(_systemPrompt))
    {
        systemInstruction = new { parts = new[] { new { text = _systemPrompt } } };
    }

    foreach (var m in messages)
    {
        if (m.Role == "system") continue;

        // Text part first, then one inlineData part per attached image.
        var parts = new List<object> { new { text = m.Content } };
        if (m.Images?.Count > 0)
        {
            foreach (var img in m.Images)
                parts.Add(new { inlineData = new { mimeType = img.MimeType, data = img.Base64 } });
        }

        contents.Add(new
        {
            role = m.Role == "assistant" ? "model" : "user",
            parts
        });
    }

    var generationConfig = new Dictionary<string, object?>
    {
        ["temperature"] = ResolveTemperature(),
    };

    // maxOutputTokens is only sent when a cap was resolved.
    var maxOutputTokens = ResolveConfiguredMaxOutputTokens();
    if (maxOutputTokens.HasValue)
        generationConfig["maxOutputTokens"] = maxOutputTokens.Value;

    var body = new Dictionary<string, object?>
    {
        ["contents"] = contents,
        ["generationConfig"] = generationConfig,
    };
    if (systemInstruction != null)
        body["systemInstruction"] = systemInstruction;

    return body;
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
// Claude (messages API)
|
|
// ═══════════════════════════════════════════════════════════════════════
|
|
|
|
/// <summary>
/// Sends a non-streaming request to <c>https://{SigmoidApiHost}/v1/messages</c> and returns the
/// text of the first content block.
/// </summary>
/// <param name="messages">Conversation history, converted by <see cref="BuildSigmoidBody"/>.</param>
/// <param name="ct">Cancels the request and the response read.</param>
/// <returns>The first content block's <c>text</c>, or <c>"(빈 응답)"</c> when no content array is usable.</returns>
/// <exception cref="InvalidOperationException">No API key is configured for the service.</exception>
/// <exception cref="HttpRequestException">The server answered with a non-success status.</exception>
private async Task<string> SendSigmoidAsync(List<ChatMessage> messages, CancellationToken ct)
{
    var key = ResolveApiKeyForService("sigmoid");
    if (string.IsNullOrEmpty(key))
        throw new InvalidOperationException("Claude API 키가 설정되지 않았습니다. 설정 > AX Agent에서 API 키를 입력하세요.");

    var payloadJson = JsonSerializer.Serialize(BuildSigmoidBody(messages, stream: false));

    using var request = new HttpRequestMessage(HttpMethod.Post, $"https://{SigmoidApiHost}/v1/messages")
    {
        Content = new StringContent(payloadJson, Encoding.UTF8, "application/json")
    };
    request.Headers.Add("x-api-key", key);
    request.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);

    using var response = await _http.SendAsync(request, ct);

    // The body is needed on both paths: as error detail on failure, as the payload on success.
    var responseText = await response.Content.ReadAsStringAsync(ct);
    if (!response.IsSuccessStatusCode)
        throw new HttpRequestException(ClassifyHttpError(response, responseText));

    return SafeParseJson(responseText, ReadFirstTextBlock, "Claude 응답");

    // Records token usage, then reads content[0].text.
    string ReadFirstTextBlock(JsonElement root)
    {
        TryParseSigmoidUsage(root);

        var hasBlocks = root.SafeTryGetProperty("content", out var blocks)
            && blocks.ValueKind == JsonValueKind.Array
            && blocks.GetArrayLength() != 0;
        if (!hasBlocks)
            return root.SafeGetString() ?? "(빈 응답)";

        return blocks[0].SafeGetProperty("text")?.SafeGetString() ?? "";
    }
}
|
|
|
|
/// <summary>
/// Streams a response from <c>https://{SigmoidApiHost}/v1/messages</c> and yields the text of
/// each <c>content_block_delta</c> event; usage found in <c>message_start</c>/<c>message_delta</c>
/// events is recorded along the way.
/// </summary>
/// <param name="messages">Conversation history, converted by <see cref="BuildSigmoidBody"/>.</param>
/// <param name="ct">Cancels the HTTP request and the read loop.</param>
/// <returns>
/// Text fragments in arrival order. If the stream stalls after at least one line was received,
/// a final "response was cut off" notice is yielded before the sequence ends.
/// </returns>
/// <exception cref="InvalidOperationException">No API key is configured for the service.</exception>
/// <exception cref="HttpRequestException">The server answered with a non-success status.</exception>
private async IAsyncEnumerable<string> StreamSigmoidAsync(
    List<ChatMessage> messages,
    [EnumeratorCancellation] CancellationToken ct)
{
    var apiKey = ResolveApiKeyForService("sigmoid");
    if (string.IsNullOrEmpty(apiKey))
        throw new InvalidOperationException("Claude API 키가 설정되지 않았습니다.");

    var body = BuildSigmoidBody(messages, stream: true);
    var json = JsonSerializer.Serialize(body);
    using var req = new HttpRequestMessage(HttpMethod.Post, $"https://{SigmoidApiHost}/v1/messages");
    req.Content = new StringContent(json, Encoding.UTF8, "application/json");
    req.Headers.Add("x-api-key", apiKey);
    req.Headers.Add(SigmoidApiVersionHeader, SigmoidApiVersion);

    using var resp = await _http.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, ct);
    if (!resp.IsSuccessStatusCode)
    {
        var errBody = await resp.Content.ReadAsStringAsync(ct);
        throw new HttpRequestException(ClassifyHttpError(resp, errBody));
    }

    using var stream = await resp.Content.ReadAsStreamAsync(ct);
    using var reader = new StreamReader(stream);

    var firstChunkReceived = false;

    // StreamReader.EndOfStream is deliberately not consulted here: it can block synchronously
    // while waiting for data, which would keep the chunk timeouts below from ever firing.
    while (!ct.IsCancellationRequested)
    {
        var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;

        string? line;
        var timedOut = false;
        using (var readCts = CancellationTokenSource.CreateLinkedTokenSource(ct))
        {
            readCts.CancelAfter(timeout);
            try
            {
                line = await reader.ReadLineAsync(readCts.Token);
            }
            catch (OperationCanceledException) when (!ct.IsCancellationRequested)
            {
                // Only the per-chunk timer fired; caller cancellation still propagates.
                line = null;
                timedOut = true;
            }
        }

        if (timedOut)
        {
            if (!firstChunkReceived)
                LogService.Warn($"Claude 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초)");
            else
                yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
            break;
        }

        // null without a timeout means the server closed the stream.
        if (line is null) break;

        firstChunkReceived = true;
        if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
        var data = line["data: ".Length..];

        string? text = null;
        try
        {
            using var doc = JsonDocument.Parse(data);
            var type = doc.RootElement.SafeGetProperty("type")?.SafeGetString();
            if (type == "content_block_delta")
            {
                if (!doc.RootElement.SafeTryGetProperty("delta", out var delta)) continue;
                if (delta.SafeTryGetProperty("text", out var t))
                    text = t.SafeGetString();
            }
            else if (type is "message_start" or "message_delta")
            {
                // message_start: usage in .message.usage, message_delta: usage in .usage
                if (doc.RootElement.SafeTryGetProperty("message", out var msg) &&
                    msg.SafeTryGetProperty("usage", out var u1))
                    TryParseSigmoidUsageFromElement(u1);
                else if (doc.RootElement.SafeTryGetProperty("usage", out var u2))
                    TryParseSigmoidUsageFromElement(u2);
            }
        }
        catch (JsonException ex)
        {
            LogService.Warn($"Claude 스트리밍 JSON 파싱 오류: {ex.Message}");
        }

        if (!string.IsNullOrEmpty(text)) yield return text;
    }
}
|
|
|
|
/// <summary>
/// Builds the request body for the <c>/v1/messages</c> call: model, temperature, optional
/// <c>system</c> prompt, messages, stream flag and optional <c>max_tokens</c>.
/// </summary>
/// <param name="messages">
/// Conversation history. Messages with role <c>"system"</c> are skipped; a message with images
/// gets an array content (image blocks followed by one text block).
/// </param>
/// <param name="stream">Value of the <c>stream</c> field.</param>
/// <returns>A dictionary holding the body fields in insertion order.</returns>
private object BuildSigmoidBody(List<ChatMessage> messages, bool stream)
{
    var apiMessages = new List<object>();

    foreach (var message in messages)
    {
        if (message.Role == "system") continue;

        if (message.Images is not { Count: > 0 } images)
        {
            apiMessages.Add(new { role = message.Role, content = message.Content });
            continue;
        }

        // Vision message: content becomes an array (images first, then the text).
        var blocks = new List<object>();
        foreach (var image in images)
            blocks.Add(new { type = "image", source = new { type = "base64", media_type = image.MimeType, data = image.Base64 } });
        blocks.Add(new { type = "text", text = message.Content });
        apiMessages.Add(new { role = message.Role, content = blocks });
    }

    var activeModel = ResolveModel();
    var maxTokens = ResolveConfiguredMaxOutputTokens();

    // Keys are added in the order model, temperature, [system], messages, stream, [max_tokens].
    var payload = new Dictionary<string, object?>();
    payload["model"] = activeModel;
    payload["temperature"] = ResolveTemperature();
    if (!string.IsNullOrEmpty(_systemPrompt))
        payload["system"] = _systemPrompt;
    payload["messages"] = apiMessages;
    payload["stream"] = stream;
    if (maxTokens.HasValue)
        payload["max_tokens"] = maxTokens.Value;

    return payload;
}
|
|
|
|
// ─── 공용 헬퍼 ─────────────────────────────────────────────────────────
|
|
|
|
/// <summary>
/// Converts the conversation into the role/content message list shared by the Ollama and
/// OpenAI-compatible request bodies, placing the system prompt (when set) first.
/// </summary>
/// <param name="messages">Conversation history; messages with role <c>"system"</c> are skipped.</param>
/// <param name="openAiVision">
/// When true, a message with images gets an array content (<c>text</c> part followed by
/// <c>image_url</c> data-URL parts); when false, images go into an <c>images</c> array of base64 strings.
/// </param>
/// <returns>Anonymous message objects ready for JSON serialization.</returns>
private List<object> BuildMessageList(List<ChatMessage> messages, bool openAiVision = false)
{
    var output = new List<object>();

    if (!string.IsNullOrEmpty(_systemPrompt))
        output.Add(new { role = "system", content = _systemPrompt });

    foreach (var message in messages)
    {
        if (message.Role == "system") continue;

        // Text-only message.
        if (message.Images is not { Count: > 0 } images)
        {
            output.Add(new { role = message.Role, content = message.Content });
            continue;
        }

        // Ollama vision: base64 strings in a separate "images" field.
        if (!openAiVision)
        {
            output.Add(new { role = message.Role, content = message.Content, images = images.Select(i => i.Base64).ToArray() });
            continue;
        }

        // OpenAI vision: content array (text + image_url).
        var parts = new List<object> { new { type = "text", text = message.Content } };
        foreach (var image in images)
            parts.Add(new { type = "image_url", image_url = new { url = $"data:{image.MimeType};base64,{image.Base64}" } });
        output.Add(new { role = message.Role, content = parts });
    }

    return output;
}
|
|
|
|
/// <summary>
/// Sends the request through <c>_httpInsecure</c> when <paramref name="allowInsecureTls"/> is
/// true, otherwise through <c>_http</c>.
/// </summary>
/// <param name="req">Request to send.</param>
/// <param name="allowInsecureTls">Selects which of the two clients is used.</param>
/// <param name="ct">Cancels the send.</param>
/// <param name="completion">Passed through to <c>SendAsync</c>.</param>
/// <returns>The response as returned by the selected client.</returns>
private async Task<HttpResponseMessage> SendWithTlsAsync(
    HttpRequestMessage req,
    bool allowInsecureTls,
    CancellationToken ct,
    HttpCompletionOption completion = HttpCompletionOption.ResponseContentRead)
{
    if (allowInsecureTls)
    {
        return await _httpInsecure.SendAsync(req, completion, ct);
    }

    return await _http.SendAsync(req, completion, ct);
}
|
|
|
|
/// <summary>
/// Non-streaming JSON POST with retry: HTTP 429 and request timeouts are retried with a
/// linearly growing delay while attempts remain (<c>MaxRetries</c> retries after the first try).
/// </summary>
/// <param name="url">Target URL.</param>
/// <param name="body">Object serialized to JSON as the request content.</param>
/// <param name="allowInsecureTls">Forwarded to <see cref="SendWithTlsAsync"/>.</param>
/// <param name="ct">Cancels the request and any back-off delay.</param>
/// <returns>The response body of the first successful attempt.</returns>
/// <exception cref="HttpRequestException">
/// The server answered with a non-success status that is not retried (message from <see cref="ClassifyHttpError"/>).
/// </exception>
private async Task<string> PostJsonWithRetryAsync(string url, object body, bool allowInsecureTls, CancellationToken ct)
{
    var payload = JsonSerializer.Serialize(body);
    Exception? timeoutFailure = null;
    var attempt = 0;

    while (attempt <= MaxRetries)
    {
        var canRetry = attempt < MaxRetries;
        var backoffMs = 1000 * (attempt + 1);

        try
        {
            using var request = new HttpRequestMessage(HttpMethod.Post, url)
            {
                Content = new StringContent(payload, Encoding.UTF8, "application/json")
            };
            using var response = await SendWithTlsAsync(request, allowInsecureTls, ct);

            if (response.IsSuccessStatusCode)
                return await response.Content.ReadAsStringAsync(ct);

            // Only 429 (rate limit) is retried; every other status is classified and thrown.
            var rateLimited = (int)response.StatusCode == 429;
            if (!(rateLimited && canRetry))
            {
                var errorBody = await response.Content.ReadAsStringAsync(ct);
                throw new HttpRequestException(ClassifyHttpError(response, errorBody));
            }

            await Task.Delay(backoffMs, ct);
        }
        catch (TaskCanceledException) when (!ct.IsCancellationRequested && canRetry)
        {
            // Cancellation that did not come from the caller's token is treated as a timeout.
            timeoutFailure = new TimeoutException("요청 시간 초과");
            await Task.Delay(backoffMs, ct);
        }

        attempt++;
    }

    throw timeoutFailure ?? new HttpRequestException("요청 실패");
}
|
|
|
|
/// <summary>
/// Sends the request with <see cref="HttpCompletionOption.ResponseHeadersRead"/> and converts a
/// non-success status into an <see cref="HttpRequestException"/> whose message comes from
/// <see cref="ClassifyHttpError"/>. Used by the streaming callers and by the non-streaming
/// OpenAI-compatible call.
/// </summary>
/// <param name="req">Request to send.</param>
/// <param name="allowInsecureTls">Forwarded to <see cref="SendWithTlsAsync"/>.</param>
/// <param name="ct">Cancels the send and the error-body read.</param>
/// <returns>The successful response; the caller owns it and must dispose it.</returns>
/// <exception cref="HttpRequestException">The server answered with a non-success status.</exception>
private async Task<HttpResponseMessage> SendWithErrorClassificationAsync(
    HttpRequestMessage req, bool allowInsecureTls, CancellationToken ct)
{
    var resp = await SendWithTlsAsync(req, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead);
    if (resp.IsSuccessStatusCode)
        return resp;

    // Failure path: the response never reaches the caller, so dispose it here, including
    // when reading the error body itself throws (e.g. on cancellation).
    using (resp)
    {
        var errBody = await resp.Content.ReadAsStringAsync(ct);
        throw new HttpRequestException(ClassifyHttpError(resp, errBody));
    }
}
|
|
|
|
/// <summary>
/// Reads one line from a streaming response, giving up after <paramref name="timeout"/>.
/// </summary>
/// <param name="reader">Reader positioned on the response stream.</param>
/// <param name="ct">Caller cancellation; when it fires, the cancellation exception propagates.</param>
/// <param name="timeout">Maximum time to wait for this line.</param>
/// <returns>
/// The line read, or <c>null</c> when the timeout elapsed. <c>ReadLineAsync</c> also returns
/// <c>null</c> at end of stream, so a <c>null</c> result alone does not tell the two apart.
/// </returns>
private static async Task<string?> ReadLineWithTimeoutAsync(StreamReader reader, CancellationToken ct, TimeSpan timeout)
{
    using var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(ct);
    timeoutSource.CancelAfter(timeout);

    string? line = null;
    try
    {
        line = await reader.ReadLineAsync(timeoutSource.Token);
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Only the timeout fired: report it as "no line".
    }

    return line;
}
|
|
|
|
/// <summary>
/// Parses a response body and hands the root element to <paramref name="extractor"/>, turning
/// API error payloads and malformed JSON into exceptions with a descriptive message.
/// </summary>
/// <param name="json">Response body to parse.</param>
/// <param name="extractor">Pulls the result string out of the parsed root element.</param>
/// <param name="context">Label placed in brackets at the start of every error message.</param>
/// <returns>Whatever <paramref name="extractor"/> returns.</returns>
/// <exception cref="HttpRequestException">The root object carries a non-null <c>error</c> property.</exception>
/// <exception cref="InvalidOperationException">
/// The body is not valid JSON, or <paramref name="extractor"/> threw <see cref="KeyNotFoundException"/>.
/// </exception>
private static string SafeParseJson(string json, Func<JsonElement, string> extractor, string context)
{
    try
    {
        using var doc = JsonDocument.Parse(json);

        // Detect an API error payload. An explicit "error": null marks a successful response
        // in some APIs and must not be reported as a failure.
        if (doc.RootElement.SafeTryGetProperty("error", out var error)
            && error.ValueKind != JsonValueKind.Null)
        {
            var msg = error.SafeTryGetProperty("message", out var m) ? m.SafeGetString() : error.ToString();
            throw new HttpRequestException($"[{context}] API 에러: {msg}");
        }

        return extractor(doc.RootElement);
    }
    catch (JsonException ex)
    {
        throw new InvalidOperationException(
            $"[{context}] 응답 형식 오류 — 예상하지 못한 JSON 형식입니다.\n파싱 오류: {ex.Message}\n응답 미리보기: {Preview()}");
    }
    catch (KeyNotFoundException)
    {
        throw new InvalidOperationException(
            $"[{context}] 응답에 필요한 필드가 없습니다.\n응답 미리보기: {Preview()}");
    }

    // First 200 characters of the body, with an ellipsis when truncated.
    string Preview() => json.Length > 200 ? json[..200] + "…" : json;
}
|
|
|
|
/// <summary>
/// Maps an HTTP error status to a user-facing message and, when the body carries an
/// <c>error</c> value (an object with <c>message</c>, or a plain string), appends it as detail.
/// </summary>
/// <param name="resp">Response whose status code is classified.</param>
/// <param name="body">Optional response body searched for the error detail.</param>
/// <returns>The status message, followed by a detail line when one was found.</returns>
private static string ClassifyHttpError(HttpResponseMessage resp, string? body = null)
{
    var statusCode = (int)resp.StatusCode;

    var summary = statusCode switch
    {
        400 => "잘못된 요청 — 모델 이름이나 요청 형식을 확인하세요",
        401 => "인증 실패 — API 키가 유효하지 않습니다",
        403 => "접근 거부 — API 키 권한을 확인하세요",
        404 => "모델을 찾을 수 없습니다 — 모델 이름을 확인하세요",
        429 => "요청 한도 초과 — 잠시 후 다시 시도하세요",
        500 => "서버 내부 오류 — LLM 서버 상태를 확인하세요",
        502 or 503 => "서버 일시 장애 — 잠시 후 다시 시도하세요",
        _ => $"HTTP {statusCode} 오류"
    };

    var detail = ReadErrorDetail(body);
    return string.IsNullOrEmpty(detail) ? summary : $"{summary}\n상세: {detail}";

    // Best-effort extraction of error.message (or a string-valued error); "" when unavailable.
    static string ReadErrorDetail(string? payload)
    {
        if (string.IsNullOrEmpty(payload)) return "";

        try
        {
            using var doc = JsonDocument.Parse(payload);
            if (!doc.RootElement.SafeTryGetProperty("error", out var err))
                return "";

            if (err.ValueKind == JsonValueKind.Object && err.SafeTryGetProperty("message", out var m))
                return m.SafeGetString() ?? "";

            return err.ValueKind == JsonValueKind.String ? err.SafeGetString() ?? "" : "";
        }
        catch
        {
            // A body that cannot be read as JSON simply contributes no detail.
            return "";
        }
    }
}
|
|
|
|
/// <summary>
/// Serializes <paramref name="body"/> to JSON and wraps it as UTF-8 <c>application/json</c> content.
/// </summary>
/// <param name="body">Object to serialize.</param>
/// <returns>The request content.</returns>
private static StringContent JsonContent(object body) =>
    new StringContent(JsonSerializer.Serialize(body), Encoding.UTF8, "application/json");
|
|
|
|
// ─── 토큰 사용량 파싱 헬퍼 ──────────────────────────────────────────
|
|
|
|
/// <summary>
/// Best-effort read of Ollama token counts (<c>prompt_eval_count</c>, <c>eval_count</c>) into
/// <c>LastTokenUsage</c>. Does nothing when both are zero or absent; never throws.
/// </summary>
/// <param name="root">Root element of an Ollama response line.</param>
private void TryParseOllamaUsage(JsonElement root)
{
    try
    {
        var promptTokens = 0;
        var completionTokens = 0;

        if (root.SafeTryGetProperty("prompt_eval_count", out var promptElement))
            promptTokens = promptElement.GetInt32();
        if (root.SafeTryGetProperty("eval_count", out var evalElement))
            completionTokens = evalElement.GetInt32();

        if (promptTokens <= 0 && completionTokens <= 0)
            return;

        LastTokenUsage = new TokenUsage(promptTokens, completionTokens);
    }
    catch
    {
        // Usage is optional metadata; a malformed value is ignored.
    }
}
|
|
|
|
/// <summary>
/// Best-effort read of OpenAI-style token counts (<c>usage.prompt_tokens</c>,
/// <c>usage.completion_tokens</c>) into <c>LastTokenUsage</c>. Does nothing when <c>usage</c>
/// is absent or both counts are zero; never throws.
/// </summary>
/// <param name="root">Root element of a response or stream chunk.</param>
private void TryParseOpenAiUsage(JsonElement root)
{
    try
    {
        if (root.SafeTryGetProperty("usage", out var usageElement))
        {
            var promptTokens = 0;
            var completionTokens = 0;

            if (usageElement.SafeTryGetProperty("prompt_tokens", out var promptElement))
                promptTokens = promptElement.SafeGetInt32(0);
            if (usageElement.SafeTryGetProperty("completion_tokens", out var completionElement))
                completionTokens = completionElement.SafeGetInt32(0);

            if (promptTokens > 0 || completionTokens > 0)
                LastTokenUsage = new TokenUsage(promptTokens, completionTokens);
        }
    }
    catch
    {
        // Usage is optional metadata; a malformed value is ignored.
    }
}
|
|
|
|
/// <summary>
/// Best-effort read of Gemini token counts (<c>usageMetadata.promptTokenCount</c>,
/// <c>usageMetadata.candidatesTokenCount</c>) into <c>LastTokenUsage</c>. Does nothing when
/// <c>usageMetadata</c> is absent or both counts are zero; never throws.
/// </summary>
/// <param name="root">Root element of a response or stream chunk.</param>
private void TryParseGeminiUsage(JsonElement root)
{
    try
    {
        if (root.SafeTryGetProperty("usageMetadata", out var metadata))
        {
            var promptTokens = 0;
            var candidateTokens = 0;

            if (metadata.SafeTryGetProperty("promptTokenCount", out var promptElement))
                promptTokens = promptElement.GetInt32();
            if (metadata.SafeTryGetProperty("candidatesTokenCount", out var candidatesElement))
                candidateTokens = candidatesElement.GetInt32();

            if (promptTokens > 0 || candidateTokens > 0)
                LastTokenUsage = new TokenUsage(promptTokens, candidateTokens);
        }
    }
    catch
    {
        // Usage is optional metadata; a malformed value is ignored.
    }
}
|
|
|
|
/// <summary>
/// Looks up the <c>usage</c> property on a response root and, when present, forwards it to
/// <see cref="TryParseSigmoidUsageFromElement"/>. Never throws.
/// </summary>
/// <param name="root">Root element of a non-streaming response.</param>
private void TryParseSigmoidUsage(JsonElement root)
{
    try
    {
        if (root.SafeTryGetProperty("usage", out var usageElement))
            TryParseSigmoidUsageFromElement(usageElement);
    }
    catch
    {
        // Usage is optional metadata; failures are ignored.
    }
}
|
|
|
|
/// <summary>
/// Best-effort read of <c>input_tokens</c>/<c>output_tokens</c> from a usage element into
/// <c>LastTokenUsage</c>. Does nothing when both counts are zero or absent; never throws.
/// </summary>
/// <param name="usage">The <c>usage</c> object of a response or stream event.</param>
private void TryParseSigmoidUsageFromElement(JsonElement usage)
{
    try
    {
        var inputTokens = 0;
        var outputTokens = 0;

        if (usage.SafeTryGetProperty("input_tokens", out var inputElement))
            inputTokens = inputElement.GetInt32();
        if (usage.SafeTryGetProperty("output_tokens", out var outputElement))
            outputTokens = outputElement.GetInt32();

        if (inputTokens <= 0 && outputTokens <= 0)
            return;

        LastTokenUsage = new TokenUsage(inputTokens, outputTokens);
    }
    catch
    {
        // Usage is optional metadata; a malformed value is ignored.
    }
}
|
|
|
|
/// <summary>
/// Releases both HTTP clients held by this instance (<c>_http</c> and <c>_httpInsecure</c>).
/// </summary>
public void Dispose()
{
    // Standard client first, then the client used when insecure TLS is allowed.
    _http.Dispose();
    _httpInsecure.Dispose();
}
|
|
}
|