모델 프로파일 기반 Cowork/Code 루프와 진행 UX 고도화 반영

- 등록 모델 실행 프로파일을 검증 게이트, 문서 fallback, post-tool verification까지 확장 적용
- Cowork/Code 진행 카드에 계획/도구/검증/압축/폴백/재시도 단계 메타를 추가해 대기 상태 가시성 강화
- OpenAI/vLLM tool 요청에 병렬 도구 호출 힌트를 추가하고 회귀 프롬프트 문서를 프로파일 기준으로 전면 정리
- 검증: dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify\\ -p:IntermediateOutputPath=obj\\verify\\ (경고 0 / 오류 0)
2026-04-08 13:41:57 +09:00
parent b391dfdfb3
commit a2c952879d
552 changed files with 8094 additions and 13595 deletions
--- a/src/AxCopilot/Services/LlmService.cs
+++ b/src/AxCopilot/Services/LlmService.cs
@@ -25,7 +25,10 @@ public partial class LlmService : IDisposable
    private string? _systemPrompt;

    private const int MaxRetries = 2;
-    private static readonly TimeSpan ChunkTimeout = TimeSpan.FromSeconds(30);
+    // First chunk: time the model needs to process the context (allows up to 3 minutes for large contexts)
+    private static readonly TimeSpan FirstChunkTimeout = TimeSpan.FromSeconds(180);
+    // Subsequent chunks: maximum gap between chunks once streaming has started
+    private static readonly TimeSpan SubsequentChunkTimeout = TimeSpan.FromSeconds(45);
    private static readonly string SigmoidApiHost = string.Concat("api.", "an", "thr", "opic.com");
    private static readonly string SigmoidApiVersionHeader = string.Concat("an", "thr", "opic-version");
    private const string SigmoidApiVersion = "2023-06-01";
@@ -93,7 +96,12 @@ public partial class LlmService : IDisposable
    public (string service, string model) GetCurrentModelInfo() => (ResolveService(), ResolveModel());

    /// <summary>오버라이드를 고려한 실제 서비스명.</summary>
-    private string ResolveService() => NormalizeServiceName(_serviceOverride ?? _settings.Settings.Llm.Service);
+    private string ResolveService()
+    {
+        string? svc;
+        lock (_overrideLock) svc = _serviceOverride; // copy the override into a local while holding _overrideLock
+        return NormalizeServiceName(svc ?? _settings.Settings.Llm.Service); // no override -> fall back to the configured service
+    }

    private static bool IsExternalLlmService(string normalizedService)
        => normalizedService is "gemini" or "sigmoid";
@@ -129,12 +137,29 @@ public partial class LlmService : IDisposable
    /// <summary>오버라이드를 고려한 실제 모델명.</summary>
    private string ResolveModel()
    {
-        if (_modelOverride != null) return _modelOverride;
-        return ResolveModelName();
+        string? mdl;
+        lock (_overrideLock) mdl = _modelOverride; // copy the override into a local while holding _overrideLock
+        return mdl ?? ResolveModelName(); // no override -> fall back to ResolveModelName()
    }

    private double ResolveTemperature() => _temperatureOverride ?? _settings.Settings.Llm.Temperature;

+    internal string GetActiveExecutionProfileKey()
+        => Agent.ModelExecutionProfileCatalog.Normalize(GetActiveRegisteredModel()?.ExecutionProfile); // Normalize receives null when GetActiveRegisteredModel() returns null
+
+    internal Agent.ModelExecutionProfileCatalog.ExecutionPolicy GetActiveExecutionPolicy()
+        => Agent.ModelExecutionProfileCatalog.Get(GetActiveExecutionProfileKey()); // looks the policy up by the normalized profile key
+
+    internal double ResolveToolTemperature()
+    {
+        var resolved = ResolveTemperature(); // temperature override if set, otherwise the configured temperature
+        if (!_settings.Settings.Llm.UseAutomaticProfileTemperature) // capping only applies when this setting is on
+            return resolved;
+
+        var cap = GetActiveExecutionPolicy().ToolTemperatureCap; // nullable: a policy may define no cap
+        return cap.HasValue ? Math.Min(resolved, cap.Value) : resolved; // only ever lowers the temperature, never raises it
+    }
+
    private string? ResolveReasoningEffort() => _reasoningEffortOverride;

    private static bool LooksLikeEncryptedPayload(string value)
@@ -371,6 +396,49 @@ public partial class LlmService : IDisposable
            if (m.Role == "system")
                continue;

+            // assistant 메시지에 _tool_use_blocks 포함 시 텍스트만 추출
+            // (IBM vLLM은 OpenAI tool_use 형식을 이해하지 못함)
+            if (m.Role == "assistant" && m.Content.Contains("_tool_use_blocks"))
+            {
+                try
+                {
+                    using var doc = JsonDocument.Parse(m.Content);
+                    if (doc.RootElement.TryGetProperty("_tool_use_blocks", out var blocks))
+                    {
+                        var parts = new List<string>();
+                        foreach (var block in blocks.EnumerateArray())
+                        {
+                            if (!block.TryGetProperty("type", out var typeEl)) continue;
+                            var type = typeEl.GetString();
+                            if (type == "text" && block.TryGetProperty("text", out var textEl))
+                                parts.Add(textEl.GetString() ?? "");
+                            else if (type == "tool_use" && block.TryGetProperty("name", out var nameEl))
+                                parts.Add($"[도구 호출: {nameEl.GetString()}]");
+                        }
+                        var content = string.Join("\n", parts).Trim();
+                        if (!string.IsNullOrEmpty(content))
+                            msgs.Add(new { role = "assistant", content });
+                        continue;
+                    }
+                }
+                catch { /* 파싱 실패 시 아래에서 원본 사용 */ }
+            }
+
+            // user 메시지에 tool_result JSON 포함 시 평문으로 변환
+            if (m.Role == "user" && m.Content.StartsWith("{\"type\":\"tool_result\"", StringComparison.Ordinal))
+            {
+                try
+                {
+                    using var doc = JsonDocument.Parse(m.Content);
+                    var root = doc.RootElement;
+                    var toolName = root.TryGetProperty("tool_name", out var tn) ? tn.GetString() ?? "tool" : "tool";
+                    var toolContent = root.TryGetProperty("content", out var tc) ? tc.GetString() ?? "" : "";
+                    msgs.Add(new { role = "user", content = $"[{toolName} 결과]\n{toolContent}" });
+                    continue;
+                }
+                catch { /* 파싱 실패 시 아래에서 원본 사용 */ }
+            }
+
            msgs.Add(new
            {
                role = m.Role == "assistant" ? "assistant" : "user",
@@ -656,10 +724,20 @@ public partial class LlmService : IDisposable
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);

+        var firstChunkReceived = false;
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
-            var line = await ReadLineWithTimeoutAsync(reader, ct);
-            if (line == null) break;
+            var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;
+            var line = await ReadLineWithTimeoutAsync(reader, ct, timeout);
+            if (line == null)
+            {
+                if (!firstChunkReceived)
+                    LogService.Warn($"Ollama 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초) — 모델이 응답하지 않습니다");
+                else
+                    yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
+                break;
+            }
+            firstChunkReceived = true;
            if (string.IsNullOrEmpty(line)) continue;

            string? text = null;
@@ -669,7 +747,6 @@ public partial class LlmService : IDisposable
                if (doc.RootElement.TryGetProperty("message", out var msg) &&
                    msg.TryGetProperty("content", out var c))
                    text = c.GetString();
-                // Ollama: done=true 시 토큰 사용량 포함
                if (doc.RootElement.TryGetProperty("done", out var done) && done.GetBoolean())
                    TryParseOllamaUsage(doc.RootElement);
            }
@@ -721,11 +798,16 @@ public partial class LlmService : IDisposable

        using var resp = await SendWithErrorClassificationAsync(req, allowInsecureTls, ct);
        var respBody = await resp.Content.ReadAsStringAsync(ct);
-        return SafeParseJson(respBody, root =>
+
+        // IBM vLLM이 stream:false 요청에도 SSE 형식(id:/event/data: 라인)으로 응답하는 경우 처리
+        var effectiveBody = ExtractJsonFromSseIfNeeded(respBody);
+
+        return SafeParseJson(effectiveBody, root =>
        {
            TryParseOpenAiUsage(root);
            if (usesIbmDeploymentApi)
            {
+                // SSE에서 누적된 텍스트가 이미 하나의 JSON이 아닐 수 있으므로 재추출
                var parsed = ExtractIbmDeploymentText(root);
                return string.IsNullOrWhiteSpace(parsed) ? "(빈 응답)" : parsed;
            }
@@ -736,6 +818,81 @@ public partial class LlmService : IDisposable
        }, "vLLM 응답");
    }

+    /// <summary>
+    /// When IBM vLLM answers a stream:false request in SSE format (id:/event/data: lines),
+    /// extracts only the JSON from the "data: {...}" lines. Plain JSON is returned unchanged.
+    /// </summary>
+    private static string ExtractJsonFromSseIfNeeded(string raw)
+    {
+        if (string.IsNullOrWhiteSpace(raw)) return raw;
+        var trimmed = raw.TrimStart();
+
+        // Plain JSON (object or array): return the input untouched
+        if (trimmed.StartsWith('{') || trimmed.StartsWith('['))
+            return raw;
+
+        // SSE format: remember the last non-empty "data: {...}" payload (assigned before parsing, so it may not be valid JSON)
+        // (a stream:false reply that arrives as SSE is usually a single data line + [DONE])
+        string? lastDataJson = null;
+        var sb = new System.Text.StringBuilder();
+        bool collectingChunks = false;
+
+        foreach (var line in raw.Split('\n'))
+        {
+            var l = line.TrimEnd('\r').Trim();
+            if (!l.StartsWith("data: ", StringComparison.Ordinal)) continue; // NOTE(review): requires the space after "data:"; a line written as "data:{...}" is skipped
+            var data = l["data: ".Length..].Trim();
+            if (data == "[DONE]") break; // stop at the [DONE] marker; later lines are not inspected
+            if (string.IsNullOrEmpty(data)) continue;
+
+            // For the choices[].delta.content shape (streaming chunks), accumulate the text
+            // For a single complete response (choices[].message), return it immediately
+            lastDataJson = data;
+            try
+            {
+                using var doc = JsonDocument.Parse(data);
+                // Streaming chunk (delta) -> accumulate content
+                if (doc.RootElement.TryGetProperty("choices", out var ch) && ch.GetArrayLength() > 0)
+                {
+                    var first = ch[0];
+                    if (first.TryGetProperty("delta", out var delta)
+                        && delta.TryGetProperty("content", out var cnt))
+                    {
+                        var txt = cnt.GetString();
+                        if (!string.IsNullOrEmpty(txt)) { sb.Append(txt); collectingChunks = true; }
+                    }
+                    else if (first.TryGetProperty("message", out _))
+                    {
+                        // Complete response -> use this JSON as-is
+                        return data;
+                    }
+                }
+                // IBM results[] shape
+                else if (doc.RootElement.TryGetProperty("results", out var res) && res.GetArrayLength() > 0)
+                {
+                    return data;
+                }
+            }
+            catch { /* ignore lines that fail to parse */ }
+        }
+
+        // If chunks were accumulated, reassemble them into the OpenAI message shape (only the content text is carried over)
+        if (collectingChunks && sb.Length > 0)
+        {
+            var assembled = System.Text.Json.JsonSerializer.Serialize(new
+            {
+                choices = new[]
+                {
+                    new { message = new { content = sb.ToString() } }
+                }
+            });
+            return assembled;
+        }
+
+        // Otherwise use the last data payload as-is, or the raw input when no data line was seen
+        return lastDataJson ?? raw;
+    }
+
    private async IAsyncEnumerable<string> StreamOpenAiCompatibleAsync(
        List<ChatMessage> messages,
        [EnumeratorCancellation] CancellationToken ct)
@@ -759,10 +916,20 @@ public partial class LlmService : IDisposable
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);

+        var firstChunkReceived = false;
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
-            var line = await ReadLineWithTimeoutAsync(reader, ct);
-            if (line == null) break;
+            var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;
+            var line = await ReadLineWithTimeoutAsync(reader, ct, timeout);
+            if (line == null)
+            {
+                if (!firstChunkReceived)
+                    LogService.Warn($"vLLM 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초) — 모델이 응답하지 않습니다");
+                else
+                    yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
+                break;
+            }
+            firstChunkReceived = true;
            if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
            var data = line["data: ".Length..];
            if (data == "[DONE]") break;
@@ -882,10 +1049,20 @@ public partial class LlmService : IDisposable
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);

+        var firstChunkReceived = false;
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
-            var line = await ReadLineWithTimeoutAsync(reader, ct);
-            if (line == null) break;
+            var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;
+            var line = await ReadLineWithTimeoutAsync(reader, ct, timeout);
+            if (line == null)
+            {
+                if (!firstChunkReceived)
+                    LogService.Warn($"Gemini 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초)");
+                else
+                    yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
+                break;
+            }
+            firstChunkReceived = true;
            if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
            var data = line["data: ".Length..];
            string? parsed = null;
@@ -1016,10 +1193,20 @@ public partial class LlmService : IDisposable
        using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var reader = new StreamReader(stream);

+        var firstChunkReceived = false;
        while (!reader.EndOfStream && !ct.IsCancellationRequested)
        {
-            var line = await ReadLineWithTimeoutAsync(reader, ct);
-            if (line == null) break;
+            var timeout = firstChunkReceived ? SubsequentChunkTimeout : FirstChunkTimeout;
+            var line = await ReadLineWithTimeoutAsync(reader, ct, timeout);
+            if (line == null)
+            {
+                if (!firstChunkReceived)
+                    LogService.Warn($"Claude 첫 청크 타임아웃 ({(int)FirstChunkTimeout.TotalSeconds}초)");
+                else
+                    yield return "\n\n*(응답이 중간에 끊겼습니다 — 연결 시간 초과)*";
+                break;
+            }
+            firstChunkReceived = true;
            if (string.IsNullOrEmpty(line) || !line.StartsWith("data: ")) continue;
            var data = line["data: ".Length..];

@@ -1201,19 +1388,18 @@ public partial class LlmService : IDisposable
        return resp;
    }

-    /// <summary>스트리밍 ReadLine에 청크 타임아웃 적용</summary>
-    private static async Task<string?> ReadLineWithTimeoutAsync(StreamReader reader, CancellationToken ct)
+    /// <summary>Applies a per-chunk timeout to a streaming ReadLine. Returns null on timeout.</summary>
+    private static async Task<string?> ReadLineWithTimeoutAsync(StreamReader reader, CancellationToken ct, TimeSpan timeout)
    {
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
-        cts.CancelAfter(ChunkTimeout);
+        cts.CancelAfter(timeout); // the linked source cancels on caller cancellation or once the timeout elapses
        try
        {
            return await reader.ReadLineAsync(cts.Token);
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested) // caller-requested cancellation is not caught here and propagates
        {
-            LogService.Warn("스트리밍 청크 타임아웃 (30초 무응답)");
-            return null; // 타임아웃 시 스트림 종료
+            return null; // timeout is no longer logged here; the streaming loops in this file check for null and break
        }
    }