diff --git a/README.md b/README.md index 40a24a9..edb2684 100644 --- a/README.md +++ b/README.md @@ -1488,3 +1488,7 @@ MIT License - IBM 배포형 엔드포인트가 `tool_choice`를 400으로 거부하면, `tool_choice`만 제거하고 동일한 강제 지시를 유지한 채 한 번 더 재시도하는 대체 강제 전략을 넣었습니다. - OpenAI/vLLM tool-use 응답은 이제 `stream=true` 기반 SSE 수신기로 읽으며, `delta.tool_calls`를 부분 조립해 완성된 도구 호출을 더 빨리 감지합니다. - read-only 도구는 조립이 끝나는 즉시 조기 실행을 시작하고, 최종 루프에서는 그 결과를 재사용하도록 바꿔 도구 착수 속도를 끌어올렸습니다. +- 업데이트: 2026-04-08 11:31 (KST) + - `LlmService`에 tool-use 전용 스트리밍 이벤트 API를 추가했습니다. 이제 OpenAI/vLLM/IBM 경로는 텍스트 델타와 완성된 도구 호출을 각각 이벤트로 내보낼 수 있습니다. + - `Cowork/Code` 루프도 이 스트리밍 이벤트를 직접 소비하도록 바꿔, 도구 호출이 완성되는 즉시 transcript에 `스트리밍 도구 감지` 진행 표시가 보이고 read-only 도구 조기 실행도 실제 실행 루프와 연결되도록 정리했습니다. + - 기존의 `응답 전체 수신 -> tool_calls 파싱 -> 도구 실행` 구조에서 한 단계 더 나아가, `스트리밍 수신 -> partial tool_call 조립 -> 조기 read-only 실행 -> 최종 루프 재사용` 흐름으로 리팩터링했습니다. diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index 26167b0..b5cf4d4 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -5401,3 +5401,17 @@ ow + toggle 시각 언어로 다시 정렬했다. - [AgentLoopService.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/Services/Agent/AgentLoopService.cs) - 메인 루프의 tool-use 요청 경로에 조기 read-only prefetch callback을 연결했다. - 최종 도구 실행 단계에서는 조기 실행이 이미 끝난 도구의 결과를 재사용해 중복 실행을 피하고, transcript에는 `조기 실행 결과 재사용` 힌트를 남기도록 정리했다. + +## 2026-04-08 11:31 (KST) + +- [LlmService.ToolUse.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/Services/LlmService.ToolUse.cs) + - `ToolStreamEventKind`, `ToolStreamEvent`, `StreamWithToolsAsync(...)`를 추가해 tool-use 응답을 텍스트 델타와 도구 호출 이벤트 단위로 소비할 수 있게 했다. + - OpenAI/vLLM/IBM 경로에 `StreamOpenAiToolEventsAsync(...)`를 도입해 `choices[].delta.tool_calls`와 IBM `results`/`choices` 스트림을 직접 읽고, 완성된 tool call을 event로 즉시 방출하도록 리팩터링했다. + - 기존 `ReadOpenAiToolBlocksFromStreamAsync(...)`는 이제 streaming event를 집계하는 래퍼로 바뀌어, 기존 반환형 API와 새 streaming API가 같은 파서를 공유한다. 
+- [AgentLoopTransitions.Execution.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/Services/Agent/AgentLoopTransitions.Execution.cs) + - `SendWithToolsWithRecoveryAsync(...)`가 streaming event callback을 받을 수 있게 확장했다. + - callback이 주어지면 `LlmService.StreamWithToolsAsync(...)`를 직접 소비하면서 텍스트/도구 호출을 집계하도록 조정했다. +- [AgentLoopService.cs](/E:/AX%20Copilot%20-%20Codex/src/AxCopilot/Services/Agent/AgentLoopService.cs) + - Cowork/Code 메인 루프가 tool-use streaming event를 직접 소비하게 바꿨다. + - 텍스트 델타가 쌓이면 450ms 주기로 `Thinking` 이벤트에 축약 preview를 갱신하고, 도구 호출이 완성되면 `스트리밍 도구 감지` 진행 메시지를 즉시 띄우도록 연결했다. + - read-only 조기 실행과 최종 실행 재사용 흐름이 기존 loop와 실제로 이어지도록 정리했다. diff --git a/src/AxCopilot/Services/Agent/AgentLoopService.cs b/src/AxCopilot/Services/Agent/AgentLoopService.cs index 3056103..9e77895 100644 --- a/src/AxCopilot/Services/Agent/AgentLoopService.cs +++ b/src/AxCopilot/Services/Agent/AgentLoopService.cs @@ -2,6 +2,7 @@ using System.Diagnostics; using System.Collections.Concurrent; using System.IO; +using System.Text; using System.Text.Json; using AxCopilot.Models; using AxCopilot.Services; @@ -548,6 +549,8 @@ public partial class AgentLoopService var (_, currentModel) = _llm.GetCurrentModelInfo(); WorkflowLogService.LogLlmRequest(_conversationId, _currentRunId, iteration, currentModel, sendMessages.Count, activeTools.Count, forceFirst); + var streamedTextPreview = new StringBuilder(); + var lastStreamUiUpdateAt = DateTime.MinValue; blocks = await SendWithToolsWithRecoveryAsync( sendMessages, @@ -560,7 +563,39 @@ public partial class AgentLoopService block, activeTools, context, - ct)); + ct), + onStreamEventAsync: async evt => + { + switch (evt.Kind) + { + case LlmService.ToolStreamEventKind.TextDelta: + if (!string.IsNullOrWhiteSpace(evt.Text)) + { + streamedTextPreview.Append(evt.Text); + var now = DateTime.UtcNow; + if ((now - lastStreamUiUpdateAt).TotalMilliseconds >= 450 && streamedTextPreview.Length > 0) + { + var preview = streamedTextPreview.ToString(); + preview = preview.Length > 
140 ? preview[..140] + "…" : preview; + EmitEvent(AgentEventType.Thinking, "", preview); + lastStreamUiUpdateAt = now; + } + } + break; + case LlmService.ToolStreamEventKind.ToolCallReady: + if (evt.ToolCall != null) + { + EmitEvent( + AgentEventType.Thinking, + evt.ToolCall.ToolName, + $"스트리밍 도구 감지: {FormatToolCallSummary(evt.ToolCall)}"); + } + break; + case LlmService.ToolStreamEventKind.Completed: + await Task.CompletedTask; + break; + } + }); runState.ContextRecoveryAttempts = 0; llmCallSw.Stop(); runState.TransientLlmErrorRetries = 0; diff --git a/src/AxCopilot/Services/Agent/AgentLoopTransitions.Execution.cs b/src/AxCopilot/Services/Agent/AgentLoopTransitions.Execution.cs index 173f95a..bcb7126 100644 --- a/src/AxCopilot/Services/Agent/AgentLoopTransitions.Execution.cs +++ b/src/AxCopilot/Services/Agent/AgentLoopTransitions.Execution.cs @@ -3,6 +3,7 @@ using AxCopilot.Services; using System.Text.Json; using System.Diagnostics; +using System.Text; namespace AxCopilot.Services.Agent; @@ -1164,7 +1165,8 @@ public partial class AgentLoopService string phaseLabel, RunState? runState = null, bool forceToolCall = false, - Func>? prefetchToolCallAsync = null) + Func>? prefetchToolCallAsync = null, + Func? onStreamEventAsync = null) { var transientRetries = runState?.TransientLlmErrorRetries ?? 0; var contextRecoveryRetries = runState?.ContextRecoveryAttempts ?? 
0; @@ -1172,7 +1174,30 @@ public partial class AgentLoopService { try { - return await _llm.SendWithToolsAsync(messages, tools, ct, forceToolCall, prefetchToolCallAsync); + if (onStreamEventAsync == null) + return await _llm.SendWithToolsAsync(messages, tools, ct, forceToolCall, prefetchToolCallAsync); + + var blocks = new List(); + var textBuilder = new StringBuilder(); + await foreach (var evt in _llm.StreamWithToolsAsync(messages, tools, forceToolCall, prefetchToolCallAsync, ct).WithCancellation(ct)) + { + await onStreamEventAsync(evt); + if (evt.Kind == LlmService.ToolStreamEventKind.TextDelta && !string.IsNullOrWhiteSpace(evt.Text)) + { + textBuilder.Append(evt.Text); + } + else if (evt.Kind == LlmService.ToolStreamEventKind.ToolCallReady && evt.ToolCall != null) + { + blocks.Add(evt.ToolCall); + } + } + + var result = new List(); + var text = textBuilder.ToString().Trim(); + if (!string.IsNullOrWhiteSpace(text)) + result.Add(new LlmService.ContentBlock { Type = "text", Text = text }); + result.AddRange(blocks); + return result; } catch (Exception ex) { diff --git a/src/AxCopilot/Services/LlmService.ToolUse.cs b/src/AxCopilot/Services/LlmService.ToolUse.cs index 068fedd..c7510ce 100644 --- a/src/AxCopilot/Services/LlmService.ToolUse.cs +++ b/src/AxCopilot/Services/LlmService.ToolUse.cs @@ -32,6 +32,18 @@ public partial class LlmService long ElapsedMilliseconds, string? ResolvedToolName = null); + public enum ToolStreamEventKind + { + TextDelta, + ToolCallReady, + Completed + } + + public sealed record ToolStreamEvent( + ToolStreamEventKind Kind, + string Text = "", + ContentBlock? ToolCall = null); + /// 도구 정의를 포함하여 LLM에 요청하고, 텍스트 + tool_use 블록을 파싱하여 반환합니다. /// /// true이면 tool_choice: "required"를 요청에 추가하여 모델이 반드시 도구를 호출하도록 강제합니다. @@ -56,6 +68,51 @@ public partial class LlmService }; } + public async IAsyncEnumerable StreamWithToolsAsync( + List messages, + IReadOnlyCollection tools, + bool forceToolCall = false, + Func>? 
prefetchToolCallAsync = null, + [EnumeratorCancellation] CancellationToken ct = default) + { + var activeService = ResolveService(); + EnsureOperationModeAllowsLlmService(activeService); + + switch (NormalizeServiceName(activeService)) + { + case "ollama": + case "vllm": + await foreach (var evt in StreamOpenAiToolEventsAsync(messages, tools, ct, forceToolCall, prefetchToolCallAsync).WithCancellation(ct)) + yield return evt; + yield break; + + case "sigmoid": + foreach (var block in await SendSigmoidWithToolsAsync(messages, tools, ct)) + { + if (block.Type == "text" && !string.IsNullOrWhiteSpace(block.Text)) + yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, block.Text); + else if (block.Type == "tool_use") + yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: block); + } + yield return new ToolStreamEvent(ToolStreamEventKind.Completed); + yield break; + + case "gemini": + foreach (var block in await SendGeminiWithToolsAsync(messages, tools, ct)) + { + if (block.Type == "text" && !string.IsNullOrWhiteSpace(block.Text)) + yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, block.Text); + else if (block.Type == "tool_use") + yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: block); + } + yield return new ToolStreamEvent(ToolStreamEventKind.Completed); + yield break; + + default: + throw new NotSupportedException($"서비스 '{activeService}'는 아직 Function Calling 스트리밍을 지원하지 않습니다."); + } + } + /// 도구 실행 결과를 LLM에 피드백하기 위한 메시지를 생성합니다. public static ChatMessage CreateToolResultMessage(string toolId, string toolName, string result) { @@ -909,14 +966,127 @@ public partial class LlmService bool usesIbmDeploymentApi, Func>? 
prefetchToolCallAsync, CancellationToken ct) + { + var blocks = new List(); + var textBuilder = new StringBuilder(); + + await foreach (var evt in StreamOpenAiToolEventsAsync(resp, usesIbmDeploymentApi, prefetchToolCallAsync, ct).WithCancellation(ct)) + { + if (evt.Kind == ToolStreamEventKind.TextDelta && !string.IsNullOrWhiteSpace(evt.Text)) + { + textBuilder.Append(evt.Text); + } + else if (evt.Kind == ToolStreamEventKind.ToolCallReady && evt.ToolCall != null) + { + blocks.Add(evt.ToolCall); + } + } + + var text = textBuilder.ToString().Trim(); + var result = new List(); + if (!string.IsNullOrWhiteSpace(text)) + result.Add(new ContentBlock { Type = "text", Text = text }); + result.AddRange(blocks); + + if (!result.Any(b => b.Type == "tool_use")) + { + var textBlock = result.FirstOrDefault(b => b.Type == "text" && !string.IsNullOrWhiteSpace(b.Text)); + if (textBlock != null) + { + var extracted = TryExtractToolCallsFromText(textBlock.Text); + if (extracted.Count > 0) + { + foreach (var block in extracted) + { + if (prefetchToolCallAsync != null) + block.PrefetchedExecutionTask = prefetchToolCallAsync(block); + } + result.AddRange(extracted); + LogService.Debug($"[ToolUse] 텍스트에서 도구 호출 {extracted.Count}건 추출 (SSE 폴백 파싱)"); + } + } + } + + return result; + } + + private async IAsyncEnumerable StreamOpenAiToolEventsAsync( + List messages, + IReadOnlyCollection tools, + [EnumeratorCancellation] CancellationToken ct, + bool forceToolCall = false, + Func>? prefetchToolCallAsync = null) + { + var activeService = ResolveService(); + var (resolvedEp, _, allowInsecureTls) = ResolveServerInfo(); + var endpoint = string.IsNullOrEmpty(resolvedEp) + ? ResolveEndpointForService(activeService) + : resolvedEp; + var registered = GetActiveRegisteredModel(); + var isIbmDeployment = UsesIbmDeploymentChatApi(activeService, registered, endpoint); + + var body = isIbmDeployment + ? 
BuildIbmToolBody(messages, tools, forceToolCall) + : BuildOpenAiToolBody(messages, tools, forceToolCall); + + string url; + if (isIbmDeployment) + url = BuildIbmDeploymentChatUrl(endpoint, stream: true); + else if (activeService.Equals("ollama", StringComparison.OrdinalIgnoreCase)) + url = endpoint.TrimEnd('/') + "/api/chat"; + else + url = endpoint.TrimEnd('/') + "/v1/chat/completions"; + var json = JsonSerializer.Serialize(body); + + using var req = new HttpRequestMessage(HttpMethod.Post, url) + { + Content = new StringContent(json, Encoding.UTF8, "application/json") + }; + await ApplyAuthHeaderAsync(req, ct); + using var resp = await SendWithTlsAsync(req, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead); + if (!resp.IsSuccessStatusCode) + { + var errBody = await resp.Content.ReadAsStringAsync(ct); + var detail = ExtractErrorDetail(errBody); + if (isIbmDeployment && forceToolCall && (int)resp.StatusCode == 400) + { + var fallbackBody = BuildIbmToolBody(messages, tools, forceToolCall: true, useToolChoice: false); + var fallbackJson = JsonSerializer.Serialize(fallbackBody); + using var retryReq = new HttpRequestMessage(HttpMethod.Post, url) + { + Content = new StringContent(fallbackJson, Encoding.UTF8, "application/json") + }; + await ApplyAuthHeaderAsync(retryReq, ct); + using var retryResp = await SendWithTlsAsync(retryReq, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead); + if (!retryResp.IsSuccessStatusCode) + throw new ToolCallNotSupportedException($"{activeService} API 오류 ({retryResp.StatusCode}): {detail}"); + + await foreach (var evt in StreamOpenAiToolEventsAsync(retryResp, true, prefetchToolCallAsync, ct).WithCancellation(ct)) + yield return evt; + yield break; + } + + if ((int)resp.StatusCode == 400) + throw new ToolCallNotSupportedException($"{activeService} API 오류 ({resp.StatusCode}): {detail}"); + + throw new HttpRequestException($"{activeService} API 오류 ({resp.StatusCode}): {detail}"); + } + + await foreach (var evt in 
StreamOpenAiToolEventsAsync(resp, isIbmDeployment, prefetchToolCallAsync, ct).WithCancellation(ct)) + yield return evt; + } + + private async IAsyncEnumerable StreamOpenAiToolEventsAsync( + HttpResponseMessage resp, + bool usesIbmDeploymentApi, + Func>? prefetchToolCallAsync, + [EnumeratorCancellation] CancellationToken ct) { using var stream = await resp.Content.ReadAsStreamAsync(ct); using var reader = new StreamReader(stream); var firstChunkReceived = false; - var textBuilder = new StringBuilder(); var toolAccumulators = new Dictionary(); - var emittedTools = new List(); var lastIbmGeneratedText = ""; while (!reader.EndOfStream && !ct.IsCancellationRequested) @@ -952,8 +1122,21 @@ public partial class LlmService throw new ToolCallNotSupportedException(detail ?? "IBM vLLM 도구 호출 응답 오류"); } - if (TryExtractMessageToolBlocks(root, textBuilder, emittedTools)) + if (TryExtractMessageToolBlocks(root, out var messageText, out var directToolBlocks)) + { + if (!string.IsNullOrWhiteSpace(messageText)) + yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, messageText); + if (directToolBlocks.Count > 0) + { + foreach (var toolBlock in directToolBlocks) + { + if (prefetchToolCallAsync != null) + toolBlock.PrefetchedExecutionTask = prefetchToolCallAsync(toolBlock); + yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: toolBlock); + } + } continue; + } if (usesIbmDeploymentApi && root.TryGetProperty("results", out var resultsEl) && @@ -970,13 +1153,15 @@ public partial class LlmService { if (generatedText.StartsWith(lastIbmGeneratedText, StringComparison.Ordinal)) { - textBuilder.Append(generatedText[lastIbmGeneratedText.Length..]); + var delta = generatedText[lastIbmGeneratedText.Length..]; + if (!string.IsNullOrEmpty(delta)) + yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, delta); lastIbmGeneratedText = generatedText; } else { - textBuilder.Clear(); - textBuilder.Append(generatedText); + if 
(!string.IsNullOrEmpty(generatedText)) + yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, generatedText); lastIbmGeneratedText = generatedText; } } @@ -994,7 +1179,7 @@ public partial class LlmService { var chunk = contentEl.GetString(); if (!string.IsNullOrEmpty(chunk)) - textBuilder.Append(chunk); + yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, chunk); } if (deltaEl.TryGetProperty("tool_calls", out var toolCallsEl) && @@ -1030,53 +1215,50 @@ public partial class LlmService } } - await TryEmitCompletedToolCallAsync(acc, emittedTools, prefetchToolCallAsync).ConfigureAwait(false); + var emittedBlock = await TryCreateCompletedToolCallAsync(acc, prefetchToolCallAsync).ConfigureAwait(false); + if (emittedBlock != null) + yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: emittedBlock); } } } if (firstChoice.TryGetProperty("message", out var messageEl)) - TryExtractMessageToolBlocks(messageEl, textBuilder, emittedTools); - } - } - - foreach (var acc in toolAccumulators.Values.OrderBy(a => a.Index)) - await TryEmitCompletedToolCallAsync(acc, emittedTools, prefetchToolCallAsync, forceEmit: true).ConfigureAwait(false); - - var blocks = new List(); - var text = textBuilder.ToString().Trim(); - if (!string.IsNullOrWhiteSpace(text)) - blocks.Add(new ContentBlock { Type = "text", Text = text }); - - blocks.AddRange(emittedTools); - - if (!blocks.Any(b => b.Type == "tool_use")) - { - var textBlock = blocks.FirstOrDefault(b => b.Type == "text" && !string.IsNullOrWhiteSpace(b.Text)); - if (textBlock != null) - { - var extracted = TryExtractToolCallsFromText(textBlock.Text); - if (extracted.Count > 0) { - foreach (var block in extracted) + if (TryExtractMessageToolBlocks(messageEl, out var messageText2, out var directToolBlocks2)) { - if (prefetchToolCallAsync != null) - block.PrefetchedExecutionTask = prefetchToolCallAsync(block); + if (!string.IsNullOrWhiteSpace(messageText2)) + yield return new 
ToolStreamEvent(ToolStreamEventKind.TextDelta, messageText2); + if (directToolBlocks2.Count > 0) + { + foreach (var toolBlock in directToolBlocks2) + { + if (prefetchToolCallAsync != null) + toolBlock.PrefetchedExecutionTask = prefetchToolCallAsync(toolBlock); + yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: toolBlock); + } + } } - blocks.AddRange(extracted); - LogService.Debug($"[ToolUse] 텍스트에서 도구 호출 {extracted.Count}건 추출 (SSE 폴백 파싱)"); } } } - return blocks; + foreach (var acc in toolAccumulators.Values.OrderBy(a => a.Index)) + { + var emittedBlock = await TryCreateCompletedToolCallAsync(acc, prefetchToolCallAsync, forceEmit: true).ConfigureAwait(false); + if (emittedBlock != null) + yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: emittedBlock); + } + + yield return new ToolStreamEvent(ToolStreamEventKind.Completed); } private static bool TryExtractMessageToolBlocks( JsonElement messageOrRoot, - StringBuilder textBuilder, - List emittedTools) + out string text, + out List toolBlocks) { + text = ""; + toolBlocks = new List(); JsonElement message = messageOrRoot; if (messageOrRoot.TryGetProperty("message", out var nestedMessage)) message = nestedMessage; @@ -1085,10 +1267,10 @@ public partial class LlmService if (message.TryGetProperty("content", out var contentEl) && contentEl.ValueKind == JsonValueKind.String) { - var text = contentEl.GetString(); - if (!string.IsNullOrWhiteSpace(text)) + var parsedText = contentEl.GetString(); + if (!string.IsNullOrWhiteSpace(parsedText)) { - textBuilder.Append(text); + text = parsedText; consumed = true; } } @@ -1119,7 +1301,7 @@ public partial class LlmService } } - emittedTools.Add(new ContentBlock + toolBlocks.Add(new ContentBlock { Type = "tool_use", ToolName = functionEl.TryGetProperty("name", out var nameEl) ? nameEl.GetString() ?? 
"" : "", @@ -1154,21 +1336,20 @@ public partial class LlmService } } - private static async Task TryEmitCompletedToolCallAsync( + private static async Task TryCreateCompletedToolCallAsync( ToolCallAccumulator acc, - List emittedTools, Func>? prefetchToolCallAsync, bool forceEmit = false) { if (acc.Emitted || string.IsNullOrWhiteSpace(acc.Name)) - return; + return null; var argsJson = acc.Arguments.ToString().Trim(); JsonElement? parsedArgs = null; if (!string.IsNullOrEmpty(argsJson)) { if (!forceEmit && !LooksLikeCompleteJson(argsJson)) - return; + return null; try { @@ -1178,7 +1359,7 @@ public partial class LlmService catch { if (!forceEmit) - return; + return null; } } @@ -1193,8 +1374,8 @@ public partial class LlmService if (prefetchToolCallAsync != null) block.PrefetchedExecutionTask = prefetchToolCallAsync(block); - emittedTools.Add(block); acc.Emitted = true; + return block; } // ─── 공통 헬퍼 ─────────────────────────────────────────────────────