vLLM 도구 호출 스트리밍 실행기와 코워크 루프 실시간 소비 구조 추가
Some checks failed
Release Gate / gate (push) Has been cancelled
- LlmService에 tool-use 전용 streaming event API를 추가하고 OpenAI vLLM IBM 경로의 partial tool_call 조립을 event 기반으로 재구성함 - Cowork/Code 루프가 streaming event를 직접 소비하도록 바꿔 도구 호출 감지와 진행 표시를 더 빠르게 갱신함 - read-only 도구 조기 실행이 기존 loop와 실제로 이어지도록 정리하고 최종 실행에서는 prefetch 결과를 재사용함 - README와 DEVELOPMENT 문서를 2026-04-08 11:31(KST) 기준으로 갱신함 검증 - dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\verify\ -p:IntermediateOutputPath=obj\verify\ - 경고 0 / 오류 0
This commit is contained in:
@@ -32,6 +32,18 @@ public partial class LlmService
|
||||
long ElapsedMilliseconds,
|
||||
string? ResolvedToolName = null);
|
||||
|
||||
/// <summary>Kinds of events emitted while streaming a tool-use LLM response.</summary>
public enum ToolStreamEventKind
{
    /// <summary>An incremental chunk of assistant text.</summary>
    TextDelta = 0,

    /// <summary>A fully assembled tool call that is ready to execute.</summary>
    ToolCallReady = 1,

    /// <summary>The stream has finished; no further events follow.</summary>
    Completed = 2
}
|
||||
|
||||
/// <summary>A single event produced while streaming a tool-use LLM response.</summary>
/// <param name="Kind">What this event represents.</param>
/// <param name="Text">Text payload; carried by <see cref="ToolStreamEventKind.TextDelta"/> events, empty otherwise.</param>
/// <param name="ToolCall">Assembled tool-call block; carried by <see cref="ToolStreamEventKind.ToolCallReady"/> events, null otherwise.</param>
public sealed record ToolStreamEvent(
    ToolStreamEventKind Kind,
    string Text = "",
    ContentBlock? ToolCall = null);
|
||||
|
||||
/// <summary>도구 정의를 포함하여 LLM에 요청하고, 텍스트 + tool_use 블록을 파싱하여 반환합니다.</summary>
|
||||
/// <param name="forceToolCall">
|
||||
/// true이면 <c>tool_choice: "required"</c>를 요청에 추가하여 모델이 반드시 도구를 호출하도록 강제합니다.
|
||||
@@ -56,6 +68,51 @@ public partial class LlmService
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Streams a tool-enabled LLM call as a sequence of <see cref="ToolStreamEvent"/>s:
/// text deltas, assembled tool calls, and a final <see cref="ToolStreamEventKind.Completed"/> marker.
/// </summary>
/// <param name="messages">Conversation history to send.</param>
/// <param name="tools">Tool definitions exposed to the model.</param>
/// <param name="forceToolCall">When true, adds <c>tool_choice: "required"</c> so the model must call a tool.</param>
/// <param name="prefetchToolCallAsync">Optional callback that eagerly starts executing a tool call as soon as it is assembled.</param>
/// <param name="ct">Cancellation token.</param>
/// <exception cref="NotSupportedException">Thrown when the active service has no function-calling streaming path.</exception>
public async IAsyncEnumerable<ToolStreamEvent> StreamWithToolsAsync(
    List<ChatMessage> messages,
    IReadOnlyCollection<IAgentTool> tools,
    bool forceToolCall = false,
    Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync = null,
    [EnumeratorCancellation] CancellationToken ct = default)
{
    var activeService = ResolveService();
    EnsureOperationModeAllowsLlmService(activeService);

    // Sigmoid and Gemini return a finished block list rather than a true stream;
    // this adapter converts such a list into the event protocol (dedup of the
    // previously copy-pasted per-service conversion loops).
    static IEnumerable<ToolStreamEvent> ToEvents(IEnumerable<ContentBlock> blocks)
    {
        foreach (var block in blocks)
        {
            if (block.Type == "text" && !string.IsNullOrWhiteSpace(block.Text))
                yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, block.Text);
            else if (block.Type == "tool_use")
                yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: block);
        }
        yield return new ToolStreamEvent(ToolStreamEventKind.Completed);
    }

    switch (NormalizeServiceName(activeService))
    {
        case "ollama":
        case "vllm":
            // Natively streaming path: events are produced incrementally from SSE chunks.
            await foreach (var evt in StreamOpenAiToolEventsAsync(messages, tools, ct, forceToolCall, prefetchToolCallAsync).WithCancellation(ct))
                yield return evt;
            yield break;

        case "sigmoid":
            foreach (var evt in ToEvents(await SendSigmoidWithToolsAsync(messages, tools, ct)))
                yield return evt;
            yield break;

        case "gemini":
            foreach (var evt in ToEvents(await SendGeminiWithToolsAsync(messages, tools, ct)))
                yield return evt;
            yield break;

        default:
            throw new NotSupportedException($"서비스 '{activeService}'는 아직 Function Calling 스트리밍을 지원하지 않습니다.");
    }
}
|
||||
|
||||
/// <summary>도구 실행 결과를 LLM에 피드백하기 위한 메시지를 생성합니다.</summary>
|
||||
public static ChatMessage CreateToolResultMessage(string toolId, string toolName, string result)
|
||||
{
|
||||
@@ -909,14 +966,127 @@ public partial class LlmService
|
||||
bool usesIbmDeploymentApi,
|
||||
Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var blocks = new List<ContentBlock>();
|
||||
var textBuilder = new StringBuilder();
|
||||
|
||||
await foreach (var evt in StreamOpenAiToolEventsAsync(resp, usesIbmDeploymentApi, prefetchToolCallAsync, ct).WithCancellation(ct))
|
||||
{
|
||||
if (evt.Kind == ToolStreamEventKind.TextDelta && !string.IsNullOrWhiteSpace(evt.Text))
|
||||
{
|
||||
textBuilder.Append(evt.Text);
|
||||
}
|
||||
else if (evt.Kind == ToolStreamEventKind.ToolCallReady && evt.ToolCall != null)
|
||||
{
|
||||
blocks.Add(evt.ToolCall);
|
||||
}
|
||||
}
|
||||
|
||||
var text = textBuilder.ToString().Trim();
|
||||
var result = new List<ContentBlock>();
|
||||
if (!string.IsNullOrWhiteSpace(text))
|
||||
result.Add(new ContentBlock { Type = "text", Text = text });
|
||||
result.AddRange(blocks);
|
||||
|
||||
if (!result.Any(b => b.Type == "tool_use"))
|
||||
{
|
||||
var textBlock = result.FirstOrDefault(b => b.Type == "text" && !string.IsNullOrWhiteSpace(b.Text));
|
||||
if (textBlock != null)
|
||||
{
|
||||
var extracted = TryExtractToolCallsFromText(textBlock.Text);
|
||||
if (extracted.Count > 0)
|
||||
{
|
||||
foreach (var block in extracted)
|
||||
{
|
||||
if (prefetchToolCallAsync != null)
|
||||
block.PrefetchedExecutionTask = prefetchToolCallAsync(block);
|
||||
}
|
||||
result.AddRange(extracted);
|
||||
LogService.Debug($"[ToolUse] 텍스트에서 도구 호출 {extracted.Count}건 추출 (SSE 폴백 파싱)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Sends a tool-enabled chat request to an OpenAI-compatible endpoint
/// (Ollama / vLLM / IBM deployment) and streams the response as <see cref="ToolStreamEvent"/>s.
/// </summary>
/// <remarks>
/// IBM deployments that reject <c>tool_choice</c> with HTTP 400 while
/// <paramref name="forceToolCall"/> is set are retried once without <c>tool_choice</c>.
/// </remarks>
/// <param name="messages">Conversation history to send.</param>
/// <param name="tools">Tool definitions exposed to the model.</param>
/// <param name="ct">Cancellation token.</param>
/// <param name="forceToolCall">When true, requests that the model must call a tool.</param>
/// <param name="prefetchToolCallAsync">Optional eager-execution callback forwarded to the SSE parser.</param>
/// <exception cref="ToolCallNotSupportedException">Thrown on HTTP 400 (and on a failed tool_choice retry).</exception>
/// <exception cref="HttpRequestException">Thrown on any other non-success status.</exception>
private async IAsyncEnumerable<ToolStreamEvent> StreamOpenAiToolEventsAsync(
    List<ChatMessage> messages,
    IReadOnlyCollection<IAgentTool> tools,
    [EnumeratorCancellation] CancellationToken ct,
    bool forceToolCall = false,
    Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync = null)
{
    var activeService = ResolveService();
    var (resolvedEp, _, allowInsecureTls) = ResolveServerInfo();
    var endpoint = string.IsNullOrEmpty(resolvedEp)
        ? ResolveEndpointForService(activeService)
        : resolvedEp;
    var registered = GetActiveRegisteredModel();
    var isIbmDeployment = UsesIbmDeploymentChatApi(activeService, registered, endpoint);

    var body = isIbmDeployment
        ? BuildIbmToolBody(messages, tools, forceToolCall)
        : BuildOpenAiToolBody(messages, tools, forceToolCall);

    // Each backend family exposes a different chat route.
    string url;
    if (isIbmDeployment)
        url = BuildIbmDeploymentChatUrl(endpoint, stream: true);
    else if (activeService.Equals("ollama", StringComparison.OrdinalIgnoreCase))
        url = endpoint.TrimEnd('/') + "/api/chat";
    else
        url = endpoint.TrimEnd('/') + "/v1/chat/completions";
    var json = JsonSerializer.Serialize(body);

    using var req = new HttpRequestMessage(HttpMethod.Post, url)
    {
        Content = new StringContent(json, Encoding.UTF8, "application/json")
    };
    await ApplyAuthHeaderAsync(req, ct);
    // ResponseHeadersRead lets us start parsing SSE chunks before the body completes.
    using var resp = await SendWithTlsAsync(req, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead);
    if (!resp.IsSuccessStatusCode)
    {
        var errBody = await resp.Content.ReadAsStringAsync(ct);
        var detail = ExtractErrorDetail(errBody);
        if (isIbmDeployment && forceToolCall && (int)resp.StatusCode == 400)
        {
            // Some IBM deployments reject tool_choice outright; retry once without it.
            var fallbackBody = BuildIbmToolBody(messages, tools, forceToolCall: true, useToolChoice: false);
            var fallbackJson = JsonSerializer.Serialize(fallbackBody);
            using var retryReq = new HttpRequestMessage(HttpMethod.Post, url)
            {
                Content = new StringContent(fallbackJson, Encoding.UTF8, "application/json")
            };
            await ApplyAuthHeaderAsync(retryReq, ct);
            using var retryResp = await SendWithTlsAsync(retryReq, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead);
            if (!retryResp.IsSuccessStatusCode)
            {
                // BUGFIX: previously the detail extracted from the FIRST failure was
                // reported here, masking the retry's actual error. Read the retry body.
                var retryDetail = ExtractErrorDetail(await retryResp.Content.ReadAsStringAsync(ct));
                throw new ToolCallNotSupportedException($"{activeService} API 오류 ({retryResp.StatusCode}): {retryDetail}");
            }

            await foreach (var evt in StreamOpenAiToolEventsAsync(retryResp, true, prefetchToolCallAsync, ct).WithCancellation(ct))
                yield return evt;
            yield break;
        }

        if ((int)resp.StatusCode == 400)
            throw new ToolCallNotSupportedException($"{activeService} API 오류 ({resp.StatusCode}): {detail}");

        throw new HttpRequestException($"{activeService} API 오류 ({resp.StatusCode}): {detail}");
    }

    await foreach (var evt in StreamOpenAiToolEventsAsync(resp, isIbmDeployment, prefetchToolCallAsync, ct).WithCancellation(ct))
        yield return evt;
}
|
||||
|
||||
private async IAsyncEnumerable<ToolStreamEvent> StreamOpenAiToolEventsAsync(
|
||||
HttpResponseMessage resp,
|
||||
bool usesIbmDeploymentApi,
|
||||
Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync,
|
||||
[EnumeratorCancellation] CancellationToken ct)
|
||||
{
|
||||
using var stream = await resp.Content.ReadAsStreamAsync(ct);
|
||||
using var reader = new StreamReader(stream);
|
||||
|
||||
var firstChunkReceived = false;
|
||||
var textBuilder = new StringBuilder();
|
||||
var toolAccumulators = new Dictionary<int, ToolCallAccumulator>();
|
||||
var emittedTools = new List<ContentBlock>();
|
||||
var lastIbmGeneratedText = "";
|
||||
|
||||
while (!reader.EndOfStream && !ct.IsCancellationRequested)
|
||||
@@ -952,8 +1122,21 @@ public partial class LlmService
|
||||
throw new ToolCallNotSupportedException(detail ?? "IBM vLLM 도구 호출 응답 오류");
|
||||
}
|
||||
|
||||
if (TryExtractMessageToolBlocks(root, textBuilder, emittedTools))
|
||||
if (TryExtractMessageToolBlocks(root, out var messageText, out var directToolBlocks))
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(messageText))
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, messageText);
|
||||
if (directToolBlocks.Count > 0)
|
||||
{
|
||||
foreach (var toolBlock in directToolBlocks)
|
||||
{
|
||||
if (prefetchToolCallAsync != null)
|
||||
toolBlock.PrefetchedExecutionTask = prefetchToolCallAsync(toolBlock);
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: toolBlock);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (usesIbmDeploymentApi &&
|
||||
root.TryGetProperty("results", out var resultsEl) &&
|
||||
@@ -970,13 +1153,15 @@ public partial class LlmService
|
||||
{
|
||||
if (generatedText.StartsWith(lastIbmGeneratedText, StringComparison.Ordinal))
|
||||
{
|
||||
textBuilder.Append(generatedText[lastIbmGeneratedText.Length..]);
|
||||
var delta = generatedText[lastIbmGeneratedText.Length..];
|
||||
if (!string.IsNullOrEmpty(delta))
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, delta);
|
||||
lastIbmGeneratedText = generatedText;
|
||||
}
|
||||
else
|
||||
{
|
||||
textBuilder.Clear();
|
||||
textBuilder.Append(generatedText);
|
||||
if (!string.IsNullOrEmpty(generatedText))
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, generatedText);
|
||||
lastIbmGeneratedText = generatedText;
|
||||
}
|
||||
}
|
||||
@@ -994,7 +1179,7 @@ public partial class LlmService
|
||||
{
|
||||
var chunk = contentEl.GetString();
|
||||
if (!string.IsNullOrEmpty(chunk))
|
||||
textBuilder.Append(chunk);
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, chunk);
|
||||
}
|
||||
|
||||
if (deltaEl.TryGetProperty("tool_calls", out var toolCallsEl) &&
|
||||
@@ -1030,53 +1215,50 @@ public partial class LlmService
|
||||
}
|
||||
}
|
||||
|
||||
await TryEmitCompletedToolCallAsync(acc, emittedTools, prefetchToolCallAsync).ConfigureAwait(false);
|
||||
var emittedBlock = await TryCreateCompletedToolCallAsync(acc, prefetchToolCallAsync).ConfigureAwait(false);
|
||||
if (emittedBlock != null)
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: emittedBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (firstChoice.TryGetProperty("message", out var messageEl))
|
||||
TryExtractMessageToolBlocks(messageEl, textBuilder, emittedTools);
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var acc in toolAccumulators.Values.OrderBy(a => a.Index))
|
||||
await TryEmitCompletedToolCallAsync(acc, emittedTools, prefetchToolCallAsync, forceEmit: true).ConfigureAwait(false);
|
||||
|
||||
var blocks = new List<ContentBlock>();
|
||||
var text = textBuilder.ToString().Trim();
|
||||
if (!string.IsNullOrWhiteSpace(text))
|
||||
blocks.Add(new ContentBlock { Type = "text", Text = text });
|
||||
|
||||
blocks.AddRange(emittedTools);
|
||||
|
||||
if (!blocks.Any(b => b.Type == "tool_use"))
|
||||
{
|
||||
var textBlock = blocks.FirstOrDefault(b => b.Type == "text" && !string.IsNullOrWhiteSpace(b.Text));
|
||||
if (textBlock != null)
|
||||
{
|
||||
var extracted = TryExtractToolCallsFromText(textBlock.Text);
|
||||
if (extracted.Count > 0)
|
||||
{
|
||||
foreach (var block in extracted)
|
||||
if (TryExtractMessageToolBlocks(messageEl, out var messageText2, out var directToolBlocks2))
|
||||
{
|
||||
if (prefetchToolCallAsync != null)
|
||||
block.PrefetchedExecutionTask = prefetchToolCallAsync(block);
|
||||
if (!string.IsNullOrWhiteSpace(messageText2))
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, messageText2);
|
||||
if (directToolBlocks2.Count > 0)
|
||||
{
|
||||
foreach (var toolBlock in directToolBlocks2)
|
||||
{
|
||||
if (prefetchToolCallAsync != null)
|
||||
toolBlock.PrefetchedExecutionTask = prefetchToolCallAsync(toolBlock);
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: toolBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
blocks.AddRange(extracted);
|
||||
LogService.Debug($"[ToolUse] 텍스트에서 도구 호출 {extracted.Count}건 추출 (SSE 폴백 파싱)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return blocks;
|
||||
foreach (var acc in toolAccumulators.Values.OrderBy(a => a.Index))
|
||||
{
|
||||
var emittedBlock = await TryCreateCompletedToolCallAsync(acc, prefetchToolCallAsync, forceEmit: true).ConfigureAwait(false);
|
||||
if (emittedBlock != null)
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: emittedBlock);
|
||||
}
|
||||
|
||||
yield return new ToolStreamEvent(ToolStreamEventKind.Completed);
|
||||
}
|
||||
|
||||
private static bool TryExtractMessageToolBlocks(
|
||||
JsonElement messageOrRoot,
|
||||
StringBuilder textBuilder,
|
||||
List<ContentBlock> emittedTools)
|
||||
out string text,
|
||||
out List<ContentBlock> toolBlocks)
|
||||
{
|
||||
text = "";
|
||||
toolBlocks = new List<ContentBlock>();
|
||||
JsonElement message = messageOrRoot;
|
||||
if (messageOrRoot.TryGetProperty("message", out var nestedMessage))
|
||||
message = nestedMessage;
|
||||
@@ -1085,10 +1267,10 @@ public partial class LlmService
|
||||
if (message.TryGetProperty("content", out var contentEl) &&
|
||||
contentEl.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
var text = contentEl.GetString();
|
||||
if (!string.IsNullOrWhiteSpace(text))
|
||||
var parsedText = contentEl.GetString();
|
||||
if (!string.IsNullOrWhiteSpace(parsedText))
|
||||
{
|
||||
textBuilder.Append(text);
|
||||
text = parsedText;
|
||||
consumed = true;
|
||||
}
|
||||
}
|
||||
@@ -1119,7 +1301,7 @@ public partial class LlmService
|
||||
}
|
||||
}
|
||||
|
||||
emittedTools.Add(new ContentBlock
|
||||
toolBlocks.Add(new ContentBlock
|
||||
{
|
||||
Type = "tool_use",
|
||||
ToolName = functionEl.TryGetProperty("name", out var nameEl) ? nameEl.GetString() ?? "" : "",
|
||||
@@ -1154,21 +1336,20 @@ public partial class LlmService
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task TryEmitCompletedToolCallAsync(
|
||||
private static async Task<ContentBlock?> TryCreateCompletedToolCallAsync(
|
||||
ToolCallAccumulator acc,
|
||||
List<ContentBlock> emittedTools,
|
||||
Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync,
|
||||
bool forceEmit = false)
|
||||
{
|
||||
if (acc.Emitted || string.IsNullOrWhiteSpace(acc.Name))
|
||||
return;
|
||||
return null;
|
||||
|
||||
var argsJson = acc.Arguments.ToString().Trim();
|
||||
JsonElement? parsedArgs = null;
|
||||
if (!string.IsNullOrEmpty(argsJson))
|
||||
{
|
||||
if (!forceEmit && !LooksLikeCompleteJson(argsJson))
|
||||
return;
|
||||
return null;
|
||||
|
||||
try
|
||||
{
|
||||
@@ -1178,7 +1359,7 @@ public partial class LlmService
|
||||
catch
|
||||
{
|
||||
if (!forceEmit)
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1193,8 +1374,8 @@ public partial class LlmService
|
||||
if (prefetchToolCallAsync != null)
|
||||
block.PrefetchedExecutionTask = prefetchToolCallAsync(block);
|
||||
|
||||
emittedTools.Add(block);
|
||||
acc.Emitted = true;
|
||||
return block;
|
||||
}
|
||||
|
||||
// ─── 공통 헬퍼 ─────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user