319 lines
10 KiB
C#
319 lines
10 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Net;
|
|
using System.Runtime.InteropServices;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace AxCopilot.Services.Agent;
|
|
|
|
public class TextSummarizeTool : IAgentTool
|
|
{
|
|
/// <summary>Name string reported by this tool: <c>text_summarize</c>.</summary>
public string Name => "text_summarize";
|
|
|
|
/// <summary>
/// Free-text description of the tool (supported styles, chunking of long input,
/// file or inline sources). Presumably surfaced to the agent/model that picks
/// tools; the consumer is not visible in this file.
/// </summary>
public string Description => "Summarize long text or documents into a specified length and format. Supports: bullet points, paragraph, executive summary, technical summary. For very long texts, automatically chunks and summarizes progressively. Can summarize file contents or inline text.";
|
|
|
|
/// <summary>
/// Describes the arguments accepted by <c>ExecuteAsync</c>: <c>source</c> (required),
/// <c>max_length</c>, <c>style</c>, <c>language</c>, <c>focus</c> and <c>sections</c>.
/// A fresh schema instance is built on every access.
/// </summary>
public ToolParameterSchema Parameters
{
    get
    {
        ToolParameterSchema schema = new ToolParameterSchema();

        // Entries are added in the same order the schema lists them.
        Dictionary<string, ToolProperty> properties = new Dictionary<string, ToolProperty>();

        properties["source"] = new ToolProperty
        {
            Type = "string",
            Description = "Text to summarize, OR file path (if starts with '/' or contains '\\' or '.'). For files: supports .txt, .md, .html, .csv, .json, .log"
        };

        properties["max_length"] = new ToolProperty
        {
            Type = "integer",
            Description = "Maximum summary length in characters. Default: 500"
        };

        // "style" is the only property restricted to a fixed set of values.
        properties["style"] = new ToolProperty
        {
            Type = "string",
            Description = "Summary style: bullet (bullet points), paragraph (flowing text), executive (key conclusions + action items), technical (detailed with terminology). Default: bullet",
            Enum = new List<string>(4) { "bullet", "paragraph", "executive", "technical" }
        };

        properties["language"] = new ToolProperty
        {
            Type = "string",
            Description = "Output language: ko (Korean), en (English). Default: ko"
        };

        properties["focus"] = new ToolProperty
        {
            Type = "string",
            Description = "Optional focus area or keywords to emphasize in the summary."
        };

        properties["sections"] = new ToolProperty
        {
            Type = "boolean",
            Description = "If true, provide section-by-section summary instead of one overall summary. Default: false"
        };

        schema.Properties = properties;
        schema.Required = new List<string>(1) { "source" };
        return schema;
    }
}
|
|
|
|
/// <summary>
/// Produces an extractive summary of inline text or of a file's contents.
/// </summary>
/// <remarks>
/// Flow: read arguments, load the text (from a file when <c>source</c> looks like a
/// path, otherwise <c>source</c> itself), return it unchanged when it already fits
/// <c>max_length</c>, otherwise chunk it, extract key sentences per chunk and format
/// the result. Malformed or missing optional arguments fall back to their defaults
/// instead of throwing; a missing <c>source</c> is reported as empty input.
/// </remarks>
/// <param name="args">
/// JSON object with <c>source</c> (string), and optionally <c>max_length</c> (positive
/// integer, default 500), <c>style</c> (default "bullet"), <c>language</c> (default "ko"),
/// <c>focus</c> (default empty) and <c>sections</c> (boolean, default false).
/// </param>
/// <param name="context">Provides the work folder for path resolution and the path access check.</param>
/// <param name="ct">Cancellation token passed to the asynchronous file read.</param>
/// <returns>
/// A successful result carrying the summary (or the original text when it is already
/// short enough), or a failure result when the path is blocked, the file is missing or
/// unreadable, or there is no text to summarize.
/// </returns>
public async Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
{
    const int DefaultMaxLength = 500;
    const int ChunkSize = 3000;
    const int OverflowAllowance = 500;

    // Reads an optional string argument; anything that is not a JSON string yields the fallback.
    static string ReadString(JsonElement obj, string name, string fallback)
    {
        if (obj.ValueKind == JsonValueKind.Object
            && obj.TryGetProperty(name, out JsonElement value)
            && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString() ?? fallback;
        }

        return fallback;
    }

    string source = ReadString(args, "source", "");
    string style = ReadString(args, "style", "bullet");
    string language = ReadString(args, "language", "ko");
    string focus = ReadString(args, "focus", "");

    // TryGetInt32 throws on non-numeric JSON, so the kind is checked first.
    // Non-positive limits would later produce negative Substring lengths; use the default instead.
    int maxLength = DefaultMaxLength;
    if (args.ValueKind == JsonValueKind.Object
        && args.TryGetProperty("max_length", out JsonElement maxLengthElement)
        && maxLengthElement.ValueKind == JsonValueKind.Number
        && maxLengthElement.TryGetInt32(out int requestedLength)
        && requestedLength > 0)
    {
        maxLength = requestedLength;
    }

    bool bySections = args.ValueKind == JsonValueKind.Object
        && args.TryGetProperty("sections", out JsonElement sectionsElement)
        && sectionsElement.ValueKind == JsonValueKind.True;

    string text;
    if (LooksLikeFilePath(source))
    {
        string fullPath = FileReadTool.ResolvePath(source, context.WorkFolder);
        if (!context.IsPathAllowed(fullPath))
        {
            return ToolResult.Fail("경로 접근 차단: " + fullPath);
        }

        if (!File.Exists(fullPath))
        {
            return ToolResult.Fail("파일 없음: " + fullPath);
        }

        try
        {
            text = await File.ReadAllTextAsync(fullPath, ct);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // The file can vanish or be locked between the existence check and the read.
            return ToolResult.Fail("파일 읽기 실패: " + ex.Message);
        }

        if (fullPath.EndsWith(".html", StringComparison.OrdinalIgnoreCase) || fullPath.EndsWith(".htm", StringComparison.OrdinalIgnoreCase))
        {
            text = StripHtmlTags(text);
        }
    }
    else
    {
        text = source;
    }

    if (string.IsNullOrWhiteSpace(text))
    {
        return ToolResult.Fail("요약할 텍스트가 비어있습니다.");
    }

    int charCount = text.Length;
    int lineCount = text.Split('\n').Length;
    int wordCount = EstimateWordCount(text);

    if (charCount <= maxLength)
    {
        return ToolResult.Ok($"\ud83d\udcdd 텍스트가 이미 요약 기준 이하입니다 ({charCount}자).\n\n{text}");
    }

    // Each chunk gets an equal share of the overall length budget.
    List<string> chunks = ChunkText(text, ChunkSize);
    int perChunkBudget = maxLength / chunks.Count;
    List<string> chunkSummaries = chunks
        .Select(chunk => ExtractKeyContent(chunk, perChunkBudget, style, focus))
        .ToList();

    StringBuilder sb = new StringBuilder();
    sb.AppendLine($"\ud83d\udcdd 텍스트 요약 (원문: {charCount:N0}자, {lineCount}줄, ~{wordCount}단어)");
    sb.AppendLine();

    if (bySections && chunks.Count > 1)
    {
        for (int i = 0; i < chunkSummaries.Count; i++)
        {
            sb.AppendLine($"### 섹션 {i + 1}/{chunkSummaries.Count}");
            sb.AppendLine(chunkSummaries[i]);
            sb.AppendLine();
        }
    }
    else
    {
        string combined = string.Join("\n", chunkSummaries);
        sb.AppendLine(FormatSummary(combined, style, language, focus));
    }

    string result = sb.ToString();

    // Computed in 64-bit so a very large max_length cannot overflow into a negative limit.
    long hardLimit = (long)maxLength + OverflowAllowance;
    if (result.Length > hardLimit)
    {
        int cut = (int)hardLimit;

        // Do not leave a dangling high surrogate at the end of the truncated text.
        if (char.IsHighSurrogate(result[cut - 1]))
        {
            cut--;
        }

        result = result.Substring(0, cut) + "\n...[요약 길이 초과로 생략]";
    }

    return ToolResult.Ok(result);
}
|
|
|
|
/// <summary>
/// Heuristically decides whether <paramref name="s"/> is a file path rather than inline text.
/// </summary>
/// <param name="s">The raw <c>source</c> argument.</param>
/// <returns>
/// <c>false</c> for null/empty input and for input that spans several lines;
/// <c>true</c> when the input contains a path separator, or is shorter than 260
/// characters and ends in a dot followed by 1-5 word characters; otherwise <c>false</c>.
/// </returns>
private static bool LooksLikeFilePath(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return false;
    }

    // Text with interior line breaks is content, not a path, even if it contains
    // slashes (URLs, dates, "and/or"). Leading/trailing breaks are ignored so a
    // path followed by a stray newline is still recognised.
    string candidate = s.Trim();
    if (candidate.Contains('\n') || candidate.Contains('\r'))
    {
        return false;
    }

    if (s.Contains('\\') || s.Contains('/'))
    {
        return true;
    }

    // No separator: accept short strings that end in something extension-like.
    return s.Length < 260 && Regex.IsMatch(s, "\\.\\w{1,5}$");
}
|
|
|
|
/// <summary>
/// Reduces an HTML document to plain text.
/// </summary>
/// <remarks>
/// Removes <c>script</c> and <c>style</c> elements with their content (any letter case),
/// removes comments, replaces every remaining tag with a space, decodes HTML entities
/// and collapses all whitespace runs to a single space.
/// </remarks>
/// <param name="html">HTML markup; null or empty yields an empty string.</param>
/// <returns>The trimmed, single-line text content.</returns>
private static string StripHtmlTags(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    const RegexOptions BlockOptions = RegexOptions.Singleline | RegexOptions.IgnoreCase;

    // \b keeps "<script" from matching longer tag names; \s* tolerates "</script >".
    string text = Regex.Replace(html, "<script\\b[^>]*>.*?</script\\s*>", "", BlockOptions);
    text = Regex.Replace(text, "<style\\b[^>]*>.*?</style\\s*>", "", BlockOptions);

    // Comments go before the generic tag pass, which would stop at the first '>' inside one.
    text = Regex.Replace(text, "<!--.*?-->", "", RegexOptions.Singleline);

    text = Regex.Replace(text, "<[^>]+>", " ");
    text = WebUtility.HtmlDecode(text);
    return Regex.Replace(text, "\\s+", " ").Trim();
}
|
|
|
|
/// <summary>
/// Rough word-count estimate: number of space characters, plus one, plus one
/// extra word for every three Hangul syllables (U+AC00..U+D7A3).
/// </summary>
/// <param name="text">Text to measure; must not be null.</param>
/// <returns>The estimated word count (at least 1).</returns>
private static int EstimateWordCount(string text)
{
    ArgumentNullException.ThrowIfNull(text);

    int spaceTotal = 0;
    int hangulTotal = 0;

    // Single pass; a character can only be one of the two kinds.
    foreach (char ch in text)
    {
        if (ch == ' ')
        {
            spaceTotal++;
        }
        else if (ch >= '가' && ch <= '힣')
        {
            hangulTotal++;
        }
    }

    return spaceTotal + 1 + hangulTotal / 3;
}
|
|
|
|
/// <summary>
/// Splits <paramref name="text"/> into chunks of roughly <paramref name="chunkSize"/> characters.
/// </summary>
/// <remarks>
/// Lines (split on '\n') are accumulated, each followed by a line terminator, until
/// adding the next line would exceed <paramref name="chunkSize"/>. A single line longer
/// than <paramref name="chunkSize"/> is cut into <paramref name="chunkSize"/>-sized pieces
/// so that input without line breaks is still chunked; cuts avoid separating a
/// surrogate pair. When <paramref name="chunkSize"/> is not positive no line is cut.
/// </remarks>
/// <param name="text">Text to split.</param>
/// <param name="chunkSize">Target chunk size in characters.</param>
/// <returns>The chunks in document order.</returns>
private static List<string> ChunkText(string text, int chunkSize)
{
    List<string> chunks = new List<string>();
    StringBuilder buffer = new StringBuilder();

    // Emits the pending lines as one chunk, if there are any.
    void Flush()
    {
        if (buffer.Length > 0)
        {
            chunks.Add(buffer.ToString());
            buffer.Clear();
        }
    }

    foreach (string line in text.Split('\n'))
    {
        int offset = 0;

        // Over-long line: emit full-size pieces directly, keep the remainder for the buffer.
        while (chunkSize > 0 && line.Length - offset > chunkSize)
        {
            Flush();

            int cut = chunkSize;
            if (char.IsHighSurrogate(line[offset + cut - 1]))
            {
                // Move the cut off the middle of a surrogate pair. At least
                // chunkSize + 1 characters remain here, so cut + 1 is in range.
                cut = cut > 1 ? cut - 1 : cut + 1;
            }

            chunks.Add(line.Substring(offset, cut));
            offset += cut;
        }

        string rest = offset == 0 ? line : line.Substring(offset);

        if (buffer.Length + rest.Length > chunkSize && buffer.Length > 0)
        {
            Flush();
        }

        buffer.AppendLine(rest);
    }

    Flush();
    return chunks;
}
|
|
|
|
/// <summary>
/// Picks the highest-scoring sentences of <paramref name="text"/> until roughly
/// <paramref name="targetLength"/> characters are collected, and returns them in
/// their original order, one per line.
/// </summary>
/// <remarks>
/// Sentences are obtained by splitting on whitespace that follows '.', '!', '?', '。'
/// or a newline; fragments of 10 characters or fewer (after trimming) are ignored.
/// Scoring: +1 for a length between 21 and 199, +0.5 if it contains a digit, +2 if it
/// contains <paramref name="focus"/>, +1 for the first or last sentence, +0.5 for the
/// first three, +1.5 for a cue word (matched case-insensitively). At least one sentence
/// is always returned when any exists, even if it exceeds the budget. When no sentence
/// qualifies, the text itself is returned, truncated to the budget.
/// </remarks>
/// <param name="text">Chunk of text to condense.</param>
/// <param name="targetLength">Character budget; negative values are treated as 0.</param>
/// <param name="style">Not used by this method.</param>
/// <param name="focus">Optional keyword that boosts sentences containing it; may be null or empty.</param>
/// <returns>The selected sentences joined with '\n'.</returns>
private static string ExtractKeyContent(string text, int targetLength, string style, string focus)
{
    // A negative budget would make Substring throw below.
    int budget = Math.Max(0, targetLength);

    List<string> sentences = Regex.Split(text, "(?<=[.!?。\\n])\\s+")
        .Where(fragment => fragment.Trim().Length > 10)
        .ToList();

    if (sentences.Count == 0)
    {
        return text.Length > budget ? text.Substring(0, budget) : text;
    }

    string[] cueWords = { "결론", "요약", "핵심", "중요", "결과", "therefore", "conclusion", "key" };
    int lastIndex = sentences.Count - 1;
    bool hasFocus = !string.IsNullOrEmpty(focus);

    // The indexed Select supplies each sentence's real position, so duplicate
    // sentences are scored by where they actually occur and no lookup is needed.
    var ranked = sentences
        .Select((sentence, index) =>
        {
            double score = 0.0;

            if (sentence.Length > 20 && sentence.Length < 200)
            {
                score += 1.0;
            }

            if (Regex.IsMatch(sentence, "\\d+"))
            {
                score += 0.5;
            }

            if (hasFocus && sentence.Contains(focus, StringComparison.OrdinalIgnoreCase))
            {
                score += 2.0;
            }

            if (index == 0 || index == lastIndex)
            {
                score += 1.0;
            }

            if (index < 3)
            {
                score += 0.5;
            }

            if (cueWords.Any(cue => sentence.Contains(cue, StringComparison.OrdinalIgnoreCase)))
            {
                score += 1.5;
            }

            return (Sentence: sentence.Trim(), Index: index, Score: score);
        })
        .OrderByDescending(candidate => candidate.Score) // stable: ties keep document order
        .ToList();

    var picked = new List<(string Sentence, int Index)>();
    int used = 0;

    foreach (var candidate in ranked)
    {
        // The first sentence is always taken; afterwards stop once the budget would be exceeded.
        if (used + candidate.Sentence.Length > budget && picked.Count > 0)
        {
            break;
        }

        picked.Add((candidate.Sentence, candidate.Index));
        used += candidate.Sentence.Length;
    }

    // Restore reading order using the recorded positions.
    return string.Join("\n", picked.OrderBy(item => item.Index).Select(item => item.Sentence));
}
|
|
|
|
/// <summary>
/// Applies the presentation for the requested <paramref name="style"/> to already
/// extracted summary text.
/// </summary>
/// <remarks>
/// "bullet": every non-empty trimmed line gets a "• " prefix unless it already starts
/// with "•" or "-". "executive": a heading, the content, and a focus line
/// when <paramref name="focus"/> is set. "technical": a heading followed by the content.
/// Any other style: the content flattened to a single line.
/// </remarks>
/// <param name="content">Summary text, lines separated by '\n'.</param>
/// <param name="style">Style name.</param>
/// <param name="language">Not used by this method.</param>
/// <param name="focus">Optional focus text shown in the executive style.</param>
/// <returns>The formatted summary.</returns>
private static string FormatSummary(string content, string style, string language, string focus)
{
    if (style == "bullet")
    {
        string[] rows = content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        List<string> bulleted = new List<string>(rows.Length);

        foreach (string row in rows)
        {
            bool alreadyMarked = row.StartsWith("•") || row.StartsWith("-");
            bulleted.Add(alreadyMarked ? row : "• " + row);
        }

        return string.Join("\n", bulleted);
    }

    if (style == "executive")
    {
        StringBuilder report = new StringBuilder();
        report.AppendLine("**핵심 요약**");
        report.AppendLine(content);

        if (!string.IsNullOrEmpty(focus))
        {
            report.AppendLine($"\n**주요 관심 영역 ({focus})**");
        }

        return report.ToString();
    }

    if (style == "technical")
    {
        return "**기술 요약**\n" + content;
    }

    // Paragraph / unknown style: collapse blank lines first, then join lines with spaces.
    string singleBreaks = content.Replace("\n\n", "\n");
    return singleBreaks.Replace("\n", " ").Trim();
}
|
|
}
|