Files
AX-Copilot-Codex/.decompiledproj/AxCopilot/Services/Agent/TextSummarizeTool.cs

319 lines
10 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
namespace AxCopilot.Services.Agent;
/// <summary>
/// Agent tool that produces an extractive summary of inline text or of a text file.
/// </summary>
/// <remarks>
/// No model call is made here: the input is cut into chunks, the sentences of each chunk
/// are scored with simple heuristics (length, digits, focus keyword, position, cue words),
/// and the best-scoring sentences are emitted in their original order.
/// </remarks>
public class TextSummarizeTool : IAgentTool
{
    /// <summary>Summary length (in characters) used when <c>max_length</c> is absent or not positive.</summary>
    private const int DefaultMaxLength = 500;

    /// <summary>Upper bound, in characters, for one chunk handed to the sentence extractor.</summary>
    private const int ChunkSize = 3000;

    /// <summary>Extra characters tolerated beyond <c>max_length</c> (header, markup) before the output is cut.</summary>
    private const int OutputSlack = 500;

    /// <summary>Longest string that is still considered a possible file path.</summary>
    private const int MaxPathLength = 260;

    private static readonly char[] LineBreakChars = { '\n', '\r' };

    /// <summary>Cue words that mark a sentence as likely to carry a conclusion; matched case-insensitively.</summary>
    private static readonly string[] CueWords =
    {
        "결론", "요약", "핵심", "중요", "결과", "therefore", "conclusion", "key"
    };

    private static readonly Regex FileExtensionRegex = new Regex("\\.\\w{1,5}$");
    private static readonly Regex ScriptBlockRegex = new Regex("<script[^>]*>.*?</script>", RegexOptions.Singleline | RegexOptions.IgnoreCase);
    private static readonly Regex StyleBlockRegex = new Regex("<style[^>]*>.*?</style>", RegexOptions.Singleline | RegexOptions.IgnoreCase);
    private static readonly Regex HtmlCommentRegex = new Regex("<!--.*?-->", RegexOptions.Singleline);
    private static readonly Regex HtmlTagRegex = new Regex("<[^>]+>");
    private static readonly Regex WhitespaceRunRegex = new Regex("\\s+");
    private static readonly Regex SentenceBoundaryRegex = new Regex("(?<=[.!?。\\n])\\s+");

    /// <summary>Tool identifier.</summary>
    public string Name => "text_summarize";

    /// <summary>Human-readable description of the tool.</summary>
    public string Description => "Summarize long text or documents into a specified length and format. Supports: bullet points, paragraph, executive summary, technical summary. For very long texts, automatically chunks and summarizes progressively. Can summarize file contents or inline text.";

    /// <summary>
    /// Parameter schema of the tool. A new schema instance is built on every access;
    /// only <c>source</c> is required.
    /// </summary>
    public ToolParameterSchema Parameters => new ToolParameterSchema
    {
        Properties = new Dictionary<string, ToolProperty>
        {
            ["source"] = new ToolProperty
            {
                Type = "string",
                Description = "Text to summarize, OR file path (if starts with '/' or contains '\\' or '.'). For files: supports .txt, .md, .html, .csv, .json, .log"
            },
            ["max_length"] = new ToolProperty
            {
                Type = "integer",
                Description = "Maximum summary length in characters. Default: 500"
            },
            ["style"] = new ToolProperty
            {
                Type = "string",
                Description = "Summary style: bullet (bullet points), paragraph (flowing text), executive (key conclusions + action items), technical (detailed with terminology). Default: bullet",
                Enum = new List<string> { "bullet", "paragraph", "executive", "technical" }
            },
            ["language"] = new ToolProperty
            {
                Type = "string",
                Description = "Output language: ko (Korean), en (English). Default: ko"
            },
            ["focus"] = new ToolProperty
            {
                Type = "string",
                Description = "Optional focus area or keywords to emphasize in the summary."
            },
            ["sections"] = new ToolProperty
            {
                Type = "boolean",
                Description = "If true, provide section-by-section summary instead of one overall summary. Default: false"
            }
        },
        Required = new List<string> { "source" }
    };

    /// <summary>
    /// Summarizes the text given in <c>source</c>, or the content of the file it points to.
    /// </summary>
    /// <param name="args">
    /// JSON arguments: <c>source</c>, <c>max_length</c>, <c>style</c>, <c>language</c>,
    /// <c>focus</c>, <c>sections</c>. Missing or wrongly typed values fall back to their defaults
    /// instead of throwing. <c>language</c> is forwarded to the formatter, which does not read it yet.
    /// </param>
    /// <param name="context">Supplies the work folder used to resolve the path and the path access check.</param>
    /// <param name="ct">Cancels the file read and the per-chunk extraction loop.</param>
    /// <returns>
    /// A failed result when the path is not allowed, the file does not exist, or the text is empty;
    /// otherwise a successful result holding either the unchanged text (when it is already no longer
    /// than <c>max_length</c>) or the generated summary.
    /// </returns>
    public async Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
    {
        string source = ReadString(args, "source", "");
        int maxLength = ReadInt32(args, "max_length", DefaultMaxLength);
        if (maxLength <= 0)
        {
            // A zero or negative budget would end up as a negative Substring length further down.
            maxLength = DefaultMaxLength;
        }
        string style = ReadString(args, "style", "bullet");
        string language = ReadString(args, "language", "ko");
        string focus = ReadString(args, "focus", "");
        bool bySections = ReadBoolean(args, "sections", false);

        string text;
        if (LooksLikeFilePath(source))
        {
            string fullPath = FileReadTool.ResolvePath(source, context.WorkFolder);
            if (!context.IsPathAllowed(fullPath))
            {
                return ToolResult.Fail("경로 접근 차단: " + fullPath);
            }
            if (!File.Exists(fullPath))
            {
                return ToolResult.Fail("파일 없음: " + fullPath);
            }
            text = await File.ReadAllTextAsync(fullPath, ct);
            if (fullPath.EndsWith(".html", StringComparison.OrdinalIgnoreCase) || fullPath.EndsWith(".htm", StringComparison.OrdinalIgnoreCase))
            {
                text = StripHtmlTags(text);
            }
        }
        else
        {
            text = source;
        }

        if (string.IsNullOrWhiteSpace(text))
        {
            return ToolResult.Fail("요약할 텍스트가 비어있습니다.");
        }

        int charCount = text.Length;
        int lineCount = text.Count(c => c == '\n') + 1;
        int wordCount = EstimateWordCount(text);
        if (charCount <= maxLength)
        {
            return ToolResult.Ok($"\ud83d\udcdd 텍스트가 이미 요약 기준 이하입니다 ({charCount}자).\n\n{text}");
        }

        List<string> chunks = ChunkText(text, ChunkSize);
        // Every chunk gets an equal share of the budget, but never less than one character.
        int perChunkBudget = Math.Max(1, maxLength / Math.Max(1, chunks.Count));
        List<string> chunkSummaries = new List<string>(chunks.Count);
        foreach (string chunk in chunks)
        {
            ct.ThrowIfCancellationRequested();
            chunkSummaries.Add(ExtractKeyContent(chunk, perChunkBudget, style, focus));
        }

        StringBuilder sb = new StringBuilder();
        sb.AppendLine($"\ud83d\udcdd 텍스트 요약 (원문: {charCount:N0}자, {lineCount}줄, ~{wordCount}단어)");
        sb.AppendLine();
        if (bySections && chunks.Count > 1)
        {
            // Section mode lists the raw per-chunk extracts; the style formatter is not applied here.
            for (int i = 0; i < chunkSummaries.Count; i++)
            {
                sb.AppendLine($"### 섹션 {i + 1}/{chunkSummaries.Count}");
                sb.AppendLine(chunkSummaries[i]);
                sb.AppendLine();
            }
        }
        else
        {
            string combined = string.Join("\n", chunkSummaries);
            sb.AppendLine(FormatSummary(combined, style, language, focus));
        }

        string result = sb.ToString();
        // Written as a subtraction so that a huge max_length cannot overflow the comparison.
        if (result.Length - OutputSlack > maxLength)
        {
            result = Truncate(result, maxLength + OutputSlack) + "\n...[요약 길이 초과로 생략]";
        }
        return ToolResult.Ok(result);
    }

    /// <summary>Looks up <paramref name="name"/> in <paramref name="args"/> without throwing when <paramref name="args"/> is not a JSON object.</summary>
    private static bool TryGetArgument(JsonElement args, string name, out JsonElement value)
    {
        if (args.ValueKind == JsonValueKind.Object)
        {
            return args.TryGetProperty(name, out value);
        }
        value = default;
        return false;
    }

    /// <summary>Reads a string argument; returns <paramref name="fallback"/> when it is missing, null, or not a JSON string.</summary>
    private static string ReadString(JsonElement args, string name, string fallback)
    {
        if (TryGetArgument(args, name, out JsonElement element) && element.ValueKind == JsonValueKind.String)
        {
            return element.GetString() ?? fallback;
        }
        return fallback;
    }

    /// <summary>
    /// Reads an integer argument given as a JSON number or as a numeric string;
    /// returns <paramref name="fallback"/> for anything else.
    /// </summary>
    private static int ReadInt32(JsonElement args, string name, int fallback)
    {
        if (!TryGetArgument(args, name, out JsonElement element))
        {
            return fallback;
        }
        switch (element.ValueKind)
        {
            case JsonValueKind.Number:
                return element.TryGetInt32(out int number) ? number : fallback;
            case JsonValueKind.String:
                return int.TryParse(
                    element.GetString(),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out int parsed) ? parsed : fallback;
            default:
                return fallback;
        }
    }

    /// <summary>
    /// Reads a boolean argument given as a JSON boolean or as the string "true"/"false";
    /// returns <paramref name="fallback"/> for anything else.
    /// </summary>
    private static bool ReadBoolean(JsonElement args, string name, bool fallback)
    {
        if (!TryGetArgument(args, name, out JsonElement element))
        {
            return fallback;
        }
        switch (element.ValueKind)
        {
            case JsonValueKind.True:
                return true;
            case JsonValueKind.False:
                return false;
            case JsonValueKind.String:
                return bool.TryParse(element.GetString(), out bool parsed) ? parsed : fallback;
            default:
                return fallback;
        }
    }

    /// <summary>
    /// Heuristic that decides whether <paramref name="s"/> names a file rather than being the text itself.
    /// </summary>
    /// <remarks>
    /// Only single-line strings shorter than <see cref="MaxPathLength"/> qualify, so that ordinary
    /// prose containing a URL, a date such as 2024/01/01 or "and/or" is summarized as text instead
    /// of being reported as a missing file.
    /// </remarks>
    private static bool LooksLikeFilePath(string s)
    {
        if (string.IsNullOrEmpty(s) || s.Length >= MaxPathLength)
        {
            return false;
        }
        if (s.IndexOfAny(LineBreakChars) >= 0)
        {
            return false;
        }
        if (s.Contains('\\') || s.Contains('/'))
        {
            return true;
        }
        return FileExtensionRegex.IsMatch(s);
    }

    /// <summary>
    /// Reduces an HTML document to plain text: script/style blocks and comments are dropped, remaining
    /// tags become spaces, entities are decoded, and every whitespace run collapses to a single space.
    /// </summary>
    private static string StripHtmlTags(string html)
    {
        string stripped = ScriptBlockRegex.Replace(html, "");
        stripped = StyleBlockRegex.Replace(stripped, "");
        // Comments go before the generic tag pass because they may contain '>' characters.
        stripped = HtmlCommentRegex.Replace(stripped, "");
        stripped = HtmlTagRegex.Replace(stripped, " ");
        stripped = WebUtility.HtmlDecode(stripped);
        return WhitespaceRunRegex.Replace(stripped, " ").Trim();
    }

    /// <summary>
    /// Rough word count: the number of whitespace-separated tokens plus one third of the number
    /// of Hangul syllables (U+AC00..U+D7A3).
    /// </summary>
    private static int EstimateWordCount(string text)
    {
        int tokens = 0;
        int hangulSyllables = 0;
        bool insideToken = false;
        foreach (char c in text)
        {
            if (char.IsWhiteSpace(c))
            {
                insideToken = false;
                continue;
            }
            if (!insideToken)
            {
                tokens++;
                insideToken = true;
            }
            if (c >= '가' && c <= '힣')
            {
                hangulSyllables++;
            }
        }
        return tokens + hangulSyllables / 3;
    }

    /// <summary>
    /// Groups the lines of <paramref name="text"/> into chunks of roughly <paramref name="chunkSize"/>
    /// characters. Lines are kept whole where possible; a line longer than the limit is split first.
    /// Every line in a chunk is terminated by '\n'.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is not positive.</exception>
    private static List<string> ChunkText(string text, int chunkSize)
    {
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize));
        }

        List<string> chunks = new List<string>();
        StringBuilder current = new StringBuilder();
        foreach (string rawLine in text.Split('\n'))
        {
            // Splitting on '\n' leaves the '\r' of CRLF input at the end of the line.
            string line = rawLine.TrimEnd('\r');
            foreach (string piece in SplitOversizedLine(line, chunkSize))
            {
                if (current.Length > 0 && current.Length + piece.Length > chunkSize)
                {
                    chunks.Add(current.ToString());
                    current.Clear();
                }
                current.Append(piece).Append('\n');
            }
        }
        if (current.Length > 0)
        {
            chunks.Add(current.ToString());
        }
        return chunks;
    }

    /// <summary>
    /// Yields <paramref name="line"/> unchanged when it fits into <paramref name="chunkSize"/>; otherwise
    /// yields consecutive pieces of at most that size, breaking after a space when one is available.
    /// </summary>
    private static IEnumerable<string> SplitOversizedLine(string line, int chunkSize)
    {
        int start = 0;
        while (line.Length - start > chunkSize)
        {
            int end = start + chunkSize;
            int lastSpace = line.LastIndexOf(' ', end - 1, chunkSize);
            if (lastSpace > start)
            {
                end = lastSpace + 1;
            }
            else if (end - start > 1 && char.IsHighSurrogate(line[end - 1]))
            {
                // Do not separate the two halves of a surrogate pair.
                end--;
            }
            yield return line.Substring(start, end - start);
            start = end;
        }
        yield return line.Substring(start);
    }

    /// <summary>
    /// Picks the highest-scoring sentences of <paramref name="text"/> until <paramref name="targetLength"/>
    /// characters are used (at least one sentence is always kept) and returns them, one per line,
    /// in the order in which they occur in the text.
    /// </summary>
    /// <param name="text">Chunk to extract from.</param>
    /// <param name="targetLength">Character budget for the extract.</param>
    /// <param name="style">Not read by the extractor.</param>
    /// <param name="focus">Optional keyword; sentences containing it (ignoring case) score higher.</param>
    private static string ExtractKeyContent(string text, int targetLength, string style, string focus)
    {
        List<string> sentences = SentenceBoundaryRegex.Split(text)
            .Select(s => s.Trim())
            .Where(s => s.Length > 10)
            .ToList();
        if (sentences.Count == 0)
        {
            return Truncate(text, targetLength);
        }

        int lastIndex = sentences.Count - 1;
        // OrderByDescending is stable, so sentences with equal scores keep their document order.
        var ranked = sentences
            .Select((sentence, index) => (Sentence: sentence, Index: index, Score: ScoreSentence(sentence, index, lastIndex, focus)))
            .OrderByDescending(candidate => candidate.Score)
            .ToList();

        List<(string Sentence, int Index)> picked = new List<(string Sentence, int Index)>();
        int usedLength = 0;
        foreach (var candidate in ranked)
        {
            if (picked.Count > 0 && usedLength + candidate.Sentence.Length > targetLength)
            {
                break;
            }
            picked.Add((candidate.Sentence, candidate.Index));
            usedLength += candidate.Sentence.Length;
        }

        // The position is carried along instead of being searched for again, which also keeps
        // repeated sentences in their correct places.
        return string.Join("\n", picked.OrderBy(p => p.Index).Select(p => p.Sentence));
    }

    /// <summary>Computes the heuristic relevance score of one sentence.</summary>
    /// <param name="sentence">Trimmed sentence text.</param>
    /// <param name="index">Zero-based position of the sentence within its chunk.</param>
    /// <param name="lastIndex">Position of the last sentence of the chunk.</param>
    /// <param name="focus">Optional focus keyword.</param>
    private static double ScoreSentence(string sentence, int index, int lastIndex, string focus)
    {
        double score = 0.0;
        if (sentence.Length > 20 && sentence.Length < 200)
        {
            score += 1.0;
        }
        if (sentence.Any(char.IsDigit))
        {
            score += 0.5;
        }
        if (!string.IsNullOrEmpty(focus) && sentence.Contains(focus, StringComparison.OrdinalIgnoreCase))
        {
            score += 2.0;
        }
        if (index == 0 || index == lastIndex)
        {
            score += 1.0;
        }
        if (index < 3)
        {
            score += 0.5;
        }
        if (CueWords.Any(word => sentence.Contains(word, StringComparison.OrdinalIgnoreCase)))
        {
            score += 1.5;
        }
        return score;
    }

    /// <summary>
    /// Returns at most <paramref name="maxChars"/> leading characters of <paramref name="value"/>
    /// without cutting a surrogate pair in half; a non-positive limit yields an empty string.
    /// </summary>
    private static string Truncate(string value, int maxChars)
    {
        if (maxChars <= 0)
        {
            return "";
        }
        if (value.Length <= maxChars)
        {
            return value;
        }
        int cut = maxChars;
        if (char.IsHighSurrogate(value[cut - 1]))
        {
            cut--;
        }
        return value.Substring(0, cut);
    }

    /// <summary>Applies the layout of the requested <paramref name="style"/> to the extracted content.</summary>
    /// <param name="content">Extracted sentences separated by line breaks.</param>
    /// <param name="style">bullet, executive or technical (case-insensitive); anything else yields one flowing paragraph.</param>
    /// <param name="language">Not read by the formatter; headings are always the Korean ones below.</param>
    /// <param name="focus">Optional focus text, shown as an extra heading in the executive style.</param>
    private static string FormatSummary(string content, string style, string language, string focus)
    {
        switch (style?.Trim().ToLowerInvariant())
        {
            case "bullet":
            {
                string[] lines = content.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
                return string.Join("\n", lines.Select(line => (line.StartsWith('•') || line.StartsWith('-')) ? line : ("• " + line)));
            }
            case "executive":
            {
                StringBuilder builder = new StringBuilder();
                builder.AppendLine("**핵심 요약**");
                builder.AppendLine(content);
                if (!string.IsNullOrEmpty(focus))
                {
                    builder.AppendLine($"\n**주요 관심 영역 ({focus})**");
                }
                return builder.ToString();
            }
            case "technical":
                return "**기술 요약**\n" + content;
            default:
            {
                // Paragraph style: join the non-empty lines with single spaces so that neither
                // blank lines nor stray carriage returns leave gaps in the flowing text.
                string[] lines = content.Split(LineBreakChars, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
                return string.Join(" ", lines);
            }
        }
    }
}