758 lines
25 KiB
C#
758 lines
25 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Runtime.InteropServices;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
using DocumentFormat.OpenXml.Packaging;
|
|
using DocumentFormat.OpenXml.Spreadsheet;
|
|
using DocumentFormat.OpenXml.Wordprocessing;
|
|
using UglyToad.PdfPig;
|
|
using UglyToad.PdfPig.Content;
|
|
|
|
namespace AxCopilot.Services.Agent;
|
|
|
|
public class DocumentReaderTool : IAgentTool
|
|
{
|
|
/// <summary>
/// File extensions that <see cref="ExecuteAsync"/> hands to the plain-text reader.
/// Membership tests ignore case.
/// </summary>
private static readonly HashSet<string> TextExtensions = new(StringComparer.OrdinalIgnoreCase)
{
    // Plain text and logs
    ".txt", ".log", ".md",
    // Structured data and configuration
    ".json", ".xml", ".csv", ".tsv", ".yaml", ".yml",
    ".ini", ".cfg", ".conf", ".properties",
    // Web
    ".html", ".htm", ".css", ".js", ".ts",
    // Source code and scripts
    ".py", ".cs", ".java", ".sql", ".sh", ".bat", ".ps1", ".r", ".m",
};

/// <summary>Default value (8000) of the <c>max_chars</c> argument.</summary>
private const int DefaultMaxChars = 8000;

/// <summary>Identifier under which this tool is exposed.</summary>
public string Name
{
    get { return "document_read"; }
}

/// <summary>Human-readable summary of the supported formats and arguments.</summary>
public string Description
{
    get
    {
        return "Read a document file and extract its text content. "
            + "Supports: PDF (.pdf), Word (.docx), Excel (.xlsx), CSV (.csv), text (.txt/.log/.json/.xml/.md), BibTeX (.bib), RIS (.ris). "
            + "For large files, use 'offset' to read from a specific character position (chunked reading). "
            + "For large PDFs, use 'pages' parameter to read specific page ranges (e.g., '1-5', '10-20'). "
            + "Use 'section' parameter with value 'references' to extract only the references/bibliography section from a PDF.";
    }
}
|
|
|
|
/// <summary>
/// Describes the arguments accepted by <see cref="ExecuteAsync"/>.
/// Only <c>path</c> is required; every other argument is optional.
/// </summary>
public ToolParameterSchema Parameters
{
    get
    {
        return new ToolParameterSchema
        {
            Properties = new Dictionary<string, ToolProperty>
            {
                ["path"] = new ToolProperty
                {
                    Type = "string",
                    Description = "Document file path (absolute or relative to work folder)"
                },
                ["max_chars"] = new ToolProperty
                {
                    Type = "integer",
                    Description = "Maximum characters to extract per chunk. Default: 8000. Use smaller values for summaries."
                },
                ["offset"] = new ToolProperty
                {
                    Type = "integer",
                    Description = "Character offset to start reading from. Default: 0. Use this to read the next chunk of a large file (value from 'next_offset' in previous response)."
                },
                ["sheet"] = new ToolProperty
                {
                    Type = "string",
                    Description = "For Excel files: sheet name or 1-based index. Default: first sheet."
                },
                ["pages"] = new ToolProperty
                {
                    Type = "string",
                    Description = "For PDF files: page range to read (e.g., '1-5', '3', '10-20'). Default: all pages."
                },
                ["section"] = new ToolProperty
                {
                    Type = "string",
                    Description = "Extract specific section. 'references' = extract references/bibliography from PDF. 'abstract' = extract abstract."
                }
            },
            // A plain initializer replaces the CollectionsMarshal.SetCount/AsSpan sequence,
            // which wrote the single element through a span over the list's internals.
            Required = new List<string> { "path" }
        };
    }
}
|
|
|
|
/// <summary>
/// Reads the document named by the <c>path</c> argument and returns one chunk of its text.
/// </summary>
/// <param name="args">
/// JSON object carrying <c>path</c> (required) and the optional <c>max_chars</c>,
/// <c>offset</c>, <c>sheet</c>, <c>pages</c> and <c>section</c> arguments.
/// </param>
/// <param name="context">Supplies the work folder used to resolve the path and the path allow-check.</param>
/// <param name="ct">Cancellation token passed to the extraction work.</param>
/// <returns>
/// A successful result holding a header line plus the requested chunk, or a failed result
/// when the path is missing, blocked or nonexistent, the extension is unsupported, or
/// extraction throws.
/// </returns>
public async Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
{
    // TryGetProperty throws on anything but a JSON object, so reject other shapes up front.
    if (args.ValueKind != JsonValueKind.Object || !args.TryGetProperty("path", out JsonElement pathEl))
    {
        return ToolResult.Fail("path가 필요합니다.");
    }

    string path = GetStringArgument(pathEl);
    if (string.IsNullOrWhiteSpace(path))
    {
        return ToolResult.Fail("path가 필요합니다.");
    }

    int maxChars = args.TryGetProperty("max_chars", out JsonElement maxCharsEl)
        ? GetIntValue(maxCharsEl, DefaultMaxChars)
        : DefaultMaxChars;
    int offset = args.TryGetProperty("offset", out JsonElement offsetEl)
        ? GetIntValue(offsetEl, 0)
        : 0;

    // Callers may send these as JSON numbers (e.g. sheet: 2); GetString() would throw on those.
    string sheetParam = args.TryGetProperty("sheet", out JsonElement sheetEl) ? GetStringArgument(sheetEl) : "";
    string pagesParam = args.TryGetProperty("pages", out JsonElement pagesEl) ? GetStringArgument(pagesEl) : "";
    string sectionParam = args.TryGetProperty("section", out JsonElement sectionEl) ? GetStringArgument(sectionEl) : "";

    // Chunk sizes below 100 fall back to the default.
    if (maxChars < 100)
    {
        maxChars = DefaultMaxChars;
    }

    if (offset < 0)
    {
        offset = 0;
    }

    string fullPath = FileReadTool.ResolvePath(path, context.WorkFolder);
    if (!context.IsPathAllowed(fullPath))
    {
        return ToolResult.Fail("경로 접근 차단: " + fullPath);
    }

    if (!File.Exists(fullPath))
    {
        return ToolResult.Fail("파일이 존재하지 않습니다: " + fullPath);
    }

    string ext = Path.GetExtension(fullPath).ToLowerInvariant();
    try
    {
        // With an offset the extractor must produce everything up to the end of the requested
        // window (plus slack so "more content" is detectable). Computed in 64 bits so an
        // oversized request cannot wrap around to a negative budget.
        int extractMax = offset > 0
            ? (int)Math.Min(int.MaxValue, (long)offset + maxChars + 100)
            : maxChars;

        string extracted = ext switch
        {
            ".pdf" => await Task.Run(() => ReadPdf(fullPath, extractMax, pagesParam, sectionParam), ct),
            ".docx" => await Task.Run(() => ReadDocx(fullPath, extractMax), ct),
            ".xlsx" => await Task.Run(() => ReadXlsx(fullPath, sheetParam, extractMax), ct),
            ".bib" => await Task.Run(() => ReadBibTeX(fullPath, extractMax), ct),
            ".ris" => await Task.Run(() => ReadRis(fullPath, extractMax), ct),
            _ => TextExtensions.Contains(ext) ? await ReadTextFile(fullPath, extractMax, ct) : null,
        };

        if (extracted == null)
        {
            return ToolResult.Fail("지원하지 않는 파일 형식: " + ext);
        }

        int totalExtracted = extracted.Length;
        string chunk = extracted;
        if (offset > 0)
        {
            if (offset >= extracted.Length)
            {
                return ToolResult.Ok($"[{Path.GetFileName(fullPath)}] offset {offset}은 문서 끝을 초과합니다 (전체 {extracted.Length}자).", fullPath);
            }

            chunk = extracted.Substring(offset);
        }

        // Anything beyond the window (including the extractor's truncation notice) is cut off here.
        bool hasMore = chunk.Length > maxChars;
        if (hasMore)
        {
            chunk = chunk.Substring(0, maxChars);
        }

        FileInfo fileInfo = new FileInfo(fullPath);
        string header = $"[{Path.GetFileName(fullPath)}] ({ext.TrimStart('.')}, {FormatSize(fileInfo.Length)})";
        if (offset > 0)
        {
            // Report what was actually returned; the last chunk is usually shorter than maxChars.
            header += $" — offset {offset}부터 {chunk.Length}자 읽음";
        }

        if (hasMore)
        {
            int nextOffset = offset + maxChars;
            header += $"\n⚡ 추가 내용이 있습니다. 다음 청크를 읽으려면 offset={nextOffset}을 사용하세요.";
        }
        else if (offset > 0)
        {
            header += " — 문서 끝까지 읽음 ✓";
        }
        else if (totalExtracted >= maxChars)
        {
            header += $" — 처음 {maxChars}자만 추출됨. 계속 읽으려면 offset={maxChars}을 사용하거나, pages 파라미터로 특정 페이지를 지정하세요.";
        }

        return ToolResult.Ok(header + "\n\n" + chunk, fullPath);
    }
    catch (Exception ex)
    {
        return ToolResult.Fail("문서 읽기 실패: " + ex.Message);
    }
}

/// <summary>
/// Reads a textual argument leniently: strings are returned as-is, numbers as their raw
/// JSON text, and every other kind (null, bool, array, object) as an empty string.
/// </summary>
private static string GetStringArgument(JsonElement el)
{
    switch (el.ValueKind)
    {
        case JsonValueKind.String:
            return el.GetString() ?? "";
        case JsonValueKind.Number:
            return el.GetRawText();
        default:
            return "";
    }
}
|
|
|
|
/// <summary>
/// Extracts text from a PDF, either a page range or a named section.
/// </summary>
/// <param name="path">PDF file to open.</param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <param name="pagesParam">Page range specification, interpreted by <see cref="ParsePageRange"/>.</param>
/// <param name="sectionParam">
/// "references" or "abstract" (case-insensitive) delegates to the matching section
/// extractor, in which case <paramref name="pagesParam"/> is not used.
/// </param>
/// <returns>Page-count and range header lines followed by the text of each non-blank page.</returns>
private static string ReadPdf(string path, int maxChars, string pagesParam, string sectionParam)
{
    using PdfDocument document = PdfDocument.Open(path);
    int pageCount = document.NumberOfPages;

    if ("references".Equals(sectionParam, StringComparison.OrdinalIgnoreCase))
    {
        return ExtractReferences(document, pageCount, maxChars);
    }

    if ("abstract".Equals(sectionParam, StringComparison.OrdinalIgnoreCase))
    {
        return ExtractAbstract(document, pageCount, maxChars);
    }

    (int firstPage, int lastPage) = ParsePageRange(pagesParam, pageCount);

    StringBuilder output = new StringBuilder();
    output.AppendLine($"PDF: {pageCount}페이지");
    output.AppendLine();
    output.AppendLine($"읽는 범위: {firstPage}-{lastPage} / {pageCount} 페이지");
    output.AppendLine();

    // Stop as soon as the budget is reached; remaining pages are left unread.
    int pageNumber = firstPage;
    while (pageNumber <= lastPage && output.Length < maxChars)
    {
        string pageText = document.GetPage(pageNumber).Text;
        if (!string.IsNullOrWhiteSpace(pageText))
        {
            output.AppendLine($"--- Page {pageNumber} ---");
            output.AppendLine(pageText.Trim());
            output.AppendLine();
        }

        pageNumber++;
    }

    return Truncate(output.ToString(), maxChars);
}
|
|
|
|
/// <summary>
/// Converts a page specification ("3", "1-5", "5-1") into an inclusive 1-based page range.
/// </summary>
/// <param name="pagesParam">Specification text; blank or unparseable input selects every page.</param>
/// <param name="totalPages">Number of pages in the document.</param>
/// <returns>
/// The range with <c>start</c> raised to at least 1 and <c>end</c> lowered to at most
/// <paramref name="totalPages"/>. A range given backwards is reordered first. When the
/// request lies entirely outside the document, <c>start</c> exceeds <c>end</c>, which
/// callers iterating from start to end treat as "no pages".
/// </returns>
private static (int start, int end) ParsePageRange(string pagesParam, int totalPages)
{
    if (string.IsNullOrWhiteSpace(pagesParam))
    {
        return (start: 1, end: totalPages);
    }

    // Page numbers are machine input, so parse them independently of the current culture.
    static bool TryParsePage(string s, out int page) =>
        int.TryParse(
            s,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out page);

    string spec = pagesParam.Trim();
    if (TryParsePage(spec, out int single))
    {
        return (start: Math.Max(1, single), end: Math.Min(single, totalPages));
    }

    string[] bounds = spec.Split('-', StringSplitOptions.TrimEntries);
    if (bounds.Length == 2 && TryParsePage(bounds[0], out int first) && TryParsePage(bounds[1], out int last))
    {
        // "5-3" means pages 3 through 5, not an empty selection.
        if (first > last)
        {
            (first, last) = (last, first);
        }

        return (start: Math.Max(1, first), end: Math.Min(last, totalPages));
    }

    return (start: 1, end: totalPages);
}
|
|
|
|
/// <summary>
/// Extracts the references/bibliography section of a PDF.
/// </summary>
/// <param name="doc">Open PDF document.</param>
/// <param name="totalPages">Number of pages in <paramref name="doc"/>.</param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <returns>
/// A numbered entry list when <see cref="ParseReferenceEntries"/> recognises entries,
/// otherwise the raw text collected from the section (or from the last three pages when
/// no section heading is found).
/// </returns>
private static string ExtractReferences(PdfDocument doc, int totalPages, int maxChars)
{
    string[] headingPatterns =
    {
        "(?i)^\\s*(References|Bibliography|Works\\s+Cited|Literature\\s+Cited|참고\\s*문헌|참조|인용\\s*문헌)\\s*$",
        "(?i)^(References|Bibliography|참고문헌)\\s*\\n",
    };

    StringBuilder raw = new StringBuilder();
    raw.AppendLine("=== References / Bibliography ===");
    raw.AppendLine();

    // Search backwards from the last page, looking at no more than the final eleven pages.
    bool headingFound = false;
    int earliestPage = Math.Max(1, totalPages - 10);
    for (int pageNo = totalPages; pageNo >= earliestPage && !headingFound; pageNo--)
    {
        string pageText = doc.GetPage(pageNo).Text;
        if (string.IsNullOrWhiteSpace(pageText))
        {
            continue;
        }

        foreach (string pattern in headingPatterns)
        {
            Match heading = Regex.Match(pageText, pattern, RegexOptions.Multiline);
            if (!heading.Success)
            {
                continue;
            }

            // Keep everything from the heading onward, then the pages that follow it.
            raw.AppendLine($"(Page {pageNo}부터 시작)");
            raw.AppendLine(pageText.Substring(heading.Index).Trim());
            for (int next = pageNo + 1; next <= totalPages && raw.Length < maxChars; next++)
            {
                string nextText = doc.GetPage(next).Text;
                if (!string.IsNullOrWhiteSpace(nextText))
                {
                    raw.AppendLine(nextText.Trim());
                }
            }

            headingFound = true;
            break;
        }
    }

    if (!headingFound)
    {
        raw.AppendLine("(References 섹션 헤더를 찾지 못했습니다. 마지막 3페이지를 반환합니다.)");
        raw.AppendLine();
        for (int pageNo = Math.Max(1, totalPages - 2); pageNo <= totalPages && raw.Length < maxChars; pageNo++)
        {
            string tailText = doc.GetPage(pageNo).Text;
            if (string.IsNullOrWhiteSpace(tailText))
            {
                continue;
            }

            raw.AppendLine($"--- Page {pageNo} ---");
            raw.AppendLine(tailText.Trim());
            raw.AppendLine();
        }
    }

    string rawText = raw.ToString();
    List<string> entries = ParseReferenceEntries(rawText);
    if (entries.Count == 0)
    {
        return Truncate(rawText, maxChars);
    }

    // Entries were recognised: emit them renumbered from 1 instead of the raw text.
    StringBuilder numbered = new StringBuilder();
    numbered.AppendLine($"=== References ({entries.Count}개 항목) ===\n");
    int ordinal = 0;
    foreach (string entry in entries)
    {
        ordinal++;
        numbered.AppendLine($"[{ordinal}] {entry}");
    }

    return Truncate(numbered.ToString(), maxChars);
}
|
|
|
|
/// <summary>
/// Extracts the abstract from the first pages of a PDF.
/// </summary>
/// <param name="doc">Open PDF document.</param>
/// <param name="totalPages">Number of pages in <paramref name="doc"/>.</param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <returns>
/// The text between an abstract heading and the next keywords/introduction heading,
/// searched on up to the first three pages; when none is found, a notice followed by
/// the text of page 1.
/// </returns>
private static string ExtractAbstract(PdfDocument doc, int totalPages, int maxChars)
{
    const string AbstractPattern =
        "(?i)(Abstract|초록|요약)\\s*\\n(.*?)(?=\\n\\s*(Keywords|Introduction|1\\.|서론|키워드|핵심어)\\s*[\\n:])";

    StringBuilder output = new StringBuilder();
    output.AppendLine("=== Abstract ===");
    output.AppendLine();

    int lastCandidate = Math.Min(3, totalPages);
    for (int pageNo = 1; pageNo <= lastCandidate; pageNo++)
    {
        string pageText = doc.GetPage(pageNo).Text;
        if (string.IsNullOrWhiteSpace(pageText))
        {
            continue;
        }

        Match found = Regex.Match(pageText, AbstractPattern, RegexOptions.Singleline);
        if (!found.Success)
        {
            continue;
        }

        // Group 2 is the body between the heading and the next section marker.
        output.AppendLine(found.Groups[2].Value.Trim());
        return Truncate(output.ToString(), maxChars);
    }

    output.AppendLine("(Abstract 섹션을 찾지 못했습니다. 첫 페이지를 반환합니다.)");
    string firstPageText = doc.GetPage(1).Text;
    if (!string.IsNullOrWhiteSpace(firstPageText))
    {
        output.AppendLine(firstPageText.Trim());
    }

    return Truncate(output.ToString(), maxChars);
}
|
|
|
|
/// <summary>
/// Splits reference-section text into individual entries.
/// </summary>
/// <param name="text">Text of a references section; null or empty yields an empty list.</param>
/// <returns>
/// The entries, each collapsed to a single line. Bracketed numbering ("[1] ...") is tried
/// first and needs at least two markers; dotted numbering ("1. ...") is tried next and
/// needs at least three. Entries of ten characters or fewer are dropped. The list is
/// empty when neither numbering style is detected.
/// </returns>
private static List<string> ParseReferenceEntries(string text)
{
    List<string> entries = new List<string>();
    if (string.IsNullOrEmpty(text))
    {
        return entries;
    }

    // Regex.Split keeps the captured number, so the result alternates
    // [preamble, number, entry, number, entry, ...].
    string[] bracketed = Regex.Split(text, "\\n\\s*\\[(\\d+)\\]\\s*");
    if (bracketed.Length > 3)
    {
        CollectReferenceEntries(bracketed, entries);
        return entries;
    }

    string[] dotted = Regex.Split(text, "\\n\\s*(\\d+)\\.\\s+");
    if (dotted.Length > 5)
    {
        CollectReferenceEntries(dotted, entries);
    }

    return entries;
}

/// <summary>
/// Adds every entry segment (indexes 2, 4, 6, ...) of a numbered split to
/// <paramref name="entries"/>, skipping segments too short to be a reference.
/// </summary>
private static void CollectReferenceEntries(string[] parts, List<string> entries)
{
    for (int i = 2; i < parts.Length; i += 2)
    {
        // Collapse every whitespace run (line breaks, "\r" from CRLF, repeated spaces) to one
        // space; the previous Replace(" ", " ") changed nothing and left "\r" in place.
        string entry = Regex.Replace(parts[i], "\\s+", " ").Trim();
        if (entry.Length > 10)
        {
            entries.Add(entry);
        }
    }
}
|
|
|
|
/// <summary>Matches one entry: "@type{key, body" up to a closing brace that starts a line.</summary>
private static readonly Regex BibEntryRegex = new Regex(
    "@(\\w+)\\s*\\{\\s*([^,\\s]+)\\s*,\\s*(.*?)\\n\\s*\\}",
    RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Matches one "name = value" field. The value may be brace-delimited with arbitrarily
/// nested balanced braces, double-quoted, or a bare token such as <c>2020</c>.
/// </summary>
private static readonly Regex BibFieldRegex = new Regex(
    "(?<key>\\w+)\\s*=\\s*(?:"
    + "\\{(?<value>(?>[^{}]+|(?<open>\\{)|(?<-open>\\}))*)(?(open)(?!))\\}"
    + "|\"(?<value>[^\"]*)\""
    + "|(?<value>[\\w.\\-]+))",
    RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>Field names included in the summary; all other fields are omitted.</summary>
private static readonly HashSet<string> BibDisplayedFields = new HashSet<string>(StringComparer.Ordinal)
{
    "author", "title", "journal", "booktitle", "year", "volume",
    "number", "pages", "doi", "publisher", "url",
};

/// <summary>
/// Summarises a BibTeX file as a numbered list of entries with their main fields.
/// </summary>
/// <param name="path">BibTeX file, read as UTF-8.</param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <returns>
/// An entry-count header followed by each entry's type, citation key and selected
/// fields; when no entry is recognised, a notice followed by the raw file text.
/// </returns>
private static string ReadBibTeX(string path, int maxChars)
{
    string source = File.ReadAllText(path, Encoding.UTF8);
    MatchCollection bibEntries = BibEntryRegex.Matches(source);

    StringBuilder output = new StringBuilder();
    output.AppendLine($"BibTeX: {bibEntries.Count}개 항목");
    output.AppendLine();

    int ordinal = 0;
    foreach (Match bibEntry in bibEntries)
    {
        if (output.Length >= maxChars)
        {
            break;
        }

        ordinal++;
        string entryType = bibEntry.Groups[1].Value;
        string citeKey = bibEntry.Groups[2].Value;
        string body = bibEntry.Groups[3].Value;
        output.AppendLine($"[{ordinal}] @{entryType}{{{citeKey}}}");

        foreach (Match field in BibFieldRegex.Matches(body))
        {
            // Invariant lower-casing: culture rules (e.g. the Turkish dotless i) would
            // otherwise stop "TITLE" from matching "title".
            string fieldName = field.Groups["key"].Value.ToLowerInvariant();
            if (!BibDisplayedFields.Contains(fieldName))
            {
                continue;
            }

            string fieldValue = field.Groups["value"].Value.Trim();
            output.AppendLine($" {fieldName}: {fieldValue}");
        }

        output.AppendLine();
    }

    if (bibEntries.Count == 0)
    {
        output.AppendLine("(BibTeX 항목을 파싱하지 못했습니다. 원문을 반환합니다.)");
        output.AppendLine(Truncate(source, maxChars - output.Length));
    }

    return Truncate(output.ToString(), maxChars);
}
|
|
|
|
/// <summary>Display labels for known RIS tags; unknown tags are shown as-is.</summary>
private static readonly Dictionary<string, string> RisTagLabels = new Dictionary<string, string>
{
    ["TY"] = "Type",
    ["AU"] = "Author",
    ["TI"] = "Title",
    ["T1"] = "Title",
    ["JO"] = "Journal",
    ["JF"] = "Journal",
    ["PY"] = "Year",
    ["Y1"] = "Year",
    ["VL"] = "Volume",
    ["IS"] = "Issue",
    ["SP"] = "Start Page",
    ["EP"] = "End Page",
    ["DO"] = "DOI",
    ["UR"] = "URL",
    ["PB"] = "Publisher",
    ["AB"] = "Abstract",
    ["KW"] = "Keyword",
    ["SN"] = "ISSN/ISBN",
};

/// <summary>
/// Summarises a RIS file as a numbered list of records with labelled fields.
/// </summary>
/// <param name="path">RIS file, read as UTF-8.</param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <returns>
/// A record-count header followed by every record's fields. Repeated author and keyword
/// values are joined with "; ", other repeated values with a space.
/// </returns>
private static string ReadRis(string path, int maxChars)
{
    string[] lines = File.ReadAllLines(path, Encoding.UTF8);

    List<Dictionary<string, List<string>>> records = new List<Dictionary<string, List<string>>>();
    Dictionary<string, List<string>> current = null;
    foreach (string line in lines)
    {
        if (!TryParseRisLine(line, out string tag, out string value))
        {
            continue;
        }

        // Record boundaries are decided from the parsed tag. The former StartsWith("TY -")
        // test used a single space and so could never match a line that passes the
        // two-space tag check, which meant no record was ever opened.
        if (tag == "TY")
        {
            current = new Dictionary<string, List<string>>();
            records.Add(current);
        }
        else if (tag == "ER")
        {
            current = null;
            continue;
        }

        if (current == null)
        {
            continue;
        }

        if (!current.TryGetValue(tag, out List<string> values))
        {
            values = new List<string>();
            current[tag] = values;
        }

        values.Add(value);
    }

    StringBuilder output = new StringBuilder();
    output.AppendLine($"RIS: {records.Count}개 항목");
    output.AppendLine();

    for (int i = 0; i < records.Count && output.Length < maxChars; i++)
    {
        output.AppendLine($"[{i + 1}]");
        foreach ((string tag, List<string> values) in records[i])
        {
            string label = RisTagLabels.GetValueOrDefault(tag, tag);
            string separator = tag is "AU" or "KW" ? "; " : " ";
            output.AppendLine($" {label}: {string.Join(separator, values)}");
        }

        output.AppendLine();
    }

    return Truncate(output.ToString(), maxChars);
}

/// <summary>
/// Parses a tagged RIS line: a two-character tag, two spaces, a dash, then either end of
/// line or a space followed by the value.
/// </summary>
/// <returns><c>true</c> when <paramref name="line"/> has that layout.</returns>
private static bool TryParseRisLine(string line, out string tag, out string value)
{
    tag = "";
    value = "";
    if (line == null || line.Length < 5 || line[2] != ' ' || line[3] != ' ' || line[4] != '-')
    {
        return false;
    }

    // A bare "ER  -" terminator has nothing after the dash; otherwise a space must follow.
    if (line.Length > 5 && line[5] != ' ')
    {
        return false;
    }

    tag = line.Substring(0, 2).Trim();
    value = line.Length > 6 ? line.Substring(6).Trim() : "";
    return true;
}
|
|
|
|
/// <summary>
/// Extracts the text of a Word document's body, one line per paragraph or table row.
/// </summary>
/// <param name="path">.docx file, opened read-only.</param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <returns>
/// The non-blank top-level paragraphs and table rows in document order (cells within a
/// row are separated by tabs), or a placeholder when the document has no body.
/// </returns>
private static string ReadDocx(string path, int maxChars)
{
    using WordprocessingDocument document = WordprocessingDocument.Open(path, isEditable: false);
    Body body = document.MainDocumentPart?.Document?.Body;
    if (body == null)
    {
        return "(빈 문서)";
    }

    static void AppendIfNotBlank(StringBuilder target, string line)
    {
        if (!string.IsNullOrWhiteSpace(line))
        {
            target.AppendLine(line);
        }
    }

    StringBuilder output = new StringBuilder();

    // Walk the body's direct children so tables are read too; iterating only the direct
    // Paragraph children skipped all text held in table cells. Wordprocessing types are
    // fully qualified because the Spreadsheet namespace imported by this file also
    // declares a Table type.
    foreach (var block in body.ChildElements)
    {
        if (block is Paragraph paragraph)
        {
            AppendIfNotBlank(output, paragraph.InnerText);
        }
        else if (block is DocumentFormat.OpenXml.Wordprocessing.Table table)
        {
            foreach (var row in table.Elements<DocumentFormat.OpenXml.Wordprocessing.TableRow>())
            {
                IEnumerable<string> cellTexts = row
                    .Elements<DocumentFormat.OpenXml.Wordprocessing.TableCell>()
                    .Select(cell => cell.InnerText);
                AppendIfNotBlank(output, string.Join("\t", cellTexts));
                if (output.Length >= maxChars)
                {
                    break;
                }
            }
        }

        if (output.Length >= maxChars)
        {
            break;
        }
    }

    return Truncate(output.ToString(), maxChars);
}
|
|
|
|
/// <summary>
/// Extracts one worksheet of an Excel workbook as tab-separated rows.
/// </summary>
/// <param name="path">.xlsx file, opened read-only.</param>
/// <param name="sheetParam">
/// Sheet selector: a 1-based index within range, otherwise a case-insensitive sheet
/// name. Blank or unmatched selectors fall back to the first sheet.
/// </param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <returns>
/// A workbook summary line, the chosen sheet's name and row count, then one line per
/// row; or a placeholder when the workbook part, sheets, or sheet id is missing.
/// </returns>
private static string ReadXlsx(string path, string sheetParam, int maxChars)
{
    using SpreadsheetDocument workbook = SpreadsheetDocument.Open(path, isEditable: false);
    WorkbookPart workbookPart = workbook.WorkbookPart;
    if (workbookPart == null)
    {
        return "(빈 스프레드시트)";
    }

    List<Sheet> sheets = workbookPart.Workbook.Sheets?.Elements<Sheet>().ToList() ?? new List<Sheet>();
    if (sheets.Count == 0)
    {
        return "(시트 없음)";
    }

    StringBuilder output = new StringBuilder();
    string sheetNames = string.Join(", ", sheets.Select(s => s.Name?.Value));
    output.AppendLine($"Excel: {sheets.Count}개 시트 ({sheetNames})");
    output.AppendLine();

    Sheet selected = null;
    if (!string.IsNullOrEmpty(sheetParam))
    {
        // A number inside 1..Count is an index; anything else is tried as a sheet name.
        if (int.TryParse(sheetParam, out int index) && index >= 1 && index <= sheets.Count)
        {
            selected = sheets[index - 1];
        }
        else
        {
            selected = sheets.FirstOrDefault(
                s => string.Equals(s.Name?.Value, sheetParam, StringComparison.OrdinalIgnoreCase));
        }
    }

    selected ??= sheets[0];

    string relationshipId = selected.Id?.Value;
    if (relationshipId == null)
    {
        return "(시트 ID 없음)";
    }

    WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);
    List<SharedStringItem> sharedStrings =
        workbookPart.SharedStringTablePart?.SharedStringTable.Elements<SharedStringItem>().ToList()
        ?? new List<SharedStringItem>();
    List<Row> rows = worksheetPart.Worksheet.Descendants<Row>().ToList();

    output.AppendLine($"[{selected.Name?.Value}] ({rows.Count} rows)");
    foreach (Row row in rows)
    {
        IEnumerable<string> cellTexts = row.Elements<Cell>().Select(cell => GetCellValue(cell, sharedStrings));
        output.AppendLine(string.Join("\t", cellTexts));
        if (output.Length >= maxChars)
        {
            break;
        }
    }

    return Truncate(output.ToString(), maxChars);
}
|
|
|
|
/// <summary>
/// Returns the display text of a worksheet cell.
/// </summary>
/// <param name="cell">Cell to read.</param>
/// <param name="sharedStrings">The workbook's shared-string items, in table order.</param>
/// <returns>
/// For shared-string cells, the referenced item's text when the index is valid; for
/// inline-string cells, the text stored inside the cell; otherwise the raw cell value,
/// or an empty string when the cell has none.
/// </returns>
private static string GetCellValue(Cell cell, List<SharedStringItem> sharedStrings)
{
    string raw = cell.CellValue?.Text ?? "";
    CellValues? dataType = cell.DataType?.Value;

    if (dataType == CellValues.SharedString)
    {
        // The stored value is an index into the shared-string table.
        bool isIndex = int.TryParse(
            raw,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int index);
        if (isIndex && index >= 0 && index < sharedStrings.Count)
        {
            return sharedStrings[index].InnerText;
        }

        return raw;
    }

    if (dataType == CellValues.InlineString)
    {
        // Inline strings keep their text in the cell's InlineString element rather than in
        // CellValue, so these cells used to come back empty.
        return cell.InlineString?.InnerText ?? raw;
    }

    return raw;
}
|
|
|
|
/// <summary>
/// Reads the beginning of a text file, decoding as UTF-8 unless a byte-order mark
/// indicates another encoding.
/// </summary>
/// <param name="path">File to read.</param>
/// <param name="maxChars">Character budget; output is cut with <see cref="Truncate"/>.</param>
/// <param name="ct">Cancellation token observed between reads.</param>
/// <returns>
/// The same text that reading the whole file and truncating it would give, but reading
/// stops once the budget is exceeded, so memory use does not grow with file size.
/// </returns>
private static async Task<string> ReadTextFile(string path, int maxChars, CancellationToken ct)
{
    await using FileStream stream = new FileStream(
        path,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read,
        bufferSize: 4096,
        FileOptions.Asynchronous | FileOptions.SequentialScan);
    using StreamReader reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true);

    char[] buffer = new char[8192];
    StringBuilder collected = new StringBuilder();

    // Read until strictly more than maxChars characters are held, so Truncate can still
    // tell "exactly at the limit" apart from "content was cut".
    while (collected.Length <= maxChars)
    {
        int read = await reader.ReadAsync(buffer.AsMemory(), ct);
        if (read == 0)
        {
            break;
        }

        collected.Append(buffer, 0, read);
    }

    return Truncate(collected.ToString(), maxChars);
}
|
|
|
|
/// <summary>
/// Limits <paramref name="text"/> to <paramref name="maxChars"/> characters.
/// </summary>
/// <returns>
/// The text unchanged when it fits; otherwise its first <paramref name="maxChars"/>
/// characters followed by a notice that content was cut.
/// </returns>
private static string Truncate(string text, int maxChars)
{
    const string CutNotice = "\n\n... (내용 잘림 — pages 또는 section 파라미터로 특정 부분을 읽을 수 있습니다)";

    bool fits = text.Length <= maxChars;
    return fits ? text : string.Concat(text.Substring(0, maxChars), CutNotice);
}
|
|
|
|
/// <summary>
/// Formats a byte count for display: whole bytes below 1024, otherwise kilobytes or
/// megabytes (from 1048576 bytes up) with one decimal place.
/// </summary>
private static string FormatSize(long bytes)
{
    if (bytes < 1024)
    {
        return $"{bytes} B";
    }

    if (bytes < 1048576)
    {
        return $"{(double)bytes / 1024.0:F1} KB";
    }

    return $"{(double)bytes / 1048576.0:F1} MB";
}
|
|
|
|
/// <summary>
/// Reads an integer argument leniently, never throwing on malformed input.
/// </summary>
/// <param name="el">JSON value supplied by the caller.</param>
/// <param name="defaultValue">Value returned when <paramref name="el"/> cannot be read as an integer.</param>
/// <returns>
/// The value of a JSON number that fits in an <see cref="int"/> (fractional numbers are
/// truncated toward zero), the parsed value of a numeric string, or
/// <paramref name="defaultValue"/> for anything else.
/// </returns>
private static int GetIntValue(JsonElement el, int defaultValue)
{
    switch (el.ValueKind)
    {
        case JsonValueKind.Number:
            if (el.TryGetInt32(out int exact))
            {
                return exact;
            }

            // GetInt32 throws FormatException for numbers such as 12.0 or 1e20; accept
            // the ones that still fit in an int and fall back to the default otherwise.
            if (el.TryGetDouble(out double approximate)
                && approximate >= int.MinValue
                && approximate <= int.MaxValue)
            {
                return (int)approximate;
            }

            return defaultValue;

        case JsonValueKind.String:
            bool parsedOk = int.TryParse(
                el.GetString(),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out int parsed);
            return parsedOk ? parsed : defaultValue;

        default:
            return defaultValue;
    }
}
|
|
}
|