문서 생성 고도화 1차: 네이티브 워드·엑셀·HTML 경로 정렬 및 품질 보강

- Word/Excel/HTML 스킬을 Python 우회 중심에서 AX 네이티브 문서 도구 우선 경로로 재작성했습니다.

- DocumentPlannerTool의 보고서·제안서·분석 문서 아웃라인을 Executive Summary, Business Case, Decision Ask, Appendix 중심의 업무형 구조로 확장했습니다.

- DocumentAssemblerTool의 DOCX 조립 경로에서 표·목록·콜아웃·소제목 같은 HTML/Markdown 구조를 더 보존하도록 개선했습니다.

- ExcelSkill에 summary_sheet를 추가해 KPI·핵심 인사이트·후속 과제를 담은 요약 시트를 상세 데이터 시트 앞에 생성할 수 있게 했습니다.

- HtmlSkill에 comparison, roadmap, matrix 구조화 섹션을 추가하고 sections 중심 호출 스키마를 정리했습니다.

- DocumentAssemblerSemanticTests, ExcelSkillSummarySheetTests, HtmlSkillConsultingSectionsTests, DocumentPlannerBusinessDocumentTests를 추가했습니다.

- 검증: dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify_doc_phase1\\ -p:IntermediateOutputPath=obj\\verify_doc_phase1\\

- 검증: dotnet test src/AxCopilot.Tests/AxCopilot.Tests.csproj -c Release -v minimal --filter 문서_고도화_테스트_5건 -p:OutputPath=bin\\verify_doc_phase1_tests\\ -p:IntermediateOutputPath=obj\\verify_doc_phase1_tests\\
This commit is contained in:
2026-04-14 21:02:08 +09:00
parent 0b6d60e959
commit d9cb02f3c4
14 changed files with 1002 additions and 466 deletions

View File

@@ -1,6 +1,7 @@
using System.IO;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace AxCopilot.Services.Agent;
@@ -238,13 +239,11 @@ public class DocumentAssemblerTool : IAgentTool
private string AssembleDocx(string path, string title, List<(string Heading, string Content, int Level)> sections,
string? headerText, string? footerText)
{
// DOCX 조립: DocxSkill의 sections 형식으로 변환하여 OpenXML 사용
using var doc = DocumentFormat.OpenXml.Packaging.WordprocessingDocument.Create(
path, DocumentFormat.OpenXml.WordprocessingDocumentType.Document);
var mainPart = doc.AddMainDocumentPart();
// 기본 스타일 파트 추가 (styles.xml — 없으면 Word에서 글꼴/서식 깨짐)
var stylesPart = mainPart.AddNewPart<DocumentFormat.OpenXml.Packaging.StyleDefinitionsPart>();
stylesPart.Styles = CreateDefaultDocxStyles();
stylesPart.Styles.Save();
@@ -252,8 +251,7 @@ public class DocumentAssemblerTool : IAgentTool
mainPart.Document = new DocumentFormat.OpenXml.Wordprocessing.Document();
var body = mainPart.Document.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Body());
// 한글 호환 글꼴 설정 헬퍼
static DocumentFormat.OpenXml.Wordprocessing.RunFonts KoreanFonts() => new()
DocumentFormat.OpenXml.Wordprocessing.RunFonts KoreanFonts() => new()
{
Ascii = "맑은 고딕",
HighAnsi = "맑은 고딕",
@@ -261,7 +259,235 @@ public class DocumentAssemblerTool : IAgentTool
ComplexScript = "맑은 고딕"
};
// 제목
// Decodes the handful of HTML entities this assembler understands
// (&nbsp;, &lt;, &gt;, &quot;, &amp;) into their literal characters.
// Any other entity is left untouched.
string DecodeHtml(string text)
    => text.Replace("&nbsp;", " ")
        .Replace("&lt;", "<")
        .Replace("&gt;", ">")
        .Replace("&quot;", "\"")
        // "&amp;" is decoded last on purpose: decoding it first would turn
        // escaped entity text such as "&amp;lt;" into "&lt;" and then into "<",
        // i.e. decode the input twice.
        .Replace("&amp;", "&");
// Flattens an HTML fragment into plain text while keeping its line structure:
// <br> and the closing tags of block-level elements become newlines, every
// remaining tag becomes a single space, entities are decoded via DecodeHtml,
// and the result is whitespace-tidied and trimmed.
// Returns "" for null, empty or whitespace-only input.
string ExtractStructuredText(string html)
{
    if (string.IsNullOrWhiteSpace(html))
    {
        return "";
    }

    // Turn explicit line breaks and block boundaries into newlines first.
    var withLineBreaks = Regex.Replace(html, @"<br\s*/?>", "\n", RegexOptions.IgnoreCase);
    var withBlockBreaks = Regex.Replace(withLineBreaks, @"</(p|div|li|tr|h[1-6]|blockquote|table|thead|tbody|ul|ol)>", "\n", RegexOptions.IgnoreCase);

    // Tags are stripped before entity decoding so that a decoded "<" or ">"
    // is never mistaken for markup.
    var withoutTags = Regex.Replace(withBlockBreaks, @"<[^>]+>", " ");
    var decoded = DecodeHtml(withoutTags);

    // Drop spaces/tabs that trail a line and cap blank-line runs at one.
    var withoutTrailingBlanks = Regex.Replace(decoded, @"[ \t]+\n", "\n");
    var collapsed = Regex.Replace(withoutTrailingBlanks, @"\n{3,}", "\n\n");
    return collapsed.Trim();
}
// Builds a single-run paragraph with 160 units of spacing after it.
//   text     - paragraph text; embedded '\n' characters become explicit line breaks.
//   fontSize - value for the run's FontSize element (default "22").
//   bold     - when true the run is bold.
//   color    - optional hex color for the run text; when a fill is set it is also
//              used for the left border (falling back to "2B579A").
//   fill     - optional hex shading color; when set the paragraph also gets a
//              single left border, which is how header cells and callouts are styled.
// Returns the new paragraph; the caller appends it to the document.
DocumentFormat.OpenXml.Wordprocessing.Paragraph CreateParagraph(
    string text,
    string fontSize = "22",
    bool bold = false,
    string? color = null,
    string? fill = null)
{
    var para = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();

    var props = new DocumentFormat.OpenXml.Wordprocessing.ParagraphProperties
    {
        SpacingBetweenLines = new DocumentFormat.OpenXml.Wordprocessing.SpacingBetweenLines
        {
            After = "160"
        }
    };
    if (!string.IsNullOrWhiteSpace(fill))
    {
        props.ParagraphBorders = new DocumentFormat.OpenXml.Wordprocessing.ParagraphBorders(
            new DocumentFormat.OpenXml.Wordprocessing.LeftBorder
            {
                Val = DocumentFormat.OpenXml.Wordprocessing.BorderValues.Single,
                Size = 10,
                Color = color ?? "2B579A"
            });
        props.Shading = new DocumentFormat.OpenXml.Wordprocessing.Shading
        {
            Val = DocumentFormat.OpenXml.Wordprocessing.ShadingPatternValues.Clear,
            Fill = fill
        };
    }
    para.AppendChild(props);

    var run = new DocumentFormat.OpenXml.Wordprocessing.Run();
    var runProps = new DocumentFormat.OpenXml.Wordprocessing.RunProperties
    {
        RunFonts = KoreanFonts(),
        FontSize = new DocumentFormat.OpenXml.Wordprocessing.FontSize { Val = fontSize }
    };
    if (bold)
    {
        runProps.Bold = new DocumentFormat.OpenXml.Wordprocessing.Bold();
    }
    if (!string.IsNullOrWhiteSpace(color))
    {
        runProps.Color = new DocumentFormat.OpenXml.Wordprocessing.Color { Val = color };
    }
    run.AppendChild(runProps);

    // A newline inside a text element is not rendered as a line break by Word,
    // so multi-line text (e.g. a callout or table cell holding several
    // paragraphs) is emitted as text segments separated by explicit breaks.
    // Single-line text still produces exactly one text element.
    var segments = text.Split('\n');
    for (var i = 0; i < segments.Length; i++)
    {
        if (i > 0)
        {
            run.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Break());
        }
        run.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Text(segments[i])
        {
            Space = DocumentFormat.OpenXml.SpaceProcessingModeValues.Preserve
        });
    }

    para.AppendChild(run);
    return para;
}
// Converts an HTML <table> fragment into a bordered Word table.
// Each <tr> becomes a row and each <th>/<td> a cell whose text is the flattened
// cell content. A row is styled as a header when it is the first <tr> or
// contains a <th>. Rows without any cell are skipped.
DocumentFormat.OpenXml.Wordprocessing.Table CreateTableFromHtml(string tableHtml)
{
    var table = new DocumentFormat.OpenXml.Wordprocessing.Table();
    table.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.TableProperties(
        new DocumentFormat.OpenXml.Wordprocessing.TableBorders(
            new DocumentFormat.OpenXml.Wordprocessing.TopBorder { Val = DocumentFormat.OpenXml.Wordprocessing.BorderValues.Single, Size = 8, Color = "CBD5E1" },
            new DocumentFormat.OpenXml.Wordprocessing.BottomBorder { Val = DocumentFormat.OpenXml.Wordprocessing.BorderValues.Single, Size = 8, Color = "CBD5E1" },
            new DocumentFormat.OpenXml.Wordprocessing.LeftBorder { Val = DocumentFormat.OpenXml.Wordprocessing.BorderValues.Single, Size = 8, Color = "CBD5E1" },
            new DocumentFormat.OpenXml.Wordprocessing.RightBorder { Val = DocumentFormat.OpenXml.Wordprocessing.BorderValues.Single, Size = 8, Color = "CBD5E1" },
            new DocumentFormat.OpenXml.Wordprocessing.InsideHorizontalBorder { Val = DocumentFormat.OpenXml.Wordprocessing.BorderValues.Single, Size = 8, Color = "E2E8F0" },
            new DocumentFormat.OpenXml.Wordprocessing.InsideVerticalBorder { Val = DocumentFormat.OpenXml.Wordprocessing.BorderValues.Single, Size = 8, Color = "E2E8F0" })));

    var rowMatches = Regex.Matches(tableHtml, @"<tr\b[^>]*>(.*?)</tr>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    var rowIndex = 0;
    foreach (Match rowMatch in rowMatches)
    {
        var row = new DocumentFormat.OpenXml.Wordprocessing.TableRow();
        var rowHtml = rowMatch.Groups[1].Value;
        var isHeader = rowIndex == 0 || Regex.IsMatch(rowHtml, "<th", RegexOptions.IgnoreCase);
        var cellMatches = Regex.Matches(rowHtml, @"<t[hd]\b[^>]*>(.*?)</t[hd]>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        foreach (Match cellMatch in cellMatches)
        {
            var cellText = ExtractStructuredText(cellMatch.Groups[1].Value);
            var cell = new DocumentFormat.OpenXml.Wordprocessing.TableCell();

            // Cell properties must be the first child of a table cell: the
            // WordprocessingML schema places tcPr before any block content,
            // and a cell with tcPr after its paragraph fails validation.
            cell.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.TableCellProperties(
                new DocumentFormat.OpenXml.Wordprocessing.TableCellWidth
                {
                    Type = DocumentFormat.OpenXml.Wordprocessing.TableWidthUnitValues.Auto
                }));

            if (isHeader)
            {
                cell.AppendChild(CreateParagraph(cellText, fontSize: "21", bold: true, color: "1F3A5F", fill: "E8EEF8"));
            }
            else
            {
                cell.AppendChild(CreateParagraph(cellText));
            }

            row.AppendChild(cell);
        }

        // Only rows that produced at least one cell are added to the table.
        if (cellMatches.Count > 0)
        {
            table.AppendChild(row);
        }

        // The index counts every <tr>, including skipped empty ones.
        rowIndex++;
    }

    return table;
}
// Appends plain (non-HTML) text to the document body, one paragraph per
// non-blank line. Markdown-style "##".."######" headings become bold colored
// sub-headings, "-" / "*" bullets are rewritten with a "•" marker, and every
// other line (including "1." style numbered items) is emitted unchanged.
void AppendPlainText(string plain)
{
    var rawLines = plain.Split('\n', StringSplitOptions.RemoveEmptyEntries);
    for (var i = 0; i < rawLines.Length; i++)
    {
        var current = rawLines[i].Trim();
        if (string.IsNullOrWhiteSpace(current))
        {
            continue;
        }

        // Both patterns are anchored at the start of the trimmed line, so
        // cutting off the matched prefix equals replacing it with "".
        var headingPrefix = Regex.Match(current, @"^#{2,6}\s+");
        if (headingPrefix.Success)
        {
            body.AppendChild(CreateParagraph(current[headingPrefix.Length..], fontSize: "26", bold: true, color: "2B579A"));
            continue;
        }

        var bulletPrefix = Regex.Match(current, @"^[-*]\s+");
        if (bulletPrefix.Success)
        {
            body.AppendChild(CreateParagraph($"• {current[bulletPrefix.Length..]}"));
            continue;
        }

        // Numbered items keep their own "N." prefix and need no special
        // formatting, so they share the default path with ordinary lines.
        body.AppendChild(CreateParagraph(current));
    }
}
// Appends one paragraph per <li> found in the given list markup.
// Ordered lists are prefixed "1. ", "2. ", ... in document order;
// unordered lists are prefixed with "• ".
void AppendListBlock(string listHtml, bool ordered)
{
    var items = Regex.Matches(listHtml, @"<li\b[^>]*>(.*?)</li>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    for (var i = 0; i < items.Count; i++)
    {
        var itemText = ExtractStructuredText(items[i].Groups[1].Value);
        var marker = ordered ? $"{i + 1}. " : "• ";
        body.AppendChild(CreateParagraph(marker + itemText));
    }
}
// Appends a section's content to the document body while preserving its structure.
// Content without block-level HTML is handled as plain/Markdown text. Otherwise
// the content is scanned for tables, lists, blockquotes, recognised callout/grid
// divs, h2-h6 headings and paragraphs; each is rendered with its own formatting
// and any text between or after those blocks is emitted as plain text.
void AppendStructuredContent(string rawContent)
{
    if (string.IsNullOrWhiteSpace(rawContent))
    {
        return;
    }

    var content = rawContent.Replace("\r\n", "\n");

    var hasBlockMarkup = Regex.IsMatch(content, @"<\s*(p|ul|ol|table|blockquote|div|h[1-6]|li)\b", RegexOptions.IgnoreCase);
    if (!hasBlockMarkup)
    {
        AppendPlainText(content);
        return;
    }

    content = Regex.Replace(content, @"<br\s*/?>", "\n", RegexOptions.IgnoreCase);

    var blockPattern = @"<table\b[^>]*>.*?</table>|<ul\b[^>]*>.*?</ul>|<ol\b[^>]*>.*?</ol>|<blockquote\b[^>]*>.*?</blockquote>|<div\b[^>]*class=""[^""]*(callout-[^""]*|comparison-grid|roadmap-block|matrix-grid)[^""]*""[^>]*>.*?</div>|<h[2-6]\b[^>]*>.*?</h[2-6]>|<p\b[^>]*>.*?</p>";
    var blocks = Regex.Matches(content, blockPattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);
    if (blocks.Count == 0)
    {
        // Markup was detected but no supported block matched: flatten everything.
        AppendPlainText(ExtractStructuredText(content));
        return;
    }

    // Case-insensitive test of how a matched block begins.
    bool Opens(string fragment, string pattern)
        => Regex.IsMatch(fragment, pattern, RegexOptions.IgnoreCase);

    // End offset of the last block handled so far; text between this offset
    // and the next block is emitted as plain text.
    var consumed = 0;
    for (var i = 0; i < blocks.Count; i++)
    {
        var current = blocks[i];
        if (current.Index > consumed)
        {
            var between = content.Substring(consumed, current.Index - consumed);
            AppendPlainText(ExtractStructuredText(between));
        }

        var html = current.Value;
        if (Opens(html, @"^<table"))
        {
            body.AppendChild(CreateTableFromHtml(html));
        }
        else if (Opens(html, @"^<ul"))
        {
            AppendListBlock(html, ordered: false);
        }
        else if (Opens(html, @"^<ol"))
        {
            AppendListBlock(html, ordered: true);
        }
        else if (Opens(html, @"^<h"))
        {
            // Sub-heading inside a section.
            body.AppendChild(CreateParagraph(ExtractStructuredText(html), fontSize: "26", bold: true, color: "2B579A"));
        }
        else if (Opens(html, @"^<(blockquote|div)"))
        {
            // Blockquotes and recognised divs are rendered as shaded callouts.
            body.AppendChild(CreateParagraph(ExtractStructuredText(html), fontSize: "21", bold: true, color: "1F3A5F", fill: "EDF4FF"));
        }
        else
        {
            // Remaining case is a <p> block.
            AppendPlainText(ExtractStructuredText(html));
        }

        consumed = current.Index + current.Length;
    }

    if (consumed < content.Length)
    {
        AppendPlainText(ExtractStructuredText(content.Substring(consumed)));
    }
}
var titlePara = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();
var titleRun = new DocumentFormat.OpenXml.Wordprocessing.Run();
titleRun.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.RunProperties
@@ -277,10 +503,8 @@ public class DocumentAssemblerTool : IAgentTool
// 빈 줄
body.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
// 각 섹션
foreach (var (heading, content, level) in sections)
{
// 섹션 제목
var headPara = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();
var headRun = new DocumentFormat.OpenXml.Wordprocessing.Run();
headRun.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.RunProperties
@@ -294,31 +518,10 @@ public class DocumentAssemblerTool : IAgentTool
headPara.AppendChild(headRun);
body.AppendChild(headPara);
// 섹션 본문 (줄 단위 분할)
var lines = StripHtmlTags(content).Split('\n', StringSplitOptions.RemoveEmptyEntries);
foreach (var line in lines)
{
var para = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();
var run = new DocumentFormat.OpenXml.Wordprocessing.Run();
run.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.RunProperties
{
RunFonts = KoreanFonts(),
FontSize = new DocumentFormat.OpenXml.Wordprocessing.FontSize { Val = "22" }
});
run.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Text(line.Trim())
{
Space = DocumentFormat.OpenXml.SpaceProcessingModeValues.Preserve
});
para.AppendChild(run);
body.AppendChild(para);
}
// 섹션 간 빈 줄
AppendStructuredContent(content);
body.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
}
// ★ SectionProperties는 반드시 body의 마지막 자식이어야 함 (OOXML 규격)
// 첫 번째에 넣으면 Word가 무시하거나 문서가 깨짐
body.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.SectionProperties(
new DocumentFormat.OpenXml.Wordprocessing.PageSize { Width = 11906, Height = 16838 },
new DocumentFormat.OpenXml.Wordprocessing.PageMargin { Top = 1440, Right = 1440, Bottom = 1440, Left = 1440,