Files
AX-Copilot-Codex/src/AxCopilot/Services/IntentDetector.cs

178 lines
8.7 KiB
C#
Raw Blame History

namespace AxCopilot.Services;
/// <summary>
/// Local keyword-based classifier that detects the question type (intent) of a user message.
/// Works purely by keyword matching against an in-memory table; no external API is called.
/// </summary>
public static class IntentDetector
{
    /// <summary>Intent category name constants.</summary>
    public static class Categories
    {
        public const string Coding = "coding";
        public const string Translation = "translation";
        public const string Analysis = "analysis";
        public const string Creative = "creative";
        public const string Document = "document";
        public const string Math = "math";
        public const string General = "general";

        /// <summary>Every category name, including the <see cref="General"/> fallback.</summary>
        public static readonly string[] All =
        {
            Coding, Translation, Analysis, Creative, Document, Math, General
        };
    }

    /// <summary>Minimum top score required before any category other than "general" is reported.</summary>
    private const double MinimumScore = 1.0;

    /// <summary>Runner-up/best score ratio above which the result is treated as ambiguous.</summary>
    private const double AmbiguityRatio = 0.7;

    /// <summary>Factor applied to the confidence when the result is ambiguous.</summary>
    private const double AmbiguityPenalty = 0.8;

    /// <summary>
    /// Keyword table per category, as (keyword, weight) pairs.
    /// Keywords containing Hangul are matched as substrings; all other keywords are matched as whole words.
    /// </summary>
    private static readonly Dictionary<string, (string Keyword, double Weight)[]> _keywords = new()
    {
        [Categories.Coding] = new (string, double)[]
        {
            // Korean
            ("코드", 1.0), ("함수", 1.0), ("클래스", 1.0), ("버그", 1.2), ("디버그", 1.2),
            ("리팩토링", 1.5), ("컴파일", 1.2), ("에러", 0.8), ("구현", 0.9), ("개발", 0.7),
            ("테스트", 0.8), ("빌드", 1.0), ("배포", 0.8), ("커밋", 1.2), ("브랜치", 1.2),
            ("머지", 1.0), ("풀리퀘", 1.2), ("변수", 1.0), ("메서드", 1.2), ("인터페이스", 1.0),
            ("타입", 0.6), ("파라미터", 1.0), ("반환", 0.8), ("예외", 1.0), ("스택", 0.9),
            ("알고리즘", 1.2), ("자료구조", 1.2), ("정렬", 0.8), ("재귀", 1.0), ("루프", 0.9),
            ("API", 1.0), ("SDK", 1.0), ("라이브러리", 0.8), ("패키지", 0.8), ("모듈", 0.8),
            ("깃", 1.0), ("레포", 1.0), ("소스", 0.7), ("프로그래밍", 1.0), ("코딩", 1.2),
            // English
            ("code", 1.0), ("function", 1.0), ("class", 0.8), ("bug", 1.2), ("debug", 1.2),
            ("refactor", 1.5), ("compile", 1.2), ("error", 0.6), ("implement", 1.0), ("develop", 0.7),
            ("test", 0.7), ("build", 0.8), ("deploy", 0.8), ("commit", 1.2), ("branch", 1.2),
            ("merge", 1.0), ("variable", 1.0), ("method", 1.0), ("interface", 0.8),
            ("parameter", 1.0), ("return", 0.6), ("exception", 1.0), ("stack", 0.7),
            // NOTE(review): "compile" is listed a second time here (1.2 above, 1.0 below), so a
            // single occurrence contributes 2.2 to the coding score - confirm whether intended.
            ("algorithm", 1.2), ("syntax", 1.2), ("runtime", 1.0), ("compile", 1.0),
            ("git", 1.2), ("npm", 1.0), ("pip", 1.0), ("nuget", 1.0), ("docker", 1.0),
        },
        [Categories.Translation] = new (string, double)[]
        {
            ("번역", 2.0), ("영어로", 2.0), ("한국어로", 2.0), ("일본어로", 2.0), ("중국어로", 2.0),
            ("영문", 1.5), ("국문", 1.5), ("통역", 1.5), ("원문", 1.2), ("의역", 1.5), ("직역", 1.5),
            ("translate", 2.0), ("English", 1.0), ("Korean", 1.0), ("Japanese", 1.0), ("Chinese", 1.0),
            ("translation", 2.0), ("localize", 1.5), ("localization", 1.5),
        },
        [Categories.Analysis] = new (string, double)[]
        {
            ("분석", 1.5), ("요약", 1.5), ("비교", 1.2), ("장단점", 1.5), ("평가", 1.2),
            ("검토", 1.0), ("리뷰", 0.8), ("통계", 1.2), ("데이터", 0.8), ("트렌드", 1.0),
            ("인사이트", 1.2), ("근거", 1.0), ("원인", 0.8), ("결론", 0.8), ("핵심", 0.7),
            ("analyze", 1.5), ("summarize", 1.5), ("compare", 1.2), ("evaluate", 1.2),
            ("review", 0.8), ("statistics", 1.2), ("data", 0.6), ("trend", 1.0),
            ("insight", 1.2), ("pros", 1.0), ("cons", 1.0), ("conclusion", 0.8),
        },
        [Categories.Creative] = new (string, double)[]
        {
            // NOTE(review): the single-syllable keyword "시" is matched as a substring, so it also
            // fires inside unrelated words such as "시간" or "다시" - confirm whether intended.
            ("작성", 0.8), ("글쓰기", 1.5), ("스토리", 1.5), ("시", 1.2), ("소설", 1.5),
            ("에세이", 1.5), ("블로그", 1.2), ("카피", 1.2), ("슬로건", 1.5), ("제목", 0.8),
            ("아이디어", 1.0), ("창작", 1.5), ("묘사", 1.2), ("대본", 1.5), ("가사", 1.5),
            ("story", 1.5), ("poem", 1.5), ("essay", 1.5), ("blog", 1.2), ("creative", 1.5),
            ("slogan", 1.5), ("copy", 0.8), ("fiction", 1.5), ("narrative", 1.2), ("lyrics", 1.5),
        },
        [Categories.Document] = new (string, double)[]
        {
            ("보고서", 2.0), ("문서", 1.2), ("제안서", 2.0), ("기획서", 2.0), ("계획서", 1.8),
            ("발표자료", 2.0), ("프레젠테이션", 2.0), ("양식", 1.5), ("서식", 1.5), ("템플릿", 1.2),
            ("엑셀", 1.5), ("워드", 1.2), ("파워포인트", 1.5), ("PDF", 0.8), ("CSV", 1.0),
            ("회의록", 2.0), ("업무일지", 2.0), ("주간보고", 2.0), ("월간보고", 2.0),
            ("report", 1.8), ("document", 1.0), ("proposal", 2.0), ("presentation", 2.0),
            ("template", 1.2), ("spreadsheet", 1.5), ("excel", 1.5), ("memo", 1.2),
        },
        [Categories.Math] = new (string, double)[]
        {
            ("수학", 1.5), ("계산", 1.2), ("방정식", 2.0), ("증명", 2.0), ("미적분", 2.0),
            ("통계", 1.0), ("확률", 1.5), ("행렬", 2.0), ("벡터", 1.5), ("미분", 2.0),
            ("적분", 2.0), ("함수", 0.5), ("그래프", 0.8), ("좌표", 1.5), ("기하", 1.5),
            ("삼각함수", 2.0), ("로그", 1.0), ("지수", 1.0), ("급수", 2.0), ("극한", 2.0),
            ("math", 1.5), ("calculate", 1.2), ("equation", 2.0), ("proof", 2.0), ("calculus", 2.0),
            ("probability", 1.5), ("matrix", 2.0), ("vector", 1.5), ("derivative", 2.0),
            ("integral", 2.0), ("theorem", 2.0), ("formula", 1.5), ("algebra", 1.5),
        },
    };

    /// <summary>
    /// Detects the intent of a user message.
    /// </summary>
    /// <param name="message">The raw user message. May be null, empty, or whitespace.</param>
    /// <returns>
    /// The winning category name and a confidence in the range 0.0-1.0 (rounded to two decimals).
    /// Returns ("general", 0.0) when the message is blank or no category reaches a score of 1.0.
    /// </returns>
    public static (string Category, double Confidence) Detect(string message)
    {
        if (string.IsNullOrWhiteSpace(message))
        {
            return (Categories.General, 0.0);
        }

        var lowerMessage = message.ToLowerInvariant();

        // Track the best and second-best scores while scoring, so no second pass or sort is needed.
        // On a tie the category enumerated first keeps the lead and the other becomes the runner-up.
        var bestCategory = Categories.General;
        double bestScore = 0;
        double runnerUpScore = 0;

        foreach (var (category, keywords) in _keywords)
        {
            double score = 0;
            foreach (var (keyword, weight) in keywords)
            {
                var lowerKeyword = keyword.ToLowerInvariant();

                // Korean keywords: substring match, so a keyword is still found with a particle attached.
                // Other keywords: case-insensitive whole-word match.
                bool matched = IsKorean(keyword)
                    ? lowerMessage.Contains(lowerKeyword)
                    : ContainsWholeWord(lowerMessage, lowerKeyword);

                if (matched)
                {
                    score += weight;
                }
            }

            if (score > bestScore)
            {
                runnerUpScore = bestScore;
                bestScore = score;
                bestCategory = category;
            }
            else if (score > runnerUpScore)
            {
                runnerUpScore = score;
            }
        }

        if (bestScore < MinimumScore)
        {
            return (Categories.General, 0.0);
        }

        // Map the score onto 0..1: a score of 1.0 gives 0.55, 2.0 gives 0.8, and 2.8 or more caps at 1.0.
        double confidence = Math.Min(1.0, bestScore / 4.0 + 0.3);

        // When the runner-up is close to the winner the message is ambiguous: cut the confidence by 20%.
        if (runnerUpScore > 0 && runnerUpScore / bestScore > AmbiguityRatio)
        {
            confidence *= AmbiguityPenalty;
        }

        return (bestCategory, Math.Round(confidence, 2));
    }

    /// <summary>
    /// Checks whether <paramref name="keyword"/> occurs in <paramref name="text"/> as a whole word,
    /// i.e. not directly preceded or followed by an ASCII letter, digit, or underscore.
    /// Any other neighbour (whitespace, punctuation, Hangul, end of text) counts as a boundary,
    /// so "git" is found in "git으로" and in "\"git\"" but not in "github".
    /// </summary>
    private static bool ContainsWholeWord(string text, string keyword)
    {
        if (keyword.Length == 0)
        {
            return false;
        }

        int index = text.IndexOf(keyword, StringComparison.Ordinal);
        while (index >= 0)
        {
            int end = index + keyword.Length;
            bool startsAtBoundary = index == 0 || !IsAsciiWordChar(text[index - 1]);
            bool endsAtBoundary = end == text.Length || !IsAsciiWordChar(text[end]);
            if (startsAtBoundary && endsAtBoundary)
            {
                return true;
            }

            index = text.IndexOf(keyword, index + 1, StringComparison.Ordinal);
        }

        return false;
    }

    /// <summary>Returns true for ASCII letters, ASCII digits, and the underscore.</summary>
    private static bool IsAsciiWordChar(char ch)
    {
        return ch is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9') or '_';
    }

    /// <summary>Checks whether the text contains at least one Hangul character.</summary>
    private static bool IsKorean(string text)
    {
        foreach (var ch in text)
        {
            if (ch >= 0xAC00 && ch <= 0xD7A3)
            {
                return true; // precomposed Hangul syllables
            }

            if (ch >= 0x3131 && ch <= 0x318E)
            {
                return true; // Hangul compatibility jamo
            }
        }

        return false;
    }
}