// Listing metadata: C#, 469 lines, 9.5 KiB.
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using AxCopilot.Services;
|
|
|
|
namespace AxCopilot.Core;
|
|
|
|
/// <summary>
/// Ranks the entries of an <see cref="IndexService"/> against a typed query.
/// </summary>
/// <remarks>
/// Each candidate name is tried against progressively looser matchers and the
/// first one that succeeds decides the score (the entry's own score is added on top):
/// exact (1000), prefix (800), substring (600), decomposed-jamo prefix (580),
/// decomposed-jamo substring (550), decomposed-jamo subsequence (400),
/// consecutive initial consonants (520), initial-consonant subsequence (480),
/// mixed syllable/initial-consonant subsequence (460) and finally a generic
/// character-subsequence match (at least 50). All comparisons are ordinal.
/// </remarks>
public class FuzzyEngine
{
    // Layout of the precomposed Hangul syllable block: syllables are ordered by
    // initial consonant, then vowel, then final consonant.
    private const char FirstHangulSyllable = '가';
    private const char LastHangulSyllable = '힣';
    private const int JongsungCount = 28;
    private const int SyllablesPerChosung = 588; // 21 vowels * 28 finals

    // Above this many entries the per-entry scoring is spread over several threads.
    private const int ParallelThreshold = 300;

    private const int ExactScore = 1000;
    private const int PrefixScore = 800;
    private const int SubstringScore = 600;
    private const int JamoPrefixScore = 580;
    private const int JamoSubstringScore = 550;
    private const int ChosungRunScore = 520;
    private const int ChosungSubsequenceScore = 480;
    private const int MixedChosungScore = 460;
    private const int JamoSubsequenceScore = 400;

    private const int FuzzyAdjacentBonus = 30;
    private const int FuzzyGapBonus = 10;
    private const int FuzzyLeadingBonus = 15;
    private const int FuzzyFloor = 50;

    private readonly IndexService _index;

    private static readonly char[] Chosungs =
    {
        'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ',
        'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
    };

    // Built from Chosungs (declared above, so already initialised) instead of a second copy of the literal.
    private static readonly HashSet<char> ChosungSet = new HashSet<char>(Chosungs);

    private static readonly char[] Jungsungs =
    {
        'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ',
        'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ',
        'ㅣ'
    };

    // Index 0 means "no final consonant" and is never appended.
    private static readonly char[] Jongsungs =
    {
        '\0', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ',
        'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ',
        'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
    };

    /// <summary>Creates an engine that searches the entries exposed by <paramref name="index"/>.</summary>
    /// <param name="index">Index whose <c>Entries</c> are scored on every search.</param>
    public FuzzyEngine(IndexService index)
    {
        _index = index;
    }

    /// <summary>
    /// Scores every index entry against <paramref name="query"/> and returns the best matches.
    /// </summary>
    /// <param name="query">Text typed by the user; it is trimmed and lower-cased (invariant) before matching.</param>
    /// <param name="maxResults">Maximum number of results to return.</param>
    /// <returns>
    /// An already-materialised list of entries with a positive score, highest score first.
    /// Entries with equal scores keep their index order. Empty when the query is null or white space.
    /// </returns>
    public IEnumerable<FuzzyResult> Search(string query, int maxResults = 7)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return Enumerable.Empty<FuzzyResult>();
        }

        // Everything that depends only on the query is computed once, not once per entry.
        PreparedQuery prepared = new PreparedQuery(query.Trim().ToLowerInvariant());
        IReadOnlyList<IndexEntry> entries = _index.Entries;

        IEnumerable<FuzzyResult> matches;
        if (entries.Count > ParallelThreshold)
        {
            // Score in parallel but keep the source order, so that the stable sequential
            // sort below breaks ties exactly like the small-index path does.
            matches = entries.AsParallel()
                .AsOrdered()
                .Select(entry => new FuzzyResult(entry, CalculateScoreFast(prepared, entry)))
                .Where(result => result.Score > 0)
                .AsSequential();
        }
        else
        {
            matches = entries
                .Select(entry => new FuzzyResult(entry, CalculateScoreFast(prepared, entry)))
                .Where(result => result.Score > 0);
        }

        // Materialise so that enumerating the result twice does not re-score the whole index.
        return matches
            .OrderByDescending(result => result.Score)
            .Take(maxResults)
            .ToList();
    }

    /// <summary>
    /// Scores one index entry, using the entry's precomputed lower-case, jamo and
    /// initial-consonant forms when they are present.
    /// </summary>
    /// <returns>The match score plus the entry's own score, or 0 when nothing matches.</returns>
    private static int CalculateScoreFast(PreparedQuery query, IndexEntry entry)
    {
        string text = query.Text;
        if (text.Length == 0)
        {
            return 0;
        }

        string name = string.IsNullOrEmpty(entry.NameLower) ? entry.Name.ToLowerInvariant() : entry.NameLower;

        int direct = DirectMatchScore(text, name);
        if (direct > 0)
        {
            return direct + entry.Score;
        }

        // The Korean-specific matchers can only succeed when the query has Hangul in it.
        if (query.HasSyllable)
        {
            string nameJamo = string.IsNullOrEmpty(entry.NameJamo) ? DecomposeToJamo(name) : entry.NameJamo;
            int jamo = ScoreDecomposedJamo(nameJamo, query.Jamo);
            if (jamo > 0)
            {
                return jamo + entry.Score;
            }
        }

        if (query.HasChosungChar)
        {
            // A query of initial consonants only is matched against the entry's precomputed
            // initial-consonant string; an entry without one never matches such a query.
            int chosung = query.IsPureChosung
                ? ScorePureChosung(entry.NameChosung, text)
                : MixedChosungMatch(name, text);
            if (chosung > 0)
            {
                return chosung + entry.Score;
            }
        }

        int fuzzy = FuzzyMatch(text, name);
        return fuzzy > 0 ? fuzzy + entry.Score : 0;
    }

    /// <summary>
    /// Scores <paramref name="target"/> against <paramref name="query"/> without any precomputed data.
    /// </summary>
    /// <param name="query">Normalised query text.</param>
    /// <param name="target">Normalised candidate text.</param>
    /// <param name="baseScore">Value added to any positive match score.</param>
    /// <returns>The match score plus <paramref name="baseScore"/>, or 0 when nothing matches or the query is empty.</returns>
    internal static int CalculateScore(string query, string target, int baseScore)
    {
        if (query.Length == 0)
        {
            return 0;
        }

        int direct = DirectMatchScore(query, target);
        if (direct > 0)
        {
            return direct + baseScore;
        }

        int jamo = JamoContainsScore(target, query);
        if (jamo > 0)
        {
            return jamo + baseScore;
        }

        int chosung = ChosungMatchScore(target, query);
        if (chosung > 0)
        {
            return chosung + baseScore;
        }

        int fuzzy = FuzzyMatch(query, target);
        return fuzzy > 0 ? fuzzy + baseScore : 0;
    }

    /// <summary>
    /// Scores <paramref name="query"/> as an in-order (not necessarily contiguous) subsequence of <paramref name="target"/>.
    /// </summary>
    /// <returns>
    /// 0 when the query is empty or is not a subsequence of the target; otherwise the accumulated
    /// bonus, never less than 50.
    /// </returns>
    internal static int FuzzyMatch(string query, string target)
    {
        // An empty query matches nothing; without this guard it would earn the floor score.
        if (query.Length == 0)
        {
            return 0;
        }

        int matched = 0;
        int score = 0;
        int lastHit = -1;
        for (int position = 0; position < target.Length && matched < query.Length; position++)
        {
            if (query[matched] != target[position])
            {
                continue;
            }

            // Because lastHit starts at -1, a hit at position 0 counts as "adjacent"
            // and additionally receives the leading bonus.
            score += lastHit == position - 1 ? FuzzyAdjacentBonus : FuzzyGapBonus;
            if (position == 0)
            {
                score += FuzzyLeadingBonus;
            }

            lastHit = position;
            matched++;
        }

        return matched == query.Length ? Math.Max(score, FuzzyFloor) : 0;
    }

    /// <summary>
    /// Replaces every precomposed Hangul syllable in <paramref name="text"/> with its
    /// initial consonant, vowel and (if present) final consonant; other characters are copied unchanged.
    /// </summary>
    internal static string DecomposeToJamo(string text)
    {
        StringBuilder jamo = new StringBuilder(text.Length * 3);
        foreach (char ch in text)
        {
            if (!IsHangulSyllable(ch))
            {
                jamo.Append(ch);
                continue;
            }

            int offset = ch - FirstHangulSyllable;
            jamo.Append(Chosungs[offset / SyllablesPerChosung]);
            jamo.Append(Jungsungs[offset % SyllablesPerChosung / JongsungCount]);

            int final = offset % JongsungCount;
            if (final > 0)
            {
                jamo.Append(Jongsungs[final]);
            }
        }

        return jamo.ToString();
    }

    /// <summary>
    /// Returns the initial consonant of a precomposed Hangul syllable, or <c>'\0'</c> for any other character.
    /// </summary>
    internal static char GetChosung(char hangul)
    {
        if (!IsHangulSyllable(hangul))
        {
            return '\0';
        }

        return Chosungs[(hangul - FirstHangulSyllable) / SyllablesPerChosung];
    }

    /// <summary>
    /// Matches the jamo decomposition of <paramref name="query"/> against the jamo decomposition of <paramref name="target"/>.
    /// </summary>
    /// <returns>
    /// 580 for a prefix match, 550 for a substring match, 400 for a subsequence match, and 0 otherwise
    /// or when the query contains no precomposed Hangul syllable.
    /// </returns>
    internal static int JamoContainsScore(string target, string query)
    {
        if (!HasKorean(query))
        {
            return 0;
        }

        return ScoreDecomposedJamo(DecomposeToJamo(target), DecomposeToJamo(query));
    }

    /// <summary>Returns true when <paramref name="text"/> contains at least one initial-consonant jamo.</summary>
    internal static bool HasChosung(string text)
    {
        return text.Any(c => ChosungSet.Contains(c));
    }

    /// <summary>Returns true when <paramref name="text"/> is non-empty and consists only of initial-consonant jamo.</summary>
    internal static bool IsChosung(string text)
    {
        return text.Length > 0 && text.All(c => ChosungSet.Contains(c));
    }

    /// <summary>
    /// Matches a query that contains initial-consonant jamo against <paramref name="target"/>.
    /// </summary>
    /// <remarks>
    /// Only Hangul syllables, the letters a-z and the digits 0-9 of the target take part; every other
    /// character is skipped.
    /// </remarks>
    /// <returns>
    /// For a query made only of initial consonants: 520 when they occur consecutively, 480 when they
    /// occur in order. For a mixed query: 460 when it occurs in order. 0 otherwise.
    /// </returns>
    internal static int ChosungMatchScore(string target, string query)
    {
        if (!HasChosung(query))
        {
            return 0;
        }

        bool pureChosung = IsChosung(query);

        // Pure queries are compared with the initial consonant of each syllable; mixed queries
        // need the syllable itself because their non-jamo characters are compared literally.
        StringBuilder comparable = new StringBuilder(target.Length);
        foreach (char ch in target)
        {
            char lead = GetChosung(ch);
            if (lead != '\0')
            {
                comparable.Append(pureChosung ? lead : ch);
            }
            else if (IsAsciiLowerOrDigit(ch))
            {
                comparable.Append(ch);
            }
        }

        if (comparable.Length == 0)
        {
            return 0;
        }

        return pureChosung
            ? ScorePureChosung(comparable.ToString(), query)
            : MixedChosungMatch(comparable.ToString(), query);
    }

    /// <summary>
    /// Returns true when the initial consonants of the Hangul syllables in <paramref name="target"/>
    /// contain <paramref name="chosungQuery"/> as an in-order subsequence.
    /// </summary>
    internal static bool ContainsChosung(string target, string chosungQuery)
    {
        string leads = new string(target.Select(GetChosung).Where(c => c != '\0').ToArray());
        if (leads.Length < chosungQuery.Length)
        {
            return false;
        }

        // A consecutive occurrence is also a subsequence, so one check covers both cases.
        return IsSubsequence(chosungQuery, leads);
    }

    /// <summary>Returns true when <paramref name="text"/> contains at least one precomposed Hangul syllable.</summary>
    private static bool HasKorean(string text)
    {
        return text.Any(c => IsHangulSyllable(c));
    }

    /// <summary>Returns true for characters in the precomposed Hangul syllable range.</summary>
    private static bool IsHangulSyllable(char c)
    {
        return c >= FirstHangulSyllable && c <= LastHangulSyllable;
    }

    /// <summary>Returns true for the characters a-z and 0-9.</summary>
    private static bool IsAsciiLowerOrDigit(char c)
    {
        return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
    }

    /// <summary>Scores an exact, prefix or substring match (ordinal); 0 when none applies.</summary>
    private static int DirectMatchScore(string query, string target)
    {
        if (target == query)
        {
            return ExactScore;
        }

        // Ordinal on purpose: both strings are already normalised, and a culture-sensitive
        // prefix test treats ignorable characters as an empty (always matching) prefix.
        if (target.StartsWith(query, StringComparison.Ordinal))
        {
            return PrefixScore;
        }

        if (target.Contains(query, StringComparison.Ordinal))
        {
            return SubstringScore;
        }

        return 0;
    }

    /// <summary>Scores an already-decomposed query against an already-decomposed target.</summary>
    private static int ScoreDecomposedJamo(string targetJamo, string queryJamo)
    {
        if (queryJamo.Length == 0 || targetJamo.Length == 0)
        {
            return 0;
        }

        int at = targetJamo.IndexOf(queryJamo, StringComparison.Ordinal);
        if (at >= 0)
        {
            return at == 0 ? JamoPrefixScore : JamoSubstringScore;
        }

        return IsSubsequence(queryJamo, targetJamo) ? JamoSubsequenceScore : 0;
    }

    /// <summary>
    /// Scores a query made only of initial consonants against a target's initial-consonant string.
    /// A null or empty target string never matches.
    /// </summary>
    private static int ScorePureChosung(string? targetChosungs, string query)
    {
        if (string.IsNullOrEmpty(targetChosungs))
        {
            return 0;
        }

        if (targetChosungs.Contains(query, StringComparison.Ordinal))
        {
            return ChosungRunScore;
        }

        return IsSubsequence(query, targetChosungs) ? ChosungSubsequenceScore : 0;
    }

    /// <summary>
    /// Matches a query that mixes initial-consonant jamo with other characters as an in-order
    /// subsequence of <paramref name="target"/>: a jamo in the query matches a syllable with that
    /// initial consonant, any other query character must match literally.
    /// </summary>
    private static int MixedChosungMatch(string target, string query)
    {
        int matched = 0;
        for (int position = 0; position < target.Length && matched < query.Length; position++)
        {
            char wanted = query[matched];

            // Non-syllable target characters yield '\0' here and therefore never satisfy a jamo.
            char actual = ChosungSet.Contains(wanted) ? GetChosung(target[position]) : target[position];
            if (actual == wanted)
            {
                matched++;
            }
        }

        return matched == query.Length ? MixedChosungScore : 0;
    }

    /// <summary>Returns true when every character of <paramref name="needle"/> occurs in <paramref name="haystack"/> in the same order.</summary>
    private static bool IsSubsequence(string needle, string haystack)
    {
        int matched = 0;
        for (int position = 0; position < haystack.Length && matched < needle.Length; position++)
        {
            if (haystack[position] == needle[matched])
            {
                matched++;
            }
        }

        return matched == needle.Length;
    }

    /// <summary>Query text together with the facts about it that every entry comparison needs.</summary>
    private readonly struct PreparedQuery
    {
        public PreparedQuery(string text)
        {
            Text = text;
            HasSyllable = HasKorean(text);
            HasChosungChar = HasChosung(text);
            IsPureChosung = IsChosung(text);
            Jamo = HasSyllable ? DecomposeToJamo(text) : string.Empty;
        }

        /// <summary>Trimmed, lower-cased query.</summary>
        public string Text { get; }

        /// <summary>True when the query contains a precomposed Hangul syllable.</summary>
        public bool HasSyllable { get; }

        /// <summary>True when the query contains an initial-consonant jamo.</summary>
        public bool HasChosungChar { get; }

        /// <summary>True when the query consists only of initial-consonant jamo.</summary>
        public bool IsPureChosung { get; }

        /// <summary>Jamo decomposition of the query; empty when it has no syllable.</summary>
        public string Jamo { get; }
    }
}
|