/**
 * Fuzzy matcher for Cantonese text based on per-character Jyutping
 * romanisation.
 *
 * Usage: construct, `await initialize()` to download the lexicon and the
 * candidate list, then call `matchUserInput(text)`.
 */
class CantonesePhonetics {
  constructor() {
    // Character -> Jyutping syllable lookup, filled by initialize().
    this.charToJyutping = {};
    // Candidate entries; the code below reads `text` and `jyutping` from each.
    this.savedResults = [];

    // Initials treated as interchangeable when comparing two syllables.
    // Lookups are performed in both directions, so the table need not be symmetric.
    this.similarInitials = {
      b: ["p", "m"],
      c: ["z", "s"],
      d: ["t", "n"],
      f: ["h", "w"],
      g: ["k", "ng"],
      gw: ["kw"],
      h: ["f", "w"],
      j: ["z", "c"],
      k: ["g", "h"],
      kw: ["gw"],
      l: ["n"],
      m: ["n", "b"],
      n: ["l", "m", "ng"],
      ng: ["g", "n"],
      p: ["b", "m"],
      s: ["c", "z"],
      t: ["d", "n"],
      w: ["f", "h"],
      z: ["c", "j"]
    };

    // Finals (without tone digit) treated as interchangeable.
    this.similarFinals = {
      aa: ["a", "aai", "aau"],
      aai: ["aa", "ai"],
      aau: ["aa", "au"],
      ai: ["ei", "aai"],
      au: ["ou", "aau"],
      e: ["i", "ei"],
      ei: ["ai", "i"],
      i: ["e", "ei", "yu"],
      o: ["u", "ou"],
      oi: ["ui"],
      ou: ["u", "au"],
      u: ["o", "ou", "yu"],
      ui: ["oi"],
      yu: ["i", "u"]
    };
  }

  /**
   * Downloads the lexicon and the candidate list in parallel and stores them
   * on the instance.
   *
   * @returns {Promise<void>}
   * @throws {Error} if either request fails or answers with a non-2xx status.
   */
  async initialize() {
    // Without the `ok` check an HTTP error page would surface as an opaque
    // JSON parse error instead of a message naming the failing URL.
    const fetchJson = async url => {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
      }
      return response.json();
    };

    const [jyutpingData, results] = await Promise.all([
      fetchJson("https://huggingface.co/spaces/OttoYu/Cantonese-Phonetics/raw/main/lexi-can_key.json"),
      fetchJson("https://huggingface.co/spaces/OttoYu/Cantonese-Phonetics/raw/main/jyutping_results_largec.json")
    ]);

    this.charToJyutping = this.preprocessJyutpingData(jyutpingData);
    this.savedResults = results;
  }

  /**
   * Inverts a `{ syllable: [{ "漢字": characters }, ...] }` lexicon into a
   * `{ character: syllable }` lookup.
   *
   * When a character is listed under several syllables, the syllable visited
   * last wins. Mappings without a "漢字" field are skipped.
   *
   * @param {Object} jyutpingData - lexicon keyed by syllable.
   * @returns {Object} character -> syllable lookup.
   */
  preprocessJyutpingData(jyutpingData) {
    const result = {};
    for (const [syllable, mappings] of Object.entries(jyutpingData)) {
      for (const mapping of mappings) {
        const characters = mapping["漢字"];
        if (!characters) {
          continue;
        }
        // for...of walks code points, so supplementary-plane characters
        // become single keys.
        for (const char of characters) {
          result[char] = syllable;
        }
      }
    }
    return result;
  }

  /**
   * Converts text to one token per character: the Jyutping syllable when the
   * character is in the lookup, otherwise the character itself.
   *
   * @param {string} text
   * @returns {string[]}
   */
  chineseToJyutping(text) {
    // Array.from iterates by code point, matching how the lookup keys were
    // built; splitting by UTF-16 unit would break characters such as 𠮶 into
    // two lone surrogates that can never be found.
    return Array.from(text, char => this.charToJyutping[char] || char);
  }

  /**
   * Splits a syllable into initial consonant, final and trailing tone digit.
   *
   * The initial is one of the consonants used as keys in `similarInitials`
   * (two-letter `gw`, `kw`, `ng` are tried first); syllables that start with
   * anything else get an empty initial. A single trailing digit 1-9, if
   * present, is reported as the tone. Any string is accepted; tokens that are
   * not Jyutping simply end up entirely in `final`.
   *
   * @param {string} jyutping
   * @returns {{initial: string, final: string, tone: string}}
   */
  splitJyutping(jyutping) {
    // [\s\S] instead of "." so that a newline token still matches.
    const parts = /^(gw|kw|ng|[bcdfghjklmnpstwz])?([\s\S]*?)([1-9]?)$/.exec(jyutping);
    return {
      initial: parts[1] || "",
      final: parts[2],
      tone: parts[3]
    };
  }

  /**
   * Tells whether two syllables sound alike: same tone digit, and both the
   * initials and the finals are either identical or listed as similar in
   * `similarInitials` / `similarFinals` (checked in both directions).
   *
   * @param {string} jyutping1
   * @param {string} jyutping2
   * @returns {boolean}
   */
  areJyutpingSimilar(jyutping1, jyutping2) {
    const first = this.splitJyutping(jyutping1);
    const second = this.splitJyutping(jyutping2);

    const related = (table, a, b) =>
      a === b ||
      (Array.isArray(table[a]) && table[a].includes(b)) ||
      (Array.isArray(table[b]) && table[b].includes(a));

    return (
      first.tone === second.tone &&
      related(this.similarInitials, first.initial, second.initial) &&
      related(this.similarFinals, first.final, second.final)
    );
  }

  /**
   * Fraction of syllables that can be paired up as similar, relative to the
   * longer of the two sequences. Order is ignored.
   *
   * Each candidate syllable is paired with at most one user syllable, which
   * keeps the score within [0, 1] even when syllables repeat. Returns 0 when
   * both sequences are empty.
   *
   * @param {string[]} userJyutping
   * @param {string[]} resultJyutping
   * @returns {number} score in [0, 1].
   */
  calculatePhoneticSimilarity(userJyutping, resultJyutping) {
    const longest = Math.max(userJyutping.length, resultJyutping.length);
    if (longest === 0) {
      return 0; // avoids 0 / 0 = NaN
    }

    const usedIndexes = new Set();
    let similarCount = 0;
    for (const userSyllable of userJyutping) {
      const index = resultJyutping.findIndex(
        (candidate, i) => !usedIndexes.has(i) && this.areJyutpingSimilar(userSyllable, candidate)
      );
      if (index !== -1) {
        usedIndexes.add(index);
        similarCount += 1;
      }
    }
    return similarCount / longest;
  }

  /**
   * Normalised edit-distance similarity of two strings:
   * `(longerLength - editDistance) / longerLength`, or 1.0 when both are empty.
   *
   * @param {string} s1
   * @param {string} s2
   * @returns {number}
   */
  similarity(s1, s2) {
    let longer = s1;
    let shorter = s2;
    if (s1.length < s2.length) {
      longer = s2;
      shorter = s1;
    }
    const longerLength = longer.length;
    if (longerLength === 0) {
      return 1.0;
    }
    return (longerLength - this.editDistance(longer, shorter)) / longerLength;
  }

  /**
   * Case-insensitive Levenshtein distance, computed with a single rolling row.
   *
   * @param {string} s1
   * @param {string} s2
   * @returns {number}
   */
  editDistance(s1, s2) {
    const a = s1.toLowerCase();
    const b = s2.toLowerCase();
    const costs = [];
    for (let i = 0; i <= a.length; i++) {
      // lastValue carries the cell to the left in the current row.
      let lastValue = i;
      for (let j = 0; j <= b.length; j++) {
        if (i === 0) {
          costs[j] = j;
        } else if (j > 0) {
          let newValue = costs[j - 1];
          if (a.charAt(i - 1) !== b.charAt(j - 1)) {
            newValue = Math.min(newValue, lastValue, costs[j]) + 1;
          }
          costs[j - 1] = lastValue;
          lastValue = newValue;
        }
      }
      if (i > 0) {
        costs[b.length] = lastValue;
      }
    }
    return costs[b.length];
  }

  /**
   * Looks up the saved entries that best match the user's text.
   *
   * - Empty input yields `{ input_text, input_jyutping, matches: [] }`.
   * - If some entry's `jyutping` contains every token of the input, the first
   *   such entry is returned as `{ ..., match, match_type: "exact" }`. This is
   *   a containment test, not sequence equality.
   * - Otherwise entries having both `text` and `jyutping` are scored as
   *   0.7 * phonetic + 0.2 * text similarity + 0.1 * length closeness, and the
   *   three best are returned under `matches`.
   *
   * @param {string} userInput
   * @returns {Object}
   */
  matchUserInput(userInput) {
    const userJyutping = this.chineseToJyutping(userInput);

    // `every` is vacuously true on an empty list, which would report the
    // first saved entry as an exact match for empty input.
    if (userJyutping.length === 0) {
      return { input_text: userInput, input_jyutping: userJyutping, matches: [] };
    }

    // Entries without `jyutping` are skipped rather than dereferenced.
    const exactMatch = this.savedResults.find(
      result => result && result.jyutping && userJyutping.every(uj => result.jyutping.includes(uj))
    );
    if (exactMatch) {
      return {
        input_text: userInput,
        input_jyutping: userJyutping,
        match: exactMatch,
        match_type: "exact"
      };
    }

    const matches = [];
    for (const result of this.savedResults) {
      if (!result || !result.text || !result.jyutping) {
        continue;
      }
      const phoneticScore = this.calculatePhoneticSimilarity(userJyutping, result.jyutping);
      const textSimilarity = this.similarity(userInput, result.text);
      const lengthDiff = Math.abs(userInput.length - result.text.length);
      const lengthPenalty = 1 / (1 + lengthDiff);
      const score = phoneticScore * 0.7 + textSimilarity * 0.2 + lengthPenalty * 0.1;
      matches.push({ result, score });
    }
    matches.sort((a, b) => b.score - a.score);

    return {
      input_text: userInput,
      input_jyutping: userJyutping,
      matches: matches.slice(0, 3).map(match => ({
        match: match.result,
        score: match.score,
        match_type: "phonetic_similarity"
      }))
    };
  }
}

const phonetics = new CantonesePhonetics();