Spaces:
Configuration error
Configuration error
File size: 6,650 Bytes
727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 b1c9207 727cf89 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
from smolagents.tools import Tool
import json
import pronouncing
import string
import difflib
class WordPhoneTool(Tool):
    """Report a word's pronunciation and optionally score it against a second word.

    ``forward`` looks the word up with ``pronouncing.phones_for_word`` and
    returns a JSON string describing the first pronunciation found (phones,
    syllable count, stress pattern). When ``compare_to`` is given and is also
    found, a ``similarity`` section is added that combines a rhyme score, a
    syllable-count match and a spelling similarity.

    NOTE(review): ``json``, ``string``, ``pronouncing`` and ``difflib`` are
    imported inside the methods that use them, presumably so each method stays
    self-contained for the tool framework -- confirm before hoisting them.
    """

    name = "word_phonetic_analyzer"
    description = """Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.
Can also compare two words for phonetic similarity."""
    inputs = {
        'word': {
            'type': 'string',
            'description': 'Primary word to analyze for pronunciation patterns',
        },
        'compare_to': {
            'type': 'string',
            'description': 'Optional word to compare against for similarity scoring',
            'nullable': True,
        },
    }
    output_type = "string"

    # Vowel phonemes grouped into "near enough to rhyme" sets: groups are
    # separated by "|", members of a group by ",". A phoneme may appear in
    # more than one group (e.g. "UH", "AO").
    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"

    def _get_vowel_groups(self):
        """Parse ``VOWEL_REF`` into a list of vowel groups (each a list of phoneme strings)."""
        return [group.split(",") for group in self.VOWEL_REF.split("|")]

    def _base_phone(self, phone):
        """Return ``phone`` with every stress digit ("0", "1", "2") removed."""
        return "".join(char for char in phone if char not in "012")

    def _get_last_syllable(self, phones):
        """Find the last vowel in ``phones`` and the phones that follow it.

        Args:
            phones: Sequence of phoneme strings, possibly carrying stress digits.

        Returns:
            A ``(vowel, tail)`` pair: ``vowel`` is the last vowel phoneme with
            its stress digits removed, ``tail`` is the list of phones after it
            (left exactly as given). ``(None, [])`` when no phone is a vowel
            listed in ``VOWEL_REF``.
        """
        vowels = {vowel for group in self._get_vowel_groups() for vowel in group}
        # Walk backwards so the scan stops at the last vowel instead of
        # visiting every phone.
        for idx in range(len(phones) - 1, -1, -1):
            base = self._base_phone(phones[idx])
            if base in vowels:
                return base, list(phones[idx + 1:])
        return None, []

    def _strip_stress(self, phones):
        """Return a new list with stress digits removed from every phone in ``phones``."""
        return [self._base_phone(phone) for phone in phones]

    def _vowels_match(self, v1, v2):
        """Tell whether two vowel phonemes are identical or share a ``VOWEL_REF`` group.

        Stress digits on either argument are ignored.
        """
        first = self._base_phone(v1)
        second = self._base_phone(v2)
        if first == second:
            return True
        return any(
            first in group and second in group
            for group in self._get_vowel_groups()
        )

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Score how alike two words sound and look.

        Args:
            word1: Spelling of the first word.
            phones1: Space-separated phoneme string for ``word1``.
            word2: Spelling of the second word.
            phones2: Space-separated phoneme string for ``word2``.

        Returns:
            Dict with ``similarity`` (weighted total), ``rhyme_score``,
            ``syllable_match`` (bool) and ``string_similarity``; the numeric
            values are rounded to three decimals.

            The total is ``0.6 * rhyme + 0.25 * syllable + 0.15 * string``.
            Because the rhyme score can be 1.2, the total can reach 1.12.
        """
        import pronouncing
        from difflib import SequenceMatcher

        last_vowel1, word1_end = self._get_last_syllable(phones1.split())
        last_vowel2, word2_end = self._get_last_syllable(phones2.split())

        # Rhyme: 0.0 when the final vowels are unrelated (or missing), 0.6 when
        # only the vowels agree, 1.0 when the trailing phones agree too, and
        # 1.2 when, in addition, the spellings differ at most in the first letter.
        rhyme_score = 0.0
        if last_vowel1 and last_vowel2 and self._vowels_match(last_vowel1, last_vowel2):
            if self._strip_stress(word1_end) == self._strip_stress(word2_end):
                rhyme_score = 1.0
                if len(word1) == len(word2) and word1[1:] == word2[1:]:
                    rhyme_score = 1.2
            else:
                rhyme_score = 0.6

        syllable_match = (
            pronouncing.syllable_count(phones1) == pronouncing.syllable_count(phones2)
        )
        syllable_score = 1.0 if syllable_match else 0.0

        # Spelling similarity skips the first letter when both words have more
        # than one character; otherwise the whole words are compared.
        if len(word1) > 1 and len(word2) > 1:
            left, right = word1[1:], word2[1:]
        else:
            left, right = word1, word2
        string_similarity = SequenceMatcher(None, left, right).ratio()

        total_similarity = (
            (rhyme_score * 0.6)
            + (syllable_score * 0.25)
            + (string_similarity * 0.15)
        )
        return {
            "similarity": round(total_similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "syllable_match": syllable_match,
            "string_similarity": round(string_similarity, 3)
        }

    def forward(self, word, compare_to=None):
        """Analyze ``word`` and, if given, compare it with ``compare_to``.

        Both words are lower-cased and have surrounding punctuation and
        whitespace removed before lookup. Only the first pronunciation
        returned by ``pronouncing.phones_for_word`` is used.

        Args:
            word: Word to analyze.
            compare_to: Optional second word; skipped when falsy.

        Returns:
            JSON string (indent 2). When ``word`` is not found it contains
            ``word``, ``found: false`` and ``error``. Otherwise it contains
            ``word``, ``found: true``, ``syllable_count``, ``phones`` and
            ``stresses``; with ``compare_to`` it also has ``comparison``
            (details of the second word, or an ``error`` entry when that word
            is not found) and, when both words are found, ``similarity``.
        """
        import json
        import string
        import pronouncing

        # Strip whitespace as well as punctuation: input such as "cat\n" or
        # " cat" would otherwise miss the dictionary lookup.
        trim_chars = string.punctuation + string.whitespace

        word_clean = word.lower().strip(trim_chars)
        phones = pronouncing.phones_for_word(word_clean)
        if not phones:
            result = {
                'word': word_clean,
                'found': False,
                'error': 'Word not found in dictionary'
            }
            return json.dumps(result, indent=2)

        primary_phones = phones[0]
        result = {
            'word': word_clean,
            'found': True,
            'syllable_count': pronouncing.syllable_count(primary_phones),
            'phones': primary_phones.split(),
            'stresses': pronouncing.stresses(primary_phones)
        }

        if compare_to:
            compare_clean = compare_to.lower().strip(trim_chars)
            compare_phones = pronouncing.phones_for_word(compare_clean)
            if not compare_phones:
                result['comparison'] = {
                    'error': f'Comparison word "{compare_clean}" not found in dictionary'
                }
            else:
                compare_primary = compare_phones[0]
                result['comparison'] = {
                    'word': compare_clean,
                    'syllable_count': pronouncing.syllable_count(compare_primary),
                    'phones': compare_primary.split(),
                    'stresses': pronouncing.stresses(compare_primary)
                }
                result['similarity'] = self._calculate_similarity(
                    word_clean, primary_phones,
                    compare_clean, compare_primary
                )

        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized; positional and keyword arguments are ignored.

        NOTE(review): the base-class initializer is not called here -- confirm
        that ``Tool`` needs no further setup before adding or removing anything.
        """
        self.is_initialized = False
|