Spaces:
Configuration error
Configuration error
from smolagents.tools import Tool | |
import json | |
import pronouncing | |
import string | |
import difflib | |
class WordPhoneTool(Tool):
    """Tool that reports pronunciation data for a word via ``pronouncing``.

    For a single word it returns the phones, syllable count and stress
    pattern of the first pronunciation that ``pronouncing.phones_for_word``
    yields.  When a second word is supplied it additionally returns a
    weighted similarity score built from a rhyme check, a syllable-count
    check and a spelling comparison.

    Imports are repeated inside the methods that need them, as in the
    original code, so each method carries its own dependencies.
    """

    name = "word_phonetic_analyzer"
    description = """Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.
    Can also compare two words for phonetic similarity."""
    inputs = {
        'word': {
            'type': 'string',
            'description': 'Primary word to analyze for pronunciation patterns',
        },
        'compare_to': {
            'type': 'string',
            'description': 'Optional word to compare against for similarity scoring',
            'nullable': True,
        },
    }
    output_type = "string"
    # Vowel equivalence groups encoded as one flat string: "|" separates
    # groups, "," separates the stress-free phone symbols inside a group.
    # Two vowels count as matching when they share at least one group.
    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"

    def _get_vowel_groups(self):
        """Parse ``VOWEL_REF`` into a list of groups (lists of phone symbols)."""
        return [group.split(",") for group in self.VOWEL_REF.split("|")]

    def _get_last_syllable(self, phones):
        """Locate the final vowel of a pronunciation.

        Args:
            phones: List of phone symbols, optionally carrying stress digits.

        Returns:
            A ``(vowel, tail)`` tuple where ``vowel`` is the last vowel with
            its stress digits removed and ``tail`` is the list of phones that
            follow it (stress digits untouched).  ``(None, [])`` is returned
            when no phone belongs to any vowel group.
        """
        vowels = {symbol for group in self._get_vowel_groups() for symbol in group}
        base_phones = self._strip_stress(phones)
        # Scan backwards: the first vowel met from the end is the last vowel.
        for idx in range(len(base_phones) - 1, -1, -1):
            if base_phones[idx] in vowels:
                return base_phones[idx], list(phones[idx + 1:])
        return None, []

    def _strip_stress(self, phones):
        """Return ``phones`` with every stress digit (0, 1, 2) removed."""
        return [
            "".join(char for char in phone if char not in "012")
            for phone in phones
        ]

    def _vowels_match(self, v1, v2):
        """Return True when two vowels are identical or share a vowel group.

        Stress digits on either argument are ignored.
        """
        first, second = self._strip_stress([v1, v2])
        if first == second:
            return True
        return any(
            first in group and second in group
            for group in self._get_vowel_groups()
        )

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Score how alike two words sound and look.

        Args:
            word1: Spelling of the first word.
            phones1: Space-separated phone string for ``word1``.
            word2: Spelling of the second word.
            phones2: Space-separated phone string for ``word2``.

        Returns:
            Dict with keys ``similarity`` (weighted total, rounded to three
            decimals), ``rhyme_score``, ``syllable_match`` (bool) and
            ``string_similarity``.  Weights are 0.6 rhyme, 0.25 syllable
            count, 0.15 spelling.  Because the rhyme score may reach 1.2,
            ``similarity`` can exceed 1.0.
        """
        import pronouncing
        from difflib import SequenceMatcher

        last_vowel1, word1_end = self._get_last_syllable(phones1.split())
        last_vowel2, word2_end = self._get_last_syllable(phones2.split())

        rhyme_score = 0.0
        if last_vowel1 and last_vowel2 and self._vowels_match(last_vowel1, last_vowel2):
            # NOTE(review): the source this was recovered from had lost its
            # indentation; the 0.6 branch is taken to be the `else` of the
            # ending comparison (vowels agree, trailing phones differ).
            # Confirm against the canonical file.
            if self._strip_stress(word1_end) == self._strip_stress(word2_end):
                rhyme_score = 1.0
                # Bonus when the spellings differ in at most the first letter.
                if len(word1) == len(word2) and word1[1:] == word2[1:]:
                    rhyme_score = 1.2
            else:
                rhyme_score = 0.6

        syllable_match = (
            pronouncing.syllable_count(phones1) == pronouncing.syllable_count(phones2)
        )
        syllable_score = 1.0 if syllable_match else 0.0

        # Ignore the first letter when both words are long enough, so that
        # pairs differing only in their initial letter compare as identical.
        if len(word1) > 1 and len(word2) > 1:
            left, right = word1[1:], word2[1:]
        else:
            left, right = word1, word2
        string_similarity = SequenceMatcher(None, left, right).ratio()

        total_similarity = (
            (rhyme_score * 0.6)
            + (syllable_score * 0.25)
            + (string_similarity * 0.15)
        )
        return {
            "similarity": round(total_similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "syllable_match": syllable_match,
            "string_similarity": round(string_similarity, 3)
        }

    def _pronunciation_summary(self, phones):
        """Build the syllable/phones/stresses entries for one phone string."""
        import pronouncing

        return {
            'syllable_count': pronouncing.syllable_count(phones),
            'phones': phones.split(),
            'stresses': pronouncing.stresses(phones)
        }

    def forward(self, word, compare_to=None):
        """Analyze ``word`` and optionally compare it with ``compare_to``.

        Both words are lower-cased and have surrounding whitespace and
        punctuation removed before lookup.  Only the first pronunciation
        returned for each word is used.

        Args:
            word: Word to analyze.
            compare_to: Optional second word; skipped when empty or None.

        Returns:
            A JSON string (indent 2).  If ``word`` is unknown it holds
            ``word``, ``found: false`` and ``error``.  Otherwise it holds
            ``word``, ``found``, ``syllable_count``, ``phones`` and
            ``stresses``; with a comparison word it also holds
            ``comparison`` (the same fields for that word, or an ``error``)
            and, when that word is known, ``similarity``.
        """
        import json
        import string
        import pronouncing

        # Strip whitespace as well as punctuation so that inputs such as
        # "cat " or " cat." still resolve to a dictionary entry.
        strip_chars = string.punctuation + string.whitespace

        word_clean = word.lower().strip(strip_chars)
        phones = pronouncing.phones_for_word(word_clean)
        if not phones:
            result = {
                'word': word_clean,
                'found': False,
                'error': 'Word not found in dictionary'
            }
            return json.dumps(result, indent=2)

        primary_phones = phones[0]
        result = {'word': word_clean, 'found': True}
        result.update(self._pronunciation_summary(primary_phones))

        if compare_to:
            compare_clean = compare_to.lower().strip(strip_chars)
            compare_phones = pronouncing.phones_for_word(compare_clean)
            if not compare_phones:
                result['comparison'] = {
                    'error': f'Comparison word "{compare_clean}" not found in dictionary'
                }
            else:
                compare_primary = compare_phones[0]
                comparison = {'word': compare_clean}
                comparison.update(self._pronunciation_summary(compare_primary))
                result['comparison'] = comparison
                result['similarity'] = self._calculate_similarity(
                    word_clean, primary_phones,
                    compare_clean, compare_primary
                )
        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but unused; the only
        # state set here is the flag below.
        # NOTE(review): the base class initializer is not called — presumably
        # intentional for this tool format; confirm against the Tool base class.
        self.is_initialized = False