word-phone / tool.py
patruff's picture
Upload tool
b1c9207 verified
from smolagents.tools import Tool
import json
import pronouncing
import string
import difflib
class WordPhoneTool(Tool):
    """Report pronunciation data for a word and optionally compare two words.

    Pronunciations are looked up through the ``pronouncing`` package. Only the
    first pronunciation returned for a word is used. Results are returned as a
    JSON string (see ``forward``).

    NOTE(review): the helpers use function-local imports, explicit loops and
    index access instead of module-level imports, comprehensions and tuple
    unpacking. This is presumably required so the tool source stays
    self-contained and passes smolagents' tool validation -- confirm before
    modernising the style.
    """

    name = "word_phonetic_analyzer"
    description = (
        "Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.\n"
        "Can also compare two words for phonetic similarity."
    )
    inputs = {
        'word': {
            'type': 'string',
            'description': 'Primary word to analyze for pronunciation patterns',
        },
        'compare_to': {
            'type': 'string',
            'description': 'Optional word to compare against for similarity scoring',
            'nullable': True,
        },
    }
    output_type = "string"

    # Vowel groups: "|" separates groups, "," separates the phones in a group.
    # Two vowels found in the same group are treated as matching for rhyme
    # purposes. A phone may appear in more than one group.
    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"

    def _get_vowel_groups(self):
        """Parse ``VOWEL_REF`` into a list of groups, each a list of phones."""
        groups = []
        for group_str in self.VOWEL_REF.split("|"):
            groups.append(group_str.split(","))
        return groups

    def _base_phone(self, phone):
        """Return ``phone`` with every stress digit (0, 1 or 2) removed."""
        return phone.translate(str.maketrans("", "", "012"))

    def _get_last_syllable(self, phones):
        """Locate the last vowel in ``phones`` and the phones that follow it.

        Args:
            phones: Sequence of phone strings, with or without stress digits.

        Returns:
            A tuple ``(vowel, trailing)``. ``vowel`` is the stress-stripped
            last phone that belongs to any vowel group and ``trailing`` is the
            list of phones after it (stress digits kept). ``(None, [])`` is
            returned when no phone is a known vowel.
        """
        vowel_groups = self._get_vowel_groups()
        # Scan from the end so the first hit is the last vowel.
        for idx in range(len(phones) - 1, -1, -1):
            base_phone = self._base_phone(phones[idx])
            for group in vowel_groups:
                if base_phone in group:
                    return base_phone, list(phones[idx + 1:])
        return None, []

    def _strip_stress(self, phones):
        """Return a new list holding each phone with stress digits removed."""
        result = []
        for phone in phones:
            result.append(self._base_phone(phone))
        return result

    def _vowels_match(self, v1, v2):
        """Return True if two vowels are identical or share a vowel group.

        Stress digits on either argument are ignored.
        """
        base1 = self._base_phone(v1)
        base2 = self._base_phone(v2)
        if base1 == base2:
            return True
        for group in self._get_vowel_groups():
            if base1 in group and base2 in group:
                return True
        return False

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Score how alike two words sound and look.

        Args:
            word1: Spelling of the first word.
            phones1: Space-separated phone string for the first word.
            word2: Spelling of the second word.
            phones2: Space-separated phone string for the second word.

        Returns:
            A dict with keys ``similarity`` (weighted total), ``rhyme_score``,
            ``syllable_match`` (bool) and ``string_similarity``; the three
            numeric values are rounded to 3 decimals.

        NOTE(review): ``rhyme_score`` can be 1.2, so ``similarity`` can exceed
        1.0 (up to 1.12). This is kept as-is; confirm the bonus is intended.
        """
        import pronouncing
        from difflib import SequenceMatcher

        ending1 = self._get_last_syllable(phones1.split())
        ending2 = self._get_last_syllable(phones2.split())
        last_vowel1 = ending1[0]
        tail1 = ending1[1]
        last_vowel2 = ending2[0]
        tail2 = ending2[1]

        # Rhyme: 0.6 when only the last vowels match, 1.0 when the phones after
        # the vowel match too, 1.2 when additionally the spellings have equal
        # length and differ at most in their first letter.
        rhyme_score = 0.0
        if last_vowel1 and last_vowel2 and self._vowels_match(last_vowel1, last_vowel2):
            if self._strip_stress(tail1) == self._strip_stress(tail2):
                rhyme_score = 1.0
                if len(word1) == len(word2) and word1[1:] == word2[1:]:
                    rhyme_score = 1.2
            else:
                rhyme_score = 0.6

        syllable_match = pronouncing.syllable_count(phones1) == pronouncing.syllable_count(phones2)
        syllable_score = 1.0 if syllable_match else 0.0

        # Spelling similarity skips the first letter when both words have more
        # than one character; otherwise the whole words are compared.
        if len(word1) > 1 and len(word2) > 1:
            string_similarity = SequenceMatcher(None, word1[1:], word2[1:]).ratio()
        else:
            string_similarity = SequenceMatcher(None, word1, word2).ratio()

        total_similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
        return {
            "similarity": round(total_similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "syllable_match": syllable_match,
            "string_similarity": round(string_similarity, 3)
        }

    def _normalize_word(self, word):
        """Lower-case ``word`` and trim surrounding whitespace and punctuation.

        Whitespace and punctuation are stripped together so that input such as
        ``" Cat. "`` is looked up as ``"cat"``; stripping punctuation alone
        would stop at the outer spaces and leave the word untouched.
        """
        import string
        return word.lower().strip(string.punctuation + string.whitespace)

    def _pronunciation_summary(self, phones):
        """Return syllable count, phone list and stress pattern for ``phones``.

        ``phones`` is a single space-separated pronunciation string.
        """
        import pronouncing
        return {
            'syllable_count': pronouncing.syllable_count(phones),
            'phones': phones.split(),
            'stresses': pronouncing.stresses(phones)
        }

    def forward(self, word, compare_to=None):
        """Analyze ``word`` and, if given, compare it with ``compare_to``.

        Args:
            word: Word to analyze (string).
            compare_to: Optional second word (string); falsy values skip the
                comparison.

        Returns:
            A JSON string (2-space indent). When ``word`` has no pronunciation
            it holds ``word``, ``found: false`` and ``error``. Otherwise it
            holds ``word``, ``found: true``, ``syllable_count``, ``phones`` and
            ``stresses``; with ``compare_to`` it also holds ``comparison``
            (the same data for the second word, or an ``error`` entry) and,
            when both words were found, ``similarity``.
        """
        import json
        import pronouncing

        word_clean = self._normalize_word(word)
        phones = pronouncing.phones_for_word(word_clean)
        if not phones:
            result = {
                'word': word_clean,
                'found': False,
                'error': 'Word not found in dictionary'
            }
            return json.dumps(result, indent=2)

        # Only the first listed pronunciation is analyzed.
        primary_phones = phones[0]
        result = {
            'word': word_clean,
            'found': True
        }
        result.update(self._pronunciation_summary(primary_phones))

        if compare_to:
            compare_clean = self._normalize_word(compare_to)
            compare_phones = pronouncing.phones_for_word(compare_clean)
            if not compare_phones:
                result['comparison'] = {
                    'error': f'Comparison word "{compare_clean}" not found in dictionary'
                }
            else:
                compare_primary = compare_phones[0]
                comparison = {'word': compare_clean}
                comparison.update(self._pronunciation_summary(compare_primary))
                result['comparison'] = comparison
                result['similarity'] = self._calculate_similarity(
                    word_clean, primary_phones,
                    compare_clean, compare_primary
                )
        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        # NOTE(review): arguments are accepted but ignored and the base
        # initializer is not invoked here -- confirm this is intentional.
        self.is_initialized = False