Spaces:

patruff
/

word-phone

Configuration error

App Files Files Community

word-phone / tool.py

patruff

Upload tool

b1c9207 verified 2 days ago

raw

history blame contribute delete

6.65 kB

	from smolagents.tools import Tool
	import json
	import pronouncing
	import string
	import difflib

	class WordPhoneTool(Tool):
	    """Report a word's pronunciation data and optionally compare it to a second word.

	    Pronunciations come from the ``pronouncing`` package. ``forward`` is the
	    entry point and returns a JSON string; the underscore-prefixed methods are
	    helpers used to score how closely two words rhyme.
	    """

	    name = "word_phonetic_analyzer"
	    # Written as adjacent literals with an explicit "\n" so the runtime text
	    # does not depend on how this source line happens to be indented.
	    description = (
	        "Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.\n"
	        "Can also compare two words for phonetic similarity."
	    )
	    inputs = {
	        'word': {
	            'type': 'string',
	            'description': 'Primary word to analyze for pronunciation patterns',
	        },
	        'compare_to': {
	            'type': 'string',
	            'description': 'Optional word to compare against for similarity scoring',
	            'nullable': True,
	        },
	    }
	    output_type = "string"
	    # "|"-separated groups of ","-separated phoneme codes. Two vowels that share
	    # a group are treated as matching by _vowels_match, and membership in any
	    # group is how _get_last_syllable recognises a phoneme as a vowel.
	    # A plain "|" is used as the separator: the previous "\|" was an invalid
	    # escape sequence and triggers a SyntaxWarning on recent Python versions.
	    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"

	    def _get_vowel_groups(self):
	        """Return VOWEL_REF parsed into a list of lists of phoneme codes."""
	        return [group.split(",") for group in self.VOWEL_REF.split("|")]

	    def _get_last_syllable(self, phones):
	        """Locate the final vowel in a list of phonemes.

	        Args:
	            phones: list of phoneme strings, optionally carrying stress digits
	                (``0``/``1``/``2``).

	        Returns:
	            A ``(vowel, tail)`` tuple: ``vowel`` is the last phoneme found in
	            VOWEL_REF with its stress digits removed, and ``tail`` is the list
	            of phonemes that follow it, unmodified. ``(None, [])`` when no
	            phoneme is a known vowel.
	        """
	        known_vowels = {
	            vowel for group in self._get_vowel_groups() for vowel in group
	        }
	        bare_phones = self._strip_stress(phones)

	        # Walk backwards so the first hit is the last vowel.
	        for idx in range(len(bare_phones) - 1, -1, -1):
	            if bare_phones[idx] in known_vowels:
	                return bare_phones[idx], list(phones[idx + 1:])
	        return None, []

	    def _strip_stress(self, phones):
	        """Return a copy of ``phones`` with the characters 0, 1 and 2 removed from each item."""
	        return [
	            "".join(char for char in phone if char not in "012")
	            for phone in phones
	        ]

	    def _vowels_match(self, v1, v2):
	        """Return True when two vowels are equal, or share a VOWEL_REF group, ignoring stress digits."""
	        bare1, bare2 = self._strip_stress([v1, v2])
	        if bare1 == bare2:
	            return True
	        return any(
	            bare1 in group and bare2 in group
	            for group in self._get_vowel_groups()
	        )

	    def _calculate_similarity(self, word1, phones1, word2, phones2):
	        """Score how similar two words sound.

	        Args:
	            word1: spelling of the first word.
	            phones1: space-separated phoneme string for ``word1``.
	            word2: spelling of the second word.
	            phones2: space-separated phoneme string for ``word2``.

	        Returns:
	            dict with keys ``similarity`` (weighted total, rounded to 3 places),
	            ``rhyme_score``, ``syllable_match`` (bool) and ``string_similarity``.
	            The total is ``0.6 * rhyme + 0.25 * syllable + 0.15 * spelling``;
	            because ``rhyme_score`` may be 1.2 the total can exceed 1.0.
	        """
	        # Function-scope imports are kept from the original; presumably they
	        # keep each method self-contained for smolagents - TODO confirm.
	        import pronouncing
	        from difflib import SequenceMatcher

	        last_vowel1, word1_end = self._get_last_syllable(phones1.split())
	        last_vowel2, word2_end = self._get_last_syllable(phones2.split())

	        rhyme_score = 0.0
	        if (
	            last_vowel1
	            and last_vowel2
	            and self._vowels_match(last_vowel1, last_vowel2)
	        ):
	            if self._strip_stress(word1_end) == self._strip_stress(word2_end):
	                # Same final vowel and same trailing phonemes: full rhyme.
	                rhyme_score = 1.0
	                # Bonus when the spellings differ only in the first letter.
	                if len(word1) == len(word2) and word1[1:] == word2[1:]:
	                    rhyme_score = 1.2
	            else:
	                # NOTE(review): the source had lost its indentation; this
	                # ``else`` is assumed to pair with the trailing-phoneme check
	                # (vowel matches, coda differs) - confirm against upstream.
	                rhyme_score = 0.6

	        same_syllables = (
	            pronouncing.syllable_count(phones1)
	            == pronouncing.syllable_count(phones2)
	        )
	        syllable_score = 1.0 if same_syllables else 0.0

	        # Compare spellings without the first letter when both words have
	        # more than one character; otherwise compare the whole words.
	        if len(word1) > 1 and len(word2) > 1:
	            matcher = SequenceMatcher(None, word1[1:], word2[1:])
	        else:
	            matcher = SequenceMatcher(None, word1, word2)
	        string_similarity = matcher.ratio()

	        total_similarity = (
	            (rhyme_score * 0.6)
	            + (syllable_score * 0.25)
	            + (string_similarity * 0.15)
	        )

	        return {
	            "similarity": round(total_similarity, 3),
	            "rhyme_score": round(rhyme_score, 3),
	            "syllable_match": same_syllables,
	            "string_similarity": round(string_similarity, 3),
	        }

	    def forward(self, word, compare_to=None):
	        """Analyse ``word`` and, if given, compare it with ``compare_to``.

	        Both words are lower-cased and have surrounding punctuation and
	        whitespace removed before lookup. Only the first pronunciation
	        returned by ``pronouncing.phones_for_word`` is used.

	        Args:
	            word: the word to analyse.
	            compare_to: optional second word; ignored when falsy.

	        Returns:
	            A JSON string (``indent=2``). When ``word`` has no pronunciation it
	            contains ``word``, ``found: false`` and ``error``. Otherwise it
	            contains ``word``, ``found``, ``syllable_count``, ``phones`` and
	            ``stresses``, plus ``comparison`` (and ``similarity`` when the
	            second word is found) if ``compare_to`` was supplied.
	        """
	        # Function-scope imports are kept from the original; presumably they
	        # keep each method self-contained for smolagents - TODO confirm.
	        import json
	        import string
	        import pronouncing

	        # Whitespace is stripped together with punctuation so that inputs such
	        # as "cat\n" or " cat." still reach the dictionary lookup as "cat".
	        strip_chars = string.punctuation + string.whitespace

	        word_clean = word.lower().strip(strip_chars)
	        phones = pronouncing.phones_for_word(word_clean)

	        if not phones:
	            result = {
	                'word': word_clean,
	                'found': False,
	                'error': 'Word not found in dictionary'
	            }
	            return json.dumps(result, indent=2)

	        primary_phones = phones[0]
	        result = {
	            'word': word_clean,
	            'found': True,
	            'syllable_count': pronouncing.syllable_count(primary_phones),
	            'phones': primary_phones.split(),
	            'stresses': pronouncing.stresses(primary_phones)
	        }

	        if compare_to:
	            compare_clean = compare_to.lower().strip(strip_chars)
	            compare_phones = pronouncing.phones_for_word(compare_clean)

	            if not compare_phones:
	                result['comparison'] = {
	                    'error': f'Comparison word "{compare_clean}" not found in dictionary'
	                }
	            else:
	                compare_primary = compare_phones[0]
	                result['comparison'] = {
	                    'word': compare_clean,
	                    'syllable_count': pronouncing.syllable_count(compare_primary),
	                    'phones': compare_primary.split(),
	                    'stresses': pronouncing.stresses(compare_primary)
	                }
	                result['similarity'] = self._calculate_similarity(
	                    word_clean, primary_phones,
	                    compare_clean, compare_primary
	                )

	        return json.dumps(result, indent=2)

	    def __init__(self, *args, **kwargs):
	        """Accept and ignore any arguments; mark the tool as not yet initialized.

	        The signature is ``*args, **kwargs`` so the tool can be constructed
	        with no arguments; the previous ``(self, args, *kwargs)`` form made one
	        positional argument mandatory.
	        """
	        self.is_initialized = False