# Spaces: patruff/word-phone
# Configuration error
# File size: 6,650 Bytes

from smolagents.tools import Tool
import json
import pronouncing
import string
import difflib

class WordPhoneTool(Tool):
    """Tool that reports CMU-dictionary pronunciation data for a word.

    ``forward`` returns a JSON string holding the word's phones, syllable
    count and stress pattern, and, when a second word is supplied, a
    similarity report comparing the two pronunciations.

    NOTE(review): modules are imported inside the methods that use them even
    though the file also imports them at the top; presumably this keeps every
    method self-contained when the tool is serialized -- confirm before
    hoisting the imports.
    """

    name = "word_phonetic_analyzer"
    description = """Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns. 

    Can also compare two words for phonetic similarity."""
    inputs = {
        'word': {
            'type': 'string',
            'description': 'Primary word to analyze for pronunciation patterns',
        },
        'compare_to': {
            'type': 'string',
            'description': 'Optional word to compare against for similarity scoring',
            'nullable': True,
        },
    }
    output_type = "string"
    # "|"-separated groups of ","-separated vowel phones.  Two vowels that
    # share a group are treated as matching by ``_vowels_match``; a phone may
    # appear in more than one group.
    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"

    def _get_vowel_groups(self):
        """Return ``VOWEL_REF`` parsed into a list of lists of phone names."""
        return [group.split(",") for group in self.VOWEL_REF.split("|")]

    def _base_phone(self, phone):
        """Return ``phone`` with every stress digit (0, 1, 2) removed."""
        return phone.translate(str.maketrans("", "", "012"))

    def _clean_word(self, word):
        """Lower-case ``word`` and trim surrounding whitespace and punctuation.

        Whitespace and punctuation are stripped in a single pass so that
        inputs such as ``" cat"`` or ``"cat. "`` still resolve to ``"cat"``.
        """
        import string

        return word.lower().strip(string.punctuation + string.whitespace)

    def _get_last_syllable(self, phones):
        """Locate the final vowel of a pronunciation.

        Args:
            phones: Sequence of phone strings, optionally carrying stress
                digits.

        Returns:
            A ``(last_vowel, remaining)`` tuple: ``last_vowel`` is the final
            vowel with stress digits removed and ``remaining`` is the list of
            phones that follow it (left unmodified).  ``(None, [])`` is
            returned when no phone is listed in ``VOWEL_REF``.
        """
        vowels = set()
        for group in self._get_vowel_groups():
            vowels.update(group)

        last_vowel_idx = -1
        last_vowel = None
        for idx, phone in enumerate(phones):
            base = self._base_phone(phone)
            if base in vowels:
                last_vowel_idx = idx
                last_vowel = base

        if last_vowel is None:
            return None, []
        return last_vowel, list(phones[last_vowel_idx + 1:])

    def _strip_stress(self, phones):
        """Return a new list of ``phones`` with stress digits removed."""
        return [self._base_phone(phone) for phone in phones]

    def _vowels_match(self, v1, v2):
        """Tell whether two vowels are identical or share a ``VOWEL_REF`` group.

        Stress digits on either argument are ignored.
        """
        base1 = self._base_phone(v1)
        base2 = self._base_phone(v2)
        if base1 == base2:
            return True
        return any(
            base1 in group and base2 in group
            for group in self._get_vowel_groups()
        )

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Score how alike two words sound and look.

        Args:
            word1: Spelling of the first word.
            phones1: Space-separated phone string for ``word1``.
            word2: Spelling of the second word.
            phones2: Space-separated phone string for ``word2``.

        Returns:
            A dict with keys ``similarity``, ``rhyme_score``,
            ``syllable_match`` and ``string_similarity``.  ``similarity`` is
            ``0.6 * rhyme + 0.25 * syllable + 0.15 * string``; because the
            rhyme score can be 1.2, the total can reach 1.12.
        """
        import pronouncing
        from difflib import SequenceMatcher

        last_vowel1, tail1 = self._get_last_syllable(phones1.split())
        last_vowel2, tail2 = self._get_last_syllable(phones2.split())

        # Rhyme score: 0.0 when the final vowels differ, 0.6 when they match
        # but the trailing phones differ, 1.0 for a full match, and 1.2 when
        # the spellings additionally agree on everything after the first
        # letter.
        rhyme_score = 0.0
        if last_vowel1 and last_vowel2 and self._vowels_match(last_vowel1, last_vowel2):
            if self._strip_stress(tail1) == self._strip_stress(tail2):
                rhyme_score = 1.0
                if len(word1) == len(word2) and word1[1:] == word2[1:]:
                    rhyme_score = 1.2
            else:
                rhyme_score = 0.6

        syllables_match = (
            pronouncing.syllable_count(phones1) == pronouncing.syllable_count(phones2)
        )
        syllable_score = 1.0 if syllables_match else 0.0

        # Spelling similarity skips the first letter of each word; one-letter
        # words are compared whole since nothing would be left otherwise.
        if len(word1) > 1 and len(word2) > 1:
            spelling1 = word1[1:]
            spelling2 = word2[1:]
        else:
            spelling1 = word1
            spelling2 = word2
        string_similarity = SequenceMatcher(None, spelling1, spelling2).ratio()

        total_similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)

        return {
            "similarity": round(total_similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "syllable_match": syllables_match,
            "string_similarity": round(string_similarity, 3)
        }

    def forward(self, word, compare_to=None):
        """Analyze ``word`` and optionally compare it with ``compare_to``.

        Both words are lower-cased and trimmed of surrounding whitespace and
        punctuation before lookup; the first pronunciation returned by
        ``pronouncing.phones_for_word`` is the one analyzed.

        Args:
            word: Word to analyze.
            compare_to: Optional second word; a falsy value skips comparison.

        Returns:
            A JSON string (indent 2).  When ``word`` is unknown it holds
            ``word``, ``found: false`` and ``error``.  Otherwise it holds
            ``word``, ``found``, ``syllable_count``, ``phones`` and
            ``stresses``, plus ``comparison`` (and ``similarity`` when the
            second word is known) if ``compare_to`` was given.
        """
        import json
        import pronouncing

        word_clean = self._clean_word(word)
        phones = pronouncing.phones_for_word(word_clean)

        if not phones:
            not_found = {
                'word': word_clean,
                'found': False,
                'error': 'Word not found in dictionary'
            }
            return json.dumps(not_found, indent=2)

        primary_phones = phones[0]
        result = {
            'word': word_clean,
            'found': True,
            'syllable_count': pronouncing.syllable_count(primary_phones),
            'phones': primary_phones.split(),
            'stresses': pronouncing.stresses(primary_phones)
        }

        if not compare_to:
            return json.dumps(result, indent=2)

        compare_clean = self._clean_word(compare_to)
        compare_phones = pronouncing.phones_for_word(compare_clean)

        if not compare_phones:
            result['comparison'] = {
                'error': f'Comparison word "{compare_clean}" not found in dictionary'
            }
            return json.dumps(result, indent=2)

        compare_primary = compare_phones[0]
        result['comparison'] = {
            'word': compare_clean,
            'syllable_count': pronouncing.syllable_count(compare_primary),
            'phones': compare_primary.split(),
            'stresses': pronouncing.stresses(compare_primary)
        }
        result['similarity'] = self._calculate_similarity(
            word_clean, primary_phones,
            compare_clean, compare_primary
        )

        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized; all arguments are ignored."""
        # NOTE(review): the base-class initializer is not invoked here;
        # presumably intentional for this generated tool file -- confirm
        # against the smolagents ``Tool`` base class before changing.
        self.is_initialized = False