Spaces:

patruff
/

word-phone

Configuration error

App Files Community

patruff committed 2 days ago

Commit

de91b03

verified ·

1 Parent(s): 89aaafa

Upload tool

Browse files

Files changed (2) hide show

requirements.txt +1 -1
tool.py +109 -52

requirements.txt CHANGED Viewed

@@ -1,2 +1,2 @@
-pronouncing
 smolagents



1	smolagents
2	+ pronouncing

tool.py CHANGED Viewed

@@ -1,14 +1,13 @@
 from smolagents.tools import Tool
 import json
 import pronouncing
-import string
-import difflib
 class WordPhoneTool(Tool):
     name = "word_phonetic_analyzer"
-    description = """Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.
-    Can also compare two words for phonetic similarity."""
-    inputs = {'word': {'type': 'string', 'description': 'Primary word to analyze for pronunciation patterns'}, 'compare_to': {'type': 'string', 'description': 'Optional word to compare against for similarity scoring', 'nullable': True}}
     output_type = "string"
     VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
@@ -20,6 +19,16 @@ class WordPhoneTool(Tool):
         return groups
     def _get_last_syllable(self, phones):
         last_vowel_idx = -1
         last_vowel = None
@@ -29,8 +38,9 @@ class WordPhoneTool(Tool):
             phone = phones[i]
             base_phone = ""
             for j in range(len(phone)):
-                if phone[j] not in "012":
-                    base_phone += phone[j]
             for group in vowel_groups:
                 if base_phone in group:
@@ -83,106 +93,153 @@ class WordPhoneTool(Tool):
     def _calculate_similarity(self, word1, phones1, word2, phones2):
         import pronouncing
-        from difflib import SequenceMatcher
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
         result1 = self._get_last_syllable(phone_list1)
         result2 = self._get_last_syllable(phone_list2)
-        last_vowel1 = result1[0]
-        word1_end = result1[1]
-        last_vowel2 = result2[0]
-        word2_end = result2[1]
         rhyme_score = 0.0
-        syllable_score = 0.0
-        string_similarity = 0.0
         if last_vowel1 and last_vowel2:
             if self._vowels_match(last_vowel1, last_vowel2):
-                word1_end_clean = self._strip_stress(word1_end)
-                word2_end_clean = self._strip_stress(word2_end)
-                if word1_end_clean == word2_end_clean:
-                    rhyme_score = 1.0
-                    if len(word1) == len(word2):
-                        if word1[1:] == word2[1:]:
-                            rhyme_score = 1.2
                 else:
-                    rhyme_score = 0.6
         syl1 = pronouncing.syllable_count(phones1)
         syl2 = pronouncing.syllable_count(phones2)
-        if syl1 == syl2:
-            syllable_score = 1.0
-        matcher = SequenceMatcher(None)
-        if len(word1) > 1 and len(word2) > 1:
-            matcher.set_seqs(word1[1:], word2[1:])
-            string_similarity = matcher.ratio()
-        else:
-            matcher.set_seqs(word1, word2)
-            string_similarity = matcher.ratio()
-        total_similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
         return {
             "similarity": round(total_similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
-            "syllable_match": syllable_score == 1.0,
-            "string_similarity": round(string_similarity, 3)
         }
-    def forward(self, word, compare_to=None):
         import json
         import string
         import pronouncing
         word_clean = word.lower()
         word_clean = word_clean.strip(string.punctuation)
-        phones = pronouncing.phones_for_word(word_clean)
-        if not phones:
             result = {
                 'word': word_clean,
                 'found': False,
-                'error': 'Word not found in dictionary'
             }
             return json.dumps(result, indent=2)
-        primary_phones = phones[0]
         result = {
             'word': word_clean,
             'found': True,
-            'syllable_count': pronouncing.syllable_count(primary_phones),
-            'phones': primary_phones.split(),
-            'stresses': pronouncing.stresses(primary_phones)
         }
         if compare_to:
             compare_clean = compare_to.lower()
             compare_clean = compare_clean.strip(string.punctuation)
-            compare_phones = pronouncing.phones_for_word(compare_clean)
             if not compare_phones:
                 result['comparison'] = {
-                    'error': f'Comparison word "{compare_clean}" not found in dictionary'
                 }
             else:
-                compare_primary = compare_phones[0]
                 result['comparison'] = {
                     'word': compare_clean,
-                    'syllable_count': pronouncing.syllable_count(compare_primary),
-                    'phones': compare_primary.split(),
-                    'stresses': pronouncing.stresses(compare_primary)
                 }
                 similarity_result = self._calculate_similarity(
                     word_clean, primary_phones,
-                    compare_clean, compare_primary
                 )
                 result['similarity'] = similarity_result

 from smolagents.tools import Tool
+import string
 import json
 import pronouncing
 class WordPhoneTool(Tool):
     name = "word_phonetic_analyzer"
+    description = """Analyzes word pronunciation using CMU dictionary and custom pronunciations to get phonemes, syllables, and stress patterns.
+    Can also compare two words for phonetic similarity and rhyming."""
+    inputs = {'word': {'type': 'string', 'description': 'Primary word to analyze for pronunciation patterns'}, 'compare_to': {'type': 'string', 'description': 'Optional word to compare against for similarity scoring', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True}}
     output_type = "string"
     VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
         return groups
+    def _get_word_phones(self, word, custom_phones=None):
+        """Get phones for a word, checking custom dictionary first."""
+        if custom_phones and word in custom_phones:
+            return custom_phones[word]["primary_phones"]
+        import pronouncing
+        phones = pronouncing.phones_for_word(word)
+        return phones[0] if phones else None
     def _get_last_syllable(self, phones):
         last_vowel_idx = -1
         last_vowel = None
             phone = phones[i]
             base_phone = ""
             for j in range(len(phone)):
+                char = phone[j]
+                if char not in "012":
+                    base_phone += char
             for group in vowel_groups:
                 if base_phone in group:
     def _calculate_similarity(self, word1, phones1, word2, phones2):
         import pronouncing
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
+        # Initialize variables before use
+        last_vowel1 = None
+        last_vowel2 = None
+        word1_end = []
+        word2_end = []
+        matched = 0
+        common_length = 0
+        end1_clean = []
+        end2_clean = []
+        i = 0  # Initialize i for loop variable
+        # Get last syllable components
         result1 = self._get_last_syllable(phone_list1)
         result2 = self._get_last_syllable(phone_list2)
+        last_vowel1, word1_end = result1
+        last_vowel2, word2_end = result2
+        # Calculate rhyme score (most important)
         rhyme_score = 0.0
         if last_vowel1 and last_vowel2:
             if self._vowels_match(last_vowel1, last_vowel2):
+                end1_clean = self._strip_stress(word1_end)
+                end2_clean = self._strip_stress(word2_end)
+                if end1_clean == end2_clean:
+                    rhyme_score = 1.0  # Perfect rhyme
                 else:
+                    common_length = min(len(end1_clean), len(end2_clean))
+                    matched = 0
+                    for i in range(common_length):
+                        if end1_clean[i] == end2_clean[i]:
+                            matched += 1
+                    rhyme_score = 0.7 + (0.3 * (matched / max(len(end1_clean), len(end2_clean))))
+        # Calculate syllable pattern similarity
         syl1 = pronouncing.syllable_count(phones1)
         syl2 = pronouncing.syllable_count(phones2)
+        syllable_score = 1.0 if syl1 == syl2 else 1 - (abs(syl1 - syl2) / max(syl1, syl2))
+        # Calculate stress pattern similarity
+        stress1 = pronouncing.stresses(phones1)
+        stress2 = pronouncing.stresses(phones2)
+        stress_score = 1.0 if stress1 == stress2 else 0.5
+        # Weighted combination prioritizing rhyming
+        total_similarity = (
+            (rhyme_score * 0.8) +       # Rhyming is most important (80%)
+            (syllable_score * 0.15) +   # Syllable count has some importance (15%)
+            (stress_score * 0.05)       # Stress pattern has minimal weight (5%)
+        )
         return {
             "similarity": round(total_similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
+            "syllable_score": round(syllable_score, 3),
+            "stress_score": round(stress_score, 3)
         }
+    def forward(self, word, compare_to=None, custom_phones=None):
         import json
         import string
         import pronouncing
+        # Initialize variables before use
+        word_last_vowel = None
+        compare_last_vowel = None
+        word_end = []
+        compare_end = []
+        is_rhyme = False
         word_clean = word.lower()
         word_clean = word_clean.strip(string.punctuation)
+        primary_phones = self._get_word_phones(word_clean, custom_phones)
+        if not primary_phones:
             result = {
                 'word': word_clean,
                 'found': False,
+                'error': 'Word not found in dictionary or custom phones'
             }
             return json.dumps(result, indent=2)
         result = {
             'word': word_clean,
             'found': True,
+            'analysis': {
+                'syllable_count': pronouncing.syllable_count(primary_phones),
+                'phones': primary_phones.split(),
+                'stresses': pronouncing.stresses(primary_phones),
+                'phone_count': len(primary_phones.split())
+            }
         }
         if compare_to:
             compare_clean = compare_to.lower()
             compare_clean = compare_clean.strip(string.punctuation)
+            compare_phones = self._get_word_phones(compare_clean, custom_phones)
             if not compare_phones:
                 result['comparison'] = {
+                    'error': f'Comparison word "{compare_clean}" not found in dictionary or custom phones'
                 }
             else:
+                # Get rhyme components
+                word_result = self._get_last_syllable(primary_phones.split())
+                compare_result = self._get_last_syllable(compare_phones.split())
+                word_last_vowel, word_end = word_result
+                compare_last_vowel, compare_end = compare_result
+                # Calculate if words rhyme
+                if word_last_vowel and compare_last_vowel:
+                    if self._vowels_match(word_last_vowel, compare_last_vowel):
+                        word_end_clean = self._strip_stress(word_end)
+                        compare_end_clean = self._strip_stress(compare_end)
+                        if word_end_clean == compare_end_clean:
+                            is_rhyme = True
+                # Calculate detailed comparison stats
+                word_syl_count = pronouncing.syllable_count(primary_phones)
+                compare_syl_count = pronouncing.syllable_count(compare_phones)
                 result['comparison'] = {
                     'word': compare_clean,
+                    'analysis': {
+                        'syllable_count': compare_syl_count,
+                        'phones': compare_phones.split(),
+                        'stresses': pronouncing.stresses(compare_phones),
+                        'phone_count': len(compare_phones.split())
+                    },
+                    'comparison_stats': {
+                        'is_rhyme': is_rhyme,
+                        'same_syllable_count': word_syl_count == compare_syl_count,
+                        'same_stress_pattern': pronouncing.stresses(primary_phones) == pronouncing.stresses(compare_phones),
+                        'syllable_difference': abs(word_syl_count - compare_syl_count),
+                        'phone_difference': abs(len(primary_phones.split()) - len(compare_phones.split()))
+                    }
                 }
+                # Calculate detailed similarity scores
                 similarity_result = self._calculate_similarity(
                     word_clean, primary_phones,
+                    compare_clean, compare_phones
                 )
                 result['similarity'] = similarity_result