patruff committed on
Commit
de91b03
·
verified ·
1 Parent(s): 89aaafa

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +109 -52
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- pronouncing
2
  smolagents
 
 
 
1
  smolagents
2
+ pronouncing
tool.py CHANGED
@@ -1,14 +1,13 @@
1
  from smolagents.tools import Tool
 
2
  import json
3
  import pronouncing
4
- import string
5
- import difflib
6
 
7
  class WordPhoneTool(Tool):
8
  name = "word_phonetic_analyzer"
9
- description = """Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.
10
- Can also compare two words for phonetic similarity."""
11
- inputs = {'word': {'type': 'string', 'description': 'Primary word to analyze for pronunciation patterns'}, 'compare_to': {'type': 'string', 'description': 'Optional word to compare against for similarity scoring', 'nullable': True}}
12
  output_type = "string"
13
  VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
14
 
@@ -20,6 +19,16 @@ class WordPhoneTool(Tool):
20
  return groups
21
 
22
 
 
 
 
 
 
 
 
 
 
 
23
  def _get_last_syllable(self, phones):
24
  last_vowel_idx = -1
25
  last_vowel = None
@@ -29,8 +38,9 @@ class WordPhoneTool(Tool):
29
  phone = phones[i]
30
  base_phone = ""
31
  for j in range(len(phone)):
32
- if phone[j] not in "012":
33
- base_phone += phone[j]
 
34
 
35
  for group in vowel_groups:
36
  if base_phone in group:
@@ -83,106 +93,153 @@ class WordPhoneTool(Tool):
83
 
84
  def _calculate_similarity(self, word1, phones1, word2, phones2):
85
  import pronouncing
86
- from difflib import SequenceMatcher
87
 
88
  phone_list1 = phones1.split()
89
  phone_list2 = phones2.split()
90
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  result1 = self._get_last_syllable(phone_list1)
92
  result2 = self._get_last_syllable(phone_list2)
 
 
93
 
94
- last_vowel1 = result1[0]
95
- word1_end = result1[1]
96
- last_vowel2 = result2[0]
97
- word2_end = result2[1]
98
-
99
  rhyme_score = 0.0
100
- syllable_score = 0.0
101
- string_similarity = 0.0
102
-
103
  if last_vowel1 and last_vowel2:
104
  if self._vowels_match(last_vowel1, last_vowel2):
105
- word1_end_clean = self._strip_stress(word1_end)
106
- word2_end_clean = self._strip_stress(word2_end)
107
 
108
- if word1_end_clean == word2_end_clean:
109
- rhyme_score = 1.0
110
- if len(word1) == len(word2):
111
- if word1[1:] == word2[1:]:
112
- rhyme_score = 1.2
113
  else:
114
- rhyme_score = 0.6
115
-
 
 
 
 
 
 
116
  syl1 = pronouncing.syllable_count(phones1)
117
  syl2 = pronouncing.syllable_count(phones2)
118
- if syl1 == syl2:
119
- syllable_score = 1.0
120
 
121
- matcher = SequenceMatcher(None)
122
- if len(word1) > 1 and len(word2) > 1:
123
- matcher.set_seqs(word1[1:], word2[1:])
124
- string_similarity = matcher.ratio()
125
- else:
126
- matcher.set_seqs(word1, word2)
127
- string_similarity = matcher.ratio()
128
 
129
- total_similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
 
 
 
 
 
130
 
131
  return {
132
  "similarity": round(total_similarity, 3),
133
  "rhyme_score": round(rhyme_score, 3),
134
- "syllable_match": syllable_score == 1.0,
135
- "string_similarity": round(string_similarity, 3)
136
  }
137
 
138
 
139
- def forward(self, word, compare_to=None):
140
  import json
141
  import string
142
  import pronouncing
143
 
 
 
 
 
 
 
 
144
  word_clean = word.lower()
145
  word_clean = word_clean.strip(string.punctuation)
146
- phones = pronouncing.phones_for_word(word_clean)
147
 
148
- if not phones:
149
  result = {
150
  'word': word_clean,
151
  'found': False,
152
- 'error': 'Word not found in dictionary'
153
  }
154
  return json.dumps(result, indent=2)
155
 
156
- primary_phones = phones[0]
157
  result = {
158
  'word': word_clean,
159
  'found': True,
160
- 'syllable_count': pronouncing.syllable_count(primary_phones),
161
- 'phones': primary_phones.split(),
162
- 'stresses': pronouncing.stresses(primary_phones)
 
 
 
163
  }
164
 
165
  if compare_to:
166
  compare_clean = compare_to.lower()
167
  compare_clean = compare_clean.strip(string.punctuation)
168
- compare_phones = pronouncing.phones_for_word(compare_clean)
169
 
170
  if not compare_phones:
171
  result['comparison'] = {
172
- 'error': f'Comparison word "{compare_clean}" not found in dictionary'
173
  }
174
  else:
175
- compare_primary = compare_phones[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  result['comparison'] = {
177
  'word': compare_clean,
178
- 'syllable_count': pronouncing.syllable_count(compare_primary),
179
- 'phones': compare_primary.split(),
180
- 'stresses': pronouncing.stresses(compare_primary)
 
 
 
 
 
 
 
 
 
 
181
  }
182
 
 
183
  similarity_result = self._calculate_similarity(
184
  word_clean, primary_phones,
185
- compare_clean, compare_primary
186
  )
187
  result['similarity'] = similarity_result
188
 
 
1
  from smolagents.tools import Tool
2
+ import string
3
  import json
4
  import pronouncing
 
 
5
 
6
  class WordPhoneTool(Tool):
7
  name = "word_phonetic_analyzer"
8
+ description = """Analyzes word pronunciation using CMU dictionary and custom pronunciations to get phonemes, syllables, and stress patterns.
9
+ Can also compare two words for phonetic similarity and rhyming."""
10
+ inputs = {'word': {'type': 'string', 'description': 'Primary word to analyze for pronunciation patterns'}, 'compare_to': {'type': 'string', 'description': 'Optional word to compare against for similarity scoring', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True}}
11
  output_type = "string"
12
  VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
13
 
 
19
  return groups
20
 
21
 
22
+ def _get_word_phones(self, word, custom_phones=None):
23
+ """Get phones for a word, checking custom dictionary first."""
24
+ if custom_phones and word in custom_phones:
25
+ return custom_phones[word]["primary_phones"]
26
+
27
+ import pronouncing
28
+ phones = pronouncing.phones_for_word(word)
29
+ return phones[0] if phones else None
30
+
31
+
32
  def _get_last_syllable(self, phones):
33
  last_vowel_idx = -1
34
  last_vowel = None
 
38
  phone = phones[i]
39
  base_phone = ""
40
  for j in range(len(phone)):
41
+ char = phone[j]
42
+ if char not in "012":
43
+ base_phone += char
44
 
45
  for group in vowel_groups:
46
  if base_phone in group:
 
93
 
94
  def _calculate_similarity(self, word1, phones1, word2, phones2):
95
  import pronouncing
 
96
 
97
  phone_list1 = phones1.split()
98
  phone_list2 = phones2.split()
99
 
100
+ # Initialize variables before use
101
+ last_vowel1 = None
102
+ last_vowel2 = None
103
+ word1_end = []
104
+ word2_end = []
105
+ matched = 0
106
+ common_length = 0
107
+ end1_clean = []
108
+ end2_clean = []
109
+ i = 0 # Initialize i for loop variable
110
+
111
+ # Get last syllable components
112
  result1 = self._get_last_syllable(phone_list1)
113
  result2 = self._get_last_syllable(phone_list2)
114
+ last_vowel1, word1_end = result1
115
+ last_vowel2, word2_end = result2
116
 
117
+ # Calculate rhyme score (most important)
 
 
 
 
118
  rhyme_score = 0.0
 
 
 
119
  if last_vowel1 and last_vowel2:
120
  if self._vowels_match(last_vowel1, last_vowel2):
121
+ end1_clean = self._strip_stress(word1_end)
122
+ end2_clean = self._strip_stress(word2_end)
123
 
124
+ if end1_clean == end2_clean:
125
+ rhyme_score = 1.0 # Perfect rhyme
 
 
 
126
  else:
127
+ common_length = min(len(end1_clean), len(end2_clean))
128
+ matched = 0
129
+ for i in range(common_length):
130
+ if end1_clean[i] == end2_clean[i]:
131
+ matched += 1
132
+ rhyme_score = 0.7 + (0.3 * (matched / max(len(end1_clean), len(end2_clean))))
133
+
134
+ # Calculate syllable pattern similarity
135
  syl1 = pronouncing.syllable_count(phones1)
136
  syl2 = pronouncing.syllable_count(phones2)
137
+ syllable_score = 1.0 if syl1 == syl2 else 1 - (abs(syl1 - syl2) / max(syl1, syl2))
 
138
 
139
+ # Calculate stress pattern similarity
140
+ stress1 = pronouncing.stresses(phones1)
141
+ stress2 = pronouncing.stresses(phones2)
142
+ stress_score = 1.0 if stress1 == stress2 else 0.5
 
 
 
143
 
144
+ # Weighted combination prioritizing rhyming
145
+ total_similarity = (
146
+ (rhyme_score * 0.8) + # Rhyming is most important (80%)
147
+ (syllable_score * 0.15) + # Syllable count has some importance (15%)
148
+ (stress_score * 0.05) # Stress pattern has minimal weight (5%)
149
+ )
150
 
151
  return {
152
  "similarity": round(total_similarity, 3),
153
  "rhyme_score": round(rhyme_score, 3),
154
+ "syllable_score": round(syllable_score, 3),
155
+ "stress_score": round(stress_score, 3)
156
  }
157
 
158
 
159
+ def forward(self, word, compare_to=None, custom_phones=None):
160
  import json
161
  import string
162
  import pronouncing
163
 
164
+ # Initialize variables before use
165
+ word_last_vowel = None
166
+ compare_last_vowel = None
167
+ word_end = []
168
+ compare_end = []
169
+ is_rhyme = False
170
+
171
  word_clean = word.lower()
172
  word_clean = word_clean.strip(string.punctuation)
173
+ primary_phones = self._get_word_phones(word_clean, custom_phones)
174
 
175
+ if not primary_phones:
176
  result = {
177
  'word': word_clean,
178
  'found': False,
179
+ 'error': 'Word not found in dictionary or custom phones'
180
  }
181
  return json.dumps(result, indent=2)
182
 
 
183
  result = {
184
  'word': word_clean,
185
  'found': True,
186
+ 'analysis': {
187
+ 'syllable_count': pronouncing.syllable_count(primary_phones),
188
+ 'phones': primary_phones.split(),
189
+ 'stresses': pronouncing.stresses(primary_phones),
190
+ 'phone_count': len(primary_phones.split())
191
+ }
192
  }
193
 
194
  if compare_to:
195
  compare_clean = compare_to.lower()
196
  compare_clean = compare_clean.strip(string.punctuation)
197
+ compare_phones = self._get_word_phones(compare_clean, custom_phones)
198
 
199
  if not compare_phones:
200
  result['comparison'] = {
201
+ 'error': f'Comparison word "{compare_clean}" not found in dictionary or custom phones'
202
  }
203
  else:
204
+ # Get rhyme components
205
+ word_result = self._get_last_syllable(primary_phones.split())
206
+ compare_result = self._get_last_syllable(compare_phones.split())
207
+ word_last_vowel, word_end = word_result
208
+ compare_last_vowel, compare_end = compare_result
209
+
210
+ # Calculate if words rhyme
211
+ if word_last_vowel and compare_last_vowel:
212
+ if self._vowels_match(word_last_vowel, compare_last_vowel):
213
+ word_end_clean = self._strip_stress(word_end)
214
+ compare_end_clean = self._strip_stress(compare_end)
215
+ if word_end_clean == compare_end_clean:
216
+ is_rhyme = True
217
+
218
+ # Calculate detailed comparison stats
219
+ word_syl_count = pronouncing.syllable_count(primary_phones)
220
+ compare_syl_count = pronouncing.syllable_count(compare_phones)
221
+
222
  result['comparison'] = {
223
  'word': compare_clean,
224
+ 'analysis': {
225
+ 'syllable_count': compare_syl_count,
226
+ 'phones': compare_phones.split(),
227
+ 'stresses': pronouncing.stresses(compare_phones),
228
+ 'phone_count': len(compare_phones.split())
229
+ },
230
+ 'comparison_stats': {
231
+ 'is_rhyme': is_rhyme,
232
+ 'same_syllable_count': word_syl_count == compare_syl_count,
233
+ 'same_stress_pattern': pronouncing.stresses(primary_phones) == pronouncing.stresses(compare_phones),
234
+ 'syllable_difference': abs(word_syl_count - compare_syl_count),
235
+ 'phone_difference': abs(len(primary_phones.split()) - len(compare_phones.split()))
236
+ }
237
  }
238
 
239
+ # Calculate detailed similarity scores
240
  similarity_result = self._calculate_similarity(
241
  word_clean, primary_phones,
242
+ compare_clean, compare_phones
243
  )
244
  result['similarity'] = similarity_result
245