danseith committed on
Commit
616c8c6
·
1 Parent(s): a95bc58

Added warning to single edit and added list of words to not substitute.

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -28,7 +28,7 @@ tab_one_examples = [['A crustless _ made from two slices of baked bread.'],
28
  ]
29
 
30
 
31
- def add_mask(text, size=1):
32
  split_text = text.split()
33
 
34
  # If the user supplies a mask, don't add more
@@ -36,13 +36,20 @@ def add_mask(text, size=1):
36
  u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
37
  split_text[u_pos] = '[MASK]'
38
  return ' '.join(split_text), '[MASK]'
39
- idx = np.random.randint(len(split_text), size=size)
40
- masked_strings = []
41
- for i in idx:
42
- masked_strings.append(split_text[i])
43
- split_text[i] = '[MASK]'
 
 
 
 
 
 
 
44
  masked_output = ' '.join(split_text)
45
- return masked_output, masked_strings
46
 
47
 
48
  class TempScalePipe(FillMaskPipeline):
@@ -154,8 +161,7 @@ def sample_output(out, sampling):
154
 
155
 
156
  def unmask_single(text, temp=1):
157
- tp = add_mask(text, size=1)
158
- masked_text, masked = tp[0], tp[1]
159
  res = scrambler(masked_text, temp=temp, top_k=10)
160
  out = {item["token_str"]: item["score"] for item in res}
161
  return out
@@ -164,21 +170,20 @@ def unmask_single(text, temp=1):
164
  def unmask(text, temp, rounds):
165
  sampling = 'multi'
166
  for _ in range(rounds):
167
- tp = add_mask(text, size=1)
168
- masked_text, masked = tp[0], tp[1]
169
  split_text = masked_text.split()
170
  res = scrambler(masked_text, temp=temp, top_k=15)
171
  mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
172
  out = {item["token_str"]: item["score"] for item in res}
173
  new_token = sample_output(out, sampling)
174
  unsuccessful_iters = 0
175
- while new_token == masked[0]:
176
  if unsuccessful_iters > 5:
177
  break
178
  print('skipped', new_token)
179
  new_token = sample_output(out, sampling=sampling)
180
  unsuccessful_iters += 1
181
- if new_token == masked[0]:
182
  split_text[mask_pos] = new_token
183
  else:
184
  split_text[mask_pos] = '*' + new_token + '*'
@@ -188,6 +193,7 @@ def unmask(text, temp, rounds):
188
  text[0] = text[0].upper()
189
  return ''.join(text)
190
 
 
191
  textbox1 = gr.Textbox(label="Input Sentence", lines=5)
192
  output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
193
 
@@ -197,13 +203,15 @@ temp_slider2 = gr.Slider(1.0, 3.0, value=1.0, label='Creativity')
197
  edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
198
 
199
  title1 = "Patent-BERT Sentence Remix-er: Single Edit"
200
- description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
 
201
  <br/>
202
  <p/>"""
203
  title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
204
  description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
205
  the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
206
- the model to attempt to make. <br/> <p/> """
 
207
 
208
  demo1 = gr.Interface(
209
  fn=unmask_single,
 
28
  ]
29
 
30
 
31
+ def add_mask(text):
32
  split_text = text.split()
33
 
34
  # If the user supplies a mask, don't add more
 
36
  u_pos = [i for i, s in enumerate(split_text) if '_' in s][0]
37
  split_text[u_pos] = '[MASK]'
38
  return ' '.join(split_text), '[MASK]'
39
+
40
+ idx = np.random.randint(len(split_text), size=1).astype(int)[0]
41
+ # Don't mask certain words
42
+ num_iters = 0
43
+ while split_text[idx].lower() in ['a', 'an', 'the', 'is', 'and', 'or']:
44
+ num_iters += 1
45
+ idx = np.random.randint(len(split_text), size=1).astype(int)[0]
46
+ if num_iters > 10:
47
+ break
48
+
49
+ masked_string = split_text[idx]
50
+ split_text[idx] = '[MASK]'
51
  masked_output = ' '.join(split_text)
52
+ return masked_output, masked_string
53
 
54
 
55
  class TempScalePipe(FillMaskPipeline):
 
161
 
162
 
163
  def unmask_single(text, temp=1):
164
+ masked_text, _ = add_mask(text)
 
165
  res = scrambler(masked_text, temp=temp, top_k=10)
166
  out = {item["token_str"]: item["score"] for item in res}
167
  return out
 
170
  def unmask(text, temp, rounds):
171
  sampling = 'multi'
172
  for _ in range(rounds):
173
+ masked_text, masked = add_mask(text)
 
174
  split_text = masked_text.split()
175
  res = scrambler(masked_text, temp=temp, top_k=15)
176
  mask_pos = [i for i, t in enumerate(split_text) if 'MASK' in t][0]
177
  out = {item["token_str"]: item["score"] for item in res}
178
  new_token = sample_output(out, sampling)
179
  unsuccessful_iters = 0
180
+ while new_token == masked:
181
  if unsuccessful_iters > 5:
182
  break
183
  print('skipped', new_token)
184
  new_token = sample_output(out, sampling=sampling)
185
  unsuccessful_iters += 1
186
+ if new_token == masked:
187
  split_text[mask_pos] = new_token
188
  else:
189
  split_text[mask_pos] = '*' + new_token + '*'
 
193
  text[0] = text[0].upper()
194
  return ''.join(text)
195
 
196
+
197
  textbox1 = gr.Textbox(label="Input Sentence", lines=5)
198
  output_textbox1 = gr.Textbox(placeholder="Output will appear here", lines=4)
199
 
 
203
  edit_slider2 = gr.Slider(1, 20, step=1, value=1.0, label='Number of edits')
204
 
205
  title1 = "Patent-BERT Sentence Remix-er: Single Edit"
206
+ description1 = """<p>Try inserting a '_' where you want the model to generate a list of likely words.
207
+ <strong>Note:</strong> You can only add one '_' per submission.
208
  <br/>
209
  <p/>"""
210
  title2 = "Patent-BERT Sentence Remix-er: Multiple Edits"
211
  description2 = """<p>Try typing in a sentence for the model to remix. Adjust the 'creativity' scale bar to change the
212
  the model's confidence in its likely substitutions and the 'number of edits' for the number of edits you want
213
+ the model to attempt to make. The words substituted in the output sentence will be enclosed in asterisks (e.g., *word*).
214
+ <br/> <p/> """
215
 
216
  demo1 = gr.Interface(
217
  fn=unmask_single,