Spaces:
Runtime error
Runtime error
Peter
committed on
Commit
·
8d9ed7d
1
Parent(s):
7afd604
🔊 ⚡️ add logs, improve params
Browse files
Signed-off-by: Peter <[email protected]>
- grammar_improve.py +6 -4
grammar_improve.py
CHANGED
@@ -3,6 +3,8 @@ grammar_improve.py - this .py script contains functions to improve the grammar o
|
|
3 |
|
4 |
"""
|
5 |
|
|
|
|
|
6 |
import math
|
7 |
import pprint as pp
|
8 |
import re
|
@@ -453,8 +455,8 @@ def correct_grammar(
|
|
453 |
n_results: int = 1,
|
454 |
beams: int = 8,
|
455 |
temp=1,
|
456 |
-
|
457 |
-
rep_penalty=
|
458 |
device="cpu",
|
459 |
):
|
460 |
"""
|
@@ -480,7 +482,7 @@ def correct_grammar(
|
|
480 |
st = time.perf_counter()
|
481 |
|
482 |
if len(tokenizer(input_text).input_ids) < 4:
|
483 |
-
|
484 |
return input_text
|
485 |
max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
|
486 |
batch = tokenizer(
|
@@ -494,7 +496,7 @@ def correct_grammar(
|
|
494 |
**batch,
|
495 |
max_length=max_length,
|
496 |
min_length=min(10, len(input_text)),
|
497 |
-
no_repeat_ngram_size=
|
498 |
repetition_penalty=rep_penalty,
|
499 |
num_beams=beams,
|
500 |
num_return_sequences=n_results,
|
|
|
3 |
|
4 |
"""
|
5 |
|
6 |
+
import logging
|
7 |
+
logging.basicConfig(level=logging.INFO)
|
8 |
import math
|
9 |
import pprint as pp
|
10 |
import re
|
|
|
455 |
n_results: int = 1,
|
456 |
beams: int = 8,
|
457 |
temp=1,
|
458 |
+
no_repeat_ngram_size=4,
|
459 |
+
rep_penalty=2.5,
|
460 |
device="cpu",
|
461 |
):
|
462 |
"""
|
|
|
482 |
st = time.perf_counter()
|
483 |
|
484 |
if len(tokenizer(input_text).input_ids) < 4:
|
485 |
+
logging.info(f"input text of {input_text} is too short to be corrected")
|
486 |
return input_text
|
487 |
max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
|
488 |
batch = tokenizer(
|
|
|
496 |
**batch,
|
497 |
max_length=max_length,
|
498 |
min_length=min(10, len(input_text)),
|
499 |
+
no_repeat_ngram_size=no_repeat_ngram_size,
|
500 |
repetition_penalty=rep_penalty,
|
501 |
num_beams=beams,
|
502 |
num_return_sequences=n_results,
|