pham thuy tien
committed on
Update app.py
app.py
CHANGED
@@ -73,16 +73,19 @@ def sample_seq(model, context, length, device, temperature=1, top_k=0, top_p=0.0
             next_token = torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1)
             generated = torch.cat((generated, next_token.unsqueeze(0)), dim=1)
     return generated
-def add_special_tokens():
+def add_special_tokens(lang):
     """ Returns GPT2 tokenizer after adding separator and padding tokens """
-    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+    token = 'gpt2'
+    if lang =='vi':
+        token = 'NlpHUST/gpt2-vietnamese'
+    tokenizer = GPT2Tokenizer.from_pretrained(token)
     special_tokens = {'pad_token': '<|pad|>', 'sep_token': '<|sep|>'}
     tokenizer.add_special_tokens(special_tokens)
     return tokenizer
 
 def gene(t,a):
 
+    tokenizer = add_special_tokens(a)
     article = tokenizer.encode(t)[:900]
     # Load model directly
     model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
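For context, a minimal sketch of what this commit changes: add_special_tokens now takes a language code, defaults to the English 'gpt2' vocabulary, and switches to 'NlpHUST/gpt2-vietnamese' when lang == 'vi', so gene(t, a) can pass its language argument straight through. The snippet below re-derives only that helper plus a hypothetical call site; it assumes the transformers import already in app.py, that both checkpoints ship standard GPT-2 vocab/merges files loadable with GPT2Tokenizer, and that the checkpoints are fetched from the Hugging Face Hub on first use.

from transformers import GPT2Tokenizer

def add_special_tokens(lang):
    """Return a GPT-2 tokenizer with <|pad|> and <|sep|> registered."""
    # Default to the English GPT-2 vocabulary; switch to the Vietnamese
    # checkpoint when the caller passes lang == 'vi'.
    checkpoint = 'gpt2'
    if lang == 'vi':
        checkpoint = 'NlpHUST/gpt2-vietnamese'
    tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
    tokenizer.add_special_tokens({'pad_token': '<|pad|>', 'sep_token': '<|sep|>'})
    return tokenizer

# Hypothetical call site mirroring gene(t, a): encode the article and
# truncate it to 900 tokens before generation.
tokenizer = add_special_tokens('vi')
article_ids = tokenizer.encode("Một bài báo ví dụ.")[:900]
print(len(article_ids), tokenizer.pad_token_id, tokenizer.sep_token_id)

Note that adding the two special tokens grows the vocabulary, so the fine-tuned checkpoints loaded later (such as tiennlu/GPT2en_CNNen_3k) need embedding matrices sized to match; this is usually handled at training time with model.resize_token_embeddings(len(tokenizer)).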