pham thuy tien committed
Commit 81d4cc5 · verified · 1 Parent(s): 71485a1

Update app.py

Files changed (1)
  1. app.py +6 -3
app.py CHANGED
@@ -73,16 +73,19 @@ def sample_seq(model, context, length, device, temperature=1, top_k=0, top_p=0.0
             next_token = torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1)
             generated = torch.cat((generated, next_token.unsqueeze(0)), dim=1)
     return generated
-def add_special_tokens():
+def add_special_tokens(lang):
     """ Returns GPT2 tokenizer after adding separator and padding tokens """
-    tokenizer = GPT2Tokenizer.from_pretrained('NlpHUST/gpt2-vietnamese')
+    token = 'gpt2'
+    if lang =='vi':
+        token = 'NlpHUST/gpt2-vietnamese'
+    tokenizer = GPT2Tokenizer.from_pretrained(token)
     special_tokens = {'pad_token': '<|pad|>', 'sep_token': '<|sep|>'}
     tokenizer.add_special_tokens(special_tokens)
     return tokenizer
 
 def gene(t,a):
 
-    tokenizer = add_special_tokens()
+    tokenizer = add_special_tokens(a)
     article = tokenizer.encode(t)[:900]
     # Load model directly
     model = AutoModelForCausalLM.from_pretrained("tiennlu/GPT2en_CNNen_3k")
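
For readers skimming the diff: the change makes the tokenizer language-aware instead of always loading the Vietnamese checkpoint. Below is a minimal standalone sketch of the updated helper; the function body mirrors the hunk above, while the example call at the end (the 'en' language code and the sample text) is illustrative only and not part of the commit.

from transformers import GPT2Tokenizer

def add_special_tokens(lang):
    """Return a GPT-2 tokenizer with separator and padding tokens added."""
    # Default to the English GPT-2 tokenizer; switch to the Vietnamese
    # checkpoint only when the caller asks for 'vi'.
    token = 'gpt2'
    if lang == 'vi':
        token = 'NlpHUST/gpt2-vietnamese'
    tokenizer = GPT2Tokenizer.from_pretrained(token)
    special_tokens = {'pad_token': '<|pad|>', 'sep_token': '<|sep|>'}
    tokenizer.add_special_tokens(special_tokens)
    return tokenizer

# Illustrative call only -- in the app the language flag is passed through gene(t, a):
tokenizer = add_special_tokens('en')   # assumption: any value other than 'vi' keeps the default 'gpt2'
article = tokenizer.encode("Some input article text")[:900]   # same 900-token truncation used in gene()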