minhtoan committed on
Commit
7f35677
·
1 Parent(s): ad67eab

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +52 -0
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: vi
3
+ tags:
4
+ - vi
5
+ - vietnamese
6
+ - gpt2
7
+ - text-generation
8
+ - lm
9
+ - nlp
10
+ datasets:
11
+ - vietnews
12
+ widget:
13
+ - text: "Hoa quả và rau thường rẻ hơn khi vào mùa"
14
+ ---
15
+
16
+ # GPT-2
17
+
18
+ Pretrained GPT-2 model on Vietnamese news
19
+
20
+ # How to use the model
21
+
22
+ ~~~~
23
+ import torch
24
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel
25
+
26
+ tokenizer = GPT2Tokenizer.from_pretrained('minhtoan/gpt2-finetune-vietnamese-news')
27
+ model = GPT2LMHeadModel.from_pretrained('minhtoan/gpt2-finetune-vietnamese-news')
28
+
29
+ text = "Hoa quả và rau thường rẻ hơn khi vào mùa"
30
+ input_ids = tokenizer.encode(text, return_tensors='pt')
31
+ max_length = 80
32
+
33
+ sample_outputs = model.generate(input_ids,pad_token_id=tokenizer.eos_token_id,
34
+ do_sample=True,
35
+ max_length=max_length,
36
+ min_length=max_length,
37
+ top_k=40,
38
+ num_beams=5,
39
+ early_stopping=True,
40
+ no_repeat_ngram_size=2,
41
+ num_return_sequences=3)
42
+
43
+ for i, sample_output in enumerate(sample_outputs):
44
+ print(">> Generated text {}\n\n{}".format(i+1, tokenizer.decode(sample_output.tolist())))
45
+ print('\n---')
46
+ ~~~~
47
+
48
+
49
+ ## Author
50
+ `
51
+ Phan Minh Toan
52
+ `