Jathish lcw99 committed on
Commit
656c9b4
·
0 Parent(s):

Duplicate from lcw99/gpt-neo-1.3B-ko-text-generator

Browse files

Co-authored-by: Chang W Lee <[email protected]>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +106 -0
  4. requirements.txt +9 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Gpt Neo 1.3B Ko Text Generator
3
+ emoji: 📉
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.10.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: lcw99/gpt-neo-1.3B-ko-text-generator
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from transformers import GPTNeoForCausalLM, AutoTokenizer, pipeline
6
+ import numpy as np
7
+ from tqdm import trange
8
+
9
+
10
+ import streamlit as st
11
+
12
+
13
def set_seed(seed):
    """Seed the NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to NumPy's global RNG, to PyTorch's
            default generator and, when CUDA is available, to the CUDA
            generators of every device.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Check for CUDA explicitly rather than wrapping the call in a bare
    # ``except``, which would also hide unrelated errors.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
20
+
21
+
22
# Both selectable checkpoints are loaded with the same
# (model class, tokenizer class) pair.
_GPT_NEO_LOADERS = (GPTNeoForCausalLM, AutoTokenizer)

# Registry of checkpoints offered in the UI: checkpoint id -> loader pair.
MODEL_CLASSES = {
    "lcw99/gpt-neo-1.3B-ko-fp16": _GPT_NEO_LOADERS,
    "lcw99/gpt-neo-1.3B-ko": _GPT_NEO_LOADERS,
}
26
+
27
+
28
# NOTE(review): this loader is not cached; if the hosting framework re-runs
# the script on every interaction the checkpoint is reloaded each time.
# Confirm whether a resource cache is wanted before adding one.
def load_model(model_name):
    """Load the causal language model and tokenizer for ``model_name``.

    Args:
        model_name: A key of ``MODEL_CLASSES``; the same string is used as
            the checkpoint id passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple with the model switched to eval mode.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_CLASSES``.
    """
    model_class, tokenizer_class = MODEL_CLASSES[model_name]

    model = model_class.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        use_cache=False,
        gradient_checkpointing=False,
        # Weight placement is decided here by the library.
        device_map='auto',
    )
    tokenizer = tokenizer_class.from_pretrained(model_name)

    # No explicit ``model.to(...)``: placement is already handled by
    # ``device_map`` above, and this function must not depend on a
    # script-level ``device`` global that may not exist when it is called.
    model.eval()
    return model, tokenizer
47
+
48
+
49
if __name__ == "__main__":
    # Sidebar controls for checkpoint choice and generation settings.
    model_name = st.sidebar.selectbox("Model", list(MODEL_CLASSES.keys()))
    length = st.sidebar.slider("Length", 50, 2048, 100)
    temperature = st.sidebar.slider("Temperature", 0.0, 3.0, 0.8)
    top_k = st.sidebar.slider("Top K", 0, 10, 0)
    top_p = st.sidebar.slider("Top P", 0.0, 1.0, 0.7)

    st.title("Text generation with GPT-neo Korean")
    # Label: "Enter a starting sentence and press Enter."
    # Placeholder: "If you want to play golf well,"
    raw_text = st.text_input(
        "시작하는 문장을 입력하고 엔터를 치세요.",
        placeholder="골프를 잘 치고 싶다면,",
        key="text_input1",
    )

    if raw_text:
        st.write(raw_text)
        with st.spinner(f'loading model({model_name}) wait...'):
            # Kept at module scope: the input tensors below are moved here.
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model, tokenizer = load_model(model_name)

        with st.spinner('Generating text wait...'):
            encoded_input = tokenizer(raw_text, return_tensors='pt')
            generation_kwargs = {
                "input_ids": encoded_input['input_ids'].to(device),
                "attention_mask": encoded_input['attention_mask'].to(device),
                "max_length": length,
                "min_length": 20,
            }
            if temperature > 0:
                # Honour the Temperature slider, which was previously read
                # but never forwarded to ``generate``.
                generation_kwargs.update(
                    do_sample=True,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                )
            else:
                # A temperature of 0 is not a valid sampling temperature;
                # treat it as a request for deterministic (greedy) decoding.
                generation_kwargs["do_sample"] = False

            output_sequences = model.generate(**generation_kwargs)
            generated = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
            st.write(generated)
105
+
106
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ numpy
3
+ tqdm
4
+ accelerate
5
+ bitsandbytes==0.35.4
6
+ --extra-index-url https://download.pytorch.org/whl/cu116
7
+ torch
8
+ torchvision
9
+ torchaudio