add models
- app.py +95 -8
- git +0 -0
- src/model.py +72 -0
app.py
CHANGED
@@ -3,22 +3,109 @@ import gradio as gr
 
 if gr.NO_RELOAD:
     import numpy as np
-    from src.…
+    from src.model import BaseTransferLearningModel
 
 
 DEVICE = 'cpu'
 MODELS = [
     (
-        '…
-        lambda: …
+        'bert-model_1950',
+        lambda: BaseTransferLearningModel(
+            'bert-base-uncased',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/bert-model_1950.pt',
+        ),
+    ),
+    (
+        'bert-model_2000',
+        lambda: BaseTransferLearningModel(
+            'bert-base-uncased',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/bert-model_2000.pt',
+        ),
+    ),
+    (
+        'deberta-base-model_4400',
+        lambda: BaseTransferLearningModel(
+            'microsoft/deberta-base',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/deberta-base-model_4400.pt',
+        ),
+    ),
+    (
+        'deberta-base-model_8000',
+        lambda: BaseTransferLearningModel(
+            'microsoft/deberta-base',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/deberta-base-model_8000.pt',
+        ),
+    ),
+    (
+        'deberta-v3-base-model_3400',
+        lambda: BaseTransferLearningModel(
+            'microsoft/deberta-v3-base',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/deberta-v3-base-model_3400.pt',
+        ),
+    ),
+    (
+        'deberta-v3-base-model_4000',
+        lambda: BaseTransferLearningModel(
+            'microsoft/deberta-v3-base',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/deberta-v3-base-model_4000.pt',
+        ),
+    ),
+    (
+        'distilbert-model_1850',
+        lambda: BaseTransferLearningModel(
             'distilbert-base-uncased',
-            […
-            …
-            …
-            …
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/distilbert-model_1850.pt',
+        ),
+    ),
+    (
+        'distilbert-model_2000',
+        lambda: BaseTransferLearningModel(
+            'distilbert-base-uncased',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/distilbert-model_2000.pt',
+        ),
+    ),
+    (
+        'roberta-base-model_1250',
+        lambda: BaseTransferLearningModel(
+            'FacebookAI/roberta-base',
+            [('linear', ['in', 'out']), ('softmax')],
+            2,
+            device=DEVICE,
+            state_dict='src/ckpt/roberta-base-model_1250.pt',
+        ),
+    ),
+    (
+        'roberta-base-model_2000',
+        lambda: BaseTransferLearningModel(
+            'FacebookAI/roberta-base',
+            [('linear', ['in', 'out']), ('softmax')],
             2,
             device=DEVICE,
-            state_dict='src/ckpt/…
+            state_dict='src/ckpt/roberta-base-model_2000.pt',
         ),
     ),
 ]
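In app.py, each MODELS entry pairs a display name with a zero-argument lambda, so a checkpoint is only constructed and loaded when that model is actually requested rather than at import time. The rest of app.py is outside this hunk, so the wiring below is only a minimal sketch of how such a registry could be consumed from a Gradio interface; the classify function, label names, and caching dict are assumptions, not code from this commit.

# Hypothetical sketch -- not part of this commit; assumes MODELS and DEVICE from app.py above.
import gradio as gr

_loaded = {}  # cache so each factory lambda runs at most once

def classify(text: str, model_name: str) -> dict:
    factory = dict(MODELS)[model_name]          # look up the constructor by display name
    if model_name not in _loaded:
        _loaded[model_name] = factory()         # lazily build the model and load its checkpoint
    probs = _loaded[model_name].predict(text, DEVICE)[0]   # (2,) tensor of class probabilities
    return {f"class_{i}": float(p) for i, p in enumerate(probs)}

demo = gr.Interface(
    fn=classify,
    inputs=[gr.Textbox(label="Text"),
            gr.Dropdown([name for name, _ in MODELS], label="Model")],
    outputs=gr.Label(),
)

if __name__ == "__main__":
    demo.launch()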
git
ADDED
File without changes
src/model.py
ADDED
@@ -0,0 +1,72 @@
+from typing import Any, Optional, Tuple, Union
+import torch
+import transformers
+
+
+class BaseTransferLearningModel(torch.nn.Module):
+
+    def __init__(
+        self,
+        pretrained_model: str = "distilbert-base-uncased",
+        layers: list[Tuple[str, Optional[list[Any]]]] = [
+            ('linear', ['in', 'out']),
+            ('softmax'),
+        ],
+        dim_out: int = 2,
+        use_local_file: bool = False,
+        device: str = 'cpu',
+        state_dict: Optional[Union[str, dict]] = None,
+    ):
+        super(BaseTransferLearningModel, self).__init__()
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
+            pretrained_model, local_files_only=use_local_file
+        )
+        self.base_model = transformers.AutoModel.from_pretrained(
+            pretrained_model, local_files_only=use_local_file
+        )
+        clf_layers = []
+        for layer in layers:
+            layer_type = layer[0] if isinstance(layer, tuple) else layer
+            if layer_type == 'linear':
+                layer_in, layer_out = [
+                    (
+                        self.base_model.config.hidden_size
+                        if x == 'in'
+                        else dim_out if x == 'out' else x
+                    )
+                    for x in layer[1]
+                ]
+                clf_layers.append(torch.nn.Linear(layer_in, layer_out))
+            elif layer_type == 'softmax':
+                clf_layers.append(torch.nn.Softmax(dim=-1))
+        self.clf = torch.nn.Sequential(*clf_layers)
+
+        if state_dict is not None:
+            if isinstance(state_dict, str) and state_dict.endswith('.pt'):
+                if device == 'cpu':
+                    state_dict = torch.load(state_dict, map_location='cpu')
+                else:
+                    state_dict = torch.load(state_dict)
+            self.load_state_dict(state_dict)
+
+    def forward(self, ids: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+        y = self.base_model(ids, attention_mask=mask, return_dict=False)[0][:, 0]
+        y = self.clf(y)
+        return y
+
+    def predict(self, text: str, device: str) -> torch.Tensor:
+        encoded = self.tokenizer.encode_plus(
+            text,
+            add_special_tokens=True,
+            return_token_type_ids=False,
+            return_attention_mask=True,
+            max_length=512,
+            padding='max_length',
+            truncation=True,
+            return_tensors='pt',
+        )
+        with torch.no_grad():
+            ids = encoded['input_ids'].to(device)
+            mask = encoded['attention_mask'].to(device)
+            output = self.forward(ids, mask)
+        return output.to(device)
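src/model.py supplies the wrapper those lambdas build: a Hugging Face backbone (AutoTokenizer plus AutoModel) followed by a small head assembled from the layers spec, where 'in' resolves to the backbone's hidden size and 'out' to dim_out, and forward() classifies from the first-token embedding. Because the 'softmax' entry appends torch.nn.Softmax(dim=-1), predict() returns probabilities rather than raw logits. A minimal usage sketch, assuming the checkpoint listed in app.py exists under src/ckpt/ and the backbone can be fetched from the Hub; the input string is a placeholder:

# Minimal usage sketch, assuming src/ckpt/distilbert-model_2000.pt is present locally.
from src.model import BaseTransferLearningModel

model = BaseTransferLearningModel(
    'distilbert-base-uncased',
    [('linear', ['in', 'out']), ('softmax')],   # hidden_size -> 2 -> probabilities
    2,
    device='cpu',
    state_dict='src/ckpt/distilbert-model_2000.pt',
)
model.eval()   # disable dropout in the backbone for inference

probs = model.predict("example input text", 'cpu')   # tensor of shape (1, 2)
print(probs)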