shoukaku committed on
Commit
551aa70
·
1 Parent(s): f489111

add models

Browse files
Files changed (3) hide show
  1. app.py +95 -8
  2. git +0 -0
  3. src/model.py +72 -0
app.py CHANGED
@@ -3,22 +3,109 @@ import gradio as gr
3
 
4
  if gr.NO_RELOAD:
5
  import numpy as np
6
- from src.distilbert_tf import DistilBertTransferLearningModel
7
 
8
 
9
  DEVICE = 'cpu'
10
  MODELS = [
11
  (
12
- 'distilbert-1linear-1650',
13
- lambda: DistilBertTransferLearningModel(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  'distilbert-base-uncased',
15
- [
16
- ('linear', ['in', 'out']),
17
- ('softmax'),
18
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  2,
20
  device=DEVICE,
21
- state_dict='src/ckpt/distilbert-1linear-dataset-all-augmented-all-1650.pt',
22
  ),
23
  ),
24
  ]
 
3
 
4
  if gr.NO_RELOAD:
5
  import numpy as np
6
+ from src.model import BaseTransferLearningModel
7
 
8
 
9
  DEVICE = 'cpu'
10
  MODELS = [
11
  (
12
+ 'bert-model_1950',
13
+ lambda: BaseTransferLearningModel(
14
+ 'bert-base-uncased',
15
+ [('linear', ['in', 'out']), ('softmax')],
16
+ 2,
17
+ device=DEVICE,
18
+ state_dict='src/ckpt/bert-model_1950.pt',
19
+ ),
20
+ ),
21
+ (
22
+ 'bert-model_2000',
23
+ lambda: BaseTransferLearningModel(
24
+ 'bert-base-uncased',
25
+ [('linear', ['in', 'out']), ('softmax')],
26
+ 2,
27
+ device=DEVICE,
28
+ state_dict='src/ckpt/bert-model_2000.pt',
29
+ ),
30
+ ),
31
+ (
32
+ 'deberta-base-model_4400',
33
+ lambda: BaseTransferLearningModel(
34
+ 'microsoft/deberta-base',
35
+ [('linear', ['in', 'out']), ('softmax')],
36
+ 2,
37
+ device=DEVICE,
38
+ state_dict='src/ckpt/deberta-base-model_4400.pt',
39
+ ),
40
+ ),
41
+ (
42
+ 'deberta-base-model_8000',
43
+ lambda: BaseTransferLearningModel(
44
+ 'microsoft/deberta-base',
45
+ [('linear', ['in', 'out']), ('softmax')],
46
+ 2,
47
+ device=DEVICE,
48
+ state_dict='src/ckpt/deberta-base-model_8000.pt',
49
+ ),
50
+ ),
51
+ (
52
+ 'deberta-v3-base-model_3400',
53
+ lambda: BaseTransferLearningModel(
54
+ 'microsoft/deberta-v3-base',
55
+ [('linear', ['in', 'out']), ('softmax')],
56
+ 2,
57
+ device=DEVICE,
58
+ state_dict='src/ckpt/deberta-v3-base-model_3400.pt',
59
+ ),
60
+ ),
61
+ (
62
+ 'deberta-v3-base-model_4000',
63
+ lambda: BaseTransferLearningModel(
64
+ 'microsoft/deberta-v3-base',
65
+ [('linear', ['in', 'out']), ('softmax')],
66
+ 2,
67
+ device=DEVICE,
68
+ state_dict='src/ckpt/deberta-v3-base-model_4000.pt',
69
+ ),
70
+ ),
71
+ (
72
+ 'distilbert-model_1850',
73
+ lambda: BaseTransferLearningModel(
74
  'distilbert-base-uncased',
75
+ [('linear', ['in', 'out']), ('softmax')],
76
+ 2,
77
+ device=DEVICE,
78
+ state_dict='src/ckpt/distilbert-model_1850.pt',
79
+ ),
80
+ ),
81
+ (
82
+ 'distilbert-model_2000',
83
+ lambda: BaseTransferLearningModel(
84
+ 'distilbert-base-uncased',
85
+ [('linear', ['in', 'out']), ('softmax')],
86
+ 2,
87
+ device=DEVICE,
88
+ state_dict='src/ckpt/distilbert-model_2000.pt',
89
+ ),
90
+ ),
91
+ (
92
+ 'roberta-base-model_1250',
93
+ lambda: BaseTransferLearningModel(
94
+ 'FacebookAI/roberta-base',
95
+ [('linear', ['in', 'out']), ('softmax')],
96
+ 2,
97
+ device=DEVICE,
98
+ state_dict='src/ckpt/roberta-base-model_1250.pt',
99
+ ),
100
+ ),
101
+ (
102
+ 'roberta-base-model_2000',
103
+ lambda: BaseTransferLearningModel(
104
+ 'FacebookAI/roberta-base',
105
+ [('linear', ['in', 'out']), ('softmax')],
106
  2,
107
  device=DEVICE,
108
+ state_dict='src/ckpt/roberta-base-model_2000.pt',
109
  ),
110
  ),
111
  ]
git ADDED
File without changes
src/model.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional, Tuple, Union
2
+ import torch
3
+ import transformers
4
+
5
+
6
+ class BaseTransferLearningModel(torch.nn.Module):
7
+
8
+ def __init__(
9
+ self,
10
+ pretrained_model: str = "distilbert-base-uncased",
11
+ layers: list[Tuple[str, Optional[list[Any]]]] = [
12
+ ('linear', ['in', 'out']),
13
+ ('softmax'),
14
+ ],
15
+ dim_out: int = 2,
16
+ use_local_file: bool = False,
17
+ device: str = 'cpu',
18
+ state_dict: Optional[Union[str, dict]] = None,
19
+ ):
20
+ super(BaseTransferLearningModel, self).__init__()
21
+ self.tokenizer = transformers.AutoTokenizer.from_pretrained(
22
+ pretrained_model, local_files_only=use_local_file
23
+ )
24
+ self.base_model = transformers.AutoModel.from_pretrained(
25
+ pretrained_model, local_files_only=use_local_file
26
+ )
27
+ clf_layers = []
28
+ for layer in layers:
29
+ layer_type = layer[0] if isinstance(layer, tuple) else layer
30
+ if layer_type == 'linear':
31
+ layer_in, layer_out = [
32
+ (
33
+ self.base_model.config.hidden_size
34
+ if x == 'in'
35
+ else dim_out if x == 'out' else x
36
+ )
37
+ for x in layer[1]
38
+ ]
39
+ clf_layers.append(torch.nn.Linear(layer_in, layer_out))
40
+ elif layer_type == 'softmax':
41
+ clf_layers.append(torch.nn.Softmax(dim=-1))
42
+ self.clf = torch.nn.Sequential(*clf_layers)
43
+
44
+ if state_dict is not None:
45
+ if isinstance(state_dict, str) and state_dict.endswith('.pt'):
46
+ if device == 'cpu':
47
+ state_dict = torch.load(state_dict, map_location='cpu')
48
+ else:
49
+ state_dict = torch.load(state_dict)
50
+ self.load_state_dict(state_dict)
51
+
52
+ def forward(self, ids: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
53
+ y = self.base_model(ids, attention_mask=mask, return_dict=False)[0][:, 0]
54
+ y = self.clf(y)
55
+ return y
56
+
57
+ def predict(self, text: str, device: str) -> torch.Tensor:
58
+ encoded = self.tokenizer.encode_plus(
59
+ text,
60
+ add_special_tokens=True,
61
+ return_token_type_ids=False,
62
+ return_attention_mask=True,
63
+ max_length=512,
64
+ padding='max_length',
65
+ truncation=True,
66
+ return_tensors='pt',
67
+ )
68
+ with torch.no_grad():
69
+ ids = encoded['input_ids'].to(device)
70
+ mask = encoded['attention_mask'].to(device)
71
+ output = self.forward(ids, mask)
72
+ return output.to(device)