Add SetFit model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +476 -0
- config.json +25 -0
- config_sentence_transformers.json +9 -0
- config_setfit.json +212 -0
- model.safetensors +3 -0
- model_head.pkl +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: setfit
|
3 |
+
tags:
|
4 |
+
- setfit
|
5 |
+
- sentence-transformers
|
6 |
+
- text-classification
|
7 |
+
- generated_from_setfit_trainer
|
8 |
+
base_model: jhgan/ko-sbert-multitask
|
9 |
+
metrics:
|
10 |
+
- accuracy
|
11 |
+
widget:
|
12 |
+
- text: 제습기 전기세가 어느 정도 나오는지 궁금합니다. 요즘 비가 계속 오기도 했고 집 안의 습도가 너무 높아서 거의 하루 종일 제습기를 틀어놓고는
|
13 |
+
하는데 인터넷에서 제습기 전기세가 꽤나 많이 나온다는 내용의 글응 보았습니다. 제습기 전기세가 다른 가전제품에 비해서 어느 정도 나오는지 궁금합니다.
|
14 |
+
- text: '우주선에 사용하는 전선은 일상생활에 사용하는것과 같은 재질인가요? 궁금한데요. 영화같은데보면 일반전선이랑 같은거같은데 실제로 그러진않겠죠?
|
15 |
+
지구대기랑 다른 악조건에 견디게 만들거같은데 어떨런지요? '
|
16 |
+
- text: 부산 바다가 보이는 카페는 어떤 곳이 있나요? 다음달에 부산 여행을 갈 생각인데요. 부산 바다가 좀 보이는 카페를 가보고 싶어요. 하지만
|
17 |
+
너무 사람들이 많이 오는곳은 제외 하구요. 1층이 아니라 고층이면 더 좋습니다!
|
18 |
+
- text: 경희대학교 토목공학과 전망 괜찮을까요? 경희대학교 토목공학과 전망 괜찮은지 궁금합니다.토목공학 자체가 지금 그렇게 좋은 느낌은 아닌
|
19 |
+
것 같은데..ㅠㅠ우스갯 소리로 잘되려면 통일이 되어야 토목공학과가 살아날 수 있다고 하던데ㅋ....그래도 학교 네임 밸류가 좋아서.. 괜찮을
|
20 |
+
것 같기도하고전망 어떤가요!?
|
21 |
+
- text: 치아관리 치아가잇몸이올라가 치아뿌리분이조금보여요 제목그대로 치아뿌리가보이고 잇몸이 올라가있는거같아요 잇몸에좋은치료방법이있나요. 잇몸치약을쓰고있는데.
|
22 |
+
다른방법이있으면알려주세요
|
23 |
+
pipeline_tag: text-classification
|
24 |
+
inference: true
|
25 |
+
model-index:
|
26 |
+
- name: SetFit with jhgan/ko-sbert-multitask
|
27 |
+
results:
|
28 |
+
- task:
|
29 |
+
type: text-classification
|
30 |
+
name: Text Classification
|
31 |
+
dataset:
|
32 |
+
name: Unknown
|
33 |
+
type: unknown
|
34 |
+
split: test
|
35 |
+
metrics:
|
36 |
+
- type: accuracy
|
37 |
+
value: 0.5528846153846154
|
38 |
+
name: Accuracy
|
39 |
+
---
|
40 |
+
|
41 |
+
# SetFit with jhgan/ko-sbert-multitask
|
42 |
+
|
43 |
+
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [jhgan/ko-sbert-multitask](https://huggingface.co/jhgan/ko-sbert-multitask) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
|
44 |
+
|
45 |
+
The model has been trained using an efficient few-shot learning technique that involves:
|
46 |
+
|
47 |
+
1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
|
48 |
+
2. Training a classification head with features from the fine-tuned Sentence Transformer.
|
49 |
+
|
50 |
+
## Model Details
|
51 |
+
|
52 |
+
### Model Description
|
53 |
+
- **Model Type:** SetFit
|
54 |
+
- **Sentence Transformer body:** [jhgan/ko-sbert-multitask](https://huggingface.co/jhgan/ko-sbert-multitask)
|
55 |
+
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
56 |
+
- **Maximum Sequence Length:** 128 tokens
|
57 |
+
- **Number of Classes:** 208 classes
|
58 |
+
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
59 |
+
<!-- - **Language:** Unknown -->
|
60 |
+
<!-- - **License:** Unknown -->
|
61 |
+
|
62 |
+
### Model Sources
|
63 |
+
|
64 |
+
- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
|
65 |
+
- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
|
66 |
+
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
|
67 |
+
|
68 |
+
## Evaluation
|
69 |
+
|
70 |
+
### Metrics
|
71 |
+
| Label | Accuracy |
|
72 |
+
|:--------|:---------|
|
73 |
+
| **all** | 0.5529 |
|
74 |
+
|
75 |
+
## Uses
|
76 |
+
|
77 |
+
### Direct Use for Inference
|
78 |
+
|
79 |
+
First install the SetFit library:
|
80 |
+
|
81 |
+
```bash
|
82 |
+
pip install setfit
|
83 |
+
```
|
84 |
+
|
85 |
+
Then you can load this model and run inference.
|
86 |
+
|
87 |
+
```python
|
88 |
+
from setfit import SetFitModel
|
89 |
+
|
90 |
+
# Download from the 🤗 Hub
|
91 |
+
model = SetFitModel.from_pretrained("djsull/setfit_classifier")
|
92 |
+
# Run inference
|
93 |
+
preds = model("치아관리 치아가잇몸이올라가 치아뿌리분이조금보여요 제목그대로 치아뿌리가보이고 잇몸이 올라가있는거같아요 잇몸에좋은치료방법이있나요. 잇몸치약을쓰고있는데. 다른방법이있으면알려주세요")
|
94 |
+
```
|
95 |
+
|
96 |
+
<!--
|
97 |
+
### Downstream Use
|
98 |
+
|
99 |
+
*List how someone could finetune this model on their own dataset.*
|
100 |
+
-->
|
101 |
+
|
102 |
+
<!--
|
103 |
+
### Out-of-Scope Use
|
104 |
+
|
105 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
106 |
+
-->
|
107 |
+
|
108 |
+
<!--
|
109 |
+
## Bias, Risks and Limitations
|
110 |
+
|
111 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
112 |
+
-->
|
113 |
+
|
114 |
+
<!--
|
115 |
+
### Recommendations
|
116 |
+
|
117 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
118 |
+
-->
|
119 |
+
|
120 |
+
## Training Details
|
121 |
+
|
122 |
+
### Training Set Metrics
|
123 |
+
| Training set | Min | Mean | Max |
|
124 |
+
|:-------------|:----|:--------|:----|
|
125 |
+
| Word count | 10 | 34.4464 | 468 |
|
126 |
+
|
127 |
+
| Label | Training Sample Count |
|
128 |
+
|:----------|:----------------------|
|
129 |
+
| PC·노트북 | 0 |
|
130 |
+
| 양도소득세 | 0 |
|
131 |
+
| 토목공학 | 7 |
|
132 |
+
| 책·독서 | 7 |
|
133 |
+
| 대인관계 | 7 |
|
134 |
+
| 기타 고민상담 | 7 |
|
135 |
+
| 공인노무사 자격증 | 7 |
|
136 |
+
| 서울·수도권 | 7 |
|
137 |
+
| 연세대학교 | 7 |
|
138 |
+
| 자동차 수리 | 7 |
|
139 |
+
| 반려동물 건강 | 7 |
|
140 |
+
| 예능 | 7 |
|
141 |
+
| 가압류·가처분 | 7 |
|
142 |
+
| 기타 육아상담 | 7 |
|
143 |
+
| 역사 | 7 |
|
144 |
+
| 손해사정사 자격증 | 7 |
|
145 |
+
| 환자 식단 | 7 |
|
146 |
+
| 서울시립대학교 | 7 |
|
147 |
+
| 기타 심리상담 | 0 |
|
148 |
+
| 형사 | 7 |
|
149 |
+
| 치과대학 | 7 |
|
150 |
+
| 예금·적금 | 7 |
|
151 |
+
| 물리 | 7 |
|
152 |
+
| 일본 | 7 |
|
153 |
+
| 낚시 | 7 |
|
154 |
+
| 현대중공업 | 7 |
|
155 |
+
| 교통사고 | 7 |
|
156 |
+
| 공인회계사 자격증 | 7 |
|
157 |
+
| 농구 | 7 |
|
158 |
+
| 종합부동산세 | 7 |
|
159 |
+
| 비뇨의학과 | 7 |
|
160 |
+
| 명예훼손·모욕 | 0 |
|
161 |
+
| 지게차운전기능사 | 7 |
|
162 |
+
| 가족·이혼 | 7 |
|
163 |
+
| 현대자동차 | 7 |
|
164 |
+
| 기계공학 | 7 |
|
165 |
+
| 영양제 | 7 |
|
166 |
+
| 연말정산 | 7 |
|
167 |
+
| 성균관대학교 | 7 |
|
168 |
+
| 미술 | 7 |
|
169 |
+
| 양육·훈육 | 7 |
|
170 |
+
| 경제용어 | 0 |
|
171 |
+
| 경희대학교 | 7 |
|
172 |
+
| 금융 | 7 |
|
173 |
+
| 스케이트 | 7 |
|
174 |
+
| 반려동물 훈련 | 7 |
|
175 |
+
| 한약 | 7 |
|
176 |
+
| 생물·생명 | 7 |
|
177 |
+
| 사회복지사 자격증 | 7 |
|
178 |
+
| 한의학 | 7 |
|
179 |
+
| 부동산·임대차 | 7 |
|
180 |
+
| 회생·파산 | 7 |
|
181 |
+
| 임금체불 | 7 |
|
182 |
+
| 변리사 자격증 | 7 |
|
183 |
+
| 피부과 | 7 |
|
184 |
+
| 놀이 | 7 |
|
185 |
+
| 대출 | 7 |
|
186 |
+
| 재산범죄 | 0 |
|
187 |
+
| 영유아·아동 식단 | 7 |
|
188 |
+
| 계절가전 | 7 |
|
189 |
+
| 재료공학 | 7 |
|
190 |
+
| 전라 | 7 |
|
191 |
+
| 다이어트 식단 | 7 |
|
192 |
+
| 경제정책 | 0 |
|
193 |
+
| 캠핑 | 7 |
|
194 |
+
| 철학 | 7 |
|
195 |
+
| 치과 | 7 |
|
196 |
+
| 서강대학교 | 7 |
|
197 |
+
| 자아·자기이해 | 0 |
|
198 |
+
| 환경·에너지 | 7 |
|
199 |
+
| 포항공과대학교 | 7 |
|
200 |
+
| 제주 | 7 |
|
201 |
+
| 행정사 자격증 | 7 |
|
202 |
+
| 임금·급여 | 7 |
|
203 |
+
| 세탁수선 | 0 |
|
204 |
+
| 중학교 생활 | 7 |
|
205 |
+
| 정보처리기사 | 7 |
|
206 |
+
| 수영 | 7 |
|
207 |
+
| 직장내괴롭힘 | 0 |
|
208 |
+
| 소아청소년과 | 7 |
|
209 |
+
| 재활·물리치료 | 7 |
|
210 |
+
| 부산 | 7 |
|
211 |
+
| 안과 | 7 |
|
212 |
+
| 런닝 | 0 |
|
213 |
+
| 지식재산권·IT | 7 |
|
214 |
+
| 사진·영상 | 7 |
|
215 |
+
| 세무사 자격증 | 7 |
|
216 |
+
| 축구·풋살 | 7 |
|
217 |
+
| 미주·중남미 | 7 |
|
218 |
+
| 중앙대학교 | 7 |
|
219 |
+
| 음향기기 | 7 |
|
220 |
+
| 한화 | 7 |
|
221 |
+
| 세무조사·불복 | 7 |
|
222 |
+
| 교통사고 과실 | 0 |
|
223 |
+
| 서울대학교 | 7 |
|
224 |
+
| 의료 | 7 |
|
225 |
+
| 식습관·식이요법 | 7 |
|
226 |
+
| 구조조정 | 7 |
|
227 |
+
| 법인세 | 7 |
|
228 |
+
| 기타 영양상담 | 0 |
|
229 |
+
| 자동차 | 0 |
|
230 |
+
| KAIST | 7 |
|
231 |
+
| 이비인후과 | 0 |
|
232 |
+
| 삼성 | 0 |
|
233 |
+
| 골프 | 7 |
|
234 |
+
| 경상 | 7 |
|
235 |
+
| 등산·클라이밍 | 7 |
|
236 |
+
| 기타 스포츠 | 7 |
|
237 |
+
| 성범죄 | 7 |
|
238 |
+
| GIST | 7 |
|
239 |
+
| 가드닝 | 7 |
|
240 |
+
| 한국외국어대학교 | 7 |
|
241 |
+
| PC 주변기기 | 7 |
|
242 |
+
| 기타 장치 | 7 |
|
243 |
+
| 연애·이성 | 7 |
|
244 |
+
| 취득세·등록세 | 7 |
|
245 |
+
| 산부인과 | 7 |
|
246 |
+
| 기타 세금상담 | 0 |
|
247 |
+
| 헬스 | 7 |
|
248 |
+
| 간호조무사 자격증 | 7 |
|
249 |
+
| 전기기사·기능사 | 7 |
|
250 |
+
| 오세아니아 | 7 |
|
251 |
+
| 무역 | 7 |
|
252 |
+
| 청소 | 0 |
|
253 |
+
| 공인중개사 자격증 | 7 |
|
254 |
+
| 중국 | 0 |
|
255 |
+
| 인테리어 | 7 |
|
256 |
+
| 동남아 | 0 |
|
257 |
+
| 군대 생활 | 0 |
|
258 |
+
| 영상가전 | 0 |
|
259 |
+
| 상속세 | 0 |
|
260 |
+
| LG | 7 |
|
261 |
+
| 부동산 | 0 |
|
262 |
+
| 기타 노무상담 | 0 |
|
263 |
+
| 웹툰·웹소설 | 0 |
|
264 |
+
| 뮤지컬·연극 | 0 |
|
265 |
+
| 네이버 | 0 |
|
266 |
+
| 근로계약 | 7 |
|
267 |
+
| 의료 보험 | 7 |
|
268 |
+
| 내과 | 7 |
|
269 |
+
| 세탁기·건조기 | 7 |
|
270 |
+
| 유럽·아프리카 | 7 |
|
271 |
+
| 기타 약료상담 | 7 |
|
272 |
+
| 드라마 | 7 |
|
273 |
+
| 반려동물 미용 | 0 |
|
274 |
+
| 스마트폰·태블릿 | 7 |
|
275 |
+
| 기타 의료상담 | 7 |
|
276 |
+
| 화학 | 7 |
|
277 |
+
| 청소기 | 7 |
|
278 |
+
| 테니스 | 7 |
|
279 |
+
| 충청 | 7 |
|
280 |
+
| 대학교 생활 | 7 |
|
281 |
+
| 야구 | 7 |
|
282 |
+
| 고등학교 생활 | 7 |
|
283 |
+
| 정형외과 | 7 |
|
284 |
+
| 재산 보험 | 7 |
|
285 |
+
| 화학공학 | 0 |
|
286 |
+
| 기타가전 | 7 |
|
287 |
+
| 문학 | 7 |
|
288 |
+
| 기타 법률상담 | 7 |
|
289 |
+
| 정신건강 | 7 |
|
290 |
+
| 크로스핏 | 0 |
|
291 |
+
| 요가·필라테스 | 7 |
|
292 |
+
| 스키·보드 | 7 |
|
293 |
+
| 보험설계사 자격증 | 7 |
|
294 |
+
| 해고·징계 | 0 |
|
295 |
+
| 휴일·휴가 | 0 |
|
296 |
+
| 성형외과 | 7 |
|
297 |
+
| 민사 | 7 |
|
298 |
+
| 음악 | 7 |
|
299 |
+
| 산업안전산업기사 | 7 |
|
300 |
+
| 유아교육 | 7 |
|
301 |
+
| 약 복용 | 7 |
|
302 |
+
| 주방가전 | 7 |
|
303 |
+
| 전기·전자 | 7 |
|
304 |
+
| 생활·미용가전 | 7 |
|
305 |
+
| 변호사 자격증 | 0 |
|
306 |
+
| 영화·애니 | 7 |
|
307 |
+
| 저축성 보험 | 7 |
|
308 |
+
| 증여세 | 7 |
|
309 |
+
| 회사 생활 | 7 |
|
310 |
+
| 기업·회사 | 7 |
|
311 |
+
| 자산관리 | 7 |
|
312 |
+
| 하이킹 | 7 |
|
313 |
+
| 연애·결혼 | 0 |
|
314 |
+
| 한의대학 | 7 |
|
315 |
+
| 지구과학·천문우주 | 7 |
|
316 |
+
| 롯데 | 7 |
|
317 |
+
| 산업재해 | 7 |
|
318 |
+
| 고려대학교 | 7 |
|
319 |
+
| 관세사 자격증 | 7 |
|
320 |
+
| 경제동향 | 7 |
|
321 |
+
| 회계자문 | 7 |
|
322 |
+
| 웨어러블 기기 | 7 |
|
323 |
+
| UNIST | 7 |
|
324 |
+
| SK | 7 |
|
325 |
+
| 주식·가상화폐 | 7 |
|
326 |
+
| 강원 | 7 |
|
327 |
+
| 부가가치세 | 0 |
|
328 |
+
| 신경외과 | 7 |
|
329 |
+
| 로스쿨 | 7 |
|
330 |
+
| 상해 보험 | 7 |
|
331 |
+
| 한양대학교 | 7 |
|
332 |
+
| 종합소득세 | 7 |
|
333 |
+
| 치아 관리 | 7 |
|
334 |
+
| 폭행·협박 | 7 |
|
335 |
+
| 의과대학 | 7 |
|
336 |
+
|
337 |
+
### Training Hyperparameters
|
338 |
+
- batch_size: (128, 128)
|
339 |
+
- num_epochs: (2, 2)
|
340 |
+
- max_steps: -1
|
341 |
+
- sampling_strategy: oversampling
|
342 |
+
- num_iterations: 80
|
343 |
+
- body_learning_rate: (2e-05, 1e-05)
|
344 |
+
- head_learning_rate: 0.01
|
345 |
+
- loss: CosineSimilarityLoss
|
346 |
+
- distance_metric: cosine_distance
|
347 |
+
- margin: 0.25
|
348 |
+
- end_to_end: False
|
349 |
+
- use_amp: False
|
350 |
+
- warmup_proportion: 0.1
|
351 |
+
- seed: 42
|
352 |
+
- eval_max_steps: -1
|
353 |
+
- load_best_model_at_end: True
|
354 |
+
|
355 |
+
### Training Results
|
356 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
357 |
+
|:-------:|:--------:|:-------------:|:---------------:|
|
358 |
+
| 0.0005 | 1 | 0.1182 | - |
|
359 |
+
| 0.0275 | 50 | 0.1246 | - |
|
360 |
+
| 0.0549 | 100 | 0.0879 | - |
|
361 |
+
| 0.0824 | 150 | 0.0493 | - |
|
362 |
+
| 0.1099 | 200 | 0.0559 | - |
|
363 |
+
| 0.1374 | 250 | 0.0323 | - |
|
364 |
+
| 0.1648 | 300 | 0.025 | - |
|
365 |
+
| 0.1923 | 350 | 0.0296 | - |
|
366 |
+
| 0.2198 | 400 | 0.0198 | - |
|
367 |
+
| 0.2473 | 450 | 0.0163 | - |
|
368 |
+
| 0.2747 | 500 | 0.013 | - |
|
369 |
+
| 0.3022 | 550 | 0.013 | - |
|
370 |
+
| 0.3297 | 600 | 0.0193 | - |
|
371 |
+
| 0.3571 | 650 | 0.0161 | - |
|
372 |
+
| 0.3846 | 700 | 0.0163 | - |
|
373 |
+
| 0.4121 | 750 | 0.0095 | - |
|
374 |
+
| 0.4396 | 800 | 0.0055 | - |
|
375 |
+
| 0.4670 | 850 | 0.0178 | - |
|
376 |
+
| 0.4945 | 900 | 0.0094 | - |
|
377 |
+
| 0.5220 | 950 | 0.025 | - |
|
378 |
+
| 0.5495 | 1000 | 0.017 | - |
|
379 |
+
| 0.5769 | 1050 | 0.006 | - |
|
380 |
+
| 0.6044 | 1100 | 0.003 | - |
|
381 |
+
| 0.6319 | 1150 | 0.0108 | - |
|
382 |
+
| 0.6593 | 1200 | 0.0069 | - |
|
383 |
+
| 0.6868 | 1250 | 0.006 | - |
|
384 |
+
| 0.7143 | 1300 | 0.0025 | - |
|
385 |
+
| 0.7418 | 1350 | 0.0178 | - |
|
386 |
+
| 0.7692 | 1400 | 0.0042 | - |
|
387 |
+
| 0.7967 | 1450 | 0.0018 | - |
|
388 |
+
| 0.8242 | 1500 | 0.0046 | - |
|
389 |
+
| 0.8516 | 1550 | 0.0077 | - |
|
390 |
+
| 0.8791 | 1600 | 0.0041 | - |
|
391 |
+
| 0.9066 | 1650 | 0.0057 | - |
|
392 |
+
| 0.9341 | 1700 | 0.0115 | - |
|
393 |
+
| 0.9615 | 1750 | 0.0106 | - |
|
394 |
+
| 0.9890 | 1800 | 0.0086 | - |
|
395 |
+
| **1.0** | **1820** | **-** | **0.0718** |
|
396 |
+
| 1.0165 | 1850 | 0.0029 | - |
|
397 |
+
| 1.0440 | 1900 | 0.0078 | - |
|
398 |
+
| 1.0714 | 1950 | 0.0018 | - |
|
399 |
+
| 1.0989 | 2000 | 0.0066 | - |
|
400 |
+
| 1.1264 | 2050 | 0.0026 | - |
|
401 |
+
| 1.1538 | 2100 | 0.0014 | - |
|
402 |
+
| 1.1813 | 2150 | 0.0022 | - |
|
403 |
+
| 1.2088 | 2200 | 0.0038 | - |
|
404 |
+
| 1.2363 | 2250 | 0.0018 | - |
|
405 |
+
| 1.2637 | 2300 | 0.0169 | - |
|
406 |
+
| 1.2912 | 2350 | 0.0084 | - |
|
407 |
+
| 1.3187 | 2400 | 0.0022 | - |
|
408 |
+
| 1.3462 | 2450 | 0.0013 | - |
|
409 |
+
| 1.3736 | 2500 | 0.0015 | - |
|
410 |
+
| 1.4011 | 2550 | 0.0036 | - |
|
411 |
+
| 1.4286 | 2600 | 0.0012 | - |
|
412 |
+
| 1.4560 | 2650 | 0.0023 | - |
|
413 |
+
| 1.4835 | 2700 | 0.0013 | - |
|
414 |
+
| 1.5110 | 2750 | 0.0082 | - |
|
415 |
+
| 1.5385 | 2800 | 0.0073 | - |
|
416 |
+
| 1.5659 | 2850 | 0.0049 | - |
|
417 |
+
| 1.5934 | 2900 | 0.0013 | - |
|
418 |
+
| 1.6209 | 2950 | 0.0012 | - |
|
419 |
+
| 1.6484 | 3000 | 0.002 | - |
|
420 |
+
| 1.6758 | 3050 | 0.0038 | - |
|
421 |
+
| 1.7033 | 3100 | 0.0056 | - |
|
422 |
+
| 1.7308 | 3150 | 0.001 | - |
|
423 |
+
| 1.7582 | 3200 | 0.004 | - |
|
424 |
+
| 1.7857 | 3250 | 0.0012 | - |
|
425 |
+
| 1.8132 | 3300 | 0.0063 | - |
|
426 |
+
| 1.8407 | 3350 | 0.0016 | - |
|
427 |
+
| 1.8681 | 3400 | 0.0016 | - |
|
428 |
+
| 1.8956 | 3450 | 0.0063 | - |
|
429 |
+
| 1.9231 | 3500 | 0.0013 | - |
|
430 |
+
| 1.9505 | 3550 | 0.0033 | - |
|
431 |
+
| 1.9780 | 3600 | 0.0025 | - |
|
432 |
+
| 2.0 | 3640 | - | 0.0775 |
|
433 |
+
|
434 |
+
* The bold row denotes the saved checkpoint.
|
435 |
+
### Framework Versions
|
436 |
+
- Python: 3.10.12
|
437 |
+
- SetFit: 1.0.3
|
438 |
+
- Sentence Transformers: 2.7.0
|
439 |
+
- Transformers: 4.40.2
|
440 |
+
- PyTorch: 2.0.1
|
441 |
+
- Datasets: 2.19.1
|
442 |
+
- Tokenizers: 0.19.1
|
443 |
+
|
444 |
+
## Citation
|
445 |
+
|
446 |
+
### BibTeX
|
447 |
+
```bibtex
|
448 |
+
@article{https://doi.org/10.48550/arxiv.2209.11055,
|
449 |
+
doi = {10.48550/ARXIV.2209.11055},
|
450 |
+
url = {https://arxiv.org/abs/2209.11055},
|
451 |
+
author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
|
452 |
+
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
|
453 |
+
title = {Efficient Few-Shot Learning Without Prompts},
|
454 |
+
publisher = {arXiv},
|
455 |
+
year = {2022},
|
456 |
+
copyright = {Creative Commons Attribution 4.0 International}
|
457 |
+
}
|
458 |
+
```
|
459 |
+
|
460 |
+
<!--
|
461 |
+
## Glossary
|
462 |
+
|
463 |
+
*Clearly define terms in order to be accessible across audiences.*
|
464 |
+
-->
|
465 |
+
|
466 |
+
<!--
|
467 |
+
## Model Card Authors
|
468 |
+
|
469 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
470 |
+
-->
|
471 |
+
|
472 |
+
<!--
|
473 |
+
## Model Card Contact
|
474 |
+
|
475 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
476 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "checkpoints/step_1820",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 3072,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.40.2",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 32000
|
25 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "2.1.0",
|
4 |
+
"transformers": "4.13.0",
|
5 |
+
"pytorch": "1.7.0+cu110"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null
|
9 |
+
}
|
config_setfit.json
ADDED
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"labels": [
|
3 |
+
"PC\u00b7\ub178\ud2b8\ubd81",
|
4 |
+
"\uc591\ub3c4\uc18c\ub4dd\uc138",
|
5 |
+
"\ud1a0\ubaa9\uacf5\ud559",
|
6 |
+
"\ucc45\u00b7\ub3c5\uc11c",
|
7 |
+
"\ub300\uc778\uad00\uacc4",
|
8 |
+
"\uae30\ud0c0 \uace0\ubbfc\uc0c1\ub2f4",
|
9 |
+
"\uacf5\uc778\ub178\ubb34\uc0ac \uc790\uaca9\uc99d",
|
10 |
+
"\uc11c\uc6b8\u00b7\uc218\ub3c4\uad8c",
|
11 |
+
"\uc5f0\uc138\ub300\ud559\uad50",
|
12 |
+
"\uc790\ub3d9\ucc28 \uc218\ub9ac",
|
13 |
+
"\ubc18\ub824\ub3d9\ubb3c \uac74\uac15",
|
14 |
+
"\uc608\ub2a5",
|
15 |
+
"\uac00\uc555\ub958\u00b7\uac00\ucc98\ubd84",
|
16 |
+
"\uae30\ud0c0 \uc721\uc544\uc0c1\ub2f4",
|
17 |
+
"\uc5ed\uc0ac",
|
18 |
+
"\uc190\ud574\uc0ac\uc815\uc0ac \uc790\uaca9\uc99d",
|
19 |
+
"\ud658\uc790 \uc2dd\ub2e8",
|
20 |
+
"\uc11c\uc6b8\uc2dc\ub9bd\ub300\ud559\uad50",
|
21 |
+
"\uae30\ud0c0 \uc2ec\ub9ac\uc0c1\ub2f4",
|
22 |
+
"\ud615\uc0ac",
|
23 |
+
"\uce58\uacfc\ub300\ud559",
|
24 |
+
"\uc608\uae08\u00b7\uc801\uae08",
|
25 |
+
"\ubb3c\ub9ac",
|
26 |
+
"\uc77c\ubcf8",
|
27 |
+
"\ub09a\uc2dc",
|
28 |
+
"\ud604\ub300\uc911\uacf5\uc5c5",
|
29 |
+
"\uad50\ud1b5\uc0ac\uace0",
|
30 |
+
"\uacf5\uc778\ud68c\uacc4\uc0ac \uc790\uaca9\uc99d",
|
31 |
+
"\ub18d\uad6c",
|
32 |
+
"\uc885\ud569\ubd80\ub3d9\uc0b0\uc138",
|
33 |
+
"\ube44\ub1e8\uc758\ud559\uacfc",
|
34 |
+
"\uba85\uc608\ud6fc\uc190\u00b7\ubaa8\uc695",
|
35 |
+
"\uc9c0\uac8c\ucc28\uc6b4\uc804\uae30\ub2a5\uc0ac",
|
36 |
+
"\uac00\uc871\u00b7\uc774\ud63c",
|
37 |
+
"\ud604\ub300\uc790\ub3d9\ucc28",
|
38 |
+
"\uae30\uacc4\uacf5\ud559",
|
39 |
+
"\uc601\uc591\uc81c",
|
40 |
+
"\uc5f0\ub9d0\uc815\uc0b0",
|
41 |
+
"\uc131\uade0\uad00\ub300\ud559\uad50",
|
42 |
+
"\ubbf8\uc220",
|
43 |
+
"\uc591\uc721\u00b7\ud6c8\uc721",
|
44 |
+
"\uacbd\uc81c\uc6a9\uc5b4",
|
45 |
+
"\uacbd\ud76c\ub300\ud559\uad50",
|
46 |
+
"\uae08\uc735",
|
47 |
+
"\uc2a4\ucf00\uc774\ud2b8",
|
48 |
+
"\ubc18\ub824\ub3d9\ubb3c \ud6c8\ub828",
|
49 |
+
"\ud55c\uc57d",
|
50 |
+
"\uc0dd\ubb3c\u00b7\uc0dd\uba85",
|
51 |
+
"\uc0ac\ud68c\ubcf5\uc9c0\uc0ac \uc790\uaca9\uc99d",
|
52 |
+
"\ud55c\uc758\ud559",
|
53 |
+
"\ubd80\ub3d9\uc0b0\u00b7\uc784\ub300\ucc28",
|
54 |
+
"\ud68c\uc0dd\u00b7\ud30c\uc0b0",
|
55 |
+
"\uc784\uae08\uccb4\ubd88",
|
56 |
+
"\ubcc0\ub9ac\uc0ac \uc790\uaca9\uc99d",
|
57 |
+
"\ud53c\ubd80\uacfc",
|
58 |
+
"\ub180\uc774",
|
59 |
+
"\ub300\ucd9c",
|
60 |
+
"\uc7ac\uc0b0\ubc94\uc8c4",
|
61 |
+
"\uc601\uc720\uc544\u00b7\uc544\ub3d9 \uc2dd\ub2e8",
|
62 |
+
"\uacc4\uc808\uac00\uc804",
|
63 |
+
"\uc7ac\ub8cc\uacf5\ud559",
|
64 |
+
"\uc804\ub77c",
|
65 |
+
"\ub2e4\uc774\uc5b4\ud2b8 \uc2dd\ub2e8",
|
66 |
+
"\uacbd\uc81c\uc815\ucc45",
|
67 |
+
"\ucea0\ud551",
|
68 |
+
"\ucca0\ud559",
|
69 |
+
"\uce58\uacfc",
|
70 |
+
"\uc11c\uac15\ub300\ud559\uad50",
|
71 |
+
"\uc790\uc544\u00b7\uc790\uae30\uc774\ud574",
|
72 |
+
"\ud658\uacbd\u00b7\uc5d0\ub108\uc9c0",
|
73 |
+
"\ud3ec\ud56d\uacf5\uacfc\ub300\ud559\uad50",
|
74 |
+
"\uc81c\uc8fc",
|
75 |
+
"\ud589\uc815\uc0ac \uc790\uaca9\uc99d",
|
76 |
+
"\uc784\uae08\u00b7\uae09\uc5ec",
|
77 |
+
"\uc138\ud0c1\uc218\uc120",
|
78 |
+
"\uc911\ud559\uad50 \uc0dd\ud65c",
|
79 |
+
"\uc815\ubcf4\ucc98\ub9ac\uae30\uc0ac",
|
80 |
+
"\uc218\uc601",
|
81 |
+
"\uc9c1\uc7a5\ub0b4\uad34\ub86d\ud798",
|
82 |
+
"\uc18c\uc544\uccad\uc18c\ub144\uacfc",
|
83 |
+
"\uc7ac\ud65c\u00b7\ubb3c\ub9ac\uce58\ub8cc",
|
84 |
+
"\ubd80\uc0b0",
|
85 |
+
"\uc548\uacfc",
|
86 |
+
"\ub7f0\ub2dd",
|
87 |
+
"\uc9c0\uc2dd\uc7ac\uc0b0\uad8c\u00b7IT",
|
88 |
+
"\uc0ac\uc9c4\u00b7\uc601\uc0c1",
|
89 |
+
"\uc138\ubb34\uc0ac \uc790\uaca9\uc99d",
|
90 |
+
"\ucd95\uad6c\u00b7\ud48b\uc0b4",
|
91 |
+
"\ubbf8\uc8fc\u00b7\uc911\ub0a8\ubbf8",
|
92 |
+
"\uc911\uc559\ub300\ud559\uad50",
|
93 |
+
"\uc74c\ud5a5\uae30\uae30",
|
94 |
+
"\ud55c\ud654",
|
95 |
+
"\uc138\ubb34\uc870\uc0ac\u00b7\ubd88\ubcf5",
|
96 |
+
"\uad50\ud1b5\uc0ac\uace0 \uacfc\uc2e4",
|
97 |
+
"\uc11c\uc6b8\ub300\ud559\uad50",
|
98 |
+
"\uc758\ub8cc",
|
99 |
+
"\uc2dd\uc2b5\uad00\u00b7\uc2dd\uc774\uc694\ubc95",
|
100 |
+
"\uad6c\uc870\uc870\uc815",
|
101 |
+
"\ubc95\uc778\uc138",
|
102 |
+
"\uae30\ud0c0 \uc601\uc591\uc0c1\ub2f4",
|
103 |
+
"\uc790\ub3d9\ucc28",
|
104 |
+
"KAIST",
|
105 |
+
"\uc774\ube44\uc778\ud6c4\uacfc",
|
106 |
+
"\uc0bc\uc131",
|
107 |
+
"\uace8\ud504",
|
108 |
+
"\uacbd\uc0c1",
|
109 |
+
"\ub4f1\uc0b0\u00b7\ud074\ub77c\uc774\ubc0d",
|
110 |
+
"\uae30\ud0c0 \uc2a4\ud3ec\uce20",
|
111 |
+
"\uc131\ubc94\uc8c4",
|
112 |
+
"GIST",
|
113 |
+
"\uac00\ub4dc\ub2dd",
|
114 |
+
"\ud55c\uad6d\uc678\uad6d\uc5b4\ub300\ud559\uad50",
|
115 |
+
"PC \uc8fc\ubcc0\uae30\uae30",
|
116 |
+
"\uae30\ud0c0 \uc7a5\uce58",
|
117 |
+
"\uc5f0\uc560\u00b7\uc774\uc131",
|
118 |
+
"\ucde8\ub4dd\uc138\u00b7\ub4f1\ub85d\uc138",
|
119 |
+
"\uc0b0\ubd80\uc778\uacfc",
|
120 |
+
"\uae30\ud0c0 \uc138\uae08\uc0c1\ub2f4",
|
121 |
+
"\ud5ec\uc2a4",
|
122 |
+
"\uac04\ud638\uc870\ubb34\uc0ac \uc790\uaca9\uc99d",
|
123 |
+
"\uc804\uae30\uae30\uc0ac\u00b7\uae30\ub2a5\uc0ac",
|
124 |
+
"\uc624\uc138\uc544\ub2c8\uc544",
|
125 |
+
"\ubb34\uc5ed",
|
126 |
+
"\uccad\uc18c",
|
127 |
+
"\uacf5\uc778\uc911\uac1c\uc0ac \uc790\uaca9\uc99d",
|
128 |
+
"\uc911\uad6d",
|
129 |
+
"\uc778\ud14c\ub9ac\uc5b4",
|
130 |
+
"\ub3d9\ub0a8\uc544",
|
131 |
+
"\uad70\ub300 \uc0dd\ud65c",
|
132 |
+
"\uc601\uc0c1\uac00\uc804",
|
133 |
+
"\uc0c1\uc18d\uc138",
|
134 |
+
"LG",
|
135 |
+
"\ubd80\ub3d9\uc0b0",
|
136 |
+
"\uae30\ud0c0 \ub178\ubb34\uc0c1\ub2f4",
|
137 |
+
"\uc6f9\ud230\u00b7\uc6f9\uc18c\uc124",
|
138 |
+
"\ubba4\uc9c0\uceec\u00b7\uc5f0\uadf9",
|
139 |
+
"\ub124\uc774\ubc84",
|
140 |
+
"\uadfc\ub85c\uacc4\uc57d",
|
141 |
+
"\uc758\ub8cc \ubcf4\ud5d8",
|
142 |
+
"\ub0b4\uacfc",
|
143 |
+
"\uc138\ud0c1\uae30\u00b7\uac74\uc870\uae30",
|
144 |
+
"\uc720\ub7fd\u00b7\uc544\ud504\ub9ac\uce74",
|
145 |
+
"\uae30\ud0c0 \uc57d\ub8cc\uc0c1\ub2f4",
|
146 |
+
"\ub4dc\ub77c\ub9c8",
|
147 |
+
"\ubc18\ub824\ub3d9\ubb3c \ubbf8\uc6a9",
|
148 |
+
"\uc2a4\ub9c8\ud2b8\ud3f0\u00b7\ud0dc\ube14\ub9bf",
|
149 |
+
"\uae30\ud0c0 \uc758\ub8cc\uc0c1\ub2f4",
|
150 |
+
"\ud654\ud559",
|
151 |
+
"\uccad\uc18c\uae30",
|
152 |
+
"\ud14c\ub2c8\uc2a4",
|
153 |
+
"\ucda9\uccad",
|
154 |
+
"\ub300\ud559\uad50 \uc0dd\ud65c",
|
155 |
+
"\uc57c\uad6c",
|
156 |
+
"\uace0\ub4f1\ud559\uad50 \uc0dd\ud65c",
|
157 |
+
"\uc815\ud615\uc678\uacfc",
|
158 |
+
"\uc7ac\uc0b0 \ubcf4\ud5d8",
|
159 |
+
"\ud654\ud559\uacf5\ud559",
|
160 |
+
"\uae30\ud0c0\uac00\uc804",
|
161 |
+
"\ubb38\ud559",
|
162 |
+
"\uae30\ud0c0 \ubc95\ub960\uc0c1\ub2f4",
|
163 |
+
"\uc815\uc2e0\uac74\uac15",
|
164 |
+
"\ud06c\ub85c\uc2a4\ud54f",
|
165 |
+
"\uc694\uac00\u00b7\ud544\ub77c\ud14c\uc2a4",
|
166 |
+
"\uc2a4\ud0a4\u00b7\ubcf4\ub4dc",
|
167 |
+
"\ubcf4\ud5d8\uc124\uacc4\uc0ac \uc790\uaca9\uc99d",
|
168 |
+
"\ud574\uace0\u00b7\uc9d5\uacc4",
|
169 |
+
"\ud734\uc77c\u00b7\ud734\uac00",
|
170 |
+
"\uc131\ud615\uc678\uacfc",
|
171 |
+
"\ubbfc\uc0ac",
|
172 |
+
"\uc74c\uc545",
|
173 |
+
"\uc0b0\uc5c5\uc548\uc804\uc0b0\uc5c5\uae30\uc0ac",
|
174 |
+
"\uc720\uc544\uad50\uc721",
|
175 |
+
"\uc57d \ubcf5\uc6a9",
|
176 |
+
"\uc8fc\ubc29\uac00\uc804",
|
177 |
+
"\uc804\uae30\u00b7\uc804\uc790",
|
178 |
+
"\uc0dd\ud65c\u00b7\ubbf8\uc6a9\uac00\uc804",
|
179 |
+
"\ubcc0\ud638\uc0ac \uc790\uaca9\uc99d",
|
180 |
+
"\uc601\ud654\u00b7\uc560\ub2c8",
|
181 |
+
"\uc800\ucd95\uc131 \ubcf4\ud5d8",
|
182 |
+
"\uc99d\uc5ec\uc138",
|
183 |
+
"\ud68c\uc0ac \uc0dd\ud65c",
|
184 |
+
"\uae30\uc5c5\u00b7\ud68c\uc0ac",
|
185 |
+
"\uc790\uc0b0\uad00\ub9ac",
|
186 |
+
"\ud558\uc774\ud0b9",
|
187 |
+
"\uc5f0\uc560\u00b7\uacb0\ud63c",
|
188 |
+
"\ud55c\uc758\ub300\ud559",
|
189 |
+
"\uc9c0\uad6c\uacfc\ud559\u00b7\ucc9c\ubb38\uc6b0\uc8fc",
|
190 |
+
"\ub86f\ub370",
|
191 |
+
"\uc0b0\uc5c5\uc7ac\ud574",
|
192 |
+
"\uace0\ub824\ub300\ud559\uad50",
|
193 |
+
"\uad00\uc138\uc0ac \uc790\uaca9\uc99d",
|
194 |
+
"\uacbd\uc81c\ub3d9\ud5a5",
|
195 |
+
"\ud68c\uacc4\uc790\ubb38",
|
196 |
+
"\uc6e8\uc5b4\ub7ec\ube14 \uae30\uae30",
|
197 |
+
"UNIST",
|
198 |
+
"SK",
|
199 |
+
"\uc8fc\uc2dd\u00b7\uac00\uc0c1\ud654\ud3d0",
|
200 |
+
"\uac15\uc6d0",
|
201 |
+
"\ubd80\uac00\uac00\uce58\uc138",
|
202 |
+
"\uc2e0\uacbd\uc678\uacfc",
|
203 |
+
"\ub85c\uc2a4\ucfe8",
|
204 |
+
"\uc0c1\ud574 \ubcf4\ud5d8",
|
205 |
+
"\ud55c\uc591\ub300\ud559\uad50",
|
206 |
+
"\uc885\ud569\uc18c\ub4dd\uc138",
|
207 |
+
"\uce58\uc544 \uad00\ub9ac",
|
208 |
+
"\ud3ed\ud589\u00b7\ud611\ubc15",
|
209 |
+
"\uc758\uacfc\ub300\ud559"
|
210 |
+
],
|
211 |
+
"normalize_embeddings": false
|
212 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33d6cf3b048c0313d349211fe91639608d8926a186510dcce63456cc019f20bb
|
3 |
+
size 442491744
|
model_head.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0dcb6b8e0f06aadc9d4054c23b5f3207baed066f02e1d0612198ed703d96768
|
3 |
+
size 1282046
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 128,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"4": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": false,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|