Update README.md
Browse files
README.md
CHANGED
@@ -2,8 +2,8 @@
|
|
2 |
language:
|
3 |
- en
|
4 |
tags:
|
5 |
-
- pubmed
|
6 |
- feature-extraction
|
|
|
7 |
- sentence-similarity
|
8 |
datasets:
|
9 |
- biu-nlp/abstract-sim-pubmed
|
@@ -19,18 +19,18 @@ import torch
|
|
19 |
def load_finetuned_model():
|
20 |
|
21 |
|
22 |
-
sentence_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed")
|
23 |
-
query_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-query-pubmed")
|
24 |
tokenizer = AutoTokenizer.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed")
|
25 |
-
|
26 |
return tokenizer, query_encoder, sentence_encoder
|
27 |
|
28 |
|
29 |
def encode_batch(model, tokenizer, sentences, device):
|
30 |
-
input_ids = tokenizer(sentences, padding=True, max_length=
|
31 |
add_special_tokens=True).to(device)
|
32 |
features = model(**input_ids)[0]
|
33 |
features = torch.sum(features[:,1:,:] * input_ids["attention_mask"][:,1:].unsqueeze(-1), dim=1) / torch.clamp(torch.sum(input_ids["attention_mask"][:,1:], dim=1, keepdims=True), min=1e-9)
|
|
|
34 |
return features
|
35 |
|
36 |
```
|
|
|
2 |
language:
|
3 |
- en
|
4 |
tags:
|
|
|
5 |
- feature-extraction
|
6 |
+
- pubmed
|
7 |
- sentence-similarity
|
8 |
datasets:
|
9 |
- biu-nlp/abstract-sim-pubmed
|
|
|
19 |
def load_finetuned_model():
|
20 |
|
21 |
|
22 |
+
sentence_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed", revision="71f4539120e29024adc618173a1ed5fd230ac249")
|
23 |
+
query_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-query-pubmed", revision="8d34676d80a39bcbc5a1d2eec13e6f8078496215")
|
24 |
tokenizer = AutoTokenizer.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed")
|
|
|
25 |
return tokenizer, query_encoder, sentence_encoder
|
26 |
|
27 |
|
28 |
def encode_batch(model, tokenizer, sentences, device):
|
29 |
+
input_ids = tokenizer(sentences, padding=True, max_length=128, truncation=True, return_tensors="pt",
|
30 |
add_special_tokens=True).to(device)
|
31 |
features = model(**input_ids)[0]
|
32 |
features = torch.sum(features[:,1:,:] * input_ids["attention_mask"][:,1:].unsqueeze(-1), dim=1) / torch.clamp(torch.sum(input_ids["attention_mask"][:,1:], dim=1, keepdims=True), min=1e-9)
|
33 |
+
|
34 |
return features
|
35 |
|
36 |
```
|