ravfogs committed on
Commit
b035a1d
·
1 Parent(s): 71f4539

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -2,8 +2,8 @@
2
  language:
3
  - en
4
  tags:
5
- - pubmed
6
  - feature-extraction
 
7
  - sentence-similarity
8
  datasets:
9
  - biu-nlp/abstract-sim-pubmed
@@ -19,18 +19,18 @@ import torch
19
  def load_finetuned_model():
20
 
21
 
22
- sentence_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed")
23
- query_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-query-pubmed")
24
  tokenizer = AutoTokenizer.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed")
25
-
26
  return tokenizer, query_encoder, sentence_encoder
27
 
28
 
29
  def encode_batch(model, tokenizer, sentences, device):
30
- input_ids = tokenizer(sentences, padding=True, max_length=512, truncation=True, return_tensors="pt",
31
  add_special_tokens=True).to(device)
32
  features = model(**input_ids)[0]
33
  features = torch.sum(features[:,1:,:] * input_ids["attention_mask"][:,1:].unsqueeze(-1), dim=1) / torch.clamp(torch.sum(input_ids["attention_mask"][:,1:], dim=1, keepdims=True), min=1e-9)
 
34
  return features
35
 
36
  ```
 
2
  language:
3
  - en
4
  tags:
 
5
  - feature-extraction
6
+ - pubmed
7
  - sentence-similarity
8
  datasets:
9
  - biu-nlp/abstract-sim-pubmed
 
19
  def load_finetuned_model():
20
 
21
 
22
+ sentence_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed", revision="71f4539120e29024adc618173a1ed5fd230ac249")
23
+ query_encoder = AutoModel.from_pretrained("biu-nlp/abstract-sim-query-pubmed", revision="8d34676d80a39bcbc5a1d2eec13e6f8078496215")
24
  tokenizer = AutoTokenizer.from_pretrained("biu-nlp/abstract-sim-sentence-pubmed")
 
25
  return tokenizer, query_encoder, sentence_encoder
26
 
27
 
28
  def encode_batch(model, tokenizer, sentences, device):
29
+ input_ids = tokenizer(sentences, padding=True, max_length=128, truncation=True, return_tensors="pt",
30
  add_special_tokens=True).to(device)
31
  features = model(**input_ids)[0]
32
  features = torch.sum(features[:,1:,:] * input_ids["attention_mask"][:,1:].unsqueeze(-1), dim=1) / torch.clamp(torch.sum(input_ids["attention_mask"][:,1:], dim=1, keepdims=True), min=1e-9)
33
+
34
  return features
35
 
36
  ```