AIDSC committed on
Commit
e5cd820
·
verified ·
1 Parent(s): 41ae640

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +36 -27
handler.py CHANGED
@@ -1,33 +1,42 @@
 
1
  import torch
 
 
 
2
 
3
- from typing import Any, Dict
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
 
 
 
 
 
 
6
 
7
- class EndpointHandler:
8
  def __init__(self, path=""):
9
- # load model and tokenizer from path
 
 
 
10
  self.tokenizer = AutoTokenizer.from_pretrained(path)
11
- self.model = AutoModelForCausalLM.from_pretrained(
12
- path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
13
- )
14
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
15
-
16
- def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
17
- # process input
18
- inputs = data.pop("inputs", data)
19
- parameters = data.pop("parameters", None)
20
-
21
- # preprocess
22
- inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
23
-
24
- # pass inputs with all kwargs in data
25
- if parameters is not None:
26
- outputs = self.model.generate(**inputs, **parameters)
27
- else:
28
- outputs = self.model.generate(**inputs)
29
-
30
- # postprocess the prediction
31
- prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
32
-
33
- return [{"generated_text": prediction}]
 
1
+ from typing import Dict, List, Any
2
  import torch
3
+ from accelerate import Accelerator
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
+ import numpy as np
6
 
 
 
7
 
8
def softmax(x):
    """Return the softmax of *x*: exponentials normalized to sum to 1.

    The maximum of *x* is subtracted before exponentiating so large
    logits cannot overflow ``np.exp`` (standard numerical-stability
    trick; the result is mathematically unchanged).

    Args:
        x: array-like of real-valued scores (logits).

    Returns:
        ``np.ndarray`` of the same shape as *x*, non-negative,
        summing to 1.
    """
    # np.max works on any array shape; the builtin max() only handles
    # 1-D iterables. Also avoid shadowing the function name locally.
    shifted = np.asarray(x) - np.max(x)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted)
14
 
15
class EndpointHandler():
    """Inference handler that scores a multiple-choice prompt.

    Runs one forward pass of a causal LM on the prompt and compares the
    next-token logits of the answer letters "A"-"D", returning the index
    of the most likely option and a softmax confidence over the four.
    """

    def __init__(self, path=""):
        """Load model and tokenizer from *path*; precompute option token ids.

        Args:
            path: local directory (or hub repo id) of the model.
        """
        self.accelerator = Accelerator()
        self.device = self.accelerator.device
        # NOTE(review): device_map="auto" already places the model; the extra
        # accelerator.prepare() wrap mirrors the original behavior.
        self.model = AutoModelForCausalLM.from_pretrained(
            path, trust_remote_code=True, device_map="auto"
        )
        self.model = self.accelerator.prepare(self.model)
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # Last sub-token id of each answer letter, used to slice vocab logits.
        self.options_tokens = [
            self.tokenizer.encode(choice)[-1] for choice in ["A", "B", "C", "D"]
        ]

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one inference request.

        Args:
            data: request payload; must contain a "prompt" key holding the
                multiple-choice question text (KeyError if missing).

        Returns:
            A single-element list ``[{"pred": ..., "conf": ...}]`` where
            ``pred`` is the builtin-int index of the argmax option
            (0=A .. 3=D) and ``conf`` is a 4-element list of softmax
            probabilities — plain Python types so the payload is
            JSON-serializable.
        """
        with torch.no_grad():
            prompt = data.pop("prompt")
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
            outputs = self.model(**inputs)
            # Logits for the token that would follow the prompt.
            last_token_logits = outputs.logits[:, -1, :]
            options_tokens_logits = (
                last_token_logits[:, self.options_tokens].detach().cpu().numpy()
            )
            conf = softmax(options_tokens_logits[0])
            pred = np.argmax(options_tokens_logits[0])
            # Cast numpy types to builtins: np.int64 / np.ndarray are not
            # JSON-serializable by the endpoint serialization layer.
            return [{"pred": int(pred), "conf": conf.tolist()}]