rajiv-data-chef committed
Commit 981fd76 · verified · 1 Parent(s): d3e33eb

adding __main__.py file

Files changed (1)
  1. __main__.py +89 -0
__main__.py ADDED
@@ -0,0 +1,89 @@
from transformers import Pipeline, AutoTokenizer
import torch
from bs4 import BeautifulSoup
import re


class MyPipeline(Pipeline):
    # Shared tokenizer, loaded once when the class is defined
    tokenizer = AutoTokenizer.from_pretrained("abacusai/Llama-3-Smaug-8B")

    def _sanitize_parameters(self, **kwargs):
        # Route "context" and "search_person" to preprocess();
        # _forward() and postprocess() take no extra kwargs
        preprocess_kwargs = {}
        if "context" in kwargs:
            preprocess_kwargs["context"] = kwargs["context"]
        if "search_person" in kwargs:
            preprocess_kwargs["search_person"] = kwargs["search_person"]
        return preprocess_kwargs, {}, {}

    def preprocess(self, inputs, **kwargs):
        tokenizer = MyPipeline.tokenizer
        # The pipeline is called with a dict carrying both fields
        context = inputs["context"]
        search_person = inputs["search_person"]

        def create_prompt(context, search_person):
            def clean_text(text):
                # Drop hyperlinks and parenthesized asides from the raw HTML
                soup = BeautifulSoup(text, 'html.parser')
                for link in soup.find_all('a'):
                    link.decompose()
                text = re.sub(r'\([^)]*\)', '', soup.get_text())
                return text

            def prepare_question(search_person):
                q = f"""
Based on the information provided in the context, what is the most likely perception of {search_person}?
Pick one answer option.

Answer options:

Positive: {search_person} is portrayed in a favorable light, and the context suggests that she is a caring and responsible parent.
Negative: {search_person} is portrayed in an unfavorable light, and the context suggests that she is a neglectful and/or abusive parent.
Neutral: The context does not provide enough information to make a determination about the character or actions of {search_person}, or it presents a balanced and unbiased view of her.
"""
                return q

            context = clean_text(context)
            question = prepare_question(search_person)
            # Trim the context from the front until the prompt fits the
            # model's window. Note: some tokenizers report a very large
            # sentinel for model_max_length, so this check may never trigger.
            while context and len(tokenizer.tokenize(context + ' ' + question)) > tokenizer.model_max_length:
                context = context[500:]

            prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n"
            return prompt_template

        prompt = create_prompt(context, search_person)
        pred_tokens = tokenizer(prompt, return_tensors='pt')
        return pred_tokens

    def _forward(self, model_inputs):
        tokenizer = MyPipeline.tokenizer
        try:
            # An out-of-memory error is most likely to happen here
            device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model = self.model.to(device)
            model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
        except RuntimeError:
            # Fall back to the CPU explicitly
            self.model = self.model.to("cpu")
            model_inputs = {k: v.to("cpu") for k, v in model_inputs.items()}

        with torch.no_grad():
            outputs = self.model.generate(**model_inputs, max_new_tokens=20,
                                          pad_token_id=tokenizer.eos_token_id)
        # Keep only the tokens generated after the prompt
        generated_tokens = outputs[0, len(model_inputs['input_ids'][0]):]
        out_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        return {'out_text': out_text}

    def postprocess(self, model_outputs):
        # Map the free-form generation onto one of the three labels,
        # defaulting to 'Neutral' when none of them appears
        out_text = model_outputs['out_text']
        if 'Positive' in out_text:
            return 'Positive'
        elif 'Negative' in out_text:
            return 'Negative'
        else:
            return 'Neutral'

# Initialize the model and tokenizer
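The commit stops at the trailing "# Initialize the model and tokenizer" comment, so the initialization itself is missing. What follows is a minimal sketch of how this pipeline might be wired up and called, assuming the model is loaded with AutoModelForCausalLM from the same checkpoint as the class-level tokenizer, and that inputs are passed as a dict with the context/search_person keys that preprocess() reads; the sample inputs are illustrative.

from transformers import AutoModelForCausalLM

# Assumed initialization (not part of the commit): load the same checkpoint
# the class-level tokenizer points at, then instantiate the custom pipeline.
model = AutoModelForCausalLM.from_pretrained("abacusai/Llama-3-Smaug-8B")
pipe = MyPipeline(model=model, tokenizer=MyPipeline.tokenizer)

# preprocess() expects a dict carrying both fields; postprocess() collapses
# the generation to one of the three labels.
label = pipe({
    "context": "<p>Jane Doe took her children to every appointment ...</p>",
    "search_person": "Jane Doe",
})
print(label)  # 'Positive', 'Negative', or 'Neutral'

To expose the class through the transformers pipeline() factory, it would additionally have to be registered via PIPELINE_REGISTRY.register_pipeline; no such registration appears in this commit.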