from transformers import Pipeline, AutoTokenizer
import torch
from bs4 import BeautifulSoup
import re


class MyPipeline(Pipeline):
    # The tokenizer is loaded once at class-definition time and shared by all instances.
    tokenizer = AutoTokenizer.from_pretrained("abacusai/Llama-3-Smaug-8B")

    def _sanitize_parameters(self, **kwargs):
        # Route "context" and "search_person" keyword arguments to preprocess();
        # no extra parameters are forwarded to _forward() or postprocess().
        preprocess_kwargs = {}
        if "context" in kwargs:
            preprocess_kwargs["context"] = kwargs["context"]
        if "search_person" in kwargs:
            preprocess_kwargs["search_person"] = kwargs["search_person"]
        return preprocess_kwargs, {}, {}

    def preprocess(self, inputs, **kwargs):
        # `inputs` is expected to be a dict with "context" and "search_person" keys.
        tokenizer = MyPipeline.tokenizer
        context = inputs["context"]
        search_person = inputs["search_person"]

        def create_prompt(context, search_person):
            def clean_text(text):
                # Strip hyperlinks and parenthesised asides from the raw HTML context.
                soup = BeautifulSoup(text, 'html.parser')
                for link in soup.find_all('a'):
                    link.decompose()
                return re.sub(r'\([^)]*\)', '', soup.get_text())

            def prepare_question(search_person):
                return f"""
Based on the information provided in the context, what is the most likely perception of {search_person}? Pick one answer option.
Answer options:
Positive: {search_person} is portrayed in a favorable light, and the context suggests that she is a caring and responsible parent.
Negative: {search_person} is portrayed in an unfavorable light, and the context suggests that she is a neglectful and/or abusive parent.
Neutral: The context does not provide enough information to make a determination about the character or actions of {search_person}, or it presents a balanced and unbiased view of her.
"""

            context = clean_text(context)
            question = prepare_question(search_person)
            # Trim the context from the front until the prompt fits within the
            # tokenizer's maximum input length; a single 500-character cut is
            # not guaranteed to be enough.
            while (len(tokenizer.tokenize(context + ' ' + question)) > tokenizer.model_max_length
                   and context):
                context = context[500:]
            return f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n"

        prompt = create_prompt(context, search_person)
        return tokenizer(prompt, return_tensors='pt')

    def _forward(self, model_inputs):
        tokenizer = MyPipeline.tokenizer
        try:
            # An out-of-memory error is most likely to happen here, when the
            # model and inputs are moved to the GPU.
            device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model = self.model.to(device)
            model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
        except RuntimeError:
            # Fall back explicitly to the CPU.
            self.model = self.model.to("cpu")
            model_inputs = {k: v.to("cpu") for k, v in model_inputs.items()}

        with torch.no_grad():
            outputs = self.model.generate(
                **model_inputs,
                max_new_tokens=20,
                pad_token_id=tokenizer.eos_token_id,
            )
        # Keep only the newly generated tokens, i.e. everything after the prompt.
        generated_tokens = outputs[0, model_inputs['input_ids'].shape[1]:]
        out_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        return {'out_text': out_text}

    def postprocess(self, model_outputs):
        # Map the free-form answer onto one of the three labels;
        # default to 'Neutral' when the answer is unclear.
        out_text = model_outputs['out_text']
        if 'Positive' in out_text:
            return 'Positive'
        elif 'Negative' in out_text:
            return 'Negative'
        else:
            return 'Neutral'


# Initialize the model and tokenizer
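# A minimal usage sketch, not part of the original listing: it assumes the model
# weights come from the same "abacusai/Llama-3-Smaug-8B" checkpoint as the
# tokenizer and that enough GPU/CPU memory is available. The pipeline subclass
# is instantiated directly here for brevity; registering it with the pipeline()
# factory via PIPELINE_REGISTRY is an alternative. The call argument is a dict
# carrying the "context" and "search_person" keys read in preprocess().
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "abacusai/Llama-3-Smaug-8B",
    torch_dtype=torch.bfloat16,
)
pipe = MyPipeline(model=model, tokenizer=MyPipeline.tokenizer)

label = pipe({
    "context": "<html>...article text about the person...</html>",  # hypothetical input
    "search_person": "Jane Doe",                                     # hypothetical name
})
print(label)  # one of: 'Positive', 'Negative', 'Neutral'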