Lahiru Menikdiwela committed on
Commit 173b5f1 · 1 Parent(s): ab21bba

changes done according to llama model

Files changed (2):
  1. model.py +15 -13
  2. summarizer.py +32 -4
model.py CHANGED
@@ -19,28 +19,30 @@ def get_local_model(model_name_or_path:str)->pipeline:

     #print(f"Model is running on {device}")

-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name_or_path,
-        token = hf_token
-    )
+    #!!!!!Removed for Llama model
+    # tokenizer = AutoTokenizer.from_pretrained(
+    #     model_name_or_path,
+    #     token = hf_token
+    # )
     model = AutoModelForCausalLM.from_pretrained(
         model_name_or_path,
         torch_dtype=torch.bfloat16,
         # load_in_4bit = True,
         token = hf_token
     )
-    pipe = pipeline(
-        task = "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        device = device,
-        max_new_tokens = 400,
-        model_kwargs = {"max_length":16384, "max_new_tokens": 512},
-    )
+    #!!!!!!!!!!!!!!!!!!!!!Removed for Llama model!!!!!!!!!!!!!!!!!!!!!!!
+    # pipe = pipeline(
+    #     task = "summarization",
+    #     model=model,
+    #     tokenizer=tokenizer,
+    #     device = device,
+    #     max_new_tokens = 400,
+    #     model_kwargs = {"max_length":16384, "max_new_tokens": 512},
+    # )

     logger.info(f"Summarization pipeline created and loaded to {device}")

-    return pipe
+    return model

 def get_endpoint(api_key:str):
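With the pipeline() wrapper and the in-function tokenizer load removed, get_local_model now returns the bare AutoModelForCausalLM, which also makes the ->pipeline hint in its signature stale. A minimal sketch of the new contract, assuming HF_TOKEN is a valid Hugging Face access token and using an illustrative model id (the diff never names the checkpoint):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"  # illustrative; any chat-tuned Llama
HF_TOKEN = "hf_..."                         # assumption: a valid access token

# What get_local_model now hands back: the raw causal LM. Without the
# pipeline() wrapper, generation settings (max_new_tokens etc.) move to
# the generate() call site in summarizer.py.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # bf16 weights, as in the diff
    token=HF_TOKEN,
)

# The tokenizer load deleted here has to happen elsewhere; summarizer.py
# still expects a tokenizer from summarizer_init.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)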
 
summarizer.py CHANGED
@@ -18,9 +18,13 @@ def summarizer_init(model_name,model_type,api_key=None) -> None:
     return tokenizer,base_summarizer

 def summarizer_summarize(model_type,tokenizer, base_summarizer, text:str,summarizer_type = "map_reduce")->str:
-    prompt = "SUmmarize this by focusing numerical importance sentences dont omit numerical sentences.Include all numerical details input text:"
-    text = prompt+text
-    text_to_summarize,length_type = prepare_for_summarize(text,tokenizer)
+    # prompt = "SUmmarize this by focusing numerical importance sentences dont omit numerical sentences.Include all numerical details input text:"
+    text = text
+
+    #!!!!!!!!!!!!!!!!!!!Removed because map reduce is not suitable or take long time
+    # text_to_summarize,length_type = prepare_for_summarize(text,tokenizer)
+    length_type = "short"
+    text_to_summarize = text

     if length_type =="short":
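With length_type hard-coded to "short" and prepare_for_summarize bypassed, nothing chunks long inputs any more; the only remaining safeguard is truncation=True at tokenization time in the next hunk, which silently drops overflowing text. A hedged sketch of a pre-flight length check one could add back (the token budget below is an assumption, not something this commit sets):

MAX_INPUT_TOKENS = 4096  # assumed context budget; set per model

def fits_context(tokenizer, text: str) -> bool:
    # Cheap check: count tokens before tokenizing for real, so callers can
    # warn or fall back to chunking instead of losing the tail to truncation.
    return len(tokenizer.encode(text)) <= MAX_INPUT_TOKENS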
 
@@ -45,7 +49,31 @@ def summarizer_summarize(model_type,tokenizer, base_summarizer, text:str,summarizer_type = "map_reduce")->str:
     elif model_type == "local":
         pipe = base_summarizer
         start = time.time()
-        summary = pipe(text_to_summarize)[0]['generated_text']
+
+        #!!!!!!!!!!!!!!!!!!!!Changes to llama model
+        input_text = text_to_summarize
+        chat = [
+            { "role": "user",
+              "content": f"""
+              SUmmarize this by focusing numerical importance sentences in the perspective of financial executive. input text: {input_text}
+              """ },
+        ]
+        prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+        inputs = tokenizer(prompt,
+                           return_tensors="pt", truncation=True).to('cuda')
+        attention_mask = inputs["attention_mask"]
+        approximate_tokens = int(len(text)//10)
+        output = base_summarizer.generate(inputs['input_ids'],
+                                          attention_mask = attention_mask,
+                                          top_k=10, max_new_tokens=approximate_tokens,
+                                          pad_token_id = tokenizer.eos_token_id)
+
+        base_summary = tokenizer.batch_decode(output[:, inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
+        summary = base_summary[0]
+        # summary = pipe(text_to_summarize)[0]['generated_text']
+        #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1Changes finished for llama model
+
+
         end = time.time()
         print(f"Summary generation took {round((end-start),2)}s.")
         return summary,round((end-start),2)
 
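The new local path is the standard chat-template flow: wrap the instruction in a user message, render it with tokenizer.apply_chat_template, tokenize, generate, and decode only the tokens that follow the prompt. A self-contained sketch of that flow, keeping the commit's sizing heuristic of roughly one new token per ten input characters; the model id is illustrative, the device is chosen dynamically rather than hard-coding .to('cuda'), and the prompt typo ("SUmmarize") is fixed here:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Llama-2-7b-chat-hf"  # illustrative checkpoint
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

def summarize(text: str) -> str:
    chat = [{"role": "user",
             "content": "Summarize this by focusing on numerically important "
                        "sentences, from the perspective of a financial "
                        f"executive. Input text: {text}"}]
    # Render the chat into the model's own prompt format, then tokenize.
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    # The commit's heuristic: about one new token per ten input characters.
    approximate_tokens = max(1, len(text) // 10)
    output = model.generate(inputs["input_ids"],
                            attention_mask=inputs["attention_mask"],
                            top_k=10,  # inert under default greedy decoding (no do_sample=True), as in the commit
                            max_new_tokens=approximate_tokens,
                            pad_token_id=tokenizer.eos_token_id)
    # Slice off the echoed prompt and decode only the newly generated tokens.
    return tokenizer.batch_decode(output[:, inputs["input_ids"].shape[-1]:],
                                  skip_special_tokens=True)[0]

Because generation runs greedily here, top_k has no effect; it is kept only to mirror the commit, and enabling sampling would require do_sample=True.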