# InsuHelp-old / app.py
# from fastapi import FastAPI
# from fastapi.middleware.cors import CORSMiddleware
from openai import OpenAI
from google import genai
from crawler import extract_data
import time
import os
from dotenv import load_dotenv
import gradio as gr
# import multiprocessing
from together import Together
load_dotenv("../.env")
# print("Environment variables:", os.environ)
together_client = Together(
api_key=os.getenv("TOGETHER_API_KEY"),
)
gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
genai_model = "gemini-2.0-flash-exp"
perplexity_client = OpenAI(api_key=os.getenv("PERPLEXITY_API_KEY"), base_url="https://api.perplexity.ai")
gpt_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
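
# Fetch crawled Naver Q&A context for a query. The second argument to
# extract_data is presumably a result/page count (the crawler module is not shown here).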
def get_answers( query: str ):
context = extract_data(query, 1)
return context
# with torch.no_grad():
# model = AutoModel.from_pretrained('BM-K/KoSimCSE-roberta')
# tokenizer = AutoTokenizer.from_pretrained('BM-K/KoSimCSE-roberta', TOKENIZERS_PARALLELISM=True)
# def cal_score(input_data):
# # Initialize model and tokenizer inside the function
# with torch.no_grad():
# inputs = tokenizer(input_data, padding=True, truncation=True, return_tensors="pt")
# outputs = model.get_input_embeddings(inputs["input_ids"])
# a, b = outputs[0], outputs[1] # Adjust based on your model's output structure
# # Normalize the tensors
# a_norm = a / a.norm(dim=1)[:, None]
# b_norm = b / b.norm(dim=1)[:, None]
# print(a.shape, b.shape)
# # Return the similarity score
# # return torch.mm(a_norm, b_norm.transpose(0, 1)) * 100
# a_norm = a_norm.reshape(1, -1)
# b_norm = b_norm.reshape(1, -1)
# similarity_score = cosine_similarity(a_norm, b_norm)
# # Return the similarity score (assuming you want the average of the similarities across the tokens)
# return similarity_score # Scalar value
# def get_match_scores( message: str, query: str, answers: list[dict[str, object]] ):
# start = time.time()
# max_processes = 4
# with multiprocessing.Pool(processes=max_processes) as pool:
# scores = pool.map(cal_score, [[answer['questionDetails'], message] for answer in answers])
# print(f"Time taken to compare: {time.time() - start} seconds")
# print("Scores: ", scores)
# return scores
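
# Condense the user's question into a short search query with Gemini, crawl
# Naver for matching Q&A threads, and return the joined answers plus elapsed time.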
def get_naver_answers( message: str ):
print(">>> Starting naver extraction...")
print("Question: ", message)
naver_start_time = time.time()
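    # The Korean prompt below asks Gemini to condense the question into a short
    # title (title only, no answer, Korean only) to use as the crawl query.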
response = gemini_client.models.generate_content(
model = genai_model,
contents=f"{message}\n ์œ„์˜ ๋‚ด์šฉ์„ ์งง์€ ์ œ๋ชฉ์œผ๋กœ ์š”์•ฝํ•ฉ๋‹ˆ๋‹ค. ์ œ๋ชฉ๋งŒ ๋ณด์—ฌ์ฃผ์„ธ์š”. ๋Œ€๋‹ตํ•˜์ง€ ๋งˆ์„ธ์š”. ํ•œ๊ตญ์–ด๋กœ๋งŒ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”!!!",
)
query = response.text
print( "Query: ", query)
context = get_answers( query )
    joined_answers = ['. '.join(answer['answers']) for answer in context]
naver_end_time = time.time()
print(f"Time taken to extract from Naver: { naver_end_time - naver_start_time } seconds")
    document = '\n'.join(joined_answers)
return document, naver_end_time - naver_start_time
def get_qwen_big_answer( message: str ):
print(">>> Starting Qwen 72B extraction...")
qwen_start_time = time.time()
response = together_client.chat.completions.create(
model="Qwen/Qwen2.5-72B-Instruct-Turbo",
messages=[
{"role": "system", "content": "You are a helpful question-answer, CONCISE conversation assistant that answers in Korean."},
{"role": "user", "content": message}
]
)
qwen_end_time = time.time()
print(f"Time taken to extract from Qwen: { qwen_end_time - qwen_start_time } seconds")
return response.choices[0].message.content, qwen_end_time - qwen_start_time
def get_qwen_small_answer( message: str ):
print(">>> Starting Qwen 7B extraction...")
qwen_start_time = time.time()
response = together_client.chat.completions.create(
model="Qwen/Qwen2.5-7B-Instruct-Turbo",
messages=[
{"role": "system", "content": "You are a helpful question-answer, conversation assistant that answers in Korean. Your responses should sound human-like."},
{"role": "user", "content": message}
],
        max_tokens=None
        # TODO: change the messages option
)
qwen_end_time = time.time()
print(f"Time taken to extract from Qwen: { qwen_end_time - qwen_start_time } seconds")
return response.choices[0].message.content, qwen_end_time - qwen_start_time
def get_llama_small_answer( message: str ):
print(">>> Starting Llama 3.1 8B extraction...")
llama_start_time = time.time()
response = together_client.chat.completions.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
messages=[
{"role": "system", "content": "You are an artificial intelligence assistant and you need to engage in a helpful, CONCISE, polite question-answer conversation with a user."},
{
"role": "user",
"content": message
}
]
)
llama_end_time = time.time()
print(f"Time taken to extract from Llama: { llama_end_time - llama_start_time } seconds")
return response.choices[0].message.content, llama_end_time - llama_start_time
def get_llama_big_answer( message: str ):
print(">>> Starting Llama 3.1 70B extraction...")
llama_start_time = time.time()
response = together_client.chat.completions.create(
model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
messages=[
{"role": "system", "content": "You are an artificial intelligence assistant and you need to engage in a helpful, CONCISE, polite question-answer conversation with a user."},
{
"role": "user",
"content": message
}
]
)
llama_end_time = time.time()
print(f"Time taken to extract from Llama: { llama_end_time - llama_start_time } seconds")
return response.choices[0].message.content, llama_end_time - llama_start_time
def get_gemini_answer( message: str ):
print(">>> Starting gemini extraction...")
gemini_start_time = time.time()
response = gemini_client.models.generate_content(
model = genai_model,
contents=message,
)
gemini_end_time = time.time()
print(f"Time taken to extract from Gemini: { gemini_end_time - gemini_start_time } seconds")
    # Use .text (as elsewhere in this file) so the function returns a string,
    # not a Content object.
    return response.text, gemini_end_time - gemini_start_time
# def get_perplexity_answer( message: str ):
# print(">>> Starting perplexity extraction...")
# perplexity_start_time = time.time()
# messages = [
# {
# "role": "system",
# "content": (
# "You are an artificial intelligence assistant and you need to "
# "engage in a helpful, CONCISE, polite question-answer conversation with a user."
# ),
# },
# {
# "role": "user",
# "content": (
# message
# ),
# },
# ]
# response = perplexity_client.chat.completions.create(
# model="llama-3.1-sonar-small-128k-online",
# messages=messages
# )
# perplexity_end_time = time.time()
# print(f"Time taken to extract from Perplexity: { perplexity_end_time - perplexity_start_time } seconds")
# return response.choices[0].message.content, perplexity_end_time - perplexity_start_time
def get_gpt_answer( message: str ):
print(">>> Starting GPT extraction...")
gpt_start_time = time.time()
completion = gpt_client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": "You are a helpful assistant that gives short answers and nothing extra."},
{
"role": "user",
"content": message
}
]
)
gpt_end_time = time.time()
print(f"Time taken to extract from GPT: { gpt_end_time - gpt_start_time } seconds")
return completion.choices[0].message.content, gpt_end_time - gpt_start_time
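
# Benchmark harness: answer the question (1) with each model given the Naver
# context, (2) with each model alone, and (3) with each model's standalone answer
# appended to the context and re-summarized by every model, timing each run.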
def compare_answers(message: str):
methods = [
("Qwen Big (72B)", get_qwen_big_answer),
("Qwen Small (7B)", get_qwen_small_answer),
("Llama Small (8B)", get_llama_small_answer),
("Llama Big (70B)", get_llama_big_answer),
("Gemini-2.0-Flash", get_gemini_answer),
# ("Perplexity", get_perplexity_answer),
("GPT (4o-mini)", get_gpt_answer)
]
results = []
naver_docs, naver_time_taken = get_naver_answers( message )
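    # Korean instruction (gist): "Answer the question based on the documents
    # below; respond only in Korean."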
content = f'์•„๋ž˜ ๋ฌธ์„œ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์งˆ๋ฌธ์— ๋‹ตํ•˜์„ธ์š”. ๋‹ต๋ณ€์€ ํ•œ๊ตญ์–ด๋กœ๋งŒ ํ•ด์ฃผ์„ธ์š” \n ์งˆ๋ฌธ {message}\n'
content += naver_docs
print("Starting the comparison between summarizers...")
for method_name, method in methods:
answer, time_taken = method(content)
results.append({
"Method": f"Naver + ({method_name})",
"Question": message,
"Answer": answer,
"Time Taken": naver_time_taken + time_taken
})
print("Starting the comparison between extractors/summarizers...")
for method_name, method in methods:
additional_docs, time_taken = method(message)
results.append({
"Method": method_name,
"Question": message,
"Answer": additional_docs,
"Time Taken": time_taken
})
        # NOTE: content accumulates across iterations, so later rows also carry
        # the documents gathered for earlier methods.
        content += f'\n{additional_docs}'
        time_taken += naver_time_taken
for summarizer_name, summarizer in methods:
answer, answer_time = summarizer(content)
results.append({
"Method": f"Naver + {method_name} + ({summarizer_name})",
"Question": message,
"Answer": answer,
"Time Taken": time_taken + answer_time
})
return results
def chatFunction( message, history ):
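    # Korean system prompt (gist): answer the question using only the documents
    # below; do not echo the question; reply in Korean only; never output content
    # that is absent from the Naver and other documents; sound friendly and human.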
content = f'์•„๋ž˜ ๋ฌธ์„œ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์งˆ๋ฌธ์— ๋‹ตํ•˜์„ธ์š”. ๋‹ต๋ณ€์—์„œ ์งˆ๋ฌธ์„ ๋”ฐ๋ผ ์ถœ๋ ฅ ํ•˜์ง€ ๋งˆ์„ธ์š”. ๋‹ต๋ณ€์€ ํ•œ๊ตญ์–ด๋กœ๋งŒ ํ•ด์ฃผ์„ธ์š”! ์ฐพ์€ Naver ๋ฌธ์„œ์™€ ๋‹ค๋ฅธ ๋ฌธ์„œ์—์„œ ๋‹ต๋ณ€์ด ์—†๋Š” ๋‚ด์šฉ์€ ์ ˆ๋Œ€ ์ถœ๋ ฅํ•˜์ง€ ๋งˆ์„ธ์š”. ์นœ์ ˆํ•˜๊ณ  ์ธ๊ฐ„๋‹ต๊ฒŒ ๋งํ•˜์„ธ์š”. \n ์งˆ๋ฌธ: {message}\n ๋ฌธ์„œ: '
naver_docs, naver_time_taken = get_naver_answers( message )
    if len(naver_docs) > 55000:
        # The crawled text is too long for a single request: split it into
        # roughly equal chunks (with a small overlap) and summarize each chunk.
        overlap = 200
        answers = []
        num_chunks = (len(naver_docs) - 55000) // 55000 + 2
        split_len = len(naver_docs) // num_chunks + 1
        for i in range(num_chunks):
            if i == 0:
                split = naver_docs[:split_len]
            else:
                split = naver_docs[i * split_len - overlap: (i + 1) * split_len]
            answer, _ = get_qwen_small_answer(f"Summarize important points in a paragraph, given the information below, using only Korean language. Give me only the summary!!! \n {split}")
            answers.append(answer)
        naver_docs = '\n'.join(answers)
start_time = time.time()
content += "\n Naver ๋ฌธ์„œ: " + naver_docs
completion = gpt_client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": "You are a helpful assistant that gives detailed answers only in korean."},
{
"role": "user",
"content": message
}
]
)
gpt_resp = completion.choices[0].message.content
content += "\n ๋‹ค๋ฅธ ๋ฌธ์„œ: " + gpt_resp
# content += "\n" + gpt_resp
answer, _ = get_qwen_small_answer(content)
print("-"*70)
print("Question: ", message)
print("Answer: ", answer)
time_taken = time.time() - start_time
print("Time taken to summarize: ", time_taken)
return answer
if __name__ == "__main__":
# multiprocessing.set_start_method("fork", force=True)
# if multiprocessing.get_start_method(allow_none=True) is None:
# multiprocessing.set_start_method("fork")
    demo = gr.ChatInterface(fn=chatFunction, type="messages")
    demo.launch(share=True)
# with open("test_questions.txt", "r") as f:
# if os.path.exists("comparison_results.csv"):
# if input("Do you want to delete the former results? (y/n): ") == "y":
# os.remove("comparison_results.csv")
# questions = f.readlines()
# print(questions)
# for idx, question in enumerate(questions):
# print(" -> Starting the question number: ", idx)
# results = compare_answers(question)
# df = pd.DataFrame(results)
# df.to_csv("comparison_results.csv", mode='a', index=False)