import utils
import os
import numpy as np
import nest_asyncio
import openai
import chromadb
# import everything from llama_index.core (not the legacy namespace) so the
# index interoperates with the core StorageContext and vector store below
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    Document,
    Settings
)
from llama_index.vector_stores.chroma.base import ChromaVectorStore
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding
from trulens_eval import Tru
from utils import get_prebuilt_trulens_recorder

import time

nest_asyncio.apply()
openai.api_key = utils.get_openai_api_key()
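
# Build a query engine over the persisted Chroma index (embedded with the
# fine-tuned model) and record one evaluation run with TruLens.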
def main():
    if not os.path.exists("./default.sqlite"):
        start_time = time.time()

        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
        fine_tuned_path = "local:./models/fine-tuned-embeddings"

        Settings.llm = llm
        Settings.embed_model = fine_tuned_path
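
        # Note: the "local:" prefix makes LlamaIndex load the embedding model
        # from the given directory via HuggingFaceEmbedding (imported above).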
        db = chromadb.PersistentClient(path="./models/chroma_db")
        chroma_collection = db.get_or_create_collection("quickstart")

        # assign chroma as the vector_store to the context
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

        # create your index
        index = VectorStoreIndex.from_vector_store(
            vector_store=vector_store,
            storage_context=storage_context
        )
        query_engine = index.as_query_engine()
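
        # Load evaluation questions; the file separates questions with blank lines.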
        separator = "\n\n"
        eval_questions = []
        with open('./raw_documents/eval_questions.txt', 'r') as file:
            content = file.read()

        for question in content.split(separator):
            print(question)
            print(separator)
            eval_questions.append(question.strip())

        response = query_engine.query(eval_questions[0])
        print(str(response))
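
        # Record every question/response pair with the prebuilt TruLens
        # recorder so its feedback functions score the whole evaluation set.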
        tru = Tru(database_file="./models/trulens_eval.sqlite")
        tru_recorder = get_prebuilt_trulens_recorder(
            query_engine,
            app_id="Direct Query Engine"
        )

        print("Sending each question to the LLM ...")
        with tru_recorder as recording:
            for question in eval_questions:
                response = query_engine.query(question)
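
        # get_records_and_feedback returns a pandas DataFrame of records plus
        # the feedback column names; an empty app_ids list selects all apps.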
        records, feedback = tru.get_records_and_feedback(app_ids=[])

        os.makedirs("./results", exist_ok=True)
        records.to_csv("./results/records.csv", index=False)

        print(tru.db.engine.url.render_as_string(hide_password=False))
        end_time = time.time()
        time_spent_mins = (end_time - start_time) / 60
        with open("./results/time_cost.txt", "w") as fp:
            fp.write(f"Took {int(time_spent_mins)} mins to run the LLM evaluation.")


if __name__ == "__main__":
    # main()

    if False:
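        # Exploratory, intentionally disabled block: index the raw QnA document
        # directly and wire up individual TruLens feedback functions.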
        from trulens_eval import Tru
        tru = Tru()

        documents = SimpleDirectoryReader(
            input_files=["./raw_documents/qna.txt"]
        ).load_data()
        index = VectorStoreIndex.from_documents(documents)
        query_engine = index.as_query_engine()

        response = query_engine.query("Which is not a government healthcare philosophy?")
        print(response)
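
        # Define three feedback functions (groundedness, answer relevance and
        # context relevance), following the TruLens quickstart pattern.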
        # alias the import so it does not shadow the openai module or the
        # llama_index OpenAI LLM class imported above
        from trulens_eval.feedback.provider.openai import OpenAI as fOpenAI
        provider = fOpenAI()

        # select context to be used in feedback; the location of context is app specific
        from trulens_eval.app import App
        context = App.select_context(query_engine)

        from trulens_eval import Feedback
        # Define a groundedness feedback function
        from trulens_eval.feedback import Groundedness
        grounded = Groundedness(groundedness_provider=provider)
        f_groundedness = (
            Feedback(grounded.groundedness_measure_with_cot_reasons)
            .on(context.collect())  # collect context chunks into a list
            .on_output()
            .aggregate(grounded.grounded_statements_aggregator)
        )

        # Question/answer relevance between the overall question and answer.
        f_qa_relevance = Feedback(provider.relevance).on_input_output()

        # Question/statement relevance between the question and each context chunk.
        f_qs_relevance = (
            Feedback(provider.qs_relevance)
            .on_input()
            .on(context)
            .aggregate(np.mean)
        )
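
        # TruLlama wraps the LlamaIndex query engine so each call is traced
        # and scored by the feedback functions defined above.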
        from trulens_eval import TruLlama
        tru_query_engine_recorder = TruLlama(
            query_engine,
            app_id='LlamaIndex_App1',
            feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance]
        )

        if False:
            # or as context manager
            with tru_query_engine_recorder as recording:
                query_engine.query("Which of the following is TRUE on the similarity of Means Testing and Casemix?")