Spaces:
Running
Running
lingyit1108
committed on
Commit
·
b2b3b83
1
Parent(s):
187a37b
added trulens implementation for evaluation
Browse files- .gitignore +6 -1
- main.py +49 -19
- raw_documents/eval_answers.txt +3 -0
- raw_documents/eval_questions.txt +3 -0
- requirements.txt +3 -1
- streamlit_app.py +45 -0
- utils.py +157 -1
.gitignore
CHANGED
@@ -1,3 +1,8 @@
|
|
1 |
.DS_Store
|
2 |
|
3 |
-
.streamlit/
|
|
|
|
|
|
|
|
|
|
|
|
1 |
.DS_Store
|
2 |
|
3 |
+
.streamlit/
|
4 |
+
results/
|
5 |
+
|
6 |
+
*.sqlite
|
7 |
+
ux/
|
8 |
+
pages/
|
main.py
CHANGED
@@ -9,32 +9,62 @@ from llama_index import ServiceContext
|
|
9 |
from llama_index.llms import OpenAI
|
10 |
|
11 |
from llama_index.embeddings import HuggingFaceEmbedding
|
|
|
12 |
|
|
|
13 |
|
14 |
openai.api_key = utils.get_openai_api_key()
|
15 |
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
21 |
|
22 |
-
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)
|
27 |
-
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
28 |
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
-
|
35 |
-
("Intermediate and Long Term Care (ILTC) services are for those who need further care and"
|
36 |
-
"treatment after discharge from the hospital, who may need assistance with their activities of"
|
37 |
-
"daily living. This can be through"
|
38 |
-
)
|
39 |
-
)
|
40 |
-
print(str(response))
|
|
|
9 |
from llama_index.llms import OpenAI
|
10 |
|
11 |
from llama_index.embeddings import HuggingFaceEmbedding
|
12 |
+
from trulens_eval import Tru
|
13 |
|
14 |
+
from utils import get_prebuilt_trulens_recorder
|
15 |
|
16 |
openai.api_key = utils.get_openai_api_key()
|
17 |
|
18 |
+
def main():
    """Build the baseline query engine and run the TruLens evaluation.

    The run is skipped when ``./default.sqlite`` already exists. Otherwise
    the function:

    1. loads ``./raw_documents/HI_knowledge_base.pdf`` and merges its pages
       into a single ``Document``;
    2. builds a ``VectorStoreIndex`` and a default query engine on top of it;
    3. reads the evaluation questions (blank-line separated) from
       ``raw_documents/eval_questions.txt``;
    4. replays every question through the query engine under a TruLens
       recorder;
    5. writes the collected records to ``results/records.csv``.

    Raises:
        ValueError: if the questions file contains no non-blank question.
    """
    # NOTE(review): the nesting was reconstructed from a flattened diff view.
    # The whole run is assumed to be guarded by the database check (otherwise
    # ``query_engine`` would be undefined on later runs) -- confirm.
    if os.path.exists("./default.sqlite"):
        return

    documents = SimpleDirectoryReader(
        input_files=["./raw_documents/HI_knowledge_base.pdf"]
    ).load_data()

    # Collapse all loaded pages into one document before indexing.
    document = Document(text="\n\n".join(doc.text for doc in documents))

    ### gpt-4-1106-preview
    ### gpt-3.5-turbo-1106 / gpt-3.5-turbo
    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
    index = VectorStoreIndex.from_documents([document], service_context=service_context)

    query_engine = index.as_query_engine()

    separator = "\n\n"
    with open("raw_documents/eval_questions.txt", "r", encoding="utf-8") as file:
        content = file.read()

    eval_questions = []
    for chunk in content.split(separator):
        question = chunk.strip()
        # A trailing or doubled separator yields an empty chunk; querying the
        # LLM with an empty string only wastes an API call.
        if not question:
            continue
        print(question)
        print(separator)
        eval_questions.append(question)

    if not eval_questions:
        raise ValueError(
            "raw_documents/eval_questions.txt does not contain any question"
        )

    # Smoke-test the engine on the first question before the recorded run.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru()
    # tru.reset_database()

    tru_recorder = get_prebuilt_trulens_recorder(query_engine,
                                                 app_id="Direct Query Engine")
    # Queries issued inside the recorder context are the ones evaluated.
    with tru_recorder:
        for question in eval_questions:
            query_engine.query(question)

    # An empty ``app_ids`` list is passed through unchanged from the original.
    records, _ = tru.get_records_and_feedback(app_ids=[])

    os.makedirs("results", exist_ok=True)
    records.to_csv("results/records.csv", index=False)

    print(tru.db.engine.url.render_as_string(hide_password=False))
    # tru.run_dashboard()


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
raw_documents/eval_answers.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d6f42a68ccf96496a6dcd89016e53ebb1add84c42ecef1fffe08e211037c4df
|
3 |
+
size 332
|
raw_documents/eval_questions.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:931b5c74d5696e5efb242c7d968765734a621d881642a1b16dbd1d004fd2900e
|
3 |
+
size 1473
|
requirements.txt
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
openai==1.6.1
|
2 |
-
streamlit==1.29.0
|
|
|
|
|
|
1 |
openai==1.6.1
|
2 |
+
streamlit==1.29.0
|
3 |
+
trulens==0.13.4
|
4 |
+
trulens-eval==0.20.0
|
streamlit_app.py
CHANGED
@@ -1,13 +1,44 @@
|
|
1 |
import streamlit as st
|
2 |
import os
|
|
|
3 |
|
4 |
import openai
|
5 |
from openai import OpenAI
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
# App title
|
8 |
st.set_page_config(page_title="💬 Open AI Chatbot")
|
9 |
openai_api = os.getenv("OPENAI_API_KEY")
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Replicate Credentials
|
12 |
with st.sidebar:
|
13 |
st.title("💬 Open AI Chatbot")
|
@@ -33,6 +64,20 @@ with st.sidebar:
|
|
33 |
key="selected_model")
|
34 |
temperature = st.sidebar.slider("temperature", min_value=0.01, max_value=2.0,
|
35 |
value=0.1, step=0.01)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
st.markdown("📖 Reach out to SakiMilo to learn how to create this app!")
|
37 |
|
38 |
# Store LLM generated responses
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
+
import pandas as pd
|
4 |
|
5 |
import openai
|
6 |
from openai import OpenAI
|
7 |
|
8 |
+
import pkg_resources
|
9 |
+
import shutil
|
10 |
+
import main
|
11 |
+
|
12 |
+
### To trigger trulens evaluation
main.main()

### Finally, start streamlit app
# Locate the dashboard pages and the "ux" helper package shipped inside the
# installed trulens_eval distribution.
leaderboard_path = pkg_resources.resource_filename(
    "trulens_eval", "Leaderboard.py"
)
evaluation_path = pkg_resources.resource_filename(
    "trulens_eval", "pages/Evaluations.py"
)
ux_path = pkg_resources.resource_filename(
    "trulens_eval", "ux"
)

# shutil.copyfile does not create missing parent directories, so make sure
# "pages/" exists first; on a checkout without it the copy would raise
# FileNotFoundError.
os.makedirs("pages", exist_ok=True)
shutil.copyfile(leaderboard_path, os.path.join("pages", "1_Leaderboard.py"))
shutil.copyfile(evaluation_path, os.path.join("pages", "2_Evaluations.py"))

# Replace any previous local copy of the "ux" directory with a fresh one.
if os.path.exists("./ux"):
    shutil.rmtree("./ux")
shutil.copytree(ux_path, "./ux")
|
30 |
+
|
31 |
# App title
|
32 |
st.set_page_config(page_title="💬 Open AI Chatbot")
|
33 |
openai_api = os.getenv("OPENAI_API_KEY")
|
34 |
|
35 |
+
data_df = pd.DataFrame(
|
36 |
+
{
|
37 |
+
"Completion": [30, 40, 100, 10],
|
38 |
+
}
|
39 |
+
)
|
40 |
+
data_df.index = ["Chapter 1", "Chapter 2", "Chapter 3", "Chapter 4"]
|
41 |
+
|
42 |
# Replicate Credentials
|
43 |
with st.sidebar:
|
44 |
st.title("💬 Open AI Chatbot")
|
|
|
64 |
key="selected_model")
|
65 |
temperature = st.sidebar.slider("temperature", min_value=0.01, max_value=2.0,
|
66 |
value=0.1, step=0.01)
|
67 |
+
st.data_editor(
|
68 |
+
data_df,
|
69 |
+
column_config={
|
70 |
+
"Completion": st.column_config.ProgressColumn(
|
71 |
+
"Completion %",
|
72 |
+
help="Percentage of content covered",
|
73 |
+
format="%.1f%%",
|
74 |
+
min_value=0,
|
75 |
+
max_value=100,
|
76 |
+
),
|
77 |
+
},
|
78 |
+
hide_index=False,
|
79 |
+
)
|
80 |
+
|
81 |
st.markdown("📖 Reach out to SakiMilo to learn how to create this app!")
|
82 |
|
83 |
# Store LLM generated responses
|
utils.py
CHANGED
@@ -1,4 +1,160 @@
|
|
1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
def get_openai_api_key():
|
4 |
-
return os.getenv("OPENAI_API_KEY")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import numpy as np
|
3 |
+
from trulens_eval import (
|
4 |
+
Feedback,
|
5 |
+
TruLlama,
|
6 |
+
OpenAI
|
7 |
+
)
|
8 |
+
|
9 |
+
from trulens_eval.feedback import Groundedness
|
10 |
+
import nest_asyncio
|
11 |
+
|
12 |
+
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
|
13 |
+
from llama_index.node_parser import SentenceWindowNodeParser
|
14 |
+
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
|
15 |
+
from llama_index.indices.postprocessor import SentenceTransformerRerank
|
16 |
+
from llama_index import load_index_from_storage
|
17 |
+
|
18 |
+
from llama_index.node_parser import HierarchicalNodeParser
|
19 |
+
from llama_index.node_parser import get_leaf_nodes
|
20 |
+
from llama_index import StorageContext
|
21 |
+
from llama_index.retrievers import AutoMergingRetriever
|
22 |
+
from llama_index.indices.postprocessor import SentenceTransformerRerank
|
23 |
+
from llama_index.query_engine import RetrieverQueryEngine
|
24 |
+
|
25 |
+
|
26 |
+
# Apply the nest_asyncio patch before any feedback provider is created.
nest_asyncio.apply()

# Feedback provider shared by every feedback definition below.
openai = OpenAI()

# Answer relevance: evaluated on the (input, output) pair of each record.
qa_relevance = Feedback(
    openai.relevance_with_cot_reasons,
    name="Answer Relevance",
).on_input_output()

# Context relevance: the input against the text of each source node,
# with the per-node results aggregated by np.mean.
qs_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

# Alternative kept from the original for reference:
# grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
grounded = Groundedness(groundedness_provider=openai)

# Groundedness: source node text against the output, aggregated with the
# Groundedness helper's own statement aggregator.
groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Default feedback set used by get_prebuilt_trulens_recorder.
feedbacks = [qa_relevance, qs_relevance, groundedness]
|
52 |
|
53 |
def get_openai_api_key():
    """Return the ``OPENAI_API_KEY`` environment variable.

    Returns:
        The variable's value, or ``None`` when it is not set.
    """
    return os.environ.get("OPENAI_API_KEY")
|
55 |
+
|
56 |
+
def get_trulens_recorder(query_engine, feedbacks, app_id):
    """Wrap ``query_engine`` in a ``TruLlama`` recorder.

    Args:
        query_engine: The query engine to wrap.
        feedbacks: Feedback definitions passed to ``TruLlama``.
        app_id: Identifier passed to ``TruLlama`` as ``app_id``.

    Returns:
        The constructed ``TruLlama`` instance.
    """
    return TruLlama(query_engine, app_id=app_id, feedbacks=feedbacks)
|
63 |
+
|
64 |
+
def get_prebuilt_trulens_recorder(query_engine, app_id):
    """Wrap ``query_engine`` in a ``TruLlama`` recorder with default feedbacks.

    Same as ``get_trulens_recorder`` but always uses the module-level
    ``feedbacks`` list.

    Args:
        query_engine: The query engine to wrap.
        app_id: Identifier passed to ``TruLlama`` as ``app_id``.

    Returns:
        The constructed ``TruLlama`` instance.
    """
    return get_trulens_recorder(query_engine, feedbacks=feedbacks, app_id=app_id)
|
71 |
+
|
72 |
+
def build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index",
    sentence_window_size=3,
):
    """Build a sentence-window vector index, or load it from ``save_dir``.

    If ``save_dir`` does not exist, the index is built from ``document`` and
    persisted there. If it does exist, the persisted index is loaded and
    ``document`` is not read at all, so delete ``save_dir`` to force a rebuild
    after the source document or the window size changes.

    Args:
        document: The document to index.
        llm: LLM passed to the ``ServiceContext``.
        embed_model: Embedding model passed to the ``ServiceContext``.
        save_dir: Directory used to persist and reload the index.
        sentence_window_size: ``window_size`` given to the sentence-window
            node parser. Defaults to 3, the previously hard-coded value.

    Returns:
        The built or loaded index.
    """
    # create the sentence window node parser
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )

    if not os.path.exists(save_dir):
        sentence_index = VectorStoreIndex.from_documents(
            [document], service_context=sentence_context
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )

    return sentence_index
|
98 |
+
|
99 |
+
def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
):
    """Create a query engine over a sentence-window index.

    Two node postprocessors are applied, in this order: a
    ``MetadataReplacementPostProcessor`` targeting the ``"window"`` metadata
    key, then a ``SentenceTransformerRerank`` using
    ``"BAAI/bge-reranker-base"``.

    Args:
        sentence_index: Index to query.
        similarity_top_k: ``similarity_top_k`` passed to ``as_query_engine``.
        rerank_top_n: ``top_n`` passed to the reranker.

    Returns:
        The configured query engine.
    """
    window_replacer = MetadataReplacementPostProcessor(target_metadata_key="window")
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )
    node_postprocessors = [window_replacer, reranker]

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=node_postprocessors,
    )
|
114 |
+
|
115 |
+
def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build a hierarchical (auto-merging) index, or load it from ``save_dir``.

    If ``save_dir`` exists, the persisted index is loaded and ``documents``
    and ``chunk_sizes`` are not used; delete ``save_dir`` to force a rebuild.
    Otherwise the documents are parsed into a node hierarchy, all nodes are
    added to the docstore, only the leaf nodes are indexed, and the result is
    persisted to ``save_dir``.

    Args:
        documents: Documents to parse and index.
        llm: LLM passed to the ``ServiceContext``.
        embed_model: Embedding model passed to the ``ServiceContext``.
        save_dir: Directory used to persist and reload the index.
        chunk_sizes: Chunk sizes for the hierarchical parser; defaults to
            ``[2048, 512, 128]`` when falsy.

    Returns:
        The built or loaded index.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Load path first: parsing the documents is only needed when building,
    # so skip that work entirely when a persisted index is available.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # The docstore receives every node, while only the leaves are indexed.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index
|
144 |
+
|
145 |
+
def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=2,
):
    """Create an auto-merging query engine with reranking.

    The index's retriever is wrapped in an ``AutoMergingRetriever`` that
    shares the index's storage context, and a ``SentenceTransformerRerank``
    using ``"BAAI/bge-reranker-base"`` is applied as node postprocessor.

    Args:
        automerging_index: Index to query.
        similarity_top_k: ``similarity_top_k`` passed to ``as_retriever``.
        rerank_top_n: ``top_n`` passed to the reranker.

    Returns:
        The configured ``RetrieverQueryEngine``.
    """
    merging_retriever = AutoMergingRetriever(
        automerging_index.as_retriever(similarity_top_k=similarity_top_k),
        automerging_index.storage_context,
        verbose=True,
    )
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )
    return RetrieverQueryEngine.from_args(
        merging_retriever, node_postprocessors=[reranker]
    )
|