Spaces:
Sleeping
Sleeping
viboognesh
commited on
Upload folder using huggingface_hub
Browse files- .gitattributes +3 -0
- app.py +117 -0
- context_vectorstore/index.faiss +3 -0
- context_vectorstore/index.pkl +3 -0
- formatted_vectorstore/index.faiss +3 -0
- formatted_vectorstore/index.pkl +3 -0
- just_vectorstore/index.faiss +3 -0
- just_vectorstore/index.pkl +3 -0
- llm_constants.py +8 -0
- prompts.py +225 -0
- pydantic_models.py +24 -0
- ragchatbot.py +197 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
context_vectorstore/index.faiss filter=lfs diff=lfs merge=lfs -text
|
37 |
+
formatted_vectorstore/index.faiss filter=lfs diff=lfs merge=lfs -text
|
38 |
+
just_vectorstore/index.faiss filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from ragchatbot import RAGChatBot
|
3 |
+
from pydantic_models import RequestModel, ChatHistoryItem
|
4 |
+
|
5 |
+
|
6 |
+
def validate_chat_history_item(chat_history_item: ChatHistoryItem):
    """Re-validate a chat history item.

    The item is dumped to a plain dict and that dict is validated again
    against ``ChatHistoryItem``; the freshly validated model is returned.
    """
    raw_fields = chat_history_item.model_dump()
    return ChatHistoryItem.model_validate(raw_fields)
|
8 |
+
|
9 |
+
st.set_page_config(page_title="RAG-Chatbot", page_icon=":mag:", layout="wide")
st.title("Test Contextual Retrieval")
col1, col2, col3 = st.columns(3)

col1.title("Contextual Chunking")
col2.title("Current Model")
col3.title("Formatted Text")

# One entry per retrieval pipeline, in left-to-right display order:
# (session-state key prefix, vectorstore directory, column label).
PIPELINES = (
    ("context", "context_vectorstore", "Contextual Chunking"),
    ("just", "just_vectorstore", "Current Model"),
    ("formatted", "formatted_vectorstore", "Formatted Text"),
)


def _chunk_body(page_content: str) -> str:
    """Return the text between the <chunk_content> tags.

    Falls back to the whole text when the opening tag is absent, instead of
    raising IndexError as a bare ``split(...)[1]`` would.
    """
    _, opening_tag, remainder = page_content.partition("<chunk_content>")
    if not opening_tag:
        return page_content
    return remainder.partition("</chunk_content>")[0]


def _ask(prefix: str, question: str) -> dict:
    """Query one pipeline's chatbot and build the history entry for this turn.

    Nothing is written to session state here, so a failure in any pipeline
    cannot leave the three histories with different lengths.
    """
    previous_turns = [
        ChatHistoryItem(
            user_message=turn["user_message"],
            assistant_message=turn["assistant_message"],
        )
        for turn in st.session_state[f"{prefix}_chat_history"]
    ]
    response = st.session_state[f"{prefix}_ragchatbot"].get_response(
        RequestModel(user_question=question, chat_history=previous_turns)
    )
    return {
        "user_message": question,
        "assistant_message": response.answer,
        "search_phrase": response.search_phrase,
        "sources_documents": [
            {
                "heading": doc.heading,
                "page_content": doc.page_content,
                "relevance_score": doc.relevance_score,
            }
            for doc in (response.sources_documents or [])
        ],
    }


# Create each chatbot and its history once per browser session.
for prefix, store_path, _ in PIPELINES:
    if f"{prefix}_ragchatbot" not in st.session_state:
        st.session_state[f"{prefix}_ragchatbot"] = RAGChatBot(vectorstore_path=store_path)
    if f"{prefix}_chat_history" not in st.session_state:
        st.session_state[f"{prefix}_chat_history"] = []

# Replay the conversation, one row of three columns per turn.  zip() stops at
# the shortest history, so a length mismatch can no longer crash the page.
# NOTE(review): the nesting below is inferred; the original indentation was lost.
histories = [st.session_state[f"{prefix}_chat_history"] for prefix, _, _ in PIPELINES]
labels = [label for _, _, label in PIPELINES]
for turns in zip(*histories):
    for col, turn, label in zip(st.columns(3, vertical_alignment="top"), turns, labels):
        with col.chat_message("user"):
            st.write(turn["user_message"].replace("\n", "\n\n"))
        with col.chat_message("assistant"):
            st.write(turn["assistant_message"].replace("\n", "\n\n"))
            st.write(turn["search_phrase"])
            for position, doc in enumerate(turn["sources_documents"], start=1):
                with st.expander(f"{label} Sources - {position}"):
                    st.subheader(f"{doc['heading']} - {doc['relevance_score']}")
                    content = doc["page_content"].replace("\n", "\n\n")
                    if label == "Contextual Chunking":
                        # Contextual chunks wrap the original text in tags.
                        content = _chunk_body(content)
                    st.write(content)

if user_query := st.chat_input("Enter your query"):
    for col in st.columns(3, vertical_alignment="top"):
        with col.chat_message("user"):
            st.write(user_query.replace("\n", "\n\n"))
    with st.spinner("Generating response..."):
        # Collect all three answers before touching any history.
        new_turns = {prefix: _ask(prefix, user_query) for prefix, _, _ in PIPELINES}
    for prefix, turn in new_turns.items():
        st.session_state[f"{prefix}_chat_history"].append(turn)
    st.rerun()
|
94 |
+
# with col1.chat_message("assistant"):
|
95 |
+
# st.write(context_response.answer.replace("\n","\n\n"))
|
96 |
+
# with col1.expander("Contextual Chunking Sources"):
|
97 |
+
# for doc in context_response.sources_documents:
|
98 |
+
# st.subheader(f"{doc.heading} - {doc.relevance_score}")
|
99 |
+
# st.write(doc.page_content.replace("\n","\n\n").split("<chunk_content>")[1].split("</chunk_content>")[0])
|
100 |
+
# st.divider()
|
101 |
+
# with col2.chat_message("assistant"):
|
102 |
+
# st.write(just_response.answer.replace("\n","\n\n"))
|
103 |
+
# with st.expander("Without Contextual Chunking Sources"):
|
104 |
+
# st.write(just_response.chat_history[-1].search_phrase)
|
105 |
+
# for doc in just_response.sources_documents:
|
106 |
+
# st.subheader(f"{doc.heading} - {doc.relevance_score}")
|
107 |
+
# st.write(doc.page_content.replace("\n","\n\n"))
|
108 |
+
# st.divider()
|
109 |
+
# with col3.chat_message("assistant"):
|
110 |
+
# st.write(formatted_response.answer.replace("\n","\n\n"))
|
111 |
+
# with st.expander("Formatted Contextual Chunking Sources"):
|
112 |
+
# st.write(formatted_response.chat_history[-1].search_phrase)
|
113 |
+
# for doc in formatted_response.sources_documents:
|
114 |
+
# st.subheader(f"{doc.heading} - {doc.relevance_score}")
|
115 |
+
# st.write(doc.page_content.replace("\n","\n\n"))
|
116 |
+
# st.divider()
|
117 |
+
|
context_vectorstore/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2276310b46bf8ec0702a5e70ce20e960a3b1e4bb3ceb42b693f29558f18186b
|
3 |
+
size 3637293
|
context_vectorstore/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70b8a6472268482439479cdf1453e77aa03da3876b8642f87d9441868164a652
|
3 |
+
size 487670
|
formatted_vectorstore/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:948333b48a1d903cb1a76bba85d37a3a97e34bb7dc2b2c0d189c975d53913845
|
3 |
+
size 3637293
|
formatted_vectorstore/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ce172a0907e3688d726d4f5bfa82e9ee3f31a9a286187d6b92358099a51de77
|
3 |
+
size 272735
|
just_vectorstore/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8577cd55dbb9d09321ae7d82782042cbd9f22a11f3ffc64fa97a17e0541e783
|
3 |
+
size 1191981
|
just_vectorstore/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58ef6e07ddc0dd550849793b72fd0b654531e2190b9f3edeea138b59e7cc07aa
|
3 |
+
size 221950
|
llm_constants.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
LLM_MODEL_NAME = "claude-3-haiku-20240307"
|
2 |
+
RERANKER_MODEL_NAME = "rerank-multilingual-v2.0"
|
3 |
+
EMBEDDINGS_MODEL_NAME = "text-embedding-3-large"
|
4 |
+
MAX_TOKENS = 1024
|
5 |
+
COHERE_RERANKER_COST = 0.001
|
6 |
+
EMBEDDINGS_TOKENS_COST = 0.13
|
7 |
+
INPUT_TOKENS_COST = 0.25
|
8 |
+
OUTPUT_TOKENS_COST = 1.25
|
prompts.py
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
system_prompt = """You are a proficient text formatter who is an expert in formatting a given text in markdown format. You will be given a text with list of text section and tables and your task is to format the text in markdown format.
|
2 |
+
|
3 |
+
Instructions:
|
4 |
+
- Read the text given by user.
|
5 |
+
- Understand the structure of the text.
|
6 |
+
- Return the text in markdown format with an appropriate number of # to indicate the heading and subheading.
|
7 |
+
- Do not omit any text that are present in the <text>
|
8 |
+
- Do not remove any numbering which is present in the <text>
|
9 |
+
- Do not omit any tables that are present in the <text>
|
10 |
+
- Do not add any details that is not present the <text>.
|
11 |
+
- Your response should contain two tags <formatted_text> and <completion_bool>
|
12 |
+
- <formatted_text> contains the formatted text and should be returned in markdown format
|
13 |
+
- <completion_bool> should be a boolean value that is set to True once the entire <text> is formatted correctly.
|
14 |
+
- In case you are not able to format the entire text within 4096 tokens, you will be prompted to complete formatting the entire text in the subsequent message.
|
15 |
+
- Continue formatting the rest of the text, when prompted to do so.
|
16 |
+
- Once you have completed formatting the entire text, return the formatted_text within the formatted_text tag and set the value within <completion_bool> to be True like this in your final response: <completion_bool>True</completion_bool>
|
17 |
+
- Make sure to include the <completion_bool>True</completion_bool> tag in your final response when you have formatted the complete text."""
|
18 |
+
|
19 |
+
continue_prompt="""Please continue formatting the rest of the text exactly from where you left off such that when the text within <formatted_text> above and the text within <formatted_text> in your response are combined together, they are coherent and complete with no additional text."""
|
20 |
+
|
21 |
+
gen_prompt = """You will be provided a text which contains tables and numbered headings and subheadings within <text>. Your task is to format the text to markdown format by adding appropriate number of # to the heading and subheadings that indicate their order.Write your formatted text within <formatted_text> tags. If you cannot complete formatting the entire text within <text> tag, you will be prompted to continue formatting the remaining text in subsequent message from the user. Continue formatting the rest of the text exactly from where you left off such that when the text within <formatted_text> previous message and the text within <formatted_text> in your response are combined together, they are coherent and complete with no additional text. Do not omit any tables, text or numbering that is present in the <text>. Once you have completed formatting the entire text, add <completion_bool>True</completion_bool> after the <formatted_text> tag to indicate that the entire text formatting is complete."""
|
22 |
+
|
23 |
+
GENERATED_SYSTEM_PROMPT = """You will be given a text that contains numbered headings, subheadings, and possibly tables. Your task is to format this text into markdown format. Here's how to proceed:
|
24 |
+
|
25 |
+
1. The input text will be provided within <text> tags
|
26 |
+
|
27 |
+
2. Format the text by adding the appropriate number of '#' symbols to the headings and subheadings to indicate their hierarchy. The number of '#' symbols should correspond to the level of the heading in the original numbering system.
|
28 |
+
|
29 |
+
3. Follow these guidelines for formatting:
|
30 |
+
- Main headings (e.g., "1. Introduction") should be preceded by a single '#'
|
31 |
+
- First-level subheadings (e.g., "1.1 Background") should be preceded by two '##'
|
32 |
+
- Second-level subheadings (e.g., "1.1.1 Historical Context") should be preceded by three '###'
|
33 |
+
- Continue this pattern for deeper levels of subheadings
|
34 |
+
|
35 |
+
4. Preserve all tables, text content, and original numbering exactly as they appear in the input text.
|
36 |
+
|
37 |
+
5. Write your formatted text within <formatted_text> tags.
|
38 |
+
|
39 |
+
6. If you cannot complete formatting the entire text within the character limit of your response, format as much as you can and end your response with the <formatted_text> closing tag. You will be prompted to continue formatting the remaining text in a subsequent message. When this happens, continue formatting exactly where you left off, ensuring that when combined with your previous response, the result is coherent and complete.
|
40 |
+
|
41 |
+
7. Once you have completed formatting the entire text, add the following tag immediately after the closing </formatted_text> tag:
|
42 |
+
<completion_bool>True</completion_bool>
|
43 |
+
|
44 |
+
8. Remember to preserve all original content, including tables and numbering, while adding the appropriate markdown formatting for headings and subheadings.
|
45 |
+
|
46 |
+
9. Your response should contain <formatted_text> tag and the formatted text should be written within the <formatted_text> tag."""
|
47 |
+
|
48 |
+
Unchanged_prompt = """You will be given a text that contains numbered headings, subheadings, and possibly tables. Your task is to format this text into markdown format. Here's how to proceed:
|
49 |
+
|
50 |
+
1. The input text will be provided within <text> tags:
|
51 |
+
|
52 |
+
<text>
|
53 |
+
{{TEXT}}
|
54 |
+
</text>
|
55 |
+
|
56 |
+
2. Format the text by adding the appropriate number of '#' symbols to the headings and subheadings to indicate their hierarchy. The number of '#' symbols should correspond to the level of the heading in the original numbering system.
|
57 |
+
|
58 |
+
3. Follow these guidelines for formatting:
|
59 |
+
- Main headings (e.g., "1. Introduction") should be preceded by a single '#'
|
60 |
+
- First-level subheadings (e.g., "1.1 Background") should be preceded by two '##'
|
61 |
+
- Second-level subheadings (e.g., "1.1.1 Historical Context") should be preceded by three '###'
|
62 |
+
- Continue this pattern for deeper levels of subheadings
|
63 |
+
|
64 |
+
4. Preserve all tables, text content, and original numbering exactly as they appear in the input text.
|
65 |
+
|
66 |
+
5. Write your formatted text within <formatted_text> tags.
|
67 |
+
|
68 |
+
6. If you cannot complete formatting the entire text within the character limit of your response, format as much as you can and end your response with the <formatted_text> closing tag. You will be prompted to continue formatting the remaining text in a subsequent message. When this happens, continue formatting exactly where you left off, ensuring that when combined with your previous response, the result is coherent and complete.
|
69 |
+
|
70 |
+
7. Once you have completed formatting the entire text, add the following tag immediately after the closing </formatted_text> tag:
|
71 |
+
<completion_bool>True</completion_bool>
|
72 |
+
|
73 |
+
8. Here's an example of how your output should look:
|
74 |
+
|
75 |
+
<formatted_text>
|
76 |
+
# 1. Introduction
|
77 |
+
|
78 |
+
## 1.1 Background
|
79 |
+
|
80 |
+
### 1.1.1 Historical Context
|
81 |
+
|
82 |
+
Content of the section...
|
83 |
+
|
84 |
+
## 1.2 Objectives
|
85 |
+
|
86 |
+
Content of the section...
|
87 |
+
|
88 |
+
# 2. Methodology
|
89 |
+
|
90 |
+
## 2.1 Data Collection
|
91 |
+
|
92 |
+
Content of the section...
|
93 |
+
</formatted_text>
|
94 |
+
<completion_bool>True</completion_bool>
|
95 |
+
|
96 |
+
Remember to preserve all original content, including tables and numbering, while adding the appropriate markdown formatting for headings and subheadings."""
|
97 |
+
|
98 |
+
CACHE_PROMPT = """You are an AI assistant tasked to situate a chunk within the whole document and provide short succinct context to situate the chunk within the overall document for the purposes of improving search retrieval of the chunk.
|
99 |
+
|
100 |
+
Instructions:
|
101 |
+
- Read the document and understand the title and scope of the document.
|
102 |
+
- Read the chunk and understand the content of the chunk.
|
103 |
+
- Read the chunk and understand if the section contains any reference to other parts of the document.
|
104 |
+
- In your context, include the title of the document, the scope and areas that the information in the section covers and also information from the referenced parts of the document so that you can understand the content of the chunk.
|
105 |
+
<document>
|
106 |
+
{WHOLE_DOCUMENT}
|
107 |
+
</document>
|
108 |
+
"""
|
109 |
+
|
110 |
+
CHUNK_PROMPT = """Here is the section that we want to situate within the whole document and add context.
|
111 |
+
<부분>
|
112 |
+
{CHUNK_CONTENT}
|
113 |
+
</부분>
|
114 |
+
Your response should be in the same language as the chunk. Answer only with the succinct context and nothing else."""
|
115 |
+
|
116 |
+
CONTEXT_CHUNKING_PROMPT_PART1 = """You are an AI assistant tasked with situating a specific section within the context of a larger document. Your goal is to provide a short, succinct context that will improve search retrieval of this section. Follow these steps carefully:
|
117 |
+
|
118 |
+
1. First, you will be given the entire document.
|
119 |
+
2. Then, you will be given a specific section from this document.
|
120 |
+
|
121 |
+
|
122 |
+
3. Carefully read and analyze both the whole document and the section content. Pay attention to:
|
123 |
+
- The title and overall scope of the document
|
124 |
+
- The main topics covered in the section content
|
125 |
+
- Any references the section makes to other parts of the document
|
126 |
+
|
127 |
+
4. Create a succinct context for the section that includes:
|
128 |
+
- The title of the document
|
129 |
+
- The scope and main areas that the information in the section covers
|
130 |
+
- Relevant information from other parts of the document that the section references or relates to
|
131 |
+
- Additional context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk
|
132 |
+
- Section of the document that is referenced or related to in this section
|
133 |
+
|
134 |
+
5. Your response should be:
|
135 |
+
- In korean language
|
136 |
+
- Concise and to the point, focusing only on the most relevant contextual information
|
137 |
+
- Formatted as plain text, without any additional tags or markers
|
138 |
+
|
139 |
+
6. Do not include any explanations, introductions, or conclusions. Provide only the contextual information as described above.
|
140 |
+
|
141 |
+
Remember, the goal is to create a brief, informative context that will help situate this section within the larger document and improve its searchability.
|
142 |
+
|
143 |
+
<whole_document>
|
144 |
+
{WHOLE_DOCUMENT}
|
145 |
+
</whole_document>"""
|
146 |
+
|
147 |
+
CONTEXT_CHUNKING_PROMPT_PART2 = """
|
148 |
+
|
149 |
+
<section_content>
|
150 |
+
{SECTION_CONTENT}
|
151 |
+
</section_content>"""
|
152 |
+
|
153 |
+
|
154 |
+
CHAT_PROMPT="""You are an expert research assistant specializing in Korean Construction Standards and Regulations. Your task is to answer questions related to this field accurately and comprehensively. To assist you in this task, you have access to a powerful tool called Documents_Retriever.
|
155 |
+
|
156 |
+
The Documents_Retriever tool is designed to search through Korean Construction Standards and Regulations files and retrieve relevant information. When you use this tool, it will return multiple chunks of text from the appropriate documents, along with the name of the file from which each chunk was extracted. These text chunks may contain paragraphs and sometimes tables.
|
157 |
+
|
158 |
+
First, review the following chat history to understand the context of the conversation:
|
159 |
+
|
160 |
+
<chat_history>
|
161 |
+
{CHAT_HISTORY}
|
162 |
+
</chat_history>
|
163 |
+
|
164 |
+
Now, consider the following user question:
|
165 |
+
|
166 |
+
<user_question>
|
167 |
+
{USER_QUESTION}
|
168 |
+
</user_question>
|
169 |
+
|
170 |
+
Analyze the chat history and the user question carefully. Consider any relevant information from the previous conversation that might help in understanding or answering the current question.
|
171 |
+
|
172 |
+
To answer the question, follow these steps:
|
173 |
+
|
174 |
+
1. Identify if the user question is related to Korean Construction Standards and Regulations and if so, use the Documents_Retriever tool. To use the Documents_Retriever tool, follow these steps:
|
175 |
+
- Identify the key terms or phrases from the user's question that are most relevant to Korean Construction Standards and Regulations.
|
176 |
+
- Formulate a clear and concise search phrase using these key terms.
|
177 |
+
- The documents are in Korean Language, so the search phrase should also be in Korean.
|
178 |
+
|
179 |
+
2. Review the information returned by the Documents_Retriever. Pay attention to the content and the source files of the retrieved chunks.
|
180 |
+
|
181 |
+
3. If the retrieved information is not sufficient to answer the question comprehensively, you may use the Documents_Retriever tool again with refined search terms.
|
182 |
+
|
183 |
+
4. Formulate your answer based on the retrieved information. Make sure to:
|
184 |
+
- Directly address the user's question
|
185 |
+
- Answer in a clear and concise manner such that your answer can be verified by looking at the retrieved information
|
186 |
+
- Provide accurate information from the Korean Construction Standards and Regulations
|
187 |
+
- Cite the specific documents or regulations you're referencing
|
188 |
+
- Explain any technical terms or concepts that may not be familiar to a general audience
|
189 |
+
|
190 |
+
5. Present your answer in the following format:
|
191 |
+
<answer>
|
192 |
+
[Your comprehensive answer here]
|
193 |
+
|
194 |
+
Sources:
|
195 |
+
[List the names of the files from which you drew information]
|
196 |
+
</answer>
|
197 |
+
|
198 |
+
Remember, always use the Documents_Retriever tool when answering questions about Korean Construction Standards and Regulations, even if you think you know the answer. This ensures that your responses are always based on the most up-to-date and accurate information from the official documents.
|
199 |
+
|
200 |
+
If you cannot find relevant information using the Documents_Retriever tool, or if the question is outside the scope of Korean Construction Standards and Regulations, politely inform the user that you don't have the necessary information to answer their question accurately."""
|
201 |
+
|
202 |
+
TOOLS = [
|
203 |
+
{
|
204 |
+
"name": "Documents_Retriever",
|
205 |
+
"description": """This tool performs a cosine similarity search on a vectorstore containing documents from Korean Construction Standards and Regulations using the search phrase and returns the most relevant documents that matches the search phrase.
|
206 |
+
Use this tool when the user asks questions related to Korean Construction Standards and Regulations.
|
207 |
+
The tool takes in a search phrase as input. This search phrase will be used to perform the similarity search on the vectorstore to find relevant documents.
|
208 |
+
The vectorstore contains documents in Korean Language so, the search phrase should also be in Korean Language.
|
209 |
+
Remember, the quality of the search phrase is critical for retrieving the most relevant information.
|
210 |
+
""",
|
211 |
+
"input_schema":
|
212 |
+
{
|
213 |
+
"type": "object",
|
214 |
+
"properties":
|
215 |
+
{
|
216 |
+
"search_phrase":
|
217 |
+
{
|
218 |
+
"type": "string",
|
219 |
+
"description": "The search phrase that will be used to perform the similarity search on the vectorstore to find relevant documents.",
|
220 |
+
},
|
221 |
+
},
|
222 |
+
"required": ["search_phrase"],
|
223 |
+
},
|
224 |
+
}
|
225 |
+
]
|
pydantic_models.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
from typing import List, Optional
|
3 |
+
|
4 |
+
# ConGPT Models
|
5 |
+
class VectorStoreDocumentItem(BaseModel):
    """A retrieved document chunk together with its relevance score."""

    # Text of the chunk.
    page_content: str
    # Filename recorded for the chunk.
    filename: str
    # Heading recorded for the chunk.
    heading: str
    # Relevance score attached to the chunk.
    relevance_score: float
|
10 |
+
|
11 |
+
class ChatHistoryItem(BaseModel):
    """One completed exchange: a user message and the assistant's reply."""

    user_message: str
    assistant_message: str
|
14 |
+
|
15 |
+
class RequestModel(BaseModel):
    """Input for a chatbot request: the question plus optional prior turns."""

    # The question being asked in this turn.
    user_question: str
    # Earlier exchanges; defaults to an empty list.
    chat_history: Optional[List[ChatHistoryItem]] = []
    # Defaults to False.
    use_tool: Optional[bool] = False
|
19 |
+
|
20 |
+
class ResponseModel(BaseModel):
    """Chatbot reply: answer text, source documents, history and search phrase."""

    # Answer text; defaults to an empty string.
    answer: Optional[str] = ""
    # Source documents for the answer; defaults to an empty list.
    sources_documents: Optional[List[VectorStoreDocumentItem]] = []
    # Required: the only field without a default.
    chat_history: List[ChatHistoryItem]
    # Search phrase; defaults to an empty string.
    search_phrase: Optional[str] = ""
|
ragchatbot.py
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llm_constants import LLM_MODEL_NAME, MAX_TOKENS, RERANKER_MODEL_NAME, EMBEDDINGS_MODEL_NAME, EMBEDDINGS_TOKENS_COST, INPUT_TOKENS_COST, OUTPUT_TOKENS_COST, COHERE_RERANKER_COST
|
2 |
+
from prompts import CHAT_PROMPT, TOOLS
|
3 |
+
import os
|
4 |
+
from langchain_openai import OpenAIEmbeddings
|
5 |
+
from langchain_core.documents import Document
|
6 |
+
from langchain_community.retrievers import BM25Retriever
|
7 |
+
from typing import List, Dict, Sequence
|
8 |
+
from pydantic_models import RequestModel, ResponseModel, ChatHistoryItem, VectorStoreDocumentItem
|
9 |
+
import tiktoken
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
load_dotenv()
|
12 |
+
from langchain_community.vectorstores import FAISS
|
13 |
+
import anthropic
|
14 |
+
import cohere
|
15 |
+
|
16 |
+
|
17 |
+
class RAGChatBot:
|
18 |
+
__cohere_api_key = os.getenv("COHERE_API_KEY")
|
19 |
+
__anthroic_api_key = os.getenv("ANTHROPIC_API_KEY")
|
20 |
+
__openai_api_key = os.getenv("OPENAI_API_KEY")
|
21 |
+
__embedding_function = OpenAIEmbeddings(model=EMBEDDINGS_MODEL_NAME)
|
22 |
+
__base_retriever = None
|
23 |
+
__bm25_retriever = None
|
24 |
+
anthropic_client = None
|
25 |
+
cohere_client = None
|
26 |
+
top_n: int = 3
|
27 |
+
chat_history_length: int = 10
|
28 |
+
|
29 |
+
|
30 |
+
def __init__(self, vectorstore_path:str, top_n:int = 3):
    """Check configuration, then build the retrievers and API clients.

    Args:
        vectorstore_path: Passed to ``set_base_retriever`` to load the index.
        top_n: Stored on the instance and sent as ``top_n`` in rerank calls.

    Raises:
        ValueError: If any of the three API keys read from the environment
            is missing, or if ``top_n`` is not an ``int``.
    """
    # Checked in this order so the first missing key is the one reported.
    required_keys = (
        ("COHERE_API_KEY", self.__cohere_api_key),
        ("ANTHROPIC_API_KEY", self.__anthroic_api_key),
        ("OPENAI_API_KEY", self.__openai_api_key),
    )
    for env_name, value in required_keys:
        if value is None:
            raise ValueError(f"{env_name} must be set in the environment")
    if not isinstance(top_n, int):
        raise ValueError("top_n must be an integer")
    self.top_n = top_n
    self.set_base_retriever(vectorstore_path)
    self.set_anthropic_client()
    self.set_cohere_client()
|
43 |
+
|
44 |
+
def set_base_retriever(self, vectorstore_path:str):
    """Load the FAISS index and build the vector and BM25 retrievers.

    Both retrievers are configured with ``k=25``. The BM25 retriever is
    built from the documents held in the loaded index's docstore.
    """
    # NOTE(review): allow_dangerous_deserialization=True opts in to pickle
    # loading of the index file; only point this at trusted vectorstores.
    store = FAISS.load_local(
        vectorstore_path,
        self.__embedding_function,
        allow_dangerous_deserialization=True,
    )
    self.__base_retriever = store.as_retriever(search_kwargs={"k": 25})
    # Reads the docstore's internal ``_dict`` mapping to list its documents.
    stored_documents = list(vars(store.docstore).get('_dict').values())
    self.__bm25_retriever = BM25Retriever.from_documents(stored_documents, k=25)
|
49 |
+
|
50 |
+
def set_anthropic_client(self):
    """Create the Anthropic client from the stored API key."""
    client = anthropic.Anthropic(api_key=self.__anthroic_api_key)
    self.anthropic_client = client
|
52 |
+
|
53 |
+
def set_cohere_client(self):
    """Create the Cohere client from the stored API key."""
    client = cohere.Client(self.__cohere_api_key)
    self.cohere_client = client
|
55 |
+
|
56 |
+
def make_llm_api_call(self, messages:list):
    """Send ``messages`` to the Anthropic messages API and return the response.

    The call uses the configured model name and token limit, temperature 0,
    and the ``TOOLS`` definitions.
    """
    request_kwargs = {
        "model": LLM_MODEL_NAME,
        "max_tokens": MAX_TOKENS,
        "temperature": 0,
        "messages": messages,
        "tools": TOOLS,
    }
    return self.anthropic_client.messages.create(**request_kwargs)
|
64 |
+
|
65 |
+
|
66 |
+
def make_rerank_api_call(self, search_phrase:str, documents: Sequence[str]):
    """Rerank ``documents`` against ``search_phrase`` with the Cohere client.

    Requests ``self.top_n`` results from the configured reranker model and
    returns the client's response unchanged.
    """
    rerank = self.cohere_client.rerank
    return rerank(
        query=search_phrase,
        documents=documents,
        model=RERANKER_MODEL_NAME,
        top_n=self.top_n,
    )
|
68 |
+
|
69 |
+
|
70 |
+
def retrieve_documents(self, search_phrase:str):
    """Query both retrievers and return their combined, de-duplicated hits.

    Args:
        search_phrase: Query passed to each retriever's ``invoke``.

    Returns:
        A list with the BM25 hits first, followed by the similarity hits
        that were not already present (compared with ``==``).
    """
    dense_hits = self.__base_retriever.invoke(search_phrase)
    keyword_hits = self.__bm25_retriever.invoke(search_phrase)

    merged = []
    # Equality-based membership test: the hits are not assumed to be hashable.
    for candidate in [*keyword_hits, *dense_hits]:
        if candidate in merged:
            continue
        merged.append(candidate)
    return merged
|
81 |
+
|
82 |
+
|
83 |
+
def retrieve_and_rerank(self, search_phrase:str):
    """Retrieve candidate documents and return them in reranker order.

    Args:
        search_phrase: Query used for both retrieval and reranking.

    Returns:
        A list of ``VectorStoreDocumentItem``, one per reranker result, in
        the order the reranker returned them. Empty when nothing usable was
        retrieved. No relevance-score threshold is applied.
    """
    # Filter BEFORE calling the reranker and keep using this same list
    # afterwards: ``res.index`` is a position in the list that was sent, so
    # indexing an unfiltered list could pick the wrong document.
    documents = [
        doc for doc in self.retrieve_documents(search_phrase)
        if isinstance(doc, Document)
    ]
    if not documents:  # to avoid empty api call
        return []

    api_result = self.make_rerank_api_call(
        search_phrase, [doc.page_content for doc in documents]
    )

    reranked_docs = []
    for res in api_result.results:
        doc = documents[res.index]
        reranked_docs.append(
            VectorStoreDocumentItem(
                page_content=doc.page_content,
                filename=doc.metadata['filename'],
                heading=doc.metadata['heading'],
                relevance_score=res.relevance_score,
            )
        )
    return reranked_docs
|
100 |
+
|
101 |
+
|
102 |
+
def get_context_and_docs(self, search_phrase:str):
    """Build the context string for the LLM from the reranked documents.

    Args:
        search_phrase: Query forwarded to ``retrieve_and_rerank``.

    Returns:
        A ``(context, docs)`` tuple: ``context`` joins one section per
        document with three newlines, and ``docs`` is the reranked list.
    """
    ranked_docs = self.retrieve_and_rerank(search_phrase)
    # NOTE(review): the label reads "Filename:" but the value is the
    # document's heading - confirm this is intended.
    context = "\n\n\n".join(
        f"Filename:{item.heading}\n\n{item.page_content}" for item in ranked_docs
    )
    return context, ranked_docs
|
106 |
+
|
107 |
+
|
108 |
+
def get_tool_use_assistant_message(self, tool_use_block):
    """Wrap ``tool_use_block`` as the content of an assistant-role message."""
    assistant_message = {'role': 'assistant'}
    assistant_message['content'] = tool_use_block
    return assistant_message
|
112 |
+
|
113 |
+
|
114 |
+
def get_tool_use_user_message(self, tool_use_id, context):
    """Build the user-role message that carries one tool result.

    Args:
        tool_use_id: Id of the tool-use request being answered.
        context: Tool output placed in the ``tool_result`` block.

    Returns:
        A message dict whose content is a one-element list holding the
        ``tool_result`` block.
    """
    tool_result_block = {
        'type': 'tool_result',
        'tool_use_id': tool_use_id,
        'content': context,
    }
    return {'role': 'user', 'content': [tool_result_block]}
|
119 |
+
|
120 |
+
|
121 |
+
def process_tool_call(self, tool_name, tool_input):
    """Run the tool the model asked for and return its result.

    Args:
        tool_name: Name of the requested tool.
        tool_input: Tool arguments; ``"search_phrase"`` is read for
            ``Documents_Retriever``.

    Returns:
        A ``(sources_list, search_phrase, context)`` tuple. For a tool name
        other than ``"Documents_Retriever"`` the tuple is
        ``([], "", <error text>)``, so the caller can always unpack three
        values and hand the error text back to the model as the tool result.
    """
    if tool_name != "Documents_Retriever":
        # Previously this fell through and returned None, which made the
        # caller's three-way unpacking fail with an opaque TypeError.
        return [], "", f"Error: unknown tool '{tool_name}'."

    search_phrase = tool_input["search_phrase"]
    context, sources_list = self.get_context_and_docs(search_phrase)
    return sources_list, search_phrase, context
|
126 |
+
|
127 |
+
|
128 |
+
def calculate_cost(self, input_tokens, output_tokens, search_phrase):
    """Estimate the cost of one exchange from token counts.

    Args:
        input_tokens: LLM input tokens used.
        output_tokens: LLM output tokens used.
        search_phrase: Retrieval query; when truthy, the embedding cost of
            the phrase and the flat reranker cost are added.

    Returns:
        The summed cost. The ``*_TOKENS_COST`` constants are divided by one
        million before being multiplied by the token counts.
    """
    MILLION = 1000000
    input_cost = input_tokens*(INPUT_TOKENS_COST/MILLION)
    output_cost = output_tokens*(OUTPUT_TOKENS_COST/MILLION)

    if not search_phrase:
        return input_cost + output_cost

    # The search phrase is tokenized with cl100k_base to count its tokens.
    token_ids = tiktoken.get_encoding("cl100k_base").encode(search_phrase)
    embeddings_cost = len(token_ids) * (EMBEDDINGS_TOKENS_COST/MILLION)
    return embeddings_cost + COHERE_RERANKER_COST + input_cost + output_cost
|
138 |
+
|
139 |
+
|
140 |
+
|
141 |
+
def chat_with_claude(self, user_message_history:list):
    """Run the conversation, serving tool requests until the model answers.

    Args:
        user_message_history: Messages sent to the model. The list is
            extended in place with every tool-use exchange.

    Returns:
        A ``(documents_list, search_phrase, answer, total_cost)`` tuple.
        ``documents_list`` and ``search_phrase`` come from the most recent
        tool call (``[]`` and ``""`` if no tool was used). ``answer`` is the
        text inside ``<answer>`` tags when present, otherwise the first text
        block, or ``""`` if the model returned no text at all.
    """
    message = self.make_llm_api_call(user_message_history)
    input_tokens = message.usage.input_tokens
    output_tokens = message.usage.output_tokens

    documents_list = []
    search_phrase = ""
    while message.stop_reason == "tool_use":
        # A single assistant turn may request several tools; every tool_use
        # block needs a matching tool_result in the next user message.
        tool_results = []
        for block in message.content:
            if block.type != "tool_use":
                continue
            documents_list, search_phrase, tool_result = self.process_tool_call(
                block.name, block.input
            )
            tool_results.extend(
                self.get_tool_use_user_message(block.id, tool_result)["content"]
            )

        if not tool_results:
            # stop_reason said "tool_use" but no such block exists; there is
            # nothing to answer, so stop instead of raising StopIteration.
            break

        user_message_history.append(self.get_tool_use_assistant_message(message.content))
        user_message_history.append({"role": "user", "content": tool_results})

        message = self.make_llm_api_call(user_message_history)
        input_tokens += message.usage.input_tokens
        output_tokens += message.usage.output_tokens

    answer = next(
        (block.text for block in message.content if hasattr(block, "text")),
        None,
    )
    if answer is None:
        # No text block at all: return an empty answer rather than failing
        # on the "<answer>" membership test below.
        answer = ""
    elif "<answer>" in answer:
        answer = answer.split("<answer>")[1].split("</answer>")[0].strip()

    total_cost = self.calculate_cost(input_tokens, output_tokens, search_phrase)

    return (documents_list, search_phrase, answer, total_cost)
|
178 |
+
|
179 |
+
|
180 |
+
def get_chat_history_text(self, chat_history: List[ChatHistoryItem]):
    """Render the chat history as alternating ``User:`` / ``Assistant:`` lines.

    Args:
        chat_history: Items exposing ``user_message`` and
            ``assistant_message``.

    Returns:
        One two-line entry per item, concatenated, with leading and trailing
        whitespace stripped; ``""`` for an empty history.
    """
    rendered_turns = (
        f"User: {turn.user_message}\nAssistant: {turn.assistant_message}\n"
        for turn in chat_history
    )
    return "".join(rendered_turns).strip()
|
185 |
+
|
186 |
+
def get_response(self, input:RequestModel) -> ResponseModel:
    """Answer the user's question and return the updated conversation.

    Args:
        input: Request carrying ``chat_history``, ``user_question`` and
            ``use_tool``.

    Returns:
        A ``ResponseModel`` with the answer, the source documents, the
        search phrase, and a copy of the chat history extended with this
        exchange. ``input.chat_history`` itself is not modified.
    """
    history_text = self.get_chat_history_text(input.chat_history)
    question = input.user_question
    prompt = CHAT_PROMPT.format(CHAT_HISTORY=history_text, USER_QUESTION=question)
    if input.use_tool:
        # Append an explicit instruction to use the retrieval tool.
        prompt = f"{prompt}\nUse Documents_Retriever tool in your response."

    initial_messages = [{"role":"user","content":[{"type":"text","text":prompt}]}]
    sources_list, search_phrase, answer, _ = self.chat_with_claude(initial_messages)

    latest_turn = ChatHistoryItem(user_message=question, assistant_message=answer)
    updated_chat_history = [*input.chat_history, latest_turn]

    return ResponseModel(
        answer=answer,
        sources_documents=sources_list,
        chat_history=updated_chat_history,
        search_phrase=search_phrase,
    )
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
langchain_community
|
3 |
+
langchain_openai
|
4 |
+
rank_bm25
|
5 |
+
faiss-cpu
|
6 |
+
anthropic
|
7 |
+
langchain_core
|
8 |
+
cohere
|