madhurjindal committed on
Commit
d8b4b59
·
verified ·
1 Parent(s): 837ceb8

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +190 -0
utils.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.agents import tool
2
+ from typing import Literal
3
+ import json
4
+ from PIL import Image
5
+
6
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
7
+ from langgraph.graph import END, MessagesState
8
+
9
+ from render_mermaid import render_mermaid
10
+ from langchain_community.document_loaders import GithubFileLoader
11
+
12
+ # from langchain_ollama import ChatOllama
13
+ from prompts import *
14
+ from constants import file_extensions
15
+ from __init__ import llm, llm_structured
16
+
17
+
18
+ class GraphState(MessagesState):
19
+ working_knowledge: str
20
+ all_files: list[str]
21
+ remaining_files: list[str]
22
+ explored_files: list[str]
23
+ explored_summaries: str
24
+ document_summaries_store: dict
25
+ documents: list
26
+ final_graph: Image
27
+
28
+
29
+ def load_github_codebase(repo: str, branch: str):
30
+ loader = GithubFileLoader(
31
+ repo=repo, # the repo name
32
+ branch=branch, # the branch name
33
+ github_api_url="https://api.github.com",
34
+ file_filter=lambda file_path: file_path.endswith(tuple(file_extensions)),
35
+ # file_filter=lambda filepath: True,
36
+ encoding="utf-8",
37
+ )
38
+ documents = loader.load()
39
+ return documents
40
+
41
+
42
+ def get_file_content_summary(file_path: str, state: GraphState):
43
+ """Returns the functional summary of a file. Please note that the file_path should not be null.
44
+
45
+ Args:
46
+ file_path: The path of the file for which the summary is required."""
47
+
48
+ summary = check_summary_in_store(file_path, state)
49
+ if summary:
50
+ return summary
51
+ for document in state["documents"]:
52
+ if document.metadata["path"] == file_path:
53
+ doc_content = document.page_content
54
+ break
55
+ # print(content)
56
+ summary = llm.invoke(
57
+ [SystemMessage(content=summarizer_prompt), HumanMessage(content=doc_content)]
58
+ ).content
59
+ summary = json.dumps({"FilePath": file_path, "Summary": summary})
60
+ save_summary_in_store(file_path, summary, state)
61
+ return summary
62
+
63
+
64
+ def explore_file(state: GraphState):
65
+ file_path = state["remaining_files"].pop()
66
+
67
+ summary_dict = json.loads(get_file_content_summary(file_path, state))
68
+ if summary_dict["FilePath"] in state["explored_files"]:
69
+ return state
70
+ knowledge_str = f"""* File Path: {summary_dict['FilePath']}\n\tSummary: {summary_dict['Summary']}\n\n"""
71
+ state["explored_summaries"] += knowledge_str
72
+ state["explored_files"].append(file_path)
73
+ return state
74
+
75
+
76
+ @tool
77
+ def generate_final_mermaid_code():
78
+ """Generate the final mermaid code for the codebase once all the files are explored and the working knowledge is complete."""
79
+ return "generate_mermaid_code"
80
+
81
+
82
+ def check_summary_in_store(file_path: str, state: GraphState):
83
+ if file_path in state["document_summaries_store"]:
84
+ return state["document_summaries_store"][file_path]
85
+ return None
86
+
87
+
88
+ def save_summary_in_store(file_path: str, summary: str, state: GraphState):
89
+ state["document_summaries_store"][file_path] = summary
90
+
91
+
92
+ def get_all_filesnames_in_codebase(state: GraphState):
93
+ """Get a list of all files (as filepaths) in the codebase."""
94
+ filenames = []
95
+ for document in state["documents"]:
96
+ filenames.append(document.metadata["path"])
97
+
98
+ return {
99
+ "all_files": filenames,
100
+ "explored_files": [],
101
+ "remaining_files": filenames,
102
+ "explored_summaries": "",
103
+ "document_summaries_store": {},
104
+ }
105
+
106
+
107
+ def parse_plan(state: GraphState):
108
+ """Parse the plan and return the next action."""
109
+ if "File Exploration Plan" in state["working_knowledge"]:
110
+ plan_working = state["working_knowledge"].split("File Exploration Plan")[1]
111
+ else:
112
+ plan_working = state["working_knowledge"]
113
+ response = llm_structured.invoke(plan_parser.format(plan_list=plan_working))[
114
+ "plan_list"
115
+ ]
116
+ if len(response) > 25:
117
+ response = response[:25]
118
+ # response = eval(llm.invoke(plan_parser.format(plan_list=plan_working)).content)
119
+ return {"remaining_files": response}
120
+
121
+
122
+ def router(state: GraphState):
123
+ """Route the conversation to the appropriate node based on the current state of the conversation."""
124
+ if state["remaining_files"] != []:
125
+ return "explore_file"
126
+ else:
127
+ return "generate_mermaid_code"
128
+
129
+
130
+ def get_plan_for_codebase(state: GraphState):
131
+ new_state = get_all_filesnames_in_codebase(state)
132
+ planner_content = "# File Structure\n" + str(new_state["all_files"])
133
+ plan = llm.invoke(
134
+ [SystemMessage(content=planner_prompt), HumanMessage(content=planner_content)]
135
+ )
136
+
137
+ knowledge_str = f"""# Plan\n{plan.content}"""
138
+ new_state["working_knowledge"] = knowledge_str
139
+ # print(new_state)
140
+ return new_state
141
+
142
+
143
+ def final_mermaid_code_generation(state: GraphState):
144
+ final_graph_content = (
145
+ "# Disjoint Codebase Understanding\n"
146
+ + state["working_knowledge"]
147
+ + "\n\n# Completed Explorations\n"
148
+ + state["explored_summaries"]
149
+ )
150
+ response = llm.invoke(
151
+ [
152
+ SystemMessage(content=final_graph_prompt),
153
+ HumanMessage(content=final_graph_content),
154
+ ]
155
+ )
156
+ return {"messages": [response]}
157
+
158
+
159
+ import time
160
+
161
+
162
+ def extract_mermaid_and_generate_graph(state: GraphState):
163
+ mermaid_code = state["messages"][-1].content
164
+ if "mermaid" in mermaid_code:
165
+ mermaid_code = mermaid_code.split("mermaid")[-1]
166
+ response = llm.invoke(
167
+ [SystemMessage(content=mermaid_extracter), HumanMessage(content=mermaid_code)]
168
+ ).content
169
+ response = response.split("```mermaid")[-1].split("```")[0]
170
+ # Save the mermaid code in a file with the current timestamp
171
+ # print(response)
172
+ file_name = f"mermaid/{int(time.time())}.png"
173
+ render_mermaid(response, file_name)
174
+
175
+ # Read image to return as output
176
+ img = Image.open(file_name)
177
+ return {"messages": [AIMessage(response)], "final_graph": img}
178
+
179
+
180
+ def need_to_update_working_knowledge(state: GraphState):
181
+ messages = state["messages"]
182
+ last_message = messages[-1]
183
+ # prev_to_last_message = messages[-2]
184
+ # If the last call is a tool message, we need to update the working knowledge
185
+ if last_message.content == "generate_mermaid_code":
186
+ return "generate_mermaid_code"
187
+ if isinstance(last_message, ToolMessage):
188
+ return "tools_knowledge_update"
189
+ # Otherwise, we continue with the agent
190
+ return "agent"