# KKMS-KSSW-HF/utils/data_loader.py
import os
import pandas as pd
from pathlib import Path
from llama_index import GPTSimpleVectorIndex, download_loader
from langchain.agents import initialize_agent, Tool
from langchain.llms import OpenAI
from langchain.chains.conversation.memory import ConversationBufferMemory
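
# Note: these imports target an older llama_index / langchain API surface
# (e.g. GPTSimpleVectorIndex and download_loader were removed in later
# llama_index releases), so matching library versions are assumed here.
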
class DATA_LOADER:
    def __init__(self):
        print()
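
    # Basic DataFrame cleanup: optionally fill NaNs with empty strings,
    # optionally drop rows containing NaNs, then drop duplicates and reset the index.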
    def clean_df(self, df, dropna=True, fillna=False):
        if fillna:
            df.fillna('', inplace=True)
        if dropna:
            df.dropna(inplace=True)
        # df = df[~df.isna()]
        df = df.drop_duplicates().reset_index(drop=True)
        return df
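
    # Collect the external links used by FTAs from every sheet of the Excel
    # workbook into a single DataFrame with 'Link used for', 'Link type' and 'Link' columns.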
    def load_external_links_used_by_FTAs(self,
                                         sheet_filepath='./data/urls_used_by_ftas/external_links_used_by_FTAs.xlsx'
                                         ):
        xls = pd.ExcelFile(sheet_filepath)
        df = pd.DataFrame(columns=['S.No.', 'Link used for', 'Link type', 'Link'])
        for sheet_name in xls.sheet_names:
            sheet = pd.read_excel(xls, sheet_name)
            if sheet.shape[0] > 0:
                df = pd.concat([df, sheet])
            else:
                print(f'{sheet_name} has no content.')
        df = df[['Link used for', 'Link type', 'Link']]
        # Clean df
        df = self.clean_df(df)
        print(f'Total links available across all cities: {df.shape[0]}')
        return df
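
    # Load documents with the loader matching doc_type: 'pdf' reads a local file,
    # 'url' and 'url-chatgpt' scrape the given URLs with BeautifulSoup, and
    # 'url-kb' crawls a knowledge-base site. The 'url-chatgpt' branch additionally
    # builds a GPTSimpleVectorIndex and a zero-shot langchain agent over it,
    # although only the raw documents are returned.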
    def load_document(self,
                      doc_type='pdf',
                      doc_filepath='',
                      urls=[]
                      ):
        documents = []

        if doc_type == 'pdf':
            PDFReader = download_loader("PDFReader")
            loader = PDFReader()
            if os.path.exists(doc_filepath):
                documents = loader.load_data(file=Path(doc_filepath))

        elif doc_type == 'url':
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            if len(urls) > 0:
                # Load data from URLs
                documents = loader.load_data(urls=urls)

        elif doc_type == 'url-kb':
            KnowledgeBaseWebReader = download_loader("KnowledgeBaseWebReader")
            loader = KnowledgeBaseWebReader()
            for url in urls:
                doc = loader.load_data(
                    root_url=url,
                    link_selectors=['.article-list a', '.article-list a'],
                    article_path='/articles',
                    body_selector='.article-body',
                    title_selector='.article-title',
                    subtitle_selector='.article-subtitle',
                )
                documents.extend(doc)

        elif doc_type == 'url-chatgpt':
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            if len(urls) > 0:
                # Load data from URLs
                documents = loader.load_data(urls=urls)

            # Build the Vector database
            index = GPTSimpleVectorIndex(documents)

            tools = [
                Tool(
                    name="Website Index",
                    func=lambda q: index.query(q),
                    description="Useful when you want to answer questions about the text retrieved from websites.",
                ),
            ]

            # Call ChatGPT API
            llm = OpenAI(temperature=0)  # Keep temperature=0 to search from the given URLs only
            memory = ConversationBufferMemory(memory_key="chat_history")
            agent_chain = initialize_agent(
                tools, llm, agent="zero-shot-react-description", memory=memory
            )
            output = agent_chain.run(input="What language is on this website?")

        return documents
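

# Usage sketch (not part of the original module): how DATA_LOADER might be driven
# from a script. The PDF path below is a placeholder, not an asset shipped with
# the repo; the Excel workbook path falls back to the method's default.
if __name__ == '__main__':
    data_loader = DATA_LOADER()

    # Aggregate the FTA link sheets into a single DataFrame.
    links_df = data_loader.load_external_links_used_by_FTAs()
    print(links_df.head())

    # Load a local PDF into llama_index documents (hypothetical path).
    pdf_documents = data_loader.load_document(
        doc_type='pdf',
        doc_filepath='./data/sample.pdf',
    )
    print(f'Loaded {len(pdf_documents)} PDF document chunk(s).')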