from llama_index import LLMPredictor, PromptHelper, StorageContext, ServiceContext, load_index_from_storage, SimpleDirectoryReader, GPTVectorStoreIndex
from langchain.chat_models import ChatOpenAI
import gradio as gr
import sys
import os
import openai
from ratelimit import limits, sleep_and_retry
from langchain import HuggingFaceHub
# Bug-fix notes:
# 1. OpenAI key: https://stackoverflow.com/questions/76425556/tenacity-retryerror-retryerrorfuture-at-0x7f89bc35eb90-state-finished-raised
# 2. Rate limit error in the default langchain version - pin langchain==0.0.188. https://github.com/jerryjliu/llama_index/issues/924
# 3. Added the Config variable fix in langchain: https://github.com/pydantic/pydantic/issues/3320
# 4. Deploy on Hugging Face Spaces: https://huggingface.co/welcome
#    - create a Hugging Face token: https://huggingface.co/settings/tokens
#    - log in: huggingface-cli login
#    - add a requirements.txt file: https://huggingface.co/docs/hub/spaces-dependencies
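# A plausible requirements.txt for this Space (a sketch: only the langchain pin
# comes from note 2 above; the other packages are inferred from the imports and
# left unpinned):
#
#   langchain==0.0.188
#   llama-index
#   openai
#   gradio
#   ratelimit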
# The OpenAI key is read from the Space secret "openai_key"
os.environ["OPENAI_API_KEY"] = os.environ.get("openai_key")
openai.api_key = os.environ["OPENAI_API_KEY"]
# Rate limit for OpenAI API calls (requests per second). With ratelimit,
# @limits(calls=RATE_LIMIT, period=1) raises once the budget is exceeded and
# @sleep_and_retry sleeps and retries instead of failing; both are applied to
# data_querying below.
RATE_LIMIT = 3
def create_service_context():
    # PromptHelper settings; newer llama_index versions take chunk_overlap_ratio
    # instead of max_chunk_overlap:
    # prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
    max_input_size = 4096
    num_outputs = 512
    max_chunk_overlap = 20  # only used by the legacy signature above
    chunk_size_limit = 600
    prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio=0.1, chunk_size_limit=chunk_size_limit)

    # LLMPredictor is a wrapper class around LangChain's LLMChain that allows
    # easy integration into LlamaIndex
    # llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.7, model_name="gpt-4", max_tokens=num_outputs))
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo", max_tokens=num_outputs))

    # constructs the service context
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    return service_context
# Builds the vector index from the documents and persists it to disk
def data_ingestion_indexing(directory_path):
    # loads data from the specified directory path
    documents = SimpleDirectoryReader(directory_path).load_data()

    # when first building the index
    index = GPTVectorStoreIndex.from_documents(
        documents, service_context=create_service_context()
    )

    # persist index to disk, default "storage" folder
    index.storage_context.persist()
    return index
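# A minimal sketch (not part of the original flow, assuming the default
# "./storage" layout written by index.storage_context.persist()) for reusing a
# previously persisted index instead of re-indexing on every restart:
def load_or_build_index(directory_path, persist_dir="./storage"):
    if os.path.exists(persist_dir):
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(storage_context, service_context=create_service_context())
    return data_ingestion_indexing(directory_path)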
# Rate-limited so bursts of UI requests don't exhaust the OpenAI quota
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def data_querying(input_text):
    # rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir="./storage")

    # loads index from storage
    index = load_index_from_storage(storage_context, service_context=create_service_context())

    # queries the index with the input text
    response = index.as_query_engine().query(input_text)
    return response.response
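# Example usage (hypothetical question; assumes the index has already been
# built under ./storage by data_ingestion_indexing):
#   print(data_querying("What does the book say about validating emotions?"))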
# gr.components.Textbox takes no CSS "style" string, so the scroll styling is dropped
iface = gr.Interface(fn=data_querying,
                     inputs=gr.components.Textbox(lines=20, label="Enter your question"),
                     outputs=gr.components.Textbox(lines=25, label="Response"),
                     title="Therapy Validation GPT 0.1 pre alpha")
# passes in data directory
index = data_ingestion_indexing("book-validation")

iface.launch(inline=True)