SearchPhi / llama_cpp_inf.py
as-cle-bert's picture
Update llama_cpp_inf.py
2e2c144 verified
raw
history blame
1.16 kB
## Imports
import ast
import json
import re

from gradio_client import Client
from huggingface_hub import hf_hub_download
# Module-level gradio_client handle, created once when this module is imported
# and reused by run_inference_lcpp for every request.
# NOTE(review): "eswardivi/Phi-3-mini-128k-instruct" is presumably a Hugging Face
# Space id, so constructing the client likely needs network access — confirm.
api_client = Client("eswardivi/Phi-3-mini-128k-instruct")
def _parse_search_results(jsonstr):
    """Deserialize the search-results string without executing any code.

    Strict JSON is tried first; if that fails, the string is parsed as a
    Python literal so inputs such as ``str(some_dict)`` (single-quoted keys)
    are still accepted.

    Args:
        jsonstr: Serialized search results (JSON text or a Python literal).

    Returns:
        The deserialized object.

    Raises:
        ValueError, SyntaxError: If the string is neither valid JSON nor a
            valid Python literal.
    """
    try:
        return json.loads(jsonstr)
    except json.JSONDecodeError:
        # SECURITY: this used to be ``eval(jsonstr)``, which runs arbitrary
        # expressions embedded in the input. ``ast.literal_eval`` only accepts
        # literals (dict/list/str/numbers/...), so nothing can be executed.
        return ast.literal_eval(jsonstr)


def run_inference_lcpp(jsonstr, user_search):
    """Summarize search results with the remote model and stream the answer.

    Builds a prompt from the serialized search results and the user's query,
    sends it to the ``/chat`` endpoint of ``api_client``, appends a
    "Reference websites" list made of the top-level keys of the search
    results, and yields the combined text piece by piece.

    Args:
        jsonstr: Search results serialized as a string; it must deserialize to
            a mapping whose keys are the reference URLs (string keys are
            required, since they are joined into the reference list).
        user_search: The user's query the summary should satisfy.

    Yields:
        str: Consecutive fragments of the final text — a run of word
        characters, a run of whitespace, or a single other character — so
        that concatenating all fragments reproduces the full text.

    Raises:
        ValueError, SyntaxError: If ``jsonstr`` cannot be deserialized.
    """
    # Parse before calling the model so malformed input fails fast instead of
    # after a wasted remote inference call.
    jsondict = _parse_search_results(jsonstr)

    prompt = f"""Instructions for the assistant: Starting from the URLs and the keywords deriving from Google search results and provided to you in JSON format, generate a meaningful summary of the search results that satisfies the user's query.
URLs and keywords in JSON format: {jsonstr}.
User's query to satisfy: {user_search}"""

    response = api_client.predict(
        prompt,  # str in 'Message' Textbox component
        0.2,  # float (numeric value between 0 and 1) in 'Temperature' Slider component
        True,  # bool in 'Sampling' Checkbox component
        512,  # float (numeric value between 128 and 4096) in 'Max new tokens' Slider component
        api_name="/chat"
    )

    addon = "Reference websites:\n- " + '\n- '.join(jsondict.keys())
    input_string = response + "\n\n" + addon

    # Split into words / whitespace runs / single punctuation marks so the
    # caller can render the answer incrementally.
    yield from re.findall(r'\w+|\s+|[^\w\s]', input_string)