# LFbot / app.py
# Ghani-25's picture
# first commit
# 4bd09a6
# raw
# history blame
# 2.03 kB
import gradio as gr
import requests
import json
def greet(data):
    """Send the prompt to the hosted completion endpoint and return its reply.

    Args:
        data: Prompt text; it is sent as the ``"text"`` field of the request.

    Returns:
        The value stored under the ``"result"`` key of the JSON response.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a non-2xx HTTP status.
        KeyError: If the decoded response carries no ``"result"`` entry.
    """
    import os  # local import: only needed for the optional token override

    endpoint = (
        "https://shared-api.forefront.link/organization/GuejzaCOIXGT/"
        "codegen-16b-nl/completions/Gu6OxnDd8Tur"
    )

    # SECURITY: this credential is committed in the source. It stays as the
    # fallback so existing deployments keep working, but it should be rotated
    # and supplied through the FOREFRONT_API_KEY environment variable instead.
    api_key = os.environ.get("FOREFRONT_API_KEY", "325618b3f00d46a3bdb01340")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    body = {
        "text": data,
        "message": "",
        # Top-P (default 0.8): alternative to temperature for controlling
        # randomness; suited to outputs that should be accurate and correct.
        # The provider recommends tuning only one of top_p / temperature and
        # leaving the other at 1.
        # NOTE(review): both are tuned here, contrary to that recommendation;
        # the values are left unchanged to preserve current output.
        "top_p": 0.7,
        # Top-K (default 40): drop every token below the k-th most probable.
        # Lower values trim the tail and make drifting off topic less likely.
        "top_k": 60,
        # Temperature (default 0.0): 0 is deterministic, 1 takes the most
        # risks. Story completion / idea generation commonly uses 0.7-0.9.
        "temperature": 0.5,
        # Repetition penalty (default 1.0): lowers the chance of re-selecting
        # a word the more often it has already been used.
        "repetition_penalty": 1.0,
        "length": 300,
    }

    # (connect, read) timeout so a stalled endpoint cannot hang the UI worker
    # forever; the read limit is generous because generation can be slow.
    response = requests.post(
        endpoint,
        json=body,
        headers=headers,
        timeout=(10, 120),
    )
    # Surface HTTP errors directly instead of failing later while decoding
    # or indexing an error payload.
    response.raise_for_status()

    payload = response.json()
    if not isinstance(payload, dict) or "result" not in payload:
        raise KeyError("'result' missing from completion response")
    return payload["result"]
# Wire `greet` into a minimal Gradio UI: one text input, one text output.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)

# Start the web server for the interface when this script runs.
iface.launch()