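"""Streamlit app that turns an uploaded image into a poem and reads it aloud.

Pipeline: BLIP image captioning -> Llama-3-8B-Instruct poem generation (both via
the Hugging Face Inference API) -> Parler-TTS speech synthesis saved to audio.wav.
"""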
import requests
import os
import streamlit as st
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf

# load_dotenv(find_dotenv())
# HUGGINGFACEHUB_API_TOKEN = os.getenv("token")

# Hugging Face API token, read from the environment; the commented-out
# dotenv lines above suggest it is stored under the name "token".
API_TOKEN = os.getenv("token")
headers = {"Authorization": f"Bearer {API_TOKEN}"}
def img2text(path):
    """Caption the image at `path` with BLIP image captioning via the Inference API."""
    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"

    def query(filename):
        with open(filename, "rb") as f:
            data = f.read()
        response = requests.post(API_URL, headers=headers, data=data)
        return response.json()

    output = query(path)[0]["generated_text"]
    print(output)
    return output
def generate_story(scene):
    """Turn the image caption into a poem with Meta-Llama-3-8B-Instruct via the Inference API."""
    template = f'''
    You are a poet;
    You can generate a poem from a simple narrative, understand the theme, and use proper rhyming words.
    The poem should not be shorter than 16 lines and not be longer than 20 lines.
    Scenario: {scene}
    Write a poem based on the provided scenario.
    '''
    API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"

    def query(payload):
        response = requests.post(API_URL, headers=headers, json=payload)
        return response.json()

    story = query({"inputs": template})
    # The Inference API echoes the prompt inside "generated_text"; drop the first
    # 12 lines (the prompt template and its preamble) and keep only the poem.
    lines = str(story[0]["generated_text"]).split("\n")
    story = "\n".join(lines[12:])
    print(story)
    return story
def gen_audio(message):
    """Synthesize `message` to speech with Parler-TTS and save it as audio.wav."""
    device = "cpu"
    model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler_tts_mini_v0.1").to(device)
    tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")

    prompt = message
    # Free-text description that steers the voice characteristics of Parler-TTS.
    description = "A female speaker with a slightly low-pitched, quite expressive voice delivers her words at a normal pace in a poetic manner with proper pauses while speaking inside a confined space with very clear audio."

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
    audio_arr = generation.cpu().numpy().squeeze()
    sf.write("audio.wav", audio_arr, model.config.sampling_rate)
def main():
    st.set_page_config(page_title="img 2 poem", page_icon="🤖")
    st.header("Turn image into poem")

    uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg", "svg"])
    if uploaded_file is not None:
        print(uploaded_file)
        bytes_data = uploaded_file.getvalue()
        print(bytes_data)
        # Save the upload to disk so the captioning API can read it from a path.
        with open(uploaded_file.name, "wb") as file:
            file.write(bytes_data)

        st.image(uploaded_file, caption="Uploaded Image")

        scenario = img2text(uploaded_file.name)  # image -> caption
        story = generate_story(scenario)         # caption -> poem
        gen_audio(story)                         # poem -> audio.wav

        with st.expander("Scenario"):
            st.write(scenario)
        with st.expander("Poem"):
            st.write(story)
        st.audio("audio.wav")


if __name__ == "__main__":
    main()
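# Run locally with `streamlit run <this file>` (on Hugging Face Spaces the entry
# point is conventionally app.py; the exact filename is not specified here). The
# "token" environment variable must hold a valid Hugging Face API token for the
# Inference API calls above.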