# Spaces: Sleeping
import os | |
import streamlit as st | |
import requests | |
from transformers import pipeline | |
import openai | |
from langchain import LLMChain, PromptTemplate | |
from langchain import HuggingFaceHub | |
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler | |
import torch | |
# Silence every warning category for the whole process.
import warnings

warnings.filterwarnings(action="ignore")

# Hugging Face access token, taken from the H_TOKEN environment variable
# (None when the variable is not set).
api_token = os.environ.get('H_TOKEN')
# Image-to-text
def img2txt(url):
    """Return a generated caption for the image at ``url``.

    A new ``image-to-text`` pipeline backed by
    ``Salesforce/blip-image-captioning-base`` is created on every call, the
    image reference is passed to it with ``max_new_tokens=20``, and the
    ``generated_text`` of the first result is printed and returned.

    Args:
        url: Image reference handed straight to the pipeline (the app passes
            the path of the saved upload).

    Returns:
        The caption text produced by the model.
    """
    print("Initializing captioning model...")
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )

    print("Generating text from the image...")
    predictions = captioner(url, max_new_tokens=20)
    caption = predictions[0]["generated_text"]

    print(caption)
    return caption
# Text-to-story
# Language model used for story generation: a Hugging Face Hub repo accessed
# with the token read from the environment.
model = "tiiuae/falcon-7b-instruct"
llm = HuggingFaceHub(
    repo_id=model,
    huggingfacehub_api_token=api_token,
    model_kwargs={"temperature": 0.2, "max_new_tokens": 4000},
    verbose=False,
)
def generate_story(scenario, llm):
    """Generate a short story from a scenario description.

    Wraps ``scenario`` in a story-teller prompt, runs the prompt through
    ``llm`` with an ``LLMChain`` and returns the resulting story text.

    Args:
        scenario: Text describing the scene (the app passes the image
            caption).
        llm: Language model object handed to ``LLMChain``.

    Returns:
        The story with surrounding whitespace removed. When the model output
        contains the ``"Story:"`` marker (for example because the prompt was
        echoed back), only the text after its first occurrence is returned;
        otherwise the complete output is returned.
    """
    template = """You are a story teller.
You get a scenario as an input text, and generates a short story out of it.
Context: {scenario}
Story:
"""
    prompt = PromptTemplate(template=template, input_variables=["scenario"])
    chain = LLMChain(prompt=prompt, llm=llm)
    raw_output = chain.predict(scenario=scenario)

    # Cut at the first "Story:" marker only if it is really there. The former
    # ``find(...) + len(...)`` arithmetic turned a missing marker (-1) into
    # index 5 and silently dropped the first five characters of the story.
    _, marker, after_marker = raw_output.partition("Story:")
    story = after_marker if marker else raw_output
    return story.strip()
# Text-to-speech
def txt2speech(text, timeout=120):
    """Convert ``text`` to speech and store the audio in ``audio_story.mp3``.

    Sends the text to the Hugging Face Inference API endpoint of the
    ``espnet/kan-bayashi_ljspeech_vits`` model, authenticated with the
    module-level ``api_token``, and writes the response body to
    ``audio_story.mp3`` in the current working directory.

    Args:
        text: Text to synthesize.
        timeout: Seconds to wait for the API before giving up.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: For timeouts and other transport failures.
    """
    print("Initializing text-to-speech conversion...")
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    headers = {"Authorization": f"Bearer {api_token}"}
    payloads = {'inputs': text}

    # A timeout keeps the app from hanging forever on an unresponsive API.
    response = requests.post(API_URL, headers=headers, json=payloads, timeout=timeout)

    # Fail loudly on an error response instead of saving the error payload
    # as if it were audio data.
    response.raise_for_status()

    with open('audio_story.mp3', 'wb') as file:
        file.write(response.content)
# Text-to-image
def txt2img(text, style="realistic"):
    """Generate an image from a text prompt with Stable Diffusion 2.

    Loads ``stabilityai/stable-diffusion-2`` with the Euler discrete
    scheduler, runs one generation with ``guidance_scale=7.5`` and returns
    the first resulting image.

    Args:
        text: The prompt describing the desired image.
        style: Style hint appended to the prompt as ``"<text>, <style> style"``.
            Pass an empty string or ``None`` to use ``text`` unchanged.

    Returns:
        The first image of the pipeline output.

    NOTE(review): the pipeline is rebuilt on every call; caching it (for
    example with Streamlit's resource cache) would avoid repeated loads.
    """
    model_id = "stabilityai/stable-diffusion-2"

    # Use the GPU with half precision when one is available; otherwise fall
    # back to the CPU with float32 instead of failing on a hard-coded "cuda".
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    dtype = torch.float16 if use_cuda else torch.float32

    # Use the Euler scheduler here instead of the model's default one.
    scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
    pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, torch_dtype=dtype)
    pipe = pipe.to(device)

    # The style argument used to be ignored; fold it into the prompt so the
    # caller's style selection actually influences the result.
    full_prompt = f"{text}, {style} style" if style else text
    image = pipe(prompt=full_prompt, guidance_scale=7.5).images[0]
    return image
# Streamlit web app main function
def main():
    """Render the Streamlit UI and run the selected task.

    The sidebar holds two expanders:

    * "Audio Story": upload an image, caption it with ``img2txt``, turn the
      caption into a story with ``generate_story`` and into audio with
      ``txt2speech``, then show caption, story and an audio player.
    * "Image Generator": enter a prompt, pick a style and generate an image
      with ``txt2img``.

    The main page area shows a short welcome message.
    """
    # Page configuration is documented as the first Streamlit command of the
    # script, so it runs before anything is drawn in the sidebar.
    # NOTE(review): the non-ASCII characters in the UI strings of this
    # function look like mis-encoded emoji; they are left untouched here —
    # confirm against the file's original encoding.
    st.set_page_config(page_title="π¨ Image-to-Audio Story π§", page_icon="πΌοΈ")
    st.sidebar.title("Choose the task")

    with st.sidebar.expander("Audio Story"):
        st.title("Turn the Image into Audio Story")

        # Allows users to upload an image file
        uploaded_file = st.file_uploader("# π· Upload an image...", type=["jpg", "jpeg", "png"])

        # Parameters for LLM model (in the sidebar)
        #st.sidebar.markdown("# LLM Inference Configuration Parameters")
        #top_k = st.sidebar.number_input("Top-K", min_value=1, max_value=100, value=5)
        #top_p = st.sidebar.number_input("Top-P", min_value=0.0, max_value=1.0, value=0.8)
        #temperature = st.sidebar.number_input("Temperature", min_value=0.1, max_value=2.0, value=1.5)

        if uploaded_file is not None:
            # Reads and saves uploaded image file
            bytes_data = uploaded_file.read()
            with open("uploaded_image.jpg", "wb") as file:
                file.write(bytes_data)

            st.image(uploaded_file, caption='πΌοΈ Uploaded Image', use_column_width=True)

            # Initiates AI processing and story generation
            with st.spinner("## π€ AI is at Work! "):
                scenario = img2txt("uploaded_image.jpg")  # Extracts text from the image
                story = generate_story(scenario, llm)  # Generates a story based on the image text
                txt2speech(story)  # Converts the story to audio

            st.markdown("---")
            st.markdown("## π Image Caption")
            st.write(scenario)

            st.markdown("---")
            st.markdown("## π Story")
            st.write(story)

            st.markdown("---")
            st.markdown("## π§ Audio Story")
            st.audio("audio_story.mp3")

    with st.sidebar.expander("Image Generator"):
        st.title("Stable Diffusion Image Generation")
        st.write("This app lets you generate images using Stable Diffusion with the Euler scheduler.")

        prompt = st.text_input("Enter your prompt:")
        image_style = st.selectbox("Style Selection", ["realistic", "cartoon", "watercolor"])

        if st.button("Generate Image"):
            if prompt:
                with st.spinner("Generating image..."):
                    # txt2img's first parameter is named ``text``; the former
                    # ``prompt=`` keyword raised TypeError, so the prompt is
                    # passed positionally.
                    image = txt2img(prompt, style=image_style)
                st.image(image)
            else:
                st.error("Please enter a prompt.")

    st.title("Welcome to your Creative Canvas!")
    st.write("Use the tools in the sidebar to create audio stories and unique images.")


if __name__ == '__main__':
    main()