Slyfox12 committed (verified)
Commit 2f12575 · 1 Parent(s): f935a2e

Create app.py

Files changed (1)
  1. app.py +131 -0
app.py ADDED
@@ -0,0 +1,131 @@
from dotenv import load_dotenv, find_dotenv
import requests
import os
import streamlit as st
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf

# print(torch.cuda.is_available())

load_dotenv(find_dotenv())
HUGGINGFACEHUB_API_TOKEN = os.getenv("token")
headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
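
# The Inference API calls below assume a .env file next to app.py (or an environment
# variable) that provides a Hugging Face token under the key "token", read via
# os.getenv("token") above, e.g.:
#   token=hf_xxxxxxxxxxxxxxxx   (placeholder value)
# The same bearer header is reused for both hosted models queried below.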

def img2text(path):

    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"

    def query(filename):
        with open(filename, "rb") as f:
            data = f.read()
        response = requests.post(API_URL, headers=headers, data=data)
        return response.json()

    output = query(path)[0]['generated_text']

    print(output)
    return output
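
# generate_story() sends the prompt template to the hosted Meta-Llama-3-8B-Instruct
# text-generation endpoint (a gated model, so the token presumably needs to have been
# granted access) and expects a response of the form [{"generated_text": "..."}], where
# the returned text appears to echo the prompt before the poem, hence the slicing below.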

def generate_story(scene):
    template = f'''
    You are a poet;
    You can generate a poem from a simple narrative, understand the theme, and use proper rhyming words.
    The poem should not be shorter than 16 lines and not be longer than 20 lines.

    Scenario: {scene}

    Write a poem based on the provided scenario.
    '''

    API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"

    def query(payload):
        response = requests.post(API_URL, headers=headers, json=payload)
        return response.json()

    story = query({
        "inputs": template,
    })

    story = str(story[0]['generated_text']).split("\n")

    # Drop the first 12 lines, which presumably correspond to the echoed prompt template,
    # then rejoin the remaining lines into the poem text.
    story = story[12:]
    s = ""
    for i in story:
        s += (i + "\n")

    story = s
    del s

    print(story)
    return story
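
# gen_audio() runs Parler-TTS Mini locally (the weights are downloaded on first use and
# moved to the GPU when one is available) rather than through the Inference API, and
# writes the narration to audio.wav at the model's sampling rate.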

def gen_audio(message):

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler_tts_mini_v0.1").to(device)
    tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")

    prompt = message
    description = "A female speaker with a slightly low-pitched, quite expressive voice delivers her words at a normal pace in a poetic manner with proper pauses while speaking inside a confined space with very clear audio."

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
    audio_arr = generation.cpu().numpy().squeeze()
    sf.write("audio.wav", audio_arr, model.config.sampling_rate)
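
# main() wires the pieces into the Streamlit UI: the uploaded image is saved to disk
# (img2text() expects a file path), captioned, turned into a poem, narrated, and all
# three results are displayed on the page.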

def main():
    st.set_page_config(page_title="img 2 poem", page_icon="🤖")
    st.header("Turn image into poem")
    uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg", "svg"])

    if uploaded_file is not None:
        print(uploaded_file)
        bytes_data = uploaded_file.getvalue()
        print(bytes_data)
        with open(uploaded_file.name, "wb") as file:
            file.write(bytes_data)

        st.image(uploaded_file, caption="Uploaded Image")

        scenario = img2text(uploaded_file.name)
        story = generate_story(scenario)
        gen_audio(story)

        with st.expander("Scenario"):
            st.write(scenario)
        with st.expander("Poem"):
            st.write(story)

        st.audio("audio.wav")
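
# The entry point below currently runs a local smoke test: it captions "couples.jpg" and
# prints the generated poem, with the Streamlit UI (main()) and the audio step left
# commented out. Serving the UI would mean calling main() here and launching the app
# with `streamlit run app.py`.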

if __name__ == "__main__":
    # main()
    scene = img2text("couples.jpg")
    story = generate_story(scene)
    # gen_audio(story)