Spaces:
Sleeping
Sleeping
Pushing First version before making full changes
Browse files- .dockerignore +34 -0
- .gcloudignore +19 -0
- .gitignore +91 -0
- Dockerfile +49 -0
- LICENSE +21 -0
- Procfile +1 -0
- README copy.md +100 -0
- app.py +44 -0
- compose.yaml +49 -0
- config.py +15 -0
- main.py +128 -0
- notebooks/audioverse.ipynb +228 -0
- requirements.txt +36 -0
- utils/__init__.py +1 -0
- utils/audio_utils.py +54 -0
- utils/caption_utils.py +73 -0
- utils/image_utils.py +21 -0
- utils/model_serving/__init__.py +0 -0
- utils/model_serving/blip_serve.py +30 -0
- utils/model_serving/model_loader.py +28 -0
- utils/topic_generation.py +51 -0
.dockerignore
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Include any files or directories that you don't want to be copied to your
|
2 |
+
# container here (e.g., local build artifacts, temporary files, etc.).
|
3 |
+
#
|
4 |
+
# For more help, visit the .dockerignore file reference guide at
|
5 |
+
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
6 |
+
|
7 |
+
**/.DS_Store
|
8 |
+
**/__pycache__
|
9 |
+
**/.venv
|
10 |
+
**/.classpath
|
11 |
+
**/.dockerignore
|
12 |
+
**/.env
|
13 |
+
**/.git
|
14 |
+
**/.gitignore
|
15 |
+
**/.project
|
16 |
+
**/.settings
|
17 |
+
**/.toolstarget
|
18 |
+
**/.vs
|
19 |
+
**/.vscode
|
20 |
+
**/*.*proj.user
|
21 |
+
**/*.dbmdl
|
22 |
+
**/*.jfm
|
23 |
+
**/bin
|
24 |
+
**/charts
|
25 |
+
**/docker-compose*
|
26 |
+
**/compose*
|
27 |
+
**/Dockerfile*
|
28 |
+
**/node_modules
|
29 |
+
**/npm-debug.log
|
30 |
+
**/obj
|
31 |
+
**/secrets.dev.yaml
|
32 |
+
**/values.dev.yaml
|
33 |
+
LICENSE
|
34 |
+
README.md
|
.gcloudignore
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file specifies files that are *not* uploaded to Google Cloud
|
2 |
+
# using gcloud. It follows the same syntax as .gitignore, with the addition of
|
3 |
+
# "#!include" directives (which insert the entries of the given .gitignore-style
|
4 |
+
# file at that point).
|
5 |
+
#
|
6 |
+
# For more information, run:
|
7 |
+
# $ gcloud topic gcloudignore
|
8 |
+
#
|
9 |
+
.gcloudignore
|
10 |
+
# If you would like to upload your .git directory, .gitignore file or files
|
11 |
+
# from your .gitignore file, remove the corresponding line
|
12 |
+
# below:
|
13 |
+
.git
|
14 |
+
.gitignore
|
15 |
+
|
16 |
+
# Python pycache:
|
17 |
+
__pycache__/
|
18 |
+
# Ignored by the build system
|
19 |
+
/setup.cfg
|
.gitignore
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
|
5 |
+
# C extensions
|
6 |
+
*.so
|
7 |
+
|
8 |
+
# Distribution / packaging
|
9 |
+
.Python
|
10 |
+
env/
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
*.egg-info/
|
23 |
+
.installed.cfg
|
24 |
+
*.egg
|
25 |
+
|
26 |
+
# PyInstaller
|
27 |
+
# Usually these files are written by a python script from a template
|
28 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
29 |
+
*.manifest
|
30 |
+
*.spec
|
31 |
+
|
32 |
+
# Installer logs
|
33 |
+
pip-log.txt
|
34 |
+
pip-delete-this-directory.txt
|
35 |
+
|
36 |
+
# Unit test / coverage reports
|
37 |
+
htmlcov/
|
38 |
+
.tox/
|
39 |
+
.coverage
|
40 |
+
.coverage.*
|
41 |
+
.cache
|
42 |
+
nosetests.xml
|
43 |
+
coverage.xml
|
44 |
+
*.cover
|
45 |
+
|
46 |
+
# Translations
|
47 |
+
*.mo
|
48 |
+
*.pot
|
49 |
+
|
50 |
+
# Django stuff:
|
51 |
+
*.log
|
52 |
+
|
53 |
+
# Sphinx documentation
|
54 |
+
docs/_build/
|
55 |
+
|
56 |
+
# PyBuilder
|
57 |
+
target/
|
58 |
+
|
59 |
+
# DotEnv configuration
|
60 |
+
.env
|
61 |
+
|
62 |
+
# Database
|
63 |
+
*.db
|
64 |
+
*.rdb
|
65 |
+
|
66 |
+
# Pycharm
|
67 |
+
.idea
|
68 |
+
|
69 |
+
# VS Code
|
70 |
+
.vscode/
|
71 |
+
rediones.code-workspace
|
72 |
+
|
73 |
+
# Spyder
|
74 |
+
.spyproject/
|
75 |
+
|
76 |
+
# Jupyter NB Checkpoints
|
77 |
+
.ipynb_checkpoints/
|
78 |
+
|
79 |
+
# exclude data from source control by default
|
80 |
+
/data/
|
81 |
+
|
82 |
+
# Mac OS-specific storage files
|
83 |
+
.DS_Store
|
84 |
+
|
85 |
+
# vim
|
86 |
+
*.swp
|
87 |
+
*.swo
|
88 |
+
|
89 |
+
# Mypy cache
|
90 |
+
.mypy_cache/
|
91 |
+
.venv
|
Dockerfile
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# syntax=docker/dockerfile:1
|
2 |
+
|
3 |
+
# Comments are provided throughout this file to help you get started.
|
4 |
+
# If you need more help, visit the Dockerfile reference guide at
|
5 |
+
# https://docs.docker.com/engine/reference/builder/
|
6 |
+
|
7 |
+
ARG PYTHON_VERSION=3.12
|
8 |
+
FROM python:${PYTHON_VERSION}-slim as base
|
9 |
+
|
10 |
+
# Prevents Python from writing pyc files.
|
11 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
12 |
+
|
13 |
+
# Keeps Python from buffering stdout and stderr to avoid situations where
|
14 |
+
# the application crashes without emitting any logs due to buffering.
|
15 |
+
ENV PYTHONUNBUFFERED=1
|
16 |
+
|
17 |
+
WORKDIR /app
|
18 |
+
|
19 |
+
# Create a non-privileged user that the app will run under.
|
20 |
+
# See https://docs.docker.com/go/dockerfile-user-best-practices/
|
21 |
+
ARG UID=10001
|
22 |
+
RUN adduser \
|
23 |
+
--disabled-password \
|
24 |
+
--gecos "" \
|
25 |
+
--home "/nonexistent" \
|
26 |
+
--shell "/sbin/nologin" \
|
27 |
+
--no-create-home \
|
28 |
+
--uid "${UID}" \
|
29 |
+
appuser
|
30 |
+
|
31 |
+
# Download dependencies as a separate step to take advantage of Docker's caching.
|
32 |
+
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
|
33 |
+
# Leverage a bind mount to requirements.txt to avoid having to copy them into
|
34 |
+
# into this layer.
|
35 |
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
36 |
+
--mount=type=bind,source=requirements.txt,target=requirements.txt \
|
37 |
+
python -m pip install -r requirements.txt
|
38 |
+
|
39 |
+
# Switch to the non-privileged user to run the application.
|
40 |
+
USER appuser
|
41 |
+
|
42 |
+
# Copy the source code into the container.
|
43 |
+
COPY . .
|
44 |
+
|
45 |
+
# Expose the port that the application listens on.
|
46 |
+
EXPOSE 8000
|
47 |
+
|
48 |
+
# Run the application.
|
49 |
+
# Bug fix: the previous CMD pointed at a module inside a local .venv
# (httpx's ASGI transport), which does not exist in the image. The FastAPI
# application object is `app` in main.py.
CMD uvicorn main:app --host=0.0.0.0 --port=8000
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Testimony Adekoya
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
Procfile
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Bug fix: the repo contains no api.py; the FastAPI app is defined in main.py.
web: uvicorn main:app --host 0.0.0.0 --port 10000
|
README copy.md
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Rediones-AI API Documentation
|
2 |
+
|
3 |
+
## Overview
|
4 |
+
Welcome to the Rediones-AI API, a marvel of machine learning that powers the mobile application of Rediones. This API is a collection of endpoints designed to enrich your applications with advanced AI capabilities, including topic generation from images and text, and converting text to realistic voices.
|
5 |
+
|
6 |
+
## Base URL
|
7 |
+
|
8 |
+
All API requests should be made to: [https://api.rediones.com](https://aerial-mission-407204.ue.r.appspot.com/)
|
9 |
+
|
10 |
+
## Endpoints
|
11 |
+
|
12 |
+
### Home
|
13 |
+
|
14 |
+
- **Endpoint**: `/`
|
15 |
+
- **Method**: `GET`
|
16 |
+
- **Description**: The welcoming endpoint of the API, guiding you to the treasure trove of AI capabilities.
|
17 |
+
- **Response**:
|
18 |
+
```json
|
19 |
+
{
|
20 |
+
"message": "Welcome To Rediones API"
|
21 |
+
}
|
22 |
+
```
|
23 |
+
|
24 |
+
### Health Check
|
25 |
+
|
26 |
+
- **Endpoint**: `/health`
|
27 |
+
- **Method**: `GET`
|
28 |
+
- **Description**: Checks the pulse of the API to ensure it's up and running.
|
29 |
+
- **Response**:
|
30 |
+
```json
|
31 |
+
{
|
32 |
+
"message": "OK"
|
33 |
+
}
|
34 |
+
```
|
35 |
+
|
36 |
+
### Topic Generation
|
37 |
+
|
38 |
+
- **Endpoint**: `/topicgen`
|
39 |
+
- **Method**: `POST`
|
40 |
+
- **Description**: Generates topics based on the provided image or text. For images, it captions the image and then generates topics. For text, it directly generates topics.
|
41 |
+
- **Request**:
|
42 |
+
- **Form Data**:
|
43 |
+
- `img`: UploadFile (optional) - The image file for captioning and topic generation.
|
44 |
+
- `text`: string (optional) - The text to generate topics from.
|
45 |
+
- `img_url`: string (optional) - The URL of an image to use for captioning and topic generation.
|
46 |
+
|
47 |
+
- **JSON For text**:
|
48 |
+
```json
|
49 |
+
{
|
50 |
+
"text": "The existential crisis of a teapot"
|
51 |
+
}
|
52 |
+
```
|
53 |
+
|
54 |
+
- **Response**:
|
55 |
+
```json
|
56 |
+
{
|
57 |
+
"topics": ["Philosophy of Inanimate Objects", "The Teapot's Lament"]
|
58 |
+
}
|
59 |
+
```
|
60 |
+
- **Errors**:
|
61 |
+
- 400: "Only one of image_url or image can be accepted."
|
62 |
+
- 400: "Provide at least text or an image."
|
63 |
+
|
64 |
+
### Audioverse
|
65 |
+
|
66 |
+
- **Endpoint**: `/audioverse`
|
67 |
+
- **Method**: `POST`
|
68 |
+
- **Description**: Converts provided text into realistic voices, with functionality for voice cloning.
|
69 |
+
- **Request**:
|
70 |
+
- **Body**:
|
71 |
+
```json
|
72 |
+
{
|
73 |
+
"text": "Hello World"
|
74 |
+
}
|
75 |
+
```
|
76 |
+
- **Response**:
|
77 |
+
```json
|
78 |
+
{
|
79 |
+
"audio_base64": "dGhpcyBpcyBub3QgcmVhbGx5IGJhc2U2NCwgYnV0IHlvdSBnZXQgdGhlIGlkZWE="
|
80 |
+
}
|
81 |
+
```
|
82 |
+
|
83 |
+
## Rate Limits
|
84 |
+
You're limited to 1000 requests per day. Exceed this limit, and you'll receive a 429 Too Many Requests error.
|
85 |
+
|
86 |
+
## Errors
|
87 |
+
|
88 |
+
Understand how our API communicates issues:
|
89 |
+
- `400 Bad Request`: Your request is missing something or formatted incorrectly.
|
90 |
+
- `401 Unauthorized`: You're not authenticated. Make sure your API key is correct.
|
91 |
+
- `429 Too Many Requests`: You've hit the rate limit. Slow down, turbo.
|
92 |
+
|
93 |
+
## Versioning
|
94 |
+
|
95 |
+
We're always improving. Keep an eye on our changelog for updates.
|
96 |
+
|
97 |
+
## Getting Help
|
98 |
+
|
99 |
+
Lost in the wilderness of our API? Have questions or need support? Contact us at [email protected]
|
100 |
+
|
app.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils.image_utils import UrlTest
|
2 |
+
from utils.caption_utils import ImageCaptioning
|
3 |
+
from utils.topic_generation import TopicGenerator
|
4 |
+
import streamlit as st
|
5 |
+
|
6 |
+
topic_generator = TopicGenerator()
|
7 |
+
img_caption = ImageCaptioning()
|
8 |
+
path_out = UrlTest()
|
9 |
+
|
10 |
+
|
11 |
+
def return_caption(img):
    """Caption *img*, render it in the Streamlit app, and show generated topics.

    Bug fix: ``st.image`` has no ``height`` keyword (the original call raised
    ``TypeError: image() got an unexpected keyword argument 'height'``), so the
    image is sized with ``width`` only.
    """
    capt = img_caption.get_caption(img)
    st.image(image=img, caption=capt, width=250)
    generated_topics = topic_generator.generate_topics(capt)
    st.write(f"Topic: {generated_topics}")
|
16 |
+
|
17 |
+
|
18 |
+
def main():
    """Streamlit entry point: route Text / Image / Image URL input to topic generation."""
    st.title("Topic/Title Generator")

    # User input mode selector.
    user_input = st.selectbox(label="Text Input or Image Input", options=["Text", "Image", "Image URL"])
    if user_input == "Text":
        text_input = st.text_input(label="Put in your Idea, Let's generate a matching Topic/Title 🤗🤗")
        # Guard: on first render the text box is empty; don't generate a topic
        # for an empty prompt.
        if text_input:
            generated_topics = topic_generator.generate_topics(text_input)
            st.write(f"Topic:{generated_topics}")
    elif user_input == "Image":
        img_input = st.file_uploader(label="Drop an Image you have been admiring, Let's see what we can do🤔🤔",
                                     type=["jpg", "png", "jpeg"],
                                     accept_multiple_files=True)
        # With accept_multiple_files=True the uploader yields a list of files.
        for in_img in img_input:
            if in_img is not None:
                img_loaded = path_out.load_image(in_img)
                return_caption(img_loaded)

    elif user_input == "Image URL":
        url_input = st.text_input(label="Do you have a link to the Image you would like to drop, Go Ahead and We got "
                                        "you covered😉😉")
        # Guard: only fetch/caption once the user has actually entered a URL;
        # the original called check_url("") on every render.
        if url_input:
            url_img = path_out.check_url(url_input)
            return_caption(url_img)


if __name__ == "__main__":
    main()
|
compose.yaml
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Comments are provided throughout this file to help you get started.
|
2 |
+
# If you need more help, visit the Docker compose reference guide at
|
3 |
+
# https://docs.docker.com/compose/compose-file/
|
4 |
+
|
5 |
+
# Here the instructions define your application as a service called "server".
|
6 |
+
# This service is built from the Dockerfile in the current directory.
|
7 |
+
# You can add other services your application may depend on here, such as a
|
8 |
+
# database or a cache. For examples, see the Awesome Compose repository:
|
9 |
+
# https://github.com/docker/awesome-compose
|
10 |
+
services:
|
11 |
+
server:
|
12 |
+
build:
|
13 |
+
context: .
|
14 |
+
ports:
|
15 |
+
- 8000:8000
|
16 |
+
|
17 |
+
# The commented out section below is an example of how to define a PostgreSQL
|
18 |
+
# database that your application can use. `depends_on` tells Docker Compose to
|
19 |
+
# start the database before your application. The `db-data` volume persists the
|
20 |
+
# database data between container restarts. The `db-password` secret is used
|
21 |
+
# to set the database password. You must create `db/password.txt` and add
|
22 |
+
# a password of your choosing to it before running `docker compose up`.
|
23 |
+
# depends_on:
|
24 |
+
# db:
|
25 |
+
# condition: service_healthy
|
26 |
+
# db:
|
27 |
+
# image: postgres
|
28 |
+
# restart: always
|
29 |
+
# user: postgres
|
30 |
+
# secrets:
|
31 |
+
# - db-password
|
32 |
+
# volumes:
|
33 |
+
# - db-data:/var/lib/postgresql/data
|
34 |
+
# environment:
|
35 |
+
# - POSTGRES_DB=example
|
36 |
+
# - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
|
37 |
+
# expose:
|
38 |
+
# - 5432
|
39 |
+
# healthcheck:
|
40 |
+
# test: [ "CMD", "pg_isready" ]
|
41 |
+
# interval: 10s
|
42 |
+
# timeout: 5s
|
43 |
+
# retries: 5
|
44 |
+
# volumes:
|
45 |
+
# db-data:
|
46 |
+
# secrets:
|
47 |
+
# db-password:
|
48 |
+
# file: db/password.txt
|
49 |
+
|
config.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import AnyHttpUrl
from pydantic_settings import BaseSettings, SettingsConfigDict
from dotenv import load_dotenv

# Load variables from a local .env file before Settings reads the environment.
load_dotenv()


class Settings(BaseSettings):
    """Application settings sourced from the environment (.env supported)."""

    PROJECT_NAME: str = "Rediones API"
    # BACKEND_CORS_ORIGINS: list[AnyHttpUrl] = ["*"]

    # Bug fix: was `gcase_sensitive=True` — an unknown key that
    # SettingsConfigDict silently ignored; the intended option is
    # `case_sensitive`.
    model_config = SettingsConfigDict(case_sensitive=True)


settings = Settings()
|
main.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
from typing import Annotated, Optional
|
3 |
+
from fastapi import (Body, FastAPI, File, Form, HTTPException, UploadFile,
|
4 |
+
status)
|
5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
6 |
+
from fastapi.responses import JSONResponse
|
7 |
+
from pydantic import AnyHttpUrl, UrlConstraints
|
8 |
+
|
9 |
+
from config import settings
|
10 |
+
import uvicorn
|
11 |
+
from utils.audio_utils import AudioUtils
|
12 |
+
from utils.caption_utils import ImageCaptioning
|
13 |
+
from utils.image_utils import UrlTest
|
14 |
+
from utils.topic_generation import TopicGenerator
|
15 |
+
|
16 |
+
app = FastAPI(
    title=settings.PROJECT_NAME,
)

# CORS: allow any origin (public API consumed by the mobile app).
# Bug fix: `allow_origins` expects a sequence of origin strings; the bare
# string '*' only worked by accident of substring matching. The meaningless
# `if settings:` guard (a Settings instance is always truthy) was dropped.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level singletons shared by all request handlers.
topic_generator = TopicGenerator()
img_caption = ImageCaptioning()
audio_utils = AudioUtils()
utils = UrlTest()
34 |
+
|
35 |
+
|
36 |
+
@app.get("/")
|
37 |
+
def root():
|
38 |
+
return {"message": "Welcome To Rediones API"}
|
39 |
+
|
40 |
+
|
41 |
+
@app.get("/health")
|
42 |
+
def health():
|
43 |
+
return {"message": "OK"}
|
44 |
+
|
45 |
+
|
46 |
+
@app.post("/topicgen")
|
47 |
+
def generate_topic(
|
48 |
+
img: UploadFile = File(
|
49 |
+
default=None,
|
50 |
+
description="Image file. It mutually excludes ImgUrl",
|
51 |
+
# regex=r"^.+\.(jpg|png|jpeg)$"
|
52 |
+
),
|
53 |
+
text: Annotated[Optional[str], Form()] = None,
|
54 |
+
img_url: Annotated[
|
55 |
+
Optional[AnyHttpUrl],
|
56 |
+
UrlConstraints(allowed_schemes=["https"]),
|
57 |
+
Form(description=(
|
58 |
+
"Image url only accepts https scheme. It mutually excludes Img"
|
59 |
+
))
|
60 |
+
] = None,
|
61 |
+
):
|
62 |
+
if img_url and img:
|
63 |
+
raise HTTPException(
|
64 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
65 |
+
detail="Only one of image_url or img can be accepted"
|
66 |
+
)
|
67 |
+
# if only text is provided
|
68 |
+
elif text is not None and img is None and img_url is None:
|
69 |
+
generated_topics = topic_generator.generate_topics(text)
|
70 |
+
return {"topics": generated_topics}
|
71 |
+
|
72 |
+
# if image/image_url is provided with or without text
|
73 |
+
elif img or img_url or text:
|
74 |
+
img_file_object = None # initialize img_file_object
|
75 |
+
# decide whether img or img_url is provided
|
76 |
+
if img:
|
77 |
+
# image file must be ended with .jpg, .png, .jpeg
|
78 |
+
if not str(img.filename).endswith(
|
79 |
+
(".jpg", ".png", ".jpeg")
|
80 |
+
):
|
81 |
+
raise HTTPException(
|
82 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
83 |
+
detail="Image file must be ended with .jpg, .png, .jpeg"
|
84 |
+
)
|
85 |
+
img_file_object = img.file
|
86 |
+
elif img_url:
|
87 |
+
img_file_object = utils.load_image(img_url)
|
88 |
+
|
89 |
+
# decide whether text is provided
|
90 |
+
if text is None:
|
91 |
+
capt = img_caption.get_caption(img_file_object)
|
92 |
+
else:
|
93 |
+
capt = str(text) + "." + img_caption.get_caption(img_file_object)
|
94 |
+
|
95 |
+
generated_topics = topic_generator.generate_topics(capt)
|
96 |
+
return {"topics": generated_topics}
|
97 |
+
|
98 |
+
else:
|
99 |
+
raise HTTPException(
|
100 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
101 |
+
detail="enter text or image. "
|
102 |
+
"imageurl and img are mutually exclusive"
|
103 |
+
)
|
104 |
+
|
105 |
+
|
106 |
+
@app.post("/audioverse")
|
107 |
+
def generate_audio(
|
108 |
+
text: Annotated[str, Body(description="Text to be transcribed.")]
|
109 |
+
):
|
110 |
+
if text is not None:
|
111 |
+
audio_bytes = audio_utils.speak(text)
|
112 |
+
audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
|
113 |
+
return JSONResponse(content={"audio_base64": audio_base64})
|
114 |
+
|
115 |
+
@app.post("/transcribe")
|
116 |
+
def transcribe_audio(
|
117 |
+
audio: UploadFile = File(
|
118 |
+
default=None,
|
119 |
+
description="Audio file to be transcribed."
|
120 |
+
)
|
121 |
+
):
|
122 |
+
if audio is not None:
|
123 |
+
audio_transcribe = audio_utils.improved_transcribe(0.8, audio_file=audio.file)
|
124 |
+
return JSONResponse(content={"audio_transcription": audio_transcribe})
|
125 |
+
|
126 |
+
|
127 |
+
if __name__ == "__main__":
|
128 |
+
uvicorn.run(app, host="127.0.0.1", port=8000)
|
notebooks/audioverse.ipynb
ADDED
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 4,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from openai import OpenAI\n",
|
10 |
+
"from dotenv import load_dotenv\n",
|
11 |
+
"import os"
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": 5,
|
17 |
+
"metadata": {},
|
18 |
+
"outputs": [
|
19 |
+
{
|
20 |
+
"data": {
|
21 |
+
"text/plain": [
|
22 |
+
"True"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
"execution_count": 5,
|
26 |
+
"metadata": {},
|
27 |
+
"output_type": "execute_result"
|
28 |
+
}
|
29 |
+
],
|
30 |
+
"source": [
|
31 |
+
"load_dotenv()"
|
32 |
+
]
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"cell_type": "code",
|
36 |
+
"execution_count": 6,
|
37 |
+
"metadata": {},
|
38 |
+
"outputs": [],
|
39 |
+
"source": [
|
40 |
+
"openai_key = os.getenv(\"OPENAI\")"
|
41 |
+
]
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"cell_type": "code",
|
45 |
+
"execution_count": 7,
|
46 |
+
"metadata": {},
|
47 |
+
"outputs": [],
|
48 |
+
"source": [
|
49 |
+
"client = OpenAI(api_key=openai_key)"
|
50 |
+
]
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"cell_type": "code",
|
54 |
+
"execution_count": 13,
|
55 |
+
"metadata": {},
|
56 |
+
"outputs": [],
|
57 |
+
"source": [
|
58 |
+
"response = client.audio.speech.create(\n",
|
59 |
+
" model=\"tts-1-hd\",\n",
|
60 |
+
" voice=\"alloy\",\n",
|
61 |
+
" input=\"My Name is Testimony[laughs]\"\n",
|
62 |
+
")"
|
63 |
+
]
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"cell_type": "code",
|
67 |
+
"execution_count": 14,
|
68 |
+
"metadata": {},
|
69 |
+
"outputs": [
|
70 |
+
{
|
71 |
+
"name": "stderr",
|
72 |
+
"output_type": "stream",
|
73 |
+
"text": [
|
74 |
+
"/tmp/ipykernel_39704/3198597338.py:1: DeprecationWarning: Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead\n",
|
75 |
+
" response.stream_to_file(\"../data/output.mp3\")\n"
|
76 |
+
]
|
77 |
+
}
|
78 |
+
],
|
79 |
+
"source": [
|
80 |
+
"response.stream_to_file(\"../data/output.mp3\")"
|
81 |
+
]
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"cell_type": "code",
|
85 |
+
"execution_count": 16,
|
86 |
+
"metadata": {},
|
87 |
+
"outputs": [],
|
88 |
+
"source": [
|
89 |
+
"audio_file = open(\"../data/output.mp3\", \"rb\")"
|
90 |
+
]
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"cell_type": "code",
|
94 |
+
"execution_count": 17,
|
95 |
+
"metadata": {},
|
96 |
+
"outputs": [],
|
97 |
+
"source": [
|
98 |
+
"text_response = client.audio.transcriptions.create(\n",
|
99 |
+
" model=\"whisper-1\",\n",
|
100 |
+
" file= audio_file,\n",
|
101 |
+
" response_format= 'text'\n",
|
102 |
+
")"
|
103 |
+
]
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"cell_type": "code",
|
107 |
+
"execution_count": 19,
|
108 |
+
"metadata": {},
|
109 |
+
"outputs": [
|
110 |
+
{
|
111 |
+
"data": {
|
112 |
+
"text/plain": [
|
113 |
+
"'My name is testimony.\\n'"
|
114 |
+
]
|
115 |
+
},
|
116 |
+
"execution_count": 19,
|
117 |
+
"metadata": {},
|
118 |
+
"output_type": "execute_result"
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"source": [
|
122 |
+
"text_response"
|
123 |
+
]
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"cell_type": "code",
|
127 |
+
"execution_count": 23,
|
128 |
+
"metadata": {},
|
129 |
+
"outputs": [],
|
130 |
+
"source": [
|
131 |
+
"\n",
|
132 |
+
"\n",
|
133 |
+
"\n",
|
134 |
+
"def transcribe(audio_file):\n",
|
135 |
+
" transcription = client.audio.transcriptions.create(\n",
|
136 |
+
" model=\"whisper-1\", \n",
|
137 |
+
" file=audio_file, \n",
|
138 |
+
" response_format=\"text\",\n",
|
139 |
+
" prompt=\"ZyntriQix, Digique Plus, CynapseFive, VortiQore V8, EchoNix Array, OrbitalLink Seven, DigiFractal Matrix, PULSE, RAPT, B.R.I.C.K., Q.U.A.R.T.Z., F.L.I.N.T.\"\n",
|
140 |
+
" )\n",
|
141 |
+
" return transcription"
|
142 |
+
]
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"cell_type": "code",
|
146 |
+
"execution_count": 28,
|
147 |
+
"metadata": {},
|
148 |
+
"outputs": [],
|
149 |
+
"source": [
|
150 |
+
"system_prompt = \n",
|
151 |
+
"def generate_corrected_transcript(temperature, system_prompt, audio_file):\n",
|
152 |
+
" response = client.chat.completions.create(\n",
|
153 |
+
" model=\"gpt-3.5-turbo-16k\",\n",
|
154 |
+
" temperature=temperature,\n",
|
155 |
+
" messages=[\n",
|
156 |
+
" {\n",
|
157 |
+
" \"role\": \"system\",\n",
|
158 |
+
" \"content\": system_prompt\n",
|
159 |
+
" },\n",
|
160 |
+
" {\n",
|
161 |
+
" \"role\": \"user\",\n",
|
162 |
+
" \"content\": transcribe(audio_file)\n",
|
163 |
+
" }\n",
|
164 |
+
" ]\n",
|
165 |
+
" )\n",
|
166 |
+
" return response.choices[0].message.content\n"
|
167 |
+
]
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"cell_type": "code",
|
171 |
+
"execution_count": 29,
|
172 |
+
"metadata": {},
|
173 |
+
"outputs": [],
|
174 |
+
"source": [
|
175 |
+
"audio_fil = open(\"../data/output.mp3\", \"rb\")\n",
|
176 |
+
"corrected_text = generate_corrected_transcript(0.7, system_prompt, audio_file=audio_fil)\n"
|
177 |
+
]
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"cell_type": "code",
|
181 |
+
"execution_count": 30,
|
182 |
+
"metadata": {},
|
183 |
+
"outputs": [
|
184 |
+
{
|
185 |
+
"data": {
|
186 |
+
"text/plain": [
|
187 |
+
"'My name is Testimony.'"
|
188 |
+
]
|
189 |
+
},
|
190 |
+
"execution_count": 30,
|
191 |
+
"metadata": {},
|
192 |
+
"output_type": "execute_result"
|
193 |
+
}
|
194 |
+
],
|
195 |
+
"source": [
|
196 |
+
"corrected_text"
|
197 |
+
]
|
198 |
+
},
|
199 |
+
{
|
200 |
+
"cell_type": "code",
|
201 |
+
"execution_count": null,
|
202 |
+
"metadata": {},
|
203 |
+
"outputs": [],
|
204 |
+
"source": []
|
205 |
+
}
|
206 |
+
],
|
207 |
+
"metadata": {
|
208 |
+
"kernelspec": {
|
209 |
+
"display_name": "base",
|
210 |
+
"language": "python",
|
211 |
+
"name": "python3"
|
212 |
+
},
|
213 |
+
"language_info": {
|
214 |
+
"codemirror_mode": {
|
215 |
+
"name": "ipython",
|
216 |
+
"version": 3
|
217 |
+
},
|
218 |
+
"file_extension": ".py",
|
219 |
+
"mimetype": "text/x-python",
|
220 |
+
"name": "python",
|
221 |
+
"nbconvert_exporter": "python",
|
222 |
+
"pygments_lexer": "ipython3",
|
223 |
+
"version": "3.11.5"
|
224 |
+
}
|
225 |
+
},
|
226 |
+
"nbformat": 4,
|
227 |
+
"nbformat_minor": 2
|
228 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
requests~=2.27.1
|
2 |
+
pillow~=10.2.0
|
3 |
+
streamlit
|
4 |
+
openai
|
5 |
+
annotated-types==0.6.0
|
6 |
+
sentencepiece~=0.1.99
|
7 |
+
anyio==3.7.1
|
8 |
+
certifi==2023.11.17
|
9 |
+
click==8.1.7
|
10 |
+
dnspython==2.4.2
|
11 |
+
email-validator==2.1.0.post1
|
12 |
+
exceptiongroup==1.2.0
|
13 |
+
fastapi==0.105.0
|
14 |
+
h11==0.14.0
|
15 |
+
httpcore==1.0.2
|
16 |
+
httptools==0.6.1
|
17 |
+
httpx==0.25.2
|
18 |
+
idna==3.6
|
19 |
+
itsdangerous==2.1.2
|
20 |
+
Jinja2==3.1.2
|
21 |
+
MarkupSafe==2.1.3
|
22 |
+
orjson==3.9.10
|
23 |
+
pydantic==2.5.2
|
24 |
+
pydantic-core==2.14.5
|
25 |
+
pydantic-extra-types==2.2.0
|
26 |
+
pydantic-settings==2.1.0
|
27 |
+
python-dotenv==1.0.0
|
28 |
+
python-multipart==0.0.6
|
29 |
+
PyYAML==6.0.1
|
30 |
+
sniffio==1.3.0
|
31 |
+
starlette==0.27.0
|
32 |
+
typing-extensions==4.9.0
|
33 |
+
ujson==5.9.0
|
34 |
+
uvicorn==0.24.0.post1
|
35 |
+
watchfiles==0.21.0
|
36 |
+
websockets==12.0
|
utils/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
utils/audio_utils.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import os
|
4 |
+
|
5 |
+
load_dotenv()
|
6 |
+
|
7 |
+
openai_key = os.getenv("OPENAI")
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
class AudioUtils:
    """Thin wrapper around the OpenAI audio APIs (TTS and Whisper transcription)."""

    def __init__(self):
        # The key is read from the OPENAI environment variable at module import.
        self.client = OpenAI(api_key=openai_key)

    def speak(self, text="Hello World!"):
        """Return synthesized speech for *text* as raw audio bytes (tts-1-hd, 'alloy')."""
        print("Model Loaded")
        response = self.client.audio.speech.create(
            model="tts-1-hd",
            voice="alloy",
            input=text,
        )

        return response.content

    def transcribe(self, audio):
        """Transcribe *audio* (a binary file object) with Whisper; returns plain text."""
        response = self.client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            response_format="text",
            temperature=0.1,
        )

        return response

    def improved_transcribe(self, temperature, audio_file):
        """Transcribe *audio_file* and post-correct the transcript with a chat model.

        Bug fix: the original unconditionally called ``open(audio_file, "rb")``,
        which raises TypeError when callers (e.g. the /transcribe endpoint)
        pass an already-open file object, and it then handed raw *bytes* to
        ``transcribe``, which expects a file object. Both filesystem paths and
        file objects are now accepted, and the file object itself is forwarded
        to Whisper.
        """
        if isinstance(audio_file, (str, bytes, os.PathLike)):
            with open(audio_file, "rb") as file:
                transcript = self.transcribe(file)
        else:
            transcript = self.transcribe(audio_file)

        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo-1106",
            temperature=temperature,
            messages=[
                {
                    "role": "system",
                    "content":"You are a helpful assistant for the company Rediones. Your task is to correct any spelling discrepancies in the transcribed text. Make sure that the names of the following products are spelled correctly: ZyntriQix, Digique Plus, CynapseFive, VortiQore V8, EchoNix Array, OrbitalLink Seven, DigiFractal Matrix, PULSE, RAPT, B.R.I.C.K., Q.U.A.R.T.Z., F.L.I.N.T. Only add necessary punctuation such as periods, commas, and capitalization, and use only the context provided."
                },
                {
                    "role": "user",
                    "content": transcript
                }
            ]
        )
        return response.choices[0].message.content
|
54 |
+
|
utils/caption_utils.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
|
2 |
+
import torch
|
3 |
+
import requests
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from image_utils import UrlTest
|
6 |
+
import os
|
7 |
+
|
8 |
+
img = UrlTest()
|
9 |
+
|
10 |
+
class ImageCaptioning:
    """BLIP image captioner plus a FLAN-T5 topic generator, combined into one
    caption-then-topics pipeline (see `combo_model`)."""

    def __init__(self):
        # Initialize models and processors once; both are put in eval mode
        # because this class only does inference.
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.blip_model = BlipForConditionalGeneration.from_pretrained('Salesforce/blip-image-captioning-base')
        self.topic_generator_processor = AutoTokenizer.from_pretrained("google/flan-t5-large")
        self.topic_generator_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
        self.blip_model.eval()
        self.topic_generator_model.eval()

    def generate_caption(self, image):
        """Run BLIP on a PIL image and return the raw generated token ids.

        Callers decode the ids themselves (see `combo_model`). Fix: the old
        version also decoded every sequence into a local that was immediately
        discarded — dead code, removed.
        """
        inputs = self.blip_processor(image, return_tensors="pt")
        outputs = self.blip_model.generate(
            pixel_values=inputs["pixel_values"],
            max_new_tokens=128,
            do_sample=True,
            temperature=0.9,
            top_k=50,
            top_p=0.95,
        )
        return outputs

    def generate_topics(self, user_input, num_topics=3):
        """Generate `num_topics` short topic sentences for `user_input` with
        FLAN-T5, using a few-shot prompt. Returns a list of decoded strings."""
        query = f"""Generate a topic sentence idea based on the user input.
        The generated topics should portray the context or idea behind the given sentences or phrase.
        For Instance,
        - "Grocery Shopping" OR "Grocery List" OR "Shopping List": "I'm going grocery shopping tomorrow,
        and I would like to get the following things on my grocery list: Milk, Soybeans, Cowpeas,
        Saturated Water, Onions, Tomatoes, etc."
        - "Studying For Exams" OR "Exams Studies": "Exams aare coming up and I have to prepare for the core
        courses. I'll be studying for Control Systems, Software Engineering and Circuit Theory."
        - "Healthy Breakfast": "To prepare a healthy breakfast, I need the appropriate combination of balanced
        diet. I'll need oats, yogurt, fresh berries, honey and smoothies."
        - "Fitness Routine": "Starting a fitness routine involves workout clothes, running shoes,
        a water bottles, and a gym membership. With this, I can start a proper fitness plan."
        - "Summer Vacation": "Packing swimsuits and enjoy the view of the ocean."
        - "Coffee Break": "Sipping Coffee at the table."
        - "Relaxation": "Sitting at the table enjoying."

        This is what I'm expecting the model to do. Here is the input: {user_input}
        """

        caption_input = self.topic_generator_processor(query, return_tensors="pt", padding=True, truncation=True, max_length=512)
        # NOTE(review): mixing do_sample with num_beams=5 is unusual — sampling
        # within beam search; kept as-is to preserve behavior.
        caption_output = self.topic_generator_model.generate(**caption_input, temperature=0.1, num_return_sequences=num_topics, do_sample=True, max_length=50, top_k=50, top_p=0.95, num_beams=5)
        caption_output = [self.topic_generator_processor.decode(output, skip_special_tokens=True) for output in caption_output]

        return caption_output

    def combo_model(self, image):
        """Load `image` (path or URL), caption it, then derive topics from the
        caption. Returns {"caption": str, "topics": list[str]}."""
        image = img.load_image(image)
        caption = self.generate_caption(image)
        caption = self.blip_processor.decode(caption[0], skip_special_tokens=True)
        topics = self.generate_topics(caption)
        # Drop any empty strings the generator may emit.
        topics = [topic for topic in topics if len(topic) > 0]
        return {"caption": caption,
                "topics": topics}
|
64 |
+
|
65 |
+
|
66 |
+
if __name__ == "__main__":
    # Smoke test: caption a local sample image and print caption + topics.
    captioner = ImageCaptioning()
    sample_image = "1071642.jpg"
    result = captioner.combo_model(sample_image)
    print(result)
|
utils/image_utils.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from PIL import Image
|
3 |
+
import urllib.parse as parse
|
4 |
+
import os
|
5 |
+
|
6 |
+
|
7 |
+
class UrlTest:
    """Helpers to load a PIL image from either a URL or a local file path."""

    def check_url(self, url_string):
        """Return True when `url_string` parses as a complete URL (scheme,
        host, and path all present), False otherwise.

        Fix: the old bare `except` returned the input string itself, which is
        truthy — any string that made `urlparse` raise was then treated as a
        URL by `load_image` and handed to `requests.get`.
        """
        try:
            result = parse.urlparse(url_string)
            return all([result.scheme, result.netloc, result.path])
        except ValueError:
            return False

    # Load an image
    def load_image(self, image_path):
        """Open a PIL image from a URL or an existing local file.

        Fix: previously fell off the end and silently returned None when the
        argument was neither; now fails loudly with a ValueError.
        """
        if self.check_url(image_path):
            return Image.open(requests.get(image_path, stream=True).raw)
        if os.path.exists(image_path):
            return Image.open(image_path)
        raise ValueError(f"Not a URL or an existing file: {image_path!r}")
utils/model_serving/__init__.py
ADDED
File without changes
|
utils/model_serving/blip_serve.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
from transformers import BlipForConditionalGeneration, BlipProcessor
|
3 |
+
import torch
|
4 |
+
import litserve as ls
|
5 |
+
import os
|
6 |
+
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
hf_token = os.getenv("HUGGINGFACE")
|
10 |
+
|
11 |
+
class RedionesBlipModel():
    """BLIP captioning model wrapper shaped for a litserve-style serving API
    (setup/predict lifecycle)."""

    def __init__(self):
        self.model_name = "Salesforce/blip-image-captioning-base"
        # Prefer GPU when available; this device wins over the one the
        # serving framework passes to setup() (kept that way deliberately,
        # matching the original behavior).
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.token = hf_token

    def setup(self, device):
        """Load model and processor, move the model to the resolved device.

        Fix: the old body assigned `device = self.device` into a dead local;
        the intent (use the device chosen in __init__) is now explicit.
        """
        self.model = BlipForConditionalGeneration.from_pretrained(
            self.model_name,
            use_auth_token=self.token,
        )
        self.tokenizer = BlipProcessor.from_pretrained(self.model_name, use_auth_token=self.token)
        self.model.to(self.device)
        self.model.eval()

    def predict(self, image):
        """Generate caption token ids for a PIL image.

        Fix: BlipProcessor called on an image alone produces `pixel_values`,
        not `input_ids`; the old code indexed a missing key and would raise
        KeyError on every request.
        """
        inputs = self.tokenizer(image, return_tensors="pt")
        outputs = self.model.generate(
            pixel_values=inputs["pixel_values"].to(self.device),
            max_new_tokens=50,
        )
        return outputs
|
utils/model_serving/model_loader.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
|
2 |
+
|
3 |
+
|
4 |
+
class ModelLoader:
    """Constructs and hands out the BLIP and FLAN-T5 model/processor pairs."""

    def __init__(self):
        # All four components are instantiated eagerly at construction time.
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.blip_model = BlipForConditionalGeneration.from_pretrained('Salesforce/blip-image-captioning-base')
        self.topic_generator_processor = AutoTokenizer.from_pretrained("google/flan-t5-large")
        self.topic_generator_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")

    def load_blip(self):
        """Return the BLIP (model, processor) pair with the model in eval mode."""
        self.blip_model.eval()
        return self.blip_model, self.blip_processor

    def load_topic_generator(self):
        """Return the FLAN-T5 (model, tokenizer) pair with the model in eval mode."""
        self.topic_generator_model.eval()
        return self.topic_generator_model, self.topic_generator_processor
23 |
+
|
24 |
+
|
25 |
+
# testing the model
# Fix 1: `model_load.blip_model()` called the model attribute itself with no
# inputs (crash) — the intended accessor is `load_blip()`.
# Fix 2: guarded under __main__ so importing this module no longer triggers
# two large model downloads as a side effect.
if __name__ == "__main__":
    model_load = ModelLoader()
    blip_models, blip_processors = model_load.load_blip()
    topic_generator_models, topic_generator_processors = model_load.load_topic_generator()
|
utils/topic_generation.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import os
|
4 |
+
|
5 |
+
load_dotenv()
|
6 |
+
|
7 |
+
huggingface = os.getenv("HUGGINGFACE")
|
8 |
+
|
9 |
+
|
10 |
+
class TopicGenerator:
    """Topic generation through the hosted Hugging Face Inference API
    (google/flan-t5-large), instead of loading the model locally."""

    def __init__(self):
        # Initialize API-URL and authorization headers
        self.url = "https://api-inference.huggingface.co/models/google/flan-t5-large"
        self.headers = {"Authorization": f"Bearer {huggingface}"}

    def query(self, payload):
        """POST `payload` to the inference endpoint and return the raw response.

        Fix: `requests` has no default timeout, so a stalled endpoint could
        hang the caller forever; bounded at 60 seconds.
        """
        response = requests.post(self.url, headers=self.headers,
                                 json=payload, timeout=60)
        return response

    def generate_topics(self, user_input, num_topics=3):
        """Ask the hosted model for `num_topics` topic sentences.

        Returns the decoded JSON on HTTP 200, otherwise an error string
        (interface preserved from the original).
        """
        payload = {
            "inputs": f"""Generate a topic sentence idea based on the user input.
            The generated topics should portray the context or idea behind the given sentences or phrase.
            For Instance,
            - "Grocery Shopping" OR "Grocery List" OR "Shopping List": "I'm going grocery shopping tomorrow,
            and I would like to get the following things on my grocery list: Milk, Soybeans, Cowpeas,
            Saturated Water, Onions, Tomatoes, etc."
            - "Studying For Exams" OR "Exams Studies": "Exams aare coming up and I have to prepare for the core
            courses. I'll be studying for Control Systems, Software Engineering and Circuit Theory."
            - "Healthy Breakfast": "To prepare a healthy breakfast, I need the appropriate combination of balanced
            diet. I'll need oats, yogurt, fresh berries, honey and smoothies."
            - "Fitness Routine": "Starting a fitness routine involves workout clothes, running shoes,
            a water bottles, and a gym membership. With this, I can start a proper fitness plan."
            - "Summer Vacation": "Packing swimsuits and enjoy the view of the ocean."
            - "Coffee Break": "Sipping Coffee at the table."
            - "Relaxation": "Sitting at the table enjoying."

            This is what I'm expecting the model to do. Here is the input: {user_input}
            """,
            "do_sample": True,
            "temperature": 0.7,
            "num_return_sequences": num_topics
        }
        output = self.query(payload)
        if output.status_code == 200:
            topic = output.json()
            return topic
        else:
            return f"Error: Received response code {output.status_code}"